/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.List;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");

  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  public static int BLOCKSIZE = 64*1024;
  public static String COMPRESSION =
    Compression.Algorithm.NONE.getName();

  static HBaseTestingUtility util = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.startMiniCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

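  /**
   * Writes one HFile per key range into a per-family directory, creates a table pre-split on
   * SPLIT_KEYS with the given bloom filter type, bulk loads the directory and verifies the
   * expected total row count.
   */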
  private void runTest(String testName, BloomType bloomType,
          byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    final byte[] TABLE = Bytes.toBytes("mytable_"+testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TABLE);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableAvailable(TABLE, 30000);

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    loader.doBulkLoad(dir, table);

    assertEquals(expectedRows, util.countRows(table));
  }

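  /**
   * Bulk loads HFiles for the given key ranges into a new table after a dummy put has bumped
   * the HLog sequence number, then checks the sequence id assigned to each resulting store
   * file: greater than zero when {@code nonZero} is set, otherwise -1.
   */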
  private void
      verifyAssignedSequenceNumber(String testName, byte[][][] hfileRanges, boolean nonZero)
          throws Exception {
    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_" + hfileIdx++), FAMILY,
        QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TABLE);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    htd.addFamily(familyDesc);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableAvailable(TABLE, 30000);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());

    // Do a dummy put to increase the hlog sequence number
    Put put = new Put(Bytes.toBytes("row"));
    put.add(FAMILY, QUALIFIER, Bytes.toBytes("value"));
    table.put(put);

    loader.doBulkLoad(dir, table);

    // Get the store files
    List<StoreFile> files =
        util.getHBaseCluster().getRegions(TABLE).get(0).getStore(FAMILY).getStorefiles();
    for (StoreFile file : files) {
      // the sequenceId gets initialized during createReader
      file.createReader();

      if (nonZero) assertTrue(file.getMaxSequenceId() > 0);
      else assertTrue(file.getMaxSequenceId() == -1);
    }
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = "testNonexistentColumnFamilyLoad";
    byte[][][] hfileRanges = new byte[][][] {
      new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_"+testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TABLE);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableAvailable(TABLE, 30000);
    // make sure we go back to the usual user provider
    UserProvider.setUserProviderForTesting(util.getConfiguration(), UserProvider.class);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    try {
      loader.doBulkLoad(dir, table);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
    }
    table.close();
    admin.close();
  }

  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDir("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

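  /**
   * Opens the HFile at the given path, scans every KeyValue, asserts the file is non-empty
   * and returns the number of KeyValues read.
   */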
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf));
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    scanner.seekTo();
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

  /**
   * Create an HFile with the given number of rows between a given
   * start key and end key.
   * TODO put me in an HFileTestUtil or something?
   */
  static void createHFile(
      Configuration conf,
      FileSystem fs, Path path,
      byte[] family, byte[] qualifier,
      byte[] startKey, byte[] endKey, int numRows) throws IOException
  {
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
        .withPath(fs, path)
        .withBlockSize(BLOCKSIZE)
        .withCompression(COMPRESSION)
        .withComparator(KeyValue.KEY_COMPARATOR)
        .create();
    long now = System.currentTimeMillis();
    try {
      // subtract 2 since iterateOnSplits doesn't include boundary keys
      for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, numRows-2)) {
        KeyValue kv = new KeyValue(key, family, qualifier, now, key);
        writer.append(kv);
      }
    } finally {
      writer.close();
    }
  }

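  /**
   * Records one [first, last] key range in the map consumed by inferBoundaries: the count at
   * the start key is incremented and the count at the end key is decremented, so overlapping
   * ranges can later be merged into a set of split keys.
   */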
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value+1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value-1);
  }

  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m,r,u)
     */

    String first;
    String last;

    first = "a"; last = "e";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "r"; last = "s";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "o"; last = "p";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "g"; last = "k";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "v"; last = "x";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "c"; last = "i";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "m"; last = "q";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "s"; last = "t";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "u"; last = "w";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[3][];
    compare[0] = "m".getBytes();
    compare[1] = "r".getBytes();
    compare[2] = "u".getBytes();

    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }


  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}