/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  private static final String NAMESPACE = "bulkNS";

  static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
  static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  static HBaseTestingUtility util = new HBaseTestingUtility();

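  /**
   * Starts a mini cluster with any configured region coprocessors cleared and
   * the per-region, per-family HFile limit lowered to
   * MAX_FILES_PER_REGION_PER_FAMILY, then creates the "bulkNS" test namespace.
   */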
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
    util.getConfiguration().setInt(
      LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
      MAX_FILES_PER_REGION_PER_FAMILY);
    util.startMiniCluster();

    setupNamespace();
  }

  protected static void setupNamespace() throws Exception {
    util.getHBaseAdmin().createNamespace(NamespaceDescriptor.create(NAMESPACE).build());
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test case that creates some regions and loads HFiles that have
   * different region boundaries than the table pre-split.
   */
  @Test
  public void testSimpleHFileSplit() throws Exception {
    runTest("testHFileSplit", BloomType.NONE,
        new byte[][] {
          Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
          Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
        },
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("lll") },
          new byte[][]{ Bytes.toBytes("mmm"), Bytes.toBytes("zzz") },
        }
    );
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries
   * and have different region boundaries than the table pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplit() throws Exception {
    testRegionCrossingHFileSplit(BloomType.NONE);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries,
   * have a ROW bloom filter, and have different region boundaries than the table pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplitRowBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROW);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries,
   * have a ROWCOL bloom filter, and have different region boundaries than the table pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplitRowColBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROWCOL);
  }

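  /**
   * Shared body for the region-crossing HFile-split tests: both HFile ranges
   * straddle the pre-split region boundaries, so the loader has to split the
   * files before the bulk load can succeed.
   */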
  private void testRegionCrossingHFileSplit(BloomType bloomType) throws Exception {
    runTest("testHFileSplit" + bloomType + "Bloom", bloomType,
        new byte[][] {
          Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
          Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
        },
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
        }
    );
  }

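  /**
   * Variant without table split keys: the table is not pre-created, so the
   * loader creates it on the fly (create-table defaults to "yes").
   */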
  private void runTest(String testName, BloomType bloomType,
      byte[][][] hfileRanges) throws Exception {
    runTest(testName, bloomType, null, hfileRanges);
  }

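  /**
   * Runs the same bulk load twice: once into the default namespace and once
   * into the "bulkNS" test namespace.
   */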
  private void runTest(String testName, BloomType bloomType,
      byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
    final boolean preCreateTable = tableSplitKeys != null;

    // Run the test bulkloading the table to the default namespace
    final TableName TABLE_WITHOUT_NS = TableName.valueOf(TABLE_NAME);
    runTest(testName, TABLE_WITHOUT_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);

    // Run the test bulkloading the table to the specified namespace
    final TableName TABLE_WITH_NS = TableName.valueOf(Bytes.toBytes(NAMESPACE), TABLE_NAME);
    runTest(testName, TABLE_WITH_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
  }

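  /**
   * Builds a single-family table descriptor with the requested bloom filter
   * type and delegates to the descriptor-based overload.
   */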
  private void runTest(String testName, TableName tableName, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    HTableDescriptor htd = new HTableDescriptor(tableName);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);
    runTest(testName, htd, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
  }

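  /**
   * The worker behind all runTest overloads: writes one HFile per range,
   * optionally pre-creates the table, runs LoadIncrementalHFiles against the
   * staging directory, and verifies the expected row count before dropping
   * the table.
   */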
  private void runTest(String testName, HTableDescriptor htd, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

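    // Write one HFile per key range; each file holds 1000 rows.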
    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    if (preCreateTable) {
      util.getHBaseAdmin().createTable(htd, tableSplitKeys);
    }

    final TableName tableName = htd.getTableName();
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), tableName.toString() };
    loader.run(args);

    HTable table = new HTable(util.getConfiguration(), tableName);
    try {
      assertEquals(expectedRows, util.countRows(table));
    } finally {
      table.close();
    }

    util.deleteTable(tableName);
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = "testNonexistentColumnFamilyLoad";
    byte[][][] hFileRanges = new byte[][][] {
      new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    // Upper-case the real family name on purpose, so that the family name
    // carried in the HFiles does not match any family in the table.
    HColumnDescriptor family =
        new HColumnDescriptor(Bytes.toBytes(new String(FAMILY).toUpperCase()));
    htd.addFamily(family);

    try {
      runTest(testName, htd, BloomType.NONE, true, SPLIT_KEYS, hFileRanges);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
      // further check whether the exception message is correct
      String errMsg = e.getMessage();
      assertTrue("Incorrect exception message, expected message: ["
          + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
          errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
    }
  }

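  /**
   * Splits a single HFile covering aaa..zzz at row "ggg" and checks that the
   * bottom and top halves together still contain all 1000 rows.
   */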
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

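  /**
   * Opens the HFile at the given path, scans every cell, asserts the file is
   * non-empty, and returns the number of cells found.
   */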
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf), conf);
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    scanner.seekTo();
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

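  /**
   * Records one region's boundaries in the map: +1 for its start key and -1
   * for its end key, producing the input format that inferBoundaries consumes.
   */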
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m,r,u)
     */

    String first;
    String last;

    first = "a"; last = "e";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "r"; last = "s";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "o"; last = "p";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "g"; last = "k";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "v"; last = "x";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "c"; last = "i";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "m"; last = "q";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "s"; last = "t";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "u"; last = "w";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[3][];
    compare[0] = "m".getBytes();
    compare[1] = "r".getBytes();
    compare[2] = "u".getBytes();

    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

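  /**
   * Writes MAX_FILES_PER_REGION_PER_FAMILY + 1 HFiles into one family
   * directory and expects the loader to refuse the bulk load.
   */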
  @Test
  public void testLoadTooManyHFiles() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("testLoadTooManyHFiles");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + i), FAMILY, QUALIFIER, from, to, 1000);
    }

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), "mytable_testLoadTooManyHFiles" };
    try {
      loader.run(args);
      fail("Bulk loading too many files should fail");
    } catch (IOException ie) {
      assertTrue(ie.getMessage().contains("Trying to load more than "
        + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
    }
  }

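  /**
   * With create-table set to "no", loading into a table that does not exist
   * must surface a TableNotFoundException instead of creating the table.
   */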
  @Test(expected = TableNotFoundException.class)
  public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.set(LoadIncrementalHFiles.CREATE_TABLE_CONF_KEY, "no");
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { "directory", "nonExistingTable" };
    loader.run(args);
  }

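  /**
   * Verifies that bulk load works for a column family whose name starts with
   * an underscore ("_cf").
   */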
  @Test
  public void testTableWithCFNameStartWithUnderScore() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    String family = "_cf";
    Path familyDir = new Path(dir, family);

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    Configuration conf = util.getConfiguration();
    String tableName = "mytable_cfNameStartWithUnderScore";
    HTable table = util.createTable(tableName, family);
    HFileTestUtil.createHFile(conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family),
      QUALIFIER, from, to, 1000);

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { dir.toString(), tableName };
    try {
      loader.run(args);
      assertEquals(1000, util.countRows(table));
    } finally {
      if (table != null) {
        table.close();
      }
    }
  }
}