View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import static org.junit.Assert.assertArrayEquals;
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.TreeMap;
28  
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FSDataOutputStream;
31  import org.apache.hadoop.fs.FileStatus;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.HBaseTestingUtility;
35  import org.apache.hadoop.hbase.HColumnDescriptor;
36  import org.apache.hadoop.hbase.HTableDescriptor;
37  import org.apache.hadoop.hbase.testclassification.LargeTests;
38  import org.apache.hadoop.hbase.TableNotFoundException;
39  import org.apache.hadoop.hbase.NamespaceDescriptor;
40  import org.apache.hadoop.hbase.TableName;
41  import org.apache.hadoop.hbase.client.HTable;
42  import org.apache.hadoop.hbase.client.Table;
43  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
44  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
45  import org.apache.hadoop.hbase.io.hfile.HFile;
46  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
47  import org.apache.hadoop.hbase.regionserver.BloomType;
48  import org.apache.hadoop.hbase.util.Bytes;
49  import org.apache.hadoop.hbase.util.HFileTestUtil;
50  import org.junit.AfterClass;
51  import org.junit.BeforeClass;
52  import org.junit.Test;
53  import org.junit.experimental.categories.Category;
54  import org.apache.hadoop.hbase.security.SecureBulkLoadUtil;
55  
/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
  // Column qualifier written into every generated test cell.
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  // Single column family used by the generated HFiles and test tables.
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  // Non-default namespace each test table is also bulk-loaded into.
  private static final String NAMESPACE = "bulkNS";

  // Substring expected in the IOException raised when an HFile family dir
  // does not match any of the table's families.
  static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
  // Limit configured on the loader; exceeding it must fail the bulk load.
  static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

  // Split points used by tests that pre-create a three-region table.
  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  // Shared mini-cluster helper; started once for the whole class.
  static HBaseTestingUtility util = new HBaseTestingUtility();
76  
77    @BeforeClass
78    public static void setUpBeforeClass() throws Exception {
79      util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,"");
80      util.getConfiguration().setInt(
81        LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
82        MAX_FILES_PER_REGION_PER_FAMILY);
83      util.startMiniCluster();
84  
85      setupNamespace();
86    }
87  
88    protected static void setupNamespace() throws Exception {
89      util.getHBaseAdmin().createNamespace(NamespaceDescriptor.create(NAMESPACE).build());
90    }
91  
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    // Stop the shared mini cluster once every test in the class has run.
    util.shutdownMiniCluster();
  }
96  
97    /**
98     * Test case that creates some regions and loads
99     * HFiles that fit snugly inside those regions
100    */
101   @Test
102   public void testSimpleLoad() throws Exception {
103     runTest("testSimpleLoad", BloomType.NONE,
104         new byte[][][] {
105           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
106           new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
107     });
108   }
109 
110   /**
111    * Test case that creates some regions and loads
112    * HFiles that cross the boundaries of those regions
113    */
114   @Test
115   public void testRegionCrossingLoad() throws Exception {
116     runTest("testRegionCrossingLoad", BloomType.NONE,
117         new byte[][][] {
118           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
119           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
120     });
121   }
122 
123   /**
124    * Test loading into a column family that has a ROW bloom filter.
125    */
126   @Test
127   public void testRegionCrossingRowBloom() throws Exception {
128     runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
129         new byte[][][] {
130           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
131           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
132     });
133   }
134 
135   /**
136    * Test loading into a column family that has a ROWCOL bloom filter.
137    */
138   @Test
139   public void testRegionCrossingRowColBloom() throws Exception {
140     runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
141         new byte[][][] {
142           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
143           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
144     });
145   }
146 
147   /**
148    * Test case that creates some regions and loads HFiles that have
149    * different region boundaries than the table pre-split.
150    */
151   @Test
152   public void testSimpleHFileSplit() throws Exception {
153     runTest("testHFileSplit", BloomType.NONE,
154         new byte[][] {
155           Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
156           Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
157         },
158         new byte[][][] {
159           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("lll") },
160           new byte[][]{ Bytes.toBytes("mmm"), Bytes.toBytes("zzz") },
161         }
162     );
163   }
164 
  /**
   * Test case that creates some regions and loads HFiles that cross region
   * boundaries and have different boundaries than the table pre-split,
   * with no bloom filter on the family.
   */
  @Test
  public void testRegionCrossingHFileSplit() throws Exception {
    testRegionCrossingHFileSplit(BloomType.NONE);
  }
173 
  /**
   * Test case that creates some regions and loads HFiles that cross region
   * boundaries, with a ROW bloom filter and different boundaries than the
   * table pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplitRowBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROW);
  }
182 
  /**
   * Test case that creates some regions and loads HFiles that cross region
   * boundaries, with a ROWCOL bloom filter and different boundaries than the
   * table pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplitRowColBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROWCOL);
  }
191 
192   private void testRegionCrossingHFileSplit(BloomType bloomType) throws Exception {
193     runTest("testHFileSplit" + bloomType + "Bloom", bloomType,
194         new byte[][] {
195           Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
196           Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
197         },
198         new byte[][][] {
199           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
200           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
201         }
202     );
203   }
204 
  // Convenience overload: no pre-split keys, so the loader creates the table.
  private void runTest(String testName, BloomType bloomType,
      byte[][][] hfileRanges) throws Exception {
    runTest(testName, bloomType, null, hfileRanges);
  }
209 
210   private void runTest(String testName, BloomType bloomType,
211       byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
212     final byte[] TABLE_NAME = Bytes.toBytes("mytable_"+testName);
213     final boolean preCreateTable = tableSplitKeys != null;
214 
215     // Run the test bulkloading the table to the default namespace
216     final TableName TABLE_WITHOUT_NS = TableName.valueOf(TABLE_NAME);
217     runTest(testName, TABLE_WITHOUT_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
218 
219     // Run the test bulkloading the table to the specified namespace
220     final TableName TABLE_WITH_NS = TableName.valueOf(Bytes.toBytes(NAMESPACE), TABLE_NAME);
221     runTest(testName, TABLE_WITH_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
222   }
223 
224   private void runTest(String testName, TableName tableName, BloomType bloomType,
225       boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
226     HTableDescriptor htd = new HTableDescriptor(tableName);
227     HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
228     familyDesc.setBloomFilterType(bloomType);
229     htd.addFamily(familyDesc);
230     runTest(testName, htd, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
231   }
232 
233   private void runTest(String testName, HTableDescriptor htd, BloomType bloomType,
234       boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
235     Path dir = util.getDataTestDirOnTestFS(testName);
236     FileSystem fs = util.getTestFileSystem();
237     dir = dir.makeQualified(fs);
238     Path familyDir = new Path(dir, Bytes.toString(FAMILY));
239 
240     int hfileIdx = 0;
241     for (byte[][] range : hfileRanges) {
242       byte[] from = range[0];
243       byte[] to = range[1];
244       HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
245           + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
246     }
247     int expectedRows = hfileIdx * 1000;
248 
249     if (preCreateTable) {
250       util.getHBaseAdmin().createTable(htd, tableSplitKeys);
251     }
252 
253     final TableName tableName = htd.getTableName();
254     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
255     String [] args= {dir.toString(), tableName.toString()};
256     loader.run(args);
257 
258     Table table = new HTable(util.getConfiguration(), tableName);
259     try {
260       assertEquals(expectedRows, util.countRows(table));
261     } finally {
262       table.close();
263     }
264 
265     // verify staging folder has been cleaned up
266     Path stagingBasePath = SecureBulkLoadUtil.getBaseStagingDir(util.getConfiguration());
267     if(fs.exists(stagingBasePath)) {
268       FileStatus[] files = fs.listStatus(stagingBasePath);
269       for(FileStatus file : files) {
270         assertTrue("Folder=" + file.getPath() + " is not cleaned up.",
271           file.getPath().getName() != "DONOTERASE");
272       }
273     }
274 
275     util.deleteTable(tableName);
276   }
277 
278   /**
279    * Test loading into a column family that does not exist.
280    */
281   @Test
282   public void testNonexistentColumnFamilyLoad() throws Exception {
283     String testName = "testNonexistentColumnFamilyLoad";
284     byte[][][] hFileRanges = new byte[][][] {
285       new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
286       new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
287     };
288 
289     final byte[] TABLE = Bytes.toBytes("mytable_"+testName);
290     HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
291     // set real family name to upper case in purpose to simulate the case that
292     // family name in HFiles is invalid
293     HColumnDescriptor family =
294         new HColumnDescriptor(Bytes.toBytes(new String(FAMILY).toUpperCase()));
295     htd.addFamily(family);
296 
297     try {
298       runTest(testName, htd, BloomType.NONE, true, SPLIT_KEYS, hFileRanges);
299       assertTrue("Loading into table with non-existent family should have failed", false);
300     } catch (Exception e) {
301       assertTrue("IOException expected", e instanceof IOException);
302       // further check whether the exception message is correct
303       String errMsg = e.getMessage();
304       assertTrue("Incorrect exception message, expected message: ["
305           + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
306           errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
307     }
308   }
309 
  // Bulk load from a dir containing non-HFile entries into a pre-created table.
  @Test(timeout = 60000)
  public void testNonHfileFolderWithUnmatchedFamilyName() throws Exception {
    testNonHfileFolder("testNonHfileFolderWithUnmatchedFamilyName", true);
  }
314 
  // Same scenario, but the loader itself creates the target table.
  @Test(timeout = 60000)
  public void testNonHfileFolder() throws Exception {
    testNonHfileFolder("testNonHfileFolder", false);
  }
319 
320   /**
321    * Write a random data file and a non-file in a dir with a valid family name
322    * but not part of the table families. we should we able to bulkload without
323    * getting the unmatched family exception. HBASE-13037/HBASE-13227
324    */
325   private void testNonHfileFolder(String tableName, boolean preCreateTable) throws Exception {
326     Path dir = util.getDataTestDirOnTestFS(tableName);
327     FileSystem fs = util.getTestFileSystem();
328     dir = dir.makeQualified(fs);
329 
330     Path familyDir = new Path(dir, Bytes.toString(FAMILY));
331     HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_0"),
332         FAMILY, QUALIFIER, Bytes.toBytes("begin"), Bytes.toBytes("end"), 500);
333     createRandomDataFile(fs, new Path(familyDir, "012356789"), 16 * 1024);
334 
335     final String NON_FAMILY_FOLDER = "_logs";
336     Path nonFamilyDir = new Path(dir, NON_FAMILY_FOLDER);
337     fs.mkdirs(nonFamilyDir);
338     fs.mkdirs(new Path(nonFamilyDir, "non-file"));
339     createRandomDataFile(fs, new Path(nonFamilyDir, "012356789"), 16 * 1024);
340 
341     Table table = null;
342     try {
343       if (preCreateTable) {
344         table = util.createTable(TableName.valueOf(tableName), FAMILY);
345       } else {
346         table = util.getConnection().getTable(TableName.valueOf(tableName));
347       }
348 
349       final String[] args = {dir.toString(), tableName};
350       new LoadIncrementalHFiles(util.getConfiguration()).run(args);
351       assertEquals(500, util.countRows(table));
352     } finally {
353       if (table != null) {
354         table.close();
355       }
356       fs.delete(dir, true);
357     }
358   }
359 
360   private static void createRandomDataFile(FileSystem fs, Path path, int size)
361       throws IOException {
362     FSDataOutputStream stream = fs.create(path);
363     try {
364       byte[] data = new byte[1024];
365       for (int i = 0; i < data.length; ++i) {
366         data[i] = (byte)(i & 0xff);
367       }
368       while (size >= data.length) {
369         stream.write(data, 0, data.length);
370         size -= data.length;
371       }
372       if (size > 0) {
373         stream.write(data, 0, size);
374       }
375     } finally {
376       stream.close();
377     }
378   }
379 
380   @Test
381   public void testSplitStoreFile() throws IOException {
382     Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
383     FileSystem fs = util.getTestFileSystem();
384     Path testIn = new Path(dir, "testhfile");
385     HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
386     HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
387         Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
388 
389     Path bottomOut = new Path(dir, "bottom.out");
390     Path topOut = new Path(dir, "top.out");
391 
392     LoadIncrementalHFiles.splitStoreFile(
393         util.getConfiguration(), testIn,
394         familyDesc, Bytes.toBytes("ggg"),
395         bottomOut,
396         topOut);
397 
398     int rowCount = verifyHFile(bottomOut);
399     rowCount += verifyHFile(topOut);
400     assertEquals(1000, rowCount);
401   }
402 
403   private int verifyHFile(Path p) throws IOException {
404     Configuration conf = util.getConfiguration();
405     HFile.Reader reader = HFile.createReader(
406         p.getFileSystem(conf), p, new CacheConfig(conf), conf);
407     reader.loadFileInfo();
408     HFileScanner scanner = reader.getScanner(false, false);
409     scanner.seekTo();
410     int count = 0;
411     do {
412       count++;
413     } while (scanner.next());
414     assertTrue(count > 0);
415     reader.close();
416     return count;
417   }
418 
419   private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
420     Integer value = map.containsKey(first)?map.get(first):0;
421     map.put(first, value+1);
422 
423     value = map.containsKey(last)?map.get(last):0;
424     map.put(last, value-1);
425   }
426 
427   @Test
428   public void testInferBoundaries() {
429     TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
430 
431     /* Toy example
432      *     c---------i            o------p          s---------t     v------x
433      * a------e    g-----k   m-------------q   r----s            u----w
434      *
435      * Should be inferred as:
436      * a-----------------k   m-------------q   r--------------t  u---------x
437      *
438      * The output should be (m,r,u)
439      */
440 
441     String first;
442     String last;
443 
444     first = "a"; last = "e";
445     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
446 
447     first = "r"; last = "s";
448     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
449 
450     first = "o"; last = "p";
451     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
452 
453     first = "g"; last = "k";
454     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
455 
456     first = "v"; last = "x";
457     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
458 
459     first = "c"; last = "i";
460     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
461 
462     first = "m"; last = "q";
463     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
464 
465     first = "s"; last = "t";
466     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
467 
468     first = "u"; last = "w";
469     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
470 
471     byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
472     byte[][] compare = new byte[3][];
473     compare[0] = "m".getBytes();
474     compare[1] = "r".getBytes();
475     compare[2] = "u".getBytes();
476 
477     assertEquals(keysArray.length, 3);
478 
479     for (int row = 0; row<keysArray.length; row++){
480       assertArrayEquals(keysArray[row], compare[row]);
481     }
482   }
483 
484   @Test
485   public void testLoadTooMayHFiles() throws Exception {
486     Path dir = util.getDataTestDirOnTestFS("testLoadTooMayHFiles");
487     FileSystem fs = util.getTestFileSystem();
488     dir = dir.makeQualified(fs);
489     Path familyDir = new Path(dir, Bytes.toString(FAMILY));
490 
491     byte[] from = Bytes.toBytes("begin");
492     byte[] to = Bytes.toBytes("end");
493     for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
494       HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
495           + i), FAMILY, QUALIFIER, from, to, 1000);
496     }
497 
498     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
499     String [] args= {dir.toString(), "mytable_testLoadTooMayHFiles"};
500     try {
501       loader.run(args);
502       fail("Bulk loading too many files should fail");
503     } catch (IOException ie) {
504       assertTrue(ie.getMessage().contains("Trying to load more than "
505         + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
506     }
507   }
508 
509   @Test(expected = TableNotFoundException.class)
510   public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
511     Configuration conf = util.getConfiguration();
512     conf.set(LoadIncrementalHFiles.CREATE_TABLE_CONF_KEY, "no");
513     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
514     String[] args = { "directory", "nonExistingTable" };
515     loader.run(args);
516   }
517 
518   @Test
519   public void testTableWithCFNameStartWithUnderScore() throws Exception {
520     Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
521     FileSystem fs = util.getTestFileSystem();
522     dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
523     String family = "_cf";
524     Path familyDir = new Path(dir, family);
525 
526     byte[] from = Bytes.toBytes("begin");
527     byte[] to = Bytes.toBytes("end");
528     Configuration conf = util.getConfiguration();
529     String tableName = "mytable_cfNameStartWithUnderScore";
530     Table table = util.createTable(TableName.valueOf(tableName), family);
531     HFileTestUtil.createHFile(conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family),
532       QUALIFIER, from, to, 1000);
533 
534     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
535     String[] args = { dir.toString(), tableName };
536     try {
537       loader.run(args);
538       assertEquals(1000, util.countRows(table));
539     } finally {
540       if (null != table) {
541         table.close();
542       }
543     }
544   }
545 }
546