/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.security.SecureBulkLoadUtil;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
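  /*
   * For orientation: each test below stages HFiles under a per-test directory and
   * then drives the same entry point as the command-line completebulkload tool.
   * A minimal sketch (the directory and table name here are hypothetical):
   *
   *   LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
   *   loader.run(new String[] { "/staging/dir", "myTable" });
   */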
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  private static final String NAMESPACE = "bulkNS";

  static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
  static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  static HBaseTestingUtility util = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
    util.getConfiguration().setInt(
      LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
      MAX_FILES_PER_REGION_PER_FAMILY);
    util.startMiniCluster();

    setupNamespace();
  }

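  /** Creates the extra namespace targeted by the namespaced variant of each load test. */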
  protected static void setupNamespace() throws Exception {
    util.getHBaseAdmin().createNamespace(NamespaceDescriptor.create(NAMESPACE).build());
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test case that creates some regions and loads HFiles that have
   * different region boundaries than the table pre-split.
   */
  @Test
  public void testSimpleHFileSplit() throws Exception {
    runTest("testHFileSplit", BloomType.NONE,
        new byte[][] {
          Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
          Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
        },
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("lll") },
          new byte[][]{ Bytes.toBytes("mmm"), Bytes.toBytes("zzz") },
        }
    );
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries
   * and have different region boundaries than the table pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplit() throws Exception {
    testRegionCrossingHFileSplit(BloomType.NONE);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries,
   * have a ROW bloom filter, and have different region boundaries than the table
   * pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplitRowBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROW);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries,
   * have a ROWCOL bloom filter, and have different region boundaries than the table
   * pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplitRowColBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROWCOL);
  }

  private void testRegionCrossingHFileSplit(BloomType bloomType) throws Exception {
    runTest("testHFileSplit" + bloomType + "Bloom", bloomType,
        new byte[][] {
          Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
          Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
        },
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
        }
    );
  }

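  /*
   * The runTest overloads below form a chain: default the split keys, run the same
   * load twice (once against the default namespace, once against NAMESPACE), build
   * the table descriptor for the requested bloom type, and finally stage the HFiles
   * and drive the loader.
   */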
  private void runTest(String testName, BloomType bloomType,
      byte[][][] hfileRanges) throws Exception {
    runTest(testName, bloomType, null, hfileRanges);
  }

  private void runTest(String testName, BloomType bloomType,
      byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
    final boolean preCreateTable = tableSplitKeys != null;

    // Run the test bulkloading the table to the default namespace
    final TableName TABLE_WITHOUT_NS = TableName.valueOf(TABLE_NAME);
    runTest(testName, TABLE_WITHOUT_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);

    // Run the test bulkloading the table to the specified namespace
    final TableName TABLE_WITH_NS = TableName.valueOf(Bytes.toBytes(NAMESPACE), TABLE_NAME);
    runTest(testName, TABLE_WITH_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
  }

  private void runTest(String testName, TableName tableName, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    HTableDescriptor htd = new HTableDescriptor(tableName);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);
    runTest(testName, htd, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
  }

  private void runTest(String testName, HTableDescriptor htd, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    if (preCreateTable) {
      util.getHBaseAdmin().createTable(htd, tableSplitKeys);
    }

    final TableName tableName = htd.getTableName();
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), tableName.toString() };
    loader.run(args);

    Table table = new HTable(util.getConfiguration(), tableName);
    try {
      assertEquals(expectedRows, util.countRows(table));
    } finally {
      table.close();
    }

    // verify staging folder has been cleaned up
    Path stagingBasePath = SecureBulkLoadUtil.getBaseStagingDir(util.getConfiguration());
    if (fs.exists(stagingBasePath)) {
      FileStatus[] files = fs.listStatus(stagingBasePath);
      for (FileStatus file : files) {
        assertTrue("Folder=" + file.getPath() + " is not cleaned up.",
          !file.getPath().getName().equals("DONOTERASE"));
      }
    }

    util.deleteTable(tableName);
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = "testNonexistentColumnFamilyLoad";
    byte[][][] hFileRanges = new byte[][][] {
      new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    // set the real family name to upper case on purpose to simulate the case where
    // the family name in the HFiles is invalid
    HColumnDescriptor family =
        new HColumnDescriptor(Bytes.toBytes(new String(FAMILY).toUpperCase()));
    htd.addFamily(family);

    try {
      runTest(testName, htd, BloomType.NONE, true, SPLIT_KEYS, hFileRanges);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
      // further check whether the exception message is correct
      String errMsg = e.getMessage();
      assertTrue("Incorrect exception message, expected message: ["
          + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
          errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
    }
  }

  /**
   * Write a random data file in a dir whose name could be a valid family name but is
   * not one of the table's families. We should be able to bulk load without getting
   * the unmatched family exception. HBASE-13037
   */
  @Test(timeout = 60000)
  public void testNonHfileFolderWithUnmatchedFamilyName() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("testNonHfileFolderWithUnmatchedFamilyName");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);

    Path familyDir = new Path(dir, Bytes.toString(FAMILY));
    HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_0"),
        FAMILY, QUALIFIER, Bytes.toBytes("begin"), Bytes.toBytes("end"), 500);

    final String NON_FAMILY_FOLDER = "_logs";
    Path nonFamilyDir = new Path(dir, NON_FAMILY_FOLDER);
    fs.mkdirs(nonFamilyDir);
    createRandomDataFile(fs, new Path(nonFamilyDir, "012356789"), 16 * 1024);

    Table table = null;
    try {
      final String TABLE_NAME = "mytable_testNonHfileFolderWithUnmatchedFamilyName";
      table = util.createTable(TableName.valueOf(TABLE_NAME), FAMILY);

      final String[] args = { dir.toString(), TABLE_NAME };
      new LoadIncrementalHFiles(util.getConfiguration()).run(args);
      assertEquals(500, util.countRows(table));
    } finally {
      if (table != null) {
        table.close();
      }
      fs.delete(dir, true);
    }
  }

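  /**
   * Writes {@code size} bytes of filler data; used above to plant a non-HFile inside
   * the "_logs" directory that the loader is expected to skip.
   */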
  private static void createRandomDataFile(FileSystem fs, Path path, int size)
      throws IOException {
    FSDataOutputStream stream = fs.create(path);
    try {
      byte[] data = new byte[1024];
      for (int i = 0; i < data.length; ++i) {
        data[i] = (byte) (i & 0xff);
      }
      while (size >= data.length) {
        stream.write(data, 0, data.length);
        size -= data.length;
      }
      if (size > 0) {
        stream.write(data, 0, size);
      }
    } finally {
      stream.close();
    }
  }

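  /**
   * Splits a single 1000-row HFile at row "ggg" and verifies that the bottom and top
   * halves together still hold all 1000 rows.
   */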
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf), conf);
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    scanner.seekTo();
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

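  /**
   * Records one [first, last) range for testInferBoundaries: +1 at each start key and
   * -1 at each end key, so overlapping ranges can be merged by watching where the
   * running sum returns to zero (see the toy example below).
   */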
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m,r,u)
     */

    addStartEndKeysForTest(map, Bytes.toBytes("a"), Bytes.toBytes("e"));
    addStartEndKeysForTest(map, Bytes.toBytes("r"), Bytes.toBytes("s"));
    addStartEndKeysForTest(map, Bytes.toBytes("o"), Bytes.toBytes("p"));
    addStartEndKeysForTest(map, Bytes.toBytes("g"), Bytes.toBytes("k"));
    addStartEndKeysForTest(map, Bytes.toBytes("v"), Bytes.toBytes("x"));
    addStartEndKeysForTest(map, Bytes.toBytes("c"), Bytes.toBytes("i"));
    addStartEndKeysForTest(map, Bytes.toBytes("m"), Bytes.toBytes("q"));
    addStartEndKeysForTest(map, Bytes.toBytes("s"), Bytes.toBytes("t"));
    addStartEndKeysForTest(map, Bytes.toBytes("u"), Bytes.toBytes("w"));

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[][] {
      Bytes.toBytes("m"),
      Bytes.toBytes("r"),
      Bytes.toBytes("u")
    };

    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

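  /**
   * Loading more HFiles than MAX_FILES_PER_REGION_PER_FAMILY into a single region
   * and family must be rejected.
   */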
  @Test
  public void testLoadTooManyHFiles() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("testLoadTooManyHFiles");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + i), FAMILY, QUALIFIER, from, to, 1000);
    }

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), "mytable_testLoadTooManyHFiles" };
    try {
      loader.run(args);
      fail("Bulk loading too many files should fail");
    } catch (IOException ie) {
      assertTrue(ie.getMessage().contains("Trying to load more than "
        + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
    }
  }

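  /**
   * With table creation disabled via CREATE_TABLE_CONF_KEY, loading against a missing
   * table must fail with TableNotFoundException instead of creating it.
   */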
  @Test(expected = TableNotFoundException.class)
  public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.set(LoadIncrementalHFiles.CREATE_TABLE_CONF_KEY, "no");
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { "directory", "nonExistingTable" };
    loader.run(args);
  }

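  /**
   * A family named with a leading underscore, such as "_cf", resembles an internal
   * directory (compare the "_logs" case above) but must still bulk load normally.
   */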
  @Test
  public void testTableWithCFNameStartWithUnderScore() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    String family = "_cf";
    Path familyDir = new Path(dir, family);

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    Configuration conf = util.getConfiguration();
    String tableName = "mytable_cfNameStartWithUnderScore";
    Table table = util.createTable(TableName.valueOf(tableName), family);
    HFileTestUtil.createHFile(conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family),
      QUALIFIER, from, to, 1000);

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { dir.toString(), tableName };
    try {
      loader.run(args);
      assertEquals(1000, util.countRows(table));
    } finally {
      if (null != table) {
        table.close();
      }
    }
  }
}