/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.security.SecureBulkLoadUtil;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  private static final String NAMESPACE = "bulkNS";

  static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
  static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  static HBaseTestingUtility util = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
    util.getConfiguration().setInt(
      LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
      MAX_FILES_PER_REGION_PER_FAMILY);
    util.startMiniCluster();

    setupNamespace();
  }

  protected static void setupNamespace() throws Exception {
    util.getHBaseAdmin().createNamespace(NamespaceDescriptor.create(NAMESPACE).build());
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test case that creates some regions and loads HFiles whose boundaries
   * differ from the boundaries the table was pre-split on.
   */
  @Test
  public void testSimpleHFileSplit() throws Exception {
    runTest("testHFileSplit", BloomType.NONE,
        new byte[][] {
          Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
          Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
        },
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("lll") },
          new byte[][]{ Bytes.toBytes("mmm"), Bytes.toBytes("zzz") },
        }
    );
  }

  /**
   * Test case that creates some regions and loads HFiles that cross region
   * boundaries and whose boundaries differ from the table's pre-split boundaries.
   */
  @Test
  public void testRegionCrossingHFileSplit() throws Exception {
    testRegionCrossingHFileSplit(BloomType.NONE);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross region
   * boundaries, carry a ROW bloom filter, and have boundaries that differ
   * from the table's pre-split boundaries.
   */
  @Test
  public void testRegionCrossingHFileSplitRowBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROW);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross region
   * boundaries, carry a ROWCOL bloom filter, and have boundaries that differ
   * from the table's pre-split boundaries.
   */
  @Test
  public void testRegionCrossingHFileSplitRowColBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROWCOL);
  }

  private void testRegionCrossingHFileSplit(BloomType bloomType) throws Exception {
    runTest("testHFileSplit" + bloomType + "Bloom", bloomType,
        new byte[][] {
          Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
          Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
        },
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
        }
    );
  }

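  /**
   * Convenience overload for scenarios without pre-split keys; the loader is
   * left to create the target table itself.
   */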
  private void runTest(String testName, BloomType bloomType,
      byte[][][] hfileRanges) throws Exception {
    runTest(testName, bloomType, null, hfileRanges);
  }

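  /**
   * Runs the same scenario twice: once against a table in the default
   * namespace and once against a table in the {@code bulkNS} namespace.
   */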
  private void runTest(String testName, BloomType bloomType,
      byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
    final boolean preCreateTable = tableSplitKeys != null;

    // Run the test bulkloading the table to the default namespace
    final TableName TABLE_WITHOUT_NS = TableName.valueOf(TABLE_NAME);
    runTest(testName, TABLE_WITHOUT_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);

    // Run the test bulkloading the table to the specified namespace
    final TableName TABLE_WITH_NS = TableName.valueOf(Bytes.toBytes(NAMESPACE), TABLE_NAME);
    runTest(testName, TABLE_WITH_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
  }

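  /**
   * Builds a single-family table descriptor with the requested bloom filter
   * type and delegates to the descriptor-based overload.
   */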
  private void runTest(String testName, TableName tableName, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    HTableDescriptor htd = new HTableDescriptor(tableName);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);
    runTest(testName, htd, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
  }

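  /**
   * Core driver: writes one HFile per requested key range, optionally
   * pre-creates the table with the given split keys, runs
   * LoadIncrementalHFiles against it, then verifies the loaded row count
   * and the cleanup of the secure bulk load staging directory.
   */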
  private void runTest(String testName, HTableDescriptor htd, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

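    // Write one 1000-row HFile per [from, to] key range under the family directory.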
    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    if (preCreateTable) {
      util.getHBaseAdmin().createTable(htd, tableSplitKeys);
    }

    final TableName tableName = htd.getTableName();
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), tableName.toString() };
    loader.run(args);

    HTable table = new HTable(util.getConfiguration(), tableName);
    try {
      assertEquals(expectedRows, util.countRows(table));
    } finally {
      table.close();
    }

    // verify staging folder has been cleaned up
    Path stagingBasePath = SecureBulkLoadUtil.getBaseStagingDir(util.getConfiguration());
    if (fs.exists(stagingBasePath)) {
      FileStatus[] files = fs.listStatus(stagingBasePath);
      for (FileStatus file : files) {
        assertTrue("Folder=" + file.getPath() + " is not cleaned up.",
          !file.getPath().getName().equals("DONOTERASE"));
      }
    }

    util.deleteTable(tableName);
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = "testNonexistentColumnFamilyLoad";
    byte[][][] hFileRanges = new byte[][][] {
      new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    // deliberately upper-case the real family name to simulate HFiles whose
    // family name does not match any family in the table
    HColumnDescriptor family =
        new HColumnDescriptor(Bytes.toBytes(new String(FAMILY).toUpperCase()));
    htd.addFamily(family);

    try {
      runTest(testName, htd, BloomType.NONE, true, SPLIT_KEYS, hFileRanges);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
      // further check whether the exception message is correct
      String errMsg = e.getMessage();
      assertTrue("Incorrect exception message, expected message: ["
          + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
          errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
    }
  }

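  /**
   * Writes a single HFile spanning rows "aaa" to "zzz", splits it at "ggg",
   * and checks that no rows are lost between the bottom and top halves.
   */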
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

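  /**
   * Opens the HFile at {@code p}, scans it end to end, and returns the cell
   * count; assumes the file is non-empty, since seekTo() must position the
   * scanner on a first entry for the do/while loop to count correctly.
   */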
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf), conf);
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    scanner.seekTo();
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

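  /**
   * Records one [first, last] range for {@link #testInferBoundaries()} using a
   * sweep-line encoding: +1 at the start key and -1 at the end key, so that a
   * running sum over the sorted keys yields the number of overlapping ranges.
   */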
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

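  /**
   * Feeds a set of overlapping start/end key ranges to
   * LoadIncrementalHFiles.inferBoundaries() and checks the inferred split
   * points against the toy example sketched below.
   */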
  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m,r,u)
     */

    String first;
    String last;

    first = "a"; last = "e";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "r"; last = "s";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "o"; last = "p";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "g"; last = "k";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "v"; last = "x";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "c"; last = "i";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "m"; last = "q";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "s"; last = "t";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "u"; last = "w";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[3][];
    compare[0] = "m".getBytes();
    compare[1] = "r".getBytes();
    compare[2] = "u".getBytes();

    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

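  /**
   * Writes MAX_FILES_PER_REGION_PER_FAMILY + 1 HFiles for a single family and
   * expects the load to be rejected.
   */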
  @Test
  public void testLoadTooManyHFiles() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("testLoadTooManyHFiles");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + i), FAMILY, QUALIFIER, from, to, 1000);
    }

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), "mytable_testLoadTooManyHFiles" };
    try {
      loader.run(args);
      fail("Bulk loading too many files should fail");
    } catch (IOException ie) {
      assertTrue(ie.getMessage().contains("Trying to load more than "
        + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
    }
  }

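  /**
   * With table creation disabled via CREATE_TABLE_CONF_KEY, loading into a
   * missing table should throw TableNotFoundException instead of creating it.
   */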
  @Test(expected = TableNotFoundException.class)
  public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.set(LoadIncrementalHFiles.CREATE_TABLE_CONF_KEY, "no");
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { "directory", "nonExistingTable" };
    loader.run(args);
  }

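  /**
   * Test loading into a column family whose name starts with an underscore,
   * presumably guarding against such directories being skipped as internal
   * output (in the style of "_logs" or "_SUCCESS") during the bulk load.
   */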
  @Test
  public void testTableWithCFNameStartWithUnderScore() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    String family = "_cf";
    Path familyDir = new Path(dir, family);

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    Configuration conf = util.getConfiguration();
    String tableName = "mytable_cfNameStartWithUnderScore";
    HTable table = util.createTable(tableName, family);
    HFileTestUtil.createHFile(conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family),
      QUALIFIER, from, to, 1000);

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { dir.toString(), tableName };
    try {
      loader.run(args);
      assertEquals(1000, util.countRows(table));
    } finally {
      if (table != null) {
        table.close();
      }
    }
  }
}