View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import static org.junit.Assert.assertArrayEquals;
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.TreeMap;
28  
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileSystem;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.HBaseTestingUtility;
33  import org.apache.hadoop.hbase.HColumnDescriptor;
34  import org.apache.hadoop.hbase.HTableDescriptor;
35  import org.apache.hadoop.hbase.LargeTests;
36  import org.apache.hadoop.hbase.NamespaceDescriptor;
37  import org.apache.hadoop.hbase.TableName;
38  import org.apache.hadoop.hbase.client.HBaseAdmin;
39  import org.apache.hadoop.hbase.client.HTable;
40  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
41  import org.apache.hadoop.hbase.io.hfile.HFile;
42  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
43  import org.apache.hadoop.hbase.regionserver.BloomType;
44  import org.apache.hadoop.hbase.util.Bytes;
45  import org.apache.hadoop.hbase.util.HFileTestUtil;
46  import org.junit.AfterClass;
47  import org.junit.BeforeClass;
48  import org.junit.Test;
49  import org.junit.experimental.categories.Category;
50  
51  /**
52   * Test cases for the "load" half of the HFileOutputFormat bulk load
53   * functionality. These tests run faster than the full MR cluster
54   * tests in TestHFileOutputFormat
55   */
56  @Category(LargeTests.class)
57  public class TestLoadIncrementalHFiles {
58    private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
59    private static final byte[] FAMILY = Bytes.toBytes("myfam");
60    private static final String NAMESPACE = "bulkNS";
61  
62    static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
63    static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;
64  
65    private static final byte[][] SPLIT_KEYS = new byte[][] {
66      Bytes.toBytes("ddd"),
67      Bytes.toBytes("ppp")
68    };
69  
70    static HBaseTestingUtility util = new HBaseTestingUtility();
71  
72    @BeforeClass
73    public static void setUpBeforeClass() throws Exception {
74      util.getConfiguration().setInt(
75        LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
76        MAX_FILES_PER_REGION_PER_FAMILY);
77      util.startMiniCluster();
78  
79      setupNamespace();
80    }
81  
82    protected static void setupNamespace() throws Exception {
83      util.getHBaseAdmin().createNamespace(NamespaceDescriptor.create(NAMESPACE).build());
84    }
85  
86    @AfterClass
87    public static void tearDownAfterClass() throws Exception {
88      util.shutdownMiniCluster();
89    }
90  
91    /**
92     * Test case that creates some regions and loads
93     * HFiles that fit snugly inside those regions
94     */
95    @Test
96    public void testSimpleLoad() throws Exception {
97      runTest("testSimpleLoad", BloomType.NONE,
98          new byte[][][] {
99            new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
100           new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
101     });
102   }
103 
104   /**
105    * Test case that creates some regions and loads
106    * HFiles that cross the boundaries of those regions
107    */
108   @Test
109   public void testRegionCrossingLoad() throws Exception {
110     runTest("testRegionCrossingLoad", BloomType.NONE,
111         new byte[][][] {
112           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
113           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
114     });
115   }
116 
117   /**
118    * Test loading into a column family that has a ROW bloom filter.
119    */
120   @Test
121   public void testRegionCrossingRowBloom() throws Exception {
122     runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
123         new byte[][][] {
124           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
125           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
126     });
127   }
128   
129   /**
130    * Test loading into a column family that has a ROWCOL bloom filter.
131    */
132   @Test
133   public void testRegionCrossingRowColBloom() throws Exception {
134     runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
135         new byte[][][] {
136           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
137           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
138     });
139   }
140 
141   /**
142    * Test case that creates some regions and loads HFiles that have
143    * different region boundaries than the table pre-split.
144    */
145   @Test
146   public void testSimpleHFileSplit() throws Exception {
147     runTest("testHFileSplit", BloomType.NONE,
148         new byte[][] {
149           Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
150           Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
151         },
152         new byte[][][] {
153           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("lll") },
154           new byte[][]{ Bytes.toBytes("mmm"), Bytes.toBytes("zzz") },
155         }
156     );
157   }
158 
159   /**
160    * Test case that creates some regions and loads HFiles that cross the boundaries
161    * and have different region boundaries than the table pre-split.
162    */
163   @Test
164   public void testRegionCrossingHFileSplit() throws Exception {
165     testRegionCrossingHFileSplit(BloomType.NONE);
166   }
167 
168   /**
169    * Test case that creates some regions and loads HFiles that cross the boundaries
170    * have a ROW bloom filter and a different region boundaries than the table pre-split.
171    */
172   @Test
173   public void testRegionCrossingHFileSplitRowBloom() throws Exception {
174     testRegionCrossingHFileSplit(BloomType.ROW);
175   }
176 
177   /**
178    * Test case that creates some regions and loads HFiles that cross the boundaries
179    * have a ROWCOL bloom filter and a different region boundaries than the table pre-split.
180    */
181   @Test
182   public void testRegionCrossingHFileSplitRowColBloom() throws Exception {
183     testRegionCrossingHFileSplit(BloomType.ROWCOL);
184   }
185 
186   private void testRegionCrossingHFileSplit(BloomType bloomType) throws Exception {
187     runTest("testHFileSplit" + bloomType + "Bloom", bloomType,
188         new byte[][] {
189           Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
190           Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
191         },
192         new byte[][][] {
193           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
194           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
195         }
196     );
197   }
198 
199   private void runTest(String testName, BloomType bloomType,
200       byte[][][] hfileRanges) throws Exception {
201     runTest(testName, bloomType, null, hfileRanges);
202   }
203 
204   private void runTest(String testName, BloomType bloomType,
205       byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
206     final byte[] TABLE_NAME = Bytes.toBytes("mytable_"+testName);
207     final boolean preCreateTable = tableSplitKeys != null;
208 
209     // Run the test bulkloading the table to the default namespace
210     final TableName TABLE_WITHOUT_NS = TableName.valueOf(TABLE_NAME);
211     runTest(testName, TABLE_WITHOUT_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
212 
213     // Run the test bulkloading the table to the specified namespace
214     final TableName TABLE_WITH_NS = TableName.valueOf(Bytes.toBytes(NAMESPACE), TABLE_NAME);
215     runTest(testName, TABLE_WITH_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
216   }
217 
218   private void runTest(String testName, TableName tableName, BloomType bloomType,
219       boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
220     HTableDescriptor htd = new HTableDescriptor(tableName);
221     HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
222     familyDesc.setBloomFilterType(bloomType);
223     htd.addFamily(familyDesc);
224     runTest(testName, htd, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
225   }
226 
227   private void runTest(String testName, HTableDescriptor htd, BloomType bloomType,
228       boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
229     Path dir = util.getDataTestDirOnTestFS(testName);
230     FileSystem fs = util.getTestFileSystem();
231     dir = dir.makeQualified(fs);
232     Path familyDir = new Path(dir, Bytes.toString(FAMILY));
233 
234     int hfileIdx = 0;
235     for (byte[][] range : hfileRanges) {
236       byte[] from = range[0];
237       byte[] to = range[1];
238       HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
239           + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
240     }
241     int expectedRows = hfileIdx * 1000;
242 
243     if (preCreateTable) {
244       util.getHBaseAdmin().createTable(htd, tableSplitKeys);
245     }
246 
247     final TableName tableName = htd.getTableName();
248     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
249     String [] args= {dir.toString(), tableName.toString()};
250     loader.run(args);
251 
252     HTable table = new HTable(util.getConfiguration(), tableName);
253     try {
254       assertEquals(expectedRows, util.countRows(table));
255     } finally {
256       table.close();
257     }
258 
259     util.deleteTable(tableName);
260   }
261 
262   /**
263    * Test loading into a column family that does not exist.
264    */
265   @Test
266   public void testNonexistentColumnFamilyLoad() throws Exception {
267     String testName = "testNonexistentColumnFamilyLoad";
268     byte[][][] hFileRanges = new byte[][][] {
269       new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
270       new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
271     };
272 
273     final byte[] TABLE = Bytes.toBytes("mytable_"+testName);
274     HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
275     // set real family name to upper case in purpose to simulate the case that
276     // family name in HFiles is invalid
277     HColumnDescriptor family =
278         new HColumnDescriptor(Bytes.toBytes(new String(FAMILY).toUpperCase()));
279     htd.addFamily(family);
280 
281     try {
282       runTest(testName, htd, BloomType.NONE, true, SPLIT_KEYS, hFileRanges);
283       assertTrue("Loading into table with non-existent family should have failed", false);
284     } catch (Exception e) {
285       assertTrue("IOException expected", e instanceof IOException);
286       // further check whether the exception message is correct
287       String errMsg = e.getMessage();
288       assertTrue("Incorrect exception message, expected message: ["
289           + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
290           errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
291     }
292   }
293 
294   @Test
295   public void testSplitStoreFile() throws IOException {
296     Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
297     FileSystem fs = util.getTestFileSystem();
298     Path testIn = new Path(dir, "testhfile");
299     HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
300     HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
301         Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
302 
303     Path bottomOut = new Path(dir, "bottom.out");
304     Path topOut = new Path(dir, "top.out");
305 
306     LoadIncrementalHFiles.splitStoreFile(
307         util.getConfiguration(), testIn,
308         familyDesc, Bytes.toBytes("ggg"),
309         bottomOut,
310         topOut);
311 
312     int rowCount = verifyHFile(bottomOut);
313     rowCount += verifyHFile(topOut);
314     assertEquals(1000, rowCount);
315   }
316 
317   private int verifyHFile(Path p) throws IOException {
318     Configuration conf = util.getConfiguration();
319     HFile.Reader reader = HFile.createReader(
320         p.getFileSystem(conf), p, new CacheConfig(conf), conf);
321     reader.loadFileInfo();
322     HFileScanner scanner = reader.getScanner(false, false);
323     scanner.seekTo();
324     int count = 0;
325     do {
326       count++;
327     } while (scanner.next());
328     assertTrue(count > 0);
329     reader.close();
330     return count;
331   }
332 
333   private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
334     Integer value = map.containsKey(first)?map.get(first):0;
335     map.put(first, value+1);
336 
337     value = map.containsKey(last)?map.get(last):0;
338     map.put(last, value-1);
339   }
340 
341   @Test 
342   public void testInferBoundaries() {
343     TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
344 
345     /* Toy example
346      *     c---------i            o------p          s---------t     v------x
347      * a------e    g-----k   m-------------q   r----s            u----w
348      *
349      * Should be inferred as:
350      * a-----------------k   m-------------q   r--------------t  u---------x
351      * 
352      * The output should be (m,r,u) 
353      */
354 
355     String first;
356     String last;
357 
358     first = "a"; last = "e";
359     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
360     
361     first = "r"; last = "s";
362     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
363 
364     first = "o"; last = "p";
365     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
366 
367     first = "g"; last = "k";
368     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
369 
370     first = "v"; last = "x";
371     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
372 
373     first = "c"; last = "i";
374     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
375 
376     first = "m"; last = "q";
377     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
378 
379     first = "s"; last = "t";
380     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
381     
382     first = "u"; last = "w";
383     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
384 
385     byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
386     byte[][] compare = new byte[3][];
387     compare[0] = "m".getBytes();
388     compare[1] = "r".getBytes(); 
389     compare[2] = "u".getBytes();
390 
391     assertEquals(keysArray.length, 3);
392 
393     for (int row = 0; row<keysArray.length; row++){
394       assertArrayEquals(keysArray[row], compare[row]);
395     }
396   }
397 
398   @Test
399   public void testLoadTooMayHFiles() throws Exception {
400     Path dir = util.getDataTestDirOnTestFS("testLoadTooMayHFiles");
401     FileSystem fs = util.getTestFileSystem();
402     dir = dir.makeQualified(fs);
403     Path familyDir = new Path(dir, Bytes.toString(FAMILY));
404 
405     byte[] from = Bytes.toBytes("begin");
406     byte[] to = Bytes.toBytes("end");
407     for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
408       HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
409           + i), FAMILY, QUALIFIER, from, to, 1000);
410     }
411 
412     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
413     String [] args= {dir.toString(), "mytable_testLoadTooMayHFiles"};
414     try {
415       loader.run(args);
416       fail("Bulk loading too many files should fail");
417     } catch (IOException ie) {
418       assertTrue(ie.getMessage().contains("Trying to load more than "
419         + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
420     }
421   }
422 }
423