/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
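  /*
   * For reference, a programmatic bulk load (as opposed to the command-line
   * entry point most of these tests drive via LoadIncrementalHFiles.run())
   * looks roughly like the sketch below. This is only an illustrative sketch
   * against the HBase API of this era, not part of the tests; the path
   * "/path/to/hfiles" and the table name "mytable" are placeholders:
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
   *   HTable table = new HTable(conf, "mytable");
   *   try {
   *     loader.doBulkLoad(new Path("/path/to/hfiles"), table);
   *   } finally {
   *     table.close();
   *   }
   */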
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  private static final String NAMESPACE = "bulkNS";

  static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
  static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  static HBaseTestingUtility util = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
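    // Run without region coprocessors and cap how many HFiles the loader will
    // accept per region and family, so that testLoadTooManyHFiles below can
    // trip the limit with only a handful of small files.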
    util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
    util.getConfiguration().setInt(
      LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
      MAX_FILES_PER_REGION_PER_FAMILY);
    util.startMiniCluster();

    setupNamespace();
  }

  protected static void setupNamespace() throws Exception {
    util.getHBaseAdmin().createNamespace(NamespaceDescriptor.create(NAMESPACE).build());
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test case that creates some regions and loads HFiles whose boundaries
   * differ from the table's pre-split region boundaries.
   */
  @Test
  public void testSimpleHFileSplit() throws Exception {
    runTest("testHFileSplit", BloomType.NONE,
        new byte[][] {
          Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
          Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
        },
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("lll") },
          new byte[][]{ Bytes.toBytes("mmm"), Bytes.toBytes("zzz") },
        }
    );
  }

  /**
   * Test case that creates some regions and loads HFiles that cross region
   * boundaries and have different boundaries than the table's pre-split regions.
   */
  @Test
  public void testRegionCrossingHFileSplit() throws Exception {
    testRegionCrossingHFileSplit(BloomType.NONE);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross region
   * boundaries, have a ROW bloom filter, and have different boundaries than
   * the table's pre-split regions.
   */
  @Test
  public void testRegionCrossingHFileSplitRowBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROW);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross region
   * boundaries, have a ROWCOL bloom filter, and have different boundaries than
   * the table's pre-split regions.
   */
  @Test
  public void testRegionCrossingHFileSplitRowColBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROWCOL);
  }

  private void testRegionCrossingHFileSplit(BloomType bloomType) throws Exception {
    runTest("testHFileSplit" + bloomType + "Bloom", bloomType,
        new byte[][] {
          Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
          Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
        },
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
        }
    );
  }

  private void runTest(String testName, BloomType bloomType,
      byte[][][] hfileRanges) throws Exception {
    runTest(testName, bloomType, null, hfileRanges);
  }

  private void runTest(String testName, BloomType bloomType,
      byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
    final boolean preCreateTable = tableSplitKeys != null;

    // Run the test bulk loading the table into the default namespace
    final TableName TABLE_WITHOUT_NS = TableName.valueOf(TABLE_NAME);
    runTest(testName, TABLE_WITHOUT_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);

    // Run the test bulk loading the table into the specified namespace
    final TableName TABLE_WITH_NS = TableName.valueOf(Bytes.toBytes(NAMESPACE), TABLE_NAME);
    runTest(testName, TABLE_WITH_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
  }

  private void runTest(String testName, TableName tableName, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    HTableDescriptor htd = new HTableDescriptor(tableName);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);
    runTest(testName, htd, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
  }

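  // Workhorse for the tests above: writes one HFile per [from, to] key range
  // under a per-family directory, optionally pre-creates the table with the
  // given split keys, bulk loads the directory, and verifies the row count.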
  private void runTest(String testName, HTableDescriptor htd, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    // Write one 1000-row HFile per key range under the family directory.
    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    if (preCreateTable) {
      util.getHBaseAdmin().createTable(htd, tableSplitKeys);
    }

    // Drive the bulk load through the tool's command-line entry point; when
    // the table does not exist yet, the tool creates it itself, inferring the
    // split keys from the HFile boundaries.
    final TableName tableName = htd.getTableName();
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), tableName.toString() };
    loader.run(args);

    // The loaded table must contain exactly the rows written above.
    HTable table = new HTable(util.getConfiguration(), tableName);
    try {
      assertEquals(expectedRows, util.countRows(table));
    } finally {
      table.close();
    }

    util.deleteTable(tableName);
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = "testNonexistentColumnFamilyLoad";
    byte[][][] hFileRanges = new byte[][][] {
      new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    // Set the real family name to upper case on purpose, to simulate the case
    // where the family name in the HFiles matches no family in the table.
    HColumnDescriptor family =
        new HColumnDescriptor(Bytes.toBytes(new String(FAMILY).toUpperCase()));
    htd.addFamily(family);

    try {
      runTest(testName, htd, BloomType.NONE, true, SPLIT_KEYS, hFileRanges);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
      // further check whether the exception message is correct
      String errMsg = e.getMessage();
      assertTrue("Incorrect exception message, expected message: ["
          + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
          errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
    }
  }

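  // splitStoreFile should write a "bottom" half holding the rows that sort
  // below the split key and a "top" half holding the rest; between them they
  // must account for every row of the input file.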
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

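  // Opens the HFile at the given path, scans every cell, and returns the cell
  // count, asserting along the way that the file is non-empty.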
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf), conf);
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    scanner.seekTo();
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

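  // Sweep-line bookkeeping for testInferBoundaries below: each range's start
  // key contributes +1 and its end key -1, so a running sum over the sorted
  // keys gives the number of ranges covering any point.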
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m, r, u)
     */
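    // Given the +1/-1 bookkeeping above, a boundary is expected at each start
    // key where the running coverage count has just returned to zero: m (after
    // k closes the first merged range), r (after q), and u (after t). The
    // first merged range's start key, a, is implicit and not emitted.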

    // Insert the toy ranges in scrambled order; the TreeMap keeps the keys
    // sorted, and the accumulated counts do not depend on insertion order.
    String[][] ranges = {
      { "a", "e" }, { "r", "s" }, { "o", "p" }, { "g", "k" }, { "v", "x" },
      { "c", "i" }, { "m", "q" }, { "s", "t" }, { "u", "w" }
    };
    for (String[] range : ranges) {
      addStartEndKeysForTest(map, Bytes.toBytes(range[0]), Bytes.toBytes(range[1]));
    }

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[][] {
      Bytes.toBytes("m"), Bytes.toBytes("r"), Bytes.toBytes("u")
    };

    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

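  // The loader should refuse to bulk load once a single region and family
  // would receive more than MAX_FILES_PER_REGION_PER_FAMILY files (capped at
  // 4 in setUpBeforeClass).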
  @Test
  public void testLoadTooManyHFiles() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("testLoadTooManyHFiles");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    // Create one file more than the configured maximum.
    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + i), FAMILY, QUALIFIER, from, to, 1000);
    }

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), "mytable_testLoadTooManyHFiles" };
    try {
      loader.run(args);
      fail("Bulk loading too many files should fail");
    } catch (IOException ie) {
      assertTrue(ie.getMessage().contains("Trying to load more than "
        + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
    }
  }
}