/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Collection;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
  static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  public static int BLOCKSIZE = 64 * 1024;
  public static String COMPRESSION =
    Compression.Algorithm.NONE.getName();

  static HBaseTestingUtility util = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.getConfiguration().setInt(
      LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
      MAX_FILES_PER_REGION_PER_FAMILY);
    util.startMiniCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

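  /**
   * Shared driver for the load tests above: writes one HFile per given
   * key range into the family directory, creates a pre-split table whose
   * family uses the given bloom filter type, bulk loads the directory,
   * and asserts that every written row is visible in the table.
   */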
  private void runTest(String testName, BloomType bloomType,
          byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableEnabled(TABLE);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    loader.doBulkLoad(dir, table);

    assertEquals(expectedRows, util.countRows(table));

    // Release client resources; other tests in this class reuse the mini cluster.
    table.close();
    admin.close();
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = "testNonexistentColumnFamilyLoad";
    byte[][][] hfileRanges = new byte[][][] {
      new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    // Create the real family with an upper-cased name on purpose, so that the
    // lower-cased family name in the HFiles does not match any family in the table.
    HColumnDescriptor family =
        new HColumnDescriptor(Bytes.toBytes(new String(FAMILY).toUpperCase()));
    htd.addFamily(family);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableEnabled(TABLE);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    try {
      loader.doBulkLoad(dir, table);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
      // further check whether the exception message is correct
      String errMsg = e.getMessage();
      assertTrue("Incorrect exception message, expected message: ["
          + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
          errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
    }
    table.close();
    admin.close();
  }

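  /**
   * Bulk loads the given ranges into a fresh table after a regular Put has
   * bumped the WAL sequence number, then checks whether the resulting store
   * files carry a positive sequence id (nonZero) or the unassigned marker (-1).
   */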
  private void verifyAssignedSequenceNumber(String testName,
      byte[][][] hfileRanges, boolean nonZero) throws Exception {
    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    htd.addFamily(familyDesc);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableEnabled(TABLE);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(
      util.getConfiguration());

    // Do a dummy put to increase the hlog sequence number
    Put put = new Put(Bytes.toBytes("row"));
    put.add(FAMILY, QUALIFIER, Bytes.toBytes("value"));
    table.put(put);

    loader.doBulkLoad(dir, table);

    // Get the store files
    Collection<StoreFile> files = util.getHBaseCluster().
        getRegions(TABLE).get(0).getStore(FAMILY).getStorefiles();
    for (StoreFile file : files) {
      // the sequenceId gets initialized during createReader
      file.createReader();

      if (nonZero) {
        assertTrue(file.getMaxSequenceId() > 0);
      } else {
        assertEquals(-1, file.getMaxSequenceId());
      }
    }

    table.close();
    admin.close();
  }

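  /**
   * Writes a single HFile spanning "aaa" to "zzz", splits it at "ggg" into
   * bottom and top halves, and verifies that no rows are lost in the split.
   */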
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

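  /**
   * Opens the HFile at the given path, scans every entry, asserts the file
   * is non-empty, and returns the number of entries found.
   */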
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf));
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    scanner.seekTo();
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

  /**
   * Create an HFile with the given number of rows between a given
   * start key and end key.
   * TODO put me in an HFileTestUtil or something?
   */
  static void createHFile(
      Configuration conf,
      FileSystem fs, Path path,
      byte[] family, byte[] qualifier,
      byte[] startKey, byte[] endKey, int numRows) throws IOException {
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
        .withPath(fs, path)
        .withBlockSize(BLOCKSIZE)
        .withCompression(COMPRESSION)
        .create();
    long now = System.currentTimeMillis();
    try {
      // subtract 2 since iterateOnSplits doesn't include boundary keys
      for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, numRows - 2)) {
        KeyValue kv = new KeyValue(key, family, qualifier, now, key);
        writer.append(kv);
      }
    } finally {
      writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
          Bytes.toBytes(System.currentTimeMillis()));
      writer.close();
    }
  }

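  /**
   * Records one [first, last] key range in the map: +1 at the start key and
   * -1 at the end key, so a running sum over the sorted keys gives the
   * number of ranges covering each point.
   */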
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

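  /**
   * Feeds overlapping key ranges into inferBoundaries and checks that the
   * inferred split points match the toy example drawn below.
   */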
  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m,r,u)
     */

    addStartEndKeysForTest(map, "a".getBytes(), "e".getBytes());
    addStartEndKeysForTest(map, "r".getBytes(), "s".getBytes());
    addStartEndKeysForTest(map, "o".getBytes(), "p".getBytes());
    addStartEndKeysForTest(map, "g".getBytes(), "k".getBytes());
    addStartEndKeysForTest(map, "v".getBytes(), "x".getBytes());
    addStartEndKeysForTest(map, "c".getBytes(), "i".getBytes());
    addStartEndKeysForTest(map, "m".getBytes(), "q".getBytes());
    addStartEndKeysForTest(map, "s".getBytes(), "t".getBytes());
    addStartEndKeysForTest(map, "u".getBytes(), "w".getBytes());

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[3][];
    compare[0] = "m".getBytes();
    compare[1] = "r".getBytes();
    compare[2] = "u".getBytes();

    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

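  /**
   * Loading more than MAX_FILES_PER_REGION_PER_FAMILY HFiles into a single
   * family of a single region should be rejected with an IOException.
   */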
  @Test
  public void testLoadTooManyHFiles() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("testLoadTooManyHFiles");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + i), FAMILY, QUALIFIER, from, to, 1000);
    }

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = {dir.toString(), "mytable_testLoadTooManyHFiles"};
    try {
      loader.run(args);
      fail("Bulk loading too many files should fail");
    } catch (IOException ie) {
      assertTrue(ie.getMessage().contains("Trying to load more than "
        + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
    }
  }
}