
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Collection;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.*;
import org.junit.experimental.categories.Category;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  private static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";

  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  public static int BLOCKSIZE = 64 * 1024;
  public static String COMPRESSION =
    Compression.Algorithm.NONE.getName();

  static HBaseTestingUtility util = new HBaseTestingUtility();
  // used by secure subclass
  static boolean useSecure = false;

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.startMiniCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

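  /**
   * Shared driver for the load tests above: writes one HFile per given key
   * range, creates a pre-split table whose family uses the given bloom filter
   * type, bulk loads the HFiles, and verifies the resulting row count.
   */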
  private void runTest(String testName, BloomType bloomType,
          byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableEnabled(TABLE);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration(), useSecure);
    loader.doBulkLoad(dir, table);

    assertEquals(expectedRows, util.countRows(table));
    table.close();
    admin.close();
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = "testNonexistentColumnFamilyLoad";
    byte[][][] hfileRanges = new byte[][][] {
      new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    // Set the real family name to upper case on purpose, to simulate the case
    // where the family name in the HFiles does not match any family in the table.
    HColumnDescriptor family =
        new HColumnDescriptor(Bytes.toBytes(new String(FAMILY).toUpperCase()));
    htd.addFamily(family);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableEnabled(TABLE);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration(), false);
    try {
      loader.doBulkLoad(dir, table);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
      // further check whether the exception message is correct
      String errMsg = e.getMessage();
      assertTrue("Incorrect exception message, expected message: ["
          + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
          errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
    }
    table.close();
    admin.close();
  }

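  /**
   * Helper that bulk loads HFiles for the given ranges and then checks the
   * sequence ids assigned to the resulting store files: greater than zero
   * when {@code nonZero} is set, -1 otherwise. Not currently invoked by any
   * test in this class.
   */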
  private void verifyAssignedSequenceNumber(String testName,
      byte[][][] hfileRanges, boolean nonZero) throws Exception {
    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    htd.addFamily(familyDesc);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableEnabled(TABLE);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(
      util.getConfiguration());

    // Do a dummy put to increase the HLog sequence number
    Put put = new Put(Bytes.toBytes("row"));
    put.add(FAMILY, QUALIFIER, Bytes.toBytes("value"));
    table.put(put);

    loader.doBulkLoad(dir, table);

    // Get the store files
    Collection<StoreFile> files = util.getHBaseCluster().
        getRegions(TABLE).get(0).getStore(FAMILY).getStorefiles();
    for (StoreFile file: files) {
      // the sequenceId gets initialized during createReader
      file.createReader();

      if (nonZero) {
        assertTrue(file.getMaxSequenceId() > 0);
      } else {
        assertEquals(-1, file.getMaxSequenceId());
      }
    }
    table.close();
    admin.close();
  }

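  /**
   * Test that splitting a single HFile at a key inside its range produces a
   * bottom half and a top half that together contain all of the original rows.
   */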
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

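  /**
   * Open the HFile at the given path, count its rows by scanning from the
   * first key, assert that it is non-empty, and return the row count.
   */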
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf));
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    // seekTo() returns false on an empty file; assert it so that an empty
    // HFile cannot be miscounted as having one row by the do/while below
    assertTrue(scanner.seekTo());
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

  /**
   * Create an HFile with the given number of rows between a given
   * start key and end key.
   * TODO put me in an HFileTestUtil or something?
   */
  static void createHFile(
      Configuration conf,
      FileSystem fs, Path path,
      byte[] family, byte[] qualifier,
      byte[] startKey, byte[] endKey, int numRows) throws IOException
  {
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
        .withPath(fs, path)
        .withBlockSize(BLOCKSIZE)
        .withCompression(COMPRESSION)
        .withComparator(KeyValue.KEY_COMPARATOR)
        .create();
    long now = System.currentTimeMillis();
    try {
      // subtract 2 since the split count passed to iterateOnSplits excludes
      // the two boundary keys, which it also returns
      for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, numRows - 2)) {
        KeyValue kv = new KeyValue(key, family, qualifier, now, key);
        writer.append(kv);
      }
    } finally {
      writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
          Bytes.toBytes(System.currentTimeMillis()));
      writer.close();
    }
  }

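  /**
   * Record an HFile's [first, last] key range in the map as interval deltas:
   * +1 at the start key and -1 at the end key. testInferBoundaries() feeds
   * these deltas to LoadIncrementalHFiles.inferBoundaries() to reconstruct
   * region boundaries.
   */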
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m,r,u)
     */

    String first;
    String last;

    first = "a"; last = "e";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "r"; last = "s";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "o"; last = "p";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "g"; last = "k";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "v"; last = "x";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "c"; last = "i";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "m"; last = "q";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "s"; last = "t";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "u"; last = "w";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[3][];
    compare[0] = "m".getBytes();
    compare[1] = "r".getBytes();
    compare[2] = "u".getBytes();

    // JUnit convention: expected value comes first
    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

}