/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.*;
import org.junit.experimental.categories.Category;
47  /**
48   * Test cases for the "load" half of the HFileOutputFormat bulk load
49   * functionality. These tests run faster than the full MR cluster
50   * tests in TestHFileOutputFormat
51   */
52  @Category(LargeTests.class)
53  public class TestLoadIncrementalHFiles {
54    private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
55    private static final byte[] FAMILY = Bytes.toBytes("myfam");
56    private static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
57  
58    private static final byte[][] SPLIT_KEYS = new byte[][] {
59      Bytes.toBytes("ddd"),
60      Bytes.toBytes("ppp")
61    };
62  
63    public static int BLOCKSIZE = 64*1024;
64    public static String COMPRESSION =
65      Compression.Algorithm.NONE.getName();
66  
67    static HBaseTestingUtility util = new HBaseTestingUtility();
68  
69    @BeforeClass
70    public static void setUpBeforeClass() throws Exception {
71      util.startMiniCluster();
72    }
73  
74    @AfterClass
75    public static void tearDownAfterClass() throws Exception {
76      util.shutdownMiniCluster();
77    }
78  
79    /**
80     * Test case that creates some regions and loads
81     * HFiles that fit snugly inside those regions
82     */
83    @Test
84    public void testSimpleLoad() throws Exception {
85      runTest("testSimpleLoad", BloomType.NONE,
86          new byte[][][] {
87            new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
88            new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
89      });
90    }
91  
92    /**
93     * Test case that creates some regions and loads
94     * HFiles that cross the boundaries of those regions
95     */
96    @Test
97    public void testRegionCrossingLoad() throws Exception {
98      runTest("testRegionCrossingLoad", BloomType.NONE,
99          new byte[][][] {
100           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
101           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
102     });
103   }
104 
105   /**
106    * Test loading into a column family that has a ROW bloom filter.
107    */
108   @Test
109   public void testRegionCrossingRowBloom() throws Exception {
110     runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
111         new byte[][][] {
112           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
113           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
114     });
115   }
116 
117   /**
118    * Test loading into a column family that has a ROWCOL bloom filter.
119    */
120   @Test
121   public void testRegionCrossingRowColBloom() throws Exception {
122     runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
123         new byte[][][] {
124           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
125           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
126     });
127   }
128 
129   private void runTest(String testName, BloomType bloomType,
130           byte[][][] hfileRanges) throws Exception {
131     Path dir = util.getDataTestDirOnTestFS(testName);
132     FileSystem fs = util.getTestFileSystem();
133     dir = dir.makeQualified(fs);
134     Path familyDir = new Path(dir, Bytes.toString(FAMILY));
135 
136     int hfileIdx = 0;
137     for (byte[][] range : hfileRanges) {
138       byte[] from = range[0];
139       byte[] to = range[1];
140       createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
141           + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
142     }
143     int expectedRows = hfileIdx * 1000;
144 
145     final byte[] TABLE = Bytes.toBytes("mytable_"+testName);
146 
147     HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
148     HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
149     HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
150     familyDesc.setBloomFilterType(bloomType);
151     htd.addFamily(familyDesc);
152     admin.createTable(htd, SPLIT_KEYS);
153 
154     HTable table = new HTable(util.getConfiguration(), TABLE);
155     util.waitTableEnabled(TABLE);
156     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
157     loader.doBulkLoad(dir, table);
158 
159     assertEquals(expectedRows, util.countRows(table));
160   }
161 
162   /**
163    * Test loading into a column family that does not exist.
164    */
165   @Test
166   public void testNonexistentColumnFamilyLoad() throws Exception {
167     String testName = "testNonexistentColumnFamilyLoad";
168     byte[][][] hfileRanges = new byte[][][] {
169       new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
170       new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
171     };
172 
173     Path dir = util.getDataTestDirOnTestFS(testName);
174     FileSystem fs = util.getTestFileSystem();
175     dir = dir.makeQualified(fs);
176     Path familyDir = new Path(dir, Bytes.toString(FAMILY));
177 
178     int hfileIdx = 0;
179     for (byte[][] range : hfileRanges) {
180       byte[] from = range[0];
181       byte[] to = range[1];
182       createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
183           + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
184     }
185 
186     final byte[] TABLE = Bytes.toBytes("mytable_"+testName);
187 
188     HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
189     HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
190     // set real family name to upper case in purpose to simulate the case that
191     // family name in HFiles is invalid
192     HColumnDescriptor family =
193         new HColumnDescriptor(Bytes.toBytes(new String(FAMILY).toUpperCase()));
194     htd.addFamily(family);
195     admin.createTable(htd, SPLIT_KEYS);
196 
197     HTable table = new HTable(util.getConfiguration(), TABLE);
198     util.waitTableEnabled(TABLE);
199     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
200     try {
201       loader.doBulkLoad(dir, table);
202       assertTrue("Loading into table with non-existent family should have failed", false);
203     } catch (Exception e) {
204       assertTrue("IOException expected", e instanceof IOException);
205       // further check whether the exception message is correct
206       String errMsg = e.getMessage();
207       assertTrue("Incorrect exception message, expected message: ["
208           + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
209           errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
210     }
211     table.close();
212     admin.close();
213   }
214 
215   private void verifyAssignedSequenceNumber(String testName,
216       byte[][][] hfileRanges, boolean nonZero) throws Exception {
217     Path dir = util.getDataTestDir(testName);
218     FileSystem fs = util.getTestFileSystem();
219     dir = dir.makeQualified(fs);
220     Path familyDir = new Path(dir, Bytes.toString(FAMILY));
221 
222     int hfileIdx = 0;
223     for (byte[][] range : hfileRanges) {
224       byte[] from = range[0];
225       byte[] to = range[1];
226       createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
227           + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
228     }
229 
230     final byte[] TABLE = Bytes.toBytes("mytable_"+testName);
231 
232     HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
233     HTableDescriptor htd = new HTableDescriptor(TABLE);
234     HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
235     htd.addFamily(familyDesc);
236     admin.createTable(htd, SPLIT_KEYS);
237 
238     HTable table = new HTable(util.getConfiguration(), TABLE);
239     util.waitTableEnabled(TABLE);
240     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(
241       util.getConfiguration());
242 
243     // Do a dummy put to increase the hlog sequence number
244     Put put = new Put(Bytes.toBytes("row"));
245     put.add(FAMILY, QUALIFIER, Bytes.toBytes("value"));
246     table.put(put);
247 
248     loader.doBulkLoad(dir, table);
249 
250     // Get the store files
251     Collection<StoreFile> files = util.getHBaseCluster().
252         getRegions(TABLE).get(0).getStore(FAMILY).getStorefiles();
253     for (StoreFile file: files) {
254       // the sequenceId gets initialized during createReader
255       file.createReader();
256 
257       if (nonZero)
258         assertTrue(file.getMaxSequenceId() > 0);
259       else
260         assertTrue(file.getMaxSequenceId() == -1);
261     }
262   }
263 
264   @Test
265   public void testSplitStoreFile() throws IOException {
266     Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
267     FileSystem fs = util.getTestFileSystem();
268     Path testIn = new Path(dir, "testhfile");
269     HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
270     createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
271         Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
272 
273     Path bottomOut = new Path(dir, "bottom.out");
274     Path topOut = new Path(dir, "top.out");
275 
276     LoadIncrementalHFiles.splitStoreFile(
277         util.getConfiguration(), testIn,
278         familyDesc, Bytes.toBytes("ggg"),
279         bottomOut,
280         topOut);
281 
282     int rowCount = verifyHFile(bottomOut);
283     rowCount += verifyHFile(topOut);
284     assertEquals(1000, rowCount);
285   }
286 
287   private int verifyHFile(Path p) throws IOException {
288     Configuration conf = util.getConfiguration();
289     HFile.Reader reader = HFile.createReader(
290         p.getFileSystem(conf), p, new CacheConfig(conf));
291     reader.loadFileInfo();
292     HFileScanner scanner = reader.getScanner(false, false);
293     scanner.seekTo();
294     int count = 0;
295     do {
296       count++;
297     } while (scanner.next());
298     assertTrue(count > 0);
299     reader.close();
300     return count;
301   }
302 
303 
304   /**
305    * Create an HFile with the given number of rows between a given
306    * start key and end key.
307    * TODO put me in an HFileTestUtil or something?
308    */
309   static void createHFile(
310       Configuration conf,
311       FileSystem fs, Path path,
312       byte[] family, byte[] qualifier,
313       byte[] startKey, byte[] endKey, int numRows) throws IOException
314   {
315     HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
316         .withPath(fs, path)
317         .withBlockSize(BLOCKSIZE)
318         .withCompression(COMPRESSION)
319         .create();
320     long now = System.currentTimeMillis();
321     try {
322       // subtract 2 since iterateOnSplits doesn't include boundary keys
323       for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, numRows-2)) {
324         KeyValue kv = new KeyValue(key, family, qualifier, now, key);
325         writer.append(kv);
326       }
327     } finally {
328       writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
329           Bytes.toBytes(System.currentTimeMillis()));
330       writer.close();
331     }
332   }
333 
334   private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
335     Integer value = map.containsKey(first)?(Integer)map.get(first):0;
336     map.put(first, value+1);
337 
338     value = map.containsKey(last)?(Integer)map.get(last):0;
339     map.put(last, value-1);
340   }
341 
342   @Test
343   public void testInferBoundaries() {
344     TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
345 
346     /* Toy example
347      *     c---------i            o------p          s---------t     v------x
348      * a------e    g-----k   m-------------q   r----s            u----w
349      *
350      * Should be inferred as:
351      * a-----------------k   m-------------q   r--------------t  u---------x
352      *
353      * The output should be (m,r,u)
354      */
355 
356     String first;
357     String last;
358 
359     first = "a"; last = "e";
360     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
361 
362     first = "r"; last = "s";
363     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
364 
365     first = "o"; last = "p";
366     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
367 
368     first = "g"; last = "k";
369     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
370 
371     first = "v"; last = "x";
372     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
373 
374     first = "c"; last = "i";
375     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
376 
377     first = "m"; last = "q";
378     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
379 
380     first = "s"; last = "t";
381     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
382 
383     first = "u"; last = "w";
384     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
385 
386     byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
387     byte[][] compare = new byte[3][];
388     compare[0] = "m".getBytes();
389     compare[1] = "r".getBytes();
390     compare[2] = "u".getBytes();
391 
392     assertEquals(keysArray.length, 3);
393 
394     for (int row = 0; row<keysArray.length; row++){
395       assertArrayEquals(keysArray[row], compare[row]);
396     }
397   }
398 
399 
400 }
401