/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  private static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";

  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  public static final int BLOCKSIZE = 64 * 1024;
  public static final Algorithm COMPRESSION = Compression.Algorithm.NONE;

  static HBaseTestingUtility util = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.startMiniCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
        });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
        });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
        });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
        });
  }

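  /**
   * Shared driver for the load tests above: writes one HFile per
   * (from, to) range into a directory named after the column family,
   * creates a table with that family and the given bloom filter type,
   * bulk loads the directory, and verifies the expected row count.
   */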
  private void runTest(String testName, BloomType bloomType,
      byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);

    // Create the table up front so the bloom filter setting on the family
    // descriptor actually takes effect; previously htd was built but unused.
    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    admin.createTable(htd, SPLIT_KEYS);
    util.waitTableEnabled(TABLE);
    admin.close();

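    // Run the bulk load tool exactly as the command line would invoke it:
    // the args are the HFile directory and the target table name.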
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), "mytable_" + testName };
    loader.run(args);
    HTable table = new HTable(util.getConfiguration(), TABLE);

    assertEquals(expectedRows, util.countRows(table));
    table.close();
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = "testNonexistentColumnFamilyLoad";
    byte[][][] hFileRanges = new byte[][][] {
      new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hFileIdx = 0;
    for (byte[][] range : hFileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hFileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    // upper-case the real family name on purpose, to simulate HFiles whose
    // family name does not match any family in the table
    HColumnDescriptor family =
        new HColumnDescriptor(Bytes.toBytes(Bytes.toString(FAMILY).toUpperCase()));
    htd.addFamily(family);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableEnabled(TABLE);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    try {
      loader.doBulkLoad(dir, table);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
      // further check whether the exception message is correct
      String errMsg = e.getMessage();
      assertTrue("Incorrect exception message, expected message: ["
          + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
          errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
    }
    table.close();
    admin.close();
  }

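  /**
   * Test that splitting a single HFile at a midpoint key produces two
   * halves whose combined row count matches the original file.
   */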
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

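    // Split at row "ggg": rows sorting before the split key should land in
    // the bottom half, the rest in the top half.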
    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

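  /**
   * Opens the HFile at the given path, scans every cell from the start, and
   * returns the number of cells read, asserting the file is non-empty.
   */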
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf), conf);
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    assertTrue(scanner.seekTo());
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

  /**
   * Create an HFile with the given number of rows between a given
   * start key and end key.
   * TODO put me in an HFileTestUtil or something?
   */
  static void createHFile(
      Configuration configuration,
      FileSystem fs, Path path,
      byte[] family, byte[] qualifier,
      byte[] startKey, byte[] endKey, int numRows) throws IOException {
    HFileContext meta = new HFileContextBuilder()
        .withBlockSize(BLOCKSIZE)
        .withCompression(COMPRESSION)
        .build();
    HFile.Writer writer = HFile.getWriterFactory(configuration, new CacheConfig(configuration))
        .withPath(fs, path)
        .withFileContext(meta)
        .create();
    long now = System.currentTimeMillis();
    try {
      // subtract 2 since iterateOnSplits doesn't include boundary keys
      for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, numRows - 2)) {
        KeyValue kv = new KeyValue(key, family, qualifier, now, key);
        writer.append(kv);
      }
    } finally {
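      // Stamp the file with a bulk load timestamp so it carries the metadata
      // a bulk-loaded store file is expected to have.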
      writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
          Bytes.toBytes(System.currentTimeMillis()));
      writer.close();
    }
  }

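  /**
   * Marks a key range in the map using a +1/-1 sweep: +1 at the range's
   * first key and -1 at its last key. Summing the values in key order gives,
   * at each key, the number of ranges covering it; inferBoundaries uses that
   * running sum to find where one contiguous block of ranges ends and the
   * next begins.
   */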
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

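  /**
   * Test that inferBoundaries merges a set of overlapping HFile key ranges
   * into the expected region boundaries; see the toy example below.
   */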
  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m, r, u)
     */

    String[][] ranges = {
        { "a", "e" }, { "r", "s" }, { "o", "p" }, { "g", "k" }, { "v", "x" },
        { "c", "i" }, { "m", "q" }, { "s", "t" }, { "u", "w" }
    };
    for (String[] range : ranges) {
      addStartEndKeysForTest(map, Bytes.toBytes(range[0]), Bytes.toBytes(range[1]));
    }

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[3][];
    compare[0] = Bytes.toBytes("m");
    compare[1] = Bytes.toBytes("r");
    compare[2] = Bytes.toBytes("u");

    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

}