/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
  static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

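  // Split keys used to pre-split the test table; two split points yield
  // three regions.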
  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  public static final int BLOCKSIZE = 64 * 1024;
  public static final Algorithm COMPRESSION = Compression.Algorithm.NONE;

  static HBaseTestingUtility util = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
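    // Lower the per-region, per-family HFile limit so that
    // testLoadTooManyHFiles can exceed it with only a few small files.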
    util.getConfiguration().setInt(
      LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
      MAX_FILES_PER_REGION_PER_FAMILY);
    util.startMiniCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

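  /**
   * Writes one HFile per given key range into a single family directory,
   * then bulk loads the directory through the LoadIncrementalHFiles tool
   * and verifies the expected row count with a scan of the table.
   */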
  private void runTest(String testName, BloomType bloomType,
      byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = {dir.toString(), "mytable_" + testName};
    loader.run(args);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    assertEquals(expectedRows, util.countRows(table));
    table.close();
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = "testNonexistentColumnFamilyLoad";
    byte[][][] hFileRanges = new byte[][][] {
      new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hFileIdx = 0;
    for (byte[][] range : hFileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hFileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
    // Upper-case the real family name on purpose, to simulate HFiles whose
    // family directory does not match any family in the table.
    HColumnDescriptor family =
        new HColumnDescriptor(Bytes.toString(FAMILY).toUpperCase());
    htd.addFamily(family);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableEnabled(TABLE);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    try {
      loader.doBulkLoad(dir, table);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
      // further check whether the exception message is correct
      String errMsg = e.getMessage();
      assertTrue("Incorrect exception message, expected message: ["
          + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
          errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
    }
    table.close();
    admin.close();
  }

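  /**
   * Splits a single HFile at row "ggg" and verifies that the bottom and top
   * halves together still contain all of the original rows.
   */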
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

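  /**
   * Opens the HFile at the given path, scans all of its cells, and returns
   * the cell count, asserting that the file is non-empty.
   */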
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf), conf);
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    scanner.seekTo();
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

  /**
   * Create an HFile with the given number of rows between a given
   * start key and end key.
   * TODO put me in an HFileTestUtil or something?
   */
  static void createHFile(
      Configuration configuration,
      FileSystem fs, Path path,
      byte[] family, byte[] qualifier,
      byte[] startKey, byte[] endKey, int numRows) throws IOException {
    HFileContext meta = new HFileContextBuilder()
                        .withBlockSize(BLOCKSIZE)
                        .withCompression(COMPRESSION)
                        .build();
    HFile.Writer writer = HFile.getWriterFactory(configuration, new CacheConfig(configuration))
        .withPath(fs, path)
        .withFileContext(meta)
        .create();
    long now = System.currentTimeMillis();
    try {
      // subtract 2 since iterateOnSplits doesn't include boundary keys
      for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, numRows - 2)) {
        KeyValue kv = new KeyValue(key, family, qualifier, now, key);
        writer.append(kv);
      }
    } finally {
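      // Stamp the file with a bulk-load timestamp, mirroring the metadata a
      // real bulk-loaded store file carries.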
      writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
          Bytes.toBytes(System.currentTimeMillis()));
      writer.close();
    }
  }

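  /**
   * Marks a key range in the map: +1 at the range's first key and -1 at its
   * last key. When the sorted map is swept in key order, the running sum is
   * positive exactly while inside one or more ranges, which is what
   * inferBoundaries uses to merge overlapping ranges into region boundaries.
   */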
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m,r,u)
     */

    String first;
    String last;

    first = "a"; last = "e";
    addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));

    first = "r"; last = "s";
    addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));

    first = "o"; last = "p";
    addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));

    first = "g"; last = "k";
    addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));

    first = "v"; last = "x";
    addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));

    first = "c"; last = "i";
    addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));

    first = "m"; last = "q";
    addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));

    first = "s"; last = "t";
    addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));

    first = "u"; last = "w";
    addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[3][];
    compare[0] = Bytes.toBytes("m");
    compare[1] = Bytes.toBytes("r");
    compare[2] = Bytes.toBytes("u");

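    // Expect exactly the three split points read off the diagram above.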
    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

  @Test
  public void testLoadTooManyHFiles() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("testLoadTooManyHFiles");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
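    // Write one more HFile than the configured per-region, per-family limit.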
    for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + i), FAMILY, QUALIFIER, from, to, 1000);
    }

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = {dir.toString(), "mytable_testLoadTooManyHFiles"};
    try {
      loader.run(args);
      fail("Bulk loading too many files should fail");
    } catch (IOException ie) {
      assertTrue(ie.getMessage().contains("Trying to load more than "
        + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
    }
  }
}