/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.List;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
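  /**
   * Passed straight through to the {@link LoadIncrementalHFiles} constructor so the
   * secure bulk load path can be exercised; left false here, and presumably flipped
   * by secure variants of this test.
   */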
  protected static boolean useSecureHBaseOverride = false;
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");

  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  public static int BLOCKSIZE = 64*1024;
  public static String COMPRESSION =
    Compression.Algorithm.NONE.getName();

  static HBaseTestingUtility util = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.startMiniCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

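  /**
   * Writes one HFile per requested key range, creates a table pre-split at
   * {@link #SPLIT_KEYS} whose single family uses the given bloom filter type,
   * bulk loads the files, and asserts that every written row is visible.
   */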
  private void runTest(String testName, BloomType bloomType,
          byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

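    // Write one HFile per [from, to] range into the column family directory,
    // 1000 rows each.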
    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    final byte[] TABLE = Bytes.toBytes("mytable_"+testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TABLE);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableAvailable(TABLE, 30000);

    LoadIncrementalHFiles loader =
        new LoadIncrementalHFiles(util.getConfiguration(), useSecureHBaseOverride);
    loader.doBulkLoad(dir, table);

    assertEquals(expectedRows, util.countRows(table));
    table.close();
    admin.close();
  }

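  /**
   * Helper that bulk loads HFiles for the given ranges into a fresh table after a
   * regular put has bumped the region's sequence number, then checks the max
   * sequence id recorded in each resulting store file: greater than zero when
   * {@code nonZero} is set, otherwise -1.
   */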
  private void verifyAssignedSequenceNumber(String testName, byte[][][] hfileRanges,
      boolean nonZero) throws Exception {
    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_" + hfileIdx++), FAMILY,
        QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TABLE);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    htd.addFamily(familyDesc);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableAvailable(TABLE, 30000);
    LoadIncrementalHFiles loader =
        new LoadIncrementalHFiles(util.getConfiguration(), useSecureHBaseOverride);

    // Do a dummy put to increase the HLog sequence number
    Put put = new Put(Bytes.toBytes("row"));
    put.add(FAMILY, QUALIFIER, Bytes.toBytes("value"));
    table.put(put);

    loader.doBulkLoad(dir, table);

    // Get the store files
    List<StoreFile> files =
        util.getHBaseCluster().getRegions(TABLE).get(0).getStore(FAMILY).getStorefiles();
    for (StoreFile file : files) {
      // the sequenceId gets initialized during createReader
      file.createReader();

      if (nonZero) assertTrue(file.getMaxSequenceId() > 0);
      else assertTrue(file.getMaxSequenceId() == -1);
    }

    table.close();
    admin.close();
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = "testNonexistentColumnFamilyLoad";
    byte[][][] hfileRanges = new byte[][][] {
      new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_"+testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TABLE);
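    // Note: FAMILY is deliberately never added to the descriptor, so the bulk load
    // below must fail.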
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableAvailable(TABLE, 30000);
    // make sure we go back to the usual user provider
    UserProvider.setUserProviderForTesting(util.getConfiguration(), UserProvider.class);
    LoadIncrementalHFiles loader =
        new LoadIncrementalHFiles(util.getConfiguration(), useSecureHBaseOverride);
    try {
      loader.doBulkLoad(dir, table);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
    }
    table.close();
    admin.close();
  }

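  /**
   * Splits a single HFile spanning "aaa" to "zzz" at key "ggg" and verifies that the
   * bottom and top halves together still contain all 1000 rows.
   */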
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDir("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

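  /**
   * Opens the HFile at the given path, scans every KeyValue, asserts the file is
   * non-empty, and returns the number of entries seen.
   */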
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf));
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    scanner.seekTo();
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

  /**
   * Create an HFile with the given number of rows between a given
   * start key and end key.
   * TODO put me in an HFileTestUtil or something?
   */
  static void createHFile(
      Configuration conf,
      FileSystem fs, Path path,
      byte[] family, byte[] qualifier,
      byte[] startKey, byte[] endKey, int numRows) throws IOException
  {
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
        .withPath(fs, path)
        .withBlockSize(BLOCKSIZE)
        .withCompression(COMPRESSION)
        .withComparator(KeyValue.KEY_COMPARATOR)
        .create();
    long now = System.currentTimeMillis();
    try {
      // pass numRows - 2 because iterateOnSplits also yields the two boundary keys;
      // its num argument counts only the intermediate splits
      for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, numRows-2)) {
        KeyValue kv = new KeyValue(key, family, qualifier, now, key);
        writer.append(kv);
      }
    } finally {
      writer.close();
    }
  }

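  /**
   * Records one HFile's key range in the map handed to
   * LoadIncrementalHFiles.inferBoundaries: +1 at the range's first key and -1 at its
   * last key.
   */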
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m,r,u)
     */

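    // Register each toy range; the map ends up holding +1 markers at start keys
    // and -1 markers at end keys, in key order.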
    String[][] ranges = new String[][] {
      { "a", "e" }, { "r", "s" }, { "o", "p" }, { "g", "k" }, { "v", "x" },
      { "c", "i" }, { "m", "q" }, { "s", "t" }, { "u", "w" }
    };
    for (String[] range : ranges) {
      addStartEndKeysForTest(map, range[0].getBytes(), range[1].getBytes());
    }

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[3][];
    compare[0] = "m".getBytes();
    compare[1] = "r".getBytes();
    compare[2] = "u".getBytes();

    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}