/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.*;
import org.junit.experimental.categories.Category;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");

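  // every test table is created pre-split at these keys, giving three regions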
  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  public static int BLOCKSIZE = 64 * 1024;
  public static String COMPRESSION =
    Compression.Algorithm.NONE.getName();

  static HBaseTestingUtility util = new HBaseTestingUtility();
  // used by secure subclass
  static boolean useSecure = false;

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.startMiniCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }
  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  private void runTest(String testName, BloomType bloomType,
      byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

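    // doBulkLoad expects the bulk-load directory laid out like a region:
    // <dir>/<family>/<hfile>, so each test HFile goes under the family dir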
    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TABLE);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableAvailable(TABLE, 30000);
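    // doBulkLoad splits any HFile that straddles a region boundary and then
    // moves the files into the regions; the region-crossing tests above
    // exercise that split path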
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration(), useSecure);
    loader.doBulkLoad(dir, table);

    assertEquals(expectedRows, util.countRows(table));
    table.close();
    admin.close();
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = "testNonexistentColumnFamilyLoad";
    byte[][][] hfileRanges = new byte[][][] {
      new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
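    // deliberately create the table without FAMILY, so the load must fail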
    HTableDescriptor htd = new HTableDescriptor(TABLE);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableAvailable(TABLE, 30000);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration(), false);
    try {
      loader.doBulkLoad(dir, table);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
    }
    table.close();
    admin.close();
  }

  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDir("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

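    // split the single HFile at row "ggg": rows below the split key land in
    // bottom.out, rows at or above it in top.out; no rows may be lost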
    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf));
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
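    // seekTo() positions the scanner on the first KeyValue; the test files
    // are never empty, so the do/while below counts at least one row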
    scanner.seekTo();
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

  /**
   * Create an HFile with the given number of rows between a given
   * start key and end key.
   * TODO put me in an HFileTestUtil or something?
   */
  static void createHFile(
      Configuration conf,
      FileSystem fs, Path path,
      byte[] family, byte[] qualifier,
      byte[] startKey, byte[] endKey, int numRows) throws IOException
  {
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
        .withPath(fs, path)
        .withBlockSize(BLOCKSIZE)
        .withCompression(COMPRESSION)
        .withComparator(KeyValue.KEY_COMPARATOR)
        .create();
    long now = System.currentTimeMillis();
    try {
      // subtract 2 since iterateOnSplits doesn't include boundary keys
      for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, numRows - 2)) {
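        // the row key doubles as the cell value, making rows self-describing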
        KeyValue kv = new KeyValue(key, family, qualifier, now, key);
        writer.append(kv);
      }
    } finally {
      writer.close();
    }
  }
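  /**
   * Record a [first, last] key range in the map: +1 at the start key and -1
   * at the end key, so a running sum over the sorted keys gives the number
   * of ranges covering any point.
   */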
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The split keys (the start of every inferred region but the first)
     * should therefore be (m, r, u).
     */

    String first;
    String last;

    first = "a"; last = "e";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "r"; last = "s";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "o"; last = "p";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "g"; last = "k";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "v"; last = "x";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "c"; last = "i";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "m"; last = "q";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "s"; last = "t";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "u"; last = "w";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[3][];
    compare[0] = "m".getBytes();
    compare[1] = "r".getBytes();
    compare[2] = "u".getBytes();

    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}