/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.List;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.UserProvider;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
  protected static boolean useSecureHBaseOverride = false;
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");

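  // Test tables are pre-split at "ddd" and "ppp", giving each one three regions.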
  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  static HBaseTestingUtility util = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.startMiniCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  private void runTest(String testName, BloomType bloomType,
          byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

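    // Write one HFile per key range into the family directory; each file
    // holds 1000 rows spanning its "from" and "to" keys.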
    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

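    // Create a pre-split table whose family carries the requested bloom filter type.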
    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TABLE);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableAvailable(TABLE, 30000);

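    // Bulk load the staged HFiles and confirm every generated row reached the table.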
    LoadIncrementalHFiles loader =
        new LoadIncrementalHFiles(util.getConfiguration(), useSecureHBaseOverride);
    loader.doBulkLoad(dir, table);

    assertEquals(expectedRows, util.countRows(table));
  }

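  /**
   * Bulk loads HFiles into a fresh table, then inspects the resulting store
   * files: depending on {@code nonZero}, their max sequence id is expected to
   * be either positive (a sequence number was assigned during the load) or the
   * unassigned default of -1.
   */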
  private void verifyAssignedSequenceNumber(String testName, byte[][][] hfileRanges,
      boolean nonZero) throws Exception {
    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir,
          "hfile_" + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TABLE);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    htd.addFamily(familyDesc);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableAvailable(TABLE, 30000);
    LoadIncrementalHFiles loader =
        new LoadIncrementalHFiles(util.getConfiguration(), useSecureHBaseOverride);

    // Do a dummy put to advance the HLog (WAL) sequence number
    Put put = new Put(Bytes.toBytes("row"));
    put.add(FAMILY, QUALIFIER, Bytes.toBytes("value"));
    table.put(put);

    loader.doBulkLoad(dir, table);

    // Get the store files
    List<StoreFile> files =
        util.getHBaseCluster().getRegions(TABLE).get(0).getStore(FAMILY).getStorefiles();
    for (StoreFile file : files) {
      // the sequenceId gets initialized during createReader
      file.createReader();

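      // A file that was assigned a sequence number during the load reports a
      // positive id; otherwise it keeps the unassigned default of -1.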
      if (nonZero) assertTrue(file.getMaxSequenceId() > 0);
      else assertTrue(file.getMaxSequenceId() == -1);
    }
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = "testNonexistentColumnFamilyLoad";
    byte[][][] hfileRanges = new byte[][][] {
      new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
      new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    };

    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TABLE);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableAvailable(TABLE, 30000);
    // make sure we go back to the usual user provider
    UserProvider.setUserProviderForTesting(util.getConfiguration(), UserProvider.class);
    LoadIncrementalHFiles loader =
        new LoadIncrementalHFiles(util.getConfiguration(), useSecureHBaseOverride);
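    // The staged HFiles name a family the table was created without, so the
    // bulk load must fail.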
    try {
      loader.doBulkLoad(dir, table);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
    }
    table.close();
    admin.close();
  }

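  /**
   * Splits a single 1000-row HFile at row "ggg" and verifies the bottom and
   * top halves together still contain all 1000 rows.
   */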
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDir("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

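  /**
   * Counts the keyvalues in the HFile at {@code p} with a full scan, asserting
   * that the file is non-empty.
   */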
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf));
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    int count = 0;
    // seekTo() returns false on an empty file; only count entries if it succeeds
    if (scanner.seekTo()) {
      do {
        count++;
      } while (scanner.next());
    }
    assertTrue(count > 0);
    reader.close();
    return count;
  }

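  /**
   * Records a [first, last] key range in the map as +1 on the start key and -1
   * on the end key. Sweeping the sorted entries with a running sum then shows
   * where ranges stop overlapping, which is where inferBoundaries is expected
   * to place split points.
   */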
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m,r,u)
     */

    String first;
    String last;

    first = "a"; last = "e";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "r"; last = "s";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "o"; last = "p";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "g"; last = "k";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "v"; last = "x";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "c"; last = "i";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "m"; last = "q";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "s"; last = "t";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "u"; last = "w";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[3][];
    compare[0] = "m".getBytes();
    compare[1] = "r".getBytes();
    compare[2] = "u".getBytes();

    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}