/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.CountDownLatch;

import junit.framework.Assert;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

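/**
 * Verifies that scanners correctly see data added through bulk load: after a plain
 * bulk load, while a scan runs in parallel with a bulk load, and when the loaded
 * file is a "native" HFile that already carries its own sequence ids.
 */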
@Category(MediumTests.class)
public class TestScannerWithBulkload {
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.startMiniCluster(1);
  }

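  /** Creates a table with a single family 'col' that retains up to three versions. */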
  private static void createTable(Admin admin, TableName tableName) throws IOException {
    HTableDescriptor desc = new HTableDescriptor(tableName);
    HColumnDescriptor hcd = new HColumnDescriptor("col");
    hcd.setMaxVersions(3);
    desc.addFamily(hcd);
    admin.createTable(desc);
  }

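  /**
   * Bulk loads an HFile into a table already holding 'version0' and 'version1' for
   * row1, then checks that a scan sees the loaded 'version2' and, after a further
   * put and flush, 'version3'.
   */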
  @Test
  public void testBulkLoad() throws Exception {
    TableName tableName = TableName.valueOf("testBulkLoad");
    long l = System.currentTimeMillis();
    HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
    createTable(admin, tableName);
    Scan scan = createScan();
    final HTable table = init(admin, l, scan, tableName);
    // Bulk load an HFile holding 'version2' for row1.
    final Path hfilePath = writeToHFile(l, "/temp/testBulkLoad/", "/temp/testBulkLoad/col/file",
      false);
    Configuration conf = TEST_UTIL.getConfiguration();
    conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
    final LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
    bulkload.doBulkLoad(hfilePath, table);
    ResultScanner scanner = table.getScanner(scan);
    Result result = scanner.next();
    result = scanAfterBulkLoad(scanner, result, "version2");
    Put put0 = new Put(Bytes.toBytes("row1"));
    put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
        .toBytes("version3")));
    table.put(put0);
    admin.flush(tableName);
    scanner = table.getScanner(scan);
    result = scanner.next();
    while (result != null) {
      List<KeyValue> kvs = result.getColumn(Bytes.toBytes("col"), Bytes.toBytes("q"));
      for (KeyValue _kv : kvs) {
        if (Bytes.toString(_kv.getRow()).equals("row1")) {
          System.out.println(Bytes.toString(_kv.getRow()));
          System.out.println(Bytes.toString(_kv.getQualifier()));
          System.out.println(Bytes.toString(_kv.getValue()));
          Assert.assertEquals("version3", Bytes.toString(_kv.getValue()));
        }
      }
      result = scanner.next();
    }
    scanner.close();
    table.close();
  }

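  /**
   * Drains the scanner, asserting that every cell returned for row1 in 'col:q'
   * carries the expected value.
   */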
  private Result scanAfterBulkLoad(ResultScanner scanner, Result result, String expectedVal)
      throws IOException {
    while (result != null) {
      List<KeyValue> kvs = result.getColumn(Bytes.toBytes("col"), Bytes.toBytes("q"));
      for (KeyValue _kv : kvs) {
        if (Bytes.toString(_kv.getRow()).equals("row1")) {
          System.out.println(Bytes.toString(_kv.getRow()));
          System.out.println(Bytes.toString(_kv.getQualifier()));
          System.out.println(Bytes.toString(_kv.getValue()));
          Assert.assertEquals(expectedVal, Bytes.toString(_kv.getValue()));
        }
      }
      result = scanner.next();
    }
    return result;
  }

  // If nativeHFile is true, we will set cell seq id and MAX_SEQ_ID_KEY in the file.
  // Else, we will set BULKLOAD_TIME_KEY.
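  // LoadIncrementalHFiles maps each file to a column family by the name of its
  // parent directory, which is why callers place the HFile under a 'col' subdirectory.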
  private Path writeToHFile(long l, String hFilePath, String pathStr, boolean nativeHFile)
      throws IOException {
    FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
    final Path hfilePath = new Path(hFilePath);
    fs.mkdirs(hfilePath);
    Path path = new Path(pathStr);
    HFile.WriterFactory wf = HFile.getWriterFactoryNoCache(TEST_UTIL.getConfiguration());
    Assert.assertNotNull(wf);
    HFileContext context = new HFileContext();
    HFile.Writer writer = wf.withPath(fs, path).withFileContext(context).create();
    KeyValue kv = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l,
        Bytes.toBytes("version2"));

    // Set cell seq id to test bulk load native hfiles.
    if (nativeHFile) {
      // Set a big seq id. Scan should not look at this seq id in a bulk loaded file.
      // Scan should only look at the seq id appended at the bulk load time, and not skip
      // this kv.
      kv.setSequenceId(9999999);
    }

    writer.append(kv);

    if (nativeHFile) {
      // Set a big MAX_SEQ_ID_KEY. Scan should not look at this seq id in a bulk loaded file.
      // Scan should only look at the seq id appended at the bulk load time, and not skip its
      // kv.
      writer.appendFileInfo(StoreFile.MAX_SEQ_ID_KEY, Bytes.toBytes(9999999L));
    } else {
      writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
    }
    writer.close();
    return hfilePath;
  }

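  /**
   * Seeds the table with 'version0' for row1 and row2 and then 'version1' for row1,
   * all at timestamp l, flushing after each put and compacting at the end. Verifies
   * that a scan returns exactly one cell for row1, holding 'version1' (the later
   * write at the same timestamp wins after compaction).
   */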
  private HTable init(HBaseAdmin admin, long l, Scan scan, TableName tableName) throws Exception {
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);
    Put put0 = new Put(Bytes.toBytes("row1"));
    put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
        .toBytes("version0")));
    table.put(put0);
    admin.flush(tableName);
    Put put1 = new Put(Bytes.toBytes("row2"));
    put1.add(new KeyValue(Bytes.toBytes("row2"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
        .toBytes("version0")));
    table.put(put1);
    admin.flush(tableName);
    put0 = new Put(Bytes.toBytes("row1"));
    put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
        .toBytes("version1")));
    table.put(put0);
    admin.flush(tableName);
    admin.compact(tableName);

    ResultScanner scanner = table.getScanner(scan);
    Result result = scanner.next();
    List<KeyValue> kvs = result.getColumn(Bytes.toBytes("col"), Bytes.toBytes("q"));
    Assert.assertEquals(1, kvs.size());
    Assert.assertEquals("version1", Bytes.toString(kvs.get(0).getValue()));
    scanner.close();
    return table;
  }

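  /**
   * Opens a scanner before the bulk load runs in a separate thread, then checks the
   * value the already-open scanner returns for row1: it still sees 'version1',
   * since its read point was established before the load assigned the new file a
   * sequence number.
   */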
  @Test
  public void testBulkLoadWithParallelScan() throws Exception {
    TableName tableName = TableName.valueOf("testBulkLoadWithParallelScan");
    final long l = System.currentTimeMillis();
    HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
    createTable(admin, tableName);
    Scan scan = createScan();
    final HTable table = init(admin, l, scan, tableName);
    // Bulk load an HFile holding 'version2' for row1.
    final Path hfilePath = writeToHFile(l, "/temp/testBulkLoadWithParallelScan/",
        "/temp/testBulkLoadWithParallelScan/col/file", false);
    Configuration conf = TEST_UTIL.getConfiguration();
    conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
    final LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
    // Create the scanner first, then do the bulk load from another thread.
    ResultScanner scanner = table.getScanner(scan);
    final CountDownLatch latch = new CountDownLatch(1);
    new Thread() {
      @Override
      public void run() {
        try {
          Put put1 = new Put(Bytes.toBytes("row5"));
          put1.add(new KeyValue(Bytes.toBytes("row5"), Bytes.toBytes("col"), Bytes.toBytes("q"), l,
              Bytes.toBytes("version0")));
          table.put(put1);
          bulkload.doBulkLoad(hfilePath, table);
          latch.countDown();
        } catch (TableNotFoundException e) {
          // Swallowed: if the load fails, the latch is never counted down and the test times out.
        } catch (IOException e) {
          // Swallowed: if the load fails, the latch is never counted down and the test times out.
        }
      }
    }.start();
    latch.await();
    // By the time we do next() the bulk loaded files are also added to the kv
    // scanner
    Result result = scanner.next();
    scanAfterBulkLoad(scanner, result, "version1");
    scanner.close();
    table.close();
  }

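  /**
   * Same flow as testBulkLoad, but the loaded file is a native HFile carrying its
   * own cell sequence ids and MAX_SEQ_ID_KEY; the scan must honour the sequence id
   * assigned at bulk load time and still return the loaded 'version2'.
   */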
  @Test
  public void testBulkLoadNativeHFile() throws Exception {
    TableName tableName = TableName.valueOf("testBulkLoadNativeHFile");
    long l = System.currentTimeMillis();
    HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
    createTable(admin, tableName);
    Scan scan = createScan();
    final HTable table = init(admin, l, scan, tableName);
    // Bulk load a native HFile, i.e. one that carries its own sequence ids.
    final Path hfilePath = writeToHFile(l, "/temp/testBulkLoadNativeHFile/",
      "/temp/testBulkLoadNativeHFile/col/file", true);
    Configuration conf = TEST_UTIL.getConfiguration();
    conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
    final LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
    bulkload.doBulkLoad(hfilePath, table);
    ResultScanner scanner = table.getScanner(scan);
    Result result = scanner.next();
    // We had 'version0' and 'version1' for 'row1,col:q' in the table.
    // Bulk load added 'version2'; the scanner should be able to see it.
    result = scanAfterBulkLoad(scanner, result, "version2");
    Put put0 = new Put(Bytes.toBytes("row1"));
    put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
        .toBytes("version3")));
    table.put(put0);
    admin.flush(tableName);
    scanner = table.getScanner(scan);
    result = scanner.next();
    while (result != null) {
      List<KeyValue> kvs = result.getColumn(Bytes.toBytes("col"), Bytes.toBytes("q"));
      for (KeyValue _kv : kvs) {
        if (Bytes.toString(_kv.getRow()).equals("row1")) {
          System.out.println(Bytes.toString(_kv.getRow()));
          System.out.println(Bytes.toString(_kv.getQualifier()));
          System.out.println(Bytes.toString(_kv.getValue()));
          Assert.assertEquals("version3", Bytes.toString(_kv.getValue()));
        }
      }
      result = scanner.next();
    }
    scanner.close();
    table.close();
  }

  private Scan createScan() {
    Scan scan = new Scan();
    scan.setMaxVersions(3);
    return scan;
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }
}