/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Random;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.IntegrationTests;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.NMapInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import com.google.common.collect.Lists;

/**
 * A large test which loads a lot of data with internal references and then
 * verifies the data.
 *
 * In the load step, 200 map tasks are launched; each task writes loadmapper.num_to_write
 * (default 100K) rows to an HBase table. Rows are written in blocks, for a total of
 * 100 blocks per task. Each row in a block contains loadmapper.backrefs (default 50)
 * references to random rows in the previous block.
 *
 * The verify step scans the table and verifies that every referenced row is actually
 * present (no data loss). Failed rows are output from the reduce phase and saved in the
 * job output directory in HDFS for later inspection.
 *
 * This class can be run as a unit test, as an integration test, or from the command line.
 *
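 * A command-line invocation might look like the following (the exact launcher and
 * classpath setup depend on the deployment):
 *
 *   hbase org.apache.hadoop.hbase.test.IntegrationTestLoadAndVerify \
 *       -Dloadmapper.num_to_write=100000 -Dloadmapper.backrefs=50 loadAndVerify
 *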
 * Originally taken from Apache Bigtop.
 */
@Category(IntegrationTests.class)
public class IntegrationTestLoadAndVerify extends Configured implements Tool {
  private static final String TEST_NAME = "IntegrationTestLoadAndVerify";
  private static final byte[] TEST_FAMILY = Bytes.toBytes("f1");
  private static final byte[] TEST_QUALIFIER = Bytes.toBytes("q1");

  private static final String NUM_TO_WRITE_KEY =
    "loadmapper.num_to_write";
  private static final long NUM_TO_WRITE_DEFAULT = 100*1000;

  private static final String TABLE_NAME_KEY = "loadmapper.table";
  private static final String TABLE_NAME_DEFAULT = "table";

  private static final String NUM_BACKREFS_KEY = "loadmapper.backrefs";
  private static final int NUM_BACKREFS_DEFAULT = 50;

  private static final String NUM_MAP_TASKS_KEY = "loadmapper.map.tasks";
  private static final String NUM_REDUCE_TASKS_KEY = "verify.reduce.tasks";
  private static final int NUM_MAP_TASKS_DEFAULT = 200;
  private static final int NUM_REDUCE_TASKS_DEFAULT = 35;

  private static final int SCANNER_CACHING = 500;

  private IntegrationTestingUtility util;

  private enum Counters {
    ROWS_WRITTEN,
    REFERENCES_WRITTEN,
    REFERENCES_CHECKED;
  }

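  // When run as a JUnit test, scale the workload down and initialize a small (3-node) cluster
  // so a single run stays manageable; command-line runs via run() use the full defaults.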
  @Before
  public void setUp() throws Exception {
    util = getTestingUtil();
    util.initializeCluster(3);
    this.setConf(util.getConfiguration());
    getConf().setLong(NUM_TO_WRITE_KEY, NUM_TO_WRITE_DEFAULT / 100);
    getConf().setInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT / 100);
    getConf().setInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT / 10);
  }

  @After
  public void tearDown() throws Exception {
    util.restoreCluster();
  }

  /**
   * Converts a "long" value between endian systems.
   * Borrowed from Apache Commons IO
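   * For example, swapLong(0x0000000000000001L) returns 0x0100000000000000L.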
   * @param value value to convert
   * @return the converted value
   */
  public static long swapLong(long value)
  {
    return
      ( ( ( value >> 0 ) & 0xff ) << 56 ) +
      ( ( ( value >> 8 ) & 0xff ) << 48 ) +
      ( ( ( value >> 16 ) & 0xff ) << 40 ) +
      ( ( ( value >> 24 ) & 0xff ) << 32 ) +
      ( ( ( value >> 32 ) & 0xff ) << 24 ) +
      ( ( ( value >> 40 ) & 0xff ) << 16 ) +
      ( ( ( value >> 48 ) & 0xff ) << 8 ) +
      ( ( ( value >> 56 ) & 0xff ) << 0 );
  }

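  /**
   * Mapper for the load phase. Each task writes loadmapper.num_to_write rows in 100 blocks;
   * every row in a block other than the first block also carries loadmapper.backrefs column
   * qualifiers that point back at random rows of the previous block. Those back-references
   * are what the verify phase later checks.
   */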
  public static class LoadMapper
      extends Mapper<NullWritable, NullWritable, NullWritable, NullWritable>
  {
    private long recordsToWrite;
    private HTable table;
    private Configuration conf;
    private int numBackReferencesPerRow;
    private String shortTaskId;

    private Random rand = new Random();

    private Counter rowsWritten, refsWritten;

    @Override
    public void setup(Context context) throws IOException {
      conf = context.getConfiguration();
      recordsToWrite = conf.getLong(NUM_TO_WRITE_KEY, NUM_TO_WRITE_DEFAULT);
      String tableName = conf.get(TABLE_NAME_KEY, TABLE_NAME_DEFAULT);
      numBackReferencesPerRow = conf.getInt(NUM_BACKREFS_KEY, NUM_BACKREFS_DEFAULT);
      table = new HTable(conf, tableName);
      table.setWriteBufferSize(4*1024*1024);
      table.setAutoFlush(false);

      String taskId = conf.get("mapred.task.id");
      Matcher matcher = Pattern.compile(".+_m_(\\d+_\\d+)").matcher(taskId);
      if (!matcher.matches()) {
        throw new RuntimeException("Strange task ID: " + taskId);
      }
      shortTaskId = matcher.group(1);

      rowsWritten = context.getCounter(Counters.ROWS_WRITTEN);
      refsWritten = context.getCounter(Counters.REFERENCES_WRITTEN);
    }

    @Override
    public void cleanup(Context context) throws IOException {
      table.flushCommits();
      table.close();
    }

    @Override
    protected void map(NullWritable key, NullWritable value,
        Context context) throws IOException, InterruptedException {

      String suffix = "/" + shortTaskId;
      byte[] row = Bytes.add(new byte[8], Bytes.toBytes(suffix));

      int BLOCK_SIZE = (int)(recordsToWrite / 100);

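      // Row key layout: 8 bytes of byte-swapped row index followed by "/<short task id>", so
      // sequential rows spread across the key space and rows from different map tasks never
      // collide.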
      for (long i = 0; i < recordsToWrite;) {
        long blockStart = i;
        for (long idxInBlock = 0;
             idxInBlock < BLOCK_SIZE && i < recordsToWrite;
             idxInBlock++, i++) {

          long byteSwapped = swapLong(i);
          Bytes.putLong(row, 0, byteSwapped);

          Put p = new Put(row);
          p.add(TEST_FAMILY, TEST_QUALIFIER, HConstants.EMPTY_BYTE_ARRAY);
          if (blockStart > 0) {
            for (int j = 0; j < numBackReferencesPerRow; j++) {
              long referredRow = blockStart - BLOCK_SIZE + rand.nextInt(BLOCK_SIZE);
              Bytes.putLong(row, 0, swapLong(referredRow));
              p.add(TEST_FAMILY, row, HConstants.EMPTY_BYTE_ARRAY);
            }
            refsWritten.increment(1);
          }
          rowsWritten.increment(1);
          table.put(p);

          if (i % 100 == 0) {
            context.setStatus("Written " + i + "/" + recordsToWrite + " records");
            context.progress();
          }
        }
        // End of block, flush all of them before we start writing anything
        // pointing to these!
        table.flushCommits();
      }
    }
  }

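  /**
   * Mapper for the verify phase. For each row scanned, emits (row key, empty) for the row
   * itself, and (referenced row key, this row key) for every back-reference qualifier found,
   * so the reducer sees each referenced row together with all of its referrers.
   */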
  public static class VerifyMapper extends TableMapper<BytesWritable, BytesWritable> {
    static final BytesWritable EMPTY = new BytesWritable(HConstants.EMPTY_BYTE_ARRAY);

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      BytesWritable bwKey = new BytesWritable(key.get());
      BytesWritable bwVal = new BytesWritable();
      for (KeyValue kv : value.list()) {
        if (Bytes.compareTo(TEST_QUALIFIER, 0, TEST_QUALIFIER.length,
                            kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength()) == 0) {
          context.write(bwKey, EMPTY);
        } else {
          bwVal.set(kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength());
          context.write(bwVal, bwKey);
        }
      }
    }
  }

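  /**
   * Reducer for the verify phase. For each referenced row key, checks that the row itself was
   * seen (the empty marker emitted by VerifyMapper) in addition to its referrers; rows that
   * were referenced but never seen are written to the job output, and the ROWS_WRITTEN counter
   * (asserted to be zero after the verify job) tracks how many such missing rows were found.
   */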
  public static class VerifyReducer extends Reducer<BytesWritable, BytesWritable, Text, Text> {
    private Counter refsChecked;
    private Counter rowsWritten;

    @Override
    public void setup(Context context) throws IOException {
      refsChecked = context.getCounter(Counters.REFERENCES_CHECKED);
      rowsWritten = context.getCounter(Counters.ROWS_WRITTEN);
    }

    @Override
    protected void reduce(BytesWritable referredRow, Iterable<BytesWritable> referrers,
        VerifyReducer.Context ctx) throws IOException, InterruptedException {
      boolean gotOriginalRow = false;
      int refCount = 0;

      for (BytesWritable ref : referrers) {
        if (ref.getLength() == 0) {
          assert !gotOriginalRow;
          gotOriginalRow = true;
        } else {
          refCount++;
        }
      }
      refsChecked.increment(refCount);

      if (!gotOriginalRow) {
        String parsedRow = makeRowReadable(referredRow.getBytes(), referredRow.getLength());
        String binRow = Bytes.toStringBinary(referredRow.getBytes(), 0, referredRow.getLength());
        ctx.write(new Text(binRow), new Text(parsedRow));
        rowsWritten.increment(1);
      }
    }

    private String makeRowReadable(byte[] bytes, int length) {
      long rowIdx = swapLong(Bytes.toLong(bytes, 0));
      String suffix = Bytes.toString(bytes, 8, length - 8);

      return "Row #" + rowIdx + " suffix " + suffix;
    }
  }

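  /**
   * Runs the load MapReduce job against the given table and fails if the job does not
   * complete successfully.
   */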
  private void doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "load-output");

    NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
    conf.set(TABLE_NAME_KEY, htd.getNameAsString());

    Job job = new Job(conf);
    job.setJobName(TEST_NAME + " Load for " + htd.getNameAsString());
    job.setJarByClass(this.getClass());
    job.setMapperClass(LoadMapper.class);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, outputDir);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(
        job.getConfiguration(), HTable.class, Lists.class);
    TableMapReduceUtil.initCredentials(job);
    assertTrue(job.waitForCompletion(true));
  }

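  /**
   * Runs the verify MapReduce job over the table and asserts that the reducers found no
   * referenced-but-missing rows.
   */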
  private void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "verify-output");

    Job job = new Job(conf);
    job.setJarByClass(this.getClass());
    job.setJobName(TEST_NAME + " Verification for " + htd.getNameAsString());

    Scan scan = new Scan();

    TableMapReduceUtil.initTableMapperJob(
        htd.getNameAsString(), scan, VerifyMapper.class,
        BytesWritable.class, BytesWritable.class, job);
    int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
    TableMapReduceUtil.setScannerCaching(job, scannerCaching);

    job.setReducerClass(VerifyReducer.class);
    job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
    FileOutputFormat.setOutputPath(job, outputDir);
    assertTrue(job.waitForCompletion(true));

    long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
    assertEquals(0, numOutputRecords);
  }

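  /**
   * Returns a unique directory under the filesystem's working directory for job output;
   * the directory is marked for removal via FileSystem.deleteOnExit.
   */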
  public Path getTestDir(String testName, String subdir) throws IOException {
    //HBaseTestingUtility.getDataTestDirOnTestFs() has not been backported.
    FileSystem fs = FileSystem.get(getConf());
    Path base = new Path(fs.getWorkingDirectory(), "test-data");
    String randomStr = UUID.randomUUID().toString();
    Path testDir = new Path(base, randomStr);
    fs.deleteOnExit(testDir);

    return new Path(new Path(testDir, testName), subdir);
  }

  @Test
  public void testLoadAndVerify() throws Exception {
    HTableDescriptor htd = new HTableDescriptor(TEST_NAME);
    htd.addFamily(new HColumnDescriptor(TEST_FAMILY));

    HBaseAdmin admin = getTestingUtil().getHBaseAdmin();
    int numPreCreate = 40;
    admin.createTable(htd, Bytes.toBytes(0L), Bytes.toBytes(-1L), numPreCreate);

    doLoad(getConf(), htd);
    doVerify(getConf(), htd);

    // Only disable and drop the table if verification succeeded - otherwise it's useful
    // to leave it around for post-mortem
    deleteTable(admin, htd);
  }

  private void deleteTable(HBaseAdmin admin, HTableDescriptor htd)
    throws IOException, InterruptedException {
    // Use disableTableAsync because disable can take a long time to complete
    System.out.print("Disabling table " + htd.getNameAsString() + " ");
    admin.disableTableAsync(htd.getName());

    long start = System.currentTimeMillis();
    // NOTE tables can be both admin.isTableEnabled=false and
    // isTableDisabled=false, when disabling must use isTableDisabled!
    while (!admin.isTableDisabled(htd.getName())) {
      System.out.print(".");
      Thread.sleep(1000);
    }
    long delta = System.currentTimeMillis() - start;
    System.out.println(" " + delta + " ms");
    System.out.println("Deleting table " + htd.getNameAsString() + " ");
    admin.deleteTable(htd.getName());
  }

  public void usage() {
    System.err.println(this.getClass().getSimpleName() + " [-Doptions] <load|verify|loadAndVerify>");
    System.err.println("  Loads a table with row dependencies and verifies the dependency chains");
    System.err.println("Options");
    System.err.println("  -Dloadmapper.table=<name>        Table to write/verify (default IntegrationTestLoadAndVerify)");
    System.err.println("  -Dloadmapper.backrefs=<n>        Number of backreferences per row (default 50)");
    System.err.println("  -Dloadmapper.num_to_write=<n>    Number of rows to write per mapper (default 100,000)");
    System.err.println("  -Dloadmapper.deleteAfter=<bool>  Delete the table after a successful verify (default true)");
    System.err.println("  -Dloadmapper.numPresplits=<n>    Number of presplit regions to start with (default 40)");
    System.err.println("  -Dloadmapper.map.tasks=<n>       Number of map tasks for the load (default 200)");
    System.err.println("  -Dverify.reduce.tasks=<n>        Number of reduce tasks for the verify (default 35)");
    System.err.println("  -Dverify.scannercaching=<n>      HBase scanner caching (rows per RPC) for the verify scan (default 500)");
  }

  public int run(String argv[]) throws Exception {
    if (argv.length != 1) {
      usage();
      return 1;
    }

    IntegrationTestingUtility.setUseDistributedCluster(getConf());
    boolean doLoad = false;
    boolean doVerify = false;
    boolean doDelete = getConf().getBoolean("loadmapper.deleteAfter", true);
    int numPresplits = getConf().getInt("loadmapper.numPresplits", 40);

    if (argv[0].equals("load")) {
      doLoad = true;
    } else if (argv[0].equals("verify")) {
      doVerify = true;
    } else if (argv[0].equals("loadAndVerify")) {
      doLoad = true;
      doVerify = true;
    } else {
      System.err.println("Invalid argument " + argv[0]);
      usage();
      return 1;
    }

    // create HTableDescriptor for specified table
    String table = getConf().get(TABLE_NAME_KEY, TEST_NAME);
    HTableDescriptor htd = new HTableDescriptor(table);
    htd.addFamily(new HColumnDescriptor(TEST_FAMILY));

    HBaseAdmin admin = new HBaseAdmin(getConf());
    if (doLoad) {
      admin.createTable(htd, Bytes.toBytes(0L), Bytes.toBytes(-1L), numPresplits);
      doLoad(getConf(), htd);
    }
    if (doVerify) {
      doVerify(getConf(), htd);
      if (doDelete) {
        deleteTable(admin, htd);
      }
    }
    return 0;
  }

  private IntegrationTestingUtility getTestingUtil() {
    if (this.util == null) {
      if (getConf() == null) {
        this.util = new IntegrationTestingUtility();
      } else {
        this.util = new IntegrationTestingUtility(getConf());
      }
    }
    return util;
  }

  public static void main(String argv[]) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    int ret = ToolRunner.run(conf, new IntegrationTestLoadAndVerify(), argv);
    System.exit(ret);
  }
}