
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import com.google.common.collect.Sets;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.IntegrationTestBase;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.BufferedMutatorParams;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ScannerCallable;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.NMapInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl;
import org.apache.hadoop.hbase.util.AbstractHBaseTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import java.io.IOException;
import java.util.Random;
import java.util.Set;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A large test which loads a lot of data with internal references and then
 * verifies that data.
 *
 * In the load step, 200 map tasks (by default) are launched, each of which writes
 * loadmapper.num_to_write (default 100K) rows to an HBase table. Rows are written in
 * 100 blocks; each row in a block contains loadmapper.backrefs (default 50) references
 * to random rows in the previous block.
 *
 * The verify step scans the table and checks that every referenced row is actually
 * present (i.e. no data loss). Rows that fail verification are written by the reducer
 * to the job output directory in HDFS so they can be inspected later.
 *
 * This class can be run as a unit test, as an integration test, or from the command line.
 *
 * Originally taken from Apache Bigtop.
 */
@Category(IntegrationTests.class)
public class IntegrationTestLoadAndVerify extends IntegrationTestBase {
  private static final String TEST_NAME = "IntegrationTestLoadAndVerify";
  private static final byte[] TEST_FAMILY = Bytes.toBytes("f1");
  private static final byte[] TEST_QUALIFIER = Bytes.toBytes("q1");

  private static final String NUM_TO_WRITE_KEY = "loadmapper.num_to_write";
  private static final long NUM_TO_WRITE_DEFAULT = 100*1000;

  private static final String TABLE_NAME_KEY = "loadmapper.table";
  private static final String TABLE_NAME_DEFAULT = "table";

  private static final String NUM_BACKREFS_KEY = "loadmapper.backrefs";
  private static final int NUM_BACKREFS_DEFAULT = 50;

  private static final String NUM_MAP_TASKS_KEY = "loadmapper.map.tasks";
  private static final String NUM_REDUCE_TASKS_KEY = "verify.reduce.tasks";
  private static final int NUM_MAP_TASKS_DEFAULT = 200;
  private static final int NUM_REDUCE_TASKS_DEFAULT = 35;

  private static final int SCANNER_CACHING = 500;

  private String toRun = null;

  private enum Counters {
    ROWS_WRITTEN,
    REFERENCES_WRITTEN,
    REFERENCES_CHECKED
  }

  @Override
  public void setUpCluster() throws Exception {
    util = getTestingUtil(getConf());
    util.initializeCluster(3);
    this.setConf(util.getConfiguration());
    if (!util.isDistributedCluster()) {
      getConf().setLong(NUM_TO_WRITE_KEY, NUM_TO_WRITE_DEFAULT / 100);
      getConf().setInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT / 100);
      getConf().setInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT / 10);
      util.startMiniMapReduceCluster();
    }
  }

  @Override
  public void cleanUpCluster() throws Exception {
    super.cleanUpCluster();
    if (!util.isDistributedCluster()) {
      util.shutdownMiniMapReduceCluster();
    }
  }

  /**
   * Converts a "long" value between endian systems by reversing its byte order.
   * For example, {@code swapLong(1L)} returns {@code 0x0100000000000000L}.
   * The test byte-swaps the sequential row index before using it as a key prefix,
   * which spreads consecutive rows across the presplit key space.
   * Borrowed from Apache Commons IO.
   * @param value value to convert
   * @return the converted value
   */
  public static long swapLong(long value)
  {
    return
      ( ( ( value >> 0 ) & 0xff ) << 56 ) +
      ( ( ( value >> 8 ) & 0xff ) << 48 ) +
      ( ( ( value >> 16 ) & 0xff ) << 40 ) +
      ( ( ( value >> 24 ) & 0xff ) << 32 ) +
      ( ( ( value >> 32 ) & 0xff ) << 24 ) +
      ( ( ( value >> 40 ) & 0xff ) << 16 ) +
      ( ( ( value >> 48 ) & 0xff ) << 8 ) +
      ( ( ( value >> 56 ) & 0xff ) << 0 );
  }

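  // LoadMapper is the map-only writer: each task writes recordsToWrite rows to the test
  // table in blocks, and every row in a block back-references random rows in the
  // previous block.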
  public static class LoadMapper
      extends Mapper<NullWritable, NullWritable, NullWritable, NullWritable>
  {
    protected long recordsToWrite;
    protected Connection connection;
    protected BufferedMutator mutator;
    protected Configuration conf;
    protected int numBackReferencesPerRow;
    protected String shortTaskId;

    protected Random rand = new Random();

    protected Counter rowsWritten, refsWritten;

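    // The short task id is parsed from the MapReduce task attempt id and appended to
    // every row key, so rows written by different map tasks never collide.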
    @Override
    public void setup(Context context) throws IOException {
      conf = context.getConfiguration();
      recordsToWrite = conf.getLong(NUM_TO_WRITE_KEY, NUM_TO_WRITE_DEFAULT);
      String tableName = conf.get(TABLE_NAME_KEY, TABLE_NAME_DEFAULT);
      numBackReferencesPerRow = conf.getInt(NUM_BACKREFS_KEY, NUM_BACKREFS_DEFAULT);
      this.connection = ConnectionFactory.createConnection(conf);
      mutator = connection.getBufferedMutator(
          new BufferedMutatorParams(TableName.valueOf(tableName))
              .writeBufferSize(4 * 1024 * 1024));

      String taskId = conf.get("mapreduce.task.attempt.id");
      Matcher matcher = Pattern.compile(".+_m_(\\d+_\\d+)").matcher(taskId);
      if (!matcher.matches()) {
        throw new RuntimeException("Strange task ID: " + taskId);
      }
      shortTaskId = matcher.group(1);

      rowsWritten = context.getCounter(Counters.ROWS_WRITTEN);
      refsWritten = context.getCounter(Counters.REFERENCES_WRITTEN);
    }

    @Override
    public void cleanup(Context context) throws IOException {
      mutator.close();
      connection.close();
    }

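    // Row keys are an 8-byte, byte-swapped row index followed by "/<shortTaskId>". Rows are
    // written in blocks of BLOCK_SIZE (recordsToWrite / 100); every row in a block carries
    // numBackReferencesPerRow extra qualifiers naming random rows of the previous block, and
    // the mutator is flushed after each block so that referenced rows are durable before
    // anything points at them.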
    @Override
    protected void map(NullWritable key, NullWritable value,
        Context context) throws IOException, InterruptedException {

      String suffix = "/" + shortTaskId;
      byte[] row = Bytes.add(new byte[8], Bytes.toBytes(suffix));

      int BLOCK_SIZE = (int)(recordsToWrite / 100);

      for (long i = 0; i < recordsToWrite;) {
        long blockStart = i;
        for (long idxInBlock = 0;
             idxInBlock < BLOCK_SIZE && i < recordsToWrite;
             idxInBlock++, i++) {

          long byteSwapped = swapLong(i);
          Bytes.putLong(row, 0, byteSwapped);

          Put p = new Put(row);
          p.add(TEST_FAMILY, TEST_QUALIFIER, HConstants.EMPTY_BYTE_ARRAY);
          if (blockStart > 0) {
            for (int j = 0; j < numBackReferencesPerRow; j++) {
              long referredRow = blockStart - BLOCK_SIZE + rand.nextInt(BLOCK_SIZE);
              Bytes.putLong(row, 0, swapLong(referredRow));
              p.add(TEST_FAMILY, row, HConstants.EMPTY_BYTE_ARRAY);
            }
            refsWritten.increment(1);
          }
          rowsWritten.increment(1);
          mutator.mutate(p);

          if (i % 100 == 0) {
            context.setStatus("Written " + i + "/" + recordsToWrite + " records");
            context.progress();
          }
        }
        // End of block, flush all of them before we start writing anything
        // pointing to these!
        mutator.flush();
      }
    }
  }

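  // VerifyMapper turns each cell into a (row, referrer) pair: the TEST_QUALIFIER marker cell
  // produces (rowKey, EMPTY), meaning "this row exists", while every back-reference qualifier
  // produces (referredRowKey, referringRowKey).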
  public static class VerifyMapper extends TableMapper<BytesWritable, BytesWritable> {
    static final BytesWritable EMPTY = new BytesWritable(HConstants.EMPTY_BYTE_ARRAY);

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      BytesWritable bwKey = new BytesWritable(key.get());
      BytesWritable bwVal = new BytesWritable();
      for (Cell kv : value.listCells()) {
        if (Bytes.compareTo(TEST_QUALIFIER, 0, TEST_QUALIFIER.length,
                            kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength()) == 0) {
          context.write(bwKey, EMPTY);
        } else {
          bwVal.set(kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength());
          context.write(bwVal, bwKey);
        }
      }
    }
  }

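  // For each row key, VerifyReducer receives the EMPTY marker (if the row was actually written)
  // plus one value per row that references it. A key with referrers but no marker is a missing
  // row: it is written to the job output and counted under ROWS_WRITTEN, which doVerify()
  // asserts is zero for the verification job.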
  public static class VerifyReducer extends Reducer<BytesWritable, BytesWritable, Text, Text> {
    private static final Log LOG = LogFactory.getLog(VerifyReducer.class);
    private Counter refsChecked;
    private Counter rowsWritten;

    @Override
    public void setup(Context context) throws IOException {
      refsChecked = context.getCounter(Counters.REFERENCES_CHECKED);
      rowsWritten = context.getCounter(Counters.ROWS_WRITTEN);
    }

    @Override
    protected void reduce(BytesWritable referredRow, Iterable<BytesWritable> referrers,
        VerifyReducer.Context ctx) throws IOException, InterruptedException {
      boolean gotOriginalRow = false;
      int refCount = 0;

      for (BytesWritable ref : referrers) {
        if (ref.getLength() == 0) {
          assert !gotOriginalRow;
          gotOriginalRow = true;
        } else {
          refCount++;
        }
      }
      refsChecked.increment(refCount);

      if (!gotOriginalRow) {
        String parsedRow = makeRowReadable(referredRow.getBytes(), referredRow.getLength());
        String binRow = Bytes.toStringBinary(referredRow.getBytes(), 0, referredRow.getLength());
        LOG.error("Reference error row " + parsedRow);
        ctx.write(new Text(binRow), new Text(parsedRow));
        rowsWritten.increment(1);
      }
    }

    private String makeRowReadable(byte[] bytes, int length) {
      long rowIdx = swapLong(Bytes.toLong(bytes, 0));
      String suffix = Bytes.toString(bytes, 8, length - 8);

      return "Row #" + rowIdx + " suffix " + suffix;
    }
  }

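  // doLoad() runs a map-only job (no reducers): NMapInputFormat simply launches the configured
  // number of map tasks, and each LoadMapper writes its rows directly to the table through its
  // BufferedMutator rather than through the job output.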
  protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "load-output");

    NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
    conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());

    Job job = Job.getInstance(conf);
    job.setJobName(TEST_NAME + " Load for " + htd.getTableName());
    job.setJarByClass(this.getClass());
    setMapperClass(job);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setNumReduceTasks(0);
    setJobScannerConf(job);
    FileOutputFormat.setOutputPath(job, outputDir);

    TableMapReduceUtil.addDependencyJars(job);

    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);
    assertTrue(job.waitForCompletion(true));
    return job;
  }

  protected void setMapperClass(Job job) {
    job.setMapperClass(LoadMapper.class);
  }

  protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "verify-output");

    Job job = Job.getInstance(conf);
    job.setJarByClass(this.getClass());
    job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
    setJobScannerConf(job);

    Scan scan = new Scan();

    TableMapReduceUtil.initTableMapperJob(
        htd.getTableName().getNameAsString(), scan, VerifyMapper.class,
        BytesWritable.class, BytesWritable.class, job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
    int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
    TableMapReduceUtil.setScannerCaching(job, scannerCaching);

    job.setReducerClass(VerifyReducer.class);
    job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
    FileOutputFormat.setOutputPath(job, outputDir);
    assertTrue(job.waitForCompletion(true));

    long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
    assertEquals(0, numOutputRecords);
  }

  private static void setJobScannerConf(Job job) {
    // Make sure scanners log something useful to make debugging possible.
    job.getConfiguration().setBoolean(ScannerCallable.LOG_SCANNER_ACTIVITY, true);
    long lpr = job.getConfiguration().getLong(NUM_TO_WRITE_KEY, NUM_TO_WRITE_DEFAULT) / 100;
    job.getConfiguration().setInt(TableRecordReaderImpl.LOG_PER_ROW_COUNT, (int)lpr);
  }

  public Path getTestDir(String testName, String subdir) throws IOException {
    // HBaseTestingUtility.getDataTestDirOnTestFs() has not been backported.
    FileSystem fs = FileSystem.get(getConf());
    Path base = new Path(fs.getWorkingDirectory(), "test-data");
    String randomStr = UUID.randomUUID().toString();
    Path testDir = new Path(base, randomStr);
    fs.deleteOnExit(testDir);

    return new Path(new Path(testDir, testName), subdir);
  }

  @Test
  public void testLoadAndVerify() throws Exception {
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TEST_NAME));
    htd.addFamily(new HColumnDescriptor(TEST_FAMILY));

    Admin admin = getTestingUtil(getConf()).getHBaseAdmin();
    admin.createTable(htd, Bytes.toBytes(0L), Bytes.toBytes(-1L), 40);

    doLoad(getConf(), htd);
    doVerify(getConf(), htd);

    // Only disable and drop the table if verification succeeded - otherwise it is
    // useful to leave it around for a post-mortem.
    getTestingUtil(getConf()).deleteTable(htd.getTableName());
  }

  public void usage() {
    System.err.println(this.getClass().getSimpleName() + " [-Doptions] <load|verify|loadAndVerify>");
    System.err.println("  Loads a table with row dependencies and verifies the dependency chains");
    System.err.println("Options");
    System.err.println("  -Dloadmapper.table=<name>        Table to write/verify (default IntegrationTestLoadAndVerify)");
    System.err.println("  -Dloadmapper.backrefs=<n>        Number of backreferences per row (default 50)");
    System.err.println("  -Dloadmapper.num_to_write=<n>    Number of rows to write per mapper (default 100,000)");
    System.err.println("  -Dloadmapper.deleteAfter=<bool>  Delete the table after a successful verify (default true)");
    System.err.println("  -Dloadmapper.numPresplits=<n>    Number of presplit regions to start with (default 40)");
    System.err.println("  -Dloadmapper.map.tasks=<n>       Number of map tasks for the load (default 200)");
    System.err.println("  -Dverify.reduce.tasks=<n>        Number of reduce tasks for the verify (default 35)");
    System.err.println("  -Dverify.scannercaching=<n>      Scanner caching (rows fetched per RPC) for the verify scan (default 500)");
  }
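
  // Illustrative command-line invocation on a deployed cluster (assumes the standard "hbase"
  // launcher script is on the PATH; the option values below are examples, not requirements):
  //
  //   hbase org.apache.hadoop.hbase.test.IntegrationTestLoadAndVerify \
  //       -Dloadmapper.num_to_write=100000 -Dverify.reduce.tasks=35 loadAndVerify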

  @Override
  protected void processOptions(CommandLine cmd) {
    super.processOptions(cmd);

    String[] args = cmd.getArgs();
    if (args == null || args.length != 1) {
      usage();
      throw new RuntimeException("Incorrect number of arguments.");
    }
    toRun = args[0];
  }

  @Override
  public int runTestFromCommandLine() throws Exception {
    IntegrationTestingUtility.setUseDistributedCluster(getConf());
    boolean doLoad = false;
    boolean doVerify = false;
    boolean doDelete = getConf().getBoolean("loadmapper.deleteAfter", true);
    int numPresplits = getConf().getInt("loadmapper.numPresplits", 40);

    if (toRun.equals("load")) {
      doLoad = true;
    } else if (toRun.equals("verify")) {
      doVerify = true;
    } else if (toRun.equals("loadAndVerify")) {
      doLoad = true;
      doVerify = true;
    } else {
      System.err.println("Invalid argument " + toRun);
      usage();
      return 1;
    }

    // create HTableDescriptor for specified table
    TableName table = getTablename();
    HTableDescriptor htd = new HTableDescriptor(table);
    htd.addFamily(new HColumnDescriptor(TEST_FAMILY));

    try (Connection conn = ConnectionFactory.createConnection(getConf());
        Admin admin = conn.getAdmin()) {
      if (doLoad) {
        admin.createTable(htd, Bytes.toBytes(0L), Bytes.toBytes(-1L), numPresplits);
        doLoad(getConf(), htd);
      }
    }
    if (doVerify) {
      doVerify(getConf(), htd);
      if (doDelete) {
        getTestingUtil(getConf()).deleteTable(htd.getTableName());
      }
    }
    return 0;
  }

  @Override
  public TableName getTablename() {
    return TableName.valueOf(getConf().get(TABLE_NAME_KEY, TEST_NAME));
  }

  @Override
  protected Set<String> getColumnFamilies() {
    return Sets.newHashSet(Bytes.toString(TEST_FAMILY));
  }

  public static void main(String[] argv) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    IntegrationTestingUtility.setUseDistributedCluster(conf);
    int ret = ToolRunner.run(conf, new IntegrationTestLoadAndVerify(), argv);
    System.exit(ret);
  }
}