1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.test;
20  
21  import java.io.DataInput;
22  import java.io.DataOutput;
23  import java.io.IOException;
24  import java.util.ArrayList;
25  import java.util.Arrays;
26  import java.util.Iterator;
27  import java.util.List;
28  import java.util.Random;
29  import java.util.Set;
30  import java.util.UUID;
31  import java.util.concurrent.atomic.AtomicInteger;
32  
33  import org.apache.commons.cli.CommandLine;
34  import org.apache.commons.cli.GnuParser;
35  import org.apache.commons.cli.HelpFormatter;
36  import org.apache.commons.cli.Options;
37  import org.apache.commons.cli.ParseException;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.conf.Configured;
42  import org.apache.hadoop.fs.Path;
43  import org.apache.hadoop.hbase.HBaseConfiguration;
44  import org.apache.hadoop.hbase.HBaseTestingUtility;
45  import org.apache.hadoop.hbase.HColumnDescriptor;
46  import org.apache.hadoop.hbase.HRegionLocation;
47  import org.apache.hadoop.hbase.HTableDescriptor;
48  import org.apache.hadoop.hbase.IntegrationTestBase;
49  import org.apache.hadoop.hbase.IntegrationTestingUtility;
50  import org.apache.hadoop.hbase.IntegrationTests;
51  import org.apache.hadoop.hbase.MasterNotRunningException;
52  import org.apache.hadoop.hbase.TableName;
53  import org.apache.hadoop.hbase.client.Get;
54  import org.apache.hadoop.hbase.client.HBaseAdmin;
55  import org.apache.hadoop.hbase.client.HConnection;
56  import org.apache.hadoop.hbase.client.HConnectionManager;
57  import org.apache.hadoop.hbase.client.HTable;
58  import org.apache.hadoop.hbase.client.Put;
59  import org.apache.hadoop.hbase.client.Result;
60  import org.apache.hadoop.hbase.client.ResultScanner;
61  import org.apache.hadoop.hbase.client.Scan;
62  import org.apache.hadoop.hbase.client.ScannerCallable;
63  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
64  import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
65  import org.apache.hadoop.hbase.mapreduce.TableMapper;
66  import org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl;
67  import org.apache.hadoop.hbase.util.AbstractHBaseTool;
68  import org.apache.hadoop.hbase.util.Bytes;
69  import org.apache.hadoop.hbase.util.RegionSplitter;
70  import org.apache.hadoop.io.BytesWritable;
71  import org.apache.hadoop.io.NullWritable;
72  import org.apache.hadoop.io.Text;
73  import org.apache.hadoop.io.Writable;
74  import org.apache.hadoop.mapreduce.Counter;
75  import org.apache.hadoop.mapreduce.CounterGroup;
76  import org.apache.hadoop.mapreduce.Counters;
77  import org.apache.hadoop.mapreduce.InputFormat;
78  import org.apache.hadoop.mapreduce.InputSplit;
79  import org.apache.hadoop.mapreduce.Job;
80  import org.apache.hadoop.mapreduce.JobContext;
81  import org.apache.hadoop.mapreduce.Mapper;
82  import org.apache.hadoop.mapreduce.RecordReader;
83  import org.apache.hadoop.mapreduce.Reducer;
84  import org.apache.hadoop.mapreduce.TaskAttemptContext;
85  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
86  import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
87  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
88  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
89  import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
90  import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
91  import org.apache.hadoop.util.Tool;
92  import org.apache.hadoop.util.ToolRunner;
93  import org.junit.Test;
94  import org.junit.experimental.categories.Category;
95  
96  import com.google.common.collect.Sets;
97  
98  /**
99   * This is an integration test borrowed from goraci, written by Keith Turner,
100  * which is in turn inspired by the Accumulo test called continuous ingest (ci).
101  * The original source code can be found here:
102  * https://github.com/keith-turner/goraci
103  * https://github.com/enis/goraci/
104  *
105  * Apache Accumulo [0] has a simple test suite that verifies that data is not
106  * lost at scale. This test suite is called continuous ingest. This test runs
107  * many ingest clients that continually create linked lists containing 25
108  * million nodes. At some point the clients are stopped and a map reduce job is
109  * run to ensure no linked list has a hole. A hole indicates data was lost.
110  *
111  * The nodes in the linked list are random. This causes each linked list to
112  * spread across the table. Therefore if one part of a table loses data, then it
113  * will be detected by references in another part of the table.
114  *
115  * THE ANATOMY OF THE TEST
116  *
117  * Below is a rough sketch of how data is written. For specific details look at
118  * the Generator code.
119  *
120  * 1. Write out 1 million nodes.  2. Flush the client.  3. Write out 1 million that
121  * reference the previous million.  4. If this is the 25th set of 1 million nodes,
122  * then update the 1st set of 1 million to point to the last.  5. Goto 1.
123  *
124  * The key is that nodes only reference flushed nodes. Therefore a node should
125  * never reference a missing node, even if the ingest client is killed at any
126  * point in time.
127  *
128  * When running this test suite with Accumulo there is a script running in
129  * parallel called the Agitator that randomly and continuously kills server
130  * processes. The outcome was that many data loss bugs were found in Accumulo
131  * by doing this. This test suite can also help find bugs that impact uptime
132  * and stability when run for days or weeks.
133  *
134  * This test suite consists of the following: a few Java programs, a little
135  * helper script to run the Java programs, and a Maven script to build it.
136  *
137  * When generating data, it is best to have each map task generate a multiple of
138  * 25 million. The reason for this is that a circular linked list is generated
139  * every 25M nodes. Not generating a multiple of 25M will result in some nodes in
140  * the linked list not having references. The loss of an unreferenced node cannot
141  * be detected.
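 * For example, with the default width of 1,000,000 and wrap multiplier of 25, each
 * map task should write 25,000,000 nodes (or a multiple of that) so that every
 * generated list closes into a circle and every node ends up referenced.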
142  *
143  *
144  * Below is a description of the Java programs
145  *
146  * Generator - A map only job that generates data. As stated previously,
147  * it is best to generate data in multiples of 25M.
148  *
149  * Verify - A map reduce job that looks for holes. Look at the counts after running:
150  * REFERENCED and UNREFERENCED are ok, any UNDEFINED counts are bad. Do not run at
151  * the same time as the Generator.
152  *
153  * Walker - A standalone program that starts following a linked list and emits timing info.
154  *
155  * Print - A standalone program that prints nodes in the linked list
156  *
157  * Delete - A standalone program that deletes a single node
158  *
159  * This class can be run as a unit test, as an integration test, or from the command line
160  */
161 @Category(IntegrationTests.class)
162 public class IntegrationTestBigLinkedList extends IntegrationTestBase {
163   protected static final byte[] NO_KEY = new byte[1];
164 
165   protected static String TABLE_NAME_KEY = "IntegrationTestBigLinkedList.table";
166 
167   protected static String DEFAULT_TABLE_NAME = "IntegrationTestBigLinkedList";
168 
169   protected static byte[] FAMILY_NAME = Bytes.toBytes("meta");
170 
171   //link to the id of the prev node in the linked list
172   protected static final byte[] COLUMN_PREV = Bytes.toBytes("prev");
173 
174   //identifier of the mapred task that generated this row
175   protected static final byte[] COLUMN_CLIENT = Bytes.toBytes("client");
176 
177   //the id of the row within the same client.
178   protected static final byte[] COLUMN_COUNT = Bytes.toBytes("count");
179 
180   /** How many rows to write per map task. This has to be a multiple of 25M */
181   private static final String GENERATOR_NUM_ROWS_PER_MAP_KEY
182     = "IntegrationTestBigLinkedList.generator.num_rows";
183 
184   private static final String GENERATOR_NUM_MAPPERS_KEY
185     = "IntegrationTestBigLinkedList.generator.map.tasks";
186 
187   private static final String GENERATOR_WIDTH_KEY
188     = "IntegrationTestBigLinkedList.generator.width";
189 
190   private static final String GENERATOR_WRAP_KEY
191     = "IntegrationTestBigLinkedList.generator.wrap";
192 
193   protected int NUM_SLAVES_BASE = 3; // number of slaves for the cluster
194 
195   private static final int MISSING_ROWS_TO_LOG = 50;
196 
197   private static final int WIDTH_DEFAULT = 1000000;
198   private static final int WRAP_DEFAULT = 25;
199   private static final int ROWKEY_LENGTH = 16;
200 
201   protected String toRun;
202   protected String[] otherArgs;
203 
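  /** A node read back from the table: its row key, prev pointer, generating client id, and count. */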
204   static class CINode {
205     byte[] key;
206     byte[] prev;
207     String client;
208     long count;
209   }
210 
211   /**
212    * A Map only job that generates random linked list and stores them.
213    */
214   static class Generator extends Configured implements Tool {
215 
216     private static final Log LOG = LogFactory.getLog(Generator.class);
217 
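    /**
     * An InputFormat that fabricates its own input: one split per requested mapper, and
     * for each split a stream of random ROWKEY_LENGTH-byte keys with NullWritable values.
     */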
218     static class GeneratorInputFormat extends InputFormat<BytesWritable,NullWritable> {
219       static class GeneratorInputSplit extends InputSplit implements Writable {
220         @Override
221         public long getLength() throws IOException, InterruptedException {
222           return 1;
223         }
224         @Override
225         public String[] getLocations() throws IOException, InterruptedException {
226           return new String[0];
227         }
228         @Override
229         public void readFields(DataInput arg0) throws IOException {
230         }
231         @Override
232         public void write(DataOutput arg0) throws IOException {
233         }
234       }
235 
236       static class GeneratorRecordReader extends RecordReader<BytesWritable,NullWritable> {
237         private long count;
238         private long numNodes;
239         private Random rand;
240 
241         @Override
242         public void close() throws IOException {
243         }
244 
245         @Override
246         public BytesWritable getCurrentKey() throws IOException, InterruptedException {
247           byte[] bytes = new byte[ROWKEY_LENGTH];
248           rand.nextBytes(bytes);
249           return new BytesWritable(bytes);
250         }
251 
252         @Override
253         public NullWritable getCurrentValue() throws IOException, InterruptedException {
254           return NullWritable.get();
255         }
256 
257         @Override
258         public float getProgress() throws IOException, InterruptedException {
259           return (float)(count / (double)numNodes);
260         }
261 
262         @Override
263         public void initialize(InputSplit arg0, TaskAttemptContext context)
264             throws IOException, InterruptedException {
265           numNodes = context.getConfiguration().getLong(GENERATOR_NUM_ROWS_PER_MAP_KEY, 25000000);
266           rand = new Random();
267         }
268 
269         @Override
270         public boolean nextKeyValue() throws IOException, InterruptedException {
271           return count++ < numNodes;
272         }
273 
274       }
275 
276       @Override
277       public RecordReader<BytesWritable,NullWritable> createRecordReader(
278           InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
279         GeneratorRecordReader rr = new GeneratorRecordReader();
280         rr.initialize(split, context);
281         return rr;
282       }
283 
284       @Override
285       public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
286         int numMappers = job.getConfiguration().getInt(GENERATOR_NUM_MAPPERS_KEY, 1);
287 
288         ArrayList<InputSplit> splits = new ArrayList<InputSplit>(numMappers);
289 
290         for (int i = 0; i < numMappers; i++) {
291           splits.add(new GeneratorInputSplit());
292         }
293 
294         return splits;
295       }
296     }
297 
298     /** Ensure each output file from the prev-job goes, unsplit, to a single map task of the current job */
299     static class OneFilePerMapperSFIF<K, V> extends SequenceFileInputFormat<K, V> {
300       @Override
301       protected boolean isSplitable(JobContext context, Path filename) {
302         return false;
303       }
304     }
305 
306     /**
307      * Some ASCII art time:
308      * [ . . . ] represents one batch of random longs of length WIDTH
309      *
310      *                _________________________
311      *               |                  ______ |
312      *               |                 |      ||
313      *             __+_________________+_____ ||
314      *             v v                 v     |||
315      * first   = [ . . . . . . . . . . . ]   |||
316      *             ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^     |||
317      *             | | | | | | | | | | |     |||
318      * prev    = [ . . . . . . . . . . . ]   |||
319      *             ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^     |||
320      *             | | | | | | | | | | |     |||
321      * current = [ . . . . . . . . . . . ]   |||
322      *                                       |||
323      * ...                                   |||
324      *                                       |||
325      * last    = [ . . . . . . . . . . . ]   |||
326      *             | | | | | | | | | | |-----|||
327      *             |                 |--------||
328      *             |___________________________|
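     *
     * Each batch of WIDTH nodes points (via the prev column) at the node at the same
     * index in the previously persisted batch; after WRAP batches the first batch is
     * re-persisted against the last one, closing the whole structure into a circle.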
329      */
330     static class GeneratorMapper
331       extends Mapper<BytesWritable, NullWritable, NullWritable, NullWritable> {
332 
333       byte[][] first = null;
334       byte[][] prev = null;
335       byte[][] current = null;
336       byte[] id;
337       long count = 0;
338       int i;
339       HTable table;
340       long numNodes;
341       long wrap;
342       int width;
343 
344       @Override
345       protected void setup(Context context) throws IOException, InterruptedException {
346         id = Bytes.toBytes("Job: "+context.getJobID() + " Task: " + context.getTaskAttemptID());
347         Configuration conf = context.getConfiguration();
348         instantiateHTable(conf);
349         this.width = context.getConfiguration().getInt(GENERATOR_WIDTH_KEY, WIDTH_DEFAULT);
350         current = new byte[this.width][];
351         int wrapMultiplier = context.getConfiguration().getInt(GENERATOR_WRAP_KEY, WRAP_DEFAULT);
352         this.wrap = (long)wrapMultiplier * width;
353         this.numNodes = context.getConfiguration().getLong(
354             GENERATOR_NUM_ROWS_PER_MAP_KEY, (long)WIDTH_DEFAULT * WRAP_DEFAULT);
355         if (this.numNodes < this.wrap) {
356           this.wrap = this.numNodes;
357         }
358       }
359 
360       protected void instantiateHTable(Configuration conf) throws IOException {
361         table = new HTable(conf, getTableName(conf));
362         table.setAutoFlush(false, true);
363         table.setWriteBufferSize(4 * 1024 * 1024);
364       }
365 
366       @Override
367       protected void cleanup(Context context) throws IOException ,InterruptedException {
368         table.close();
369       }
370 
371       @Override
372       protected void map(BytesWritable key, NullWritable value, Context output) throws IOException {
373         current[i] = new byte[key.getLength()];
374         System.arraycopy(key.getBytes(), 0, current[i], 0, key.getLength());
375         if (++i == current.length) {
376           persist(output, count, prev, current, id);
377           i = 0;
378 
379           if (first == null)
380             first = current;
381           prev = current;
382           current = new byte[this.width][];
383 
384           count += current.length;
385           output.setStatus("Count " + count);
386 
387           if (count % wrap == 0) {
388           // this block of code turns the 1 million linked lists of length 25 into one giant
389           // circular linked list of 25 million
390             circularLeftShift(first);
391 
392             persist(output, -1, prev, first, null);
393 
394             first = null;
395             prev = null;
396           }
397         }
398       }
399 
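      // Rotate 'first' left by one so that, when it is re-persisted against the last
      // batch, list i is stitched onto list i-1 rather than closing onto itself; this
      // is what joins the WIDTH column-wise lists into one giant circular list.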
400       private static <T> void circularLeftShift(T[] first) {
401         T ez = first[0];
402         System.arraycopy(first, 1, first, 0, first.length - 1);
403         first[first.length - 1] = ez;
404       }
405 
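      /**
       * Writes one batch of nodes. Each row's prev column points at the node at the same
       * index in the previous batch (NO_KEY for the very first batch); count and client id
       * are only written for regular batches (count >= 0 and id != null).
       */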
406       protected void persist(Context output, long count, byte[][] prev, byte[][] current, byte[] id)
407           throws IOException {
408         for (int i = 0; i < current.length; i++) {
409           Put put = new Put(current[i]);
410           put.add(FAMILY_NAME, COLUMN_PREV, prev == null ? NO_KEY : prev[i]);
411 
412           if (count >= 0) {
413             put.add(FAMILY_NAME, COLUMN_COUNT, Bytes.toBytes(count + i));
414           }
415           if (id != null) {
416             put.add(FAMILY_NAME, COLUMN_CLIENT, id);
417           }
418           table.put(put);
419 
420           if (i % 1000 == 0) {
421           // Tickle progress every so often, else the map runner will think we are hung
422             output.progress();
423           }
424         }
425 
426         table.flushCommits();
427       }
428     }
429 
430     @Override
431     public int run(String[] args) throws Exception {
432       if (args.length < 3) {
433         System.out.println("Usage : " + Generator.class.getSimpleName() +
434             " <num mappers> <num nodes per map> <tmp output dir> [<width> <wrap multiplier>]");
435         System.out.println("   where <num nodes per map> should be a multiple of " +
436             " width*wrap multiplier, 25M by default");
437         return 0;
438       }
439 
440       int numMappers = Integer.parseInt(args[0]);
441       long numNodes = Long.parseLong(args[1]);
442       Path tmpOutput = new Path(args[2]);
443       Integer width = (args.length < 4) ? null : Integer.parseInt(args[3]);
444       Integer wrapMuplitplier = (args.length < 5) ? null : Integer.parseInt(args[4]);
445       return run(numMappers, numNodes, tmpOutput, width, wrapMuplitplier);
446     }
447 
448     protected void createSchema() throws IOException {
449       Configuration conf = getConf();
450       HBaseAdmin admin = new HBaseAdmin(conf);
451       TableName tableName = getTableName(conf);
452       try {
453         if (!admin.tableExists(tableName)) {
454           HTableDescriptor htd = new HTableDescriptor(getTableName(getConf()));
455           htd.addFamily(new HColumnDescriptor(FAMILY_NAME));
456           int numberOfServers = admin.getClusterStatus().getServers().size();
457           if (numberOfServers == 0) {
458             throw new IllegalStateException("No live regionservers");
459           }
460           int regionsPerServer = conf.getInt(HBaseTestingUtility.REGIONS_PER_SERVER_KEY,
461                                 HBaseTestingUtility.DEFAULT_REGIONS_PER_SERVER);
462           int totalNumberOfRegions = numberOfServers * regionsPerServer;
463           LOG.info("Number of live regionservers: " + numberOfServers + ", " +
464               "pre-splitting table into " + totalNumberOfRegions + " regions " +
465               "(default regions per server: " + regionsPerServer + ")");
466 
467           byte[][] splits = new RegionSplitter.UniformSplit().split(
468               totalNumberOfRegions);
469 
470           admin.createTable(htd, splits);
471         }
472       } catch (MasterNotRunningException e) {
473         LOG.error("Master not running", e);
474         throw new IOException(e);
475       } finally {
476         admin.close();
477       }
478     }
479 
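    /**
     * Phase one of generation: a map-only job that writes random row keys to sequence
     * files under tmpOutput. Phase two (runGenerator) reads those files back, one file
     * per mapper, and builds the linked lists in HBase.
     */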
480     public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
481         Integer width, Integer wrapMuplitplier) throws Exception {
482       LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
483           + ", numNodes=" + numNodes);
484       Job job = new Job(getConf());
485 
486       job.setJobName("Random Input Generator");
487       job.setNumReduceTasks(0);
488       job.setJarByClass(getClass());
489 
490       job.setInputFormatClass(GeneratorInputFormat.class);
491       job.setOutputKeyClass(BytesWritable.class);
492       job.setOutputValueClass(NullWritable.class);
493 
494       setJobConf(job, numMappers, numNodes, width, wrapMuplitplier);
495 
496       job.setMapperClass(Mapper.class); //identity mapper
497 
498       FileOutputFormat.setOutputPath(job, tmpOutput);
499       job.setOutputFormatClass(SequenceFileOutputFormat.class);
500 
501       boolean success = jobCompletion(job);
502 
503       return success ? 0 : 1;
504     }
505 
506     public int runGenerator(int numMappers, long numNodes, Path tmpOutput,
507         Integer width, Integer wrapMuplitplier) throws Exception {
508       LOG.info("Running Generator with numMappers=" + numMappers +", numNodes=" + numNodes);
509       createSchema();
510       Job job = new Job(getConf());
511 
512       job.setJobName("Link Generator");
513       job.setNumReduceTasks(0);
514       job.setJarByClass(getClass());
515 
516       FileInputFormat.setInputPaths(job, tmpOutput);
517       job.setInputFormatClass(OneFilePerMapperSFIF.class);
518       job.setOutputKeyClass(NullWritable.class);
519       job.setOutputValueClass(NullWritable.class);
520 
521       setJobConf(job, numMappers, numNodes, width, wrapMuplitplier);
522 
523       setMapperForGenerator(job);
524 
525       job.setOutputFormatClass(NullOutputFormat.class);
526 
527       job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
528       TableMapReduceUtil.addDependencyJars(job);
529       TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
530       TableMapReduceUtil.initCredentials(job);
531 
532       boolean success = jobCompletion(job);
533 
534       return success ? 0 : 1;
535     }
536 
537     protected boolean jobCompletion(Job job) throws IOException, InterruptedException,
538         ClassNotFoundException {
539       boolean success = job.waitForCompletion(true);
540       return success;
541     }
542 
543     protected void setMapperForGenerator(Job job) {
544       job.setMapperClass(GeneratorMapper.class);
545     }
546 
547     public int run(int numMappers, long numNodes, Path tmpOutput,
548         Integer width, Integer wrapMuplitplier) throws Exception {
549       int ret = runRandomInputGenerator(numMappers, numNodes, tmpOutput, width, wrapMuplitplier);
550       if (ret > 0) {
551         return ret;
552       }
553       return runGenerator(numMappers, numNodes, tmpOutput, width, wrapMuplitplier);
554     }
555   }
556 
557   /**
558    * A Map Reduce job that verifies that the linked lists generated by
559    * {@link Generator} do not have any holes.
560    */
561   static class Verify extends Configured implements Tool {
562 
563     private static final Log LOG = LogFactory.getLog(Verify.class);
564     protected static final BytesWritable DEF = new BytesWritable(NO_KEY);
565 
566     protected Job job;
567 
568     public static class VerifyMapper extends TableMapper<BytesWritable, BytesWritable> {
569       private BytesWritable row = new BytesWritable();
570       private BytesWritable ref = new BytesWritable();
571 
572       @Override
573       protected void map(ImmutableBytesWritable key, Result value, Context context)
574           throws IOException ,InterruptedException {
575         byte[] rowKey = key.get();
576         row.set(rowKey, 0, rowKey.length);
577         context.write(row, DEF);
578         byte[] prev = value.getValue(FAMILY_NAME, COLUMN_PREV);
579         if (prev != null && prev.length > 0) {
580           ref.set(prev, 0, prev.length);
581           context.write(ref, row);
582         } else {
583           LOG.warn(String.format("Prev is not set for: %s", Bytes.toStringBinary(rowKey)));
584         }
585       }
586     }
587 
588     public static enum Counts {
589       UNREFERENCED, UNDEFINED, REFERENCED, CORRUPT, EXTRAREFERENCES
590     }
591 
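    /**
     * For each row key, counts how often it was seen as defined (the DEF marker emitted
     * for an existing row) versus referenced (emitted for another row's prev column), and
     * classifies it as REFERENCED, UNREFERENCED (defined but never referenced) or
     * UNDEFINED (referenced but missing, i.e. potential data loss).
     */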
592     public static class VerifyReducer extends Reducer<BytesWritable,BytesWritable,Text,Text> {
593       private ArrayList<byte[]> refs = new ArrayList<byte[]>();
594 
595       private AtomicInteger rows = new AtomicInteger(0);
596 
597       @Override
598       public void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context)
599           throws IOException, InterruptedException {
600 
601         int defCount = 0;
602 
603         refs.clear();
604         for (BytesWritable type : values) {
605           if (type.getLength() == DEF.getLength()) {
606             defCount++;
607           } else {
608             byte[] bytes = new byte[type.getLength()];
609             System.arraycopy(type.getBytes(), 0, bytes, 0, type.getLength());
610             refs.add(bytes);
611           }
612         }
613 
614         // TODO check for more than one def, should not happen
615 
616         StringBuilder refsSb = null;
617         String keyString = null;
618         if (defCount == 0 || refs.size() != 1) {
619           refsSb = new StringBuilder();
620           String comma = "";
621           for (byte[] ref : refs) {
622             refsSb.append(comma);
623             comma = ",";
624             refsSb.append(Bytes.toStringBinary(ref));
625           }
626           keyString = Bytes.toStringBinary(key.getBytes(), 0, key.getLength());
627 
628           LOG.error("Linked List error: Key = " + keyString + " References = " + refsSb.toString());
629         }
630 
631         if (defCount == 0 && refs.size() > 0) {
632           // this is bad, found a node that is referenced but not defined. It must have been
633           // lost, emit some info about this node for debugging purposes.
634           context.write(new Text(keyString), new Text(refsSb.toString()));
635           context.getCounter(Counts.UNDEFINED).increment(1);
636           if (rows.addAndGet(1) < MISSING_ROWS_TO_LOG) {
637             context.getCounter("undef", keyString).increment(1);
638           }
639         } else if (defCount > 0 && refs.size() == 0) {
640           // node is defined but not referenced
641           context.write(new Text(keyString), new Text("none"));
642           context.getCounter(Counts.UNREFERENCED).increment(1);
643           if (rows.addAndGet(1) < MISSING_ROWS_TO_LOG) {
644             context.getCounter("unref", keyString).increment(1);
645           }
646         } else {
647           if (refs.size() > 1) {
648             if (refsSb != null) {
649               context.write(new Text(keyString), new Text(refsSb.toString()));
650             }
651             context.getCounter(Counts.EXTRAREFERENCES).increment(refs.size() - 1);
652           }
653           // node is defined and referenced
654           context.getCounter(Counts.REFERENCED).increment(1);
655         }
656 
657       }
658     }
659 
660     @Override
661     public int run(String[] args) throws Exception {
662 
663       if (args.length != 2) {
664         System.out.println("Usage : " + Verify.class.getSimpleName() + " <output dir> <num reducers>");
665         return 0;
666       }
667 
668       String outputDir = args[0];
669       int numReducers = Integer.parseInt(args[1]);
670 
671        return run(outputDir, numReducers);
672     }
673 
674     public int run(String outputDir, int numReducers) throws Exception {
675       return run(new Path(outputDir), numReducers);
676     }
677 
678     public int run(Path outputDir, int numReducers) throws Exception {
679       LOG.info("Running Verify with outputDir=" + outputDir +", numReducers=" + numReducers);
680 
681       job = new Job(getConf());
682 
683       job.setJobName("Link Verifier");
684       job.setNumReduceTasks(numReducers);
685       job.setJarByClass(getClass());
686 
687       setJobScannerConf(job);
688 
689       Scan scan = new Scan();
690       scan.addColumn(FAMILY_NAME, COLUMN_PREV);
691       scan.setCaching(10000);
692       scan.setCacheBlocks(false);
693 
694       TableMapReduceUtil.initTableMapperJob(getTableName(getConf()).getName(), scan,
695           VerifyMapper.class, BytesWritable.class, BytesWritable.class, job);
696       TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
697 
698       job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
699 
700       job.setReducerClass(VerifyReducer.class);
701       job.setOutputFormatClass(TextOutputFormat.class);
702       TextOutputFormat.setOutputPath(job, outputDir);
703 
704       boolean success = job.waitForCompletion(true);
705 
706       return success ? 0 : 1;
707     }
708 
709     @SuppressWarnings("deprecation")
710     public boolean verify(long expectedReferenced) throws Exception {
711       if (job == null) {
712         throw new IllegalStateException("You should call run() first");
713       }
714 
715       Counters counters = job.getCounters();
716 
717       Counter referenced = counters.findCounter(Counts.REFERENCED);
718       Counter unreferenced = counters.findCounter(Counts.UNREFERENCED);
719       Counter undefined = counters.findCounter(Counts.UNDEFINED);
720       Counter multiref = counters.findCounter(Counts.EXTRAREFERENCES);
721 
722       boolean success = true;
723       //assert
724       if (expectedReferenced != referenced.getValue()) {
725         LOG.error("Expected referenced count does not match with actual referenced count. " +
726             "expected referenced=" + expectedReferenced + " ,actual=" + referenced.getValue());
727         success = false;
728       }
729 
730       if (unreferenced.getValue() > 0) {
731         boolean couldBeMultiRef = (multiref.getValue() == unreferenced.getValue());
732         LOG.error("Unreferenced nodes were not expected. Unreferenced count=" + unreferenced.getValue()
733             + (couldBeMultiRef ? "; could be due to duplicate random numbers" : ""));
734         success = false;
735       }
736 
737       if (undefined.getValue() > 0) {
738         LOG.error("Found an undefined node. Undefined count=" + undefined.getValue());
739         success = false;
740       }
741 
742       if (!success) {
743         handleFailure(counters);
744       }
745       return success;
746     }
747 
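    /**
     * On verification failure, logs the region location of every row recorded in the
     * per-key "undef" and "unref" counter groups to help pinpoint where the problem
     * rows live.
     */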
748     protected void handleFailure(Counters counters) throws IOException {
749       Configuration conf = job.getConfiguration();
750       HConnection conn = HConnectionManager.getConnection(conf);
751       TableName tableName = getTableName(conf);
752       CounterGroup g = counters.getGroup("undef");
753       Iterator<Counter> it = g.iterator();
754       while (it.hasNext()) {
755         String keyString = it.next().getName();
756         byte[] key = Bytes.toBytes(keyString);
757         HRegionLocation loc = conn.relocateRegion(tableName, key);
758         LOG.error("undefined row " + keyString + ", " + loc);
759       }
760       g = counters.getGroup("unref");
761       it = g.iterator();
762       while (it.hasNext()) {
763         String keyString = it.next().getName();
764         byte[] key = Bytes.toBytes(keyString);
765         HRegionLocation loc = conn.relocateRegion(tableName, key);
766         LOG.error("unreferred row " + keyString + ", " + loc);
767       }
768     }
769   }
770 
771   /**
772    * Executes Generate and Verify in a loop. Data is not cleaned between runs, so each iteration
773    * adds more data.
774    */
775   static class Loop extends Configured implements Tool {
776 
777     private static final Log LOG = LogFactory.getLog(Loop.class);
778 
779     IntegrationTestBigLinkedList it;
780 
781     protected void runGenerator(int numMappers, long numNodes,
782         String outputDir, Integer width, Integer wrapMuplitplier) throws Exception {
783       Path outputPath = new Path(outputDir);
784       UUID uuid = UUID.randomUUID(); //create a random UUID.
785       Path generatorOutput = new Path(outputPath, uuid.toString());
786 
787       Generator generator = new Generator();
788       generator.setConf(getConf());
789       int retCode = generator.run(numMappers, numNodes, generatorOutput, width, wrapMuplitplier);
790       if (retCode > 0) {
791         throw new RuntimeException("Generator failed with return code: " + retCode);
792       }
793     }
794 
795     protected void runVerify(String outputDir,
796         int numReducers, long expectedNumNodes) throws Exception {
797       Path outputPath = new Path(outputDir);
798       UUID uuid = UUID.randomUUID(); //create a random UUID.
799       Path iterationOutput = new Path(outputPath, uuid.toString());
800 
801       Verify verify = new Verify();
802       verify.setConf(getConf());
803       int retCode = verify.run(iterationOutput, numReducers);
804       if (retCode > 0) {
805         throw new RuntimeException("Verify.run failed with return code: " + retCode);
806       }
807 
808       if (!verify.verify(expectedNumNodes)) {
809         throw new RuntimeException("Verify.verify failed");
810       }
811 
812       LOG.info("Verify finished with succees. Total nodes=" + expectedNumNodes);
813     }
814 
815     @Override
816     public int run(String[] args) throws Exception {
817       if (args.length < 5) {
818         System.err.println("Usage: Loop <num iterations> <num mappers> <num nodes per mapper> <output dir> <num reducers> [<width> <wrap multiplier>]");
819         return 1;
820       }
821       LOG.info("Running Loop with args:" + Arrays.deepToString(args));
822 
823       int numIterations = Integer.parseInt(args[0]);
824       int numMappers = Integer.parseInt(args[1]);
825       long numNodes = Long.parseLong(args[2]);
826       String outputDir = args[3];
827       int numReducers = Integer.parseInt(args[4]);
828       Integer width = (args.length < 6) ? null : Integer.parseInt(args[5]);
829       Integer wrapMuplitplier = (args.length < 7) ? null : Integer.parseInt(args[6]);
830 
831       long expectedNumNodes = 0;
832 
833       if (numIterations < 0) {
834         numIterations = Integer.MAX_VALUE; //run indefinitely (kind of)
835       }
836 
837       for (int i = 0; i < numIterations; i++) {
838         LOG.info("Starting iteration = " + i);
839         runGenerator(numMappers, numNodes, outputDir, width, wrapMuplitplier);
840         expectedNumNodes += numMappers * numNodes;
841 
842         runVerify(outputDir, numReducers, expectedNumNodes);
843       }
844 
845       return 0;
846     }
847   }
848 
849   /**
850    * A standalone program that prints out portions of a list created by {@link Generator}
851    */
852   private static class Print extends Configured implements Tool {
853     @Override
854     public int run(String[] args) throws Exception {
855       Options options = new Options();
856       options.addOption("s", "start", true, "start key");
857       options.addOption("e", "end", true, "end key");
858       options.addOption("l", "limit", true, "number to print");
859 
860       GnuParser parser = new GnuParser();
861       CommandLine cmd = null;
862       try {
863         cmd = parser.parse(options, args);
864         if (cmd.getArgs().length != 0) {
865           throw new ParseException("Command takes no arguments");
866         }
867       } catch (ParseException e) {
868         System.err.println("Failed to parse command line " + e.getMessage());
869         System.err.println();
870         HelpFormatter formatter = new HelpFormatter();
871         formatter.printHelp(getClass().getSimpleName(), options);
872         System.exit(-1);
873       }
874 
875       HTable table = new HTable(getConf(), getTableName(getConf()));
876 
877       Scan scan = new Scan();
878       scan.setBatch(10000);
879 
880       if (cmd.hasOption("s"))
881         scan.setStartRow(Bytes.toBytesBinary(cmd.getOptionValue("s")));
882 
883       if (cmd.hasOption("e"))
884         scan.setStopRow(Bytes.toBytesBinary(cmd.getOptionValue("e")));
885 
886       int limit = 0;
887       if (cmd.hasOption("l"))
888         limit = Integer.parseInt(cmd.getOptionValue("l"));
889       else
890         limit = 100;
891 
892       ResultScanner scanner = table.getScanner(scan);
893 
894       CINode node = new CINode();
895       Result result = scanner.next();
896       int count = 0;
897       while (result != null && count++ < limit) {
898         node = getCINode(result, node);
899         System.out.printf("%s:%s:%012d:%s\n", Bytes.toStringBinary(node.key),
900             Bytes.toStringBinary(node.prev), node.count, node.client);
901         result = scanner.next();
902       }
903       scanner.close();
904       table.close();
905 
906       return 0;
907     }
908   }
909 
910   /**
911    * A standalone program that deletes a single node.
912    */
913   private static class Delete extends Configured implements Tool {
914     @Override
915     public int run(String[] args) throws Exception {
916       if (args.length != 1) {
917         System.out.println("Usage : " + Delete.class.getSimpleName() + " <node to delete>");
918         return 0;
919       }
920       byte[] val = Bytes.toBytesBinary(args[0]);
921 
922       org.apache.hadoop.hbase.client.Delete delete
923         = new org.apache.hadoop.hbase.client.Delete(val);
924 
925       HTable table = new HTable(getConf(), getTableName(getConf()));
926 
927       table.delete(delete);
928       table.flushCommits();
929       table.close();
930 
931       System.out.println("Delete successful");
932       return 0;
933     }
934   }
935 
936   /**
937    * A standalone program that follows a linked list created by {@link Generator} and prints timing info.
938    */
939   private static class Walker extends Configured implements Tool {
940     @Override
941     public int run(String[] args) throws IOException {
942       Options options = new Options();
943       options.addOption("n", "num", true, "number of queries");
944       options.addOption("s", "start", true, "key to start at, binary string");
945       options.addOption("l", "logevery", true, "log every N queries");
946 
947       GnuParser parser = new GnuParser();
948       CommandLine cmd = null;
949       try {
950         cmd = parser.parse(options, args);
951         if (cmd.getArgs().length != 0) {
952           throw new ParseException("Command takes no arguments");
953         }
954       } catch (ParseException e) {
955         System.err.println("Failed to parse command line " + e.getMessage());
956         System.err.println();
957         HelpFormatter formatter = new HelpFormatter();
958         formatter.printHelp(getClass().getSimpleName(), options);
959         System.exit(-1);
960       }
961 
962       long maxQueries = Long.MAX_VALUE;
963       if (cmd.hasOption('n')) {
964         maxQueries = Long.parseLong(cmd.getOptionValue("n"));
965       }
966       Random rand = new Random();
967       boolean isSpecificStart = cmd.hasOption('s');
968       byte[] startKey = isSpecificStart ? Bytes.toBytesBinary(cmd.getOptionValue('s')) : null;
969       int logEvery = cmd.hasOption('l') ? Integer.parseInt(cmd.getOptionValue('l')) : 1;
970 
971       HTable table = new HTable(getConf(), getTableName(getConf()));
972       long numQueries = 0;
973       // If isSpecificStart is set, only walk one list from that particular node.
974       // Note that in the case of a circular (or P-shaped) list it will walk forever, as is
975       // the case in a normal run without a startKey.
976       while (numQueries < maxQueries && (numQueries == 0 || !isSpecificStart)) {
977         if (!isSpecificStart) {
978           startKey = new byte[ROWKEY_LENGTH];
979           rand.nextBytes(startKey);
980         }
981         CINode node = findStartNode(table, startKey);
982         if (node == null && isSpecificStart) {
983           System.err.printf("Start node not found: %s \n", Bytes.toStringBinary(startKey));
984         }
985         numQueries++;
986         while (node != null && node.prev.length != NO_KEY.length && numQueries < maxQueries) {
987           byte[] prev = node.prev;
988           long t1 = System.currentTimeMillis();
989           node = getNode(prev, table, node);
990           long t2 = System.currentTimeMillis();
991           if (numQueries % logEvery == 0) {
992             System.out.printf("CQ %d: %d %s \n", numQueries, t2 - t1, Bytes.toStringBinary(prev));
993           }
994           numQueries++;
995           if (node == null) {
996             System.err.printf("UNDEFINED NODE %s \n", Bytes.toStringBinary(prev));
997           } else if (node.prev.length == NO_KEY.length) {
998             System.err.printf("TERMINATING NODE %s \n", Bytes.toStringBinary(node.key));
999           }
1000         }
1001       }
1002 
1003       table.close();
1004       return 0;
1005     }
1006 
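    /** Scans forward from startKey and returns the first node found, or null if the scan is empty. */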
1007     private static CINode findStartNode(HTable table, byte[] startKey) throws IOException {
1008       Scan scan = new Scan();
1009       scan.setStartRow(startKey);
1010       scan.setBatch(1);
1011       scan.addColumn(FAMILY_NAME, COLUMN_PREV);
1012 
1013       long t1 = System.currentTimeMillis();
1014       ResultScanner scanner = table.getScanner(scan);
1015       Result result = scanner.next();
1016       long t2 = System.currentTimeMillis();
1017       scanner.close();
1018 
1019       if ( result != null) {
1020         CINode node = getCINode(result, new CINode());
1021         System.out.printf("FSR %d %s\n", t2 - t1, Bytes.toStringBinary(node.key));
1022         return node;
1023       }
1024 
1025       System.out.println("FSR " + (t2 - t1));
1026 
1027       return null;
1028     }
1029 
1030     private CINode getNode(byte[] row, HTable table, CINode node) throws IOException {
1031       Get get = new Get(row);
1032       get.addColumn(FAMILY_NAME, COLUMN_PREV);
1033       Result result = table.get(get);
1034       return getCINode(result, node);
1035     }
1036   }
1037 
1038   static TableName getTableName(Configuration conf) {
1039     return TableName.valueOf(conf.get(TABLE_NAME_KEY, DEFAULT_TABLE_NAME));
1040   }
1041 
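  /** Populates the given CINode from a Result, substituting NO_KEY, -1, or "" for any missing column. */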
1042   private static CINode getCINode(Result result, CINode node) {
1043     node.key = Bytes.copy(result.getRow());
1044     if (result.containsColumn(FAMILY_NAME, COLUMN_PREV)) {
1045       node.prev = Bytes.copy(result.getValue(FAMILY_NAME, COLUMN_PREV));
1046     } else {
1047       node.prev = NO_KEY;
1048     }
1049     if (result.containsColumn(FAMILY_NAME, COLUMN_COUNT)) {
1050       node.count = Bytes.toLong(result.getValue(FAMILY_NAME, COLUMN_COUNT));
1051     } else {
1052       node.count = -1;
1053     }
1054     if (result.containsColumn(FAMILY_NAME, COLUMN_CLIENT)) {
1055       node.client = Bytes.toString(result.getValue(FAMILY_NAME, COLUMN_CLIENT));
1056     } else {
1057       node.client = "";
1058     }
1059     return node;
1060   }
1061 
1062   protected IntegrationTestingUtility util;
1063 
1064   @Override
1065   public void setUpCluster() throws Exception {
1066     util = getTestingUtil(getConf());
1067     boolean isDistributed = util.isDistributedCluster();
1068     util.initializeCluster(isDistributed ? 1 : this.NUM_SLAVES_BASE);
1069     if (!isDistributed) {
1070       util.startMiniMapReduceCluster();
1071     }
1072     this.setConf(util.getConfiguration());
1073   }
1074 
1075   @Override
1076   public void cleanUpCluster() throws Exception {
1077     super.cleanUpCluster();
1078     if (util.isDistributedCluster()) {
1079       util.shutdownMiniMapReduceCluster();
1080     }
1081   }
1082 
1083   @Test
1084   public void testContinuousIngest() throws IOException, Exception {
1085     //Loop <num iterations> <num mappers> <num nodes per mapper> <output dir> <num reducers>
1086     int ret = ToolRunner.run(getTestingUtil(getConf()).getConfiguration(), new Loop(),
1087         new String[] {"1", "1", "2000000",
1088                      util.getDataTestDirOnTestFS("IntegrationTestBigLinkedList").toString(), "1"});
1089     org.junit.Assert.assertEquals(0, ret);
1090   }
1091 
1092   private void usage() {
1093     System.err.println("Usage: " + this.getClass().getSimpleName() + " COMMAND [COMMAND options]");
1094     System.err.println("  where COMMAND is one of:");
1095     System.err.println("");
1096     System.err.println("  Generator                  A map only job that generates data.");
1097     System.err.println("  Verify                     A map reduce job that looks for holes");
1098     System.err.println("                             Look at the counts after running");
1099     System.err.println("                             REFERENCED and UNREFERENCED are ok");
1100     System.err.println("                             any UNDEFINED counts are bad. Do not");
1101     System.err.println("                             run at the same time as the Generator.");
1102     System.err.println("  Walker                     A standalong program that starts ");
1103     System.err.println("                             following a linked list and emits");
1104     System.err.println("                             timing info.");
1105     System.err.println("  Print                      A standalone program that prints nodes");
1106     System.err.println("                             in the linked list.");
1107     System.err.println("  Delete                     A standalone program that deletes a·");
1108     System.err.println("                             single node.");
1109     System.err.println("  Loop                       A program to Loop through Generator and");
1110     System.err.println("                             Verify steps");
1111     System.err.println("\t  ");
1112     System.err.flush();
1113   }
1114 
1115   @Override
1116   protected void processOptions(CommandLine cmd) {
1117     super.processOptions(cmd);
1118     String[] args = cmd.getArgs();
1119     //get the class, run with the conf
1120     if (args.length < 1) {
1121       printUsage();
1122       throw new RuntimeException("Incorrect Number of args.");
1123     }
1124     toRun = args[0];
1125     otherArgs = Arrays.copyOfRange(args, 1, args.length);
1126   }
1127 
1128   @Override
1129   public int runTestFromCommandLine() throws Exception {
1130 
1131     Tool tool = null;
1132     if (toRun.equals("Generator")) {
1133       tool = new Generator();
1134     } else if (toRun.equals("Verify")) {
1135       tool = new Verify();
1136     } else if (toRun.equals("Loop")) {
1137       Loop loop = new Loop();
1138       loop.it = this;
1139       tool = loop;
1140     } else if (toRun.equals("Walker")) {
1141       tool = new Walker();
1142     } else if (toRun.equals("Print")) {
1143       tool = new Print();
1144     } else if (toRun.equals("Delete")) {
1145       tool = new Delete();
1146     } else {
1147       usage();
1148       throw new RuntimeException("Unknown arg");
1149     }
1150 
1151     return ToolRunner.run(getConf(), tool, otherArgs);
1152   }
1153 
1154   @Override
1155   public String getTablename() {
1156     Configuration c = getConf();
1157     return c.get(TABLE_NAME_KEY, DEFAULT_TABLE_NAME);
1158   }
1159 
1160   @Override
1161   protected Set<String> getColumnFamilies() {
1162     return Sets.newHashSet(Bytes.toString(FAMILY_NAME));
1163   }
1164 
1165   private static void setJobConf(Job job, int numMappers, long numNodes,
1166       Integer width, Integer wrapMultiplier) {
1167     job.getConfiguration().setInt(GENERATOR_NUM_MAPPERS_KEY, numMappers);
1168     job.getConfiguration().setLong(GENERATOR_NUM_ROWS_PER_MAP_KEY, numNodes);
1169     if (width != null) {
1170       job.getConfiguration().setInt(GENERATOR_WIDTH_KEY, width);
1171     }
1172     if (wrapMultiplier != null) {
1173       job.getConfiguration().setInt(GENERATOR_WRAP_KEY, wrapMultiplier);
1174     }
1175   }
1176 
1177   public static void setJobScannerConf(Job job) {
1178     // Make sure scanners log something useful to make debugging possible.
1179     job.getConfiguration().setBoolean(ScannerCallable.LOG_SCANNER_ACTIVITY, true);
1180     job.getConfiguration().setInt(TableRecordReaderImpl.LOG_PER_ROW_COUNT, 100000);
1181   }
1182 
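  // Example command-line invocation (the launcher and argument values below are
  // illustrative only; see usage() and each tool's own usage message for details):
  //   hbase org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList Loop 1 4 25000000 /tmp/itbll-output 4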
1183   public static void main(String[] args) throws Exception {
1184     Configuration conf = HBaseConfiguration.create();
1185     IntegrationTestingUtility.setUseDistributedCluster(conf);
1186     int ret = ToolRunner.run(conf, new IntegrationTestBigLinkedList(), args);
1187     System.exit(ret);
1188   }
1189 }