package org.apache.hadoop.hbase.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Random;
import java.util.Set;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.IntegrationTestBase;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.IntegrationTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ScannerCallable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.NMapInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl;
import org.apache.hadoop.hbase.util.AbstractHBaseTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import com.google.common.collect.Sets;
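/**
 * Large-scale load-and-verify test. The load phase runs a map-only job whose mappers
 * write rows to an HBase table in blocks; every row in a block (after the first block)
 * also stores back-references to randomly chosen rows of the previous block. The verify
 * phase scans the table, emits each row together with the rows it references, and a
 * reducer checks that every referenced row was actually found; missing rows are written
 * to the job output directory for later inspection.
 */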
@Category(IntegrationTests.class)
public class IntegrationTestLoadAndVerify extends IntegrationTestBase {
  private static final String TEST_NAME = "IntegrationTestLoadAndVerify";
  private static final byte[] TEST_FAMILY = Bytes.toBytes("f1");
  private static final byte[] TEST_QUALIFIER = Bytes.toBytes("q1");

  private static final String NUM_TO_WRITE_KEY = "loadmapper.num_to_write";
  private static final long NUM_TO_WRITE_DEFAULT = 100*1000;

  private static final String TABLE_NAME_KEY = "loadmapper.table";
  private static final String TABLE_NAME_DEFAULT = "table";

  private static final String NUM_BACKREFS_KEY = "loadmapper.backrefs";
  private static final int NUM_BACKREFS_DEFAULT = 50;

  private static final String NUM_MAP_TASKS_KEY = "loadmapper.map.tasks";
  private static final String NUM_REDUCE_TASKS_KEY = "verify.reduce.tasks";
  private static final int NUM_MAP_TASKS_DEFAULT = 200;
  private static final int NUM_REDUCE_TASKS_DEFAULT = 35;

  private static final int SCANNER_CACHING = 500;

  protected IntegrationTestingUtility util;

  private String toRun = null;

  private enum Counters {
    ROWS_WRITTEN,
    REFERENCES_WRITTEN,
    REFERENCES_CHECKED
  }

  @Override
  public void setUpCluster() throws Exception {
    util = getTestingUtil(getConf());
    util.initializeCluster(3);
    this.setConf(util.getConfiguration());
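    // When running against a local minicluster rather than a distributed cluster,
    // scale the workload down so the test completes in a reasonable time.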
    if (!util.isDistributedCluster()) {
      getConf().setLong(NUM_TO_WRITE_KEY, NUM_TO_WRITE_DEFAULT / 100);
      getConf().setInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT / 100);
      getConf().setInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT / 10);
    }
  }

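  /**
   * Reverses the byte order of a long, so that sequentially increasing row indices
   * produce keys spread across the presplit key space instead of all landing in one region.
   */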
  public static long swapLong(long value)
  {
    return
      ( ( ( value >> 0 ) & 0xff ) << 56 ) +
      ( ( ( value >> 8 ) & 0xff ) << 48 ) +
      ( ( ( value >> 16 ) & 0xff ) << 40 ) +
      ( ( ( value >> 24 ) & 0xff ) << 32 ) +
      ( ( ( value >> 32 ) & 0xff ) << 24 ) +
      ( ( ( value >> 40 ) & 0xff ) << 16 ) +
      ( ( ( value >> 48 ) & 0xff ) << 8 ) +
      ( ( ( value >> 56 ) & 0xff ) << 0 );
  }

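  /**
   * Mapper for the load phase. Each mapper writes its share of rows in blocks; every row
   * in a block after the first also carries back-references to randomly chosen rows of
   * the previous block, which the verify phase later checks.
   */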
  public static class LoadMapper
      extends Mapper<NullWritable, NullWritable, NullWritable, NullWritable>
  {
    protected long recordsToWrite;
    protected HTable table;
    protected Configuration conf;
    protected int numBackReferencesPerRow;
    protected String shortTaskId;

    protected Random rand = new Random();

    protected Counter rowsWritten, refsWritten;

    @Override
    public void setup(Context context) throws IOException {
      conf = context.getConfiguration();
      recordsToWrite = conf.getLong(NUM_TO_WRITE_KEY, NUM_TO_WRITE_DEFAULT);
      String tableName = conf.get(TABLE_NAME_KEY, TABLE_NAME_DEFAULT);
      numBackReferencesPerRow = conf.getInt(NUM_BACKREFS_KEY, NUM_BACKREFS_DEFAULT);
      table = new HTable(conf, tableName);
      table.setWriteBufferSize(4*1024*1024);
      table.setAutoFlush(false, true);

      String taskId = conf.get("mapred.task.id");
      Matcher matcher = Pattern.compile(".+_m_(\\d+_\\d+)").matcher(taskId);
      if (!matcher.matches()) {
        throw new RuntimeException("Strange task ID: " + taskId);
      }
      shortTaskId = matcher.group(1);

      rowsWritten = context.getCounter(Counters.ROWS_WRITTEN);
      refsWritten = context.getCounter(Counters.REFERENCES_WRITTEN);
    }

    @Override
    public void cleanup(Context context) throws IOException {
      table.flushCommits();
      table.close();
    }

    @Override
    protected void map(NullWritable key, NullWritable value,
        Context context) throws IOException, InterruptedException {

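      // Row keys are the byte-swapped row index (8 bytes) followed by "/<shortTaskId>",
      // so rows written by different map tasks never collide.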
      String suffix = "/" + shortTaskId;
      byte[] row = Bytes.add(new byte[8], Bytes.toBytes(suffix));

      int BLOCK_SIZE = (int)(recordsToWrite / 100);

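      // Write rows in blocks of roughly 1% of the total. Each row in a block (after the
      // first block) stores back-references to random rows of the previous block.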
      for (long i = 0; i < recordsToWrite;) {
        long blockStart = i;
        for (long idxInBlock = 0;
             idxInBlock < BLOCK_SIZE && i < recordsToWrite;
             idxInBlock++, i++) {

          long byteSwapped = swapLong(i);
          Bytes.putLong(row, 0, byteSwapped);

          Put p = new Put(row);
          p.add(TEST_FAMILY, TEST_QUALIFIER, HConstants.EMPTY_BYTE_ARRAY);
          if (blockStart > 0) {
            for (int j = 0; j < numBackReferencesPerRow; j++) {
              long referredRow = blockStart - BLOCK_SIZE + rand.nextInt(BLOCK_SIZE);
              Bytes.putLong(row, 0, swapLong(referredRow));
              p.add(TEST_FAMILY, row, HConstants.EMPTY_BYTE_ARRAY);
            }
            refsWritten.increment(1);
          }
          rowsWritten.increment(1);
          table.put(p);

          if (i % 100 == 0) {
            context.setStatus("Written " + i + "/" + recordsToWrite + " records");
            context.progress();
          }
        }

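        // End of a block: flush everything written so far before the next block starts
        // writing back-references that point at these rows.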
        table.flushCommits();
      }
    }
  }

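  /**
   * Mapper for the verify phase. For every row it emits (rowKey, EMPTY) to record that the
   * row itself exists, and (referencedRowKey, rowKey) for each back-reference stored in the
   * row, so that all information about a given row meets in a single reduce call.
   */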
  public static class VerifyMapper extends TableMapper<BytesWritable, BytesWritable> {
    static final BytesWritable EMPTY = new BytesWritable(HConstants.EMPTY_BYTE_ARRAY);

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      BytesWritable bwKey = new BytesWritable(key.get());
      BytesWritable bwVal = new BytesWritable();
      for (Cell kv : value.listCells()) {
        if (Bytes.compareTo(TEST_QUALIFIER, 0, TEST_QUALIFIER.length,
            kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength()) == 0) {
          context.write(bwKey, EMPTY);
        } else {
          bwVal.set(kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength());
          context.write(bwVal, bwKey);
        }
      }
    }
  }

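  /**
   * Reducer for the verify phase. For each row key it checks that the EMPTY marker emitted
   * by {@link VerifyMapper} was seen (i.e. the row itself exists); if not, the referenced
   * row is missing and an error record is written to the job output.
   */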
  public static class VerifyReducer extends Reducer<BytesWritable, BytesWritable, Text, Text> {
    private static final Log LOG = LogFactory.getLog(VerifyReducer.class);
    private Counter refsChecked;
    private Counter rowsWritten;

    @Override
    public void setup(Context context) throws IOException {
      refsChecked = context.getCounter(Counters.REFERENCES_CHECKED);
      rowsWritten = context.getCounter(Counters.ROWS_WRITTEN);
    }

    @Override
    protected void reduce(BytesWritable referredRow, Iterable<BytesWritable> referrers,
        VerifyReducer.Context ctx) throws IOException, InterruptedException {
      boolean gotOriginalRow = false;
      int refCount = 0;

      for (BytesWritable ref : referrers) {
        if (ref.getLength() == 0) {
          assert !gotOriginalRow;
          gotOriginalRow = true;
        } else {
          refCount++;
        }
      }
      refsChecked.increment(refCount);

      if (!gotOriginalRow) {
        String parsedRow = makeRowReadable(referredRow.getBytes(), referredRow.getLength());
        String binRow = Bytes.toStringBinary(referredRow.getBytes(), 0, referredRow.getLength());
        LOG.error("Reference error row " + parsedRow);
        ctx.write(new Text(binRow), new Text(parsedRow));
        rowsWritten.increment(1);
      }
    }

    private String makeRowReadable(byte[] bytes, int length) {
      long rowIdx = swapLong(Bytes.toLong(bytes, 0));
      String suffix = Bytes.toString(bytes, 8, length - 8);

      return "Row #" + rowIdx + " suffix " + suffix;
    }
  }

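  /**
   * Runs the load job: a map-only job whose {@link LoadMapper} tasks write the test rows
   * and their back-references into the given table. Fails the test if the job fails.
   */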
  protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "load-output");

    NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
    conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());

    Job job = new Job(conf);
    job.setJobName(TEST_NAME + " Load for " + htd.getTableName());
    job.setJarByClass(this.getClass());
    setMapperClass(job);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setNumReduceTasks(0);
    setJobScannerConf(job);
    FileOutputFormat.setOutputPath(job, outputDir);

    TableMapReduceUtil.addDependencyJars(job);

    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);
    assertTrue(job.waitForCompletion(true));
    return job;
  }

  protected void setMapperClass(Job job) {
    job.setMapperClass(LoadMapper.class);
  }

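  /**
   * Runs the verify job: scans the table with {@link VerifyMapper} and checks every
   * back-reference in {@link VerifyReducer}. The reducer reuses the ROWS_WRITTEN counter
   * to count missing referenced rows, so the job must finish with that counter at zero.
   */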
  protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "verify-output");

    Job job = new Job(conf);
    job.setJarByClass(this.getClass());
    job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
    setJobScannerConf(job);

    Scan scan = new Scan();

    TableMapReduceUtil.initTableMapperJob(
        htd.getTableName().getNameAsString(), scan, VerifyMapper.class,
        BytesWritable.class, BytesWritable.class, job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
    int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
    TableMapReduceUtil.setScannerCaching(job, scannerCaching);

    job.setReducerClass(VerifyReducer.class);
    job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
    FileOutputFormat.setOutputPath(job, outputDir);
    assertTrue(job.waitForCompletion(true));

    long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
    assertEquals(0, numOutputRecords);
  }

  private static void setJobScannerConf(Job job) {
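    // Make the scanners log their activity so slow or stuck verify scans are easier to
    // debug; log a line roughly every 1% of the per-mapper row count.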
    job.getConfiguration().setBoolean(ScannerCallable.LOG_SCANNER_ACTIVITY, true);
    long lpr = job.getConfiguration().getLong(NUM_TO_WRITE_KEY, NUM_TO_WRITE_DEFAULT) / 100;
    job.getConfiguration().setInt(TableRecordReaderImpl.LOG_PER_ROW_COUNT, (int)lpr);
  }

  public Path getTestDir(String testName, String subdir) throws IOException {

    FileSystem fs = FileSystem.get(getConf());
    Path base = new Path(fs.getWorkingDirectory(), "test-data");
    String randomStr = UUID.randomUUID().toString();
    Path testDir = new Path(base, randomStr);
    fs.deleteOnExit(testDir);

    return new Path(new Path(testDir, testName), subdir);
  }

  @Test
  public void testLoadAndVerify() throws Exception {
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TEST_NAME));
    htd.addFamily(new HColumnDescriptor(TEST_FAMILY));

    HBaseAdmin admin = getTestingUtil(getConf()).getHBaseAdmin();
    admin.createTable(htd, Bytes.toBytes(0L), Bytes.toBytes(-1L), 40);

    doLoad(getConf(), htd);
    doVerify(getConf(), htd);

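    // Only drop the table once verification has passed; a failed assertion above leaves
    // the table in place for post-mortem inspection.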
    getTestingUtil(getConf()).deleteTable(htd.getName());
  }

  public void usage() {
    System.err.println(this.getClass().getSimpleName() + " [-Doptions] <load|verify|loadAndVerify>");
    System.err.println("  Loads a table with row dependencies and verifies the dependency chains");
    System.err.println("Options");
    System.err.println("  -Dloadmapper.table=<name>        Table to write/verify (default IntegrationTestLoadAndVerify)");
    System.err.println("  -Dloadmapper.backrefs=<n>        Number of backreferences per row (default 50)");
    System.err.println("  -Dloadmapper.num_to_write=<n>    Number of rows to write per mapper (default 100,000)");
    System.err.println("  -Dloadmapper.deleteAfter=<bool>  Delete the table after a successful verify (default true)");
    System.err.println("  -Dloadmapper.numPresplits=<n>    Number of presplit regions to start with (default 40)");
    System.err.println("  -Dloadmapper.map.tasks=<n>       Number of map tasks for load (default 200)");
    System.err.println("  -Dverify.reduce.tasks=<n>        Number of reduce tasks for verify (default 35)");
    System.err.println("  -Dverify.scannercaching=<n>      Number of rows for HBase scanner caching during verify (default 500)");
  }

  @Override
  protected void processOptions(CommandLine cmd) {
    super.processOptions(cmd);

    String[] args = cmd.getArgs();
    if (args == null || args.length != 1) {
      usage();
      throw new RuntimeException("Incorrect number of arguments.");
    }
    toRun = args[0];
  }

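  /**
   * Command-line entry point used by {@link #main(String[])}: runs the load phase, the
   * verify phase, or both, depending on the argument parsed in
   * {@link #processOptions(CommandLine)}.
   */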
  @Override
  public int runTestFromCommandLine() throws Exception {
    IntegrationTestingUtility.setUseDistributedCluster(getConf());
    boolean doLoad = false;
    boolean doVerify = false;
    boolean doDelete = getConf().getBoolean("loadmapper.deleteAfter", true);
    int numPresplits = getConf().getInt("loadmapper.numPresplits", 40);

    if (toRun.equals("load")) {
      doLoad = true;
    } else if (toRun.equals("verify")) {
      doVerify = true;
    } else if (toRun.equals("loadAndVerify")) {
      doLoad = true;
      doVerify = true;
    } else {
      System.err.println("Invalid argument " + toRun);
      usage();
      return 1;
    }

    String table = getTablename();
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(table));
    htd.addFamily(new HColumnDescriptor(TEST_FAMILY));

    HBaseAdmin admin = new HBaseAdmin(getConf());
    if (doLoad) {
      admin.createTable(htd, Bytes.toBytes(0L), Bytes.toBytes(-1L), numPresplits);
      doLoad(getConf(), htd);
    }
    if (doVerify) {
      doVerify(getConf(), htd);
      if (doDelete) {
        getTestingUtil(getConf()).deleteTable(htd.getName());
      }
    }
    admin.close();
    return 0;
  }

  @Override
  public String getTablename() {
    return getConf().get(TABLE_NAME_KEY, TEST_NAME);
  }

  @Override
  protected Set<String> getColumnFamilies() {
    return Sets.newHashSet(Bytes.toString(TEST_FAMILY));
  }

  public static void main(String[] argv) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    IntegrationTestingUtility.setUseDistributedCluster(conf);
    int ret = ToolRunner.run(conf, new IntegrationTestLoadAndVerify(), argv);
    System.exit(ret);
  }
}