/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import com.google.common.collect.Sets;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.IntegrationTestBase;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.BufferedMutatorParams;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ScannerCallable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.NMapInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.util.AbstractHBaseTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import java.io.IOException;
import java.util.Random;
import java.util.Set;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
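
/**
 * A large test which loads a lot of data with internal references, and
 * verifies the referential integrity of that data.
 *
 * In the load step, each map task writes loadmapper.num_to_write (default
 * 100K) rows to the test table. Rows are written in blocks of 1/100th of the
 * total; from the second block onward, every row carries
 * loadmapper.backrefs (default 50) back-references to random rows of the
 * previous block.
 *
 * The verify step scans the table and checks that every row named by a
 * back-reference actually exists. Missing rows are written to the reduce
 * output for later inspection, and the test asserts that none are found.
 *
 * The test can run as a JUnit integration test or from the command line,
 * e.g.:
 *   hbase org.apache.hadoop.hbase.test.IntegrationTestLoadAndVerify loadAndVerify
 */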
@Category(IntegrationTests.class)
public class IntegrationTestLoadAndVerify extends IntegrationTestBase {
  private static final String TEST_NAME = "IntegrationTestLoadAndVerify";
  private static final byte[] TEST_FAMILY = Bytes.toBytes("f1");
  private static final byte[] TEST_QUALIFIER = Bytes.toBytes("q1");

  private static final String NUM_TO_WRITE_KEY = "loadmapper.num_to_write";
  private static final long NUM_TO_WRITE_DEFAULT = 100 * 1000;

  private static final String TABLE_NAME_KEY = "loadmapper.table";
  private static final String TABLE_NAME_DEFAULT = "table";

  private static final String NUM_BACKREFS_KEY = "loadmapper.backrefs";
  private static final int NUM_BACKREFS_DEFAULT = 50;

  private static final String NUM_MAP_TASKS_KEY = "loadmapper.map.tasks";
  private static final String NUM_REDUCE_TASKS_KEY = "verify.reduce.tasks";
  private static final int NUM_MAP_TASKS_DEFAULT = 200;
  private static final int NUM_REDUCE_TASKS_DEFAULT = 35;

  private static final int SCANNER_CACHING = 500;

  private String toRun = null;

  private enum Counters {
    ROWS_WRITTEN,
    REFERENCES_WRITTEN,
    REFERENCES_CHECKED
  }

  @Override
  public void setUpCluster() throws Exception {
    util = getTestingUtil(getConf());
    util.initializeCluster(3);
    this.setConf(util.getConfiguration());
    if (!util.isDistributedCluster()) {
      // Scale the run down so it completes on a local minicluster.
      getConf().setLong(NUM_TO_WRITE_KEY, NUM_TO_WRITE_DEFAULT / 100);
      getConf().setInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT / 100);
      getConf().setInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT / 10);
      util.startMiniMapReduceCluster();
    }
  }

  @Override
  public void cleanUpCluster() throws Exception {
    super.cleanUpCluster();
    if (!util.isDistributedCluster()) {
      util.shutdownMiniMapReduceCluster();
    }
  }
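
  /**
   * Reverses the byte order of a long, i.e. converts it between big-endian
   * and little-endian representations.
   */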
  public static long swapLong(long value) {
    return
        (((value >>  0) & 0xff) << 56) +
        (((value >>  8) & 0xff) << 48) +
        (((value >> 16) & 0xff) << 40) +
        (((value >> 24) & 0xff) << 32) +
        (((value >> 32) & 0xff) << 24) +
        (((value >> 40) & 0xff) << 16) +
        (((value >> 48) & 0xff) <<  8) +
        (((value >> 56) & 0xff) <<  0);
  }
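
  /**
   * Mapper for the load phase: writes rows whose 8-byte prefix is the
   * byte-swapped record index (to spread writes across regions), in blocks
   * of 1/100th of the total, with each row after the first block carrying
   * back-references to random rows of the previous block.
   */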
  public static class LoadMapper
      extends Mapper<NullWritable, NullWritable, NullWritable, NullWritable> {
    protected long recordsToWrite;
    protected Connection connection;
    protected BufferedMutator mutator;
    protected Configuration conf;
    protected int numBackReferencesPerRow;
    protected String shortTaskId;

    protected Random rand = new Random();

    protected Counter rowsWritten, refsWritten;

    @Override
    public void setup(Context context) throws IOException {
      conf = context.getConfiguration();
      recordsToWrite = conf.getLong(NUM_TO_WRITE_KEY, NUM_TO_WRITE_DEFAULT);
      String tableName = conf.get(TABLE_NAME_KEY, TABLE_NAME_DEFAULT);
      numBackReferencesPerRow = conf.getInt(NUM_BACKREFS_KEY, NUM_BACKREFS_DEFAULT);
      this.connection = ConnectionFactory.createConnection(conf);
      mutator = connection.getBufferedMutator(
          new BufferedMutatorParams(TableName.valueOf(tableName))
              .writeBufferSize(4 * 1024 * 1024));

      // Use the trailing "<task>_<attempt>" digits of the map task attempt ID
      // as a per-task row suffix, so tasks never collide on row keys.
      String taskId = conf.get("mapreduce.task.attempt.id");
      Matcher matcher = Pattern.compile(".+_m_(\\d+_\\d+)").matcher(taskId);
      if (!matcher.matches()) {
        throw new RuntimeException("Strange task ID: " + taskId);
      }
      shortTaskId = matcher.group(1);

      rowsWritten = context.getCounter(Counters.ROWS_WRITTEN);
      refsWritten = context.getCounter(Counters.REFERENCES_WRITTEN);
    }

    @Override
    public void cleanup(Context context) throws IOException {
      mutator.close();
      connection.close();
    }

    @Override
    protected void map(NullWritable key, NullWritable value,
        Context context) throws IOException, InterruptedException {
      // Row layout: 8 bytes of byte-swapped record index, then "/<taskId>".
      String suffix = "/" + shortTaskId;
      byte[] row = Bytes.add(new byte[8], Bytes.toBytes(suffix));

      int BLOCK_SIZE = (int) (recordsToWrite / 100);

      for (long i = 0; i < recordsToWrite;) {
        long blockStart = i;
        for (long idxInBlock = 0;
            idxInBlock < BLOCK_SIZE && i < recordsToWrite;
            idxInBlock++, i++) {
          long byteSwapped = swapLong(i);
          Bytes.putLong(row, 0, byteSwapped);

          Put p = new Put(row);
          p.add(TEST_FAMILY, TEST_QUALIFIER, HConstants.EMPTY_BYTE_ARRAY);
          if (blockStart > 0) {
            // Reference random rows of the previous block, using the
            // referred-to row key as the column qualifier.
            for (int j = 0; j < numBackReferencesPerRow; j++) {
              long referredRow = blockStart - BLOCK_SIZE + rand.nextInt(BLOCK_SIZE);
              Bytes.putLong(row, 0, swapLong(referredRow));
              p.add(TEST_FAMILY, row, HConstants.EMPTY_BYTE_ARRAY);
            }
            refsWritten.increment(1);
          }
          rowsWritten.increment(1);
          mutator.mutate(p);

          if (i % 100 == 0) {
            context.setStatus("Written " + i + "/" + recordsToWrite + " records");
            context.progress();
          }
        }
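        // End of block: flush now so the rows just written are persisted
        // before the next block starts writing back-references to them.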
        mutator.flush();
      }
    }
  }
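
  /**
   * Mapper for the verify phase: emits (row, EMPTY) for a row's own data
   * cell, and (referred-to row, referring row) for each back-reference cell,
   * so the reducer sees all referrers of a row alongside the marker for the
   * row itself.
   */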
  public static class VerifyMapper extends TableMapper<BytesWritable, BytesWritable> {
    static final BytesWritable EMPTY = new BytesWritable(HConstants.EMPTY_BYTE_ARRAY);

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      BytesWritable bwKey = new BytesWritable(key.get());
      BytesWritable bwVal = new BytesWritable();
      for (Cell kv : value.listCells()) {
        if (Bytes.compareTo(TEST_QUALIFIER, 0, TEST_QUALIFIER.length,
            kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength()) == 0) {
          context.write(bwKey, EMPTY);
        } else {
          bwVal.set(kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength());
          context.write(bwVal, bwKey);
        }
      }
    }
  }
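
  /**
   * Reducer for the verify phase: for each referred-to row, checks that the
   * empty marker emitted by the row itself was seen; if not, the row is
   * missing and is reported in the job output.
   */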
  public static class VerifyReducer extends Reducer<BytesWritable, BytesWritable, Text, Text> {
    private static final Log LOG = LogFactory.getLog(VerifyReducer.class);
    private Counter refsChecked;
    private Counter rowsWritten;

    @Override
    public void setup(Context context) throws IOException {
      refsChecked = context.getCounter(Counters.REFERENCES_CHECKED);
      rowsWritten = context.getCounter(Counters.ROWS_WRITTEN);
    }

    @Override
    protected void reduce(BytesWritable referredRow, Iterable<BytesWritable> referrers,
        VerifyReducer.Context ctx) throws IOException, InterruptedException {
      boolean gotOriginalRow = false;
      int refCount = 0;

      for (BytesWritable ref : referrers) {
        if (ref.getLength() == 0) {
          assert !gotOriginalRow;
          gotOriginalRow = true;
        } else {
          refCount++;
        }
      }
      refsChecked.increment(refCount);

      if (!gotOriginalRow) {
        String parsedRow = makeRowReadable(referredRow.getBytes(), referredRow.getLength());
        String binRow = Bytes.toStringBinary(referredRow.getBytes(), 0, referredRow.getLength());
        LOG.error("Reference error row " + parsedRow);
        ctx.write(new Text(binRow), new Text(parsedRow));
        rowsWritten.increment(1);
      }
    }

    private String makeRowReadable(byte[] bytes, int length) {
      long rowIdx = swapLong(Bytes.toLong(bytes, 0));
      String suffix = Bytes.toString(bytes, 8, length - 8);

      return "Row #" + rowIdx + " suffix " + suffix;
    }
  }

  protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "load-output");

    NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
    conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());

    Job job = Job.getInstance(conf);
    job.setJobName(TEST_NAME + " Load for " + htd.getTableName());
    job.setJarByClass(this.getClass());
    setMapperClass(job);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setNumReduceTasks(0);
    setJobScannerConf(job);
    FileOutputFormat.setOutputPath(job, outputDir);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);
    assertTrue(job.waitForCompletion(true));
    return job;
  }

  protected void setMapperClass(Job job) {
    job.setMapperClass(LoadMapper.class);
  }

  protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "verify-output");

    Job job = Job.getInstance(conf);
    job.setJarByClass(this.getClass());
    job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
    setJobScannerConf(job);

    Scan scan = new Scan();

    TableMapReduceUtil.initTableMapperJob(
        htd.getTableName().getNameAsString(), scan, VerifyMapper.class,
        BytesWritable.class, BytesWritable.class, job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
    int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
    TableMapReduceUtil.setScannerCaching(job, scannerCaching);

    job.setReducerClass(VerifyReducer.class);
    job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
    FileOutputFormat.setOutputPath(job, outputDir);
    assertTrue(job.waitForCompletion(true));

    // The reducer bumps ROWS_WRITTEN once per missing row, so a nonzero
    // count means some reference pointed at a row that was never found.
    long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
    assertEquals(0, numOutputRecords);
  }

  private static void setJobScannerConf(Job job) {
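    // Log scanner activity, and have the record reader log progress roughly
    // every 1% of the expected row count.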
    job.getConfiguration().setBoolean(ScannerCallable.LOG_SCANNER_ACTIVITY, true);
    long lpr = job.getConfiguration().getLong(NUM_TO_WRITE_KEY, NUM_TO_WRITE_DEFAULT) / 100;
    job.getConfiguration().setInt(TableRecordReaderImpl.LOG_PER_ROW_COUNT, (int) lpr);
  }

  public Path getTestDir(String testName, String subdir) throws IOException {
    FileSystem fs = FileSystem.get(getConf());
    Path base = new Path(fs.getWorkingDirectory(), "test-data");
    String randomStr = UUID.randomUUID().toString();
    Path testDir = new Path(base, randomStr);
    fs.deleteOnExit(testDir);

    return new Path(new Path(testDir, testName), subdir);
  }

  @Test
  public void testLoadAndVerify() throws Exception {
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TEST_NAME));
    htd.addFamily(new HColumnDescriptor(TEST_FAMILY));

    Admin admin = getTestingUtil(getConf()).getHBaseAdmin();
    admin.createTable(htd, Bytes.toBytes(0L), Bytes.toBytes(-1L), 40);

    doLoad(getConf(), htd);
    doVerify(getConf(), htd);
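
    // Only reached if verification succeeded; a failed doVerify() throws,
    // leaving the table in place for post-mortem inspection.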
    getTestingUtil(getConf()).deleteTable(htd.getTableName());
  }

  public void usage() {
    System.err.println(this.getClass().getSimpleName() + " [-Doptions] <load|verify|loadAndVerify>");
    System.err.println("  Loads a table with row dependencies and verifies the dependency chains");
    System.err.println("Options");
    System.err.println("  -Dloadmapper.table=<name>        Table to write/verify (default IntegrationTestLoadAndVerify)");
    System.err.println("  -Dloadmapper.backrefs=<n>        Number of backreferences per row (default 50)");
    System.err.println("  -Dloadmapper.num_to_write=<n>    Number of rows per mapper (default 100,000)");
    System.err.println("  -Dloadmapper.deleteAfter=<bool>  Delete the table after a successful verify (default true)");
    System.err.println("  -Dloadmapper.numPresplits=<n>    Number of presplit regions to start with (default 40)");
    System.err.println("  -Dloadmapper.map.tasks=<n>       Number of map tasks for load (default 200)");
    System.err.println("  -Dverify.reduce.tasks=<n>        Number of reduce tasks for verify (default 35)");
    System.err.println("  -Dverify.scannercaching=<n>      Scanner caching (rows per RPC) during verify (default 500)");
  }

  @Override
  protected void processOptions(CommandLine cmd) {
    super.processOptions(cmd);

    String[] args = cmd.getArgs();
    if (args == null || args.length != 1) {
      usage();
      throw new RuntimeException("Incorrect number of arguments.");
    }
    toRun = args[0];
  }

  @Override
  public int runTestFromCommandLine() throws Exception {
    IntegrationTestingUtility.setUseDistributedCluster(getConf());
    boolean doLoad = false;
    boolean doVerify = false;
    boolean doDelete = getConf().getBoolean("loadmapper.deleteAfter", true);
    int numPresplits = getConf().getInt("loadmapper.numPresplits", 40);

    if (toRun.equals("load")) {
      doLoad = true;
    } else if (toRun.equals("verify")) {
      doVerify = true;
    } else if (toRun.equals("loadAndVerify")) {
      doLoad = true;
      doVerify = true;
    } else {
      System.err.println("Invalid argument " + toRun);
      usage();
      return 1;
    }

    TableName table = getTablename();
    HTableDescriptor htd = new HTableDescriptor(table);
    htd.addFamily(new HColumnDescriptor(TEST_FAMILY));

    try (Connection conn = ConnectionFactory.createConnection(getConf());
        Admin admin = conn.getAdmin()) {
      if (doLoad) {
        admin.createTable(htd, Bytes.toBytes(0L), Bytes.toBytes(-1L), numPresplits);
        doLoad(getConf(), htd);
      }
    }
    if (doVerify) {
      doVerify(getConf(), htd);
      if (doDelete) {
        getTestingUtil(getConf()).deleteTable(htd.getTableName());
      }
    }
    return 0;
  }

  @Override
  public TableName getTablename() {
    return TableName.valueOf(getConf().get(TABLE_NAME_KEY, TEST_NAME));
  }

  @Override
  protected Set<String> getColumnFamilies() {
    return Sets.newHashSet(Bytes.toString(TEST_FAMILY));
  }

  public static void main(String[] argv) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    IntegrationTestingUtility.setUseDistributedCluster(conf);
    int ret = ToolRunner.run(conf, new IntegrationTestLoadAndVerify(), argv);
    System.exit(ret);
  }
}