1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mttr;
20
21 import static org.junit.Assert.assertEquals;
22
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.concurrent.Callable;
26 import java.util.concurrent.ExecutorService;
27 import java.util.concurrent.Executors;
28 import java.util.concurrent.Future;
29 import java.util.concurrent.TimeUnit;
30
31 import org.apache.commons.lang.RandomStringUtils;
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
35 import org.apache.hadoop.hbase.ClusterStatus;
36 import org.apache.hadoop.hbase.HColumnDescriptor;
37 import org.apache.hadoop.hbase.HTableDescriptor;
38 import org.apache.hadoop.hbase.IntegrationTestingUtility;
39 import org.apache.hadoop.hbase.testclassification.IntegrationTests;
40 import org.apache.hadoop.hbase.InvalidFamilyOperationException;
41 import org.apache.hadoop.hbase.NamespaceExistException;
42 import org.apache.hadoop.hbase.NamespaceNotFoundException;
43 import org.apache.hadoop.hbase.TableExistsException;
44 import org.apache.hadoop.hbase.TableName;
45 import org.apache.hadoop.hbase.TableNotFoundException;
46 import org.apache.hadoop.hbase.chaos.actions.Action;
47 import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction;
48 import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
49 import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction;
50 import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingTableAction;
51 import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;
52 import org.apache.hadoop.hbase.client.Admin;
53 import org.apache.hadoop.hbase.client.HBaseAdmin;
54 import org.apache.hadoop.hbase.client.HTable;
55 import org.apache.hadoop.hbase.client.Put;
56 import org.apache.hadoop.hbase.client.Result;
57 import org.apache.hadoop.hbase.client.ResultScanner;
58 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
59 import org.apache.hadoop.hbase.client.Scan;
60 import org.apache.hadoop.hbase.client.Table;
61 import org.apache.hadoop.hbase.coprocessor.CoprocessorException;
62 import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
63 import org.apache.hadoop.hbase.ipc.FatalConnectionException;
64 import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
65 import org.apache.hadoop.hbase.security.AccessDeniedException;
66 import org.apache.hadoop.hbase.util.Bytes;
67 import org.apache.hadoop.hbase.util.LoadTestTool;
68 import org.apache.htrace.Span;
69 import org.apache.htrace.Trace;
70 import org.apache.htrace.TraceScope;
71 import org.apache.htrace.impl.AlwaysSampler;
72 import org.junit.AfterClass;
73 import org.junit.BeforeClass;
74 import org.junit.Test;
75 import org.junit.experimental.categories.Category;
76
77 import com.google.common.base.Objects;
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118 @Category(IntegrationTests.class)
119 public class IntegrationTestMTTR {
120
121
122
123 private static final byte[] FAMILY = Bytes.toBytes("d");
124 private static final Log LOG = LogFactory.getLog(IntegrationTestMTTR.class);
125 private static long sleepTime;
126 private static final String SLEEP_TIME_KEY = "hbase.IntegrationTestMTTR.sleeptime";
127 private static final long SLEEP_TIME_DEFAULT = 60 * 1000l;
128
129
130
131
132 private static TableName tableName;
133 private static TableName loadTableName;
134
135
136
137
138 private static IntegrationTestingUtility util;
139
140
141
142
143 private static ExecutorService executorService;
144
145
146
147
148 private static Action restartRSAction;
149 private static Action restartMetaAction;
150 private static Action moveRegionAction;
151 private static Action restartMasterAction;
152
153
154
155
156 private static LoadTestTool loadTool;
157
158
159 @BeforeClass
160 public static void setUp() throws Exception {
161
162 if (util == null) {
163 util = new IntegrationTestingUtility();
164 }
165
166
167 util.initializeCluster(3);
168
169
170 loadTool = new LoadTestTool();
171 loadTool.setConf(util.getConfiguration());
172
173
174
175 executorService = Executors.newFixedThreadPool(8);
176
177
178 setupTables();
179
180
181 sleepTime = util.getConfiguration().getLong(SLEEP_TIME_KEY, SLEEP_TIME_DEFAULT);
182 setupActions();
183 }
184
185 private static void setupActions() throws IOException {
186
187
188 util.getConfiguration().setLong(Action.START_RS_TIMEOUT_KEY, 3 * 60 * 1000);
189
190
191
192 restartRSAction = new RestartRsHoldingTableAction(sleepTime, tableName.getNameAsString());
193
194
195 restartMetaAction = new RestartRsHoldingMetaAction(sleepTime);
196
197
198 moveRegionAction = new MoveRegionsOfTableAction(sleepTime,
199 MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, tableName);
200
201
202 restartMasterAction = new RestartActiveMasterAction(1000);
203
204
205 Action.ActionContext actionContext = new Action.ActionContext(util);
206 restartRSAction.init(actionContext);
207 restartMetaAction.init(actionContext);
208 moveRegionAction.init(actionContext);
209 restartMasterAction.init(actionContext);
210 }
211
212 private static void setupTables() throws IOException {
213
214 tableName = TableName.valueOf(util.getConfiguration()
215 .get("hbase.IntegrationTestMTTR.tableName", "IntegrationTestMTTR"));
216
217 loadTableName = TableName.valueOf(util.getConfiguration()
218 .get("hbase.IntegrationTestMTTR.loadTableName", "IntegrationTestMTTRLoadTestTool"));
219
220 if (util.getHBaseAdmin().tableExists(tableName)) {
221 util.deleteTable(tableName);
222 }
223
224 if (util.getHBaseAdmin().tableExists(loadTableName)) {
225 util.deleteTable(loadTableName);
226 }
227
228
229 HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
230
231
232 tableDescriptor.setMaxFileSize(Long.MAX_VALUE);
233
234 HColumnDescriptor descriptor = new HColumnDescriptor(FAMILY);
235 descriptor.setMaxVersions(1);
236 tableDescriptor.addFamily(descriptor);
237 util.getHBaseAdmin().createTable(tableDescriptor);
238
239
240 int ret = loadTool.run(new String[]{"-tn", loadTableName.getNameAsString(), "-init_only"});
241 assertEquals("Failed to initialize LoadTestTool", 0, ret);
242 }
243
244 @AfterClass
245 public static void after() throws IOException {
246
247 util.restoreCluster();
248 util = null;
249
250
251 executorService.shutdown();
252 executorService = null;
253
254
255 moveRegionAction = null;
256 restartMetaAction = null;
257 restartRSAction = null;
258 restartMasterAction = null;
259
260 loadTool = null;
261 }
262
263 @Test
264 public void testRestartRsHoldingTable() throws Exception {
265 run(new ActionCallable(restartRSAction), "RestartRsHoldingTableAction");
266 }
267
268 @Test
269 public void testKillRsHoldingMeta() throws Exception {
270 run(new ActionCallable(restartMetaAction), "KillRsHoldingMeta");
271 }
272
273 @Test
274 public void testMoveRegion() throws Exception {
275 run(new ActionCallable(moveRegionAction), "MoveRegion");
276 }
277
278 @Test
279 public void testRestartMaster() throws Exception {
280 run(new ActionCallable(restartMasterAction), "RestartMaster");
281 }
282
283 public void run(Callable<Boolean> monkeyCallable, String testName) throws Exception {
284 int maxIters = util.getHBaseClusterInterface().isDistributedCluster() ? 10 : 3;
285 LOG.info("Starting " + testName + " with " + maxIters + " iterations.");
286
287
288 ArrayList<TimingResult> resultPuts = new ArrayList<TimingResult>(maxIters);
289 ArrayList<TimingResult> resultScan = new ArrayList<TimingResult>(maxIters);
290 ArrayList<TimingResult> resultAdmin = new ArrayList<TimingResult>(maxIters);
291 long start = System.nanoTime();
292
293 try {
294
295 for (int fullIterations = 0; fullIterations < maxIters; fullIterations++) {
296
297 Future<Boolean> monkeyFuture = executorService.submit(monkeyCallable);
298
299
300 Future<TimingResult> putFuture = executorService.submit(new PutCallable(monkeyFuture));
301 Future<TimingResult> scanFuture = executorService.submit(new ScanCallable(monkeyFuture));
302 Future<TimingResult> adminFuture = executorService.submit(new AdminCallable(monkeyFuture));
303
304 Future<Boolean> loadFuture = executorService.submit(new LoadCallable(monkeyFuture));
305
306 monkeyFuture.get();
307 loadFuture.get();
308
309
310 TimingResult putTime = putFuture.get();
311 TimingResult scanTime = scanFuture.get();
312 TimingResult adminTime = adminFuture.get();
313
314
315 resultPuts.add(putTime);
316 resultScan.add(scanTime);
317 resultAdmin.add(adminTime);
318
319
320 Thread.sleep(5000l);
321 }
322 } catch (Exception e) {
323 long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
324 LOG.info(testName + " failed after " + runtimeMs + "ms.", e);
325 throw e;
326 }
327
328 long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
329
330 Objects.ToStringHelper helper = Objects.toStringHelper("MTTRResults")
331 .add("putResults", resultPuts)
332 .add("scanResults", resultScan)
333 .add("adminResults", resultAdmin)
334 .add("totalRuntimeMs", runtimeMs)
335 .add("name", testName);
336
337
338 LOG.info(helper.toString());
339 }
340
341
342
343
344
345
346 private static class TimingResult {
347 DescriptiveStatistics stats = new DescriptiveStatistics();
348 ArrayList<Long> traces = new ArrayList<Long>(10);
349
350
351
352
353
354
355 public void addResult(long time, Span span) {
356 stats.addValue(TimeUnit.MILLISECONDS.convert(time, TimeUnit.NANOSECONDS));
357 if (TimeUnit.SECONDS.convert(time, TimeUnit.NANOSECONDS) >= 1) {
358 traces.add(span.getTraceId());
359 }
360 }
361
362 @Override
363 public String toString() {
364 Objects.ToStringHelper helper = Objects.toStringHelper(this)
365 .add("numResults", stats.getN())
366 .add("minTime", stats.getMin())
367 .add("meanTime", stats.getMean())
368 .add("maxTime", stats.getMax())
369 .add("25th", stats.getPercentile(25))
370 .add("50th", stats.getPercentile(50))
371 .add("75th", stats.getPercentile(75))
372 .add("90th", stats.getPercentile(90))
373 .add("95th", stats.getPercentile(95))
374 .add("99th", stats.getPercentile(99))
375 .add("99.9th", stats.getPercentile(99.9))
376 .add("99.99th", stats.getPercentile(99.99))
377 .add("traces", traces);
378 return helper.toString();
379 }
380 }
381
382
383
384
385 static abstract class TimingCallable implements Callable<TimingResult> {
386 protected final Future<?> future;
387
388 public TimingCallable(Future<?> f) {
389 future = f;
390 }
391
392 @Override
393 public TimingResult call() throws Exception {
394 TimingResult result = new TimingResult();
395 final int maxIterations = 10;
396 int numAfterDone = 0;
397 int resetCount = 0;
398
399 while (numAfterDone < maxIterations) {
400 long start = System.nanoTime();
401 TraceScope scope = null;
402 try {
403 scope = Trace.startSpan(getSpanName(), AlwaysSampler.INSTANCE);
404 boolean actionResult = doAction();
405 if (actionResult && future.isDone()) {
406 numAfterDone++;
407 }
408
409
410
411
412
413
414 } catch (AccessDeniedException e) {
415 throw e;
416 } catch (CoprocessorException e) {
417 throw e;
418 } catch (FatalConnectionException e) {
419 throw e;
420 } catch (InvalidFamilyOperationException e) {
421 throw e;
422 } catch (NamespaceExistException e) {
423 throw e;
424 } catch (NamespaceNotFoundException e) {
425 throw e;
426 } catch (NoSuchColumnFamilyException e) {
427 throw e;
428 } catch (TableExistsException e) {
429 throw e;
430 } catch (TableNotFoundException e) {
431 throw e;
432 } catch (RetriesExhaustedException e){
433 throw e;
434
435
436
437
438
439 } catch (Exception e) {
440 resetCount++;
441 if (resetCount < maxIterations) {
442 LOG.info("Non-fatal exception while running " + this.toString()
443 + ". Resetting loop counter", e);
444 numAfterDone = 0;
445 } else {
446 LOG.info("Too many unexpected Exceptions. Aborting.", e);
447 throw e;
448 }
449 } finally {
450 if (scope != null) {
451 scope.close();
452 }
453 }
454 result.addResult(System.nanoTime() - start, scope.getSpan());
455 }
456 return result;
457 }
458
459 protected abstract boolean doAction() throws Exception;
460
461 protected String getSpanName() {
462 return this.getClass().getSimpleName();
463 }
464
465 @Override
466 public String toString() {
467 return this.getSpanName();
468 }
469 }
470
471
472
473
474
475 static class PutCallable extends TimingCallable {
476
477 private final Table table;
478
479 public PutCallable(Future<?> f) throws IOException {
480 super(f);
481 this.table = new HTable(util.getConfiguration(), tableName);
482 }
483
484 @Override
485 protected boolean doAction() throws Exception {
486 Put p = new Put(Bytes.toBytes(RandomStringUtils.randomAlphanumeric(5)));
487 p.add(FAMILY, Bytes.toBytes("\0"), Bytes.toBytes(RandomStringUtils.randomAscii(5)));
488 table.put(p);
489 return true;
490 }
491
492 @Override
493 protected String getSpanName() {
494 return "MTTR Put Test";
495 }
496 }
497
498
499
500
501
502 static class ScanCallable extends TimingCallable {
503 private final Table table;
504
505 public ScanCallable(Future<?> f) throws IOException {
506 super(f);
507 this.table = new HTable(util.getConfiguration(), tableName);
508 }
509
510 @Override
511 protected boolean doAction() throws Exception {
512 ResultScanner rs = null;
513 try {
514 Scan s = new Scan();
515 s.setBatch(2);
516 s.addFamily(FAMILY);
517 s.setFilter(new KeyOnlyFilter());
518 s.setMaxVersions(1);
519
520 rs = table.getScanner(s);
521 Result result = rs.next();
522 return result != null && result.size() > 0;
523 } finally {
524 if (rs != null) {
525 rs.close();
526 }
527 }
528 }
529 @Override
530 protected String getSpanName() {
531 return "MTTR Scan Test";
532 }
533 }
534
535
536
537
538 static class AdminCallable extends TimingCallable {
539
540 public AdminCallable(Future<?> f) throws IOException {
541 super(f);
542 }
543
544 @Override
545 protected boolean doAction() throws Exception {
546 Admin admin = null;
547 try {
548 admin = new HBaseAdmin(util.getConfiguration());
549 ClusterStatus status = admin.getClusterStatus();
550 return status != null;
551 } finally {
552 if (admin != null) {
553 admin.close();
554 }
555 }
556 }
557
558 @Override
559 protected String getSpanName() {
560 return "MTTR Admin Test";
561 }
562 }
563
564
565 static class ActionCallable implements Callable<Boolean> {
566 private final Action action;
567
568 public ActionCallable(Action action) {
569 this.action = action;
570 }
571
572 @Override
573 public Boolean call() throws Exception {
574 this.action.perform();
575 return true;
576 }
577 }
578
579
580
581
582
583 public static class LoadCallable implements Callable<Boolean> {
584
585 private final Future<?> future;
586
587 public LoadCallable(Future<?> f) {
588 future = f;
589 }
590
591 @Override
592 public Boolean call() throws Exception {
593 int colsPerKey = 10;
594 int numServers = util.getHBaseClusterInterface().getInitialClusterStatus().getServersSize();
595 int numKeys = numServers * 5000;
596 int writeThreads = 10;
597
598
599
600
601 do {
602 int ret = loadTool.run(new String[]{
603 "-tn", loadTableName.getNameAsString(),
604 "-write", String.format("%d:%d:%d", colsPerKey, 500, writeThreads),
605 "-num_keys", String.valueOf(numKeys),
606 "-skip_init"
607 });
608 assertEquals("Load failed", 0, ret);
609 } while (!future.isDone());
610
611 return true;
612 }
613 }
614 }