1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.tool;
21
22 import java.io.Closeable;
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.HashMap;
27 import java.util.HashSet;
28 import java.util.LinkedList;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.Map.Entry;
32 import java.util.Random;
33 import java.util.Set;
34 import java.util.TreeSet;
35 import java.util.concurrent.Callable;
36 import java.util.concurrent.ExecutionException;
37 import java.util.concurrent.ExecutorService;
38 import java.util.concurrent.Future;
39 import java.util.concurrent.ScheduledThreadPoolExecutor;
40 import java.util.concurrent.atomic.AtomicLong;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43
44 import org.apache.commons.lang.time.StopWatch;
45 import org.apache.commons.logging.Log;
46 import org.apache.commons.logging.LogFactory;
47 import org.apache.hadoop.conf.Configuration;
48 import org.apache.hadoop.hbase.AuthUtil;
49 import org.apache.hadoop.hbase.DoNotRetryIOException;
50 import org.apache.hadoop.hbase.HBaseConfiguration;
51 import org.apache.hadoop.hbase.HColumnDescriptor;
52 import org.apache.hadoop.hbase.HConstants;
53 import org.apache.hadoop.hbase.HRegionInfo;
54 import org.apache.hadoop.hbase.HRegionLocation;
55 import org.apache.hadoop.hbase.HTableDescriptor;
56 import org.apache.hadoop.hbase.NamespaceDescriptor;
57 import org.apache.hadoop.hbase.ServerName;
58 import org.apache.hadoop.hbase.TableName;
59 import org.apache.hadoop.hbase.TableNotEnabledException;
60 import org.apache.hadoop.hbase.TableNotFoundException;
61 import org.apache.hadoop.hbase.client.Get;
62 import org.apache.hadoop.hbase.client.HBaseAdmin;
63 import org.apache.hadoop.hbase.client.HConnection;
64 import org.apache.hadoop.hbase.client.HConnectionManager;
65 import org.apache.hadoop.hbase.client.HTable;
66 import org.apache.hadoop.hbase.client.HTableInterface;
67 import org.apache.hadoop.hbase.client.Put;
68 import org.apache.hadoop.hbase.client.ResultScanner;
69 import org.apache.hadoop.hbase.client.Scan;
70 import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
71 import org.apache.hadoop.hbase.tool.Canary.RegionTask.TaskType;
72 import org.apache.hadoop.hbase.util.Bytes;
73 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
74 import org.apache.hadoop.hbase.util.ReflectionUtils;
75 import org.apache.hadoop.hbase.util.RegionSplitter;
76 import org.apache.hadoop.util.GenericOptionsParser;
77 import org.apache.hadoop.util.Tool;
78 import org.apache.hadoop.util.ToolRunner;
79
80 import com.google.protobuf.ServiceException;
81
82
83
84
85
86
87
88
89
90
91
92
93 public final class Canary implements Tool {
94
95 public interface Sink {
96 public long getReadFailureCount();
97 public long incReadFailureCount();
98 public void publishReadFailure(HRegionInfo region, Exception e);
99 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
100 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
101 public long getWriteFailureCount();
102 public void publishWriteFailure(HRegionInfo region, Exception e);
103 public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
104 public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
105 }
106
107
108 public interface ExtendedSink extends Sink {
109 public void publishReadFailure(String table, String server);
110 public void publishReadTiming(String table, String server, long msTime);
111 }
112
113
114
115 public static class StdOutSink implements Sink {
116 private AtomicLong readFailureCount = new AtomicLong(0),
117 writeFailureCount = new AtomicLong(0);
118
119 @Override
120 public long getReadFailureCount() {
121 return readFailureCount.get();
122 }
123
124 @Override
125 public long incReadFailureCount() {
126 return readFailureCount.incrementAndGet();
127 }
128
129 @Override
130 public void publishReadFailure(HRegionInfo region, Exception e) {
131 readFailureCount.incrementAndGet();
132 LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
133 }
134
135 @Override
136 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
137 readFailureCount.incrementAndGet();
138 LOG.error(String.format("read from region %s column family %s failed",
139 region.getRegionNameAsString(), column.getNameAsString()), e);
140 }
141
142 @Override
143 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
144 LOG.info(String.format("read from region %s column family %s in %dms",
145 region.getRegionNameAsString(), column.getNameAsString(), msTime));
146 }
147
148 @Override
149 public long getWriteFailureCount() {
150 return writeFailureCount.get();
151 }
152
153 @Override
154 public void publishWriteFailure(HRegionInfo region, Exception e) {
155 writeFailureCount.incrementAndGet();
156 LOG.error(String.format("write to region %s failed", region.getRegionNameAsString()), e);
157 }
158
159 @Override
160 public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
161 writeFailureCount.incrementAndGet();
162 LOG.error(String.format("write to region %s column family %s failed",
163 region.getRegionNameAsString(), column.getNameAsString()), e);
164 }
165
166 @Override
167 public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
168 LOG.info(String.format("write to region %s column family %s in %dms",
169 region.getRegionNameAsString(), column.getNameAsString(), msTime));
170 }
171 }
172
173 public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
174
175 @Override
176 public void publishReadFailure(String table, String server) {
177 incReadFailureCount();
178 LOG.error(String.format("Read from table:%s on region server:%s", table, server));
179 }
180
181 @Override
182 public void publishReadTiming(String table, String server, long msTime) {
183 LOG.info(String.format("Read from table:%s on region server:%s in %dms",
184 table, server, msTime));
185 }
186 }
187
188
189
190
191
192 public static class RegionTask implements Callable<Void> {
193 public enum TaskType{
194 READ, WRITE
195 }
196 private HConnection connection;
197 private HRegionInfo region;
198 private Sink sink;
199 private TaskType taskType;
200
201 RegionTask(HConnection connection, HRegionInfo region, Sink sink, TaskType taskType) {
202 this.connection = connection;
203 this.region = region;
204 this.sink = sink;
205 this.taskType = taskType;
206 }
207
208 @Override
209 public Void call() {
210 switch (taskType) {
211 case READ:
212 return read();
213 case WRITE:
214 return write();
215 default:
216 return read();
217 }
218 }
219
220 public Void read() {
221 HTableInterface table = null;
222 HTableDescriptor tableDesc = null;
223 try {
224 if (LOG.isDebugEnabled()) {
225 LOG.debug(String.format("reading table descriptor for table %s",
226 region.getTable()));
227 }
228 table = connection.getTable(region.getTable());
229 tableDesc = table.getTableDescriptor();
230 } catch (IOException e) {
231 LOG.debug("sniffRegion failed", e);
232 sink.publishReadFailure(region, e);
233 if (table != null) {
234 try {
235 table.close();
236 } catch (IOException ioe) {
237 LOG.error("Close table failed", e);
238 }
239 }
240 return null;
241 }
242
243 byte[] startKey = null;
244 Get get = null;
245 Scan scan = null;
246 ResultScanner rs = null;
247 StopWatch stopWatch = new StopWatch();
248 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
249 stopWatch.reset();
250 startKey = region.getStartKey();
251
252 if (startKey.length > 0) {
253 get = new Get(startKey);
254 get.setCacheBlocks(false);
255 get.setFilter(new FirstKeyOnlyFilter());
256 get.addFamily(column.getName());
257 } else {
258 scan = new Scan();
259 scan.setCaching(1);
260 scan.setCacheBlocks(false);
261 scan.setFilter(new FirstKeyOnlyFilter());
262 scan.addFamily(column.getName());
263 scan.setMaxResultSize(1L);
264 scan.setSmall(true);
265 }
266
267 if (LOG.isDebugEnabled()) {
268 LOG.debug(String.format("reading from table %s region %s column family %s and key %s",
269 tableDesc.getTableName(), region.getRegionNameAsString(), column.getNameAsString(),
270 Bytes.toStringBinary(startKey)));
271 }
272 try {
273 stopWatch.start();
274 if (startKey.length > 0) {
275 table.get(get);
276 } else {
277 rs = table.getScanner(scan);
278 rs.next();
279 }
280 stopWatch.stop();
281 sink.publishReadTiming(region, column, stopWatch.getTime());
282 } catch (Exception e) {
283 sink.publishReadFailure(region, column, e);
284 } finally {
285 if (rs != null) {
286 rs.close();
287 }
288 scan = null;
289 get = null;
290 startKey = null;
291 }
292 }
293 try {
294 table.close();
295 } catch (IOException e) {
296 LOG.error("Close table failed", e);
297 }
298 return null;
299 }
300
301
302
303
304
305 private Void write() {
306 HTableInterface table = null;
307 HTableDescriptor tableDesc = null;
308 try {
309 table = connection.getTable(region.getTable());
310 tableDesc = table.getTableDescriptor();
311 byte[] rowToCheck = region.getStartKey();
312 if (rowToCheck.length == 0) {
313 rowToCheck = new byte[]{0x0};
314 }
315 int writeValueSize =
316 connection.getConfiguration().getInt(HConstants.HBASE_CANARY_WRITE_VALUE_SIZE_KEY, 10);
317 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
318 Put put = new Put(rowToCheck);
319 byte[] value = new byte[writeValueSize];
320 Bytes.random(value);
321 put.add(column.getName(), HConstants.EMPTY_BYTE_ARRAY, value);
322
323 if (LOG.isDebugEnabled()) {
324 LOG.debug(String.format("writing to table %s region %s column family %s and key %s",
325 tableDesc.getTableName(), region.getRegionNameAsString(), column.getNameAsString(),
326 Bytes.toStringBinary(rowToCheck)));
327 }
328 try {
329 long startTime = System.currentTimeMillis();
330 table.put(put);
331 long time = System.currentTimeMillis() - startTime;
332 sink.publishWriteTiming(region, column, time);
333 } catch (Exception e) {
334 sink.publishWriteFailure(region, column, e);
335 }
336 }
337 table.close();
338 } catch (IOException e) {
339 sink.publishWriteFailure(region, e);
340 }
341 return null;
342 }
343 }
344
345
346
347
348 static class RegionServerTask implements Callable<Void> {
349 private HConnection connection;
350 private String serverName;
351 private HRegionInfo region;
352 private ExtendedSink sink;
353
354 RegionServerTask(HConnection connection, String serverName, HRegionInfo region,
355 ExtendedSink sink) {
356 this.connection = connection;
357 this.serverName = serverName;
358 this.region = region;
359 this.sink = sink;
360 }
361
362 @Override
363 public Void call() {
364 TableName tableName = null;
365 HTableInterface table = null;
366 Get get = null;
367 byte[] startKey = null;
368 Scan scan = null;
369 StopWatch stopWatch = new StopWatch();
370
371 stopWatch.reset();
372 try {
373 tableName = region.getTable();
374 table = connection.getTable(tableName);
375 startKey = region.getStartKey();
376
377 if (LOG.isDebugEnabled()) {
378 LOG.debug(String.format("reading from region server %s table %s region %s and key %s",
379 serverName, region.getTable(), region.getRegionNameAsString(),
380 Bytes.toStringBinary(startKey)));
381 }
382 if (startKey.length > 0) {
383 get = new Get(startKey);
384 get.setCacheBlocks(false);
385 get.setFilter(new FirstKeyOnlyFilter());
386 stopWatch.start();
387 table.get(get);
388 stopWatch.stop();
389 } else {
390 scan = new Scan();
391 scan.setCacheBlocks(false);
392 scan.setFilter(new FirstKeyOnlyFilter());
393 scan.setCaching(1);
394 scan.setMaxResultSize(1L);
395 scan.setSmall(true);
396 stopWatch.start();
397 ResultScanner s = table.getScanner(scan);
398 s.next();
399 s.close();
400 stopWatch.stop();
401 }
402 sink.publishReadTiming(tableName.getNameAsString(), serverName, stopWatch.getTime());
403 } catch (TableNotFoundException tnfe) {
404 LOG.error("Table may be deleted", tnfe);
405
406 } catch (TableNotEnabledException tnee) {
407
408 LOG.debug("The targeted table was disabled. Assuming success.");
409 } catch (DoNotRetryIOException dnrioe) {
410 sink.publishReadFailure(tableName.getNameAsString(), serverName);
411 LOG.error(dnrioe);
412 } catch (IOException e) {
413 sink.publishReadFailure(tableName.getNameAsString(), serverName);
414 LOG.error(e);
415 } finally {
416 if (table != null) {
417 try {
418 table.close();
419 } catch (IOException e) {
420 LOG.error("Close table failed", e);
421 }
422 }
423 scan = null;
424 get = null;
425 startKey = null;
426 }
427 return null;
428 }
429 }
430
// Exit / error codes.
// USAGE_EXIT_CODE: passed to System.exit by printUsageAndExit(); also recorded by the region
// server monitor when table names are passed in -regionserver mode.
private static final int USAGE_EXIT_CODE = 1;
// INIT_ERROR_EXIT_CODE: admin creation failed, or run() saw an error/timeout before the
// monitor was marked initialized.
private static final int INIT_ERROR_EXIT_CODE = 2;
// TIMEOUT_ERROR_EXIT_CODE: returned by run() when an initialized monitor exceeds the timeout.
private static final int TIMEOUT_ERROR_EXIT_CODE = 3;
// ERROR_EXIT_CODE: recorded by RegionMonitor.run() when the monitoring pass throws.
private static final int ERROR_EXIT_CODE = 4;
// FAILURE_EXIT_CODE: recorded by Monitor.finalCheckForErrors() when failures count as errors.
private static final int FAILURE_EXIT_CODE = 5;

// Interval applied by -daemon when no interval was set; in milliseconds, since the value is
// handed to Thread.sleep() and -interval values are multiplied by 1000.
private static final long DEFAULT_INTERVAL = 6000;

// Default limit for one monitoring pass, in milliseconds (compared against
// System.currentTimeMillis() differences in run()).
private static final long DEFAULT_TIMEOUT = 600000;
// NOTE(review): not referenced in the visible part of this file; presumably sizes the
// executor created by the command-line entry point — confirm.
private static final int MAX_THREADS_NUM = 16;

private static final Log LOG = LogFactory.getLog(Canary.class);

// Default table used for write sniffing: "canary" in the system namespace.
public static final TableName DEFAULT_WRITE_TABLE_NAME = TableName.valueOf(
  NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR, "canary");

// Column family created in the write-sniffing table.
private static final String CANARY_TABLE_FAMILY_NAME = "Test";

private Configuration conf = null;
// Pause between monitoring passes in milliseconds; 0 means run() performs a single pass.
private long interval = 0;
private Sink sink = null;

// Set by -e: targets are treated as regular expressions.
private boolean useRegExp;
// Set by -t: limit for one monitoring pass, in milliseconds.
private long timeout = DEFAULT_TIMEOUT;
// Set by -f: when true, run() returns as soon as the monitor reports an error.
private boolean failOnError = true;
// Set by -regionserver: targets are region servers instead of tables.
private boolean regionServerMode = false;
// Set by -writeSniffing: additionally probe writes against writeTableName.
private boolean writeSniffing = false;
// Set by -treatFailureAsError: read/write failures turn into FAILURE_EXIT_CODE.
private boolean treatFailureAsError = false;
// Set by -writeTable: table used for write sniffing.
private TableName writeTableName = DEFAULT_WRITE_TABLE_NAME;

// Executor on which the probe tasks are run.
private ExecutorService executor;
462
/**
 * Creates a canary that runs its probes on a single-threaded
 * {@link ScheduledThreadPoolExecutor} and logs results through a
 * {@link RegionServerStdOutSink}.
 */
public Canary() {
  this(
      new ScheduledThreadPoolExecutor(1),
      new RegionServerStdOutSink());
}
466
/**
 * Creates a canary with caller-supplied collaborators.
 * @param executor executor on which the probe tasks are run
 * @param sink receiver of probe timings and failures
 */
public Canary(ExecutorService executor, Sink sink) {
  this.sink = sink;
  this.executor = executor;
}
471
472 @Override
473 public Configuration getConf() {
474 return conf;
475 }
476
477 @Override
478 public void setConf(Configuration conf) {
479 this.conf = conf;
480 }
481
482 private int parseArgs(String[] args) {
483 int index = -1;
484
485 for (int i = 0; i < args.length; i++) {
486 String cmd = args[i];
487
488 if (cmd.startsWith("-")) {
489 if (index >= 0) {
490
491 System.err.println("Invalid command line options");
492 printUsageAndExit();
493 }
494
495 if (cmd.equals("-help")) {
496
497 printUsageAndExit();
498 } else if (cmd.equals("-daemon") && interval == 0) {
499
500 interval = DEFAULT_INTERVAL;
501 } else if (cmd.equals("-interval")) {
502
503 i++;
504
505 if (i == args.length) {
506 System.err.println("-interval needs a numeric value argument.");
507 printUsageAndExit();
508 }
509
510 try {
511 interval = Long.parseLong(args[i]) * 1000;
512 } catch (NumberFormatException e) {
513 System.err.println("-interval needs a numeric value argument.");
514 printUsageAndExit();
515 }
516 } else if(cmd.equals("-regionserver")) {
517 this.regionServerMode = true;
518 } else if(cmd.equals("-writeSniffing")) {
519 this.writeSniffing = true;
520 } else if(cmd.equals("-treatFailureAsError")) {
521 this.treatFailureAsError = true;
522 } else if (cmd.equals("-e")) {
523 this.useRegExp = true;
524 } else if (cmd.equals("-t")) {
525 i++;
526
527 if (i == args.length) {
528 System.err.println("-t needs a numeric value argument.");
529 printUsageAndExit();
530 }
531
532 try {
533 this.timeout = Long.parseLong(args[i]);
534 } catch (NumberFormatException e) {
535 System.err.println("-t needs a numeric value argument.");
536 printUsageAndExit();
537 }
538 } else if (cmd.equals("-writeTable")) {
539 i++;
540
541 if (i == args.length) {
542 System.err.println("-writeTable needs a string value argument.");
543 printUsageAndExit();
544 }
545 this.writeTableName = TableName.valueOf(args[i]);
546 } else if (cmd.equals("-f")) {
547 i++;
548
549 if (i == args.length) {
550 System.err
551 .println("-f needs a boolean value argument (true|false).");
552 printUsageAndExit();
553 }
554
555 this.failOnError = Boolean.parseBoolean(args[i]);
556 } else {
557
558 System.err.println(cmd + " options is invalid.");
559 printUsageAndExit();
560 }
561 } else if (index < 0) {
562
563 index = i;
564 }
565 }
566 return index;
567 }
568
569 @Override
570 public int run(String[] args) throws Exception {
571 int index = parseArgs(args);
572
573
574
575
576 AuthUtil.launchAuthChore(conf);
577
578
579 Monitor monitor = null;
580 Thread monitorThread = null;
581 long startTime = 0;
582 long currentTimeLength = 0;
583
584 HConnection connection = HConnectionManager.createConnection(this.conf);
585 try {
586 do {
587
588 try {
589 monitor = this.newMonitor(connection, index, args);
590 monitorThread = new Thread(monitor);
591 startTime = System.currentTimeMillis();
592 monitorThread.start();
593 while (!monitor.isDone()) {
594
595 Thread.sleep(1000);
596
597 if (this.failOnError && monitor.hasError()) {
598 monitorThread.interrupt();
599 if (monitor.initialized) {
600 return monitor.errorCode;
601 } else {
602 return INIT_ERROR_EXIT_CODE;
603 }
604 }
605 currentTimeLength = System.currentTimeMillis() - startTime;
606 if (currentTimeLength > this.timeout) {
607 LOG.error("The monitor is running too long (" + currentTimeLength
608 + ") after timeout limit:" + this.timeout
609 + " will be killed itself !!");
610 if (monitor.initialized) {
611 return TIMEOUT_ERROR_EXIT_CODE;
612 } else {
613 return INIT_ERROR_EXIT_CODE;
614 }
615 }
616 }
617
618 if (this.failOnError && monitor.finalCheckForErrors()) {
619 monitorThread.interrupt();
620 return monitor.errorCode;
621 }
622 } finally {
623 if (monitor != null) monitor.close();
624 }
625
626 Thread.sleep(interval);
627 } while (interval > 0);
628 } finally {
629 connection.close();
630 }
631
632 return monitor.errorCode;
633 }
634
635 private void printUsageAndExit() {
636 System.err.printf(
637 "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n",
638 getClass().getName());
639 System.err.println(" where [opts] are:");
640 System.err.println(" -help Show this help and exit.");
641 System.err.println(" -regionserver replace the table argument to regionserver,");
642 System.err.println(" which means to enable regionserver mode");
643 System.err.println(" -daemon Continuous check at defined intervals.");
644 System.err.println(" -interval <N> Interval between checks (sec)");
645 System.err.println(" -e Use table/regionserver as regular expression");
646 System.err.println(" which means the table/regionserver is regular expression pattern");
647 System.err.println(" -f <B> stop whole program if first error occurs," +
648 " default is true");
649 System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
650 System.err.println(" -writeSniffing enable the write sniffing in canary");
651 System.err.println(" -treatFailureAsError treats read / write failure as error");
652 System.err.println(" -writeTable The table used for write sniffing."
653 + " Default is hbase:canary");
654 System.err
655 .println(" -D<configProperty>=<value> assigning or override the configuration params");
656 System.exit(USAGE_EXIT_CODE);
657 }
658
659
660
661
662
663
664
665
666 public Monitor newMonitor(final HConnection connection, int index, String[] args) {
667 Monitor monitor = null;
668 String[] monitorTargets = null;
669
670 if(index >= 0) {
671 int length = args.length - index;
672 monitorTargets = new String[length];
673 System.arraycopy(args, index, monitorTargets, 0, length);
674 }
675
676 if (this.regionServerMode) {
677 monitor =
678 new RegionServerMonitor(connection, monitorTargets, this.useRegExp,
679 (ExtendedSink) this.sink, this.executor, this.treatFailureAsError);
680 } else {
681 monitor =
682 new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink, this.executor,
683 this.writeSniffing, this.writeTableName, this.treatFailureAsError);
684 }
685 return monitor;
686 }
687
688
689 public static abstract class Monitor implements Runnable, Closeable {
690
691 protected HConnection connection;
692 protected HBaseAdmin admin;
693 protected String[] targets;
694 protected boolean useRegExp;
695 protected boolean treatFailureAsError;
696 protected boolean initialized = false;
697
698 protected boolean done = false;
699 protected int errorCode = 0;
700 protected Sink sink;
701 protected ExecutorService executor;
702
703 public boolean isDone() {
704 return done;
705 }
706
707 public boolean hasError() {
708 return errorCode != 0;
709 }
710
711 public boolean finalCheckForErrors() {
712 if (errorCode != 0) {
713 return true;
714 }
715 if (treatFailureAsError &&
716 (sink.getReadFailureCount() > 0 || sink.getWriteFailureCount() > 0)) {
717 errorCode = FAILURE_EXIT_CODE;
718 return true;
719 }
720 return false;
721 }
722
723 @Override
724 public void close() throws IOException {
725 if (this.admin != null) this.admin.close();
726 }
727
728 protected Monitor(HConnection connection, String[] monitorTargets, boolean useRegExp, Sink sink,
729 ExecutorService executor, boolean treatFailureAsError) {
730 if (null == connection) throw new IllegalArgumentException("connection shall not be null");
731
732 this.connection = connection;
733 this.targets = monitorTargets;
734 this.useRegExp = useRegExp;
735 this.sink = sink;
736 this.executor = executor;
737 }
738
739 @Override
740 public abstract void run();
741
742 protected boolean initAdmin() {
743 if (null == this.admin) {
744 try {
745 this.admin = new HBaseAdmin(connection);
746 } catch (Exception e) {
747 LOG.error("Initial HBaseAdmin failed...", e);
748 this.errorCode = INIT_ERROR_EXIT_CODE;
749 }
750 } else if (admin.isAborted()) {
751 LOG.error("HBaseAdmin aborted");
752 this.errorCode = INIT_ERROR_EXIT_CODE;
753 }
754 return !this.hasError();
755 }
756 }
757
758
759 private static class RegionMonitor extends Monitor {
760
761 private static final int DEFAULT_WRITE_TABLE_CHECK_PERIOD = 10 * 60 * 1000;
762
763 private static final int DEFAULT_WRITE_DATA_TTL = 24 * 60 * 60;
764
765 private long lastCheckTime = -1;
766 private boolean writeSniffing;
767 private TableName writeTableName;
768 private int writeDataTTL;
769 private float regionsLowerLimit;
770 private float regionsUpperLimit;
771 private int checkPeriod;
772
773 public RegionMonitor(HConnection connection, String[] monitorTargets, boolean useRegExp,
774 Sink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName,
775 boolean treatFailureAsError) {
776 super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
777 Configuration conf = connection.getConfiguration();
778 this.writeSniffing = writeSniffing;
779 this.writeTableName = writeTableName;
780 this.writeDataTTL =
781 conf.getInt(HConstants.HBASE_CANARY_WRITE_DATA_TTL_KEY, DEFAULT_WRITE_DATA_TTL);
782 this.regionsLowerLimit =
783 conf.getFloat(HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY, 1.0f);
784 this.regionsUpperLimit =
785 conf.getFloat(HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_UPPERLIMIT_KEY, 1.5f);
786 this.checkPeriod =
787 conf.getInt(HConstants.HBASE_CANARY_WRITE_TABLE_CHECK_PERIOD_KEY,
788 DEFAULT_WRITE_TABLE_CHECK_PERIOD);
789 }
790
791 @Override
792 public void run() {
793 if (this.initAdmin()) {
794 try {
795 List<Future<Void>> taskFutures = new LinkedList<Future<Void>>();
796 if (this.targets != null && this.targets.length > 0) {
797 String[] tables = generateMonitorTables(this.targets);
798 this.initialized = true;
799 for (String table : tables) {
800 taskFutures.addAll(Canary.sniff(connection, sink, table, executor, TaskType.READ));
801 }
802 } else {
803 taskFutures.addAll(sniff(TaskType.READ));
804 }
805
806 if (writeSniffing) {
807 if (EnvironmentEdgeManager.currentTimeMillis() - lastCheckTime > checkPeriod) {
808 try {
809 checkWriteTableDistribution();
810 } catch (IOException e) {
811 LOG.error("Check canary table distribution failed!", e);
812 }
813 lastCheckTime = EnvironmentEdgeManager.currentTimeMillis();
814 }
815
816 taskFutures.addAll(Canary.sniff(connection, sink,
817 writeTableName.getNameAsString(), executor, TaskType.WRITE));
818 }
819
820 for (Future<Void> future : taskFutures) {
821 try {
822 future.get();
823 } catch (ExecutionException e) {
824 LOG.error("Sniff region failed!", e);
825 }
826 }
827 } catch (Exception e) {
828 LOG.error("Run regionMonitor failed", e);
829 this.errorCode = ERROR_EXIT_CODE;
830 }
831 }
832 this.done = true;
833 }
834
835 private String[] generateMonitorTables(String[] monitorTargets) throws IOException {
836 String[] returnTables = null;
837
838 if (this.useRegExp) {
839 Pattern pattern = null;
840 HTableDescriptor[] tds = null;
841 Set<String> tmpTables = new TreeSet<String>();
842 try {
843 if (LOG.isDebugEnabled()) {
844 LOG.debug(String.format("reading list of tables"));
845 }
846 tds = this.admin.listTables(pattern);
847 if (tds == null) {
848 tds = new HTableDescriptor[0];
849 }
850 for (String monitorTarget : monitorTargets) {
851 pattern = Pattern.compile(monitorTarget);
852 for (HTableDescriptor td : tds) {
853 if (pattern.matcher(td.getNameAsString()).matches()) {
854 tmpTables.add(td.getNameAsString());
855 }
856 }
857 }
858 } catch (IOException e) {
859 LOG.error("Communicate with admin failed", e);
860 throw e;
861 }
862
863 if (tmpTables.size() > 0) {
864 returnTables = tmpTables.toArray(new String[tmpTables.size()]);
865 } else {
866 String msg = "No HTable found, tablePattern:" + Arrays.toString(monitorTargets);
867 LOG.error(msg);
868 this.errorCode = INIT_ERROR_EXIT_CODE;
869 throw new TableNotFoundException(msg);
870 }
871 } else {
872 returnTables = monitorTargets;
873 }
874
875 return returnTables;
876 }
877
878
879
880
881 private List<Future<Void>> sniff(TaskType taskType) throws Exception {
882 if (LOG.isDebugEnabled()) {
883 LOG.debug(String.format("reading list of tables"));
884 }
885 List<Future<Void>> taskFutures = new LinkedList<Future<Void>>();
886 for (HTableDescriptor table : admin.listTables()) {
887 if (admin.isTableEnabled(table.getTableName())
888 && (!table.getTableName().equals(writeTableName))) {
889 taskFutures.addAll(Canary.sniff(connection, sink, table.getTableName(), executor,
890 taskType));
891 }
892 }
893 return taskFutures;
894 }
895
896 private void checkWriteTableDistribution() throws IOException, ServiceException {
897 if (!admin.tableExists(writeTableName)) {
898 int numberOfServers = admin.getClusterStatus().getServers().size();
899 if (numberOfServers == 0) {
900 throw new IllegalStateException("No live regionservers");
901 }
902 createWriteTable(numberOfServers);
903 }
904
905 if (!admin.isTableEnabled(writeTableName)) {
906 admin.enableTable(writeTableName);
907 }
908
909 int numberOfServers = admin.getClusterStatus().getServers().size();
910 List<HRegionLocation> locations = connection.locateRegions(writeTableName);
911 int numberOfRegions = locations.size();
912 if (numberOfRegions < numberOfServers * regionsLowerLimit
913 || numberOfRegions > numberOfServers * regionsUpperLimit) {
914 admin.disableTable(writeTableName);
915 admin.deleteTable(writeTableName);
916 createWriteTable(numberOfServers);
917 }
918 HashSet<ServerName> serverSet = new HashSet<ServerName>();
919 for (HRegionLocation location: locations) {
920 serverSet.add(location.getServerName());
921 }
922 int numberOfCoveredServers = serverSet.size();
923 if (numberOfCoveredServers < numberOfServers) {
924 admin.balancer();
925 }
926 }
927
928 private void createWriteTable(int numberOfServers) throws IOException {
929 int numberOfRegions = (int)(numberOfServers * regionsLowerLimit);
930 LOG.info("Number of live regionservers: " + numberOfServers + ", "
931 + "pre-splitting the canary table into " + numberOfRegions + " regions "
932 + "(current lower limit of regions per server is " + regionsLowerLimit
933 + " and you can change it by config: "
934 + HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY + " )");
935 HTableDescriptor desc = new HTableDescriptor(writeTableName);
936 HColumnDescriptor family = new HColumnDescriptor(CANARY_TABLE_FAMILY_NAME);
937 family.setMaxVersions(1);
938 family.setTimeToLive(writeDataTTL);
939
940 desc.addFamily(family);
941 byte[][] splits = new RegionSplitter.HexStringSplit().split(numberOfRegions);
942 admin.createTable(desc, splits);
943 }
944 }
945
946
947
948
949
950 public static void sniff(final HConnection connection, TableName tableName, TaskType taskType)
951 throws Exception {
952 List<Future<Void>> taskFutures =
953 Canary.sniff(connection, new StdOutSink(), tableName.getNameAsString(),
954 new ScheduledThreadPoolExecutor(1), taskType);
955 for (Future<Void> future : taskFutures) {
956 future.get();
957 }
958 }
959
960
961
962
963
964 private static List<Future<Void>> sniff(final HConnection connection, final Sink sink,
965 String tableName, ExecutorService executor, TaskType taskType) throws Exception {
966 if (LOG.isDebugEnabled()) {
967 LOG.debug(String.format("checking table is enabled and getting table descriptor for table %s",
968 tableName));
969 }
970 HBaseAdmin admin = new HBaseAdmin(connection);
971 try {
972 if (admin.isTableEnabled(TableName.valueOf(tableName))) {
973 return Canary.sniff(connection, sink, TableName.valueOf(tableName), executor,
974 taskType);
975 } else {
976 LOG.warn(String.format("Table %s is not enabled", tableName));
977 }
978 return new LinkedList<Future<Void>>();
979 } finally {
980 admin.close();
981 }
982 }
983
984
985
986
987 private static List<Future<Void>> sniff(final HConnection connection, final Sink sink,
988 TableName tableName, ExecutorService executor, TaskType taskType) throws Exception {
989 if (LOG.isDebugEnabled()) {
990 LOG.debug(String.format("reading list of regions for table %s", tableName));
991 }
992 HTableInterface table = null;
993 try {
994 table = connection.getTable(tableName);
995 } catch (TableNotFoundException e) {
996 return new ArrayList<Future<Void>>();
997 }
998 List<RegionTask> tasks = new ArrayList<RegionTask>();
999 try {
1000 for (HRegionInfo region : ((HTable)table).getRegionLocations().keySet()) {
1001 tasks.add(new RegionTask(connection, region, sink, taskType));
1002 }
1003 } finally {
1004 table.close();
1005 }
1006 return executor.invokeAll(tasks);
1007 }
1008
1009
1010
1011
1012
1013 private static void sniffRegion(
1014 final HBaseAdmin admin,
1015 final Sink sink,
1016 HRegionInfo region,
1017 HTableInterface table) throws Exception {
1018 HTableDescriptor tableDesc = table.getTableDescriptor();
1019 byte[] startKey = null;
1020 Get get = null;
1021 Scan scan = null;
1022 ResultScanner rs = null;
1023 StopWatch stopWatch = new StopWatch();
1024 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
1025 stopWatch.reset();
1026 startKey = region.getStartKey();
1027
1028 if (startKey.length > 0) {
1029 get = new Get(startKey);
1030 get.setCacheBlocks(false);
1031 get.setFilter(new FirstKeyOnlyFilter());
1032 get.addFamily(column.getName());
1033 } else {
1034 scan = new Scan();
1035 scan.setRaw(true);
1036 scan.setCaching(1);
1037 scan.setCacheBlocks(false);
1038 scan.setFilter(new FirstKeyOnlyFilter());
1039 scan.addFamily(column.getName());
1040 scan.setMaxResultSize(1L);
1041 }
1042
1043 try {
1044 if (startKey.length > 0) {
1045 stopWatch.start();
1046 table.get(get);
1047 stopWatch.stop();
1048 sink.publishReadTiming(region, column, stopWatch.getTime());
1049 } else {
1050 stopWatch.start();
1051 rs = table.getScanner(scan);
1052 stopWatch.stop();
1053 sink.publishReadTiming(region, column, stopWatch.getTime());
1054 }
1055 } catch (Exception e) {
1056 sink.publishReadFailure(region, column, e);
1057 } finally {
1058 if (rs != null) {
1059 rs.close();
1060 }
1061 scan = null;
1062 get = null;
1063 startKey = null;
1064 }
1065 }
1066 }
1067
1068 private static class RegionServerMonitor extends Monitor {
1069
1070 public RegionServerMonitor(HConnection connection, String[] monitorTargets, boolean useRegExp,
1071 ExtendedSink sink, ExecutorService executor, boolean treatFailureAsError) {
1072 super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
1073 }
1074
1075 private ExtendedSink getSink() {
1076 return (ExtendedSink) this.sink;
1077 }
1078
1079 @Override
1080 public void run() {
1081 if (this.initAdmin() && this.checkNoTableNames()) {
1082 Map<String, List<HRegionInfo>> rsAndRMap = this.filterRegionServerByName();
1083 this.initialized = true;
1084 this.monitorRegionServers(rsAndRMap);
1085 }
1086 this.done = true;
1087 }
1088
1089 private boolean checkNoTableNames() {
1090 List<String> foundTableNames = new ArrayList<String>();
1091 TableName[] tableNames = null;
1092
1093 if (LOG.isDebugEnabled()) {
1094 LOG.debug(String.format("reading list of tables"));
1095 }
1096 try {
1097 tableNames = this.admin.listTableNames();
1098 } catch (IOException e) {
1099 LOG.error("Get listTableNames failed", e);
1100 this.errorCode = INIT_ERROR_EXIT_CODE;
1101 return false;
1102 }
1103
1104 if (this.targets == null || this.targets.length == 0) return true;
1105
1106 for (String target : this.targets) {
1107 for (TableName tableName : tableNames) {
1108 if (target.equals(tableName.getNameAsString())) {
1109 foundTableNames.add(target);
1110 }
1111 }
1112 }
1113
1114 if (foundTableNames.size() > 0) {
1115 System.err.println("Cannot pass a tablename when using the -regionserver " +
1116 "option, tablenames:" + foundTableNames.toString());
1117 this.errorCode = USAGE_EXIT_CODE;
1118 }
1119 return foundTableNames.size() == 0;
1120 }
1121
1122 private void monitorRegionServers(Map<String, List<HRegionInfo>> rsAndRMap) {
1123 List<RegionServerTask> tasks = new ArrayList<RegionServerTask>();
1124 Random rand =new Random();
1125
1126 for (Map.Entry<String, List<HRegionInfo>> entry : rsAndRMap.entrySet()) {
1127 String serverName = entry.getKey();
1128
1129 HRegionInfo region = entry.getValue().get(rand.nextInt(entry.getValue().size()));
1130 tasks.add(new RegionServerTask(this.connection, serverName, region, getSink()));
1131 }
1132 try {
1133 for (Future<Void> future : this.executor.invokeAll(tasks)) {
1134 try {
1135 future.get();
1136 } catch (ExecutionException e) {
1137 LOG.error("Sniff regionserver failed!", e);
1138 this.errorCode = ERROR_EXIT_CODE;
1139 }
1140 }
1141 } catch (InterruptedException e) {
1142 this.errorCode = ERROR_EXIT_CODE;
1143 LOG.error("Sniff regionserver interrupted!", e);
1144 }
1145 }
1146
1147 private Map<String, List<HRegionInfo>> filterRegionServerByName() {
1148 Map<String, List<HRegionInfo>> regionServerAndRegionsMap = this.getAllRegionServerByName();
1149 regionServerAndRegionsMap = this.doFilterRegionServerByName(regionServerAndRegionsMap);
1150 return regionServerAndRegionsMap;
1151 }
1152
1153 private Map<String, List<HRegionInfo>> getAllRegionServerByName() {
1154 Map<String, List<HRegionInfo>> rsAndRMap = new HashMap<String, List<HRegionInfo>>();
1155 HTableInterface table = null;
1156 try {
1157 if (LOG.isDebugEnabled()) {
1158 LOG.debug(String.format("reading list of tables and locations"));
1159 }
1160 HTableDescriptor[] tableDescs = this.admin.listTables();
1161 List<HRegionInfo> regions = null;
1162 for (HTableDescriptor tableDesc : tableDescs) {
1163 table = this.admin.getConnection().getTable(tableDesc.getTableName());
1164 for (Entry<HRegionInfo, ServerName> e: ((HTable)table).getRegionLocations().entrySet()) {
1165 HRegionInfo r = e.getKey();
1166 ServerName rs = e.getValue();
1167 String rsName = rs.getHostname();
1168
1169 if (rsAndRMap.containsKey(rsName)) {
1170 regions = rsAndRMap.get(rsName);
1171 } else {
1172 regions = new ArrayList<HRegionInfo>();
1173 rsAndRMap.put(rsName, regions);
1174 }
1175 regions.add(r);
1176 }
1177 table.close();
1178 }
1179
1180 } catch (IOException e) {
1181 String msg = "Get HTables info failed";
1182 LOG.error(msg, e);
1183 this.errorCode = INIT_ERROR_EXIT_CODE;
1184 } finally {
1185 if (table != null) {
1186 try {
1187 table.close();
1188 } catch (IOException e) {
1189 LOG.warn("Close table failed", e);
1190 }
1191 }
1192 }
1193
1194 return rsAndRMap;
1195 }
1196
1197 private Map<String, List<HRegionInfo>> doFilterRegionServerByName(
1198 Map<String, List<HRegionInfo>> fullRsAndRMap) {
1199
1200 Map<String, List<HRegionInfo>> filteredRsAndRMap = null;
1201
1202 if (this.targets != null && this.targets.length > 0) {
1203 filteredRsAndRMap = new HashMap<String, List<HRegionInfo>>();
1204 Pattern pattern = null;
1205 Matcher matcher = null;
1206 boolean regExpFound = false;
1207 for (String rsName : this.targets) {
1208 if (this.useRegExp) {
1209 regExpFound = false;
1210 pattern = Pattern.compile(rsName);
1211 for (Map.Entry<String, List<HRegionInfo>> entry : fullRsAndRMap.entrySet()) {
1212 matcher = pattern.matcher(entry.getKey());
1213 if (matcher.matches()) {
1214 filteredRsAndRMap.put(entry.getKey(), entry.getValue());
1215 regExpFound = true;
1216 }
1217 }
1218 if (!regExpFound) {
1219 LOG.info("No RegionServerInfo found, regionServerPattern:" + rsName);
1220 }
1221 } else {
1222 if (fullRsAndRMap.containsKey(rsName)) {
1223 filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName));
1224 } else {
1225 LOG.info("No RegionServerInfo found, regionServerName:" + rsName);
1226 }
1227 }
1228 }
1229 } else {
1230 filteredRsAndRMap = fullRsAndRMap;
1231 }
1232 return filteredRsAndRMap;
1233 }
1234 }
1235
1236 public static void main(String[] args) throws Exception {
1237 final Configuration conf = HBaseConfiguration.create();
1238
1239
1240 new GenericOptionsParser(conf, args);
1241
1242 AuthUtil.launchAuthChore(conf);
1243
1244 int numThreads = conf.getInt("hbase.canary.threads.num", MAX_THREADS_NUM);
1245 LOG.info("Number of exection threads " + numThreads);
1246
1247 ExecutorService executor = new ScheduledThreadPoolExecutor(numThreads);
1248
1249 Class<? extends Sink> sinkClass =
1250 conf.getClass("hbase.canary.sink.class", RegionServerStdOutSink.class, Sink.class);
1251 Sink sink = ReflectionUtils.newInstance(sinkClass);
1252
1253 int exitCode = ToolRunner.run(conf, new Canary(executor, sink), args);
1254 executor.shutdown();
1255 System.exit(exitCode);
1256 }
1257 }