1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.tool;
21
22 import java.io.Closeable;
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.HashMap;
27 import java.util.HashSet;
28 import java.util.LinkedList;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.Map.Entry;
32 import java.util.Random;
33 import java.util.Set;
34 import java.util.TreeSet;
35 import java.util.concurrent.Callable;
36 import java.util.concurrent.ExecutionException;
37 import java.util.concurrent.ExecutorService;
38 import java.util.concurrent.Future;
39 import java.util.concurrent.ScheduledThreadPoolExecutor;
40 import java.util.regex.Matcher;
41 import java.util.regex.Pattern;
42
43 import org.apache.commons.lang.time.StopWatch;
44 import org.apache.commons.logging.Log;
45 import org.apache.commons.logging.LogFactory;
46 import org.apache.hadoop.conf.Configuration;
47 import org.apache.hadoop.hbase.AuthUtil;
48 import org.apache.hadoop.hbase.DoNotRetryIOException;
49 import org.apache.hadoop.hbase.HBaseConfiguration;
50 import org.apache.hadoop.hbase.HColumnDescriptor;
51 import org.apache.hadoop.hbase.HConstants;
52 import org.apache.hadoop.hbase.HRegionInfo;
53 import org.apache.hadoop.hbase.HRegionLocation;
54 import org.apache.hadoop.hbase.HTableDescriptor;
55 import org.apache.hadoop.hbase.NamespaceDescriptor;
56 import org.apache.hadoop.hbase.ServerName;
57 import org.apache.hadoop.hbase.TableName;
58 import org.apache.hadoop.hbase.TableNotEnabledException;
59 import org.apache.hadoop.hbase.TableNotFoundException;
60 import org.apache.hadoop.hbase.client.Get;
61 import org.apache.hadoop.hbase.client.HBaseAdmin;
62 import org.apache.hadoop.hbase.client.HConnection;
63 import org.apache.hadoop.hbase.client.HConnectionManager;
64 import org.apache.hadoop.hbase.client.HTable;
65 import org.apache.hadoop.hbase.client.HTableInterface;
66 import org.apache.hadoop.hbase.client.Put;
67 import org.apache.hadoop.hbase.client.ResultScanner;
68 import org.apache.hadoop.hbase.client.Scan;
69 import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
70 import org.apache.hadoop.hbase.tool.Canary.RegionTask.TaskType;
71 import org.apache.hadoop.hbase.util.Bytes;
72 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
73 import org.apache.hadoop.hbase.util.ReflectionUtils;
74 import org.apache.hadoop.hbase.util.RegionSplitter;
75 import org.apache.hadoop.util.Tool;
76 import org.apache.hadoop.util.ToolRunner;
77
78 import com.google.protobuf.ServiceException;
79
80
81
82
83
84
85
86
87
88
89
90
91 public final class Canary implements Tool {
92
93 public interface Sink {
94 public void publishReadFailure(HRegionInfo region, Exception e);
95 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
96 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
97 public void publishWriteFailure(HRegionInfo region, Exception e);
98 public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
99 public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
100 }
101
102
103 public interface ExtendedSink extends Sink {
104 public void publishReadFailure(String table, String server);
105 public void publishReadTiming(String table, String server, long msTime);
106 }
107
108
109
110 public static class StdOutSink implements Sink {
111 @Override
112 public void publishReadFailure(HRegionInfo region, Exception e) {
113 LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
114 }
115
116 @Override
117 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
118 LOG.error(String.format("read from region %s column family %s failed",
119 region.getRegionNameAsString(), column.getNameAsString()), e);
120 }
121
122 @Override
123 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
124 LOG.info(String.format("read from region %s column family %s in %dms",
125 region.getRegionNameAsString(), column.getNameAsString(), msTime));
126 }
127
128 @Override
129 public void publishWriteFailure(HRegionInfo region, Exception e) {
130 LOG.error(String.format("write to region %s failed", region.getRegionNameAsString()), e);
131 }
132
133 @Override
134 public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
135 LOG.error(String.format("write to region %s column family %s failed",
136 region.getRegionNameAsString(), column.getNameAsString()), e);
137 }
138
139 @Override
140 public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
141 LOG.info(String.format("write to region %s column family %s in %dms",
142 region.getRegionNameAsString(), column.getNameAsString(), msTime));
143 }
144 }
145
146 public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
147
148 @Override
149 public void publishReadFailure(String table, String server) {
150 LOG.error(String.format("Read from table:%s on region server:%s", table, server));
151 }
152
153 @Override
154 public void publishReadTiming(String table, String server, long msTime) {
155 LOG.info(String.format("Read from table:%s on region server:%s in %dms",
156 table, server, msTime));
157 }
158 }
159
160
161
162
163
164 public static class RegionTask implements Callable<Void> {
165 public enum TaskType{
166 READ, WRITE
167 }
168 private HConnection connection;
169 private HRegionInfo region;
170 private Sink sink;
171 private TaskType taskType;
172
173 RegionTask(HConnection connection, HRegionInfo region, Sink sink, TaskType taskType) {
174 this.connection = connection;
175 this.region = region;
176 this.sink = sink;
177 this.taskType = taskType;
178 }
179
180 @Override
181 public Void call() {
182 switch (taskType) {
183 case READ:
184 return read();
185 case WRITE:
186 return write();
187 default:
188 return read();
189 }
190 }
191
192 public Void read() {
193 HTableInterface table = null;
194 HTableDescriptor tableDesc = null;
195 try {
196 table = connection.getTable(region.getTable());
197 tableDesc = table.getTableDescriptor();
198 } catch (IOException e) {
199 LOG.debug("sniffRegion failed", e);
200 sink.publishReadFailure(region, e);
201 if (table != null) {
202 try {
203 table.close();
204 } catch (IOException ioe) {
205 LOG.error("Close table failed", e);
206 }
207 }
208 return null;
209 }
210
211 byte[] startKey = null;
212 Get get = null;
213 Scan scan = null;
214 ResultScanner rs = null;
215 StopWatch stopWatch = new StopWatch();
216 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
217 stopWatch.reset();
218 startKey = region.getStartKey();
219
220 if (startKey.length > 0) {
221 get = new Get(startKey);
222 get.setCacheBlocks(false);
223 get.setFilter(new FirstKeyOnlyFilter());
224 get.addFamily(column.getName());
225 } else {
226 scan = new Scan();
227 scan.setCaching(1);
228 scan.setCacheBlocks(false);
229 scan.setFilter(new FirstKeyOnlyFilter());
230 scan.addFamily(column.getName());
231 scan.setMaxResultSize(1L);
232 }
233
234 try {
235 if (startKey.length > 0) {
236 stopWatch.start();
237 table.get(get);
238 stopWatch.stop();
239 sink.publishReadTiming(region, column, stopWatch.getTime());
240 } else {
241 stopWatch.start();
242 rs = table.getScanner(scan);
243 stopWatch.stop();
244 sink.publishReadTiming(region, column, stopWatch.getTime());
245 }
246 } catch (Exception e) {
247 sink.publishReadFailure(region, column, e);
248 } finally {
249 if (rs != null) {
250 rs.close();
251 }
252 scan = null;
253 get = null;
254 startKey = null;
255 }
256 }
257 try {
258 table.close();
259 } catch (IOException e) {
260 LOG.error("Close table failed", e);
261 }
262 return null;
263 }
264
265
266
267
268
269 private Void write() {
270 HTableInterface table = null;
271 HTableDescriptor tableDesc = null;
272 try {
273 table = connection.getTable(region.getTable());
274 tableDesc = table.getTableDescriptor();
275 byte[] rowToCheck = region.getStartKey();
276 if (rowToCheck.length == 0) {
277 rowToCheck = new byte[]{0x0};
278 }
279 int writeValueSize =
280 connection.getConfiguration().getInt(HConstants.HBASE_CANARY_WRITE_VALUE_SIZE_KEY, 10);
281 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
282 Put put = new Put(rowToCheck);
283 byte[] value = new byte[writeValueSize];
284 Bytes.random(value);
285 put.add(column.getName(), HConstants.EMPTY_BYTE_ARRAY, value);
286 try {
287 long startTime = System.currentTimeMillis();
288 table.put(put);
289 long time = System.currentTimeMillis() - startTime;
290 sink.publishWriteTiming(region, column, time);
291 } catch (Exception e) {
292 sink.publishWriteFailure(region, column, e);
293 }
294 }
295 table.close();
296 } catch (IOException e) {
297 sink.publishWriteFailure(region, e);
298 }
299 return null;
300 }
301 }
302
303
304
305
306 static class RegionServerTask implements Callable<Void> {
307 private HConnection connection;
308 private String serverName;
309 private HRegionInfo region;
310 private ExtendedSink sink;
311
312 RegionServerTask(HConnection connection, String serverName, HRegionInfo region,
313 ExtendedSink sink) {
314 this.connection = connection;
315 this.serverName = serverName;
316 this.region = region;
317 this.sink = sink;
318 }
319
320 @Override
321 public Void call() {
322 TableName tableName = null;
323 HTableInterface table = null;
324 Get get = null;
325 byte[] startKey = null;
326 Scan scan = null;
327 StopWatch stopWatch = new StopWatch();
328
329 stopWatch.reset();
330 try {
331 tableName = region.getTable();
332 table = connection.getTable(tableName);
333 startKey = region.getStartKey();
334
335 if (startKey.length > 0) {
336 get = new Get(startKey);
337 get.setCacheBlocks(false);
338 get.setFilter(new FirstKeyOnlyFilter());
339 stopWatch.start();
340 table.get(get);
341 stopWatch.stop();
342 } else {
343 scan = new Scan();
344 scan.setCacheBlocks(false);
345 scan.setFilter(new FirstKeyOnlyFilter());
346 scan.setCaching(1);
347 scan.setMaxResultSize(1L);
348 stopWatch.start();
349 ResultScanner s = table.getScanner(scan);
350 s.close();
351 stopWatch.stop();
352 }
353 sink.publishReadTiming(tableName.getNameAsString(), serverName, stopWatch.getTime());
354 } catch (TableNotFoundException tnfe) {
355 LOG.error("Table may be deleted", tnfe);
356
357 } catch (TableNotEnabledException tnee) {
358
359 LOG.debug("The targeted table was disabled. Assuming success.");
360 } catch (DoNotRetryIOException dnrioe) {
361 sink.publishReadFailure(tableName.getNameAsString(), serverName);
362 LOG.error(dnrioe);
363 } catch (IOException e) {
364 sink.publishReadFailure(tableName.getNameAsString(), serverName);
365 LOG.error(e);
366 } finally {
367 if (table != null) {
368 try {
369 table.close();
370 } catch (IOException e) {
371 LOG.error("Close table failed", e);
372 }
373 }
374 scan = null;
375 get = null;
376 startKey = null;
377 }
378 return null;
379 }
380 }
381
382 private static final int USAGE_EXIT_CODE = 1;
383 private static final int INIT_ERROR_EXIT_CODE = 2;
384 private static final int TIMEOUT_ERROR_EXIT_CODE = 3;
385 private static final int ERROR_EXIT_CODE = 4;
386
387 private static final long DEFAULT_INTERVAL = 6000;
388
389 private static final long DEFAULT_TIMEOUT = 600000;
390 private static final int MAX_THREADS_NUM = 16;
391
392 private static final Log LOG = LogFactory.getLog(Canary.class);
393
394 public static final TableName DEFAULT_WRITE_TABLE_NAME = TableName.valueOf(
395 NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR, "canary");
396
397 private static final String CANARY_TABLE_FAMILY_NAME = "Test";
398
399 private Configuration conf = null;
400 private long interval = 0;
401 private Sink sink = null;
402
403 private boolean useRegExp;
404 private long timeout = DEFAULT_TIMEOUT;
405 private boolean failOnError = true;
406 private boolean regionServerMode = false;
407 private boolean writeSniffing = false;
408 private TableName writeTableName = DEFAULT_WRITE_TABLE_NAME;
409
410 private ExecutorService executor;
411
/**
 * Creates a canary backed by a single-threaded scheduled executor that reports to a
 * {@link RegionServerStdOutSink}.
 */
public Canary() {
  this(new ScheduledThreadPoolExecutor(1), new RegionServerStdOutSink());
}
415
/**
 * Creates a canary that runs its probe tasks on {@code executor} and publishes the results to
 * {@code sink}.
 */
public Canary(ExecutorService executor, Sink sink) {
  this.sink = sink;
  this.executor = executor;
}
420
421 @Override
422 public Configuration getConf() {
423 return conf;
424 }
425
426 @Override
427 public void setConf(Configuration conf) {
428 this.conf = conf;
429 }
430
431 private int parseArgs(String[] args) {
432 int index = -1;
433
434 for (int i = 0; i < args.length; i++) {
435 String cmd = args[i];
436
437 if (cmd.startsWith("-")) {
438 if (index >= 0) {
439
440 System.err.println("Invalid command line options");
441 printUsageAndExit();
442 }
443
444 if (cmd.equals("-help")) {
445
446 printUsageAndExit();
447 } else if (cmd.equals("-daemon") && interval == 0) {
448
449 interval = DEFAULT_INTERVAL;
450 } else if (cmd.equals("-interval")) {
451
452 i++;
453
454 if (i == args.length) {
455 System.err.println("-interval needs a numeric value argument.");
456 printUsageAndExit();
457 }
458
459 try {
460 interval = Long.parseLong(args[i]) * 1000;
461 } catch (NumberFormatException e) {
462 System.err.println("-interval needs a numeric value argument.");
463 printUsageAndExit();
464 }
465 } else if(cmd.equals("-regionserver")) {
466 this.regionServerMode = true;
467 } else if(cmd.equals("-writeSniffing")) {
468 this.writeSniffing = true;
469 } else if (cmd.equals("-e")) {
470 this.useRegExp = true;
471 } else if (cmd.equals("-t")) {
472 i++;
473
474 if (i == args.length) {
475 System.err.println("-t needs a numeric value argument.");
476 printUsageAndExit();
477 }
478
479 try {
480 this.timeout = Long.parseLong(args[i]);
481 } catch (NumberFormatException e) {
482 System.err.println("-t needs a numeric value argument.");
483 printUsageAndExit();
484 }
485 } else if (cmd.equals("-writeTable")) {
486 i++;
487
488 if (i == args.length) {
489 System.err.println("-writeTable needs a string value argument.");
490 printUsageAndExit();
491 }
492 this.writeTableName = TableName.valueOf(args[i]);
493 } else if (cmd.equals("-f")) {
494 i++;
495
496 if (i == args.length) {
497 System.err
498 .println("-f needs a boolean value argument (true|false).");
499 printUsageAndExit();
500 }
501
502 this.failOnError = Boolean.parseBoolean(args[i]);
503 } else {
504
505 System.err.println(cmd + " options is invalid.");
506 printUsageAndExit();
507 }
508 } else if (index < 0) {
509
510 index = i;
511 }
512 }
513 return index;
514 }
515
516 @Override
517 public int run(String[] args) throws Exception {
518 int index = parseArgs(args);
519
520
521
522
523 AuthUtil.launchAuthChore(conf);
524
525
526 Monitor monitor = null;
527 Thread monitorThread = null;
528 long startTime = 0;
529 long currentTimeLength = 0;
530
531 HConnection connection = HConnectionManager.createConnection(this.conf);
532 try {
533 do {
534
535 try {
536 monitor = this.newMonitor(connection, index, args);
537 monitorThread = new Thread(monitor);
538 startTime = System.currentTimeMillis();
539 monitorThread.start();
540 while (!monitor.isDone()) {
541
542 Thread.sleep(1000);
543
544 if (this.failOnError && monitor.hasError()) {
545 monitorThread.interrupt();
546 if (monitor.initialized) {
547 System.exit(monitor.errorCode);
548 } else {
549 System.exit(INIT_ERROR_EXIT_CODE);
550 }
551 }
552 currentTimeLength = System.currentTimeMillis() - startTime;
553 if (currentTimeLength > this.timeout) {
554 LOG.error("The monitor is running too long (" + currentTimeLength
555 + ") after timeout limit:" + this.timeout
556 + " will be killed itself !!");
557 if (monitor.initialized) {
558 System.exit(TIMEOUT_ERROR_EXIT_CODE);
559 } else {
560 System.exit(INIT_ERROR_EXIT_CODE);
561 }
562 break;
563 }
564 }
565
566 if (this.failOnError && monitor.hasError()) {
567 monitorThread.interrupt();
568 System.exit(monitor.errorCode);
569 }
570 } finally {
571 if (monitor != null) monitor.close();
572 }
573
574 Thread.sleep(interval);
575 } while (interval > 0);
576 } finally {
577 connection.close();
578 }
579
580 return(monitor.errorCode);
581 }
582
583 private void printUsageAndExit() {
584 System.err.printf(
585 "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n",
586 getClass().getName());
587 System.err.println(" where [opts] are:");
588 System.err.println(" -help Show this help and exit.");
589 System.err.println(" -regionserver replace the table argument to regionserver,");
590 System.err.println(" which means to enable regionserver mode");
591 System.err.println(" -daemon Continuous check at defined intervals.");
592 System.err.println(" -interval <N> Interval between checks (sec)");
593 System.err.println(" -e Use region/regionserver as regular expression");
594 System.err.println(" which means the region/regionserver is regular expression pattern");
595 System.err.println(" -f <B> stop whole program if first error occurs," +
596 " default is true");
597 System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
598 System.err.println(" -writeSniffing enable the write sniffing in canary");
599 System.err.println(" -writeTable The table used for write sniffing."
600 + " Default is hbase:canary");
601 System.exit(USAGE_EXIT_CODE);
602 }
603
604
605
606
607
608
609
610
611 public Monitor newMonitor(final HConnection connection, int index, String[] args) {
612 Monitor monitor = null;
613 String[] monitorTargets = null;
614
615 if(index >= 0) {
616 int length = args.length - index;
617 monitorTargets = new String[length];
618 System.arraycopy(args, index, monitorTargets, 0, length);
619 }
620
621 if (this.regionServerMode) {
622 monitor =
623 new RegionServerMonitor(connection, monitorTargets, this.useRegExp,
624 (ExtendedSink) this.sink, this.executor);
625 } else {
626 monitor =
627 new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink, this.executor,
628 this.writeSniffing, this.writeTableName);
629 }
630 return monitor;
631 }
632
633
634 public static abstract class Monitor implements Runnable, Closeable {
635
636 protected HConnection connection;
637 protected HBaseAdmin admin;
638 protected String[] targets;
639 protected boolean useRegExp;
640 protected boolean initialized = false;
641
642 protected boolean done = false;
643 protected int errorCode = 0;
644 protected Sink sink;
645 protected ExecutorService executor;
646
647 public boolean isDone() {
648 return done;
649 }
650
651 public boolean hasError() {
652 return errorCode != 0;
653 }
654
655 @Override
656 public void close() throws IOException {
657 if (this.admin != null) this.admin.close();
658 }
659
660 protected Monitor(HConnection connection, String[] monitorTargets, boolean useRegExp, Sink sink,
661 ExecutorService executor) {
662 if (null == connection) throw new IllegalArgumentException("connection shall not be null");
663
664 this.connection = connection;
665 this.targets = monitorTargets;
666 this.useRegExp = useRegExp;
667 this.sink = sink;
668 this.executor = executor;
669 }
670
671 public abstract void run();
672
673 protected boolean initAdmin() {
674 if (null == this.admin) {
675 try {
676 this.admin = new HBaseAdmin(connection);
677 } catch (Exception e) {
678 LOG.error("Initial HBaseAdmin failed...", e);
679 this.errorCode = INIT_ERROR_EXIT_CODE;
680 }
681 } else if (admin.isAborted()) {
682 LOG.error("HBaseAdmin aborted");
683 this.errorCode = INIT_ERROR_EXIT_CODE;
684 }
685 return !this.hasError();
686 }
687 }
688
689
690 private static class RegionMonitor extends Monitor {
691
692 private static final int DEFAULT_WRITE_TABLE_CHECK_PERIOD = 10 * 60 * 1000;
693
694 private static final int DEFAULT_WRITE_DATA_TTL = 24 * 60 * 60;
695
696 private long lastCheckTime = -1;
697 private boolean writeSniffing;
698 private TableName writeTableName;
699 private int writeDataTTL;
700 private float regionsLowerLimit;
701 private float regionsUpperLimit;
702 private int checkPeriod;
703
704 public RegionMonitor(HConnection connection, String[] monitorTargets, boolean useRegExp,
705 Sink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName) {
706 super(connection, monitorTargets, useRegExp, sink, executor);
707 Configuration conf = connection.getConfiguration();
708 this.writeSniffing = writeSniffing;
709 this.writeTableName = writeTableName;
710 this.writeDataTTL =
711 conf.getInt(HConstants.HBASE_CANARY_WRITE_DATA_TTL_KEY, DEFAULT_WRITE_DATA_TTL);
712 this.regionsLowerLimit =
713 conf.getFloat(HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY, 1.0f);
714 this.regionsUpperLimit =
715 conf.getFloat(HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_UPPERLIMIT_KEY, 1.5f);
716 this.checkPeriod =
717 conf.getInt(HConstants.HBASE_CANARY_WRITE_TABLE_CHECK_PERIOD_KEY,
718 DEFAULT_WRITE_TABLE_CHECK_PERIOD);
719 }
720
721 @Override
722 public void run() {
723 if (this.initAdmin()) {
724 try {
725 List<Future<Void>> taskFutures = new LinkedList<Future<Void>>();
726 if (this.targets != null && this.targets.length > 0) {
727 String[] tables = generateMonitorTables(this.targets);
728 this.initialized = true;
729 for (String table : tables) {
730 taskFutures.addAll(Canary.sniff(connection, sink, table, executor, TaskType.READ));
731 }
732 } else {
733 taskFutures.addAll(sniff(TaskType.READ));
734 }
735
736 if (writeSniffing) {
737 if (EnvironmentEdgeManager.currentTimeMillis() - lastCheckTime > checkPeriod) {
738 try {
739 checkWriteTableDistribution();
740 } catch (IOException e) {
741 LOG.error("Check canary table distribution failed!", e);
742 }
743 lastCheckTime = EnvironmentEdgeManager.currentTimeMillis();
744 }
745
746 taskFutures.addAll(Canary.sniff(connection, sink,
747 writeTableName.getNameAsString(), executor, TaskType.WRITE));
748 }
749
750 for (Future<Void> future : taskFutures) {
751 try {
752 future.get();
753 } catch (ExecutionException e) {
754 LOG.error("Sniff region failed!", e);
755 }
756 }
757 } catch (Exception e) {
758 LOG.error("Run regionMonitor failed", e);
759 this.errorCode = ERROR_EXIT_CODE;
760 }
761 }
762 this.done = true;
763 }
764
765 private String[] generateMonitorTables(String[] monitorTargets) throws IOException {
766 String[] returnTables = null;
767
768 if (this.useRegExp) {
769 Pattern pattern = null;
770 HTableDescriptor[] tds = null;
771 Set<String> tmpTables = new TreeSet<String>();
772 try {
773 for (String monitorTarget : monitorTargets) {
774 pattern = Pattern.compile(monitorTarget);
775 tds = this.admin.listTables(pattern);
776 if (tds != null) {
777 for (HTableDescriptor td : tds) {
778 tmpTables.add(td.getNameAsString());
779 }
780 }
781 }
782 } catch (IOException e) {
783 LOG.error("Communicate with admin failed", e);
784 throw e;
785 }
786
787 if (tmpTables.size() > 0) {
788 returnTables = tmpTables.toArray(new String[tmpTables.size()]);
789 } else {
790 String msg = "No HTable found, tablePattern:" + Arrays.toString(monitorTargets);
791 LOG.error(msg);
792 this.errorCode = INIT_ERROR_EXIT_CODE;
793 throw new TableNotFoundException(msg);
794 }
795 } else {
796 returnTables = monitorTargets;
797 }
798
799 return returnTables;
800 }
801
802
803
804
805 private List<Future<Void>> sniff(TaskType taskType) throws Exception {
806 List<Future<Void>> taskFutures = new LinkedList<Future<Void>>();
807 for (HTableDescriptor table : admin.listTables()) {
808 if (admin.isTableEnabled(table.getTableName())
809 && (!table.getTableName().equals(writeTableName))) {
810 taskFutures.addAll(Canary.sniff(connection, sink, table.getTableName(), executor,
811 taskType));
812 }
813 }
814 return taskFutures;
815 }
816
817 private void checkWriteTableDistribution() throws IOException, ServiceException {
818 if (!admin.tableExists(writeTableName)) {
819 int numberOfServers = admin.getClusterStatus().getServers().size();
820 if (numberOfServers == 0) {
821 throw new IllegalStateException("No live regionservers");
822 }
823 createWriteTable(numberOfServers);
824 }
825
826 if (!admin.isTableEnabled(writeTableName)) {
827 admin.enableTable(writeTableName);
828 }
829
830 int numberOfServers = admin.getClusterStatus().getServers().size();
831 List<HRegionLocation> locations = connection.locateRegions(writeTableName);
832 int numberOfRegions = locations.size();
833 if (numberOfRegions < numberOfServers * regionsLowerLimit
834 || numberOfRegions > numberOfServers * regionsUpperLimit) {
835 admin.disableTable(writeTableName);
836 admin.deleteTable(writeTableName);
837 createWriteTable(numberOfServers);
838 }
839 HashSet<ServerName> serverSet = new HashSet<ServerName>();
840 for (HRegionLocation location: locations) {
841 serverSet.add(location.getServerName());
842 }
843 int numberOfCoveredServers = serverSet.size();
844 if (numberOfCoveredServers < numberOfServers) {
845 admin.balancer();
846 }
847 }
848
849 private void createWriteTable(int numberOfServers) throws IOException {
850 int numberOfRegions = (int)(numberOfServers * regionsLowerLimit);
851 LOG.info("Number of live regionservers: " + numberOfServers + ", "
852 + "pre-splitting the canary table into " + numberOfRegions + " regions "
853 + "(current lower limi of regions per server is " + regionsLowerLimit
854 + " and you can change it by config: "
855 + HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY + " )");
856 HTableDescriptor desc = new HTableDescriptor(writeTableName);
857 HColumnDescriptor family = new HColumnDescriptor(CANARY_TABLE_FAMILY_NAME);
858 family.setMaxVersions(1);
859 family.setTimeToLive(writeDataTTL);
860
861 desc.addFamily(family);
862 byte[][] splits = new RegionSplitter.HexStringSplit().split(numberOfRegions);
863 admin.createTable(desc, splits);
864 }
865 }
866
867
868
869
870
871 public static void sniff(final HConnection connection, TableName tableName, TaskType taskType)
872 throws Exception {
873 List<Future<Void>> taskFutures =
874 Canary.sniff(connection, new StdOutSink(), tableName.getNameAsString(),
875 new ScheduledThreadPoolExecutor(1), taskType);
876 for (Future<Void> future : taskFutures) {
877 future.get();
878 }
879 }
880
881
882
883
884
885 private static List<Future<Void>> sniff(final HConnection connection, final Sink sink,
886 String tableName, ExecutorService executor, TaskType taskType) throws Exception {
887 HBaseAdmin admin = new HBaseAdmin(connection);
888 try {
889 if (admin.isTableEnabled(TableName.valueOf(tableName))) {
890 return Canary.sniff(connection, sink, TableName.valueOf(tableName), executor,
891 taskType);
892 } else {
893 LOG.warn(String.format("Table %s is not enabled", tableName));
894 }
895 return new LinkedList<Future<Void>>();
896 } finally {
897 admin.close();
898 }
899 }
900
901
902
903
904 private static List<Future<Void>> sniff(final HConnection connection, final Sink sink,
905 TableName tableName, ExecutorService executor, TaskType taskType) throws Exception {
906 HTableInterface table = null;
907 try {
908 table = connection.getTable(tableName);
909 } catch (TableNotFoundException e) {
910 return new ArrayList<Future<Void>>();
911 }
912 List<RegionTask> tasks = new ArrayList<RegionTask>();
913 try {
914 for (HRegionInfo region : ((HTable)table).getRegionLocations().keySet()) {
915 tasks.add(new RegionTask(connection, region, sink, taskType));
916 }
917 } finally {
918 table.close();
919 }
920 return executor.invokeAll(tasks);
921 }
922
923
924
925
926
927 private static void sniffRegion(
928 final HBaseAdmin admin,
929 final Sink sink,
930 HRegionInfo region,
931 HTableInterface table) throws Exception {
932 HTableDescriptor tableDesc = table.getTableDescriptor();
933 byte[] startKey = null;
934 Get get = null;
935 Scan scan = null;
936 ResultScanner rs = null;
937 StopWatch stopWatch = new StopWatch();
938 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
939 stopWatch.reset();
940 startKey = region.getStartKey();
941
942 if (startKey.length > 0) {
943 get = new Get(startKey);
944 get.setCacheBlocks(false);
945 get.setFilter(new FirstKeyOnlyFilter());
946 get.addFamily(column.getName());
947 } else {
948 scan = new Scan();
949 scan.setRaw(true);
950 scan.setCaching(1);
951 scan.setCacheBlocks(false);
952 scan.setFilter(new FirstKeyOnlyFilter());
953 scan.addFamily(column.getName());
954 scan.setMaxResultSize(1L);
955 }
956
957 try {
958 if (startKey.length > 0) {
959 stopWatch.start();
960 table.get(get);
961 stopWatch.stop();
962 sink.publishReadTiming(region, column, stopWatch.getTime());
963 } else {
964 stopWatch.start();
965 rs = table.getScanner(scan);
966 stopWatch.stop();
967 sink.publishReadTiming(region, column, stopWatch.getTime());
968 }
969 } catch (Exception e) {
970 sink.publishReadFailure(region, column, e);
971 } finally {
972 if (rs != null) {
973 rs.close();
974 }
975 scan = null;
976 get = null;
977 startKey = null;
978 }
979 }
980 }
981
982 private static class RegionServerMonitor extends Monitor {
983
984 public RegionServerMonitor(HConnection connection, String[] monitorTargets, boolean useRegExp,
985 ExtendedSink sink, ExecutorService executor) {
986 super(connection, monitorTargets, useRegExp, sink, executor);
987 }
988
989 private ExtendedSink getSink() {
990 return (ExtendedSink) this.sink;
991 }
992
993 @Override
994 public void run() {
995 if (this.initAdmin() && this.checkNoTableNames()) {
996 Map<String, List<HRegionInfo>> rsAndRMap = this.filterRegionServerByName();
997 this.initialized = true;
998 this.monitorRegionServers(rsAndRMap);
999 }
1000 this.done = true;
1001 }
1002
1003 private boolean checkNoTableNames() {
1004 List<String> foundTableNames = new ArrayList<String>();
1005 TableName[] tableNames = null;
1006
1007 try {
1008 tableNames = this.admin.listTableNames();
1009 } catch (IOException e) {
1010 LOG.error("Get listTableNames failed", e);
1011 this.errorCode = INIT_ERROR_EXIT_CODE;
1012 return false;
1013 }
1014
1015 if (this.targets == null || this.targets.length == 0) return true;
1016
1017 for (String target : this.targets) {
1018 for (TableName tableName : tableNames) {
1019 if (target.equals(tableName.getNameAsString())) {
1020 foundTableNames.add(target);
1021 }
1022 }
1023 }
1024
1025 if (foundTableNames.size() > 0) {
1026 System.err.println("Cannot pass a tablename when using the -regionserver " +
1027 "option, tablenames:" + foundTableNames.toString());
1028 this.errorCode = USAGE_EXIT_CODE;
1029 }
1030 return foundTableNames.size() == 0;
1031 }
1032
1033 private void monitorRegionServers(Map<String, List<HRegionInfo>> rsAndRMap) {
1034 List<RegionServerTask> tasks = new ArrayList<RegionServerTask>();
1035 Random rand =new Random();
1036
1037 for (Map.Entry<String, List<HRegionInfo>> entry : rsAndRMap.entrySet()) {
1038 String serverName = entry.getKey();
1039
1040 HRegionInfo region = entry.getValue().get(rand.nextInt(entry.getValue().size()));
1041 tasks.add(new RegionServerTask(this.connection, serverName, region, getSink()));
1042 }
1043 try {
1044 for (Future<Void> future : this.executor.invokeAll(tasks)) {
1045 try {
1046 future.get();
1047 } catch (ExecutionException e) {
1048 LOG.error("Sniff regionserver failed!", e);
1049 this.errorCode = ERROR_EXIT_CODE;
1050 }
1051 }
1052 } catch (InterruptedException e) {
1053 this.errorCode = ERROR_EXIT_CODE;
1054 LOG.error("Sniff regionserver failed!", e);
1055 }
1056 }
1057
1058 private Map<String, List<HRegionInfo>> filterRegionServerByName() {
1059 Map<String, List<HRegionInfo>> regionServerAndRegionsMap = this.getAllRegionServerByName();
1060 regionServerAndRegionsMap = this.doFilterRegionServerByName(regionServerAndRegionsMap);
1061 return regionServerAndRegionsMap;
1062 }
1063
1064 private Map<String, List<HRegionInfo>> getAllRegionServerByName() {
1065 Map<String, List<HRegionInfo>> rsAndRMap = new HashMap<String, List<HRegionInfo>>();
1066 HTableInterface table = null;
1067 try {
1068 HTableDescriptor[] tableDescs = this.admin.listTables();
1069 List<HRegionInfo> regions = null;
1070 for (HTableDescriptor tableDesc : tableDescs) {
1071 table = this.admin.getConnection().getTable(tableDesc.getTableName());
1072 for (Entry<HRegionInfo, ServerName> e: ((HTable)table).getRegionLocations().entrySet()) {
1073 HRegionInfo r = e.getKey();
1074 ServerName rs = e.getValue();
1075 String rsName = rs.getHostname();
1076
1077 if (rsAndRMap.containsKey(rsName)) {
1078 regions = rsAndRMap.get(rsName);
1079 } else {
1080 regions = new ArrayList<HRegionInfo>();
1081 rsAndRMap.put(rsName, regions);
1082 }
1083 regions.add(r);
1084 }
1085 table.close();
1086 }
1087
1088 } catch (IOException e) {
1089 String msg = "Get HTables info failed";
1090 LOG.error(msg, e);
1091 this.errorCode = INIT_ERROR_EXIT_CODE;
1092 } finally {
1093 if (table != null) {
1094 try {
1095 table.close();
1096 } catch (IOException e) {
1097 LOG.warn("Close table failed", e);
1098 }
1099 }
1100 }
1101
1102 return rsAndRMap;
1103 }
1104
1105 private Map<String, List<HRegionInfo>> doFilterRegionServerByName(
1106 Map<String, List<HRegionInfo>> fullRsAndRMap) {
1107
1108 Map<String, List<HRegionInfo>> filteredRsAndRMap = null;
1109
1110 if (this.targets != null && this.targets.length > 0) {
1111 filteredRsAndRMap = new HashMap<String, List<HRegionInfo>>();
1112 Pattern pattern = null;
1113 Matcher matcher = null;
1114 boolean regExpFound = false;
1115 for (String rsName : this.targets) {
1116 if (this.useRegExp) {
1117 regExpFound = false;
1118 pattern = Pattern.compile(rsName);
1119 for (Map.Entry<String, List<HRegionInfo>> entry : fullRsAndRMap.entrySet()) {
1120 matcher = pattern.matcher(entry.getKey());
1121 if (matcher.matches()) {
1122 filteredRsAndRMap.put(entry.getKey(), entry.getValue());
1123 regExpFound = true;
1124 }
1125 }
1126 if (!regExpFound) {
1127 LOG.info("No RegionServerInfo found, regionServerPattern:" + rsName);
1128 }
1129 } else {
1130 if (fullRsAndRMap.containsKey(rsName)) {
1131 filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName));
1132 } else {
1133 LOG.info("No RegionServerInfo found, regionServerName:" + rsName);
1134 }
1135 }
1136 }
1137 } else {
1138 filteredRsAndRMap = fullRsAndRMap;
1139 }
1140 return filteredRsAndRMap;
1141 }
1142 }
1143
1144 public static void main(String[] args) throws Exception {
1145 final Configuration conf = HBaseConfiguration.create();
1146 AuthUtil.launchAuthChore(conf);
1147 int numThreads = conf.getInt("hbase.canary.threads.num", MAX_THREADS_NUM);
1148 ExecutorService executor = new ScheduledThreadPoolExecutor(numThreads);
1149
1150 Class<? extends Sink> sinkClass =
1151 conf.getClass("hbase.canary.sink.class", StdOutSink.class, Sink.class);
1152 Sink sink = ReflectionUtils.newInstance(sinkClass);
1153
1154 int exitCode = ToolRunner.run(conf, new Canary(executor, sink), args);
1155 executor.shutdown();
1156 System.exit(exitCode);
1157 }
1158 }