1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.tool;
21
22 import java.io.Closeable;
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.HashMap;
27 import java.util.List;
28 import java.util.Map;
29 import java.util.Set;
30 import java.util.TreeSet;
31 import java.util.regex.Matcher;
32 import java.util.regex.Pattern;
33
34 import org.apache.commons.lang.time.StopWatch;
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.hbase.AuthUtil;
39 import org.apache.hadoop.hbase.DoNotRetryIOException;
40 import org.apache.hadoop.hbase.HBaseConfiguration;
41 import org.apache.hadoop.hbase.HColumnDescriptor;
42 import org.apache.hadoop.hbase.HRegionInfo;
43 import org.apache.hadoop.hbase.HRegionLocation;
44 import org.apache.hadoop.hbase.HTableDescriptor;
45 import org.apache.hadoop.hbase.ServerName;
46 import org.apache.hadoop.hbase.TableName;
47 import org.apache.hadoop.hbase.TableNotEnabledException;
48 import org.apache.hadoop.hbase.TableNotFoundException;
49 import org.apache.hadoop.hbase.client.Admin;
50 import org.apache.hadoop.hbase.client.Connection;
51 import org.apache.hadoop.hbase.client.ConnectionFactory;
52 import org.apache.hadoop.hbase.client.Get;
53 import org.apache.hadoop.hbase.client.RegionLocator;
54 import org.apache.hadoop.hbase.client.ResultScanner;
55 import org.apache.hadoop.hbase.client.Scan;
56 import org.apache.hadoop.hbase.client.Table;
57 import org.apache.hadoop.util.Tool;
58 import org.apache.hadoop.util.ToolRunner;
59
60
61
62
63
64
65
66
67
68
69
70
71 public final class Canary implements Tool {
72
73 public interface Sink {
74 public void publishReadFailure(HRegionInfo region, Exception e);
75 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
76 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
77 }
78
79
80 public interface ExtendedSink extends Sink {
81 public void publishReadFailure(String table, String server);
82 public void publishReadTiming(String table, String server, long msTime);
83 }
84
85
86
87 public static class StdOutSink implements Sink {
88 @Override
89 public void publishReadFailure(HRegionInfo region, Exception e) {
90 LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
91 }
92
93 @Override
94 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
95 LOG.error(String.format("read from region %s column family %s failed",
96 region.getRegionNameAsString(), column.getNameAsString()), e);
97 }
98
99 @Override
100 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
101 LOG.info(String.format("read from region %s column family %s in %dms",
102 region.getRegionNameAsString(), column.getNameAsString(), msTime));
103 }
104 }
105
106 public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
107
108 @Override
109 public void publishReadFailure(String table, String server) {
110 LOG.error(String.format("Read from table:%s on region server:%s", table, server));
111 }
112
113 @Override
114 public void publishReadTiming(String table, String server, long msTime) {
115 LOG.info(String.format("Read from table:%s on region server:%s in %dms",
116 table, server, msTime));
117 }
118 }
119
// Process exit codes used by this tool.
// Bad command line, or a table name passed together with -regionserver.
private static final int USAGE_EXIT_CODE = 1;
// The monitor failed (or timed out) before it marked itself initialized.
private static final int INIT_ERROR_EXIT_CODE = 2;
// An initialized monitor ran longer than the configured timeout.
private static final int TIMEOUT_ERROR_EXIT_CODE = 3;
// An initialized monitor hit an error while probing.
private static final int ERROR_EXIT_CODE = 4;

// Pause between passes, in milliseconds, applied when -daemon is given without -interval.
private static final long DEFAULT_INTERVAL = 6000;

// Maximum duration of one monitor pass, in milliseconds, unless overridden with -t.
private static final long DEFAULT_TIMEOUT = 600000;

private static final Log LOG = LogFactory.getLog(Canary.class);

// Configuration handed in through setConf(); used to create the cluster connection in run().
private Configuration conf = null;
// Pause between passes in milliseconds; 0 means run() performs a single pass.
private long interval = 0;
// Destination for probe results.
private Sink sink = null;

// Set by -e: monitor targets are treated as regular expressions.
private boolean useRegExp;
// Timeout for one pass in milliseconds (set by -t).
private long timeout = DEFAULT_TIMEOUT;
// Set by -f: when true, run() exits the JVM as soon as the monitor reports an error.
private boolean failOnError = true;
// Set by -regionserver: probe region servers instead of tables.
private boolean regionServerMode = false;
139
/**
 * Creates a canary that reports through a new {@link RegionServerStdOutSink}.
 */
public Canary() {
  this(new RegionServerStdOutSink());
}
143
/**
 * Creates a canary that reports through the given sink.
 *
 * @param sink destination for probe results; it is cast to {@link ExtendedSink} by
 *          {@code newMonitor} when region server mode is enabled
 */
public Canary(Sink sink) {
  this.sink = sink;
}
147
/**
 * Returns the configuration last passed to {@code setConf}, or {@code null} if none was set.
 */
@Override
public Configuration getConf() {
  return conf;
}
152
/**
 * Stores the configuration that {@code run} uses to create the cluster connection.
 *
 * @param conf the configuration to use
 */
@Override
public void setConf(Configuration conf) {
  this.conf = conf;
}
157
158 @Override
159 public int run(String[] args) throws Exception {
160 int index = -1;
161
162
163 for (int i = 0; i < args.length; i++) {
164 String cmd = args[i];
165
166 if (cmd.startsWith("-")) {
167 if (index >= 0) {
168
169 System.err.println("Invalid command line options");
170 printUsageAndExit();
171 }
172
173 if (cmd.equals("-help")) {
174
175 printUsageAndExit();
176 } else if (cmd.equals("-daemon") && interval == 0) {
177
178 interval = DEFAULT_INTERVAL;
179 } else if (cmd.equals("-interval")) {
180
181 i++;
182
183 if (i == args.length) {
184 System.err.println("-interval needs a numeric value argument.");
185 printUsageAndExit();
186 }
187
188 try {
189 interval = Long.parseLong(args[i]) * 1000;
190 } catch (NumberFormatException e) {
191 System.err.println("-interval needs a numeric value argument.");
192 printUsageAndExit();
193 }
194 } else if(cmd.equals("-regionserver")) {
195 this.regionServerMode = true;
196 } else if (cmd.equals("-e")) {
197 this.useRegExp = true;
198 } else if (cmd.equals("-t")) {
199 i++;
200
201 if (i == args.length) {
202 System.err.println("-t needs a numeric value argument.");
203 printUsageAndExit();
204 }
205
206 try {
207 this.timeout = Long.parseLong(args[i]);
208 } catch (NumberFormatException e) {
209 System.err.println("-t needs a numeric value argument.");
210 printUsageAndExit();
211 }
212
213 } else if (cmd.equals("-f")) {
214 i++;
215
216 if (i == args.length) {
217 System.err
218 .println("-f needs a boolean value argument (true|false).");
219 printUsageAndExit();
220 }
221
222 this.failOnError = Boolean.parseBoolean(args[i]);
223 } else {
224
225 System.err.println(cmd + " options is invalid.");
226 printUsageAndExit();
227 }
228 } else if (index < 0) {
229
230 index = i;
231 }
232 }
233
234
235 Monitor monitor = null;
236 Thread monitorThread = null;
237 long startTime = 0;
238 long currentTimeLength = 0;
239
240
241
242 try (Connection connection = ConnectionFactory.createConnection(this.conf)) {
243 do {
244
245 try {
246 monitor = this.newMonitor(connection, index, args);
247 monitorThread = new Thread(monitor);
248 startTime = System.currentTimeMillis();
249 monitorThread.start();
250 while (!monitor.isDone()) {
251
252 Thread.sleep(1000);
253
254 if (this.failOnError && monitor.hasError()) {
255 monitorThread.interrupt();
256 if (monitor.initialized) {
257 System.exit(monitor.errorCode);
258 } else {
259 System.exit(INIT_ERROR_EXIT_CODE);
260 }
261 }
262 currentTimeLength = System.currentTimeMillis() - startTime;
263 if (currentTimeLength > this.timeout) {
264 LOG.error("The monitor is running too long (" + currentTimeLength
265 + ") after timeout limit:" + this.timeout
266 + " will be killed itself !!");
267 if (monitor.initialized) {
268 System.exit(TIMEOUT_ERROR_EXIT_CODE);
269 } else {
270 System.exit(INIT_ERROR_EXIT_CODE);
271 }
272 break;
273 }
274 }
275
276 if (this.failOnError && monitor.hasError()) {
277 monitorThread.interrupt();
278 System.exit(monitor.errorCode);
279 }
280 } finally {
281 if (monitor != null) monitor.close();
282 }
283
284 Thread.sleep(interval);
285 } while (interval > 0);
286 }
287
288 return(monitor.errorCode);
289 }
290
291 private void printUsageAndExit() {
292 System.err.printf(
293 "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n",
294 getClass().getName());
295 System.err.println(" where [opts] are:");
296 System.err.println(" -help Show this help and exit.");
297 System.err.println(" -regionserver replace the table argument to regionserver,");
298 System.err.println(" which means to enable regionserver mode");
299 System.err.println(" -daemon Continuous check at defined intervals.");
300 System.err.println(" -interval <N> Interval between checks (sec)");
301 System.err.println(" -e Use region/regionserver as regular expression");
302 System.err.println(" which means the region/regionserver is regular expression pattern");
303 System.err.println(" -f <B> stop whole program if first error occurs," +
304 " default is true");
305 System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
306 System.exit(USAGE_EXIT_CODE);
307 }
308
309
310
311
312
313
314
315
316 public Monitor newMonitor(final Connection connection, int index, String[] args) {
317 Monitor monitor = null;
318 String[] monitorTargets = null;
319
320 if(index >= 0) {
321 int length = args.length - index;
322 monitorTargets = new String[length];
323 System.arraycopy(args, index, monitorTargets, 0, length);
324 }
325
326 if(this.regionServerMode) {
327 monitor = new RegionServerMonitor(
328 connection,
329 monitorTargets,
330 this.useRegExp,
331 (ExtendedSink)this.sink);
332 } else {
333 monitor = new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink);
334 }
335 return monitor;
336 }
337
338
339 public static abstract class Monitor implements Runnable, Closeable {
340
341 protected Connection connection;
342 protected Admin admin;
343 protected String[] targets;
344 protected boolean useRegExp;
345 protected boolean initialized = false;
346
347 protected boolean done = false;
348 protected int errorCode = 0;
349 protected Sink sink;
350
351 public boolean isDone() {
352 return done;
353 }
354
355 public boolean hasError() {
356 return errorCode != 0;
357 }
358
359 @Override
360 public void close() throws IOException {
361 if (this.admin != null) this.admin.close();
362 }
363
364 protected Monitor(Connection connection, String[] monitorTargets,
365 boolean useRegExp, Sink sink) {
366 if (null == connection) throw new IllegalArgumentException("connection shall not be null");
367
368 this.connection = connection;
369 this.targets = monitorTargets;
370 this.useRegExp = useRegExp;
371 this.sink = sink;
372 }
373
374 public abstract void run();
375
376 protected boolean initAdmin() {
377 if (null == this.admin) {
378 try {
379 this.admin = this.connection.getAdmin();
380 } catch (Exception e) {
381 LOG.error("Initial HBaseAdmin failed...", e);
382 this.errorCode = INIT_ERROR_EXIT_CODE;
383 }
384 } else if (admin.isAborted()) {
385 LOG.error("HBaseAdmin aborted");
386 this.errorCode = INIT_ERROR_EXIT_CODE;
387 }
388 return !this.hasError();
389 }
390 }
391
392
393 private static class RegionMonitor extends Monitor {
394
395 public RegionMonitor(Connection connection, String[] monitorTargets,
396 boolean useRegExp, Sink sink) {
397 super(connection, monitorTargets, useRegExp, sink);
398 }
399
400 @Override
401 public void run() {
402 if(this.initAdmin()) {
403 try {
404 if (this.targets != null && this.targets.length > 0) {
405 String[] tables = generateMonitorTables(this.targets);
406 this.initialized = true;
407 for (String table : tables) {
408 Canary.sniff(admin, sink, table);
409 }
410 } else {
411 sniff();
412 }
413 } catch (Exception e) {
414 LOG.error("Run regionMonitor failed", e);
415 this.errorCode = ERROR_EXIT_CODE;
416 }
417 }
418 this.done = true;
419 }
420
421 private String[] generateMonitorTables(String[] monitorTargets) throws IOException {
422 String[] returnTables = null;
423
424 if(this.useRegExp) {
425 Pattern pattern = null;
426 HTableDescriptor[] tds = null;
427 Set<String> tmpTables = new TreeSet<String>();
428 try {
429 for (String monitorTarget : monitorTargets) {
430 pattern = Pattern.compile(monitorTarget);
431 tds = this.admin.listTables(pattern);
432 if (tds != null) {
433 for (HTableDescriptor td : tds) {
434 tmpTables.add(td.getNameAsString());
435 }
436 }
437 }
438 } catch(IOException e) {
439 LOG.error("Communicate with admin failed", e);
440 throw e;
441 }
442
443 if(tmpTables.size() > 0) {
444 returnTables = tmpTables.toArray(new String[tmpTables.size()]);
445 } else {
446 String msg = "No HTable found, tablePattern:"
447 + Arrays.toString(monitorTargets);
448 LOG.error(msg);
449 this.errorCode = INIT_ERROR_EXIT_CODE;
450 throw new TableNotFoundException(msg);
451 }
452 } else {
453 returnTables = monitorTargets;
454 }
455
456 return returnTables;
457 }
458
459
460
461
462 private void sniff() throws Exception {
463 for (HTableDescriptor table : admin.listTables()) {
464 Canary.sniff(admin, sink, table);
465 }
466 }
467
468 }
469
470
471
472
473
/**
 * Convenience entry point: sniffs the named table, reporting through a new
 * {@link StdOutSink}.
 *
 * @param admin admin handle used to look up and access the table
 * @param tableName table to sniff
 * @throws Exception if the underlying sniff fails
 */
public static void sniff(final Admin admin, TableName tableName) throws Exception {
  sniff(admin, new StdOutSink(), tableName.getNameAsString());
}
477
478
479
480
481
482 private static void sniff(final Admin admin, final Sink sink, String tableName)
483 throws Exception {
484 if (admin.isTableAvailable(TableName.valueOf(tableName))) {
485 sniff(admin, sink, admin.getTableDescriptor(TableName.valueOf(tableName)));
486 } else {
487 LOG.warn(String.format("Table %s is not available", tableName));
488 }
489 }
490
491
492
493
494 private static void sniff(final Admin admin, final Sink sink, HTableDescriptor tableDesc)
495 throws Exception {
496 Table table = null;
497
498 try {
499 table = admin.getConnection().getTable(tableDesc.getTableName());
500 } catch (TableNotFoundException e) {
501 return;
502 }
503
504 try {
505 for (HRegionInfo region : admin.getTableRegions(tableDesc.getTableName())) {
506 try {
507 sniffRegion(admin, sink, region, table);
508 } catch (Exception e) {
509 sink.publishReadFailure(region, e);
510 LOG.debug("sniffRegion failed", e);
511 }
512 }
513 } finally {
514 table.close();
515 }
516 }
517
518
519
520
521
522 private static void sniffRegion(
523 final Admin admin,
524 final Sink sink,
525 HRegionInfo region,
526 Table table) throws Exception {
527 HTableDescriptor tableDesc = table.getTableDescriptor();
528 byte[] startKey = null;
529 Get get = null;
530 Scan scan = null;
531 ResultScanner rs = null;
532 StopWatch stopWatch = new StopWatch();
533 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
534 stopWatch.reset();
535 startKey = region.getStartKey();
536
537 if (startKey.length > 0) {
538 get = new Get(startKey);
539 get.addFamily(column.getName());
540 } else {
541 scan = new Scan();
542 scan.setCaching(1);
543 scan.addFamily(column.getName());
544 scan.setMaxResultSize(1L);
545 }
546
547 try {
548 if (startKey.length > 0) {
549 stopWatch.start();
550 table.get(get);
551 stopWatch.stop();
552 sink.publishReadTiming(region, column, stopWatch.getTime());
553 } else {
554 stopWatch.start();
555 rs = table.getScanner(scan);
556 stopWatch.stop();
557 sink.publishReadTiming(region, column, stopWatch.getTime());
558 }
559 } catch (Exception e) {
560 sink.publishReadFailure(region, column, e);
561 } finally {
562 if (rs != null) {
563 rs.close();
564 }
565 scan = null;
566 get = null;
567 startKey = null;
568 }
569 }
570 }
571
572 private static class RegionServerMonitor extends Monitor {
573
574 public RegionServerMonitor(Connection connection, String[] monitorTargets,
575 boolean useRegExp, ExtendedSink sink) {
576 super(connection, monitorTargets, useRegExp, sink);
577 }
578
579 private ExtendedSink getSink() {
580 return (ExtendedSink) this.sink;
581 }
582
583 @Override
584 public void run() {
585 if (this.initAdmin() && this.checkNoTableNames()) {
586 Map<String, List<HRegionInfo>> rsAndRMap = this.filterRegionServerByName();
587 this.initialized = true;
588 this.monitorRegionServers(rsAndRMap);
589 }
590 this.done = true;
591 }
592
593 private boolean checkNoTableNames() {
594 List<String> foundTableNames = new ArrayList<String>();
595 TableName[] tableNames = null;
596
597 try {
598 tableNames = this.admin.listTableNames();
599 } catch (IOException e) {
600 LOG.error("Get listTableNames failed", e);
601 this.errorCode = INIT_ERROR_EXIT_CODE;
602 return false;
603 }
604
605 if (this.targets == null || this.targets.length == 0) return true;
606
607 for (String target : this.targets) {
608 for (TableName tableName : tableNames) {
609 if (target.equals(tableName.getNameAsString())) {
610 foundTableNames.add(target);
611 }
612 }
613 }
614
615 if (foundTableNames.size() > 0) {
616 System.err.println("Cannot pass a tablename when using the -regionserver " +
617 "option, tablenames:" + foundTableNames.toString());
618 this.errorCode = USAGE_EXIT_CODE;
619 }
620 return foundTableNames.size() == 0;
621 }
622
623 private void monitorRegionServers(Map<String, List<HRegionInfo>> rsAndRMap) {
624 String serverName = null;
625 TableName tableName = null;
626 HRegionInfo region = null;
627 Table table = null;
628 Get get = null;
629 byte[] startKey = null;
630 Scan scan = null;
631 StopWatch stopWatch = new StopWatch();
632
633 for (Map.Entry<String, List<HRegionInfo>> entry : rsAndRMap.entrySet()) {
634 stopWatch.reset();
635 serverName = entry.getKey();
636
637 region = entry.getValue().get(0);
638 try {
639 tableName = region.getTable();
640 table = admin.getConnection().getTable(tableName);
641 startKey = region.getStartKey();
642
643 if(startKey.length > 0) {
644 get = new Get(startKey);
645 stopWatch.start();
646 table.get(get);
647 stopWatch.stop();
648 } else {
649 scan = new Scan();
650 scan.setCaching(1);
651 scan.setMaxResultSize(1L);
652 stopWatch.start();
653 ResultScanner s = table.getScanner(scan);
654 s.close();
655 stopWatch.stop();
656 }
657 this.getSink().publishReadTiming(tableName.getNameAsString(),
658 serverName, stopWatch.getTime());
659 } catch (TableNotFoundException tnfe) {
660
661 } catch (TableNotEnabledException tnee) {
662
663 LOG.debug("The targeted table was disabled. Assuming success.");
664 } catch (DoNotRetryIOException dnrioe) {
665 this.getSink().publishReadFailure(tableName.getNameAsString(), serverName);
666 LOG.error(dnrioe);
667 } catch (IOException e) {
668 this.getSink().publishReadFailure(tableName.getNameAsString(), serverName);
669 LOG.error(e);
670 this.errorCode = ERROR_EXIT_CODE;
671 } finally {
672 if (table != null) {
673 try {
674 table.close();
675 } catch (IOException e) {
676 }
677 }
678 scan = null;
679 get = null;
680 startKey = null;
681 }
682 }
683 }
684
685 private Map<String, List<HRegionInfo>> filterRegionServerByName() {
686 Map<String, List<HRegionInfo>> regionServerAndRegionsMap = this.getAllRegionServerByName();
687 regionServerAndRegionsMap = this.doFilterRegionServerByName(regionServerAndRegionsMap);
688 return regionServerAndRegionsMap;
689 }
690
691 private Map<String, List<HRegionInfo>> getAllRegionServerByName() {
692 Map<String, List<HRegionInfo>> rsAndRMap = new HashMap<String, List<HRegionInfo>>();
693 Table table = null;
694 RegionLocator regionLocator = null;
695 try {
696 HTableDescriptor[] tableDescs = this.admin.listTables();
697 List<HRegionInfo> regions = null;
698 for (HTableDescriptor tableDesc : tableDescs) {
699 table = this.admin.getConnection().getTable(tableDesc.getTableName());
700 regionLocator = this.admin.getConnection().getRegionLocator(tableDesc.getTableName());
701
702 for (HRegionLocation location: regionLocator.getAllRegionLocations()) {
703 ServerName rs = location.getServerName();
704 String rsName = rs.getHostname();
705 HRegionInfo r = location.getRegionInfo();
706
707 if (rsAndRMap.containsKey(rsName)) {
708 regions = rsAndRMap.get(rsName);
709 } else {
710 regions = new ArrayList<HRegionInfo>();
711 rsAndRMap.put(rsName, regions);
712 }
713 regions.add(r);
714 }
715 table.close();
716 }
717
718 } catch (IOException e) {
719 String msg = "Get HTables info failed";
720 LOG.error(msg, e);
721 this.errorCode = INIT_ERROR_EXIT_CODE;
722 } finally {
723 if (table != null) {
724 try {
725 table.close();
726 } catch (IOException e) {
727 LOG.warn("Close table failed", e);
728 }
729 }
730 }
731
732 return rsAndRMap;
733 }
734
735 private Map<String, List<HRegionInfo>> doFilterRegionServerByName(
736 Map<String, List<HRegionInfo>> fullRsAndRMap) {
737
738 Map<String, List<HRegionInfo>> filteredRsAndRMap = null;
739
740 if (this.targets != null && this.targets.length > 0) {
741 filteredRsAndRMap = new HashMap<String, List<HRegionInfo>>();
742 Pattern pattern = null;
743 Matcher matcher = null;
744 boolean regExpFound = false;
745 for (String rsName : this.targets) {
746 if (this.useRegExp) {
747 regExpFound = false;
748 pattern = Pattern.compile(rsName);
749 for (Map.Entry<String,List<HRegionInfo>> entry : fullRsAndRMap.entrySet()) {
750 matcher = pattern.matcher(entry.getKey());
751 if (matcher.matches()) {
752 filteredRsAndRMap.put(entry.getKey(), entry.getValue());
753 regExpFound = true;
754 }
755 }
756 if (!regExpFound) {
757 LOG.info("No RegionServerInfo found, regionServerPattern:" + rsName);
758 }
759 } else {
760 if (fullRsAndRMap.containsKey(rsName)) {
761 filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName));
762 } else {
763 LOG.info("No RegionServerInfo found, regionServerName:" + rsName);
764 }
765 }
766 }
767 } else {
768 filteredRsAndRMap = fullRsAndRMap;
769 }
770 return filteredRsAndRMap;
771 }
772 }
773
/**
 * Command-line entry point: creates an HBase configuration, runs a default {@code Canary}
 * through {@code ToolRunner} and exits the JVM with the code it returns.
 *
 * @param args command-line arguments, passed on to {@code ToolRunner}
 * @throws Exception if the auth chore setup or the tool run fails
 */
public static void main(String[] args) throws Exception {
  final Configuration conf = HBaseConfiguration.create();
  // NOTE(review): presumably keeps the login credentials fresh for long-running (daemon)
  // use; confirm against AuthUtil.
  AuthUtil.launchAuthChore(conf);
  int exitCode = ToolRunner.run(conf, new Canary(), args);
  System.exit(exitCode);
}
780 }