1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.tool;
21
22 import java.io.Closeable;
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.HashMap;
27 import java.util.List;
28 import java.util.Map;
29 import java.util.Set;
30 import java.util.TreeSet;
31 import java.util.regex.Matcher;
32 import java.util.regex.Pattern;
33
34 import org.apache.commons.lang.time.StopWatch;
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.hbase.AuthUtil;
39 import org.apache.hadoop.hbase.DoNotRetryIOException;
40 import org.apache.hadoop.hbase.HBaseConfiguration;
41 import org.apache.hadoop.hbase.HColumnDescriptor;
42 import org.apache.hadoop.hbase.HRegionInfo;
43 import org.apache.hadoop.hbase.HRegionLocation;
44 import org.apache.hadoop.hbase.HTableDescriptor;
45 import org.apache.hadoop.hbase.ServerName;
46 import org.apache.hadoop.hbase.TableName;
47 import org.apache.hadoop.hbase.TableNotEnabledException;
48 import org.apache.hadoop.hbase.TableNotFoundException;
49 import org.apache.hadoop.hbase.client.Admin;
50 import org.apache.hadoop.hbase.client.Connection;
51 import org.apache.hadoop.hbase.client.ConnectionFactory;
52 import org.apache.hadoop.hbase.client.Get;
53 import org.apache.hadoop.hbase.client.RegionLocator;
54 import org.apache.hadoop.hbase.client.ResultScanner;
55 import org.apache.hadoop.hbase.client.Scan;
56 import org.apache.hadoop.hbase.client.Table;
57 import org.apache.hadoop.util.Tool;
58 import org.apache.hadoop.util.ToolRunner;
59
60
61
62
63
64
65
66
67
68
69
70
71 public final class Canary implements Tool {
72
73 public interface Sink {
74 public void publishReadFailure(HRegionInfo region, Exception e);
75 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
76 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
77 }
78
79
80 public interface ExtendedSink extends Sink {
81 public void publishReadFailure(String table, String server);
82 public void publishReadTiming(String table, String server, long msTime);
83 }
84
85
86
87 public static class StdOutSink implements Sink {
88 @Override
89 public void publishReadFailure(HRegionInfo region, Exception e) {
90 LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
91 }
92
93 @Override
94 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
95 LOG.error(String.format("read from region %s column family %s failed",
96 region.getRegionNameAsString(), column.getNameAsString()), e);
97 }
98
99 @Override
100 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
101 LOG.info(String.format("read from region %s column family %s in %dms",
102 region.getRegionNameAsString(), column.getNameAsString(), msTime));
103 }
104 }
105
106 public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
107
108 @Override
109 public void publishReadFailure(String table, String server) {
110 LOG.error(String.format("Read from table:%s on region server:%s", table, server));
111 }
112
113 @Override
114 public void publishReadTiming(String table, String server, long msTime) {
115 LOG.info(String.format("Read from table:%s on region server:%s in %dms",
116 table, server, msTime));
117 }
118 }
119
120 private static final int USAGE_EXIT_CODE = 1;
121 private static final int INIT_ERROR_EXIT_CODE = 2;
122 private static final int TIMEOUT_ERROR_EXIT_CODE = 3;
123 private static final int ERROR_EXIT_CODE = 4;
124
125 private static final long DEFAULT_INTERVAL = 6000;
126
127 private static final long DEFAULT_TIMEOUT = 600000;
128
129 private static final Log LOG = LogFactory.getLog(Canary.class);
130
131 private Configuration conf = null;
132 private long interval = 0;
133 private Sink sink = null;
134
135 private boolean useRegExp;
136 private long timeout = DEFAULT_TIMEOUT;
137 private boolean failOnError = true;
138 private boolean regionServerMode = false;
139
/** Creates a canary that reports through a {@link RegionServerStdOutSink}. */
public Canary() {
  this(new RegionServerStdOutSink());
}

/**
 * Creates a canary that reports through the given sink.
 *
 * @param sink destination for probe results
 */
public Canary(Sink sink) {
  this.sink = sink;
}
147
148 @Override
149 public Configuration getConf() {
150 return conf;
151 }
152
153 @Override
154 public void setConf(Configuration conf) {
155 this.conf = conf;
156 }
157
158 @Override
159 public int run(String[] args) throws Exception {
160 int index = -1;
161
162
163 for (int i = 0; i < args.length; i++) {
164 String cmd = args[i];
165
166 if (cmd.startsWith("-")) {
167 if (index >= 0) {
168
169 System.err.println("Invalid command line options");
170 printUsageAndExit();
171 }
172
173 if (cmd.equals("-help")) {
174
175 printUsageAndExit();
176 } else if (cmd.equals("-daemon") && interval == 0) {
177
178 interval = DEFAULT_INTERVAL;
179 } else if (cmd.equals("-interval")) {
180
181 i++;
182
183 if (i == args.length) {
184 System.err.println("-interval needs a numeric value argument.");
185 printUsageAndExit();
186 }
187
188 try {
189 interval = Long.parseLong(args[i]) * 1000;
190 } catch (NumberFormatException e) {
191 System.err.println("-interval needs a numeric value argument.");
192 printUsageAndExit();
193 }
194 } else if(cmd.equals("-regionserver")) {
195 this.regionServerMode = true;
196 } else if (cmd.equals("-e")) {
197 this.useRegExp = true;
198 } else if (cmd.equals("-t")) {
199 i++;
200
201 if (i == args.length) {
202 System.err.println("-t needs a numeric value argument.");
203 printUsageAndExit();
204 }
205
206 try {
207 this.timeout = Long.parseLong(args[i]);
208 } catch (NumberFormatException e) {
209 System.err.println("-t needs a numeric value argument.");
210 printUsageAndExit();
211 }
212
213 } else if (cmd.equals("-f")) {
214 i++;
215
216 if (i == args.length) {
217 System.err
218 .println("-f needs a boolean value argument (true|false).");
219 printUsageAndExit();
220 }
221
222 this.failOnError = Boolean.parseBoolean(args[i]);
223 } else {
224
225 System.err.println(cmd + " options is invalid.");
226 printUsageAndExit();
227 }
228 } else if (index < 0) {
229
230 index = i;
231 }
232 }
233
234
235 AuthUtil.launchAuthChore(conf);
236
237
238 Monitor monitor = null;
239 Thread monitorThread = null;
240 long startTime = 0;
241 long currentTimeLength = 0;
242
243
244
245 try (Connection connection = ConnectionFactory.createConnection(this.conf)) {
246 do {
247
248 try {
249 monitor = this.newMonitor(connection, index, args);
250 monitorThread = new Thread(monitor);
251 startTime = System.currentTimeMillis();
252 monitorThread.start();
253 while (!monitor.isDone()) {
254
255 Thread.sleep(1000);
256
257 if (this.failOnError && monitor.hasError()) {
258 monitorThread.interrupt();
259 if (monitor.initialized) {
260 System.exit(monitor.errorCode);
261 } else {
262 System.exit(INIT_ERROR_EXIT_CODE);
263 }
264 }
265 currentTimeLength = System.currentTimeMillis() - startTime;
266 if (currentTimeLength > this.timeout) {
267 LOG.error("The monitor is running too long (" + currentTimeLength
268 + ") after timeout limit:" + this.timeout
269 + " will be killed itself !!");
270 if (monitor.initialized) {
271 System.exit(TIMEOUT_ERROR_EXIT_CODE);
272 } else {
273 System.exit(INIT_ERROR_EXIT_CODE);
274 }
275 break;
276 }
277 }
278
279 if (this.failOnError && monitor.hasError()) {
280 monitorThread.interrupt();
281 System.exit(monitor.errorCode);
282 }
283 } finally {
284 if (monitor != null) monitor.close();
285 }
286
287 Thread.sleep(interval);
288 } while (interval > 0);
289 }
290
291 return(monitor.errorCode);
292 }
293
294 private void printUsageAndExit() {
295 System.err.printf(
296 "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n",
297 getClass().getName());
298 System.err.println(" where [opts] are:");
299 System.err.println(" -help Show this help and exit.");
300 System.err.println(" -regionserver replace the table argument to regionserver,");
301 System.err.println(" which means to enable regionserver mode");
302 System.err.println(" -daemon Continuous check at defined intervals.");
303 System.err.println(" -interval <N> Interval between checks (sec)");
304 System.err.println(" -e Use region/regionserver as regular expression");
305 System.err.println(" which means the region/regionserver is regular expression pattern");
306 System.err.println(" -f <B> stop whole program if first error occurs," +
307 " default is true");
308 System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
309 System.exit(USAGE_EXIT_CODE);
310 }
311
312
313
314
315
316
317
318
319 public Monitor newMonitor(final Connection connection, int index, String[] args) {
320 Monitor monitor = null;
321 String[] monitorTargets = null;
322
323 if(index >= 0) {
324 int length = args.length - index;
325 monitorTargets = new String[length];
326 System.arraycopy(args, index, monitorTargets, 0, length);
327 }
328
329 if(this.regionServerMode) {
330 monitor = new RegionServerMonitor(
331 connection,
332 monitorTargets,
333 this.useRegExp,
334 (ExtendedSink)this.sink);
335 } else {
336 monitor = new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink);
337 }
338 return monitor;
339 }
340
341
342 public static abstract class Monitor implements Runnable, Closeable {
343
344 protected Connection connection;
345 protected Admin admin;
346 protected String[] targets;
347 protected boolean useRegExp;
348 protected boolean initialized = false;
349
350 protected boolean done = false;
351 protected int errorCode = 0;
352 protected Sink sink;
353
354 public boolean isDone() {
355 return done;
356 }
357
358 public boolean hasError() {
359 return errorCode != 0;
360 }
361
362 @Override
363 public void close() throws IOException {
364 if (this.admin != null) this.admin.close();
365 }
366
367 protected Monitor(Connection connection, String[] monitorTargets,
368 boolean useRegExp, Sink sink) {
369 if (null == connection) throw new IllegalArgumentException("connection shall not be null");
370
371 this.connection = connection;
372 this.targets = monitorTargets;
373 this.useRegExp = useRegExp;
374 this.sink = sink;
375 }
376
377 public abstract void run();
378
379 protected boolean initAdmin() {
380 if (null == this.admin) {
381 try {
382 this.admin = this.connection.getAdmin();
383 } catch (Exception e) {
384 LOG.error("Initial HBaseAdmin failed...", e);
385 this.errorCode = INIT_ERROR_EXIT_CODE;
386 }
387 } else if (admin.isAborted()) {
388 LOG.error("HBaseAdmin aborted");
389 this.errorCode = INIT_ERROR_EXIT_CODE;
390 }
391 return !this.hasError();
392 }
393 }
394
395
396 private static class RegionMonitor extends Monitor {
397
398 public RegionMonitor(Connection connection, String[] monitorTargets,
399 boolean useRegExp, Sink sink) {
400 super(connection, monitorTargets, useRegExp, sink);
401 }
402
403 @Override
404 public void run() {
405 if(this.initAdmin()) {
406 try {
407 if (this.targets != null && this.targets.length > 0) {
408 String[] tables = generateMonitorTables(this.targets);
409 this.initialized = true;
410 for (String table : tables) {
411 Canary.sniff(admin, sink, table);
412 }
413 } else {
414 sniff();
415 }
416 } catch (Exception e) {
417 LOG.error("Run regionMonitor failed", e);
418 this.errorCode = ERROR_EXIT_CODE;
419 }
420 }
421 this.done = true;
422 }
423
424 private String[] generateMonitorTables(String[] monitorTargets) throws IOException {
425 String[] returnTables = null;
426
427 if(this.useRegExp) {
428 Pattern pattern = null;
429 HTableDescriptor[] tds = null;
430 Set<String> tmpTables = new TreeSet<String>();
431 try {
432 for (String monitorTarget : monitorTargets) {
433 pattern = Pattern.compile(monitorTarget);
434 tds = this.admin.listTables(pattern);
435 if (tds != null) {
436 for (HTableDescriptor td : tds) {
437 tmpTables.add(td.getNameAsString());
438 }
439 }
440 }
441 } catch(IOException e) {
442 LOG.error("Communicate with admin failed", e);
443 throw e;
444 }
445
446 if(tmpTables.size() > 0) {
447 returnTables = tmpTables.toArray(new String[tmpTables.size()]);
448 } else {
449 String msg = "No HTable found, tablePattern:"
450 + Arrays.toString(monitorTargets);
451 LOG.error(msg);
452 this.errorCode = INIT_ERROR_EXIT_CODE;
453 throw new TableNotFoundException(msg);
454 }
455 } else {
456 returnTables = monitorTargets;
457 }
458
459 return returnTables;
460 }
461
462
463
464
465 private void sniff() throws Exception {
466 for (HTableDescriptor table : admin.listTables()) {
467 Canary.sniff(admin, sink, table);
468 }
469 }
470
471 }
472
473
474
475
476
477 public static void sniff(final Admin admin, TableName tableName) throws Exception {
478 sniff(admin, new StdOutSink(), tableName.getNameAsString());
479 }
480
481
482
483
484
485 private static void sniff(final Admin admin, final Sink sink, String tableName)
486 throws Exception {
487 if (admin.isTableAvailable(TableName.valueOf(tableName))) {
488 sniff(admin, sink, admin.getTableDescriptor(TableName.valueOf(tableName)));
489 } else {
490 LOG.warn(String.format("Table %s is not available", tableName));
491 }
492 }
493
494
495
496
497 private static void sniff(final Admin admin, final Sink sink, HTableDescriptor tableDesc)
498 throws Exception {
499 Table table = null;
500
501 try {
502 table = admin.getConnection().getTable(tableDesc.getTableName());
503 } catch (TableNotFoundException e) {
504 return;
505 }
506
507 try {
508 for (HRegionInfo region : admin.getTableRegions(tableDesc.getTableName())) {
509 try {
510 sniffRegion(admin, sink, region, table);
511 } catch (Exception e) {
512 sink.publishReadFailure(region, e);
513 LOG.debug("sniffRegion failed", e);
514 }
515 }
516 } finally {
517 table.close();
518 }
519 }
520
521
522
523
524
525 private static void sniffRegion(
526 final Admin admin,
527 final Sink sink,
528 HRegionInfo region,
529 Table table) throws Exception {
530 HTableDescriptor tableDesc = table.getTableDescriptor();
531 byte[] startKey = null;
532 Get get = null;
533 Scan scan = null;
534 ResultScanner rs = null;
535 StopWatch stopWatch = new StopWatch();
536 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
537 stopWatch.reset();
538 startKey = region.getStartKey();
539
540 if (startKey.length > 0) {
541 get = new Get(startKey);
542 get.addFamily(column.getName());
543 } else {
544 scan = new Scan();
545 scan.setCaching(1);
546 scan.addFamily(column.getName());
547 scan.setMaxResultSize(1L);
548 }
549
550 try {
551 if (startKey.length > 0) {
552 stopWatch.start();
553 table.get(get);
554 stopWatch.stop();
555 sink.publishReadTiming(region, column, stopWatch.getTime());
556 } else {
557 stopWatch.start();
558 rs = table.getScanner(scan);
559 stopWatch.stop();
560 sink.publishReadTiming(region, column, stopWatch.getTime());
561 }
562 } catch (Exception e) {
563 sink.publishReadFailure(region, column, e);
564 } finally {
565 if (rs != null) {
566 rs.close();
567 }
568 scan = null;
569 get = null;
570 startKey = null;
571 }
572 }
573 }
574
575 private static class RegionServerMonitor extends Monitor {
576
577 public RegionServerMonitor(Connection connection, String[] monitorTargets,
578 boolean useRegExp, ExtendedSink sink) {
579 super(connection, monitorTargets, useRegExp, sink);
580 }
581
582 private ExtendedSink getSink() {
583 return (ExtendedSink) this.sink;
584 }
585
586 @Override
587 public void run() {
588 if (this.initAdmin() && this.checkNoTableNames()) {
589 Map<String, List<HRegionInfo>> rsAndRMap = this.filterRegionServerByName();
590 this.initialized = true;
591 this.monitorRegionServers(rsAndRMap);
592 }
593 this.done = true;
594 }
595
596 private boolean checkNoTableNames() {
597 List<String> foundTableNames = new ArrayList<String>();
598 TableName[] tableNames = null;
599
600 try {
601 tableNames = this.admin.listTableNames();
602 } catch (IOException e) {
603 LOG.error("Get listTableNames failed", e);
604 this.errorCode = INIT_ERROR_EXIT_CODE;
605 return false;
606 }
607
608 if (this.targets == null || this.targets.length == 0) return true;
609
610 for (String target : this.targets) {
611 for (TableName tableName : tableNames) {
612 if (target.equals(tableName.getNameAsString())) {
613 foundTableNames.add(target);
614 }
615 }
616 }
617
618 if (foundTableNames.size() > 0) {
619 System.err.println("Cannot pass a tablename when using the -regionserver " +
620 "option, tablenames:" + foundTableNames.toString());
621 this.errorCode = USAGE_EXIT_CODE;
622 }
623 return foundTableNames.size() == 0;
624 }
625
626 private void monitorRegionServers(Map<String, List<HRegionInfo>> rsAndRMap) {
627 String serverName = null;
628 TableName tableName = null;
629 HRegionInfo region = null;
630 Table table = null;
631 Get get = null;
632 byte[] startKey = null;
633 Scan scan = null;
634 StopWatch stopWatch = new StopWatch();
635
636 for (Map.Entry<String, List<HRegionInfo>> entry : rsAndRMap.entrySet()) {
637 stopWatch.reset();
638 serverName = entry.getKey();
639
640 region = entry.getValue().get(0);
641 try {
642 tableName = region.getTable();
643 table = admin.getConnection().getTable(tableName);
644 startKey = region.getStartKey();
645
646 if(startKey.length > 0) {
647 get = new Get(startKey);
648 stopWatch.start();
649 table.get(get);
650 stopWatch.stop();
651 } else {
652 scan = new Scan();
653 scan.setCaching(1);
654 scan.setMaxResultSize(1L);
655 stopWatch.start();
656 ResultScanner s = table.getScanner(scan);
657 s.close();
658 stopWatch.stop();
659 }
660 this.getSink().publishReadTiming(tableName.getNameAsString(),
661 serverName, stopWatch.getTime());
662 } catch (TableNotFoundException tnfe) {
663
664 } catch (TableNotEnabledException tnee) {
665
666 LOG.debug("The targeted table was disabled. Assuming success.");
667 } catch (DoNotRetryIOException dnrioe) {
668 this.getSink().publishReadFailure(tableName.getNameAsString(), serverName);
669 LOG.error(dnrioe);
670 } catch (IOException e) {
671 this.getSink().publishReadFailure(tableName.getNameAsString(), serverName);
672 LOG.error(e);
673 this.errorCode = ERROR_EXIT_CODE;
674 } finally {
675 if (table != null) {
676 try {
677 table.close();
678 } catch (IOException e) {
679 }
680 }
681 scan = null;
682 get = null;
683 startKey = null;
684 }
685 }
686 }
687
688 private Map<String, List<HRegionInfo>> filterRegionServerByName() {
689 Map<String, List<HRegionInfo>> regionServerAndRegionsMap = this.getAllRegionServerByName();
690 regionServerAndRegionsMap = this.doFilterRegionServerByName(regionServerAndRegionsMap);
691 return regionServerAndRegionsMap;
692 }
693
694 private Map<String, List<HRegionInfo>> getAllRegionServerByName() {
695 Map<String, List<HRegionInfo>> rsAndRMap = new HashMap<String, List<HRegionInfo>>();
696 Table table = null;
697 RegionLocator regionLocator = null;
698 try {
699 HTableDescriptor[] tableDescs = this.admin.listTables();
700 List<HRegionInfo> regions = null;
701 for (HTableDescriptor tableDesc : tableDescs) {
702 table = this.admin.getConnection().getTable(tableDesc.getTableName());
703 regionLocator = this.admin.getConnection().getRegionLocator(tableDesc.getTableName());
704
705 for (HRegionLocation location: regionLocator.getAllRegionLocations()) {
706 ServerName rs = location.getServerName();
707 String rsName = rs.getHostname();
708 HRegionInfo r = location.getRegionInfo();
709
710 if (rsAndRMap.containsKey(rsName)) {
711 regions = rsAndRMap.get(rsName);
712 } else {
713 regions = new ArrayList<HRegionInfo>();
714 rsAndRMap.put(rsName, regions);
715 }
716 regions.add(r);
717 }
718 table.close();
719 }
720
721 } catch (IOException e) {
722 String msg = "Get HTables info failed";
723 LOG.error(msg, e);
724 this.errorCode = INIT_ERROR_EXIT_CODE;
725 } finally {
726 if (table != null) {
727 try {
728 table.close();
729 } catch (IOException e) {
730 LOG.warn("Close table failed", e);
731 }
732 }
733 }
734
735 return rsAndRMap;
736 }
737
738 private Map<String, List<HRegionInfo>> doFilterRegionServerByName(
739 Map<String, List<HRegionInfo>> fullRsAndRMap) {
740
741 Map<String, List<HRegionInfo>> filteredRsAndRMap = null;
742
743 if (this.targets != null && this.targets.length > 0) {
744 filteredRsAndRMap = new HashMap<String, List<HRegionInfo>>();
745 Pattern pattern = null;
746 Matcher matcher = null;
747 boolean regExpFound = false;
748 for (String rsName : this.targets) {
749 if (this.useRegExp) {
750 regExpFound = false;
751 pattern = Pattern.compile(rsName);
752 for (Map.Entry<String,List<HRegionInfo>> entry : fullRsAndRMap.entrySet()) {
753 matcher = pattern.matcher(entry.getKey());
754 if (matcher.matches()) {
755 filteredRsAndRMap.put(entry.getKey(), entry.getValue());
756 regExpFound = true;
757 }
758 }
759 if (!regExpFound) {
760 LOG.info("No RegionServerInfo found, regionServerPattern:" + rsName);
761 }
762 } else {
763 if (fullRsAndRMap.containsKey(rsName)) {
764 filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName));
765 } else {
766 LOG.info("No RegionServerInfo found, regionServerName:" + rsName);
767 }
768 }
769 }
770 } else {
771 filteredRsAndRMap = fullRsAndRMap;
772 }
773 return filteredRsAndRMap;
774 }
775 }
776
777 public static void main(String[] args) throws Exception {
778 final Configuration conf = HBaseConfiguration.create();
779 int exitCode = ToolRunner.run(conf, new Canary(), args);
780 System.exit(exitCode);
781 }
782 }