1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.tool;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.TreeSet;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32
33 import org.apache.commons.lang.time.StopWatch;
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.hbase.DoNotRetryIOException;
38 import org.apache.hadoop.hbase.HBaseConfiguration;
39 import org.apache.hadoop.hbase.HColumnDescriptor;
40 import org.apache.hadoop.hbase.HRegionInfo;
41 import org.apache.hadoop.hbase.HTableDescriptor;
42 import org.apache.hadoop.hbase.ServerName;
43 import org.apache.hadoop.hbase.TableName;
44 import org.apache.hadoop.hbase.TableNotEnabledException;
45 import org.apache.hadoop.hbase.TableNotFoundException;
46 import org.apache.hadoop.hbase.client.Get;
47 import org.apache.hadoop.hbase.client.HBaseAdmin;
48 import org.apache.hadoop.hbase.client.HTable;
49 import org.apache.hadoop.hbase.client.ResultScanner;
50 import org.apache.hadoop.hbase.client.Scan;
51 import org.apache.hadoop.util.Tool;
52 import org.apache.hadoop.util.ToolRunner;
53
54
55
56
57
58
59
60
61
62
63
64
65 public final class Canary implements Tool {
66
67 public interface Sink {
68 public void publishReadFailure(HRegionInfo region, Exception e);
69 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
70 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
71 }
72
73
74 public interface ExtendedSink extends Sink {
75 public void publishReadFailure(String table, String server);
76 public void publishReadTiming(String table, String server, long msTime);
77 }
78
79
80
81 public static class StdOutSink implements Sink {
82 @Override
83 public void publishReadFailure(HRegionInfo region, Exception e) {
84 LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
85 }
86
87 @Override
88 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
89 LOG.error(String.format("read from region %s column family %s failed",
90 region.getRegionNameAsString(), column.getNameAsString()), e);
91 }
92
93 @Override
94 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
95 LOG.info(String.format("read from region %s column family %s in %dms",
96 region.getRegionNameAsString(), column.getNameAsString(), msTime));
97 }
98 }
99
100 public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
101
102 @Override
103 public void publishReadFailure(String table, String server) {
104 LOG.error(String.format("Read from table:%s on region server:%s", table, server));
105 }
106
107 @Override
108 public void publishReadTiming(String table, String server, long msTime) {
109 LOG.info(String.format("Read from table:%s on region server:%s in %dms",
110 table, server, msTime));
111 }
112 }
113
114 private static final int USAGE_EXIT_CODE = 1;
115 private static final int INIT_ERROR_EXIT_CODE = 2;
116 private static final int TIMEOUT_ERROR_EXIT_CODE = 3;
117 private static final int ERROR_EXIT_CODE = 4;
118
119 private static final long DEFAULT_INTERVAL = 6000;
120
121 private static final long DEFAULT_TIMEOUT = 600000;
122
123 private static final Log LOG = LogFactory.getLog(Canary.class);
124
125 private Configuration conf = null;
126 private long interval = 0;
127 private Sink sink = null;
128
129 private boolean useRegExp;
130 private long timeout = DEFAULT_TIMEOUT;
131 private boolean failOnError = true;
132 private boolean regionServerMode = false;
133
/**
 * Creates a canary that reports through a {@link RegionServerStdOutSink}, which
 * serves both the region mode and the region server mode.
 */
public Canary() {
  this(new RegionServerStdOutSink());
}
137
/**
 * Creates a canary that reports through the given sink.
 *
 * @param sink receiver of probe results; it is cast to {@link ExtendedSink} when
 *     the canary is run in region server mode
 */
public Canary(Sink sink) {
  this.sink = sink;
}
141
142 @Override
143 public Configuration getConf() {
144 return conf;
145 }
146
147 @Override
148 public void setConf(Configuration conf) {
149 this.conf = conf;
150 }
151
152 @Override
153 public int run(String[] args) throws Exception {
154 int index = -1;
155
156
157 for (int i = 0; i < args.length; i++) {
158 String cmd = args[i];
159
160 if (cmd.startsWith("-")) {
161 if (index >= 0) {
162
163 System.err.println("Invalid command line options");
164 printUsageAndExit();
165 }
166
167 if (cmd.equals("-help")) {
168
169 printUsageAndExit();
170 } else if (cmd.equals("-daemon") && interval == 0) {
171
172 interval = DEFAULT_INTERVAL;
173 } else if (cmd.equals("-interval")) {
174
175 i++;
176
177 if (i == args.length) {
178 System.err.println("-interval needs a numeric value argument.");
179 printUsageAndExit();
180 }
181
182 try {
183 interval = Long.parseLong(args[i]) * 1000;
184 } catch (NumberFormatException e) {
185 System.err.println("-interval needs a numeric value argument.");
186 printUsageAndExit();
187 }
188 } else if(cmd.equals("-regionserver")) {
189 this.regionServerMode = true;
190 } else if (cmd.equals("-e")) {
191 this.useRegExp = true;
192 } else if (cmd.equals("-t")) {
193 i++;
194
195 if (i == args.length) {
196 System.err.println("-t needs a numeric value argument.");
197 printUsageAndExit();
198 }
199
200 try {
201 this.timeout = Long.parseLong(args[i]);
202 } catch (NumberFormatException e) {
203 System.err.println("-t needs a numeric value argument.");
204 printUsageAndExit();
205 }
206
207 } else if (cmd.equals("-f")) {
208 i++;
209
210 if (i == args.length) {
211 System.err
212 .println("-f needs a boolean value argument (true|false).");
213 printUsageAndExit();
214 }
215
216 this.failOnError = Boolean.parseBoolean(args[i]);
217 } else {
218
219 System.err.println(cmd + " options is invalid.");
220 printUsageAndExit();
221 }
222 } else if (index < 0) {
223
224 index = i;
225 }
226 }
227
228
229 Monitor monitor = null;
230 Thread monitorThread = null;
231 long startTime = 0;
232 long currentTimeLength = 0;
233
234 do {
235
236 monitor = this.newMonitor(index, args);
237 monitorThread = new Thread(monitor);
238 startTime = System.currentTimeMillis();
239 monitorThread.start();
240 while (!monitor.isDone()) {
241
242 Thread.sleep(1000);
243
244 if (this.failOnError && monitor.hasError()) {
245 monitorThread.interrupt();
246 if (monitor.initialized) {
247 System.exit(monitor.errorCode);
248 } else {
249 System.exit(INIT_ERROR_EXIT_CODE);
250 }
251 }
252 currentTimeLength = System.currentTimeMillis() - startTime;
253 if (currentTimeLength > this.timeout) {
254 LOG.error("The monitor is running too long (" + currentTimeLength
255 + ") after timeout limit:" + this.timeout
256 + " will be killed itself !!");
257 if (monitor.initialized) {
258 System.exit(TIMEOUT_ERROR_EXIT_CODE);
259 } else {
260 System.exit(INIT_ERROR_EXIT_CODE);
261 }
262 break;
263 }
264 }
265
266 if (this.failOnError && monitor.hasError()) {
267 monitorThread.interrupt();
268 System.exit(monitor.errorCode);
269 }
270
271 Thread.sleep(interval);
272 } while (interval > 0);
273
274 return(monitor.errorCode);
275 }
276
277 private void printUsageAndExit() {
278 System.err.printf(
279 "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n",
280 getClass().getName());
281 System.err.println(" where [opts] are:");
282 System.err.println(" -help Show this help and exit.");
283 System.err.println(" -regionserver replace the table argument to regionserver,");
284 System.err.println(" which means to enable regionserver mode");
285 System.err.println(" -daemon Continuous check at defined intervals.");
286 System.err.println(" -interval <N> Interval between checks (sec)");
287 System.err.println(" -e Use region/regionserver as regular expression");
288 System.err.println(" which means the region/regionserver is regular expression pattern");
289 System.err.println(" -f <B> stop whole program if first error occurs," +
290 " default is true");
291 System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
292 System.exit(USAGE_EXIT_CODE);
293 }
294
295
296
297
298
299
300
301
302 public Monitor newMonitor(int index, String[] args) {
303 Monitor monitor = null;
304 String[] monitorTargets = null;
305
306 if(index >= 0) {
307 int length = args.length - index;
308 monitorTargets = new String[length];
309 System.arraycopy(args, index, monitorTargets, 0, length);
310 }
311
312 if(this.regionServerMode) {
313 monitor = new RegionServerMonitor(
314 this.conf,
315 monitorTargets,
316 this.useRegExp,
317 (ExtendedSink)this.sink);
318 } else {
319 monitor = new RegionMonitor(this.conf, monitorTargets, this.useRegExp, this.sink);
320 }
321 return monitor;
322 }
323
324
325 public static abstract class Monitor implements Runnable {
326
327 protected Configuration config;
328 protected HBaseAdmin admin;
329 protected String[] targets;
330 protected boolean useRegExp;
331 protected boolean initialized = false;
332
333 protected boolean done = false;
334 protected int errorCode = 0;
335 protected Sink sink;
336
337 public boolean isDone() {
338 return done;
339 }
340
341 public boolean hasError() {
342 return errorCode != 0;
343 }
344
345 protected Monitor(Configuration config, String[] monitorTargets,
346 boolean useRegExp, Sink sink) {
347 if (null == config)
348 throw new IllegalArgumentException("config shall not be null");
349
350 this.config = config;
351 this.targets = monitorTargets;
352 this.useRegExp = useRegExp;
353 this.sink = sink;
354 }
355
356 public abstract void run();
357
358 protected boolean initAdmin() {
359 if (null == this.admin) {
360 try {
361 this.admin = new HBaseAdmin(config);
362 } catch (Exception e) {
363 LOG.error("Initial HBaseAdmin failed...", e);
364 this.errorCode = INIT_ERROR_EXIT_CODE;
365 }
366 } else if (admin.isAborted()) {
367 LOG.error("HBaseAdmin aborted");
368 this.errorCode = INIT_ERROR_EXIT_CODE;
369 }
370 return !this.hasError();
371 }
372 }
373
374
375 private static class RegionMonitor extends Monitor {
376
377 public RegionMonitor(Configuration config, String[] monitorTargets,
378 boolean useRegExp, Sink sink) {
379 super(config, monitorTargets, useRegExp, sink);
380 }
381
382 @Override
383 public void run() {
384 if(this.initAdmin()) {
385 try {
386 if (this.targets != null && this.targets.length > 0) {
387 String[] tables = generateMonitorTables(this.targets);
388 this.initialized = true;
389 for (String table : tables) {
390 Canary.sniff(admin, sink, table);
391 }
392 } else {
393 sniff();
394 }
395 } catch (Exception e) {
396 LOG.error("Run regionMonitor failed", e);
397 this.errorCode = ERROR_EXIT_CODE;
398 }
399 }
400 this.done = true;
401 }
402
403 private String[] generateMonitorTables(String[] monitorTargets) throws IOException {
404 String[] returnTables = null;
405
406 if(this.useRegExp) {
407 Pattern pattern = null;
408 HTableDescriptor[] tds = null;
409 Set<String> tmpTables = new TreeSet<String>();
410 try {
411 for (String monitorTarget : monitorTargets) {
412 pattern = Pattern.compile(monitorTarget);
413 tds = this.admin.listTables(pattern);
414 if (tds != null) {
415 for (HTableDescriptor td : tds) {
416 tmpTables.add(td.getNameAsString());
417 }
418 }
419 }
420 } catch(IOException e) {
421 LOG.error("Communicate with admin failed", e);
422 throw e;
423 }
424
425 if(tmpTables.size() > 0) {
426 returnTables = tmpTables.toArray(new String[tmpTables.size()]);
427 } else {
428 String msg = "No HTable found, tablePattern:"
429 + Arrays.toString(monitorTargets);
430 LOG.error(msg);
431 this.errorCode = INIT_ERROR_EXIT_CODE;
432 throw new TableNotFoundException(msg);
433 }
434 } else {
435 returnTables = monitorTargets;
436 }
437
438 return returnTables;
439 }
440
441
442
443
444 private void sniff() throws Exception {
445 for (HTableDescriptor table : admin.listTables()) {
446 Canary.sniff(admin, sink, table);
447 }
448 }
449
450 }
451
452
453
454
455
456 public static void sniff(final HBaseAdmin admin, TableName tableName) throws Exception {
457 sniff(admin, new StdOutSink(), tableName.getNameAsString());
458 }
459
460
461
462
463
464 private static void sniff(final HBaseAdmin admin, final Sink sink, String tableName)
465 throws Exception {
466 if (admin.isTableAvailable(tableName)) {
467 sniff(admin, sink, admin.getTableDescriptor(tableName.getBytes()));
468 } else {
469 LOG.warn(String.format("Table %s is not available", tableName));
470 }
471 }
472
473
474
475
476 private static void sniff(final HBaseAdmin admin, final Sink sink, HTableDescriptor tableDesc)
477 throws Exception {
478 HTable table = null;
479
480 try {
481 table = new HTable(admin.getConfiguration(), tableDesc.getName());
482 } catch (TableNotFoundException e) {
483 return;
484 }
485
486 for (HRegionInfo region : admin.getTableRegions(tableDesc.getName())) {
487 try {
488 sniffRegion(admin, sink, region, table);
489 } catch (Exception e) {
490 sink.publishReadFailure(region, e);
491 LOG.debug("sniffRegion failed", e);
492 }
493 }
494 }
495
496
497
498
499
500 private static void sniffRegion(
501 final HBaseAdmin admin,
502 final Sink sink,
503 HRegionInfo region,
504 HTable table) throws Exception {
505 HTableDescriptor tableDesc = table.getTableDescriptor();
506 byte[] startKey = null;
507 Get get = null;
508 Scan scan = null;
509 ResultScanner rs = null;
510 StopWatch stopWatch = new StopWatch();
511 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
512 stopWatch.reset();
513 startKey = region.getStartKey();
514
515 if (startKey.length > 0) {
516 get = new Get(startKey);
517 get.addFamily(column.getName());
518 } else {
519 scan = new Scan();
520 scan.setCaching(1);
521 scan.addFamily(column.getName());
522 scan.setMaxResultSize(1L);
523 }
524
525 try {
526 if (startKey.length > 0) {
527 stopWatch.start();
528 table.get(get);
529 stopWatch.stop();
530 sink.publishReadTiming(region, column, stopWatch.getTime());
531 } else {
532 stopWatch.start();
533 rs = table.getScanner(scan);
534 stopWatch.stop();
535 sink.publishReadTiming(region, column, stopWatch.getTime());
536 }
537 } catch (Exception e) {
538 sink.publishReadFailure(region, column, e);
539 } finally {
540 if (rs != null) {
541 rs.close();
542 }
543 scan = null;
544 get = null;
545 startKey = null;
546 }
547 }
548 }
549
550 private static class RegionServerMonitor extends Monitor {
551
552 public RegionServerMonitor(Configuration config, String[] monitorTargets,
553 boolean useRegExp, ExtendedSink sink) {
554 super(config, monitorTargets, useRegExp, sink);
555 }
556
557 private ExtendedSink getSink() {
558 return (ExtendedSink) this.sink;
559 }
560
561 @Override
562 public void run() {
563 if (this.initAdmin() && this.checkNoTableNames()) {
564 Map<String, List<HRegionInfo>> rsAndRMap = this.filterRegionServerByName();
565 this.initialized = true;
566 this.monitorRegionServers(rsAndRMap);
567 }
568 this.done = true;
569 }
570
571 private boolean checkNoTableNames() {
572 List<String> foundTableNames = new ArrayList<String>();
573 TableName[] tableNames = null;
574
575 try {
576 tableNames = this.admin.listTableNames();
577 } catch (IOException e) {
578 LOG.error("Get listTableNames failed", e);
579 this.errorCode = INIT_ERROR_EXIT_CODE;
580 return false;
581 }
582
583 if (this.targets == null || this.targets.length == 0) return true;
584
585 for (String target : this.targets) {
586 for (TableName tableName : tableNames) {
587 if (target.equals(tableName.getNameAsString())) {
588 foundTableNames.add(target);
589 }
590 }
591 }
592
593 if (foundTableNames.size() > 0) {
594 System.err.println("Cannot pass a tablename when using the -regionserver " +
595 "option, tablenames:" + foundTableNames.toString());
596 this.errorCode = USAGE_EXIT_CODE;
597 }
598 return foundTableNames.size() == 0;
599 }
600
601 private void monitorRegionServers(Map<String, List<HRegionInfo>> rsAndRMap) {
602 String serverName = null;
603 String tableName = null;
604 HRegionInfo region = null;
605 HTable table = null;
606 Get get = null;
607 byte[] startKey = null;
608 Scan scan = null;
609 StopWatch stopWatch = new StopWatch();
610
611 for (Map.Entry<String, List<HRegionInfo>> entry : rsAndRMap.entrySet()) {
612 stopWatch.reset();
613 serverName = entry.getKey();
614
615 region = entry.getValue().get(0);
616 try {
617 tableName = region.getTable().getNameAsString();
618 table = new HTable(this.admin.getConfiguration(), tableName);
619 startKey = region.getStartKey();
620
621 if(startKey.length > 0) {
622 get = new Get(startKey);
623 stopWatch.start();
624 table.get(get);
625 stopWatch.stop();
626 } else {
627 scan = new Scan();
628 scan.setCaching(1);
629 scan.setMaxResultSize(1L);
630 stopWatch.start();
631 table.getScanner(scan);
632 stopWatch.stop();
633 }
634 this.getSink().publishReadTiming(tableName, serverName, stopWatch.getTime());
635 } catch (TableNotFoundException tnfe) {
636
637 } catch (TableNotEnabledException tnee) {
638
639 LOG.debug("The targeted table was disabled. Assuming success.");
640 } catch (DoNotRetryIOException dnrioe) {
641 this.getSink().publishReadFailure(tableName, serverName);
642 LOG.error(dnrioe);
643 } catch (IOException e) {
644 this.getSink().publishReadFailure(tableName, serverName);
645 LOG.error(e);
646 this.errorCode = ERROR_EXIT_CODE;
647 } finally {
648 if (table != null) {
649 try {
650 table.close();
651 } catch (IOException e) {
652 }
653 }
654 scan = null;
655 get = null;
656 startKey = null;
657 }
658 }
659 }
660
661 private Map<String, List<HRegionInfo>> filterRegionServerByName() {
662 Map<String, List<HRegionInfo>> regionServerAndRegionsMap = this.getAllRegionServerByName();
663 regionServerAndRegionsMap = this.doFilterRegionServerByName(regionServerAndRegionsMap);
664 return regionServerAndRegionsMap;
665 }
666
667 private Map<String, List<HRegionInfo>> getAllRegionServerByName() {
668 Map<String, List<HRegionInfo>> rsAndRMap = new HashMap<String, List<HRegionInfo>>();
669 HTable table = null;
670 try {
671 HTableDescriptor[] tableDescs = this.admin.listTables();
672 List<HRegionInfo> regions = null;
673 for (HTableDescriptor tableDesc : tableDescs) {
674 table = new HTable(this.admin.getConfiguration(), tableDesc.getName());
675
676 for (Map.Entry<HRegionInfo, ServerName> entry : table
677 .getRegionLocations().entrySet()) {
678 ServerName rs = entry.getValue();
679 String rsName = rs.getHostname();
680 HRegionInfo r = entry.getKey();
681
682 if (rsAndRMap.containsKey(rsName)) {
683 regions = rsAndRMap.get(rsName);
684 } else {
685 regions = new ArrayList<HRegionInfo>();
686 rsAndRMap.put(rsName, regions);
687 }
688 regions.add(r);
689 }
690 table.close();
691 }
692
693 } catch (IOException e) {
694 String msg = "Get HTables info failed";
695 LOG.error(msg, e);
696 this.errorCode = INIT_ERROR_EXIT_CODE;
697 } finally {
698 if (table != null) {
699 try {
700 table.close();
701 } catch (IOException e) {
702 LOG.warn("Close table failed", e);
703 }
704 }
705 }
706
707 return rsAndRMap;
708 }
709
710 private Map<String, List<HRegionInfo>> doFilterRegionServerByName(
711 Map<String, List<HRegionInfo>> fullRsAndRMap) {
712
713 Map<String, List<HRegionInfo>> filteredRsAndRMap = null;
714
715 if (this.targets != null && this.targets.length > 0) {
716 filteredRsAndRMap = new HashMap<String, List<HRegionInfo>>();
717 Pattern pattern = null;
718 Matcher matcher = null;
719 boolean regExpFound = false;
720 for (String rsName : this.targets) {
721 if (this.useRegExp) {
722 regExpFound = false;
723 pattern = Pattern.compile(rsName);
724 for (Map.Entry<String,List<HRegionInfo>> entry : fullRsAndRMap.entrySet()) {
725 matcher = pattern.matcher(entry.getKey());
726 if (matcher.matches()) {
727 filteredRsAndRMap.put(entry.getKey(), entry.getValue());
728 regExpFound = true;
729 }
730 }
731 if (!regExpFound) {
732 LOG.info("No RegionServerInfo found, regionServerPattern:" + rsName);
733 }
734 } else {
735 if (fullRsAndRMap.containsKey(rsName)) {
736 filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName));
737 } else {
738 LOG.info("No RegionServerInfo found, regionServerName:" + rsName);
739 }
740 }
741 }
742 } else {
743 filteredRsAndRMap = fullRsAndRMap;
744 }
745 return filteredRsAndRMap;
746 }
747 }
748
749 public static void main(String[] args) throws Exception {
750 int exitCode = ToolRunner.run(HBaseConfiguration.create(), new Canary(), args);
751 System.exit(exitCode);
752 }
753 }