1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.tool;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.TreeSet;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32
33 import org.apache.commons.lang.time.StopWatch;
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.hbase.DoNotRetryIOException;
38 import org.apache.hadoop.hbase.HColumnDescriptor;
39 import org.apache.hadoop.hbase.HRegionInfo;
40 import org.apache.hadoop.hbase.HTableDescriptor;
41 import org.apache.hadoop.hbase.ServerName;
42 import org.apache.hadoop.hbase.TableName;
43 import org.apache.hadoop.hbase.TableNotEnabledException;
44 import org.apache.hadoop.hbase.TableNotFoundException;
45 import org.apache.hadoop.hbase.client.Get;
46 import org.apache.hadoop.hbase.client.HBaseAdmin;
47 import org.apache.hadoop.hbase.client.HTable;
48 import org.apache.hadoop.hbase.client.ResultScanner;
49 import org.apache.hadoop.hbase.client.Scan;
50 import org.apache.hadoop.util.Tool;
51 import org.apache.hadoop.util.ToolRunner;
52
53
54
55
56
57
58
59
60
61
62
63
64 public final class Canary implements Tool {
65
66 public interface Sink {
67 public void publishReadFailure(HRegionInfo region, Exception e);
68 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
69 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
70 }
71
72
73 public interface ExtendedSink extends Sink {
74 public void publishReadFailure(String table, String server);
75 public void publishReadTiming(String table, String server, long msTime);
76 }
77
78
79
80 public static class StdOutSink implements Sink {
81 @Override
82 public void publishReadFailure(HRegionInfo region, Exception e) {
83 LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
84 }
85
86 @Override
87 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
88 LOG.error(String.format("read from region %s column family %s failed",
89 region.getRegionNameAsString(), column.getNameAsString()), e);
90 }
91
92 @Override
93 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
94 LOG.info(String.format("read from region %s column family %s in %dms",
95 region.getRegionNameAsString(), column.getNameAsString(), msTime));
96 }
97 }
98
99 public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
100
101 @Override
102 public void publishReadFailure(String table, String server) {
103 LOG.error(String.format("Read from table:%s on region server:%s", table, server));
104 }
105
106 @Override
107 public void publishReadTiming(String table, String server, long msTime) {
108 LOG.info(String.format("Read from table:%s on region server:%s in %dms",
109 table, server, msTime));
110 }
111 }
112
113 private static final int USAGE_EXIT_CODE = 1;
114 private static final int INIT_ERROR_EXIT_CODE = 2;
115 private static final int TIMEOUT_ERROR_EXIT_CODE = 3;
116 private static final int ERROR_EXIT_CODE = 4;
117
118 private static final long DEFAULT_INTERVAL = 6000;
119
120 private static final long DEFAULT_TIMEOUT = 600000;
121
122 private static final Log LOG = LogFactory.getLog(Canary.class);
123
124 private Configuration conf = null;
125 private long interval = 0;
126 private Sink sink = null;
127
128 private boolean useRegExp;
129 private long timeout = DEFAULT_TIMEOUT;
130 private boolean failOnError = true;
131 private boolean regionServerMode = false;
132
/**
 * Creates a canary that reports to a {@link RegionServerStdOutSink}.
 */
public Canary() {
  this(new RegionServerStdOutSink());
}

/**
 * Creates a canary that reports to the given sink.
 *
 * @param sink receiver of probe results; it is cast to {@link ExtendedSink} when a region
 *          server monitor is created
 */
public Canary(Sink sink) {
  this.sink = sink;
}
140
141 @Override
142 public Configuration getConf() {
143 return conf;
144 }
145
146 @Override
147 public void setConf(Configuration conf) {
148 this.conf = conf;
149 }
150
151 @Override
152 public int run(String[] args) throws Exception {
153 int index = -1;
154
155
156 for (int i = 0; i < args.length; i++) {
157 String cmd = args[i];
158
159 if (cmd.startsWith("-")) {
160 if (index >= 0) {
161
162 System.err.println("Invalid command line options");
163 printUsageAndExit();
164 }
165
166 if (cmd.equals("-help")) {
167
168 printUsageAndExit();
169 } else if (cmd.equals("-daemon") && interval == 0) {
170
171 interval = DEFAULT_INTERVAL;
172 } else if (cmd.equals("-interval")) {
173
174 i++;
175
176 if (i == args.length) {
177 System.err.println("-interval needs a numeric value argument.");
178 printUsageAndExit();
179 }
180
181 try {
182 interval = Long.parseLong(args[i]) * 1000;
183 } catch (NumberFormatException e) {
184 System.err.println("-interval needs a numeric value argument.");
185 printUsageAndExit();
186 }
187 } else if(cmd.equals("-regionserver")) {
188 this.regionServerMode = true;
189 } else if (cmd.equals("-e")) {
190 this.useRegExp = true;
191 } else if (cmd.equals("-t")) {
192 i++;
193
194 if (i == args.length) {
195 System.err.println("-t needs a numeric value argument.");
196 printUsageAndExit();
197 }
198
199 try {
200 this.timeout = Long.parseLong(args[i]);
201 } catch (NumberFormatException e) {
202 System.err.println("-t needs a numeric value argument.");
203 printUsageAndExit();
204 }
205
206 } else if (cmd.equals("-f")) {
207 i++;
208
209 if (i == args.length) {
210 System.err
211 .println("-f needs a boolean value argument (true|false).");
212 printUsageAndExit();
213 }
214
215 this.failOnError = Boolean.parseBoolean(args[i]);
216 } else {
217
218 System.err.println(cmd + " options is invalid.");
219 printUsageAndExit();
220 }
221 } else if (index < 0) {
222
223 index = i;
224 }
225 }
226
227
228 Monitor monitor = null;
229 Thread monitorThread = null;
230 long startTime = 0;
231 long currentTimeLength = 0;
232
233 do {
234
235 monitor = this.newMonitor(index, args);
236 monitorThread = new Thread(monitor);
237 startTime = System.currentTimeMillis();
238 monitorThread.start();
239 while (!monitor.isDone()) {
240
241 Thread.sleep(1000);
242
243 if (this.failOnError && monitor.hasError()) {
244 monitorThread.interrupt();
245 System.exit(monitor.errorCode);
246 }
247 currentTimeLength = System.currentTimeMillis() - startTime;
248 if (currentTimeLength > this.timeout) {
249 LOG.error("The monitor is running too long (" + currentTimeLength
250 + ") after timeout limit:" + this.timeout
251 + " will be killed itself !!");
252 monitor.errorCode = TIMEOUT_ERROR_EXIT_CODE;
253 break;
254 }
255 }
256
257 if (this.failOnError && monitor.hasError()) {
258 monitorThread.interrupt();
259 System.exit(monitor.errorCode);
260 }
261
262 Thread.sleep(interval);
263 } while (interval > 0);
264
265 return(monitor.errorCode);
266 }
267
268 private void printUsageAndExit() {
269 System.err.printf(
270 "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n",
271 getClass().getName());
272 System.err.println(" where [opts] are:");
273 System.err.println(" -help Show this help and exit.");
274 System.err.println(" -regionserver replace the table argument to regionserver,");
275 System.err.println(" which means to enable regionserver mode");
276 System.err.println(" -daemon Continuous check at defined intervals.");
277 System.err.println(" -interval <N> Interval between checks (sec)");
278 System.err.println(" -e Use region/regionserver as regular expression");
279 System.err.println(" which means the region/regionserver is regular expression pattern");
280 System.err.println(" -f <B> stop whole program if first error occurs," +
281 " default is true");
282 System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
283 System.exit(USAGE_EXIT_CODE);
284 }
285
286
287
288
289
290
291
292
293 public Monitor newMonitor(int index, String[] args) {
294 Monitor monitor = null;
295 String[] monitorTargets = null;
296
297 if(index >= 0) {
298 int length = args.length - index;
299 monitorTargets = new String[length];
300 System.arraycopy(args, index, monitorTargets, 0, length);
301 }
302
303 if(this.regionServerMode) {
304 monitor = new RegionServerMonitor(
305 this.conf,
306 monitorTargets,
307 this.useRegExp,
308 (ExtendedSink)this.sink);
309 } else {
310 monitor = new RegionMonitor(this.conf, monitorTargets, this.useRegExp, this.sink);
311 }
312 return monitor;
313 }
314
315
316 public static abstract class Monitor implements Runnable {
317
318 protected Configuration config;
319 protected HBaseAdmin admin;
320 protected String[] targets;
321 protected boolean useRegExp;
322
323 protected boolean done = false;
324 protected int errorCode = 0;
325 protected Sink sink;
326
327 public boolean isDone() {
328 return done;
329 }
330
331 public boolean hasError() {
332 return errorCode != 0;
333 }
334
335 protected Monitor(Configuration config, String[] monitorTargets,
336 boolean useRegExp, Sink sink) {
337 if (null == config)
338 throw new IllegalArgumentException("config shall not be null");
339
340 this.config = config;
341 this.targets = monitorTargets;
342 this.useRegExp = useRegExp;
343 this.sink = sink;
344 }
345
346 public abstract void run();
347
348 protected boolean initAdmin() {
349 if (null == this.admin) {
350 try {
351 this.admin = new HBaseAdmin(config);
352 } catch (Exception e) {
353 LOG.error("Initial HBaseAdmin failed...", e);
354 this.errorCode = INIT_ERROR_EXIT_CODE;
355 }
356 } else if (admin.isAborted()) {
357 LOG.error("HBaseAdmin aborted");
358 this.errorCode = INIT_ERROR_EXIT_CODE;
359 }
360 return !this.hasError();
361 }
362 }
363
364
365 private static class RegionMonitor extends Monitor {
366
367 public RegionMonitor(Configuration config, String[] monitorTargets,
368 boolean useRegExp, Sink sink) {
369 super(config, monitorTargets, useRegExp, sink);
370 }
371
372 @Override
373 public void run() {
374 if(this.initAdmin()) {
375 try {
376 if (this.targets != null && this.targets.length > 0) {
377 String[] tables = generateMonitorTables(this.targets);
378 for (String table : tables) {
379 Canary.sniff(admin, sink, table);
380 }
381 } else {
382 sniff();
383 }
384 } catch (Exception e) {
385 LOG.error("Run regionMonitor failed", e);
386 this.errorCode = ERROR_EXIT_CODE;
387 }
388 }
389 this.done = true;
390 }
391
392 private String[] generateMonitorTables(String[] monitorTargets) throws IOException {
393 String[] returnTables = null;
394
395 if(this.useRegExp) {
396 Pattern pattern = null;
397 HTableDescriptor[] tds = null;
398 Set<String> tmpTables = new TreeSet<String>();
399 try {
400 for (String monitorTarget : monitorTargets) {
401 pattern = Pattern.compile(monitorTarget);
402 tds = this.admin.listTables(pattern);
403 if (tds != null) {
404 for (HTableDescriptor td : tds) {
405 tmpTables.add(td.getNameAsString());
406 }
407 }
408 }
409 } catch(IOException e) {
410 LOG.error("Communicate with admin failed", e);
411 throw e;
412 }
413
414 if(tmpTables.size() > 0) {
415 returnTables = tmpTables.toArray(new String[tmpTables.size()]);
416 } else {
417 String msg = "No any HTable found, tablePattern:"
418 + Arrays.toString(monitorTargets);
419 LOG.error(msg);
420 this.errorCode = INIT_ERROR_EXIT_CODE;
421 throw new TableNotFoundException(msg);
422 }
423 } else {
424 returnTables = monitorTargets;
425 }
426
427 return returnTables;
428 }
429
430
431
432
433 private void sniff() throws Exception {
434 for (HTableDescriptor table : admin.listTables()) {
435 Canary.sniff(admin, sink, table);
436 }
437 }
438
439 }
440
441
442
443
444
445 public static void sniff(final HBaseAdmin admin, TableName tableName) throws Exception {
446 sniff(admin, new StdOutSink(), tableName.getNameAsString());
447 }
448
449
450
451
452
453 private static void sniff(final HBaseAdmin admin, final Sink sink, String tableName)
454 throws Exception {
455 if (admin.isTableAvailable(tableName)) {
456 sniff(admin, sink, admin.getTableDescriptor(tableName.getBytes()));
457 } else {
458 LOG.warn(String.format("Table %s is not available", tableName));
459 }
460 }
461
462
463
464
465 private static void sniff(final HBaseAdmin admin, final Sink sink, HTableDescriptor tableDesc)
466 throws Exception {
467 HTable table = null;
468
469 try {
470 table = new HTable(admin.getConfiguration(), tableDesc.getName());
471 } catch (TableNotFoundException e) {
472 return;
473 }
474
475 for (HRegionInfo region : admin.getTableRegions(tableDesc.getName())) {
476 try {
477 sniffRegion(admin, sink, region, table);
478 } catch (Exception e) {
479 sink.publishReadFailure(region, e);
480 LOG.debug("sniffRegion failed", e);
481 }
482 }
483 }
484
485
486
487
488
489 private static void sniffRegion(
490 final HBaseAdmin admin,
491 final Sink sink,
492 HRegionInfo region,
493 HTable table) throws Exception {
494 HTableDescriptor tableDesc = table.getTableDescriptor();
495 byte[] startKey = null;
496 Get get = null;
497 Scan scan = null;
498 ResultScanner rs = null;
499 StopWatch stopWatch = new StopWatch();
500 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
501 stopWatch.reset();
502 startKey = region.getStartKey();
503
504 if (startKey.length > 0) {
505 get = new Get(startKey);
506 get.addFamily(column.getName());
507 } else {
508 scan = new Scan();
509 scan.setCaching(1);
510 scan.addFamily(column.getName());
511 scan.setMaxResultSize(1L);
512 }
513
514 try {
515 if (startKey.length > 0) {
516 stopWatch.start();
517 table.get(get);
518 stopWatch.stop();
519 sink.publishReadTiming(region, column, stopWatch.getTime());
520 } else {
521 stopWatch.start();
522 rs = table.getScanner(scan);
523 stopWatch.stop();
524 sink.publishReadTiming(region, column, stopWatch.getTime());
525 }
526 } catch (Exception e) {
527 sink.publishReadFailure(region, column, e);
528 } finally {
529 if (rs != null) {
530 rs.close();
531 }
532 scan = null;
533 get = null;
534 startKey = null;
535 }
536 }
537 }
538
539 private static class RegionServerMonitor extends Monitor {
540
541 public RegionServerMonitor(Configuration config, String[] monitorTargets,
542 boolean useRegExp, ExtendedSink sink) {
543 super(config, monitorTargets, useRegExp, sink);
544 }
545
546 private ExtendedSink getSink() {
547 return (ExtendedSink) this.sink;
548 }
549
550 @Override
551 public void run() {
552 if (this.initAdmin() && this.checkNoTableNames()) {
553 Map<String, List<HRegionInfo>> rsAndRMap = this.filterRegionServerByName();
554 this.monitorRegionServers(rsAndRMap);
555 }
556 this.done = true;
557 }
558
559 private boolean checkNoTableNames() {
560 List<String> foundTableNames = new ArrayList<String>();
561 TableName[] tableNames = null;
562
563 try {
564 tableNames = this.admin.listTableNames();
565 } catch (IOException e) {
566 LOG.error("Get listTableNames failed", e);
567 this.errorCode = INIT_ERROR_EXIT_CODE;
568 return false;
569 }
570
571 if (this.targets == null || this.targets.length == 0) return true;
572
573 for (String target : this.targets) {
574 for (TableName tableName : tableNames) {
575 if (target.equals(tableName.getNameAsString())) {
576 foundTableNames.add(target);
577 }
578 }
579 }
580
581 if (foundTableNames.size() > 0) {
582 System.err.println("Cannot pass a tablename when using the -regionserver " +
583 "option, tablenames:" + foundTableNames.toString());
584 this.errorCode = USAGE_EXIT_CODE;
585 }
586 return foundTableNames.size() == 0;
587 }
588
589 private void monitorRegionServers(Map<String, List<HRegionInfo>> rsAndRMap) {
590 String serverName = null;
591 String tableName = null;
592 HRegionInfo region = null;
593 HTable table = null;
594 Get get = null;
595 byte[] startKey = null;
596 Scan scan = null;
597 StopWatch stopWatch = new StopWatch();
598
599 for (Map.Entry<String, List<HRegionInfo>> entry : rsAndRMap.entrySet()) {
600 stopWatch.reset();
601 serverName = entry.getKey();
602
603 region = entry.getValue().get(0);
604 try {
605 tableName = region.getTable().getNameAsString();
606 table = new HTable(this.admin.getConfiguration(), tableName);
607 startKey = region.getStartKey();
608
609 if(startKey.length > 0) {
610 get = new Get(startKey);
611 stopWatch.start();
612 table.get(get);
613 stopWatch.stop();
614 } else {
615 scan = new Scan();
616 scan.setCaching(1);
617 scan.setMaxResultSize(1L);
618 stopWatch.start();
619 table.getScanner(scan);
620 stopWatch.stop();
621 }
622 this.getSink().publishReadTiming(tableName, serverName, stopWatch.getTime());
623 } catch (TableNotFoundException tnfe) {
624
625 } catch (TableNotEnabledException tnee) {
626
627 LOG.debug("The targeted table was disabled. Assuming success.");
628 } catch (DoNotRetryIOException dnrioe) {
629 this.getSink().publishReadFailure(tableName, serverName);
630 LOG.error(dnrioe);
631 } catch (IOException e) {
632 this.getSink().publishReadFailure(tableName, serverName);
633 LOG.error(e);
634 this.errorCode = ERROR_EXIT_CODE;
635 } finally {
636 if (table != null) {
637 try {
638 table.close();
639 } catch (IOException e) {
640 }
641 }
642 scan = null;
643 get = null;
644 startKey = null;
645 }
646 }
647 }
648
649 private Map<String, List<HRegionInfo>> filterRegionServerByName() {
650 Map<String, List<HRegionInfo>> regionServerAndRegionsMap = this.getAllRegionServerByName();
651 regionServerAndRegionsMap = this.doFilterRegionServerByName(regionServerAndRegionsMap);
652 return regionServerAndRegionsMap;
653 }
654
655 private Map<String, List<HRegionInfo>> getAllRegionServerByName() {
656 Map<String, List<HRegionInfo>> rsAndRMap = new HashMap<String, List<HRegionInfo>>();
657 HTable table = null;
658 try {
659 HTableDescriptor[] tableDescs = this.admin.listTables();
660 List<HRegionInfo> regions = null;
661 for (HTableDescriptor tableDesc : tableDescs) {
662 table = new HTable(this.admin.getConfiguration(), tableDesc.getName());
663
664 for (Map.Entry<HRegionInfo, ServerName> entry : table
665 .getRegionLocations().entrySet()) {
666 ServerName rs = entry.getValue();
667 String rsName = rs.getHostname();
668 HRegionInfo r = entry.getKey();
669
670 if (rsAndRMap.containsKey(rsName)) {
671 regions = rsAndRMap.get(rsName);
672 } else {
673 regions = new ArrayList<HRegionInfo>();
674 rsAndRMap.put(rsName, regions);
675 }
676 regions.add(r);
677 }
678 table.close();
679 }
680
681 } catch (IOException e) {
682 String msg = "Get HTables info failed";
683 LOG.error(msg, e);
684 this.errorCode = INIT_ERROR_EXIT_CODE;
685 } finally {
686 if (table != null) {
687 try {
688 table.close();
689 } catch (IOException e) {
690 LOG.warn("Close table failed", e);
691 }
692 }
693 }
694
695 return rsAndRMap;
696 }
697
698 private Map<String, List<HRegionInfo>> doFilterRegionServerByName(
699 Map<String, List<HRegionInfo>> fullRsAndRMap) {
700
701 Map<String, List<HRegionInfo>> filteredRsAndRMap = null;
702
703 if (this.targets != null && this.targets.length > 0) {
704 filteredRsAndRMap = new HashMap<String, List<HRegionInfo>>();
705 Pattern pattern = null;
706 Matcher matcher = null;
707 boolean regExpFound = false;
708 for (String rsName : this.targets) {
709 if (this.useRegExp) {
710 regExpFound = false;
711 pattern = Pattern.compile(rsName);
712 for (Map.Entry<String,List<HRegionInfo>> entry : fullRsAndRMap.entrySet()) {
713 matcher = pattern.matcher(entry.getKey());
714 if (matcher.matches()) {
715 filteredRsAndRMap.put(entry.getKey(), entry.getValue());
716 regExpFound = true;
717 }
718 }
719 if (!regExpFound) {
720 LOG.error("No any RegionServerInfo found, regionServerPattern:" + rsName);
721 this.errorCode = INIT_ERROR_EXIT_CODE;
722 }
723 } else {
724 if (fullRsAndRMap.containsKey(rsName)) {
725 filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName));
726 } else {
727 LOG.error("No any RegionServerInfo found, regionServerName:" + rsName);
728 this.errorCode = INIT_ERROR_EXIT_CODE;
729 }
730 }
731 }
732 } else {
733 filteredRsAndRMap = fullRsAndRMap;
734 }
735 return filteredRsAndRMap;
736 }
737 }
738
739 public static void main(String[] args) throws Exception {
740 int exitCode = ToolRunner.run(new Canary(), args);
741 System.exit(exitCode);
742 }
743 }