View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.tool;
21  
22  import java.io.Closeable;
23  import java.io.IOException;
24  import java.util.ArrayList;
25  import java.util.Arrays;
26  import java.util.HashMap;
27  import java.util.LinkedList;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.Map.Entry;
31  import java.util.Random;
32  import java.util.Set;
33  import java.util.TreeSet;
34  import java.util.concurrent.Callable;
35  import java.util.concurrent.ExecutionException;
36  import java.util.concurrent.ExecutorService;
37  import java.util.concurrent.Future;
38  import java.util.concurrent.ScheduledThreadPoolExecutor;
39  import java.util.regex.Matcher;
40  import java.util.regex.Pattern;
41  
42  import org.apache.commons.lang.time.StopWatch;
43  import org.apache.commons.logging.Log;
44  import org.apache.commons.logging.LogFactory;
45  import org.apache.hadoop.conf.Configuration;
46  import org.apache.hadoop.hbase.AuthUtil;
47  import org.apache.hadoop.hbase.DoNotRetryIOException;
48  import org.apache.hadoop.hbase.HBaseConfiguration;
49  import org.apache.hadoop.hbase.HColumnDescriptor;
50  import org.apache.hadoop.hbase.HRegionInfo;
51  import org.apache.hadoop.hbase.HTableDescriptor;
52  import org.apache.hadoop.hbase.ServerName;
53  import org.apache.hadoop.hbase.TableName;
54  import org.apache.hadoop.hbase.TableNotEnabledException;
55  import org.apache.hadoop.hbase.TableNotFoundException;
56  import org.apache.hadoop.hbase.client.Get;
57  import org.apache.hadoop.hbase.client.HBaseAdmin;
58  import org.apache.hadoop.hbase.client.HConnection;
59  import org.apache.hadoop.hbase.client.HConnectionManager;
60  import org.apache.hadoop.hbase.client.HTable;
61  import org.apache.hadoop.hbase.client.HTableInterface;
62  import org.apache.hadoop.hbase.client.ResultScanner;
63  import org.apache.hadoop.hbase.client.Scan;
64  import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
65  import org.apache.hadoop.hbase.util.ReflectionUtils;
66  import org.apache.hadoop.util.Tool;
67  import org.apache.hadoop.util.ToolRunner;
68  
69  /**
70   * HBase Canary Tool that can be used to do
71   * "canary monitoring" of a running HBase cluster.
72   *
73   * Here are two modes
74   * 1. region mode - Foreach region tries to get one row per column family
75   * and outputs some information about failure or latency.
76   *
77   * 2. regionserver mode - Foreach regionserver tries to get one row from one table
78   * selected randomly and outputs some information about failure or latency.
79   */
80  public final class Canary implements Tool {
81    // Sink interface used by the canary to outputs information
82    public interface Sink {
83      public void publishReadFailure(HRegionInfo region, Exception e);
84      public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
85      public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
86    }
87    // new extended sink for output regionserver mode info
88    // do not change the Sink interface directly due to maintaining the API
89    public interface ExtendedSink extends Sink {
90      public void publishReadFailure(String table, String server);
91      public void publishReadTiming(String table, String server, long msTime);
92    }
93  
94    // Simple implementation of canary sink that allows to plot on
95    // file or standard output timings or failures.
96    public static class StdOutSink implements Sink {
97      @Override
98      public void publishReadFailure(HRegionInfo region, Exception e) {
99        LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
100     }
101 
102     @Override
103     public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
104       LOG.error(String.format("read from region %s column family %s failed",
105                 region.getRegionNameAsString(), column.getNameAsString()), e);
106     }
107 
108     @Override
109     public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
110       LOG.info(String.format("read from region %s column family %s in %dms",
111                region.getRegionNameAsString(), column.getNameAsString(), msTime));
112     }
113   }
114   // a ExtendedSink implementation
115   public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
116 
117     @Override
118     public void publishReadFailure(String table, String server) {
119       LOG.error(String.format("Read from table:%s on region server:%s", table, server));
120     }
121 
122     @Override
123     public void publishReadTiming(String table, String server, long msTime) {
124       LOG.info(String.format("Read from table:%s on region server:%s in %dms",
125           table, server, msTime));
126     }
127   }
128 
129   /**
130    * For each column family of the region tries to get one row and outputs the latency, or the
131    * failure.
132    */
133   static class RegionTask implements Callable<Void> {
134     private HConnection connection;
135     private HRegionInfo region;
136     private Sink sink;
137 
138     RegionTask(HConnection connection, HRegionInfo region, Sink sink) {
139       this.connection = connection;
140       this.region = region;
141       this.sink = sink;
142     }
143 
144     @Override
145     public Void call() {
146       HTableInterface table = null;
147       HTableDescriptor tableDesc = null;
148       try {
149         table = connection.getTable(region.getTable());
150         tableDesc = table.getTableDescriptor();
151       } catch (IOException e) {
152         LOG.debug("sniffRegion failed", e);
153         sink.publishReadFailure(region, e);
154         if (table != null) {
155           try {
156             table.close();
157           } catch (IOException ioe) {
158           }
159         }
160         return null;
161       }
162 
163       byte[] startKey = null;
164       Get get = null;
165       Scan scan = null;
166       ResultScanner rs = null;
167       StopWatch stopWatch = new StopWatch();
168       for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
169         stopWatch.reset();
170         startKey = region.getStartKey();
171         // Can't do a get on empty start row so do a Scan of first element if any instead.
172         if (startKey.length > 0) {
173           get = new Get(startKey);
174           get.setCacheBlocks(false);
175           get.setFilter(new FirstKeyOnlyFilter());
176           get.addFamily(column.getName());
177         } else {
178           scan = new Scan();
179           scan.setCaching(1);
180           scan.setCacheBlocks(false);
181           scan.setFilter(new FirstKeyOnlyFilter());
182           scan.addFamily(column.getName());
183           scan.setMaxResultSize(1L);
184         }
185 
186         try {
187           if (startKey.length > 0) {
188             stopWatch.start();
189             table.get(get);
190             stopWatch.stop();
191             sink.publishReadTiming(region, column, stopWatch.getTime());
192           } else {
193             stopWatch.start();
194             rs = table.getScanner(scan);
195             stopWatch.stop();
196             sink.publishReadTiming(region, column, stopWatch.getTime());
197           }
198         } catch (Exception e) {
199           sink.publishReadFailure(region, column, e);
200         } finally {
201           if (rs != null) {
202             rs.close();
203           }
204           scan = null;
205           get = null;
206           startKey = null;
207         }
208       }
209       try {
210         table.close();
211       } catch (IOException e) {
212       }
213       return null;
214     }
215   }
216 
217   /**
218    * Get one row from a region on the regionserver and outputs the latency, or the failure.
219    */
220   static class RegionServerTask implements Callable<Void> {
221     private HConnection connection;
222     private String serverName;
223     private HRegionInfo region;
224     private ExtendedSink sink;
225 
226     RegionServerTask(HConnection connection, String serverName, HRegionInfo region,
227         ExtendedSink sink) {
228       this.connection = connection;
229       this.serverName = serverName;
230       this.region = region;
231       this.sink = sink;
232     }
233 
234     @Override
235     public Void call() {
236       TableName tableName = null;
237       HTableInterface table = null;
238       Get get = null;
239       byte[] startKey = null;
240       Scan scan = null;
241       StopWatch stopWatch = new StopWatch();
242       // monitor one region on every region server
243       stopWatch.reset();
244       try {
245         tableName = region.getTable();
246         table = connection.getTable(tableName);
247         startKey = region.getStartKey();
248         // Can't do a get on empty start row so do a Scan of first element if any instead.
249         if (startKey.length > 0) {
250           get = new Get(startKey);
251           get.setCacheBlocks(false);
252           get.setFilter(new FirstKeyOnlyFilter());
253           stopWatch.start();
254           table.get(get);
255           stopWatch.stop();
256         } else {
257           scan = new Scan();
258           scan.setCacheBlocks(false);
259           scan.setFilter(new FirstKeyOnlyFilter());
260           scan.setCaching(1);
261           scan.setMaxResultSize(1L);
262           stopWatch.start();
263           ResultScanner s = table.getScanner(scan);
264           s.close();
265           stopWatch.stop();
266         }
267         sink.publishReadTiming(tableName.getNameAsString(), serverName, stopWatch.getTime());
268       } catch (TableNotFoundException tnfe) {
269         // This is ignored because it doesn't imply that the regionserver is dead
270       } catch (TableNotEnabledException tnee) {
271         // This is considered a success since we got a response.
272         LOG.debug("The targeted table was disabled.  Assuming success.");
273       } catch (DoNotRetryIOException dnrioe) {
274         sink.publishReadFailure(tableName.getNameAsString(), serverName);
275         LOG.error(dnrioe);
276       } catch (IOException e) {
277         sink.publishReadFailure(tableName.getNameAsString(), serverName);
278         LOG.error(e);
279       } finally {
280         if (table != null) {
281           try {
282             table.close();
283           } catch (IOException e) {/* DO NOTHING */
284           }
285         }
286         scan = null;
287         get = null;
288         startKey = null;
289       }
290       return null;
291     }
292   }
293 
294   private static final int USAGE_EXIT_CODE = 1;
295   private static final int INIT_ERROR_EXIT_CODE = 2;
296   private static final int TIMEOUT_ERROR_EXIT_CODE = 3;
297   private static final int ERROR_EXIT_CODE = 4;
298 
299   private static final long DEFAULT_INTERVAL = 6000;
300 
301   private static final long DEFAULT_TIMEOUT = 600000; // 10 mins
302 
303   private static final int MAX_THREADS_NUM = 16; // #threads to contact regions
304 
305   private static final Log LOG = LogFactory.getLog(Canary.class);
306 
307   private Configuration conf = null;
308   private long interval = 0;
309   private Sink sink = null;
310 
311   private boolean useRegExp;
312   private long timeout = DEFAULT_TIMEOUT;
313   private boolean failOnError = true;
314   private boolean regionServerMode = false;
315   private ExecutorService executor; // threads to retrieve data from regionservers
316 
317   public Canary() {
318     this(new ScheduledThreadPoolExecutor(1), new RegionServerStdOutSink());
319   }
320 
321   public Canary(ExecutorService executor, Sink sink) {
322     this.executor = executor;
323     this.sink = sink;
324   }
325 
326   @Override
327   public Configuration getConf() {
328     return conf;
329   }
330 
331   @Override
332   public void setConf(Configuration conf) {
333     this.conf = conf;
334   }
335 
336   @Override
337   public int run(String[] args) throws Exception {
338     int index = -1;
339 
340     // Process command line args
341     for (int i = 0; i < args.length; i++) {
342       String cmd = args[i];
343 
344       if (cmd.startsWith("-")) {
345         if (index >= 0) {
346           // command line args must be in the form: [opts] [table 1 [table 2 ...]]
347           System.err.println("Invalid command line options");
348           printUsageAndExit();
349         }
350 
351         if (cmd.equals("-help")) {
352           // user asked for help, print the help and quit.
353           printUsageAndExit();
354         } else if (cmd.equals("-daemon") && interval == 0) {
355           // user asked for daemon mode, set a default interval between checks
356           interval = DEFAULT_INTERVAL;
357         } else if (cmd.equals("-interval")) {
358           // user has specified an interval for canary breaths (-interval N)
359           i++;
360 
361           if (i == args.length) {
362             System.err.println("-interval needs a numeric value argument.");
363             printUsageAndExit();
364           }
365 
366           try {
367             interval = Long.parseLong(args[i]) * 1000;
368           } catch (NumberFormatException e) {
369             System.err.println("-interval needs a numeric value argument.");
370             printUsageAndExit();
371           }
372         } else if(cmd.equals("-regionserver")) {
373           this.regionServerMode = true;
374         } else if (cmd.equals("-e")) {
375           this.useRegExp = true;
376         } else if (cmd.equals("-t")) {
377           i++;
378 
379           if (i == args.length) {
380             System.err.println("-t needs a numeric value argument.");
381             printUsageAndExit();
382           }
383 
384           try {
385             this.timeout = Long.parseLong(args[i]);
386           } catch (NumberFormatException e) {
387             System.err.println("-t needs a numeric value argument.");
388             printUsageAndExit();
389           }
390 
391         } else if (cmd.equals("-f")) {
392           i++;
393 
394           if (i == args.length) {
395             System.err
396                 .println("-f needs a boolean value argument (true|false).");
397             printUsageAndExit();
398           }
399 
400           this.failOnError = Boolean.parseBoolean(args[i]);
401         } else {
402           // no options match
403           System.err.println(cmd + " options is invalid.");
404           printUsageAndExit();
405         }
406       } else if (index < 0) {
407         // keep track of first table name specified by the user
408         index = i;
409       }
410     }
411 
412     // Launches chore for refreshing kerberos credentials if security is enabled.
413     // Please see http://hbase.apache.org/book.html#_running_canary_in_a_kerberos_enabled_cluster
414     // for more details.
415     AuthUtil.launchAuthChore(conf);
416 
417     // Start to prepare the stuffs
418     Monitor monitor = null;
419     Thread monitorThread = null;
420     long startTime = 0;
421     long currentTimeLength = 0;
422     // Get a connection to use in below.
423     HConnection connection = HConnectionManager.createConnection(this.conf);
424     try {
425       do {
426         // Do monitor !!
427         try {
428           monitor = this.newMonitor(connection, index, args);
429           monitorThread = new Thread(monitor);
430           startTime = System.currentTimeMillis();
431           monitorThread.start();
432           while (!monitor.isDone()) {
433             // wait for 1 sec
434             Thread.sleep(1000);
435             // exit if any error occurs
436             if (this.failOnError && monitor.hasError()) {
437               monitorThread.interrupt();
438               if (monitor.initialized) {
439                 System.exit(monitor.errorCode);
440               } else {
441                 System.exit(INIT_ERROR_EXIT_CODE);
442               }
443             }
444             currentTimeLength = System.currentTimeMillis() - startTime;
445             if (currentTimeLength > this.timeout) {
446               LOG.error("The monitor is running too long (" + currentTimeLength
447                   + ") after timeout limit:" + this.timeout
448                   + " will be killed itself !!");
449               if (monitor.initialized) {
450                 System.exit(TIMEOUT_ERROR_EXIT_CODE);
451               } else {
452                 System.exit(INIT_ERROR_EXIT_CODE);
453               }
454               break;
455             }
456           }
457 
458           if (this.failOnError && monitor.hasError()) {
459             monitorThread.interrupt();
460             System.exit(monitor.errorCode);
461           }
462         } finally {
463           if (monitor != null) monitor.close();
464         }
465 
466         Thread.sleep(interval);
467       } while (interval > 0);
468     } finally {
469       connection.close();
470     }
471 
472     return(monitor.errorCode);
473   }
474 
475   private void printUsageAndExit() {
476     System.err.printf(
477       "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n",
478         getClass().getName());
479     System.err.println(" where [opts] are:");
480     System.err.println("   -help          Show this help and exit.");
481     System.err.println("   -regionserver  replace the table argument to regionserver,");
482     System.err.println("      which means to enable regionserver mode");
483     System.err.println("   -daemon        Continuous check at defined intervals.");
484     System.err.println("   -interval <N>  Interval between checks (sec)");
485     System.err.println("   -e             Use region/regionserver as regular expression");
486     System.err.println("      which means the region/regionserver is regular expression pattern");
487     System.err.println("   -f <B>         stop whole program if first error occurs," +
488         " default is true");
489     System.err.println("   -t <N>         timeout for a check, default is 600000 (milisecs)");
490     System.exit(USAGE_EXIT_CODE);
491   }
492 
493   /**
494    * A Factory method for {@link Monitor}.
495    * Can be overridden by user.
496    * @param index a start index for monitor target
497    * @param args args passed from user
498    * @return a Monitor instance
499    */
500   public Monitor newMonitor(final HConnection connection, int index, String[] args) {
501     Monitor monitor = null;
502     String[] monitorTargets = null;
503 
504     if(index >= 0) {
505       int length = args.length - index;
506       monitorTargets = new String[length];
507       System.arraycopy(args, index, monitorTargets, 0, length);
508     }
509 
510     if (this.regionServerMode) {
511       monitor =
512           new RegionServerMonitor(connection, monitorTargets, this.useRegExp,
513               (ExtendedSink) this.sink, this.executor);
514     } else {
515       monitor =
516           new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink, this.executor);
517     }
518     return monitor;
519   }
520 
521   // a Monitor super-class can be extended by users
522   public static abstract class Monitor implements Runnable, Closeable {
523 
524     protected HConnection connection;
525     protected HBaseAdmin admin;
526     protected String[] targets;
527     protected boolean useRegExp;
528     protected boolean initialized = false;
529 
530     protected boolean done = false;
531     protected int errorCode = 0;
532     protected Sink sink;
533     protected ExecutorService executor;
534 
535     public boolean isDone() {
536       return done;
537     }
538 
539     public boolean hasError() {
540       return errorCode != 0;
541     }
542 
543     @Override
544     public void close() throws IOException {
545       if (this.admin != null) this.admin.close();
546     }
547 
548     protected Monitor(HConnection connection, String[] monitorTargets, boolean useRegExp, Sink sink,
549         ExecutorService executor) {
550       if (null == connection) throw new IllegalArgumentException("connection shall not be null");
551 
552       this.connection = connection;
553       this.targets = monitorTargets;
554       this.useRegExp = useRegExp;
555       this.sink = sink;
556       this.executor = executor;
557     }
558 
559     public abstract void run();
560 
561     protected boolean initAdmin() {
562       if (null == this.admin) {
563         try {
564           this.admin = new HBaseAdmin(connection);
565         } catch (Exception e) {
566           LOG.error("Initial HBaseAdmin failed...", e);
567           this.errorCode = INIT_ERROR_EXIT_CODE;
568         }
569       } else if (admin.isAborted()) {
570         LOG.error("HBaseAdmin aborted");
571         this.errorCode = INIT_ERROR_EXIT_CODE;
572       }
573       return !this.hasError();
574     }
575   }
576 
577   // a monitor for region mode
578   private static class RegionMonitor extends Monitor {
579 
580     public RegionMonitor(HConnection connection, String[] monitorTargets, boolean useRegExp,
581         Sink sink, ExecutorService executor) {
582       super(connection, monitorTargets, useRegExp, sink, executor);
583     }
584 
585     @Override
586     public void run() {
587       if (this.initAdmin()) {
588         try {
589           List<Future<Void>> taskFutures = new LinkedList<Future<Void>>();
590           if (this.targets != null && this.targets.length > 0) {
591             String[] tables = generateMonitorTables(this.targets);
592             this.initialized = true;
593             for (String table : tables) {
594               taskFutures.addAll(Canary.sniff(connection, sink, table, executor));
595             }
596           } else {
597             taskFutures.addAll(sniff());
598           }
599           for (Future<Void> future : taskFutures) {
600             try {
601               future.get();
602             } catch (ExecutionException e) {
603               LOG.error("Sniff region failed!", e);
604             }
605           }
606         } catch (Exception e) {
607           LOG.error("Run regionMonitor failed", e);
608           this.errorCode = ERROR_EXIT_CODE;
609         }
610       }
611       this.done = true;
612     }
613 
614     private String[] generateMonitorTables(String[] monitorTargets) throws IOException {
615       String[] returnTables = null;
616 
617       if (this.useRegExp) {
618         Pattern pattern = null;
619         HTableDescriptor[] tds = null;
620         Set<String> tmpTables = new TreeSet<String>();
621         try {
622           for (String monitorTarget : monitorTargets) {
623             pattern = Pattern.compile(monitorTarget);
624             tds = this.admin.listTables(pattern);
625             if (tds != null) {
626               for (HTableDescriptor td : tds) {
627                 tmpTables.add(td.getNameAsString());
628               }
629             }
630           }
631         } catch (IOException e) {
632           LOG.error("Communicate with admin failed", e);
633           throw e;
634         }
635 
636         if (tmpTables.size() > 0) {
637           returnTables = tmpTables.toArray(new String[tmpTables.size()]);
638         } else {
639           String msg = "No HTable found, tablePattern:" + Arrays.toString(monitorTargets);
640           LOG.error(msg);
641           this.errorCode = INIT_ERROR_EXIT_CODE;
642           throw new TableNotFoundException(msg);
643         }
644       } else {
645         returnTables = monitorTargets;
646       }
647 
648       return returnTables;
649     }
650 
651     /*
652      * canary entry point to monitor all the tables.
653      */
654     private List<Future<Void>> sniff() throws Exception {
655       List<Future<Void>> taskFutures = new LinkedList<Future<Void>>();
656       for (HTableDescriptor table : admin.listTables()) {
657         if (admin.isTableEnabled(table.getTableName())) {
658           taskFutures.addAll(Canary.sniff(connection, sink, table.getTableName(), executor));
659         }
660       }
661       return taskFutures;
662     }
663   }
664 
665   /**
666    * Canary entry point for specified table.
667    * @throws Exception
668    */
669   public static void sniff(final HConnection connection, TableName tableName) throws Exception {
670     List<Future<Void>> taskFutures =
671         Canary.sniff(connection, new StdOutSink(), tableName.getNameAsString(),
672           new ScheduledThreadPoolExecutor(1));
673     for (Future<Void> future : taskFutures) {
674       future.get();
675     }
676   }
677 
678   /**
679    * Canary entry point for specified table.
680    * @throws Exception
681    */
682   private static List<Future<Void>> sniff(final HConnection connection, final Sink sink,
683     String tableName, ExecutorService executor) throws Exception {
684     HBaseAdmin admin = new HBaseAdmin(connection);
685     try {
686       if (admin.isTableEnabled(TableName.valueOf(tableName))) {
687         return Canary.sniff(connection, sink, TableName.valueOf(tableName), executor);
688       } else {
689         LOG.warn(String.format("Table %s is not enabled", tableName));
690       }
691       return new LinkedList<Future<Void>>();
692     } finally {
693       admin.close();
694     }
695   }
696 
697   /*
698    * Loops over regions that owns this table, and output some information abouts the state.
699    */
700   private static List<Future<Void>> sniff(final HConnection connection, final Sink sink,
701       TableName tableName, ExecutorService executor) throws Exception {
702     HTableInterface table = null;
703     try {
704       table = connection.getTable(tableName);
705     } catch (TableNotFoundException e) {
706       return new ArrayList<Future<Void>>();
707     }
708     List<RegionTask> tasks = new ArrayList<RegionTask>();
709     try {
710       for (HRegionInfo region : ((HTable)table).getRegionLocations().keySet()) {
711         tasks.add(new RegionTask(connection, region, sink));
712       }
713     } finally {
714       table.close();
715     }
716     return executor.invokeAll(tasks);
717   }
718 
719   /*
720    * For each column family of the region tries to get one row and outputs the latency, or the
721    * failure.
722    */
723   private static void sniffRegion(
724       final HBaseAdmin admin,
725       final Sink sink,
726       HRegionInfo region,
727       HTableInterface table) throws Exception {
728     HTableDescriptor tableDesc = table.getTableDescriptor();
729     byte[] startKey = null;
730     Get get = null;
731     Scan scan = null;
732     ResultScanner rs = null;
733     StopWatch stopWatch = new StopWatch();
734     for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
735       stopWatch.reset();
736       startKey = region.getStartKey();
737       // Can't do a get on empty start row so do a Scan of first element if any instead.
738       if (startKey.length > 0) {
739         get = new Get(startKey);
740         get.setCacheBlocks(false);
741         get.setFilter(new FirstKeyOnlyFilter());
742         get.addFamily(column.getName());
743       } else {
744         scan = new Scan();
745         scan.setRaw(true);
746         scan.setCaching(1);
747         scan.setCacheBlocks(false);
748         scan.setFilter(new FirstKeyOnlyFilter());
749         scan.addFamily(column.getName());
750         scan.setMaxResultSize(1L);
751       }
752 
753       try {
754         if (startKey.length > 0) {
755           stopWatch.start();
756           table.get(get);
757           stopWatch.stop();
758           sink.publishReadTiming(region, column, stopWatch.getTime());
759         } else {
760           stopWatch.start();
761           rs = table.getScanner(scan);
762           stopWatch.stop();
763           sink.publishReadTiming(region, column, stopWatch.getTime());
764         }
765       } catch (Exception e) {
766         sink.publishReadFailure(region, column, e);
767       } finally {
768         if (rs != null) {
769           rs.close();
770         }
771         scan = null;
772         get = null;
773         startKey = null;
774       }
775     }
776   }
777   // a monitor for regionserver mode
778   private static class RegionServerMonitor extends Monitor {
779 
780     public RegionServerMonitor(HConnection connection, String[] monitorTargets, boolean useRegExp,
781         ExtendedSink sink, ExecutorService executor) {
782       super(connection, monitorTargets, useRegExp, sink, executor);
783     }
784 
785     private ExtendedSink getSink() {
786       return (ExtendedSink) this.sink;
787     }
788 
789     @Override
790     public void run() {
791       if (this.initAdmin() && this.checkNoTableNames()) {
792         Map<String, List<HRegionInfo>> rsAndRMap = this.filterRegionServerByName();
793         this.initialized = true;
794         this.monitorRegionServers(rsAndRMap);
795       }
796       this.done = true;
797     }
798 
799     private boolean checkNoTableNames() {
800       List<String> foundTableNames = new ArrayList<String>();
801       TableName[] tableNames = null;
802 
803       try {
804         tableNames = this.admin.listTableNames();
805       } catch (IOException e) {
806         LOG.error("Get listTableNames failed", e);
807         this.errorCode = INIT_ERROR_EXIT_CODE;
808         return false;
809       }
810 
811       if (this.targets == null || this.targets.length == 0) return true;
812 
813       for (String target : this.targets) {
814         for (TableName tableName : tableNames) {
815           if (target.equals(tableName.getNameAsString())) {
816             foundTableNames.add(target);
817           }
818         }
819       }
820 
821       if (foundTableNames.size() > 0) {
822         System.err.println("Cannot pass a tablename when using the -regionserver " +
823             "option, tablenames:" + foundTableNames.toString());
824         this.errorCode = USAGE_EXIT_CODE;
825       }
826       return foundTableNames.size() == 0;
827     }
828 
829     private void monitorRegionServers(Map<String, List<HRegionInfo>> rsAndRMap) {
830       List<RegionServerTask> tasks = new ArrayList<RegionServerTask>();
831       Random rand =new Random();
832       // monitor one region on every region server
833       for (Map.Entry<String, List<HRegionInfo>> entry : rsAndRMap.entrySet()) {
834         String serverName = entry.getKey();
835         // random select a region
836         HRegionInfo region = entry.getValue().get(rand.nextInt(entry.getValue().size()));
837         tasks.add(new RegionServerTask(this.connection, serverName, region, getSink()));
838       }
839       try {
840         for (Future<Void> future : this.executor.invokeAll(tasks)) {
841           try {
842             future.get();
843           } catch (ExecutionException e) {
844             LOG.error("Sniff regionserver failed!", e);
845             this.errorCode = ERROR_EXIT_CODE;
846           }
847         }
848       } catch (InterruptedException e) {
849         this.errorCode = ERROR_EXIT_CODE;
850         LOG.error("Sniff regionserver failed!", e);
851       }
852     }
853 
854     private Map<String, List<HRegionInfo>> filterRegionServerByName() {
855       Map<String, List<HRegionInfo>> regionServerAndRegionsMap = this.getAllRegionServerByName();
856       regionServerAndRegionsMap = this.doFilterRegionServerByName(regionServerAndRegionsMap);
857       return regionServerAndRegionsMap;
858     }
859 
860     private Map<String, List<HRegionInfo>> getAllRegionServerByName() {
861       Map<String, List<HRegionInfo>> rsAndRMap = new HashMap<String, List<HRegionInfo>>();
862       HTableInterface table = null;
863       try {
864         HTableDescriptor[] tableDescs = this.admin.listTables();
865         List<HRegionInfo> regions = null;
866         for (HTableDescriptor tableDesc : tableDescs) {
867           table = this.admin.getConnection().getTable(tableDesc.getTableName());
868           for (Entry<HRegionInfo, ServerName> e: ((HTable)table).getRegionLocations().entrySet()) {
869             HRegionInfo r = e.getKey();
870             ServerName rs = e.getValue();
871             String rsName = rs.getHostname();
872 
873             if (rsAndRMap.containsKey(rsName)) {
874               regions = rsAndRMap.get(rsName);
875             } else {
876               regions = new ArrayList<HRegionInfo>();
877               rsAndRMap.put(rsName, regions);
878             }
879             regions.add(r);
880           }
881           table.close();
882         }
883 
884       } catch (IOException e) {
885         String msg = "Get HTables info failed";
886         LOG.error(msg, e);
887         this.errorCode = INIT_ERROR_EXIT_CODE;
888       } finally {
889         if (table != null) {
890           try {
891             table.close();
892           } catch (IOException e) {
893             LOG.warn("Close table failed", e);
894           }
895         }
896       }
897 
898       return rsAndRMap;
899     }
900 
901     private Map<String, List<HRegionInfo>> doFilterRegionServerByName(
902         Map<String, List<HRegionInfo>> fullRsAndRMap) {
903 
904       Map<String, List<HRegionInfo>> filteredRsAndRMap = null;
905 
906       if (this.targets != null && this.targets.length > 0) {
907         filteredRsAndRMap = new HashMap<String, List<HRegionInfo>>();
908         Pattern pattern = null;
909         Matcher matcher = null;
910         boolean regExpFound = false;
911         for (String rsName : this.targets) {
912           if (this.useRegExp) {
913             regExpFound = false;
914             pattern = Pattern.compile(rsName);
915             for (Map.Entry<String, List<HRegionInfo>> entry : fullRsAndRMap.entrySet()) {
916               matcher = pattern.matcher(entry.getKey());
917               if (matcher.matches()) {
918                 filteredRsAndRMap.put(entry.getKey(), entry.getValue());
919                 regExpFound = true;
920               }
921             }
922             if (!regExpFound) {
923               LOG.info("No RegionServerInfo found, regionServerPattern:" + rsName);
924             }
925           } else {
926             if (fullRsAndRMap.containsKey(rsName)) {
927               filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName));
928             } else {
929               LOG.info("No RegionServerInfo found, regionServerName:" + rsName);
930             }
931           }
932         }
933       } else {
934         filteredRsAndRMap = fullRsAndRMap;
935       }
936       return filteredRsAndRMap;
937     }
938   }
939 
940   public static void main(String[] args) throws Exception {
941     final Configuration conf = HBaseConfiguration.create();
942     AuthUtil.launchAuthChore(conf);
943     int numThreads = conf.getInt("hbase.canary.threads.num", MAX_THREADS_NUM);
944     ExecutorService executor = new ScheduledThreadPoolExecutor(numThreads);
945 
946     Class<? extends Sink> sinkClass =
947         conf.getClass("hbase.canary.sink.class", StdOutSink.class, Sink.class);
948     Sink sink = ReflectionUtils.newInstance(sinkClass);
949 
950     int exitCode = ToolRunner.run(conf, new Canary(executor, sink), args);
951     executor.shutdown();
952     System.exit(exitCode);
953   }
954 }