View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.tool;
21  
22  import org.apache.commons.lang.time.StopWatch;
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  
26  import org.apache.hadoop.hbase.TableName;
27  import org.apache.hadoop.util.Tool;
28  import org.apache.hadoop.util.ToolRunner;
29  
30  import org.apache.hadoop.conf.Configuration;
31  
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.HTableDescriptor;
34  import org.apache.hadoop.hbase.HColumnDescriptor;
35  import org.apache.hadoop.hbase.HBaseConfiguration;
36  import org.apache.hadoop.hbase.TableNotFoundException;
37  
38  import org.apache.hadoop.hbase.client.Get;
39  import org.apache.hadoop.hbase.client.HTable;
40  import org.apache.hadoop.hbase.client.HBaseAdmin;
41  import org.apache.hadoop.hbase.client.ResultScanner;
42  import org.apache.hadoop.hbase.client.Scan;
43  
44  /**
45   * HBase Canary Tool, that that can be used to do
46   * "canary monitoring" of a running HBase cluster.
47   *
48   * Foreach region tries to get one row per column family
49   * and outputs some information about failure or latency.
50   */
51  public final class Canary implements Tool {
52    // Sink interface used by the canary to outputs information
53    public interface Sink {
54      public void publishReadFailure(HRegionInfo region, Exception e);
55      public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
56      public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
57    }
58  
59    // Simple implementation of canary sink that allows to plot on
60    // file or standard output timings or failures.
61    public static class StdOutSink implements Sink {
62      @Override
63      public void publishReadFailure(HRegionInfo region, Exception e) {
64        LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
65      }
66  
67      @Override
68      public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
69        LOG.error(String.format("read from region %s column family %s failed",
70                  region.getRegionNameAsString(), column.getNameAsString()), e);
71      }
72  
73      @Override
74      public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
75        LOG.info(String.format("read from region %s column family %s in %dms",
76                 region.getRegionNameAsString(), column.getNameAsString(), msTime));
77      }
78    }
79  
80    private static final long DEFAULT_INTERVAL = 6000;
81  
82    private static final Log LOG = LogFactory.getLog(Canary.class);
83  
84    private Configuration conf = null;
85    private HBaseAdmin admin = null;
86    private long interval = 0;
87    private Sink sink = null;
88  
89    public Canary() {
90      this(new StdOutSink());
91    }
92  
93    public Canary(Sink sink) {
94      this.sink = sink;
95    }
96  
97    @Override
98    public Configuration getConf() {
99      return conf;
100   }
101 
102   @Override
103   public void setConf(Configuration conf) {
104     this.conf = conf;
105   }
106 
107   @Override
108   public int run(String[] args) throws Exception {
109     int tables_index = -1;
110 
111     // Process command line args
112     for (int i = 0; i < args.length; i++) {
113       String cmd = args[i];
114 
115       if (cmd.startsWith("-")) {
116         if (tables_index >= 0) {
117           // command line args must be in the form: [opts] [table 1 [table 2 ...]]
118           System.err.println("Invalid command line options");
119           printUsageAndExit();
120         }
121 
122         if (cmd.equals("-help")) {
123           // user asked for help, print the help and quit.
124           printUsageAndExit();
125         } else if (cmd.equals("-daemon") && interval == 0) {
126           // user asked for daemon mode, set a default interval between checks
127           interval = DEFAULT_INTERVAL;
128         } else if (cmd.equals("-interval")) {
129           // user has specified an interval for canary breaths (-interval N)
130           i++;
131 
132           if (i == args.length) {
133             System.err.println("-interval needs a numeric value argument.");
134             printUsageAndExit();
135           }
136 
137           try {
138             interval = Long.parseLong(args[i]) * 1000;
139           } catch (NumberFormatException e) {
140             System.err.println("-interval needs a numeric value argument.");
141             printUsageAndExit();
142           }
143         } else {
144           // no options match
145           System.err.println(cmd + " options is invalid.");
146           printUsageAndExit();
147         }
148       } else if (tables_index < 0) {
149         // keep track of first table name specified by the user
150         tables_index = i;
151       }
152     }
153 
154     // initialize HBase conf and admin
155     if (conf == null) conf = HBaseConfiguration.create();
156     admin = new HBaseAdmin(conf);
157     try {
158       // lets the canary monitor the cluster
159       do {
160         if (admin.isAborted()) {
161           LOG.error("HBaseAdmin aborted");
162           return(1);
163         }
164 
165         if (tables_index >= 0) {
166           for (int i = tables_index; i < args.length; i++) {
167             sniff(admin, sink, TableName.valueOf(args[i]));
168           }
169         } else {
170           sniff();
171         }
172 
173         Thread.sleep(interval);
174       } while (interval > 0);
175     } finally {
176       this.admin.close();
177     }
178 
179     return(0);
180   }
181 
182   private void printUsageAndExit() {
183     System.err.printf("Usage: bin/hbase %s [opts] [table 1 [table 2...]]%n", getClass().getName());
184     System.err.println(" where [opts] are:");
185     System.err.println("   -help          Show this help and exit.");
186     System.err.println("   -daemon        Continuous check at defined intervals.");
187     System.err.println("   -interval <N>  Interval between checks (sec)");
188     System.exit(1);
189   }
190 
191   /*
192    * canary entry point to monitor all the tables.
193    */
194   private void sniff() throws Exception {
195     for (HTableDescriptor table : admin.listTables()) {
196       sniff(admin, sink, table);
197     }
198   }
199 
200   /**
201    * Canary entry point for specified table.
202    * @param admin
203    * @param tableName
204    * @throws Exception
205    */
206   public static void sniff(final HBaseAdmin admin, TableName tableName)
207   throws Exception {
208     sniff(admin, new StdOutSink(), tableName);
209   }
210 
211   /**
212    * Canary entry point for specified table.
213    * @param admin
214    * @param sink
215    * @param tableName
216    * @throws Exception
217    */
218   private static void sniff(final HBaseAdmin admin, final Sink sink, TableName tableName)
219   throws Exception {
220     if (admin.isTableAvailable(tableName)) {
221       sniff(admin, sink, admin.getTableDescriptor(tableName));
222     } else {
223       LOG.warn(String.format("Table %s is not available", tableName));
224     }
225   }
226 
227   /*
228    * Loops over regions that owns this table,
229    * and output some information abouts the state.
230    */
231   private static void sniff(final HBaseAdmin admin, final Sink sink, HTableDescriptor tableDesc)
232   throws Exception {
233     HTable table = null;
234 
235     try {
236       table = new HTable(admin.getConfiguration(), tableDesc.getTableName());
237     } catch (TableNotFoundException e) {
238       return;
239     }
240 
241     for (HRegionInfo region : admin.getTableRegions(tableDesc.getTableName())) {
242       try {
243         sniffRegion(admin, sink, region, table);
244       } catch (Exception e) {
245         sink.publishReadFailure(region, e);
246       }
247     }
248   }
249 
250   /*
251    * For each column family of the region tries to get one row
252    * and outputs the latency, or the failure.
253    */
254   private static void sniffRegion(final HBaseAdmin admin, final Sink sink, HRegionInfo region,
255       HTable table)
256   throws Exception {
257     HTableDescriptor tableDesc = table.getTableDescriptor();
258     StopWatch stopWatch = new StopWatch();
259     for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
260       stopWatch.reset();
261       byte [] startKey = region.getStartKey();
262       if (startKey ==  null || startKey.length <= 0) {
263         // Can't do a get on empty start row so do a Scan of first element if any instead.
264         Scan scan = new Scan();
265         scan.addFamily(column.getName());
266         scan.setBatch(1);
267         ResultScanner scanner = null;
268         try {
269           stopWatch.start();
270           scanner = table.getScanner(scan);
271           scanner.next();
272           stopWatch.stop();
273           sink.publishReadTiming(region, column, stopWatch.getTime());
274         } catch (Exception e) {
275           sink.publishReadFailure(region, column, e);
276         } finally {
277           if (scanner != null) scanner.close();
278         }
279       } else {
280         Get get = new Get(region.getStartKey());
281         get.addFamily(column.getName());
282         try {
283           stopWatch.start();
284           table.get(get);
285           stopWatch.stop();
286           sink.publishReadTiming(region, column, stopWatch.getTime());
287         } catch (Exception e) {
288           sink.publishReadFailure(region, column, e);
289         }
290       }
291     }
292   }
293 
294   public static void main(String[] args) throws Exception {
295     int exitCode = ToolRunner.run(new Canary(), args);
296     System.exit(exitCode);
297   }
298 }