View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver;
19  
20  import java.util.Collection;
21  import java.util.concurrent.ScheduledExecutorService;
22  import java.util.concurrent.TimeUnit;
23  
24  import org.apache.commons.lang.StringUtils;
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
29  import org.apache.hadoop.hbase.HConstants;
30  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
31  import org.apache.hadoop.hbase.ServerName;
32  import org.apache.hadoop.hbase.io.hfile.BlockCache;
33  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
34  import org.apache.hadoop.hbase.io.hfile.CacheStats;
35  import org.apache.hadoop.hbase.wal.DefaultWALProvider;
36  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
37  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
38  import org.apache.hadoop.metrics2.MetricsExecutor;
39  
40  /**
41   * Impl for exposing HRegionServer Information through Hadoop's metrics 2 system.
42   */
43  @InterfaceAudience.Private
44  class MetricsRegionServerWrapperImpl
45      implements MetricsRegionServerWrapper {
46  
47    public static final Log LOG = LogFactory.getLog(MetricsRegionServerWrapperImpl.class);
48  
49    private final HRegionServer regionServer;
50  
51    private BlockCache blockCache;
52  
53    private volatile long numStores = 0;
54    private volatile long numWALFiles = 0;
55    private volatile long walFileSize = 0;
56    private volatile long numStoreFiles = 0;
57    private volatile long memstoreSize = 0;
58    private volatile long storeFileSize = 0;
59    private volatile double requestsPerSecond = 0.0;
60    private volatile long readRequestsCount = 0;
61    private volatile long writeRequestsCount = 0;
62    private volatile long checkAndMutateChecksFailed = 0;
63    private volatile long checkAndMutateChecksPassed = 0;
64    private volatile long storefileIndexSize = 0;
65    private volatile long totalStaticIndexSize = 0;
66    private volatile long totalStaticBloomSize = 0;
67    private volatile long numMutationsWithoutWAL = 0;
68    private volatile long dataInMemoryWithoutWAL = 0;
69    private volatile int percentFileLocal = 0;
70    private volatile long flushedCellsCount = 0;
71    private volatile long compactedCellsCount = 0;
72    private volatile long majorCompactedCellsCount = 0;
73    private volatile long flushedCellsSize = 0;
74    private volatile long compactedCellsSize = 0;
75    private volatile long majorCompactedCellsSize = 0;
76    private volatile long blockedRequestsCount = 0L;
77  
78    private CacheStats cacheStats;
79    private ScheduledExecutorService executor;
80    private Runnable runnable;
81    private long period;
82  
83    public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) {
84      this.regionServer = regionServer;
85      initBlockCache();
86  
87      this.period =
88          regionServer.conf.getLong(HConstants.REGIONSERVER_METRICS_PERIOD,
89            HConstants.DEFAULT_REGIONSERVER_METRICS_PERIOD);
90  
91      this.executor = CompatibilitySingletonFactory.getInstance(MetricsExecutor.class).getExecutor();
92      this.runnable = new RegionServerMetricsWrapperRunnable();
93      this.executor.scheduleWithFixedDelay(this.runnable, this.period, this.period,
94        TimeUnit.MILLISECONDS);
95  
96      if (LOG.isInfoEnabled()) {
97        LOG.info("Computing regionserver metrics every " + this.period + " milliseconds");
98      }
99    }
100 
101   /**
102    * It's possible that due to threading the block cache could not be initialized
103    * yet (testing multiple region servers in one jvm).  So we need to try and initialize
104    * the blockCache and cacheStats reference multiple times until we succeed.
105    */
106   private synchronized  void initBlockCache() {
107     CacheConfig cacheConfig = this.regionServer.cacheConfig;
108     if (cacheConfig != null && this.blockCache == null) {
109       this.blockCache = cacheConfig.getBlockCache();
110     }
111 
112     if (this.blockCache != null && this.cacheStats == null) {
113       this.cacheStats = blockCache.getStats();
114     }
115   }
116 
117   @Override
118   public String getClusterId() {
119     return regionServer.getClusterId();
120   }
121 
122   @Override
123   public long getStartCode() {
124     return regionServer.getStartcode();
125   }
126 
127   @Override
128   public String getZookeeperQuorum() {
129     ZooKeeperWatcher zk = regionServer.getZooKeeper();
130     if (zk == null) {
131       return "";
132     }
133     return zk.getQuorum();
134   }
135 
136   @Override
137   public String getCoprocessors() {
138     String[] coprocessors = regionServer.getRegionServerCoprocessors();
139     if (coprocessors == null || coprocessors.length == 0) {
140       return "";
141     }
142     return StringUtils.join(coprocessors, ", ");
143   }
144 
145   @Override
146   public String getServerName() {
147     ServerName serverName = regionServer.getServerName();
148     if (serverName == null) {
149       return "";
150     }
151     return serverName.getServerName();
152   }
153 
154   @Override
155   public long getNumOnlineRegions() {
156     Collection<HRegion> onlineRegionsLocalContext = regionServer.getOnlineRegionsLocalContext();
157     if (onlineRegionsLocalContext == null) {
158       return 0;
159     }
160     return onlineRegionsLocalContext.size();
161   }
162 
163   @Override
164   public long getTotalRequestCount() {
165     return regionServer.rpcServices.requestCount.get();
166   }
167 
168   @Override
169   public int getSplitQueueSize() {
170     if (this.regionServer.compactSplitThread == null) {
171       return 0;
172     }
173     return this.regionServer.compactSplitThread.getSplitQueueSize();
174   }
175 
176   @Override
177   public int getCompactionQueueSize() {
178     //The thread could be zero.  if so assume there is no queue.
179     if (this.regionServer.compactSplitThread == null) {
180       return 0;
181     }
182     return this.regionServer.compactSplitThread.getCompactionQueueSize();
183   }
184 
185   @Override
186   public int getSmallCompactionQueueSize() {
187     //The thread could be zero.  if so assume there is no queue.
188     if (this.regionServer.compactSplitThread == null) {
189       return 0;
190     }
191     return this.regionServer.compactSplitThread.getSmallCompactionQueueSize();
192   }
193 
194   @Override
195   public int getLargeCompactionQueueSize() {
196     //The thread could be zero.  if so assume there is no queue.
197     if (this.regionServer.compactSplitThread == null) {
198       return 0;
199     }
200     return this.regionServer.compactSplitThread.getLargeCompactionQueueSize();
201   }
202 
203   @Override
204   public int getFlushQueueSize() {
205     //If there is no flusher there should be no queue.
206     if (this.regionServer.cacheFlusher == null) {
207       return 0;
208     }
209     return this.regionServer.cacheFlusher.getFlushQueueSize();
210   }
211 
212   @Override
213   public long getBlockCacheCount() {
214     if (this.blockCache == null) {
215       return 0;
216     }
217     return this.blockCache.getBlockCount();
218   }
219 
220   @Override
221   public long getBlockCacheSize() {
222     if (this.blockCache == null) {
223       return 0;
224     }
225     return this.blockCache.getCurrentSize();
226   }
227 
228   @Override
229   public long getBlockCacheFreeSize() {
230     if (this.blockCache == null) {
231       return 0;
232     }
233     return this.blockCache.getFreeSize();
234   }
235 
236   @Override
237   public long getBlockCacheHitCount() {
238     if (this.cacheStats == null) {
239       return 0;
240     }
241     return this.cacheStats.getHitCount();
242   }
243 
244   @Override
245   public long getBlockCacheMissCount() {
246     if (this.cacheStats == null) {
247       return 0;
248     }
249     return this.cacheStats.getMissCount();
250   }
251 
252   @Override
253   public long getBlockCacheEvictedCount() {
254     if (this.cacheStats == null) {
255       return 0;
256     }
257     return this.cacheStats.getEvictedCount();
258   }
259 
260   @Override
261   public double getBlockCacheHitPercent() {
262     if (this.cacheStats == null) {
263       return 0;
264     }
265     return (int) (this.cacheStats.getHitRatio() * 100);
266   }
267 
268   @Override
269   public int getBlockCacheHitCachingPercent() {
270     if (this.cacheStats == null) {
271       return 0;
272     }
273     return (int) (this.cacheStats.getHitCachingRatio() * 100);
274   }
275 
276   @Override public void forceRecompute() {
277     this.runnable.run();
278   }
279 
280   @Override
281   public long getNumStores() {
282     return numStores;
283   }
284   
285   @Override
286   public long getNumWALFiles() {
287     return numWALFiles;
288   }
289 
290   @Override
291   public long getWALFileSize() {
292     return walFileSize;
293   }
294   
295   @Override
296   public long getNumStoreFiles() {
297     return numStoreFiles;
298   }
299 
300   @Override
301   public long getMemstoreSize() {
302     return memstoreSize;
303   }
304 
305   @Override
306   public long getStoreFileSize() {
307     return storeFileSize;
308   }
309 
310   @Override public double getRequestsPerSecond() {
311     return requestsPerSecond;
312   }
313 
314   @Override
315   public long getReadRequestsCount() {
316     return readRequestsCount;
317   }
318 
319   @Override
320   public long getWriteRequestsCount() {
321     return writeRequestsCount;
322   }
323 
324   @Override
325   public long getCheckAndMutateChecksFailed() {
326     return checkAndMutateChecksFailed;
327   }
328 
329   @Override
330   public long getCheckAndMutateChecksPassed() {
331     return checkAndMutateChecksPassed;
332   }
333 
334   @Override
335   public long getStoreFileIndexSize() {
336     return storefileIndexSize;
337   }
338 
339   @Override
340   public long getTotalStaticIndexSize() {
341     return totalStaticIndexSize;
342   }
343 
344   @Override
345   public long getTotalStaticBloomSize() {
346     return totalStaticBloomSize;
347   }
348 
349   @Override
350   public long getNumMutationsWithoutWAL() {
351     return numMutationsWithoutWAL;
352   }
353 
354   @Override
355   public long getDataInMemoryWithoutWAL() {
356     return dataInMemoryWithoutWAL;
357   }
358 
359   @Override
360   public int getPercentFileLocal() {
361     return percentFileLocal;
362   }
363 
364   @Override
365   public long getUpdatesBlockedTime() {
366     if (this.regionServer.cacheFlusher == null) {
367       return 0;
368     }
369     return this.regionServer.cacheFlusher.getUpdatesBlockedMsHighWater().get();
370   }
371 
372   @Override
373   public long getFlushedCellsCount() {
374     return flushedCellsCount;
375   }
376 
377   @Override
378   public long getCompactedCellsCount() {
379     return compactedCellsCount;
380   }
381 
382   @Override
383   public long getMajorCompactedCellsCount() {
384     return majorCompactedCellsCount;
385   }
386 
387   @Override
388   public long getFlushedCellsSize() {
389     return flushedCellsSize;
390   }
391 
392   @Override
393   public long getCompactedCellsSize() {
394     return compactedCellsSize;
395   }
396 
397   @Override
398   public long getMajorCompactedCellsSize() {
399     return majorCompactedCellsSize;
400   }
401 
402   /**
403    * This is the runnable that will be executed on the executor every PERIOD number of seconds
404    * It will take metrics/numbers from all of the regions and use them to compute point in
405    * time metrics.
406    */
407   public class RegionServerMetricsWrapperRunnable implements Runnable {
408 
409     private long lastRan = 0;
410     private long lastRequestCount = 0;
411 
412     @Override
413     synchronized public void run() {
414       initBlockCache();
415       cacheStats = blockCache.getStats();
416 
417       HDFSBlocksDistribution hdfsBlocksDistribution =
418           new HDFSBlocksDistribution();
419 
420       long tempNumStores = 0;
421       long tempNumStoreFiles = 0;
422       long tempMemstoreSize = 0;
423       long tempStoreFileSize = 0;
424       long tempReadRequestsCount = 0;
425       long tempWriteRequestsCount = 0;
426       long tempCheckAndMutateChecksFailed = 0;
427       long tempCheckAndMutateChecksPassed = 0;
428       long tempStorefileIndexSize = 0;
429       long tempTotalStaticIndexSize = 0;
430       long tempTotalStaticBloomSize = 0;
431       long tempNumMutationsWithoutWAL = 0;
432       long tempDataInMemoryWithoutWAL = 0;
433       int tempPercentFileLocal = 0;
434       long tempFlushedCellsCount = 0;
435       long tempCompactedCellsCount = 0;
436       long tempMajorCompactedCellsCount = 0;
437       long tempFlushedCellsSize = 0;
438       long tempCompactedCellsSize = 0;
439       long tempMajorCompactedCellsSize = 0;
440       long tempBlockedRequestsCount = 0L;
441 
442       for (HRegion r : regionServer.getOnlineRegionsLocalContext()) {
443         tempNumMutationsWithoutWAL += r.numMutationsWithoutWAL.get();
444         tempDataInMemoryWithoutWAL += r.dataInMemoryWithoutWAL.get();
445         tempReadRequestsCount += r.readRequestsCount.get();
446         tempWriteRequestsCount += r.writeRequestsCount.get();
447         tempCheckAndMutateChecksFailed += r.checkAndMutateChecksFailed.get();
448         tempCheckAndMutateChecksPassed += r.checkAndMutateChecksPassed.get();
449         tempBlockedRequestsCount += r.getBlockedRequestsCount();
450         tempNumStores += r.stores.size();
451         for (Store store : r.stores.values()) {
452           tempNumStoreFiles += store.getStorefilesCount();
453           tempMemstoreSize += store.getMemStoreSize();
454           tempStoreFileSize += store.getStorefilesSize();
455           tempStorefileIndexSize += store.getStorefilesIndexSize();
456           tempTotalStaticBloomSize += store.getTotalStaticBloomSize();
457           tempTotalStaticIndexSize += store.getTotalStaticIndexSize();
458           tempFlushedCellsCount += store.getFlushedCellsCount();
459           tempCompactedCellsCount += store.getCompactedCellsCount();
460           tempMajorCompactedCellsCount += store.getMajorCompactedCellsCount();
461           tempFlushedCellsSize += store.getFlushedCellsSize();
462           tempCompactedCellsSize += store.getCompactedCellsSize();
463           tempMajorCompactedCellsSize += store.getMajorCompactedCellsSize();
464         }
465 
466         hdfsBlocksDistribution.add(r.getHDFSBlocksDistribution());
467       }
468 
469       float localityIndex = hdfsBlocksDistribution.getBlockLocalityIndex(
470           regionServer.getServerName().getHostname());
471       tempPercentFileLocal = (int) (localityIndex * 100);
472 
473 
474       //Compute the number of requests per second
475       long currentTime = EnvironmentEdgeManager.currentTime();
476 
477       // assume that it took PERIOD seconds to start the executor.
478       // this is a guess but it's a pretty good one.
479       if (lastRan == 0) {
480         lastRan = currentTime - period;
481       }
482 
483 
484       //If we've time traveled keep the last requests per second.
485       if ((currentTime - lastRan) > 0) {
486         long currentRequestCount = getTotalRequestCount();
487         requestsPerSecond = (currentRequestCount - lastRequestCount) / ((currentTime - lastRan) / 1000.0);
488         lastRequestCount = currentRequestCount;
489       }
490       lastRan = currentTime;
491 
492       numWALFiles = DefaultWALProvider.getNumLogFiles(regionServer.walFactory);
493       walFileSize = DefaultWALProvider.getLogFileSize(regionServer.walFactory);
494 
495       //Copy over computed values so that no thread sees half computed values.
496       numStores = tempNumStores;
497       numStoreFiles = tempNumStoreFiles;
498       memstoreSize = tempMemstoreSize;
499       storeFileSize = tempStoreFileSize;
500       readRequestsCount = tempReadRequestsCount;
501       writeRequestsCount = tempWriteRequestsCount;
502       checkAndMutateChecksFailed = tempCheckAndMutateChecksFailed;
503       checkAndMutateChecksPassed = tempCheckAndMutateChecksPassed;
504       storefileIndexSize = tempStorefileIndexSize;
505       totalStaticIndexSize = tempTotalStaticIndexSize;
506       totalStaticBloomSize = tempTotalStaticBloomSize;
507       numMutationsWithoutWAL = tempNumMutationsWithoutWAL;
508       dataInMemoryWithoutWAL = tempDataInMemoryWithoutWAL;
509       percentFileLocal = tempPercentFileLocal;
510       flushedCellsCount = tempFlushedCellsCount;
511       compactedCellsCount = tempCompactedCellsCount;
512       majorCompactedCellsCount = tempMajorCompactedCellsCount;
513       flushedCellsSize = tempFlushedCellsSize;
514       compactedCellsSize = tempCompactedCellsSize;
515       majorCompactedCellsSize = tempMajorCompactedCellsSize;
516       blockedRequestsCount = tempBlockedRequestsCount;
517     }
518   }
519 
520   @Override
521   public long getBlockedRequestsCount() {
522     return blockedRequestsCount;
523   }
524 }