View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver;
19  
20  import java.util.Collection;
21  import java.util.concurrent.ScheduledExecutorService;
22  import java.util.concurrent.TimeUnit;
23  
24  import org.apache.commons.lang.StringUtils;
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
29  import org.apache.hadoop.hbase.HConstants;
30  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
31  import org.apache.hadoop.hbase.ServerName;
32  import org.apache.hadoop.hbase.io.hfile.BlockCache;
33  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
34  import org.apache.hadoop.hbase.io.hfile.CacheStats;
35  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
36  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
37  import org.apache.hadoop.metrics2.MetricsExecutor;
38  
39  /**
40   * Impl for exposing HRegionServer Information through Hadoop's metrics 2 system.
41   */
42  @InterfaceAudience.Private
43  class MetricsRegionServerWrapperImpl
44      implements MetricsRegionServerWrapper {
45  
46    public static final Log LOG = LogFactory.getLog(MetricsRegionServerWrapperImpl.class);
47  
48    private final HRegionServer regionServer;
49  
50    private BlockCache blockCache;
51  
52    private volatile long numStores = 0;
53    private volatile long numHLogFiles = 0;
54    private volatile long hlogFileSize = 0;
55    private volatile long numStoreFiles = 0;
56    private volatile long memstoreSize = 0;
57    private volatile long storeFileSize = 0;
58    private volatile long maxStoreFileAge = 0;
59    private volatile long minStoreFileAge = 0;
60    private volatile long avgStoreFileAge = 0;
61    private volatile long numReferenceFiles = 0;
62    private volatile double requestsPerSecond = 0.0;
63    private volatile long readRequestsCount = 0;
64    private volatile long writeRequestsCount = 0;
65    private volatile long checkAndMutateChecksFailed = 0;
66    private volatile long checkAndMutateChecksPassed = 0;
67    private volatile long storefileIndexSize = 0;
68    private volatile long totalStaticIndexSize = 0;
69    private volatile long totalStaticBloomSize = 0;
70    private volatile long numMutationsWithoutWAL = 0;
71    private volatile long dataInMemoryWithoutWAL = 0;
72    private volatile int percentFileLocal = 0;
73    private volatile long flushedCellsCount = 0;
74    private volatile long compactedCellsCount = 0;
75    private volatile long majorCompactedCellsCount = 0;
76    private volatile long flushedCellsSize = 0;
77    private volatile long compactedCellsSize = 0;
78    private volatile long majorCompactedCellsSize = 0;
79    private volatile long blockedRequestsCount = 0L;
80    private volatile long averageRegionSize = 0L;
81  
82    private CacheStats cacheStats;
83    private ScheduledExecutorService executor;
84    private Runnable runnable;
85    private long period;
86  
87    public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) {
88      this.regionServer = regionServer;
89      initBlockCache();
90  
91      this.period =
92          regionServer.conf.getLong(HConstants.REGIONSERVER_METRICS_PERIOD,
93            HConstants.DEFAULT_REGIONSERVER_METRICS_PERIOD);
94  
95      this.executor = CompatibilitySingletonFactory.getInstance(MetricsExecutor.class).getExecutor();
96      this.runnable = new RegionServerMetricsWrapperRunnable();
97      this.executor.scheduleWithFixedDelay(this.runnable, this.period, this.period,
98        TimeUnit.MILLISECONDS);
99  
100     if (LOG.isInfoEnabled()) {
101       LOG.info("Computing regionserver metrics every " + this.period + " milliseconds");
102     }
103   }
104 
105   /**
106    * It's possible that due to threading the block cache could not be initialized
107    * yet (testing multiple region servers in one jvm).  So we need to try and initialize
108    * the blockCache and cacheStats reference multiple times until we succeed.
109    */
110   private synchronized  void initBlockCache() {
111     CacheConfig cacheConfig = this.regionServer.cacheConfig;
112     if (cacheConfig != null && this.blockCache == null) {
113       this.blockCache = cacheConfig.getBlockCache();
114     }
115 
116     if (this.blockCache != null && this.cacheStats == null) {
117       this.cacheStats = blockCache.getStats();
118     }
119   }
120 
121   @Override
122   public String getClusterId() {
123     return regionServer.getClusterId();
124   }
125 
126   @Override
127   public long getStartCode() {
128     return regionServer.getStartcode();
129   }
130 
131   @Override
132   public String getZookeeperQuorum() {
133     ZooKeeperWatcher zk = regionServer.getZooKeeperWatcher();
134     if (zk == null) {
135       return "";
136     }
137     return zk.getQuorum();
138   }
139 
140   @Override
141   public String getCoprocessors() {
142     String[] coprocessors = regionServer.getCoprocessors();
143     if (coprocessors == null || coprocessors.length == 0) {
144       return "";
145     }
146     return StringUtils.join(coprocessors, ", ");
147   }
148 
149   @Override
150   public String getServerName() {
151     ServerName serverName = regionServer.getServerName();
152     if (serverName == null) {
153       return "";
154     }
155     return serverName.getServerName();
156   }
157 
158   @Override
159   public long getNumOnlineRegions() {
160     Collection<HRegion> onlineRegionsLocalContext = regionServer.getOnlineRegionsLocalContext();
161     if (onlineRegionsLocalContext == null) {
162       return 0;
163     }
164     return onlineRegionsLocalContext.size();
165   }
166 
167   @Override
168   public long getTotalRequestCount() {
169     return regionServer.requestCount.get();
170   }
171 
172   @Override
173   public int getSplitQueueSize() {
174     if (this.regionServer.compactSplitThread == null) {
175       return 0;
176     }
177     return this.regionServer.compactSplitThread.getSplitQueueSize();
178   }
179 
180   @Override
181   public int getCompactionQueueSize() {
182     //The thread could be zero.  if so assume there is no queue.
183     if (this.regionServer.compactSplitThread == null) {
184       return 0;
185     }
186     return this.regionServer.compactSplitThread.getCompactionQueueSize();
187   }
188 
189   @Override
190   public int getSmallCompactionQueueSize() {
191     //The thread could be zero.  if so assume there is no queue.
192     if (this.regionServer.compactSplitThread == null) {
193       return 0;
194     }
195     return this.regionServer.compactSplitThread.getSmallCompactionQueueSize();
196   }
197 
198   @Override
199   public int getLargeCompactionQueueSize() {
200     //The thread could be zero.  if so assume there is no queue.
201     if (this.regionServer.compactSplitThread == null) {
202       return 0;
203     }
204     return this.regionServer.compactSplitThread.getLargeCompactionQueueSize();
205   }
206 
207   @Override
208   public int getFlushQueueSize() {
209     //If there is no flusher there should be no queue.
210     if (this.regionServer.cacheFlusher == null) {
211       return 0;
212     }
213     return this.regionServer.cacheFlusher.getFlushQueueSize();
214   }
215 
216   @Override
217   public long getBlockCacheCount() {
218     if (this.blockCache == null) {
219       return 0;
220     }
221     return this.blockCache.getBlockCount();
222   }
223 
224   @Override
225   public long getBlockCacheSize() {
226     if (this.blockCache == null) {
227       return 0;
228     }
229     return this.blockCache.getCurrentSize();
230   }
231 
232   @Override
233   public long getBlockCacheFreeSize() {
234     if (this.blockCache == null) {
235       return 0;
236     }
237     return this.blockCache.getFreeSize();
238   }
239 
240   @Override
241   public long getBlockCacheHitCount() {
242     if (this.cacheStats == null) {
243       return 0;
244     }
245     return this.cacheStats.getHitCount();
246   }
247 
248   @Override
249   public long getBlockCacheMissCount() {
250     if (this.cacheStats == null) {
251       return 0;
252     }
253     return this.cacheStats.getMissCount();
254   }
255 
256   @Override
257   public long getBlockCacheEvictedCount() {
258     if (this.cacheStats == null) {
259       return 0;
260     }
261     return this.cacheStats.getEvictedCount();
262   }
263 
264   @Override
265   public int getBlockCacheHitPercent() {
266     if (this.cacheStats == null) {
267       return 0;
268     }
269     return (int) (this.cacheStats.getHitRatio() * 100);
270   }
271 
272   @Override
273   public int getBlockCacheHitCachingPercent() {
274     if (this.cacheStats == null) {
275       return 0;
276     }
277     return (int) (this.cacheStats.getHitCachingRatio() * 100);
278   }
279 
280   @Override
281   public long getBlockCacheFailedInsertions() {
282     return this.cacheStats.getFailedInserts();
283   }
284 
285   @Override public void forceRecompute() {
286     this.runnable.run();
287   }
288 
289   @Override
290   public long getNumStores() {
291     return numStores;
292   }
293   
294   @Override
295   public long getNumHLogFiles() {
296     return numHLogFiles;
297   }
298 
299   @Override
300   public long getHLogFileSize() {
301     return hlogFileSize;
302   }
303   
304   @Override
305   public long getNumStoreFiles() {
306     return numStoreFiles;
307   }
308 
309   @Override
310   public long getMaxStoreFileAge() {
311     return maxStoreFileAge;
312   }
313 
314   @Override
315   public long getMinStoreFileAge() {
316     return minStoreFileAge;
317   }
318 
319   @Override
320   public long getAvgStoreFileAge() {
321     return avgStoreFileAge;
322   }
323 
324   @Override
325   public long getNumReferenceFiles() {
326     return numReferenceFiles;
327   }
328 
329   @Override
330   public long getMemstoreSize() {
331     return memstoreSize;
332   }
333 
334   @Override
335   public long getStoreFileSize() {
336     return storeFileSize;
337   }
338 
339   @Override public double getRequestsPerSecond() {
340     return requestsPerSecond;
341   }
342 
343   @Override
344   public long getReadRequestsCount() {
345     return readRequestsCount;
346   }
347 
348   @Override
349   public long getWriteRequestsCount() {
350     return writeRequestsCount;
351   }
352 
353   @Override
354   public long getCheckAndMutateChecksFailed() {
355     return checkAndMutateChecksFailed;
356   }
357 
358   @Override
359   public long getCheckAndMutateChecksPassed() {
360     return checkAndMutateChecksPassed;
361   }
362 
363   @Override
364   public long getStoreFileIndexSize() {
365     return storefileIndexSize;
366   }
367 
368   @Override
369   public long getTotalStaticIndexSize() {
370     return totalStaticIndexSize;
371   }
372 
373   @Override
374   public long getTotalStaticBloomSize() {
375     return totalStaticBloomSize;
376   }
377 
378   @Override
379   public long getNumMutationsWithoutWAL() {
380     return numMutationsWithoutWAL;
381   }
382 
383   @Override
384   public long getDataInMemoryWithoutWAL() {
385     return dataInMemoryWithoutWAL;
386   }
387 
388   @Override
389   public int getPercentFileLocal() {
390     return percentFileLocal;
391   }
392 
393   @Override
394   public long getUpdatesBlockedTime() {
395     if (this.regionServer.cacheFlusher == null) {
396       return 0;
397     }
398     return this.regionServer.cacheFlusher.getUpdatesBlockedMsHighWater().get();
399   }
400 
401   @Override
402   public long getFlushedCellsCount() {
403     return flushedCellsCount;
404   }
405 
406   @Override
407   public long getCompactedCellsCount() {
408     return compactedCellsCount;
409   }
410 
411   @Override
412   public long getMajorCompactedCellsCount() {
413     return majorCompactedCellsCount;
414   }
415 
416   @Override
417   public long getFlushedCellsSize() {
418     return flushedCellsSize;
419   }
420 
421   @Override
422   public long getCompactedCellsSize() {
423     return compactedCellsSize;
424   }
425 
426   @Override
427   public long getMajorCompactedCellsSize() {
428     return majorCompactedCellsSize;
429   }
430 
431   /**
432    * This is the runnable that will be executed on the executor every PERIOD number of seconds
433    * It will take metrics/numbers from all of the regions and use them to compute point in
434    * time metrics.
435    */
436   public class RegionServerMetricsWrapperRunnable implements Runnable {
437 
438     private long lastRan = 0;
439     private long lastRequestCount = 0;
440 
441     @Override
442     synchronized public void run() {
443       initBlockCache();
444       cacheStats = blockCache.getStats();
445 
446       HDFSBlocksDistribution hdfsBlocksDistribution =
447           new HDFSBlocksDistribution();
448 
449       long tempNumStores = 0;
450       long tempNumStoreFiles = 0;
451       long tempMemstoreSize = 0;
452       long tempStoreFileSize = 0;
453       long tempMaxStoreFileAge = 0;
454       long tempNumReferenceFiles = 0;
455       long avgAgeNumerator = 0;
456       long numHFiles = 0;
457       long tempMinStoreFileAge = Long.MAX_VALUE;
458       long tempReadRequestsCount = 0;
459       long tempWriteRequestsCount = 0;
460       long tempCheckAndMutateChecksFailed = 0;
461       long tempCheckAndMutateChecksPassed = 0;
462       long tempStorefileIndexSize = 0;
463       long tempTotalStaticIndexSize = 0;
464       long tempTotalStaticBloomSize = 0;
465       long tempNumMutationsWithoutWAL = 0;
466       long tempDataInMemoryWithoutWAL = 0;
467       int tempPercentFileLocal = 0;
468       long tempFlushedCellsCount = 0;
469       long tempCompactedCellsCount = 0;
470       long tempMajorCompactedCellsCount = 0;
471       long tempFlushedCellsSize = 0;
472       long tempCompactedCellsSize = 0;
473       long tempMajorCompactedCellsSize = 0;
474       long tempBlockedRequestsCount = 0L;
475 
476       int regionCount = 0;
477       for (HRegion r : regionServer.getOnlineRegionsLocalContext()) {
478         tempNumMutationsWithoutWAL += r.numMutationsWithoutWAL.get();
479         tempDataInMemoryWithoutWAL += r.dataInMemoryWithoutWAL.get();
480         tempReadRequestsCount += r.readRequestsCount.get();
481         tempWriteRequestsCount += r.writeRequestsCount.get();
482         tempCheckAndMutateChecksFailed += r.checkAndMutateChecksFailed.get();
483         tempCheckAndMutateChecksPassed += r.checkAndMutateChecksPassed.get();
484         tempBlockedRequestsCount += r.getBlockedRequestsCount();
485         tempNumStores += r.stores.size();
486         for (Store store : r.stores.values()) {
487           tempNumStoreFiles += store.getStorefilesCount();
488           tempMemstoreSize += store.getMemStoreSize();
489           tempStoreFileSize += store.getStorefilesSize();
490 
491           long storeMaxStoreFileAge = store.getMaxStoreFileAge();
492           tempMaxStoreFileAge = (storeMaxStoreFileAge > tempMaxStoreFileAge) ?
493             storeMaxStoreFileAge : tempMaxStoreFileAge;
494 
495           long storeMinStoreFileAge = store.getMinStoreFileAge();
496           tempMinStoreFileAge = (storeMinStoreFileAge < tempMinStoreFileAge) ?
497             storeMinStoreFileAge : tempMinStoreFileAge;
498 
499           long storeHFiles = store.getNumHFiles();
500           avgAgeNumerator += store.getAvgStoreFileAge() * storeHFiles;
501           numHFiles += storeHFiles;
502           tempNumReferenceFiles += store.getNumReferenceFiles();
503 
504           tempStorefileIndexSize += store.getStorefilesIndexSize();
505           tempTotalStaticBloomSize += store.getTotalStaticBloomSize();
506           tempTotalStaticIndexSize += store.getTotalStaticIndexSize();
507           tempFlushedCellsCount += store.getFlushedCellsCount();
508           tempCompactedCellsCount += store.getCompactedCellsCount();
509           tempMajorCompactedCellsCount += store.getMajorCompactedCellsCount();
510           tempFlushedCellsSize += store.getFlushedCellsSize();
511           tempCompactedCellsSize += store.getCompactedCellsSize();
512           tempMajorCompactedCellsSize += store.getMajorCompactedCellsSize();
513         }
514 
515         hdfsBlocksDistribution.add(r.getHDFSBlocksDistribution());
516         regionCount++;
517       }
518 
519       float localityIndex = hdfsBlocksDistribution.getBlockLocalityIndex(
520           regionServer.getServerName().getHostname());
521       tempPercentFileLocal = (int) (localityIndex * 100);
522 
523 
524       //Compute the number of requests per second
525       long currentTime = EnvironmentEdgeManager.currentTimeMillis();
526 
527       // assume that it took PERIOD seconds to start the executor.
528       // this is a guess but it's a pretty good one.
529       if (lastRan == 0) {
530         lastRan = currentTime - period;
531       }
532 
533       //If we've time traveled keep the last requests per second.
534       if ((currentTime - lastRan) > 0) {
535         long currentRequestCount = getTotalRequestCount();
536         requestsPerSecond = (currentRequestCount - lastRequestCount) / ((currentTime - lastRan) / 1000.0);
537         lastRequestCount = currentRequestCount;
538       }
539       lastRan = currentTime;
540 
541       //Copy over computed values so that no thread sees half computed values.
542       numStores = tempNumStores;
543       long tempNumHLogFiles = regionServer.hlog.getNumLogFiles();
544       // meta logs
545       if (regionServer.hlogForMeta != null) {
546         tempNumHLogFiles += regionServer.hlogForMeta.getNumLogFiles();
547       }
548       numHLogFiles = tempNumHLogFiles;
549       
550       long tempHlogFileSize = regionServer.hlog.getLogFileSize();
551       if (regionServer.hlogForMeta != null) {
552         tempHlogFileSize += regionServer.hlogForMeta.getLogFileSize();
553       }
554       hlogFileSize = tempHlogFileSize;
555       
556       numStoreFiles = tempNumStoreFiles;
557       memstoreSize = tempMemstoreSize;
558       storeFileSize = tempStoreFileSize;
559       maxStoreFileAge = tempMaxStoreFileAge;
560       if (regionCount > 0) {
561         averageRegionSize = (memstoreSize + storeFileSize) / regionCount;
562       }
563 
564       if (tempMinStoreFileAge != Long.MAX_VALUE) {
565         minStoreFileAge = tempMinStoreFileAge;
566       }
567 
568       if (numHFiles != 0) {
569         avgStoreFileAge = avgAgeNumerator / numHFiles;
570       }
571 
572       numReferenceFiles= tempNumReferenceFiles;
573       readRequestsCount = tempReadRequestsCount;
574       writeRequestsCount = tempWriteRequestsCount;
575       checkAndMutateChecksFailed = tempCheckAndMutateChecksFailed;
576       checkAndMutateChecksPassed = tempCheckAndMutateChecksPassed;
577       storefileIndexSize = tempStorefileIndexSize;
578       totalStaticIndexSize = tempTotalStaticIndexSize;
579       totalStaticBloomSize = tempTotalStaticBloomSize;
580       numMutationsWithoutWAL = tempNumMutationsWithoutWAL;
581       dataInMemoryWithoutWAL = tempDataInMemoryWithoutWAL;
582       percentFileLocal = tempPercentFileLocal;
583       flushedCellsCount = tempFlushedCellsCount;
584       compactedCellsCount = tempCompactedCellsCount;
585       majorCompactedCellsCount = tempMajorCompactedCellsCount;
586       flushedCellsSize = tempFlushedCellsSize;
587       compactedCellsSize = tempCompactedCellsSize;
588       majorCompactedCellsSize = tempMajorCompactedCellsSize;
589       blockedRequestsCount = tempBlockedRequestsCount;
590     }
591   }
592 
593   @Override
594   public long getBlockedRequestsCount() {
595     return blockedRequestsCount;
596   }
597 
598   @Override
599   public long getAverageRegionSize() {
600     return averageRegionSize;
601   }
602 
603   public long getDataMissCount() {
604     if (this.cacheStats == null) {
605       return 0;
606     }
607     return cacheStats.getDataMissCount();
608   }
609 
610   @Override
611   public long getLeafIndexMissCount() {
612     if (this.cacheStats == null) {
613       return 0;
614     }
615     return cacheStats.getLeafIndexMissCount();
616   }
617 
618   @Override
619   public long getBloomChunkMissCount() {
620     if (this.cacheStats == null) {
621       return 0;
622     }
623     return cacheStats.getBloomChunkMissCount();
624   }
625 
626   @Override
627   public long getMetaMissCount() {
628     if (this.cacheStats == null) {
629       return 0;
630     }
631     return cacheStats.getMetaMissCount();
632   }
633 
634   @Override
635   public long getRootIndexMissCount() {
636     if (this.cacheStats == null) {
637       return 0;
638     }
639     return cacheStats.getRootIndexMissCount();
640   }
641 
642   @Override
643   public long getIntermediateIndexMissCount() {
644     if (this.cacheStats == null) {
645       return 0;
646     }
647     return cacheStats.getIntermediateIndexMissCount();
648   }
649 
650   @Override
651   public long getFileInfoMissCount() {
652     if (this.cacheStats == null) {
653       return 0;
654     }
655     return cacheStats.getFileInfoMissCount();
656   }
657 
658   @Override
659   public long getGeneralBloomMetaMissCount() {
660     if (this.cacheStats == null) {
661       return 0;
662     }
663     return cacheStats.getGeneralBloomMetaMissCount();
664   }
665 
666   @Override
667   public long getDeleteFamilyBloomMissCount() {
668     if (this.cacheStats == null) {
669       return 0;
670     }
671     return cacheStats.getDeleteFamilyBloomMissCount();
672   }
673 
674   @Override
675   public long getTrailerMissCount() {
676     if (this.cacheStats == null) {
677       return 0;
678     }
679     return cacheStats.getTrailerMissCount();
680   }
681 
682   @Override
683   public long getDataHitCount() {
684     if (this.cacheStats == null) {
685       return 0;
686     }
687     return cacheStats.getDataHitCount();
688   }
689 
690   @Override
691   public long getLeafIndexHitCount() {
692     if (this.cacheStats == null) {
693       return 0;
694     }
695     return cacheStats.getLeafIndexHitCount();
696   }
697 
698   @Override
699   public long getBloomChunkHitCount() {
700     if (this.cacheStats == null) {
701       return 0;
702     }
703     return cacheStats.getBloomChunkHitCount();
704   }
705 
706   @Override
707   public long getMetaHitCount() {
708     if (this.cacheStats == null) {
709       return 0;
710     }
711     return cacheStats.getMetaHitCount();
712   }
713 
714   @Override
715   public long getRootIndexHitCount() {
716     if (this.cacheStats == null) {
717       return 0;
718     }
719     return cacheStats.getRootIndexHitCount();
720   }
721 
722   @Override
723   public long getIntermediateIndexHitCount() {
724     if (this.cacheStats == null) {
725       return 0;
726     }
727     return cacheStats.getIntermediateIndexHitCount();
728   }
729 
730   @Override
731   public long getFileInfoHitCount() {
732     if (this.cacheStats == null) {
733       return 0;
734     }
735     return cacheStats.getFileInfoHitCount();
736   }
737 
738   @Override
739   public long getGeneralBloomMetaHitCount() {
740     if (this.cacheStats == null) {
741       return 0;
742     }
743     return cacheStats.getGeneralBloomMetaHitCount();
744   }
745 
746   @Override
747   public long getDeleteFamilyBloomHitCount() {
748     if (this.cacheStats == null) {
749       return 0;
750     }
751     return cacheStats.getDeleteFamilyBloomHitCount();
752   }
753 
754   @Override
755   public long getTrailerHitCount() {
756     if (this.cacheStats == null) {
757       return 0;
758     }
759     return cacheStats.getTrailerHitCount();
760   }
761 }