/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.IOException;
import java.lang.ref.WeakReference;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CachedBlock.BlockPriority;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HasThread;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.util.StringUtils;

import com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
 * A block cache implementation that is memory-aware using {@link HeapSize},
 * memory-bound using an LRU eviction algorithm, and concurrent: backed by a
 * {@link ConcurrentHashMap} and with a non-blocking eviction thread giving
 * constant-time {@link #cacheBlock} and {@link #getBlock} operations.<p>
 *
 * Contains three levels of block priority to allow for scan-resistance and
 * in-memory families.  A block is added with an inMemory flag if necessary;
 * otherwise it starts at single-access priority.  Once a block is accessed
 * again, it is promoted to multi-access priority.  This is used to prevent
 * scans from thrashing the cache, adding a least-frequently-used element to
 * the eviction algorithm.<p>
 *
 * Each priority is given its own chunk of the total cache to ensure
 * fairness during eviction.  Each priority will retain close to its maximum
 * size; however, if any priority is not using its entire chunk the others
 * are able to grow beyond their chunk size.<p>
 *
 * Instantiated at a minimum with the total size and average block size.
 * All sizes are in bytes.  The block size is not especially important as this
 * cache is fully dynamic in its sizing of blocks.  It is only used for
 * pre-allocating data structures and in initial heap estimation of the map.<p>
 *
 * The detailed constructor defines the sizes for the three priorities (they
 * should total to the maximum size defined).  It also sets the levels that
 * trigger and control the eviction thread.<p>
 *
 * The acceptable size is the cache size level which triggers the eviction
 * process to start.  It evicts enough blocks to get the size below the
 * minimum size specified.<p>
 *
 * Eviction happens in a separate thread and involves a single full-scan
 * of the map.  It determines how many bytes must be freed to reach the minimum
 * size, and then while scanning determines the fewest least-recently-used
 * blocks necessary from each of the three priorities (would be 3 times bytes
 * to free).  It then uses the priority chunk sizes to evict fairly according
 * to the relative sizes and usage.
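 *
 * <p>A minimal usage sketch (sizes and variable names below are illustrative
 * only; {@code key} and {@code block} are assumed to be an existing
 * {@link BlockCacheKey} and {@link Cacheable}):
 * <pre>{@code
 * LruBlockCache cache = new LruBlockCache(64L * 1024 * 1024, 64 * 1024);
 * cache.cacheBlock(key, block);                            // enters at single-access priority
 * Cacheable hit = cache.getBlock(key, true, false, true);  // re-access promotes to multi-access
 * cache.shutdown();                                        // stop eviction and stats threads
 * }</pre>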
 */
@InterfaceAudience.Private
public class LruBlockCache implements BlockCache, HeapSize {

  static final Log LOG = LogFactory.getLog(LruBlockCache.class);

  static final String LRU_MIN_FACTOR_CONFIG_NAME = "hbase.lru.blockcache.min.factor";
  static final String LRU_ACCEPTABLE_FACTOR_CONFIG_NAME = "hbase.lru.blockcache.acceptable.factor";
  static final String LRU_SINGLE_PERCENTAGE_CONFIG_NAME = "hbase.lru.blockcache.single.percentage";
  static final String LRU_MULTI_PERCENTAGE_CONFIG_NAME = "hbase.lru.blockcache.multi.percentage";
  static final String LRU_MEMORY_PERCENTAGE_CONFIG_NAME = "hbase.lru.blockcache.memory.percentage";

  /**
   * Configuration key to force data blocks of in-memory hfiles to always be
   * cached (unless the in-memory blocks alone exceed the cache).  Unlike the
   * per-column-family inMemory setting, inMemoryForceMode is a cluster-wide
   * configuration.
   */
  static final String LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME = "hbase.lru.rs.inmemoryforcemode";
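
  // A hedged configuration sketch (values are illustrative only, not tuning
  // advice) showing how these keys could be overridden before constructing
  // the cache via the Configuration-based constructor below:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setFloat(LRU_MIN_FACTOR_CONFIG_NAME, 0.90f);
  //   conf.setFloat(LRU_ACCEPTABLE_FACTOR_CONFIG_NAME, 0.95f);
  //   conf.setBoolean(LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME, true);
  //   LruBlockCache cache = new LruBlockCache(maxSize, blockSize, conf);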

  /** Default configuration parameters */

  /** Backing Concurrent Map Configuration */
  static final float DEFAULT_LOAD_FACTOR = 0.75f;
  static final int DEFAULT_CONCURRENCY_LEVEL = 16;

  /** Eviction thresholds */
  static final float DEFAULT_MIN_FACTOR = 0.95f;
  static final float DEFAULT_ACCEPTABLE_FACTOR = 0.99f;

  /** Priority buckets */
  static final float DEFAULT_SINGLE_FACTOR = 0.25f;
  static final float DEFAULT_MULTI_FACTOR = 0.50f;
  static final float DEFAULT_MEMORY_FACTOR = 0.25f;

  static final boolean DEFAULT_IN_MEMORY_FORCE_MODE = false;

  /** Statistics thread period, in seconds (runs every five minutes) */
  static final int statThreadPeriod = 60 * 5;

  /** Concurrent map (the cache) */
  private final ConcurrentHashMap<BlockCacheKey,CachedBlock> map;

  /** Eviction lock (locked when eviction in process) */
  private final ReentrantLock evictionLock = new ReentrantLock(true);

  /** Volatile boolean to track if we are in an eviction process or not */
  private volatile boolean evictionInProgress = false;

  /** Eviction thread */
  private final EvictionThread evictionThread;

  /** Statistics thread schedule pool (for heavy debugging, could remove) */
  private final ScheduledExecutorService scheduleThreadPool =
    Executors.newScheduledThreadPool(1,
      new ThreadFactoryBuilder()
        .setNameFormat("LruStats #%d")
        .setDaemon(true)
        .build());

  /** Current size of cache */
  private final AtomicLong size;

  /** Current number of cached elements */
  private final AtomicLong elements;

  /** Cache access count (sequential ID) */
  private final AtomicLong count;

  /** Cache statistics */
  private final CacheStats stats;

  /** Maximum allowable size of cache (block put if size > max, evict) */
  private long maxSize;

  /** Approximate block size */
  private long blockSize;

  /** Acceptable size of cache (no evictions if size < acceptable) */
  private float acceptableFactor;

  /** Minimum threshold of cache (when evicting, evict until size < min) */
  private float minFactor;

  /** Single access bucket size */
  private float singleFactor;

  /** Multiple access bucket size */
  private float multiFactor;

  /** In-memory bucket size */
  private float memoryFactor;

  /** Overhead of the structure itself */
  private long overhead;

  /** Whether in-memory hfile's data block has higher priority when evicting */
  private boolean forceInMemory;

  /** Where to send victims (blocks evicted from the cache) */
  private BucketCache victimHandler = null;

  /**
   * Default constructor.  Specify maximum size and expected average block
   * size (approximation is fine).
   *
   * <p>All other factors will be calculated based on defaults specified in
   * this class.
   * @param maxSize maximum size of cache, in bytes
   * @param blockSize approximate size of each block, in bytes
   */
  public LruBlockCache(long maxSize, long blockSize) {
    this(maxSize, blockSize, true);
  }

  /**
   * Constructor used for testing.  Allows disabling of the eviction thread.
   */
  public LruBlockCache(long maxSize, long blockSize, boolean evictionThread) {
    this(maxSize, blockSize, evictionThread,
        (int)Math.ceil(1.2*maxSize/blockSize),
        DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL,
        DEFAULT_MIN_FACTOR, DEFAULT_ACCEPTABLE_FACTOR,
        DEFAULT_SINGLE_FACTOR,
        DEFAULT_MULTI_FACTOR,
        DEFAULT_MEMORY_FACTOR,
        false
        );
  }

  public LruBlockCache(long maxSize, long blockSize, boolean evictionThread, Configuration conf) {
    this(maxSize, blockSize, evictionThread,
        (int)Math.ceil(1.2*maxSize/blockSize),
        DEFAULT_LOAD_FACTOR,
        DEFAULT_CONCURRENCY_LEVEL,
        conf.getFloat(LRU_MIN_FACTOR_CONFIG_NAME, DEFAULT_MIN_FACTOR),
        conf.getFloat(LRU_ACCEPTABLE_FACTOR_CONFIG_NAME, DEFAULT_ACCEPTABLE_FACTOR),
        conf.getFloat(LRU_SINGLE_PERCENTAGE_CONFIG_NAME, DEFAULT_SINGLE_FACTOR),
        conf.getFloat(LRU_MULTI_PERCENTAGE_CONFIG_NAME, DEFAULT_MULTI_FACTOR),
        conf.getFloat(LRU_MEMORY_PERCENTAGE_CONFIG_NAME, DEFAULT_MEMORY_FACTOR),
        conf.getBoolean(LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME, DEFAULT_IN_MEMORY_FORCE_MODE)
        );
  }

  public LruBlockCache(long maxSize, long blockSize, Configuration conf) {
    this(maxSize, blockSize, true, conf);
  }

  /**
   * Configurable constructor.  Use this constructor if not using defaults.
   * @param maxSize maximum size of this cache, in bytes
   * @param blockSize expected average size of blocks, in bytes
   * @param evictionThread whether to run evictions in a background thread or not
   * @param mapInitialSize initial size of backing ConcurrentHashMap
   * @param mapLoadFactor initial load factor of backing ConcurrentHashMap
   * @param mapConcurrencyLevel initial concurrency factor for backing CHM
   * @param minFactor percentage of total size that eviction will evict until
   * @param acceptableFactor percentage of total size that triggers eviction
   * @param singleFactor percentage of total size for single-access blocks
   * @param multiFactor percentage of total size for multiple-access blocks
   * @param memoryFactor percentage of total size for in-memory blocks
   * @param forceInMemory whether in-memory hfile blocks keep higher priority when evicting
   */
  public LruBlockCache(long maxSize, long blockSize, boolean evictionThread,
      int mapInitialSize, float mapLoadFactor, int mapConcurrencyLevel,
      float minFactor, float acceptableFactor, float singleFactor,
      float multiFactor, float memoryFactor, boolean forceInMemory) {
    if(singleFactor + multiFactor + memoryFactor != 1 ||
        singleFactor < 0 || multiFactor < 0 || memoryFactor < 0) {
      throw new IllegalArgumentException("Single, multi, and memory factors " +
          "should be non-negative and total 1.0");
    }
    if(minFactor >= acceptableFactor) {
      throw new IllegalArgumentException("minFactor must be smaller than acceptableFactor");
    }
    if(minFactor >= 1.0f || acceptableFactor >= 1.0f) {
      throw new IllegalArgumentException("all factors must be < 1");
    }
    this.maxSize = maxSize;
    this.blockSize = blockSize;
    this.forceInMemory = forceInMemory;
    map = new ConcurrentHashMap<BlockCacheKey,CachedBlock>(mapInitialSize,
        mapLoadFactor, mapConcurrencyLevel);
    this.minFactor = minFactor;
    this.acceptableFactor = acceptableFactor;
    this.singleFactor = singleFactor;
    this.multiFactor = multiFactor;
    this.memoryFactor = memoryFactor;
    this.stats = new CacheStats();
    this.count = new AtomicLong(0);
    this.elements = new AtomicLong(0);
    this.overhead = calculateOverhead(maxSize, blockSize, mapConcurrencyLevel);
    this.size = new AtomicLong(this.overhead);
    if(evictionThread) {
      this.evictionThread = new EvictionThread(this);
      this.evictionThread.start(); // FindBugs SC_START_IN_CTOR
    } else {
      this.evictionThread = null;
    }
    this.scheduleThreadPool.scheduleAtFixedRate(new StatisticsThread(this),
        statThreadPeriod, statThreadPeriod, TimeUnit.SECONDS);
  }

  public void setMaxSize(long maxSize) {
    this.maxSize = maxSize;
    if(this.size.get() > acceptableSize() && !evictionInProgress) {
      runEviction();
    }
  }

  // BlockCache implementation

  /**
   * Cache the block with the specified name and buffer.
   * <p>
   * It is assumed this will NOT be called on an already cached block. In rare cases (HBASE-8547)
   * this can happen, for which we compare the buffer contents.
   * @param cacheKey block's cache key
   * @param buf block buffer
   * @param inMemory if block is in-memory
   */
  @Override
  public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory) {
    CachedBlock cb = map.get(cacheKey);
    if(cb != null) {
      // compare the contents, if they are not equal, we are in big trouble
      if (compare(buf, cb.getBuffer()) != 0) {
        throw new RuntimeException("Cached block contents differ, which should not have happened. "
          + "cacheKey:" + cacheKey);
      }
      String msg = "Cached an already cached block: " + cacheKey + " cb:" + cb.getCacheKey();
      msg += ". This is harmless and can happen in rare cases (see HBASE-8547)";
      LOG.warn(msg);
      return;
    }
    cb = new CachedBlock(cacheKey, buf, count.incrementAndGet(), inMemory);
    long newSize = updateSizeMetrics(cb, false);
    map.put(cacheKey, cb);
    elements.incrementAndGet();
    if(newSize > acceptableSize() && !evictionInProgress) {
      runEviction();
    }
  }

  private int compare(Cacheable left, Cacheable right) {
    ByteBuffer l = ByteBuffer.allocate(left.getSerializedLength());
    left.serialize(l);
    ByteBuffer r = ByteBuffer.allocate(right.getSerializedLength());
    right.serialize(r);
    return Bytes.compareTo(l.array(), l.arrayOffset(), l.limit(),
      r.array(), r.arrayOffset(), r.limit());
  }

  /**
   * Cache the block with the specified name and buffer.
   * <p>
   * It is assumed this will NOT be called on an already cached block; duplicate
   * insertions are handled as described in
   * {@link #cacheBlock(BlockCacheKey, Cacheable, boolean)}.
   * @param cacheKey block's cache key
   * @param buf block buffer
   */
  public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf) {
    cacheBlock(cacheKey, buf, false);
  }

  /**
   * Helper function that updates the local size counter and also updates any
   * per-cf or per-blocktype metrics it can discern from the given
   * {@link CachedBlock}.
   *
   * @param cb the block whose heap size is added to (or subtracted from) the cache size
   * @param evict true if the block is being evicted, so its size is subtracted
   * @return the new total cache size, in bytes
   */
  protected long updateSizeMetrics(CachedBlock cb, boolean evict) {
    long heapsize = cb.heapSize();
    if (evict) {
      heapsize *= -1;
    }
    return size.addAndGet(heapsize);
  }

  /**
   * Get the buffer of the block with the specified name.
   * @param cacheKey block's cache key
   * @param caching true if the caller caches blocks on cache misses
   * @param repeat Whether this is a repeat lookup for the same block
   *        (used to avoid double counting cache misses when doing double-check locking)
   * @param updateCacheMetrics Whether to update cache metrics or not
   * @return buffer of specified cache key, or null if not in cache
   */
  @Override
  public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat,
      boolean updateCacheMetrics) {
    CachedBlock cb = map.get(cacheKey);
    if(cb == null) {
      if (!repeat && updateCacheMetrics) stats.miss(caching);
      if (victimHandler != null)
        return victimHandler.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
      return null;
    }
    if (updateCacheMetrics) stats.hit(caching);
    cb.access(count.incrementAndGet());
    return cb.getBuffer();
  }
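
  // Hedged illustration of the caller-side pattern the "repeat" flag supports
  // (a sketch only; "loadLock", "readBlockFromDisk" and the surrounding reader
  // are hypothetical, not part of this class):
  //
  //   Cacheable block = cache.getBlock(key, true, false, true); // first probe may count a miss
  //   if (block == null) {
  //     synchronized (loadLock) {
  //       block = cache.getBlock(key, true, true, true);        // repeat probe: miss not re-counted
  //       if (block == null) {
  //         block = readBlockFromDisk();                        // load outside the cache
  //         cache.cacheBlock(key, block, false);
  //       }
  //     }
  //   }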

  /**
   * Whether the cache contains the block with the specified cacheKey.
   * @param cacheKey block's cache key
   * @return true if the cache contains the block
   */
  public boolean containsBlock(BlockCacheKey cacheKey) {
    return map.containsKey(cacheKey);
  }

  @Override
  public boolean evictBlock(BlockCacheKey cacheKey) {
    CachedBlock cb = map.get(cacheKey);
    if (cb == null) return false;
    evictBlock(cb, false);
    return true;
  }

  /**
   * Evicts all blocks for a specific HFile. This is an
   * expensive operation implemented as a linear-time search through all blocks
   * in the cache. Ideally this should be a search in a log-access-time map.
   *
   * <p>
   * This is used for evict-on-close to remove all blocks of a specific HFile.
   *
   * @return the number of blocks evicted
   */
  @Override
  public int evictBlocksByHfileName(String hfileName) {
    int numEvicted = 0;
    for (BlockCacheKey key : map.keySet()) {
      if (key.getHfileName().equals(hfileName)) {
        if (evictBlock(key))
          ++numEvicted;
      }
    }
    if (victimHandler != null) {
      numEvicted += victimHandler.evictBlocksByHfileName(hfileName);
    }
    return numEvicted;
  }

  /**
   * Evict the block; if a victim handler exists and the block may be read
   * again later, it is handed off to the victim handler.
   * @param block the block to evict
   * @param evictedByEvictionProcess true if the given block is evicted by
   *          EvictionThread
   * @return the heap size of the evicted block
   */
  protected long evictBlock(CachedBlock block, boolean evictedByEvictionProcess) {
    map.remove(block.getCacheKey());
    updateSizeMetrics(block, true);
    elements.decrementAndGet();
    stats.evicted();
    if (evictedByEvictionProcess && victimHandler != null) {
      boolean wait = getCurrentSize() < acceptableSize();
      boolean inMemory = block.getPriority() == BlockPriority.MEMORY;
      victimHandler.cacheBlockWithWait(block.getCacheKey(), block.getBuffer(),
          inMemory, wait);
    }
    return block.heapSize();
  }

  /**
   * Multi-threaded call to run the eviction process.
   */
  private void runEviction() {
    if(evictionThread == null) {
      evict();
    } else {
      evictionThread.evict();
    }
  }

  /**
   * Eviction method.
   */
  void evict() {

    // Ensure only one eviction at a time
    if(!evictionLock.tryLock()) return;

    try {
      evictionInProgress = true;
      long currentSize = this.size.get();
      long bytesToFree = currentSize - minSize();

      if (LOG.isTraceEnabled()) {
        LOG.trace("Block cache LRU eviction started; Attempting to free " +
          StringUtils.byteDesc(bytesToFree) + " of total=" +
          StringUtils.byteDesc(currentSize));
      }

      if(bytesToFree <= 0) return;

      // Instantiate priority buckets
      BlockBucket bucketSingle = new BlockBucket(bytesToFree, blockSize,
          singleSize());
      BlockBucket bucketMulti = new BlockBucket(bytesToFree, blockSize,
          multiSize());
      BlockBucket bucketMemory = new BlockBucket(bytesToFree, blockSize,
          memorySize());

      // Scan entire map putting into appropriate buckets
      for(CachedBlock cachedBlock : map.values()) {
        switch(cachedBlock.getPriority()) {
          case SINGLE: {
            bucketSingle.add(cachedBlock);
            break;
          }
          case MULTI: {
            bucketMulti.add(cachedBlock);
            break;
          }
          case MEMORY: {
            bucketMemory.add(cachedBlock);
            break;
          }
        }
      }

      long bytesFreed = 0;
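      // Worked example for the forceInMemory path below (illustrative numbers):
      // with s = 300 MB single, m = 500 MB multi and bytesToFree = 200 MB,
      // bytesRemain = 600 MB; since 3*s > bytesRemain and 3*m > 2*bytesRemain,
      // both buckets evict: single frees s - bytesRemain/3 = 100 MB and multi
      // frees the remaining 100 MB, leaving 200 MB vs 400 MB, i.e. the 1:2 ratio.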
      if (forceInMemory || memoryFactor > 0.999f) {
        long s = bucketSingle.totalSize();
        long m = bucketMulti.totalSize();
        if (bytesToFree > (s + m)) {
          // this means we need to evict blocks in memory bucket to make room,
          // so the single and multi buckets will be emptied
          bytesFreed = bucketSingle.free(s);
          bytesFreed += bucketMulti.free(m);
          bytesFreed += bucketMemory.free(bytesToFree - bytesFreed);
        } else {
          // this means no need to evict blocks in the memory bucket,
          // and we try to keep the ratio between the single and multi buckets
          // at 1:2 after eviction
          long bytesRemain = s + m - bytesToFree;
          if (3 * s <= bytesRemain) {
            // single-bucket is small enough that no eviction happens for it
            // hence all eviction goes from multi-bucket
            bytesFreed = bucketMulti.free(bytesToFree);
          } else if (3 * m <= 2 * bytesRemain) {
            // multi-bucket is small enough that no eviction happens for it
            // hence all eviction goes from single-bucket
            bytesFreed = bucketSingle.free(bytesToFree);
          } else {
            // both buckets need to evict some blocks
            bytesFreed = bucketSingle.free(s - bytesRemain / 3);
            if (bytesFreed < bytesToFree) {
              bytesFreed += bucketMulti.free(bytesToFree - bytesFreed);
            }
          }
        }
      } else {
        PriorityQueue<BlockBucket> bucketQueue =
          new PriorityQueue<BlockBucket>(3);

        bucketQueue.add(bucketSingle);
        bucketQueue.add(bucketMulti);
        bucketQueue.add(bucketMemory);

        int remainingBuckets = 3;
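
        // Worked example for the loop below (illustrative numbers): buckets are
        // polled from smallest to largest overflow, so with bytesToFree = 300 MB
        // and overflows of memory < 0, single = 50 MB, multi = 400 MB, the memory
        // bucket frees nothing, the single bucket frees min(50, 300/2) = 50 MB,
        // and the multi bucket frees min(400, 250/1) = 250 MB.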

        BlockBucket bucket;
        while((bucket = bucketQueue.poll()) != null) {
          long overflow = bucket.overflow();
          if(overflow > 0) {
            long bucketBytesToFree = Math.min(overflow,
                (bytesToFree - bytesFreed) / remainingBuckets);
            bytesFreed += bucket.free(bucketBytesToFree);
          }
          remainingBuckets--;
        }
      }

      if (LOG.isTraceEnabled()) {
        long single = bucketSingle.totalSize();
        long multi = bucketMulti.totalSize();
        long memory = bucketMemory.totalSize();
        LOG.trace("Block cache LRU eviction completed; " +
          "freed=" + StringUtils.byteDesc(bytesFreed) + ", " +
          "total=" + StringUtils.byteDesc(this.size.get()) + ", " +
          "single=" + StringUtils.byteDesc(single) + ", " +
          "multi=" + StringUtils.byteDesc(multi) + ", " +
          "memory=" + StringUtils.byteDesc(memory));
      }
    } finally {
      stats.evict();
      evictionInProgress = false;
      evictionLock.unlock();
    }
  }

  /**
   * Used to group blocks into priority buckets.  There will be a BlockBucket
   * for each priority (single, multi, memory).  Once bucketed, the eviction
   * algorithm takes the appropriate number of elements out of each according
   * to configuration parameters and their relative sizes.
   */
  private class BlockBucket implements Comparable<BlockBucket> {

    private CachedBlockQueue queue;
    private long totalSize = 0;
    private long bucketSize;

    public BlockBucket(long bytesToFree, long blockSize, long bucketSize) {
      this.bucketSize = bucketSize;
      queue = new CachedBlockQueue(bytesToFree, blockSize);
      totalSize = 0;
    }

    public void add(CachedBlock block) {
      totalSize += block.heapSize();
      queue.add(block);
    }

    public long free(long toFree) {
      CachedBlock cb;
      long freedBytes = 0;
      while ((cb = queue.pollLast()) != null) {
        freedBytes += evictBlock(cb, true);
        if (freedBytes >= toFree) {
          return freedBytes;
        }
      }
      return freedBytes;
    }

    public long overflow() {
      return totalSize - bucketSize;
    }

    public long totalSize() {
      return totalSize;
    }

    public int compareTo(BlockBucket that) {
      if(this.overflow() == that.overflow()) return 0;
      return this.overflow() > that.overflow() ? 1 : -1;
    }

    @Override
    public boolean equals(Object that) {
      if (that == null || !(that instanceof BlockBucket)) {
        return false;
      }

      return compareTo((BlockBucket) that) == 0;
    }

  }

  /**
   * Get the maximum size of this cache.
   * @return max size in bytes
   */
  public long getMaxSize() {
    return this.maxSize;
  }

  @Override
  public long getCurrentSize() {
    return this.size.get();
  }

  @Override
  public long getFreeSize() {
    return getMaxSize() - getCurrentSize();
  }

  @Override
  public long size() {
    return this.elements.get();
  }

  @Override
  public long getBlockCount() {
    return this.elements.get();
  }

  /**
   * Get the number of eviction runs that have occurred
   */
  public long getEvictionCount() {
    return this.stats.getEvictionCount();
  }

  @Override
  public long getEvictedCount() {
    return this.stats.getEvictedCount();
  }

  EvictionThread getEvictionThread() {
    return this.evictionThread;
  }

  /*
   * Eviction thread.  Sits in waiting state until an eviction is triggered
   * when the cache size grows above the acceptable level.<p>
   *
   * Thread is triggered into action by {@link LruBlockCache#runEviction()}
   */
  static class EvictionThread extends HasThread {
    private WeakReference<LruBlockCache> cache;
    private boolean go = true;
    // flag set after entering the run method; used in tests
    private boolean enteringRun = false;

    public EvictionThread(LruBlockCache cache) {
      super(Thread.currentThread().getName() + ".LruBlockCache.EvictionThread");
      setDaemon(true);
      this.cache = new WeakReference<LruBlockCache>(cache);
    }

    @Override
    public void run() {
      enteringRun = true;
      while (this.go) {
        synchronized(this) {
          try {
            this.wait();
          } catch(InterruptedException e) {}
        }
        LruBlockCache cache = this.cache.get();
        if(cache == null) break;
        cache.evict();
      }
    }

    public void evict() {
      synchronized(this) {
        this.notifyAll(); // FindBugs NN_NAKED_NOTIFY
      }
    }

    synchronized void shutdown() {
      this.go = false;
      this.notifyAll();
    }

    /**
     * Used in tests.
     */
    boolean isEnteringRun() {
      return this.enteringRun;
    }
  }

  /*
   * Statistics thread.  Periodically prints the cache statistics to the log.
   */
  static class StatisticsThread extends Thread {
    LruBlockCache lru;

    public StatisticsThread(LruBlockCache lru) {
      super("LruBlockCache.StatisticsThread");
      setDaemon(true);
      this.lru = lru;
    }
    @Override
    public void run() {
      lru.logStats();
    }
  }

  public void logStats() {
    if (!LOG.isDebugEnabled()) return;
    // Log size
    long totalSize = heapSize();
    long freeSize = maxSize - totalSize;
    LruBlockCache.LOG.debug("Total=" + StringUtils.byteDesc(totalSize) + ", " +
        "free=" + StringUtils.byteDesc(freeSize) + ", " +
        "max=" + StringUtils.byteDesc(this.maxSize) + ", " +
        "blocks=" + size() + ", " +
        "accesses=" + stats.getRequestCount() + ", " +
        "hits=" + stats.getHitCount() + ", " +
        "hitRatio=" +
          (stats.getHitCount() == 0 ? "0" : StringUtils.formatPercent(stats.getHitRatio(), 2)) + ", " +
        "cachingAccesses=" + stats.getRequestCachingCount() + ", " +
        "cachingHits=" + stats.getHitCachingCount() + ", " +
        "cachingHitsRatio=" +
          (stats.getHitCachingCount() == 0 ? "0" : StringUtils.formatPercent(stats.getHitCachingRatio(), 2)) + ", " +
        "evictions=" + stats.getEvictionCount() + ", " +
        "evicted=" + stats.getEvictedCount() + ", " +
        "evictedPerRun=" + stats.evictedPerEviction());
  }

  /**
   * Get counter statistics for this cache.
   *
   * <p>Includes: total accesses, hits, misses, evicted blocks, and runs
   * of the eviction processes.
   */
  public CacheStats getStats() {
    return this.stats;
  }

  public final static long CACHE_FIXED_OVERHEAD = ClassSize.align(
      (3 * Bytes.SIZEOF_LONG) + (9 * ClassSize.REFERENCE) +
      (5 * Bytes.SIZEOF_FLOAT) + Bytes.SIZEOF_BOOLEAN
      + ClassSize.OBJECT);

  // HeapSize implementation
  public long heapSize() {
    return getCurrentSize();
  }

  public static long calculateOverhead(long maxSize, long blockSize, int concurrency){
    // FindBugs ICAST_INTEGER_MULTIPLY_CAST_TO_LONG
    return CACHE_FIXED_OVERHEAD + ClassSize.CONCURRENT_HASHMAP +
        ((long)Math.ceil(maxSize*1.2/blockSize)
            * ClassSize.CONCURRENT_HASHMAP_ENTRY) +
        ((long)concurrency * ClassSize.CONCURRENT_HASHMAP_SEGMENT);
  }
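
  // Worked example of the estimate above (illustrative numbers): with
  // maxSize = 1 GB and blockSize = 64 KB, Math.ceil(maxSize * 1.2 / blockSize)
  // is 19,661, so the overhead charged up front is CACHE_FIXED_OVERHEAD plus
  // the map itself plus 19,661 map-entry overheads plus, by default, 16
  // segment overheads (the ClassSize constants supply the per-object sizes).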

  @Override
  public List<BlockCacheColumnFamilySummary> getBlockCacheColumnFamilySummaries(Configuration conf) throws IOException {

    Map<String, Path> sfMap = FSUtils.getTableStoreFilePathMap(
        FileSystem.get(conf),
        FSUtils.getRootDir(conf));

    // quirky, but it's a compound key and this is a shortcut taken instead of
    // creating a class that would represent only a key.
    Map<BlockCacheColumnFamilySummary, BlockCacheColumnFamilySummary> bcs =
      new HashMap<BlockCacheColumnFamilySummary, BlockCacheColumnFamilySummary>();

    for (CachedBlock cb : map.values()) {
      String sf = cb.getCacheKey().getHfileName();
      Path path = sfMap.get(sf);
      if (path != null) {
        BlockCacheColumnFamilySummary lookup =
          BlockCacheColumnFamilySummary.createFromStoreFilePath(path);
        BlockCacheColumnFamilySummary bcse = bcs.get(lookup);
        if (bcse == null) {
          bcse = BlockCacheColumnFamilySummary.create(lookup);
          bcs.put(lookup, bcse);
        }
        bcse.incrementBlocks();
        bcse.incrementHeapSize(cb.heapSize());
      }
    }
    List<BlockCacheColumnFamilySummary> list =
        new ArrayList<BlockCacheColumnFamilySummary>(bcs.values());
    Collections.sort(list);
    return list;
  }

  // Simple calculators of sizes given factors and maxSize

  private long acceptableSize() {
    return (long)Math.floor(this.maxSize * this.acceptableFactor);
  }
  private long minSize() {
    return (long)Math.floor(this.maxSize * this.minFactor);
  }
  private long singleSize() {
    return (long)Math.floor(this.maxSize * this.singleFactor * this.minFactor);
  }
  private long multiSize() {
    return (long)Math.floor(this.maxSize * this.multiFactor * this.minFactor);
  }
  private long memorySize() {
    return (long)Math.floor(this.maxSize * this.memoryFactor * this.minFactor);
  }
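
  // Worked example of the bucket sizing above (illustrative numbers): with
  // maxSize = 1 GB and the default factors, acceptableSize() is ~0.99 GB,
  // minSize() is ~0.95 GB, and the single/multi/memory chunks are roughly
  // 0.25 * 0.95, 0.50 * 0.95 and 0.25 * 0.95 of maxSize respectively,
  // i.e. about 243 MB, 486 MB and 243 MB.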

  public void shutdown() {
    if (victimHandler != null)
      victimHandler.shutdown();
    this.scheduleThreadPool.shutdown();
    for (int i = 0; i < 10; i++) {
      if (!this.scheduleThreadPool.isShutdown()) Threads.sleep(10);
    }
    if (!this.scheduleThreadPool.isShutdown()) {
      List<Runnable> runnables = this.scheduleThreadPool.shutdownNow();
      LOG.debug("Still running " + runnables);
    }
    this.evictionThread.shutdown();
  }

  /** Clears the cache. Used in tests. */
  public void clearCache() {
    map.clear();
  }

  /**
   * Used in testing. May be very inefficient.
   * @return the set of cached file names
   */
  SortedSet<String> getCachedFileNamesForTest() {
    SortedSet<String> fileNames = new TreeSet<String>();
    for (BlockCacheKey cacheKey : map.keySet()) {
      fileNames.add(cacheKey.getHfileName());
    }
    return fileNames;
  }

  Map<BlockType, Integer> getBlockTypeCountsForTest() {
    Map<BlockType, Integer> counts =
        new EnumMap<BlockType, Integer>(BlockType.class);
    for (CachedBlock cb : map.values()) {
      BlockType blockType = ((HFileBlock) cb.getBuffer()).getBlockType();
      Integer count = counts.get(blockType);
      counts.put(blockType, (count == null ? 0 : count) + 1);
    }
    return counts;
  }

  public Map<DataBlockEncoding, Integer> getEncodingCountsForTest() {
    Map<DataBlockEncoding, Integer> counts =
        new EnumMap<DataBlockEncoding, Integer>(DataBlockEncoding.class);
    for (BlockCacheKey cacheKey : map.keySet()) {
      DataBlockEncoding encoding = cacheKey.getDataBlockEncoding();
      Integer count = counts.get(encoding);
      counts.put(encoding, (count == null ? 0 : count) + 1);
    }
    return counts;
  }

  public void setVictimCache(BucketCache handler) {
    assert victimHandler == null;
    victimHandler = handler;
  }

}