1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.lang.ref.WeakReference;
22  import java.nio.ByteBuffer;
23  import java.util.EnumMap;
24  import java.util.Iterator;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.PriorityQueue;
28  import java.util.SortedSet;
29  import java.util.TreeSet;
30  import java.util.concurrent.ConcurrentHashMap;
31  import java.util.concurrent.Executors;
32  import java.util.concurrent.ScheduledExecutorService;
33  import java.util.concurrent.TimeUnit;
34  import java.util.concurrent.atomic.AtomicLong;
35  import java.util.concurrent.locks.ReentrantLock;
36  
37  import com.google.common.base.Objects;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.hbase.classification.InterfaceAudience;
41  import org.apache.hadoop.conf.Configuration;
42  import org.apache.hadoop.hbase.io.HeapSize;
43  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
44  import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.apache.hadoop.hbase.util.ClassSize;
47  import org.apache.hadoop.hbase.util.HasThread;
48  import org.apache.hadoop.hbase.util.Threads;
49  import org.apache.hadoop.util.StringUtils;
50  import org.codehaus.jackson.annotate.JsonIgnoreProperties;
51  
52  import com.google.common.annotations.VisibleForTesting;
53  import com.google.common.util.concurrent.ThreadFactoryBuilder;
54  
55  /**
56   * A block cache implementation that is memory-aware using {@link HeapSize},
57   * memory-bound using an LRU eviction algorithm, and concurrent: backed by a
58   * {@link ConcurrentHashMap} and with a non-blocking eviction thread giving
59   * constant-time {@link #cacheBlock} and {@link #getBlock} operations.<p>
60   *
61   * Contains three levels of block priority to allow for
62   * scan-resistance and in-memory families.  A block is added with an inMemory
63   * flag if necessary, otherwise a block becomes a single access priority.  Once
64   * a block is accessed again, it changes to multiple access.  This is used
65   * to prevent scans from thrashing the cache, adding a least-frequently-used
66   * element to the eviction algorithm.<p>
67   *
68   * Each priority is given its own chunk of the total cache to ensure
69   * fairness during eviction.  Each priority will retain close to its maximum
70   * size, however, if any priority is not using its entire chunk the others
71   * are able to grow beyond their chunk size.<p>
72   *
73   * Instantiated at a minimum with the total size and average block size.
74   * All sizes are in bytes.  The block size is not especially important as this
75   * cache is fully dynamic in its sizing of blocks.  It is only used for
76   * pre-allocating data structures and in initial heap estimation of the map.<p>
77   *
78   * The detailed constructor defines the sizes for the three priorities (they
79   * should total to the maximum size defined).  It also sets the levels that
80   * trigger and control the eviction thread.<p>
81   *
82   * The acceptable size is the cache size level which triggers the eviction
83   * process to start.  It evicts enough blocks to get the size below the
84   * minimum size specified.<p>
85   *
86   * Eviction happens in a separate thread and involves a single full-scan
87   * of the map.  It determines how many bytes must be freed to reach the minimum
88   * size, and then while scanning determines the fewest least-recently-used
89   * blocks necessary from each of the three priorities (so up to three times the
90   * bytes to free may be identified across the buckets).  It then uses the priority
91   * chunk sizes to evict fairly according to the relative sizes and usage.
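 *
 * <p>Illustrative usage sketch (the sizes and the {@code key}/{@code block}
 * variables are made-up assumptions, not values taken from this codebase):
 * <pre>{@code
 *   // cache bounded at 256 MB, expecting roughly 64 KB blocks
 *   LruBlockCache cache = new LruBlockCache(256L * 1024 * 1024, 64L * 1024);
 *   BlockCacheKey key = ...;   // key for an hfile block, obtained elsewhere
 *   Cacheable block = ...;     // block read from the filesystem, obtained elsewhere
 *   cache.cacheBlock(key, block);                            // single-access priority
 *   Cacheable hit = cache.getBlock(key, true, false, true);  // promotes to multi-access
 * }</pre>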
92   */
93  @InterfaceAudience.Private
94  @JsonIgnoreProperties({"encodingCountsForTest"})
95  public class LruBlockCache implements BlockCache, HeapSize {
96  
97    static final Log LOG = LogFactory.getLog(LruBlockCache.class);
98  
99    static final String LRU_MIN_FACTOR_CONFIG_NAME = "hbase.lru.blockcache.min.factor";
100   static final String LRU_ACCEPTABLE_FACTOR_CONFIG_NAME = "hbase.lru.blockcache.acceptable.factor";
101   static final String LRU_SINGLE_PERCENTAGE_CONFIG_NAME = "hbase.lru.blockcache.single.percentage";
102   static final String LRU_MULTI_PERCENTAGE_CONFIG_NAME = "hbase.lru.blockcache.multi.percentage";
103   static final String LRU_MEMORY_PERCENTAGE_CONFIG_NAME = "hbase.lru.blockcache.memory.percentage";
104 
105   /**
106    * Configuration key to force data blocks of in-memory hfiles to stay cached
107    * (unless the in-memory blocks alone overflow the cache).  Unlike inMemory, which is
108    * a column-family configuration, inMemoryForceMode is a cluster-wide configuration
109    */
110   static final String LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME = "hbase.lru.rs.inmemoryforcemode";
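
  // Illustrative sketch of wiring these keys through the Configuration-based
  // constructor below; the 0.92f/0.98f values and the local variables are made-up
  // examples, not recommended settings:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setFloat("hbase.lru.blockcache.min.factor", 0.92f);
  //   conf.setFloat("hbase.lru.blockcache.acceptable.factor", 0.98f);
  //   conf.setBoolean("hbase.lru.rs.inmemoryforcemode", true);
  //   LruBlockCache cache = new LruBlockCache(maxSize, blockSize, true, conf);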
111 
112   /** Default Configuration Parameters */
113 
114   /** Backing Concurrent Map Configuration */
115   static final float DEFAULT_LOAD_FACTOR = 0.75f;
116   static final int DEFAULT_CONCURRENCY_LEVEL = 16;
117 
118   /** Eviction thresholds */
119   static final float DEFAULT_MIN_FACTOR = 0.95f;
120   static final float DEFAULT_ACCEPTABLE_FACTOR = 0.99f;
121 
122   /** Priority buckets */
123   static final float DEFAULT_SINGLE_FACTOR = 0.25f;
124   static final float DEFAULT_MULTI_FACTOR = 0.50f;
125   static final float DEFAULT_MEMORY_FACTOR = 0.25f;
126 
127   static final boolean DEFAULT_IN_MEMORY_FORCE_MODE = false;
128 
129   /** Statistics thread */
130   static final int statThreadPeriod = 60 * 5;
131 
132   private static final String LRU_MAX_BLOCK_SIZE = "hbase.lru.max.block.size";
133   private static final long DEFAULT_MAX_BLOCK_SIZE = 16L * 1024L * 1024L;
134 
135   /** Concurrent map (the cache) */
136   private final Map<BlockCacheKey,LruCachedBlock> map;
137 
138   /** Eviction lock (locked when eviction in progress) */
139   private final ReentrantLock evictionLock = new ReentrantLock(true);
140   private final long maxBlockSize;
141 
142   /** Volatile boolean to track if we are in an eviction process or not */
143   private volatile boolean evictionInProgress = false;
144 
145   /** Eviction thread */
146   private final EvictionThread evictionThread;
147 
148   /** Statistics thread schedule pool (for heavy debugging, could remove) */
149   private final ScheduledExecutorService scheduleThreadPool =
150     Executors.newScheduledThreadPool(1,
151       new ThreadFactoryBuilder()
152         .setNameFormat("LruStats #%d")
153         .setDaemon(true)
154         .build());
155 
156   /** Current size of cache */
157   private final AtomicLong size;
158 
159   /** Current number of cached elements */
160   private final AtomicLong elements;
161 
162   /** Cache access count (sequential ID) */
163   private final AtomicLong count;
164 
165   /** Cache statistics */
166   private final CacheStats stats;
167 
168   /** Maximum allowable size of cache (block put if size > max, evict) */
169   private long maxSize;
170 
171   /** Approximate block size */
172   private long blockSize;
173 
174   /** Acceptable size of cache (no evictions if size < acceptable) */
175   private float acceptableFactor;
176 
177   /** Minimum threshold of cache (when evicting, evict until size < min) */
178   private float minFactor;
179 
180   /** Single access bucket size */
181   private float singleFactor;
182 
183   /** Multiple access bucket size */
184   private float multiFactor;
185 
186   /** In-memory bucket size */
187   private float memoryFactor;
188 
189   /** Overhead of the structure itself */
190   private long overhead;
191 
192   /** Whether data blocks of in-memory hfiles have strictly higher priority when evicting */
193   private boolean forceInMemory;
194 
195   /** Where to send victims (blocks evicted from the cache) */
196   private BucketCache victimHandler = null;
197 
198   /**
199    * Default constructor.  Specify maximum size and expected average block
200    * size (approximation is fine).
201    *
202    * <p>All other factors will be calculated based on defaults specified in
203    * this class.
204    * @param maxSize maximum size of cache, in bytes
205    * @param blockSize approximate size of each block, in bytes
206    */
207   public LruBlockCache(long maxSize, long blockSize) {
208     this(maxSize, blockSize, true);
209   }
210 
211   /**
212    * Constructor used for testing.  Allows disabling of the eviction thread.
213    */
214   public LruBlockCache(long maxSize, long blockSize, boolean evictionThread) {
215     this(maxSize, blockSize, evictionThread,
216         (int)Math.ceil(1.2*maxSize/blockSize),
217         DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL,
218         DEFAULT_MIN_FACTOR, DEFAULT_ACCEPTABLE_FACTOR,
219         DEFAULT_SINGLE_FACTOR,
220         DEFAULT_MULTI_FACTOR,
221         DEFAULT_MEMORY_FACTOR,
222         false,
223         DEFAULT_MAX_BLOCK_SIZE
224         );
225   }
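
  // Note on the sizing above: the backing ConcurrentHashMap is pre-sized to
  // ceil(1.2 * maxSize / blockSize) entries, i.e. 20% more entries than would fit
  // if every block were exactly blockSize.  For example (made-up figures),
  // maxSize = 1 GB and blockSize = 64 KB give ceil(1.2 * 16384) = 19661 entries.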
226 
227   public LruBlockCache(long maxSize, long blockSize, boolean evictionThread, Configuration conf) {
228     this(maxSize, blockSize, evictionThread,
229         (int)Math.ceil(1.2*maxSize/blockSize),
230         DEFAULT_LOAD_FACTOR,
231         DEFAULT_CONCURRENCY_LEVEL,
232         conf.getFloat(LRU_MIN_FACTOR_CONFIG_NAME, DEFAULT_MIN_FACTOR),
233         conf.getFloat(LRU_ACCEPTABLE_FACTOR_CONFIG_NAME, DEFAULT_ACCEPTABLE_FACTOR),
234         conf.getFloat(LRU_SINGLE_PERCENTAGE_CONFIG_NAME, DEFAULT_SINGLE_FACTOR),
235         conf.getFloat(LRU_MULTI_PERCENTAGE_CONFIG_NAME, DEFAULT_MULTI_FACTOR),
236         conf.getFloat(LRU_MEMORY_PERCENTAGE_CONFIG_NAME, DEFAULT_MEMORY_FACTOR),
237         conf.getBoolean(LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME, DEFAULT_IN_MEMORY_FORCE_MODE),
238         conf.getLong(LRU_MAX_BLOCK_SIZE, DEFAULT_MAX_BLOCK_SIZE)
239         );
240   }
241 
242   public LruBlockCache(long maxSize, long blockSize, Configuration conf) {
243     this(maxSize, blockSize, true, conf);
244   }
245 
246   /**
247    * Configurable constructor.  Use this constructor if not using defaults.
248    * @param maxSize maximum size of this cache, in bytes
249    * @param blockSize expected average size of blocks, in bytes
250    * @param evictionThread whether to run evictions in a bg thread or not
251    * @param mapInitialSize initial size of backing ConcurrentHashMap
252    * @param mapLoadFactor initial load factor of backing ConcurrentHashMap
253    * @param mapConcurrencyLevel initial concurrency factor for backing CHM
254    * @param minFactor percentage of total size that eviction will evict until
255    * @param acceptableFactor percentage of total size that triggers eviction
256    * @param singleFactor percentage of total size for single-access blocks
257    * @param multiFactor percentage of total size for multiple-access blocks
258    * @param memoryFactor percentage of total size for in-memory blocks
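   * @param forceInMemory if true, in-memory blocks are evicted only after the
   *        single and multi buckets have been emptied
   * @param maxBlockSize largest block size, in bytes, that will be admitted to the cache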
259    */
260   public LruBlockCache(long maxSize, long blockSize, boolean evictionThread,
261       int mapInitialSize, float mapLoadFactor, int mapConcurrencyLevel,
262       float minFactor, float acceptableFactor, float singleFactor,
263       float multiFactor, float memoryFactor, boolean forceInMemory, long maxBlockSize) {
264     this.maxBlockSize = maxBlockSize;
265     if(singleFactor + multiFactor + memoryFactor != 1 ||
266         singleFactor < 0 || multiFactor < 0 || memoryFactor < 0) {
267       throw new IllegalArgumentException("Single, multi, and memory factors " +
268           "should be non-negative and total 1.0");
269     }
270     if(minFactor >= acceptableFactor) {
271       throw new IllegalArgumentException("minFactor must be smaller than acceptableFactor");
272     }
273     if(minFactor >= 1.0f || acceptableFactor >= 1.0f) {
274       throw new IllegalArgumentException("all factors must be < 1");
275     }
276     this.maxSize = maxSize;
277     this.blockSize = blockSize;
278     this.forceInMemory = forceInMemory;
279     map = new ConcurrentHashMap<BlockCacheKey,LruCachedBlock>(mapInitialSize,
280         mapLoadFactor, mapConcurrencyLevel);
281     this.minFactor = minFactor;
282     this.acceptableFactor = acceptableFactor;
283     this.singleFactor = singleFactor;
284     this.multiFactor = multiFactor;
285     this.memoryFactor = memoryFactor;
286     this.stats = new CacheStats();
287     this.count = new AtomicLong(0);
288     this.elements = new AtomicLong(0);
289     this.overhead = calculateOverhead(maxSize, blockSize, mapConcurrencyLevel);
290     this.size = new AtomicLong(this.overhead);
291     if(evictionThread) {
292       this.evictionThread = new EvictionThread(this);
293       this.evictionThread.start(); // FindBugs SC_START_IN_CTOR
294     } else {
295       this.evictionThread = null;
296     }
297     this.scheduleThreadPool.scheduleAtFixedRate(new StatisticsThread(this),
298         statThreadPeriod, statThreadPeriod, TimeUnit.SECONDS);
299   }
300 
301   public void setMaxSize(long maxSize) {
302     this.maxSize = maxSize;
303     if(this.size.get() > acceptableSize() && !evictionInProgress) {
304       runEviction();
305     }
306   }
307 
308   // BlockCache implementation
309 
310   /**
311    * Cache the block with the specified name and buffer.
312    * <p>
313    * It is assumed this will NOT be called on an already cached block. In rare cases (HBASE-8547)
314    * this can happen, for which we compare the buffer contents.
315    * @param cacheKey block's cache key
316    * @param buf block buffer
317    * @param inMemory if block is in-memory
318    */
319   @Override
320   public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory) {
321 
322     if (buf.heapSize() > maxBlockSize) {
323       // If there are a lot of blocks that are too
324       // big this can make the logs way too noisy.
325       // So we log 2%
326       if (stats.failInsert() % 50 == 0) {
327         LOG.warn("Trying to cache too large a block "
328             + cacheKey.getHfileName() + " @ "
329             + cacheKey.getOffset()
330             + "; size " + buf.heapSize()
331             + " is larger than the limit of " + maxBlockSize);
332       }
333       return;
334     }
335 
336     LruCachedBlock cb = map.get(cacheKey);
337     if(cb != null) {
338       // compare the contents, if they are not equal, we are in big trouble
339       if (compare(buf, cb.getBuffer()) != 0) {
340         throw new RuntimeException("Cached block contents differ, which should not have happened. "
341           + "cacheKey:" + cacheKey);
342       }
343       String msg = "Cached an already cached block: " + cacheKey + " cb:" + cb.getCacheKey();
344       msg += ". This is harmless and can happen in rare cases (see HBASE-8547)";
345       LOG.warn(msg);
346       return;
347     }
348     cb = new LruCachedBlock(cacheKey, buf, count.incrementAndGet(), inMemory);
349     long newSize = updateSizeMetrics(cb, false);
350     map.put(cacheKey, cb);
351     long val = elements.incrementAndGet();
352     if (LOG.isTraceEnabled()) {
353       long size = map.size();
354       assertCounterSanity(size, val);
355     }
356     if (newSize > acceptableSize() && !evictionInProgress) {
357       runEviction();
358     }
359   }
360 
361   /**
362    * Sanity-checking for parity between actual block cache content and metrics.
363    * Intended only for use with TRACE level logging and -ea JVM.
364    */
365   private static void assertCounterSanity(long mapSize, long counterVal) {
366     if (counterVal < 0) {
367       LOG.trace("counterVal overflow. Assertions unreliable. counterVal=" + counterVal +
368         ", mapSize=" + mapSize);
369       return;
370     }
371     if (mapSize < Integer.MAX_VALUE) {
372       double pct_diff = Math.abs((((double) counterVal) / ((double) mapSize)) - 1.);
373       if (pct_diff > 0.05) {
374         LOG.trace("delta between reported and actual size > 5%. counterVal=" + counterVal +
375           ", mapSize=" + mapSize);
376       }
377     }
378   }
379 
380   private int compare(Cacheable left, Cacheable right) {
381     ByteBuffer l = ByteBuffer.allocate(left.getSerializedLength());
382     left.serialize(l);
383     ByteBuffer r = ByteBuffer.allocate(right.getSerializedLength());
384     right.serialize(r);
385     return Bytes.compareTo(l.array(), l.arrayOffset(), l.limit(),
386       r.array(), r.arrayOffset(), r.limit());
387   }
388 
389   /**
390    * Cache the block with the specified name and buffer.
391    * <p>
392    * It is assumed this will NOT be called on an already cached block.  In rare
393    * cases (HBASE-8547) this can happen; the buffer contents are then compared
394    * and, if they match, the existing entry is kept and the size of the cache
395    * is not modified.
396    * @param cacheKey block's cache key
397    * @param buf block buffer
398    */
399   public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf) {
400     cacheBlock(cacheKey, buf, false);
401   }
402 
403   /**
404    * Helper function that updates the local size counter for the given
405    * {@link LruCachedBlock}, adding its heap size when a block is cached and
406    * subtracting it when a block is evicted
407    *
408    * @param cb the block whose heap size is being accounted for
409    * @param evict true if the block is being evicted, so its size is subtracted
410    */
411   protected long updateSizeMetrics(LruCachedBlock cb, boolean evict) {
412     long heapsize = cb.heapSize();
413     if (evict) {
414       heapsize *= -1;
415     }
416     return size.addAndGet(heapsize);
417   }
418 
419   /**
420    * Get the buffer of the block with the specified name.
421    * @param cacheKey block's cache key
422    * @param caching true if the caller caches blocks on cache misses
423    * @param repeat Whether this is a repeat lookup for the same block
424    *        (used to avoid double counting cache misses when doing double-check locking)
425    * @param updateCacheMetrics Whether to update cache metrics or not
426    * @return buffer of specified cache key, or null if not in cache
427    */
428   @Override
429   public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat,
430       boolean updateCacheMetrics) {
431     LruCachedBlock cb = map.get(cacheKey);
432     if(cb == null) {
433       if (!repeat && updateCacheMetrics) stats.miss(caching, cacheKey.getBlockType());
434       if (victimHandler != null)
435         return victimHandler.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
436       return null;
437     }
438     if (updateCacheMetrics) stats.hit(caching, cacheKey.getBlockType());
439     cb.access(count.incrementAndGet());
440     return cb.getBuffer();
441   }
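
  /*
   * Typical read-through pattern, sketched for illustration only (the
   * readBlockFromFs() helper and the variable names are hypothetical):
   *
   *   Cacheable block = cache.getBlock(key, true, false, true);
   *   if (block == null) {
   *     block = readBlockFromFs(key);    // miss: load the block ourselves
   *     cache.cacheBlock(key, block);    // insert at single-access priority
   *   }
   *
   * A second lookup made while double-check locking should pass repeat=true so
   * the miss is not counted twice in the stats.
   */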
442 
443   /**
444    * Whether the cache contains block with specified cacheKey
445    * @param cacheKey block's cache key
446    * @return true if contains the block
447    */
448   public boolean containsBlock(BlockCacheKey cacheKey) {
449     return map.containsKey(cacheKey);
450   }
451 
452   @Override
453   public boolean evictBlock(BlockCacheKey cacheKey) {
454     LruCachedBlock cb = map.get(cacheKey);
455     if (cb == null) return false;
456     evictBlock(cb, false);
457     return true;
458   }
459 
460   /**
461    * Evicts all blocks for a specific HFile. This is an
462    * expensive operation implemented as a linear-time search through all blocks
463    * in the cache. Ideally this should be a search in a log-access-time map.
464    *
465    * <p>
466    * This is used for evict-on-close to remove all blocks of a specific HFile.
467    *
468    * @return the number of blocks evicted
469    */
470   @Override
471   public int evictBlocksByHfileName(String hfileName) {
472     int numEvicted = 0;
473     for (BlockCacheKey key : map.keySet()) {
474       if (key.getHfileName().equals(hfileName)) {
475         if (evictBlock(key))
476           ++numEvicted;
477       }
478     }
479     if (victimHandler != null) {
480       numEvicted += victimHandler.evictBlocksByHfileName(hfileName);
481     }
482     return numEvicted;
483   }
484 
485   /**
486    * Evict the block.  If a victim handler is set and the block was evicted by the
487    * eviction process (so it may still be read again later), it is handed to the victim cache
488    * @param block the block to evict
489    * @param evictedByEvictionProcess true if the given block is evicted by
490    *          EvictionThread
491    * @return the heap size of evicted block
492    */
493   protected long evictBlock(LruCachedBlock block, boolean evictedByEvictionProcess) {
494     map.remove(block.getCacheKey());
495     updateSizeMetrics(block, true);
496     long val = elements.decrementAndGet();
497     if (LOG.isTraceEnabled()) {
498       long size = map.size();
499       assertCounterSanity(size, val);
500     }
501     stats.evicted();
502     if (evictedByEvictionProcess && victimHandler != null) {
503       boolean wait = getCurrentSize() < acceptableSize();
504       boolean inMemory = block.getPriority() == BlockPriority.MEMORY;
505       victimHandler.cacheBlockWithWait(block.getCacheKey(), block.getBuffer(),
506           inMemory, wait);
507     }
508     return block.heapSize();
509   }
510 
511   /**
512    * Multi-threaded call to run the eviction process.
513    */
514   private void runEviction() {
515     if(evictionThread == null) {
516       evict();
517     } else {
518       evictionThread.evict();
519     }
520   }
521 
522   /**
523    * Eviction method.
524    */
525   void evict() {
526 
527     // Ensure only one eviction at a time
528     if(!evictionLock.tryLock()) return;
529 
530     try {
531       evictionInProgress = true;
532       long currentSize = this.size.get();
533       long bytesToFree = currentSize - minSize();
534 
535       if (LOG.isTraceEnabled()) {
536         LOG.trace("Block cache LRU eviction started; Attempting to free " +
537           StringUtils.byteDesc(bytesToFree) + " of total=" +
538           StringUtils.byteDesc(currentSize));
539       }
540 
541       if(bytesToFree <= 0) return;
542 
543       // Instantiate priority buckets
544       BlockBucket bucketSingle = new BlockBucket("single", bytesToFree, blockSize,
545           singleSize());
546       BlockBucket bucketMulti = new BlockBucket("multi", bytesToFree, blockSize,
547           multiSize());
548       BlockBucket bucketMemory = new BlockBucket("memory", bytesToFree, blockSize,
549           memorySize());
550 
551       // Scan entire map putting into appropriate buckets
552       for(LruCachedBlock cachedBlock : map.values()) {
553         switch(cachedBlock.getPriority()) {
554           case SINGLE: {
555             bucketSingle.add(cachedBlock);
556             break;
557           }
558           case MULTI: {
559             bucketMulti.add(cachedBlock);
560             break;
561           }
562           case MEMORY: {
563             bucketMemory.add(cachedBlock);
564             break;
565           }
566         }
567       }
568 
569       long bytesFreed = 0;
570       if (forceInMemory || memoryFactor > 0.999f) {
571         long s = bucketSingle.totalSize();
572         long m = bucketMulti.totalSize();
573         if (bytesToFree > (s + m)) {
574           // this means we need to evict blocks in memory bucket to make room,
575           // so the single and multi buckets will be emptied
576           bytesFreed = bucketSingle.free(s);
577           bytesFreed += bucketMulti.free(m);
578           if (LOG.isTraceEnabled()) {
579             LOG.trace("freed " + StringUtils.byteDesc(bytesFreed) +
580               " from single and multi buckets");
581           }
582           bytesFreed += bucketMemory.free(bytesToFree - bytesFreed);
583           if (LOG.isTraceEnabled()) {
584             LOG.trace("freed " + StringUtils.byteDesc(bytesFreed) +
585               " total from all three buckets ");
586           }
587         } else {
588           // this means there is no need to evict blocks from the memory bucket,
589           // and we try our best to keep the remaining ratio between the
590           // single bucket and the multi bucket at 1:2
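          //
          // Worked example (made-up numbers): s = 40 MB, m = 80 MB and
          // bytesToFree = 30 MB give bytesRemain = 90 MB.  Neither 3*s <= 90
          // nor 3*m <= 180 holds, so the single bucket frees 40 - 90/3 = 10 MB
          // and the multi bucket frees the remaining 20 MB, leaving 30 MB and
          // 60 MB respectively, i.e. the desired 1:2 ratio.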
591           long bytesRemain = s + m - bytesToFree;
592           if (3 * s <= bytesRemain) {
593             // single-bucket is small enough that no eviction happens for it
594             // hence all eviction goes from multi-bucket
595             bytesFreed = bucketMulti.free(bytesToFree);
596           } else if (3 * m <= 2 * bytesRemain) {
597             // multi-bucket is small enough that no eviction happens for it
598             // hence all eviction goes from single-bucket
599             bytesFreed = bucketSingle.free(bytesToFree);
600           } else {
601             // both buckets need to evict some blocks
602             bytesFreed = bucketSingle.free(s - bytesRemain / 3);
603             if (bytesFreed < bytesToFree) {
604               bytesFreed += bucketMulti.free(bytesToFree - bytesFreed);
605             }
606           }
607         }
608       } else {
609         PriorityQueue<BlockBucket> bucketQueue =
610           new PriorityQueue<BlockBucket>(3);
611 
612         bucketQueue.add(bucketSingle);
613         bucketQueue.add(bucketMulti);
614         bucketQueue.add(bucketMemory);
615 
616         int remainingBuckets = 3;
617 
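        // Worked example (made-up numbers, assuming each free() call releases
        // exactly what it is asked for): with overflows of -10 MB (single),
        // +20 MB (memory) and +30 MB (multi) and 40 MB to free, the buckets are
        // polled in that order; single frees nothing, memory frees
        // min(20, 40/2) = 20 MB and multi frees min(30, 20/1) = 20 MB.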
618         BlockBucket bucket;
619         while((bucket = bucketQueue.poll()) != null) {
620           long overflow = bucket.overflow();
621           if(overflow > 0) {
622             long bucketBytesToFree = Math.min(overflow,
623                 (bytesToFree - bytesFreed) / remainingBuckets);
624             bytesFreed += bucket.free(bucketBytesToFree);
625           }
626           remainingBuckets--;
627         }
628       }
629 
630       if (LOG.isTraceEnabled()) {
631         long single = bucketSingle.totalSize();
632         long multi = bucketMulti.totalSize();
633         long memory = bucketMemory.totalSize();
634         LOG.trace("Block cache LRU eviction completed; " +
635           "freed=" + StringUtils.byteDesc(bytesFreed) + ", " +
636           "total=" + StringUtils.byteDesc(this.size.get()) + ", " +
637           "single=" + StringUtils.byteDesc(single) + ", " +
638           "multi=" + StringUtils.byteDesc(multi) + ", " +
639           "memory=" + StringUtils.byteDesc(memory));
640       }
641     } finally {
642       stats.evict();
643       evictionInProgress = false;
644       evictionLock.unlock();
645     }
646   }
647 
648   @Override
649   public String toString() {
650     return Objects.toStringHelper(this)
651       .add("blockCount", getBlockCount())
652       .add("currentSize", getCurrentSize())
653       .add("freeSize", getFreeSize())
654       .add("maxSize", getMaxSize())
655       .add("heapSize", heapSize())
656       .add("minSize", minSize())
657       .add("minFactor", minFactor)
658       .add("multiSize", multiSize())
659       .add("multiFactor", multiFactor)
660       .add("singleSize", singleSize())
661       .add("singleFactor", singleFactor)
662       .toString();
663   }
664 
665   /**
666    * Used to group blocks into priority buckets.  There will be a BlockBucket
667    * for each priority (single, multi, memory).  Once bucketed, the eviction
668    * algorithm takes the appropriate number of elements out of each according
669    * to configuration parameters and their relative sizes.
670    */
671   private class BlockBucket implements Comparable<BlockBucket> {
672 
673     private final String name;
674     private LruCachedBlockQueue queue;
675     private long totalSize = 0;
676     private long bucketSize;
677 
678     public BlockBucket(String name, long bytesToFree, long blockSize, long bucketSize) {
679       this.name = name;
680       this.bucketSize = bucketSize;
681       queue = new LruCachedBlockQueue(bytesToFree, blockSize);
682       totalSize = 0;
683     }
684 
685     public void add(LruCachedBlock block) {
686       totalSize += block.heapSize();
687       queue.add(block);
688     }
689 
690     public long free(long toFree) {
691       if (LOG.isTraceEnabled()) {
692         LOG.trace("freeing " + StringUtils.byteDesc(toFree) + " from " + this);
693       }
694       LruCachedBlock cb;
695       long freedBytes = 0;
696       while ((cb = queue.pollLast()) != null) {
697         freedBytes += evictBlock(cb, true);
698         if (freedBytes >= toFree) {
699           return freedBytes;
700         }
701       }
702       if (LOG.isTraceEnabled()) {
703         LOG.trace("freed " + StringUtils.byteDesc(freedBytes) + " from " + this);
704       }
705       return freedBytes;
706     }
707 
708     public long overflow() {
709       return totalSize - bucketSize;
710     }
711 
712     public long totalSize() {
713       return totalSize;
714     }
715 
716     public int compareTo(BlockBucket that) {
717       if(this.overflow() == that.overflow()) return 0;
718       return this.overflow() > that.overflow() ? 1 : -1;
719     }
720 
721     @Override
722     public boolean equals(Object that) {
723       if (that == null || !(that instanceof BlockBucket)){
724         return false;
725       }
726 
727       return compareTo((BlockBucket)that) == 0;
728     }
729 
730     @Override
731     public int hashCode() {
732       return Objects.hashCode(name, bucketSize, queue, totalSize);
733     }
734 
735     @Override
736     public String toString() {
737       return Objects.toStringHelper(this)
738         .add("name", name)
739         .add("totalSize", StringUtils.byteDesc(totalSize))
740         .add("bucketSize", StringUtils.byteDesc(bucketSize))
741         .toString();
742     }
743   }
744 
745   /**
746    * Get the maximum size of this cache.
747    * @return max size in bytes
748    */
749   public long getMaxSize() {
750     return this.maxSize;
751   }
752 
753   @Override
754   public long getCurrentSize() {
755     return this.size.get();
756   }
757 
758   @Override
759   public long getFreeSize() {
760     return getMaxSize() - getCurrentSize();
761   }
762 
763   @Override
764   public long size() {
765     return getMaxSize();
766   }
767 
768   @Override
769   public long getBlockCount() {
770     return this.elements.get();
771   }
772 
773   EvictionThread getEvictionThread() {
774     return this.evictionThread;
775   }
776 
777   /*
778    * Eviction thread.  Sits in waiting state until an eviction is triggered
779    * when the cache size grows above the acceptable level.<p>
780    *
781    * Thread is triggered into action by {@link LruBlockCache#runEviction()}
782    */
783   static class EvictionThread extends HasThread {
784     private WeakReference<LruBlockCache> cache;
785     private boolean go = true;
786     // flag set after entering the run method; used by tests
787     private boolean enteringRun = false;
788 
789     public EvictionThread(LruBlockCache cache) {
790       super(Thread.currentThread().getName() + ".LruBlockCache.EvictionThread");
791       setDaemon(true);
792       this.cache = new WeakReference<LruBlockCache>(cache);
793     }
794 
795     @Override
796     public void run() {
797       enteringRun = true;
798       while (this.go) {
799         synchronized(this) {
800           try {
801             this.wait(1000 * 10/*Don't wait forever*/);
802           } catch(InterruptedException e) {}
803         }
804         LruBlockCache cache = this.cache.get();
805         if (cache == null) break;
806         cache.evict();
807       }
808     }
809 
810     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NN_NAKED_NOTIFY",
811         justification="This is what we want")
812     public void evict() {
813       synchronized(this) {
814         this.notifyAll();
815       }
816     }
817 
818     synchronized void shutdown() {
819       this.go = false;
820       this.notifyAll();
821     }
822 
823     /**
824      * Used by tests.
825      */
826     boolean isEnteringRun() {
827       return this.enteringRun;
828     }
829   }
830 
831   /*
832    * Statistics thread.  Periodically prints the cache statistics to the log.
833    */
834   static class StatisticsThread extends Thread {
835     LruBlockCache lru;
836 
837     public StatisticsThread(LruBlockCache lru) {
838       super("LruBlockCache.StatisticsThread");
839       setDaemon(true);
840       this.lru = lru;
841     }
842     @Override
843     public void run() {
844       lru.logStats();
845     }
846   }
847 
848   public void logStats() {
849     if (!LOG.isDebugEnabled()) return;
850     // Log size
851     long totalSize = heapSize();
852     long freeSize = maxSize - totalSize;
853     LruBlockCache.LOG.debug("Total=" + StringUtils.byteDesc(totalSize) + ", " +
854         "free=" + StringUtils.byteDesc(freeSize) + ", " +
855         "max=" + StringUtils.byteDesc(this.maxSize) + ", " +
856         "blockCount=" + getBlockCount() + ", " +
857         "accesses=" + stats.getRequestCount() + ", " +
858         "hits=" + stats.getHitCount() + ", " +
859         "hitRatio=" +
860           (stats.getHitCount() == 0 ? "0" : StringUtils.formatPercent(stats.getHitRatio(), 2)) + ", " +
861         "cachingAccesses=" + stats.getRequestCachingCount() + ", " +
862         "cachingHits=" + stats.getHitCachingCount() + ", " +
863         "cachingHitsRatio=" +
864           (stats.getHitCachingCount() == 0 ? "0" : StringUtils.formatPercent(stats.getHitCachingRatio(), 2)) + ", " +
865         "evictions=" + stats.getEvictionCount() + ", " +
866         "evicted=" + stats.getEvictedCount() + ", " +
867         "evictedPerRun=" + stats.evictedPerEviction());
868   }
869 
870   /**
871    * Get counter statistics for this cache.
872    *
873    * <p>Includes: total accesses, hits, misses, evicted blocks, and runs
874    * of the eviction processes.
875    */
876   public CacheStats getStats() {
877     return this.stats;
878   }
879 
880   public final static long CACHE_FIXED_OVERHEAD = ClassSize.align(
881       (3 * Bytes.SIZEOF_LONG) + (10 * ClassSize.REFERENCE) +
882       (5 * Bytes.SIZEOF_FLOAT) + (2 * Bytes.SIZEOF_BOOLEAN)
883       + ClassSize.OBJECT);
884 
885   // HeapSize implementation
886   public long heapSize() {
887     return getCurrentSize();
888   }
889 
890   public static long calculateOverhead(long maxSize, long blockSize, int concurrency){
891     // FindBugs ICAST_INTEGER_MULTIPLY_CAST_TO_LONG
892     return CACHE_FIXED_OVERHEAD + ClassSize.CONCURRENT_HASHMAP +
893         ((long)Math.ceil(maxSize*1.2/blockSize)
894             * ClassSize.CONCURRENT_HASHMAP_ENTRY) +
895         ((long)concurrency * ClassSize.CONCURRENT_HASHMAP_SEGMENT);
896   }
897 
898   @Override
899   public Iterator<CachedBlock> iterator() {
900     final Iterator<LruCachedBlock> iterator = map.values().iterator();
901 
902     return new Iterator<CachedBlock>() {
903       private final long now = System.nanoTime();
904 
905       @Override
906       public boolean hasNext() {
907         return iterator.hasNext();
908       }
909 
910       @Override
911       public CachedBlock next() {
912         final LruCachedBlock b = iterator.next();
913         return new CachedBlock() {
914           @Override
915           public String toString() {
916             return BlockCacheUtil.toString(this, now);
917           }
918 
919           @Override
920           public BlockPriority getBlockPriority() {
921             return b.getPriority();
922           }
923 
924           @Override
925           public BlockType getBlockType() {
926             return b.getBuffer().getBlockType();
927           }
928 
929           @Override
930           public long getOffset() {
931             return b.getCacheKey().getOffset();
932           }
933 
934           @Override
935           public long getSize() {
936             return b.getBuffer().heapSize();
937           }
938 
939           @Override
940           public long getCachedTime() {
941             return b.getCachedTime();
942           }
943 
944           @Override
945           public String getFilename() {
946             return b.getCacheKey().getHfileName();
947           }
948 
949           @Override
950           public int compareTo(CachedBlock other) {
951             int diff = this.getFilename().compareTo(other.getFilename());
952             if (diff != 0) return diff;
953             diff = (int)(this.getOffset() - other.getOffset());
954             if (diff != 0) return diff;
955             if (other.getCachedTime() < 0 || this.getCachedTime() < 0) {
956               throw new IllegalStateException("" + this.getCachedTime() + ", " +
957                 other.getCachedTime());
958             }
959             return (int)(other.getCachedTime() - this.getCachedTime());
960           }
961 
962           @Override
963           public int hashCode() {
964             return b.hashCode();
965           }
966 
967           @Override
968           public boolean equals(Object obj) {
969             if (obj instanceof CachedBlock) {
970               CachedBlock cb = (CachedBlock)obj;
971               return compareTo(cb) == 0;
972             } else {
973               return false;
974             }
975           }
976         };
977       }
978 
979       @Override
980       public void remove() {
981         throw new UnsupportedOperationException();
982       }
983     };
984   }
985 
986   // Simple calculators of sizes given factors and maxSize
987 
988   private long acceptableSize() {
989     return (long)Math.floor(this.maxSize * this.acceptableFactor);
990   }
991   private long minSize() {
992     return (long)Math.floor(this.maxSize * this.minFactor);
993   }
994   private long singleSize() {
995     return (long)Math.floor(this.maxSize * this.singleFactor * this.minFactor);
996   }
997   private long multiSize() {
998     return (long)Math.floor(this.maxSize * this.multiFactor * this.minFactor);
999   }
1000   private long memorySize() {
1001     return (long)Math.floor(this.maxSize * this.memoryFactor * this.minFactor);
1002   }
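
  // For illustration (made-up figures): with maxSize = 1 GB and the defaults above
  // (min 0.95, acceptable 0.99, single 0.25, multi 0.50, memory 0.25), eviction
  // starts above ~0.99 GB, frees down to ~0.95 GB, and the per-priority chunks
  // come to roughly 243 MB single, 486 MB multi and 243 MB memory.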
1003 
1004   public void shutdown() {
1005     if (victimHandler != null)
1006       victimHandler.shutdown();
1007     this.scheduleThreadPool.shutdown();
1008     for (int i = 0; i < 10; i++) {
1009       if (!this.scheduleThreadPool.isShutdown()) Threads.sleep(10);
1010     }
1011     if (!this.scheduleThreadPool.isShutdown()) {
1012       List<Runnable> runnables = this.scheduleThreadPool.shutdownNow();
1013       LOG.debug("Still running " + runnables);
1014     }
1015     this.evictionThread.shutdown();
1016   }
1017 
1018   /** Clears the cache. Used in tests. */
1019   @VisibleForTesting
1020   public void clearCache() {
1021     map.clear();
1022     elements.set(0);
1023   }
1024 
1025   /**
1026    * Used in testing. May be very inefficient.
1027    * @return the set of cached file names
1028    */
1029   @VisibleForTesting
1030   SortedSet<String> getCachedFileNamesForTest() {
1031     SortedSet<String> fileNames = new TreeSet<String>();
1032     for (BlockCacheKey cacheKey : map.keySet()) {
1033       fileNames.add(cacheKey.getHfileName());
1034     }
1035     return fileNames;
1036   }
1037 
1038   @VisibleForTesting
1039   Map<BlockType, Integer> getBlockTypeCountsForTest() {
1040     Map<BlockType, Integer> counts =
1041         new EnumMap<BlockType, Integer>(BlockType.class);
1042     for (LruCachedBlock cb : map.values()) {
1043       BlockType blockType = ((HFileBlock) cb.getBuffer()).getBlockType();
1044       Integer count = counts.get(blockType);
1045       counts.put(blockType, (count == null ? 0 : count) + 1);
1046     }
1047     return counts;
1048   }
1049 
1050   @VisibleForTesting
1051   public Map<DataBlockEncoding, Integer> getEncodingCountsForTest() {
1052     Map<DataBlockEncoding, Integer> counts =
1053         new EnumMap<DataBlockEncoding, Integer>(DataBlockEncoding.class);
1054     for (BlockCacheKey cacheKey : map.keySet()) {
1055       DataBlockEncoding encoding = cacheKey.getDataBlockEncoding();
1056       Integer count = counts.get(encoding);
1057       counts.put(encoding, (count == null ? 0 : count) + 1);
1058     }
1059     return counts;
1060   }
1061 
1062   public void setVictimCache(BucketCache handler) {
1063     assert victimHandler == null;
1064     victimHandler = handler;
1065   }
1066 
1067   @VisibleForTesting
1068   Map<BlockCacheKey, LruCachedBlock> getMapForTests() {
1069     return map;
1070   }
1071 
1072   @Override
1073   public BlockCache[] getBlockCaches() {
1074     return null;
1075   }
1076 }