1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.io.InterruptedIOException;
23  import java.util.ArrayList;
24  import java.util.Collection;
25  import java.util.Collections;
26  import java.util.Iterator;
27  import java.util.List;
28  import java.util.NavigableSet;
29  import java.util.SortedSet;
30  import java.util.concurrent.Callable;
31  import java.util.concurrent.CompletionService;
32  import java.util.concurrent.CopyOnWriteArraySet;
33  import java.util.concurrent.ExecutionException;
34  import java.util.concurrent.ExecutorCompletionService;
35  import java.util.concurrent.Future;
36  import java.util.concurrent.ThreadPoolExecutor;
37  import java.util.concurrent.atomic.AtomicLong;
38  import java.util.concurrent.locks.ReentrantReadWriteLock;
39  
40  import org.apache.commons.logging.Log;
41  import org.apache.commons.logging.LogFactory;
42  import org.apache.hadoop.classification.InterfaceAudience;
43  import org.apache.hadoop.conf.Configuration;
44  import org.apache.hadoop.fs.FileSystem;
45  import org.apache.hadoop.fs.Path;
46  import org.apache.hadoop.hbase.Cell;
47  import org.apache.hadoop.hbase.CompoundConfiguration;
48  import org.apache.hadoop.hbase.HColumnDescriptor;
49  import org.apache.hadoop.hbase.HConstants;
50  import org.apache.hadoop.hbase.HRegionInfo;
51  import org.apache.hadoop.hbase.KeyValue;
52  import org.apache.hadoop.hbase.RemoteExceptionHandler;
53  import org.apache.hadoop.hbase.client.Scan;
54  import org.apache.hadoop.hbase.exceptions.WrongRegionException;
55  import org.apache.hadoop.hbase.fs.HFileSystem;
56  import org.apache.hadoop.hbase.io.compress.Compression;
57  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
58  import org.apache.hadoop.hbase.io.hfile.HFile;
59  import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
60  import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
61  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
62  import org.apache.hadoop.hbase.exceptions.InvalidHFileException;
63  import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
64  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
65  import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
66  import org.apache.hadoop.hbase.regionserver.compactions.CompactionPolicy;
67  import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
68  import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
69  import org.apache.hadoop.hbase.regionserver.compactions.Compactor;
70  import org.apache.hadoop.hbase.regionserver.compactions.OffPeakCompactions;
71  import org.apache.hadoop.hbase.util.Bytes;
72  import org.apache.hadoop.hbase.util.ChecksumType;
73  import org.apache.hadoop.hbase.util.ClassSize;
74  import org.apache.hadoop.hbase.util.CollectionBackedScanner;
75  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
76  import org.apache.hadoop.util.StringUtils;
77  
78  import com.google.common.base.Preconditions;
79  import com.google.common.collect.ImmutableCollection;
80  import com.google.common.collect.ImmutableList;
81  import com.google.common.collect.Lists;
82  
83  /**
84   * A Store holds a column family in a Region.  It's a memstore and a set of zero
85   * or more StoreFiles, which stretch backwards over time.
86   *
87   * <p>There's no reason to consider append-logging at this level; all logging
88   * and locking is handled at the HRegion level.  Store just provides
89   * services to manage sets of StoreFiles.  One of the most important of those
90   * services is compaction: files are aggregated once they pass
91   * a configurable threshold.
92   *
93   * <p>The only thing having to do with logs that Store needs to deal with is
94   * the reconstructionLog.  This is a segment of an HRegion's log that might
95   * NOT be present upon startup.  If the param is NULL, there's nothing to do.
96   * If the param is non-NULL, we need to process the log to reconstruct
97   * a TreeMap that might not have been written to disk before the process
98   * died.
99   *
100  * <p>It's assumed that after this constructor returns, the reconstructionLog
101  * file will be deleted (by whoever has instantiated the Store).
102  *
103  * <p>Locking and transactions are handled at a higher level.  This API should
104  * not be called directly but by an HRegion manager.
105  */
106 @InterfaceAudience.Private
107 public class HStore implements Store {
108   public static final String BLOCKING_STOREFILES_KEY = "hbase.hstore.blockingStoreFiles";
109   public static final int DEFAULT_BLOCKING_STOREFILE_COUNT = 7;
110 
111   static final Log LOG = LogFactory.getLog(HStore.class);
112 
113   protected final MemStore memstore;
114   private final HRegion region;
115   private final HColumnDescriptor family;
116   private final HRegionFileSystem fs;
117   private final Configuration conf;
118   private final CacheConfig cacheConf;
119   private long lastCompactSize = 0;
120   volatile boolean forceMajor = false;
121   /* how many bytes to write between status checks */
122   static int closeCheckInterval = 0;
123   private volatile long storeSize = 0L;
124   private volatile long totalUncompressedBytes = 0L;
125   private final Object flushLock = new Object();
126   final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
127   private final boolean verifyBulkLoads;
128 
129   private ScanInfo scanInfo;
130 
131   final List<StoreFile> filesCompacting = Lists.newArrayList();
132 
133   // All access must be synchronized.
134   private final CopyOnWriteArraySet<ChangedReadersObserver> changedReaderObservers =
135     new CopyOnWriteArraySet<ChangedReadersObserver>();
136 
137   private final int blocksize;
138   private HFileDataBlockEncoder dataBlockEncoder;
139 
140   /** Checksum configuration */
141   private ChecksumType checksumType;
142   private int bytesPerChecksum;
143 
144   // Comparing KeyValues
145   private final KeyValue.KVComparator comparator;
146 
147   final StoreEngine<?, ?, ?> storeEngine;
148 
149   private OffPeakCompactions offPeakCompactions;
150 
151   private static final int DEFAULT_FLUSH_RETRIES_NUMBER = 10;
152   private static int flush_retries_number;
153   private static int pauseTime;
154 
155   private long blockingFileCount;
156 
157   /**
158    * Constructor
159    * @param region the HRegion this store belongs to
160    * @param family HColumnDescriptor for this column
161    * @param confParam configuration object
163    * @throws IOException
164    */
165   protected HStore(final HRegion region, final HColumnDescriptor family,
166       final Configuration confParam) throws IOException {
167 
168     HRegionInfo info = region.getRegionInfo();
169     this.fs = region.getRegionFileSystem();
170 
171     // Assemble the store's home directory and ensure it exists.
172     fs.createStoreDir(family.getNameAsString());
173     this.region = region;
174     this.family = family;
175     // 'conf' renamed to 'confParam' b/c we use this.conf in the constructor
176     // CompoundConfiguration will look for keys in reverse order of addition, so we'd
177     // add global config first, then table and cf overrides, then cf metadata.
178     this.conf = new CompoundConfiguration()
179       .add(confParam)
180       .addStringMap(region.getTableDesc().getConfiguration())
181       .addStringMap(family.getConfiguration())
182       .addWritableMap(family.getValues());
183     this.blocksize = family.getBlocksize();
184 
185     this.dataBlockEncoder =
186         new HFileDataBlockEncoderImpl(family.getDataBlockEncodingOnDisk(),
187             family.getDataBlockEncoding());
188 
189     this.comparator = info.getComparator();
190     // used by ScanQueryMatcher
191     long timeToPurgeDeletes =
192         Math.max(conf.getLong("hbase.hstore.time.to.purge.deletes", 0), 0);
193     LOG.trace("Time to purge deletes set to " + timeToPurgeDeletes +
194         "ms in store " + this);
195     // Get TTL
196     long ttl = determineTTLFromFamily(family);
197     // Why not just pass a HColumnDescriptor in here altogether?  Even if we have
198     // to clone it?
199     scanInfo = new ScanInfo(family, ttl, timeToPurgeDeletes, this.comparator);
200     this.memstore = new MemStore(conf, this.comparator);
201     this.offPeakCompactions = new OffPeakCompactions(conf);
202 
203     // Setting up cache configuration for this family
204     this.cacheConf = new CacheConfig(conf, family);
205 
206     this.verifyBulkLoads = conf.getBoolean("hbase.hstore.bulkload.verify", false);
207 
208     this.blockingFileCount =
209         conf.getInt(BLOCKING_STOREFILES_KEY, DEFAULT_BLOCKING_STOREFILE_COUNT);
210 
211     if (HStore.closeCheckInterval == 0) {
212       HStore.closeCheckInterval = conf.getInt(
213           "hbase.hstore.close.check.interval", 10*1000*1000 /* 10 MB */);
214     }
215 
216     this.storeEngine = StoreEngine.create(this, this.conf, this.comparator);
217     this.storeEngine.getStoreFileManager().loadFiles(loadStoreFiles());
218 
219     // Initialize checksum type from name. The names are CRC32, CRC32C, etc.
220     this.checksumType = getChecksumType(conf);
221     // initialize bytes per checksum
222     this.bytesPerChecksum = getBytesPerChecksum(conf);
223     // Initialize flush retry settings.
224     if (HStore.flush_retries_number == 0) {
225       HStore.flush_retries_number = conf.getInt(
226           "hbase.hstore.flush.retries.number", DEFAULT_FLUSH_RETRIES_NUMBER);
227       HStore.pauseTime = conf.getInt(HConstants.HBASE_SERVER_PAUSE,
228           HConstants.DEFAULT_HBASE_SERVER_PAUSE);
229       if (HStore.flush_retries_number <= 0) {
230         throw new IllegalArgumentException(
231             "hbase.hstore.flush.retries.number must be > 0, not "
232                 + HStore.flush_retries_number);
233       }
234     }
235   }
236 
237   /**
238    * @param family the column family descriptor
239    * @return TTL in milliseconds of the specified family
240    */
241   private static long determineTTLFromFamily(final HColumnDescriptor family) {
242     // HCD.getTimeToLive returns ttl in seconds.  Convert to milliseconds.
243     long ttl = family.getTimeToLive();
244     if (ttl == HConstants.FOREVER) {
245       // Default is unlimited ttl.
246       ttl = Long.MAX_VALUE;
247     } else if (ttl == -1) {
248       ttl = Long.MAX_VALUE;
249     } else {
250       // Seconds -> milliseconds for the user-supplied TTL
251       ttl *= 1000;
252     }
253     return ttl;
254   }
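  // Example (per the logic above): a family TTL of 86400 seconds (one day)
  // comes back as 86400000 ms here, while FOREVER or -1 maps to Long.MAX_VALUE,
  // i.e. no expiry.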
255 
256   public String getColumnFamilyName() {
257     return this.family.getNameAsString();
258   }
259 
260   @Override
261   public String getTableName() {
262     return this.getRegionInfo().getTableNameAsString();
263   }
264 
265   @Override
266   public FileSystem getFileSystem() {
267     return this.fs.getFileSystem();
268   }
269 
270   public HRegionFileSystem getRegionFileSystem() {
271     return this.fs;
272   }
273 
274   /* Implementation of StoreConfigInformation */
275   @Override
276   public long getStoreFileTtl() {
277     // TTL only applies if there's no MIN_VERSIONs setting on the column.
278     return (this.scanInfo.getMinVersions() == 0) ? this.scanInfo.getTtl() : Long.MAX_VALUE;
279   }
280 
281   @Override
282   public long getMemstoreFlushSize() {
283     return this.region.memstoreFlushSize;
284   }
285   /* End implementation of StoreConfigInformation */
286 
287   /**
288    * Returns the configured bytesPerChecksum value.
289    * @param conf The configuration
290    * @return The bytesPerChecksum that is set in the configuration
291    */
292   public static int getBytesPerChecksum(Configuration conf) {
293     return conf.getInt(HConstants.BYTES_PER_CHECKSUM,
294                        HFile.DEFAULT_BYTES_PER_CHECKSUM);
295   }
296 
297   /**
298    * Returns the configured checksum algorithm.
299    * @param conf The configuration
300    * @return The checksum algorithm that is set in the configuration
301    */
302   public static ChecksumType getChecksumType(Configuration conf) {
303     String checksumName = conf.get(HConstants.CHECKSUM_TYPE_NAME);
304     if (checksumName == null) {
305       return HFile.DEFAULT_CHECKSUM_TYPE;
306     } else {
307       return ChecksumType.nameToType(checksumName);
308     }
309   }
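  // Example (per the method above): a configured checksum name such as "CRC32C"
  // is resolved through ChecksumType.nameToType(); when the property is unset,
  // the HFile default checksum type is returned instead.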
310 
311   /**
312    * @return how many bytes to write between status checks
313    */
314   public static int getCloseCheckInterval() {
315     return closeCheckInterval;
316   }
317 
318   public HColumnDescriptor getFamily() {
319     return this.family;
320   }
321 
322   /**
323    * @return The maximum sequence id in all store files. Used for log replay.
324    */
325   long getMaxSequenceId(boolean includeBulkFiles) {
326     return StoreFile.getMaxSequenceIdInList(this.getStorefiles(), includeBulkFiles);
327   }
328 
329   @Override
330   public long getMaxMemstoreTS() {
331     return StoreFile.getMaxMemstoreTSInList(this.getStorefiles());
332   }
333 
334   /**
335    * @param tabledir {@link Path} to where the table is being stored
336    * @param hri {@link HRegionInfo} for the region.
337    * @param family column family name as a byte array
338    * @return Path to family/Store home directory.
339    */
340   @Deprecated
341   public static Path getStoreHomedir(final Path tabledir,
342       final HRegionInfo hri, final byte[] family) {
343     return getStoreHomedir(tabledir, hri.getEncodedName(), family);
344   }
345 
346   /**
347    * @param tabledir {@link Path} to where the table is being stored
348    * @param encodedName Encoded region name.
349    * @param family column family name as a byte array
350    * @return Path to family/Store home directory.
351    */
352   @Deprecated
353   public static Path getStoreHomedir(final Path tabledir,
354       final String encodedName, final byte[] family) {
355     return new Path(tabledir, new Path(encodedName, Bytes.toString(family)));
356   }
357 
358   @Override
359   public HFileDataBlockEncoder getDataBlockEncoder() {
360     return dataBlockEncoder;
361   }
362 
363   /**
364    * Should be used only in tests.
365    * @param blockEncoder the data block encoder to use
366    */
367   void setDataBlockEncoderInTest(HFileDataBlockEncoder blockEncoder) {
368     this.dataBlockEncoder = blockEncoder;
369   }
370 
371   /**
372    * Creates an unsorted list of StoreFiles loaded in parallel
373    * from the given directory.
374    * @throws IOException
375    */
376   private List<StoreFile> loadStoreFiles() throws IOException {
377     Collection<StoreFileInfo> files = fs.getStoreFiles(getColumnFamilyName());
378     if (files == null || files.size() == 0) {
379       return new ArrayList<StoreFile>();
380     }
381 
382     // initialize the thread pool for opening store files in parallel.
383     ThreadPoolExecutor storeFileOpenerThreadPool =
384       this.region.getStoreFileOpenAndCloseThreadPool("StoreFileOpenerThread-" +
385           this.getColumnFamilyName());
386     CompletionService<StoreFile> completionService =
387       new ExecutorCompletionService<StoreFile>(storeFileOpenerThreadPool);
388 
389     int totalValidStoreFile = 0;
390     final FileSystem fs = this.getFileSystem();
391     for (final StoreFileInfo storeFileInfo: files) {
392       // open each store file in parallel
393       completionService.submit(new Callable<StoreFile>() {
394         public StoreFile call() throws IOException {
395           StoreFile storeFile = new StoreFile(fs, storeFileInfo.getPath(), conf, cacheConf,
396               family.getBloomFilterType(), dataBlockEncoder);
397           storeFile.createReader();
398           return storeFile;
399         }
400       });
401       totalValidStoreFile++;
402     }
403 
404     ArrayList<StoreFile> results = new ArrayList<StoreFile>(files.size());
405     IOException ioe = null;
406     try {
407       for (int i = 0; i < totalValidStoreFile; i++) {
408         try {
409           Future<StoreFile> future = completionService.take();
410           StoreFile storeFile = future.get();
411           long length = storeFile.getReader().length();
412           this.storeSize += length;
413           this.totalUncompressedBytes +=
414               storeFile.getReader().getTotalUncompressedBytes();
415           if (LOG.isDebugEnabled()) {
416             LOG.debug("loaded " + storeFile.toStringDetailed());
417           }
418           results.add(storeFile);
419         } catch (InterruptedException e) {
420           if (ioe == null) ioe = new InterruptedIOException(e.getMessage());
421         } catch (ExecutionException e) {
422           if (ioe == null) ioe = new IOException(e.getCause());
423         }
424       }
425     } finally {
426       storeFileOpenerThreadPool.shutdownNow();
427     }
428     if (ioe != null) {
429       // close StoreFile readers
430       try {
431         for (StoreFile file : results) {
432           if (file != null) file.closeReader(true);
433         }
434       } catch (IOException e) { }
435       throw ioe;
436     }
437 
438     return results;
439   }
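  // Note on loadStoreFiles() above: store files are opened concurrently through
  // an ExecutorCompletionService; the first Interrupted/ExecutionException is
  // remembered while the remaining futures are still drained, and any readers
  // that did open are closed before the saved exception is rethrown.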
440 
441   @Override
442   public long add(final KeyValue kv) {
443     lock.readLock().lock();
444     try {
445       return this.memstore.add(kv);
446     } finally {
447       lock.readLock().unlock();
448     }
449   }
450 
451   /**
452    * Adds a delete marker to the memstore
453    *
454    * @param kv the delete KeyValue to add
455    * @return memstore size delta
456    */
457   protected long delete(final KeyValue kv) {
458     lock.readLock().lock();
459     try {
460       return this.memstore.delete(kv);
461     } finally {
462       lock.readLock().unlock();
463     }
464   }
465 
466   @Override
467   public void rollback(final KeyValue kv) {
468     lock.readLock().lock();
469     try {
470       this.memstore.rollback(kv);
471     } finally {
472       lock.readLock().unlock();
473     }
474   }
475 
476   /**
477    * @return All store files.
478    */
479   @Override
480   public Collection<StoreFile> getStorefiles() {
481     return this.storeEngine.getStoreFileManager().getStorefiles();
482   }
483 
484   @Override
485   public void assertBulkLoadHFileOk(Path srcPath) throws IOException {
486     HFile.Reader reader  = null;
487     try {
488       LOG.info("Validating hfile at " + srcPath + " for inclusion in "
489           + "store " + this + " region " + this.getRegionInfo().getRegionNameAsString());
490       reader = HFile.createReader(srcPath.getFileSystem(conf),
491           srcPath, cacheConf);
492       reader.loadFileInfo();
493 
494       byte[] firstKey = reader.getFirstRowKey();
495       Preconditions.checkState(firstKey != null, "First key can not be null");
496       byte[] lk = reader.getLastKey();
497       Preconditions.checkState(lk != null, "Last key can not be null");
498       byte[] lastKey =  KeyValue.createKeyValueFromKey(lk).getRow();
499 
500       LOG.debug("HFile bounds: first=" + Bytes.toStringBinary(firstKey) +
501           " last=" + Bytes.toStringBinary(lastKey));
502       LOG.debug("Region bounds: first=" +
503           Bytes.toStringBinary(getRegionInfo().getStartKey()) +
504           " last=" + Bytes.toStringBinary(getRegionInfo().getEndKey()));
505 
506       if (!this.getRegionInfo().containsRange(firstKey, lastKey)) {
507         throw new WrongRegionException(
508             "Bulk load file " + srcPath.toString() + " does not fit inside region "
509             + this.getRegionInfo().getRegionNameAsString());
510       }
511 
512       if (verifyBulkLoads) {
513         KeyValue prevKV = null;
514         HFileScanner scanner = reader.getScanner(false, false, false);
515         scanner.seekTo();
516         do {
517           KeyValue kv = scanner.getKeyValue();
518           if (prevKV != null) {
519             if (Bytes.compareTo(prevKV.getBuffer(), prevKV.getRowOffset(),
520                 prevKV.getRowLength(), kv.getBuffer(), kv.getRowOffset(),
521                 kv.getRowLength()) > 0) {
522               throw new InvalidHFileException("Previous row is greater than"
523                   + " current row: path=" + srcPath + " previous="
524                   + Bytes.toStringBinary(prevKV.getKey()) + " current="
525                   + Bytes.toStringBinary(kv.getKey()));
526             }
527             if (Bytes.compareTo(prevKV.getBuffer(), prevKV.getFamilyOffset(),
528                 prevKV.getFamilyLength(), kv.getBuffer(), kv.getFamilyOffset(),
529                 kv.getFamilyLength()) != 0) {
530               throw new InvalidHFileException("Previous key had different"
531                   + " family compared to current key: path=" + srcPath
532                   + " previous=" + Bytes.toStringBinary(prevKV.getFamily())
533                   + " current=" + Bytes.toStringBinary(kv.getFamily()));
534             }
535           }
536           prevKV = kv;
537         } while (scanner.next());
538       }
539     } finally {
540       if (reader != null) reader.close();
541     }
542   }
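  // Note on assertBulkLoadHFileOk() above: besides checking that the HFile's key
  // range fits inside this region, the optional hbase.hstore.bulkload.verify scan
  // also checks that rows are in order and that all cells share a single column
  // family.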
543 
544   @Override
545   public void bulkLoadHFile(String srcPathStr, long seqNum) throws IOException {
546     Path srcPath = new Path(srcPathStr);
547     Path dstPath = fs.bulkLoadStoreFile(getColumnFamilyName(), srcPath, seqNum);
548 
549     StoreFile sf = new StoreFile(this.getFileSystem(), dstPath, this.conf, this.cacheConf,
550         this.family.getBloomFilterType(), this.dataBlockEncoder);
551 
552     StoreFile.Reader r = sf.createReader();
553     this.storeSize += r.length();
554     this.totalUncompressedBytes += r.getTotalUncompressedBytes();
555 
556     LOG.info("Loaded HFile " + srcPath + " into store '" + getColumnFamilyName() +
557         "' as " + dstPath + " - updating store file list.");
558 
559     // Append the new storefile into the list
560     this.lock.writeLock().lock();
561     try {
562       this.storeEngine.getStoreFileManager().insertNewFile(sf);
563     } finally {
564       // We need the lock, as long as we are updating the storeFiles
565       // or changing the memstore. Let us release it before calling
566       // notifyChangedReadersObservers. See HBASE-4485 for a possible
567       // deadlock scenario that could have happened if we continued to hold
568       // the lock.
569       this.lock.writeLock().unlock();
570     }
571     notifyChangedReadersObservers();
572     LOG.info("Successfully loaded store file " + srcPath
573         + " into store " + this + " (new location: " + dstPath + ")");
574   }
575 
576   @Override
577   public ImmutableCollection<StoreFile> close() throws IOException {
578     this.lock.writeLock().lock();
579     try {
580       // Clear so metrics doesn't find them.
581       ImmutableCollection<StoreFile> result = storeEngine.getStoreFileManager().clearFiles();
582 
583       if (!result.isEmpty()) {
584         // initialize the thread pool for closing store files in parallel.
585         ThreadPoolExecutor storeFileCloserThreadPool = this.region
586             .getStoreFileOpenAndCloseThreadPool("StoreFileCloserThread-"
587                 + this.getColumnFamilyName());
588 
589         // close each store file in parallel
590         CompletionService<Void> completionService =
591           new ExecutorCompletionService<Void>(storeFileCloserThreadPool);
592         for (final StoreFile f : result) {
593           completionService.submit(new Callable<Void>() {
594             public Void call() throws IOException {
595               f.closeReader(true);
596               return null;
597             }
598           });
599         }
600 
601         IOException ioe = null;
602         try {
603           for (int i = 0; i < result.size(); i++) {
604             try {
605               Future<Void> future = completionService.take();
606               future.get();
607             } catch (InterruptedException e) {
608               if (ioe == null) {
609                 ioe = new InterruptedIOException();
610                 ioe.initCause(e);
611               }
612             } catch (ExecutionException e) {
613               if (ioe == null) ioe = new IOException(e.getCause());
614             }
615           }
616         } finally {
617           storeFileCloserThreadPool.shutdownNow();
618         }
619         if (ioe != null) throw ioe;
620       }
621       LOG.info("Closed " + this);
622       return result;
623     } finally {
624       this.lock.writeLock().unlock();
625     }
626   }
627 
628   /**
629    * Snapshot this stores memstore. Call before running
630    * {@link #flushCache(long, SortedSet, TimeRangeTracker, AtomicLong, MonitoredTask)}
631    *  so it has some work to do.
632    */
633   void snapshot() {
634     this.memstore.snapshot();
635   }
636 
637   /**
638    * Write out current snapshot.  Presumes {@link #snapshot()} has been called
639    * previously.
640    * @param logCacheFlushId flush sequence number
641    * @param snapshot memstore snapshot to flush
642    * @param snapshotTimeRangeTracker
643    * @param flushedSize out parameter set to the number of bytes flushed
644    * @param status the task monitor to update with flush progress
645    * @return Path The path name of the tmp file to which the store was flushed
646    * @throws IOException
647    */
648   protected Path flushCache(final long logCacheFlushId,
649       SortedSet<KeyValue> snapshot,
650       TimeRangeTracker snapshotTimeRangeTracker,
651       AtomicLong flushedSize,
652       MonitoredTask status) throws IOException {
653     // If an exception happens flushing, we let it out without clearing
654     // the memstore snapshot.  The old snapshot will be returned when we say
655     // 'snapshot', the next time flush comes around.
656     // Retry after catching an exception when flushing; otherwise the server will
657     // abort itself.
658     IOException lastException = null;
659     for (int i = 0; i < HStore.flush_retries_number; i++) {
660       try {
661         Path pathName = internalFlushCache(snapshot, logCacheFlushId,
662             snapshotTimeRangeTracker, flushedSize, status);
663         try {
664           // Path name is null if there is no entry to flush
665           if (pathName != null) {
666             validateStoreFile(pathName);
667           }
668           return pathName;
669         } catch (Exception e) {
670           LOG.warn("Failed validating store file " + pathName
671               + ", retring num=" + i, e);
672           if (e instanceof IOException) {
673             lastException = (IOException) e;
674           } else {
675             lastException = new IOException(e);
676           }
677         }
678       } catch (IOException e) {
679         LOG.warn("Failed flushing store file, retrying num=" + i, e);
680         lastException = e;
681       }
682       if (lastException != null) {
683         try {
684           Thread.sleep(pauseTime);
685         } catch (InterruptedException e) {
686           IOException iie = new InterruptedIOException();
687           iie.initCause(e);
688           throw iie;
689         }
690       }
691     }
692     throw lastException;
693   }
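  // Note on flushCache() above: the loop makes up to
  // hbase.hstore.flush.retries.number attempts, sleeping for the configured
  // server pause between failed attempts, and rethrows the last exception once
  // the retries are exhausted.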
694 
695   /*
696    * @param set the memstore snapshot to flush
697    * @param logCacheFlushId
698    * @param snapshotTimeRangeTracker
699    * @param flushedSize The number of bytes flushed
700    * @return Path The path name of the tmp file to which the store was flushed
701    * @throws IOException
702    */
703   private Path internalFlushCache(final SortedSet<KeyValue> set,
704       final long logCacheFlushId,
705       TimeRangeTracker snapshotTimeRangeTracker,
706       AtomicLong flushedSize,
707       MonitoredTask status)
708       throws IOException {
709     StoreFile.Writer writer;
710     // Find the smallest read point across all the Scanners.
711     long smallestReadPoint = region.getSmallestReadPoint();
712     long flushed = 0;
713     Path pathName;
714     // Don't flush if there are no entries.
715     if (set.size() == 0) {
716       return null;
717     }
718     // Use a store scanner to find which rows to flush.
719     // Note that we need to retain deletes, hence
720     // treat this as a minor compaction.
721     InternalScanner scanner = null;
722     KeyValueScanner memstoreScanner = new CollectionBackedScanner(set, this.comparator);
723     if (this.getCoprocessorHost() != null) {
724       scanner = this.getCoprocessorHost().preFlushScannerOpen(this, memstoreScanner);
725     }
726     if (scanner == null) {
727       Scan scan = new Scan();
728       scan.setMaxVersions(scanInfo.getMaxVersions());
729       scanner = new StoreScanner(this, scanInfo, scan,
730           Collections.singletonList(memstoreScanner), ScanType.COMPACT_RETAIN_DELETES,
731           smallestReadPoint, HConstants.OLDEST_TIMESTAMP);
732     }
733     if (this.getCoprocessorHost() != null) {
734       InternalScanner cpScanner =
735         this.getCoprocessorHost().preFlush(this, scanner);
736       // NULL scanner returned from coprocessor hooks means skip normal processing
737       if (cpScanner == null) {
738         return null;
739       }
740       scanner = cpScanner;
741     }
742     try {
743       int compactionKVMax = conf.getInt(HConstants.COMPACTION_KV_MAX, 10);
744       // TODO:  We can fail in the below block before we complete adding this
745       // flush to list of store files.  Add cleanup of anything put on filesystem
746       // if we fail.
747       synchronized (flushLock) {
748         status.setStatus("Flushing " + this + ": creating writer");
749         // A. Write the map out to the disk
750         writer = createWriterInTmp(set.size());
751         writer.setTimeRangeTracker(snapshotTimeRangeTracker);
752         pathName = writer.getPath();
753         try {
754           List<KeyValue> kvs = new ArrayList<KeyValue>();
755           boolean hasMore;
756           do {
757             hasMore = scanner.next(kvs, compactionKVMax);
758             if (!kvs.isEmpty()) {
759               for (KeyValue kv : kvs) {
760                 // If we know that this KV is going to be included always, then let us
761                 // set its memstoreTS to 0. This will help us save space when writing to
762                 // disk.
763                 if (kv.getMemstoreTS() <= smallestReadPoint) {
764                   // let us not change the original KV. It could be in the memstore;
765                   // changing its memstoreTS could affect other threads/scanners.
766                   kv = kv.shallowCopy();
767                   kv.setMemstoreTS(0);
768                 }
769                 writer.append(kv);
770                 flushed += this.memstore.heapSizeChange(kv, true);
771               }
772               kvs.clear();
773             }
774           } while (hasMore);
775         } finally {
776           // Write out the log sequence number that corresponds to this output
777           // hfile. Also write current time in metadata as minFlushTime.
778           // The hfile is current up to and including logCacheFlushId.
779           status.setStatus("Flushing " + this + ": appending metadata");
780           writer.appendMetadata(logCacheFlushId, false);
781           status.setStatus("Flushing " + this + ": closing flushed file");
782           writer.close();
783         }
784       }
785     } finally {
786       flushedSize.set(flushed);
787       scanner.close();
788     }
789     if (LOG.isInfoEnabled()) {
790       LOG.info("Flushed" +
791                ", sequenceid=" + logCacheFlushId +
792                ", memsize=" + StringUtils.humanReadableInt(flushed) +
793                ", into tmp file " + pathName);
794     }
795     return pathName;
796   }
797 
798   /*
799    * @param path The pathname of the tmp file into which the store was flushed
800    * @param logCacheFlushId
801    * @return StoreFile created.
802    * @throws IOException
803    */
804   private StoreFile commitFile(final Path path,
805       final long logCacheFlushId,
806       TimeRangeTracker snapshotTimeRangeTracker,
807       AtomicLong flushedSize,
808       MonitoredTask status)
809       throws IOException {
810     // Write-out finished successfully, move into the right spot
811     Path dstPath = fs.commitStoreFile(getColumnFamilyName(), path);
812 
813     status.setStatus("Flushing " + this + ": reopening flushed file");
814     StoreFile sf = new StoreFile(this.getFileSystem(), dstPath, this.conf, this.cacheConf,
815         this.family.getBloomFilterType(), this.dataBlockEncoder);
816 
817     StoreFile.Reader r = sf.createReader();
818     this.storeSize += r.length();
819     this.totalUncompressedBytes += r.getTotalUncompressedBytes();
820 
821     if (LOG.isInfoEnabled()) {
822       LOG.info("Added " + sf + ", entries=" + r.getEntries() +
823         ", sequenceid=" + logCacheFlushId +
824         ", filesize=" + StringUtils.humanReadableInt(r.length()));
825     }
826     return sf;
827   }
828 
829   /*
830    * @param maxKeyCount
831    * @return Writer for a new StoreFile in the tmp dir.
832    */
833   private StoreFile.Writer createWriterInTmp(long maxKeyCount)
834   throws IOException {
835     return createWriterInTmp(maxKeyCount, this.family.getCompression(), false, true);
836   }
837 
838   /*
839    * @param maxKeyCount
840    * @param compression Compression algorithm to use
841    * @param isCompaction whether we are creating a new file in a compaction
842    * @return Writer for a new StoreFile in the tmp dir.
843    */
844   public StoreFile.Writer createWriterInTmp(long maxKeyCount,
845     Compression.Algorithm compression, boolean isCompaction, boolean includeMVCCReadpoint)
846   throws IOException {
847     final CacheConfig writerCacheConf;
848     if (isCompaction) {
849       // Don't cache data on write on compactions.
850       writerCacheConf = new CacheConfig(cacheConf);
851       writerCacheConf.setCacheDataOnWrite(false);
852     } else {
853       writerCacheConf = cacheConf;
854     }
855     StoreFile.Writer w = new StoreFile.WriterBuilder(conf, writerCacheConf,
856         this.getFileSystem(), blocksize)
857             .withFilePath(fs.createTempName())
858             .withDataBlockEncoder(dataBlockEncoder)
859             .withComparator(comparator)
860             .withBloomType(family.getBloomFilterType())
861             .withMaxKeyCount(maxKeyCount)
862             .withChecksumType(checksumType)
863             .withBytesPerChecksum(bytesPerChecksum)
864             .withCompression(compression)
865             .includeMVCCReadpoint(includeMVCCReadpoint)
866             .build();
867     return w;
868   }
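  // Note on createWriterInTmp() above: for compaction writers the CacheConfig is
  // copied and cache-on-write is disabled, so blocks written during a compaction
  // are not cached as they are written; flush writers reuse the store's normal
  // cache configuration.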
869 
870   /*
871    * Change storeFiles adding into place the Reader produced by this new flush.
872    * @param sf
873    * @param set memstore snapshot that was used to make the passed file <code>sf</code>.
874    * @throws IOException
875    * @return Whether compaction is required.
876    */
877   private boolean updateStorefiles(final StoreFile sf,
878                                    final SortedSet<KeyValue> set)
879   throws IOException {
880     this.lock.writeLock().lock();
881     try {
882       this.storeEngine.getStoreFileManager().insertNewFile(sf);
883       this.memstore.clearSnapshot(set);
884     } finally {
885       // We need the lock, as long as we are updating the storeFiles
886       // or changing the memstore. Let us release it before calling
887       // notifyChangedReadersObservers. See HBASE-4485 for a possible
888       // deadlock scenario that could have happened if we continued to hold
889       // the lock.
890       this.lock.writeLock().unlock();
891     }
892 
893     // Tell listeners of the change in readers.
894     notifyChangedReadersObservers();
895 
896     return needsCompaction();
897   }
898 
899   /*
900    * Notify all observers that set of Readers has changed.
901    * @throws IOException
902    */
903   private void notifyChangedReadersObservers() throws IOException {
904     for (ChangedReadersObserver o: this.changedReaderObservers) {
905       o.updateReaders();
906     }
907   }
908 
909   /**
910    * Get all scanners with no filtering based on TTL (that happens further down
911    * the line).
912    * @return all scanners for this store
913    */
914   @Override
915   public List<KeyValueScanner> getScanners(boolean cacheBlocks,
916       boolean isGet, boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow,
917       byte[] stopRow) throws IOException {
918     Collection<StoreFile> storeFilesToScan;
919     List<KeyValueScanner> memStoreScanners;
920     this.lock.readLock().lock();
921     try {
922       storeFilesToScan =
923           this.storeEngine.getStoreFileManager().getFilesForScanOrGet(isGet, startRow, stopRow);
924       memStoreScanners = this.memstore.getScanners();
925     } finally {
926       this.lock.readLock().unlock();
927     }
928 
929     // First the store file scanners
930 
931     // TODO this used to get the store files in descending order,
932     // but now we get them in ascending order, which I think is
933     // actually more correct, since the memstore scanners get put at the end.
934     List<StoreFileScanner> sfScanners = StoreFileScanner
935       .getScannersForStoreFiles(storeFilesToScan, cacheBlocks, isGet, isCompaction, matcher);
936     List<KeyValueScanner> scanners =
937       new ArrayList<KeyValueScanner>(sfScanners.size()+1);
938     scanners.addAll(sfScanners);
939     // Then the memstore scanners
940     scanners.addAll(memStoreScanners);
941     return scanners;
942   }
943 
944   @Override
945   public void addChangedReaderObserver(ChangedReadersObserver o) {
946     this.changedReaderObservers.add(o);
947   }
948 
949   @Override
950   public void deleteChangedReaderObserver(ChangedReadersObserver o) {
951     // We don't check whether the observer is present; it may legitimately be absent.
952     this.changedReaderObservers.remove(o);
953   }
954 
955   //////////////////////////////////////////////////////////////////////////////
956   // Compaction
957   //////////////////////////////////////////////////////////////////////////////
958 
959   /**
960    * Compact the StoreFiles.  This method may take some time, so the calling
961    * thread must be able to block for long periods.
962    *
963    * <p>During this time, the Store can work as usual, getting values from
964    * StoreFiles and writing new StoreFiles from the memstore.
965    *
966    * Existing StoreFiles are not destroyed until the new compacted StoreFile is
967    * completely written-out to disk.
968    *
969    * <p>The compactLock prevents multiple simultaneous compactions.
970    * The structureLock prevents us from interfering with other write operations.
971    *
972    * <p>We don't want to hold the structureLock for the whole time, as a compact()
973    * can be lengthy and we want to allow cache-flushes during this period.
974    *
975    * @param compaction compaction details obtained from requestCompaction()
976    * @throws IOException
977    * @return Storefiles we compacted into or null if we failed or opted out early.
978    */
979   public List<StoreFile> compact(CompactionContext compaction) throws IOException {
980     assert compaction != null && compaction.hasSelection();
981     CompactionRequest cr = compaction.getRequest();
982     Collection<StoreFile> filesToCompact = cr.getFiles();
983     assert !filesToCompact.isEmpty();
984     synchronized (filesCompacting) {
985       // sanity check: we're compacting files that this store knows about
986       // TODO: change this to LOG.error() after more debugging
987       Preconditions.checkArgument(filesCompacting.containsAll(filesToCompact));
988     }
989 
990     // Ready to go. Have list of files to compact.
991     LOG.info("Starting compaction of " + filesToCompact.size() + " file(s) in "
992         + this + " of " + this.getRegionInfo().getRegionNameAsString()
993         + " into tmpdir=" + fs.getTempDir() + ", totalSize="
994         + StringUtils.humanReadableInt(cr.getSize()));
995 
996     List<StoreFile> sfs = new ArrayList<StoreFile>();
997     long compactionStartTime = EnvironmentEdgeManager.currentTimeMillis();
998     try {
999       // Commence the compaction.
1000       List<Path> newFiles = compaction.compact();
1001       // Move the compaction into place.
1002       if (this.conf.getBoolean("hbase.hstore.compaction.complete", true)) {
1003         for (Path newFile: newFiles) {
1004           assert newFile != null;
1005           StoreFile sf = moveFileIntoPlace(newFile);
1006           if (this.getCoprocessorHost() != null) {
1007             this.getCoprocessorHost().postCompact(this, sf, cr);
1008           }
1009           assert sf != null;
1010           sfs.add(sf);
1011         }
1012         completeCompaction(filesToCompact, sfs);
1013       } else {
1014         for (Path newFile: newFiles) {
1015           // Create storefile around what we wrote with a reader on it.
1016           StoreFile sf = new StoreFile(this.getFileSystem(), newFile, this.conf, this.cacheConf,
1017             this.family.getBloomFilterType(), this.dataBlockEncoder);
1018           sf.createReader();
1019           sfs.add(sf);
1020         }
1021       }
1022     } finally {
1023       finishCompactionRequest(cr);
1024     }
1025     logCompactionEndMessage(cr, sfs, compactionStartTime);
1026     return sfs;
1027   }
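  // Note on compact() above: with hbase.hstore.compaction.complete left at its
  // default of true, the new files are committed via moveFileIntoPlace() and
  // completeCompaction(); when it is set to false the compacted files are only
  // wrapped in StoreFile readers and left uncommitted, which is primarily used
  // in tests.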
1028 
1029   /**
1030    * Log a very elaborate compaction completion message.
1031    * @param cr Request.
1032    * @param sfs Resulting files.
1033    * @param compactionStartTime Start time.
1034    */
1035   private void logCompactionEndMessage(
1036       CompactionRequest cr, List<StoreFile> sfs, long compactionStartTime) {
1037     long now = EnvironmentEdgeManager.currentTimeMillis();
1038     StringBuilder message = new StringBuilder(
1039       "Completed" + (cr.isMajor() ? " major " : " ") + "compaction of "
1040       + cr.getFiles().size() + " file(s) in " + this + " of "
1041       + this.getRegionInfo().getRegionNameAsString()
1042       + " into ");
1043     if (sfs.isEmpty()) {
1044       message.append("none, ");
1045     } else {
1046       for (StoreFile sf: sfs) {
1047         message.append(sf.getPath().getName());
1048         message.append("(size=");
1049         message.append(StringUtils.humanReadableInt(sf.getReader().length()));
1050         message.append("), ");
1051       }
1052     }
1053     message.append("total size for store is ")
1054       .append(StringUtils.humanReadableInt(storeSize))
1055       .append(". This selection was in queue for ")
1056       .append(StringUtils.formatTimeDiff(compactionStartTime, cr.getSelectionTime()))
1057       .append(", and took ").append(StringUtils.formatTimeDiff(now, compactionStartTime))
1058       .append(" to execute.");
1059     LOG.info(message.toString());
1060   }
1061 
1062   // Package-visible for tests
1063   StoreFile moveFileIntoPlace(final Path newFile) throws IOException {
1064     validateStoreFile(newFile);
1065     // Move the file into the right spot
1066     Path destPath = fs.commitStoreFile(getColumnFamilyName(), newFile);
1067     StoreFile result = new StoreFile(this.getFileSystem(), destPath, this.conf, this.cacheConf,
1068         this.family.getBloomFilterType(), this.dataBlockEncoder);
1069     result.createReader();
1070     return result;
1071   }
1072 
1073   /**
1074    * This method tries to compact N recent files for testing.
1075    * Note that because compacting "recent" files only makes sense for some policies,
1076    * e.g. the default one, it assumes the default policy is used. It doesn't use the policy,
1077    * but instead makes a compaction candidate list by itself.
1078    * @param N Number of files.
1079    */
1080   public void compactRecentForTestingAssumingDefaultPolicy(int N) throws IOException {
1081     List<StoreFile> filesToCompact;
1082     boolean isMajor;
1083 
1084     this.lock.readLock().lock();
1085     try {
1086       synchronized (filesCompacting) {
1087         filesToCompact = Lists.newArrayList(storeEngine.getStoreFileManager().getStorefiles());
1088         if (!filesCompacting.isEmpty()) {
1089           // exclude all files older than the newest file we're currently
1090           // compacting. this allows us to preserve contiguity (HBASE-2856)
1091           StoreFile last = filesCompacting.get(filesCompacting.size() - 1);
1092           int idx = filesToCompact.indexOf(last);
1093           Preconditions.checkArgument(idx != -1);
1094           filesToCompact.subList(0, idx + 1).clear();
1095         }
1096         int count = filesToCompact.size();
1097         if (N > count) {
1098           throw new RuntimeException("Not enough files");
1099         }
1100 
1101         filesToCompact = filesToCompact.subList(count - N, count);
1102         isMajor = (filesToCompact.size() == storeEngine.getStoreFileManager().getStorefileCount());
1103         filesCompacting.addAll(filesToCompact);
1104         Collections.sort(filesCompacting, StoreFile.Comparators.SEQ_ID);
1105       }
1106     } finally {
1107       this.lock.readLock().unlock();
1108     }
1109 
1110     try {
1111       // Ready to go. Have list of files to compact.
1112       List<Path> newFiles =
1113           this.storeEngine.getCompactor().compactForTesting(filesToCompact, isMajor);
1114       for (Path newFile: newFiles) {
1115         // Move the compaction into place.
1116         StoreFile sf = moveFileIntoPlace(newFile);
1117         if (this.getCoprocessorHost() != null) {
1118           this.getCoprocessorHost().postCompact(this, sf, null);
1119         }
1120         ArrayList<StoreFile> tmp = new ArrayList<StoreFile>();
1121         tmp.add(sf);
1122         completeCompaction(filesToCompact, tmp);
1123       }
1124     } finally {
1125       synchronized (filesCompacting) {
1126         filesCompacting.removeAll(filesToCompact);
1127       }
1128     }
1129   }
1130 
1131   @Override
1132   public boolean hasReferences() {
1133     return StoreUtils.hasReferences(this.storeEngine.getStoreFileManager().getStorefiles());
1134   }
1135 
1136   @Override
1137   public CompactionProgress getCompactionProgress() {
1138     return this.storeEngine.getCompactor().getProgress();
1139   }
1140 
1141   @Override
1142   public boolean isMajorCompaction() throws IOException {
1143     for (StoreFile sf : this.storeEngine.getStoreFileManager().getStorefiles()) {
1144       // TODO: what are these reader checks all over the place?
1145       if (sf.getReader() == null) {
1146         LOG.debug("StoreFile " + sf + " has null Reader");
1147         return false;
1148       }
1149     }
1150     return storeEngine.getCompactionPolicy().isMajorCompaction(
1151         this.storeEngine.getStoreFileManager().getStorefiles());
1152   }
1153 
1154   @Override
1155   public CompactionContext requestCompaction() throws IOException {
1156     return requestCompaction(Store.NO_PRIORITY, null);
1157   }
1158 
1159   @Override
1160   public CompactionContext requestCompaction(int priority, CompactionRequest baseRequest)
1161       throws IOException {
1162     // don't even select for compaction if writes are disabled
1163     if (!this.areWritesEnabled()) {
1164       return null;
1165     }
1166 
1167     CompactionContext compaction = storeEngine.createCompaction();
1168     this.lock.readLock().lock();
1169     try {
1170       synchronized (filesCompacting) {
1171         // First, see if coprocessor would want to override selection.
1172         if (this.getCoprocessorHost() != null) {
1173           List<StoreFile> candidatesForCoproc = compaction.preSelect(this.filesCompacting);
1174           boolean override = this.getCoprocessorHost().preCompactSelection(
1175               this, candidatesForCoproc, baseRequest);
1176           if (override) {
1177             // Coprocessor is overriding normal file selection.
1178             compaction.forceSelect(new CompactionRequest(candidatesForCoproc));
1179           }
1180         }
1181 
1182         // Normal case - coprocessor is not overriding file selection.
1183         if (!compaction.hasSelection()) {
1184           boolean isUserCompaction = priority == Store.PRIORITY_USER;
1185           boolean mayUseOffPeak = this.offPeakCompactions.tryStartOffPeakRequest();
1186           compaction.select(this.filesCompacting, isUserCompaction,
1187               mayUseOffPeak, forceMajor && filesCompacting.isEmpty());
1188           assert compaction.hasSelection();
1189           if (mayUseOffPeak && !compaction.getRequest().isOffPeak()) {
1190             // Compaction policy doesn't want to take advantage of off-peak.
1191             this.offPeakCompactions.endOffPeakRequest();
1192           }
1193         }
1194         if (this.getCoprocessorHost() != null) {
1195           this.getCoprocessorHost().postCompactSelection(
1196               this, ImmutableList.copyOf(compaction.getRequest().getFiles()), baseRequest);
1197         }
1198 
1199         // Selected files; see if we have a compaction with some custom base request.
1200         if (baseRequest != null) {
1201           // Update the request with what the system thinks the request should be;
1202           // it's up to the request if it wants to listen.
1203           compaction.forceSelect(
1204               baseRequest.combineWith(compaction.getRequest()));
1205         }
1206 
1207         // Finally, we have the resulting files list. Check if we have any files at all.
1208         final Collection<StoreFile> selectedFiles = compaction.getRequest().getFiles();
1209         if (selectedFiles.isEmpty()) {
1210           return null;
1211         }
1212 
1213         // Update filesCompacting (check that we do not try to compact the same StoreFile twice).
1214         if (!Collections.disjoint(filesCompacting, selectedFiles)) {
1215           // TODO: change this from an IAE to LOG.error after sufficient testing
1216           Preconditions.checkArgument(false, "%s overlaps with %s",
1217               selectedFiles, filesCompacting);
1218         }
1219         filesCompacting.addAll(selectedFiles);
1220         Collections.sort(filesCompacting, StoreFile.Comparators.SEQ_ID);
1221 
1222         // If we're enqueuing a major, clear the force flag.
1223         boolean isMajor = selectedFiles.size() == this.getStorefilesCount();
1224         this.forceMajor = this.forceMajor && !isMajor;
1225 
1226         // Set common request properties.
1227         // Set priority, either override value supplied by caller or from store.
1228         compaction.getRequest().setPriority(
1229             (priority != Store.NO_PRIORITY) ? priority : getCompactPriority());
1230         compaction.getRequest().setIsMajor(isMajor);
1231         compaction.getRequest().setDescription(
1232             getRegionInfo().getRegionNameAsString(), getColumnFamilyName());
1233       }
1234     } finally {
1235       this.lock.readLock().unlock();
1236     }
1237 
1238     LOG.debug(getRegionInfo().getEncodedName() + " - " + getColumnFamilyName() + ": Initiating "
1239         + (compaction.getRequest().isMajor() ? "major" : "minor") + " compaction");
1240     this.region.reportCompactionRequestStart(compaction.getRequest().isMajor());
1241     return compaction;
1242   }
1243 
1244   public void cancelRequestedCompaction(CompactionContext compaction) {
1245     finishCompactionRequest(compaction.getRequest());
1246   }
1247 
1248   private void finishCompactionRequest(CompactionRequest cr) {
1249     this.region.reportCompactionRequestEnd(cr.isMajor());
1250     if (cr.isOffPeak()) {
1251       this.offPeakCompactions.endOffPeakRequest();
1252       cr.setOffPeak(false);
1253     }
1254     synchronized (filesCompacting) {
1255       filesCompacting.removeAll(cr.getFiles());
1256     }
1257   }
1258 
1259   /**
1260    * Validates a store file by opening and closing it. In HFileV2 this should
1261    * not be an expensive operation.
1262    *
1263    * @param path the path to the store file
1264    */
1265   private void validateStoreFile(Path path)
1266       throws IOException {
1267     StoreFile storeFile = null;
1268     try {
1269       storeFile = new StoreFile(this.getFileSystem(), path, this.conf,
1270           this.cacheConf, this.family.getBloomFilterType(),
1271           NoOpDataBlockEncoder.INSTANCE);
1272       storeFile.createReader();
1273     } catch (IOException e) {
1274       LOG.error("Failed to open store file : " + path
1275           + ", keeping it in tmp location", e);
1276       throw e;
1277     } finally {
1278       if (storeFile != null) {
1279         storeFile.closeReader(false);
1280       }
1281     }
1282   }
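  // Note on validateStoreFile() above: the reader is opened only to force the
  // HFile to be parsed, then closed again without evicting cached blocks
  // (closeReader(false)); if opening fails the file is left in the tmp location
  // and the exception is rethrown.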
1283 
1284   /*
1285    * Commits a compaction that has been written out to disk.
1286    *
1287    * <p>It is usually invoked at the end of a compaction, but might also be
1288    * invoked at HStore startup, if the prior execution died midway through.
1289    *
1290    * <p>Moving the compacted TreeMap into place means:
1291    * <pre>
1292    * 1) Moving the new compacted StoreFile into place
1293    * 2) Unload all replaced StoreFiles, close them and collect the list to delete.
1294    * 3) Loading the new TreeMap.
1295    * 4) Compute new store size
1296    * </pre>
1297    *
1298    * @param compactedFiles list of files that were compacted
1299    * @param result the new StoreFiles that are the result of the compaction
1301    * @throws IOException
1302    */
1303   private void completeCompaction(final Collection<StoreFile> compactedFiles,
1304       final Collection<StoreFile> result) throws IOException {
1305     try {
1306       this.lock.writeLock().lock();
1307       try {
1308         // Change this.storeFiles so it reflects new state but do not
1309         // delete old store files until we have sent out notification of
1310         // change in case old files are still being accessed by outstanding
1311         // scanners.
1312         this.storeEngine.getStoreFileManager().addCompactionResults(compactedFiles, result);
1313         filesCompacting.removeAll(compactedFiles); // safe bc: lock.writeLock()
1314       } finally {
1315         // We need the lock, as long as we are updating the storeFiles
1316         // or changing the memstore. Let us release it before calling
1317        // notifyChangedReadersObservers. See HBASE-4485 for a possible
1318        // deadlock scenario that could have happened if we continued to hold
1319         // the lock.
1320         this.lock.writeLock().unlock();
1321       }
1322 
1323       // Tell observers that list of StoreFiles has changed.
1324       notifyChangedReadersObservers();
1325 
1326       // let the archive util decide if we should archive or delete the files
1327       LOG.debug("Removing store files after compaction...");
1328       this.fs.removeStoreFiles(this.getColumnFamilyName(), compactedFiles);
1329 
1330     } catch (IOException e) {
1331       e = RemoteExceptionHandler.checkIOException(e);
1332       LOG.error("Failed replacing compacted files in " + this +
1333         ". Compacted files are " + (result == null? "none": result.toString()) +
1334         ". Files replaced " + compactedFiles.toString() +
1335         " some of which may have been already removed", e);
1336     }
1337 
1338     // 4. Compute new store size
1339     this.storeSize = 0L;
1340     this.totalUncompressedBytes = 0L;
1341     for (StoreFile hsf : this.storeEngine.getStoreFileManager().getStorefiles()) {
1342       StoreFile.Reader r = hsf.getReader();
1343       if (r == null) {
1344         LOG.warn("StoreFile " + hsf + " has a null Reader");
1345         continue;
1346       }
1347       this.storeSize += r.length();
1348       this.totalUncompressedBytes += r.getTotalUncompressedBytes();
1349     }
1350   }
1351 
1352   /*
1353    * @param wantedVersions How many versions were asked for.
1354    * @return wantedVersions or this family's maximum versions, whichever is smaller.
1355    */
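       // For example, with a family maximum of 3 versions, a request for 5 versions
       // returns 3, while a request for 2 returns 2.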
1356   int versionsToReturn(final int wantedVersions) {
1357     if (wantedVersions <= 0) {
1358       throw new IllegalArgumentException("Number of versions must be > 0");
1359     }
1360     // Make sure we do not return more than maximum versions for this store.
1361     int maxVersions = this.family.getMaxVersions();
1362     return wantedVersions > maxVersions ? maxVersions: wantedVersions;
1363   }
1364 
1365   static boolean isExpired(final KeyValue key, final long oldestTimestamp) {
1366     return key.getTimestamp() < oldestTimestamp;
1367   }
1368 
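       /*
        * Finds the row key at or before the given row. As implemented below, the
        * approach is: first consult the memstore for deletes and candidates, then
        * walk candidate store files from newest to oldest, narrowing the remaining
        * candidate file set whenever a better candidate is found.
        */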
1369   @Override
1370   public KeyValue getRowKeyAtOrBefore(final byte[] row) throws IOException {
1371     // If minVersions is set, we will not ignore expired KVs.
1372     // As we're only looking for the latest matches, that should be OK.
1373     // With minVersions > 0 we guarantee that any KV that has any version
1374     // at all (expired or not) has at least one version that will not expire.
1375     // Note that this method used to take a KeyValue as an argument. A KeyValue
1376     // can be back-dated; a row key cannot.
1377     long ttlToUse = scanInfo.getMinVersions() > 0 ? Long.MAX_VALUE : this.scanInfo.getTtl();
1378 
1379     KeyValue kv = new KeyValue(row, HConstants.LATEST_TIMESTAMP);
1380 
1381     GetClosestRowBeforeTracker state = new GetClosestRowBeforeTracker(
1382       this.comparator, kv, ttlToUse, this.getRegionInfo().isMetaRegion());
1383     this.lock.readLock().lock();
1384     try {
1385       // First go to the memstore.  Pick up deletes and candidates.
1386       this.memstore.getRowKeyAtOrBefore(state);
1387       // Check if match, if we got a candidate on the asked for 'kv' row.
1388       // Process each relevant store file. Run through from newest to oldest.
1389       Iterator<StoreFile> sfIterator = this.storeEngine.getStoreFileManager()
1390           .getCandidateFilesForRowKeyBefore(state.getTargetKey());
1391       while (sfIterator.hasNext()) {
1392         StoreFile sf = sfIterator.next();
1393         sfIterator.remove(); // Remove sf from iterator.
1394         boolean haveNewCandidate = rowAtOrBeforeFromStoreFile(sf, state);
1395         if (haveNewCandidate) {
1396           // TODO: there may be an optimization here that stops the search if we find an exact match.
1397           sfIterator = this.storeEngine.getStoreFileManager().updateCandidateFilesForRowKeyBefore(
1398               sfIterator, state.getTargetKey(), state.getCandidate());
1399         }
1400       }
1401       return state.getCandidate();
1402     } finally {
1403       this.lock.readLock().unlock();
1404     }
1405   }
1406 
1407   /*
1408    * Check an individual store file for the row at or before a given row.
1409    * @param f the store file to check
1410    * @param state tracker for the best candidate found so far
1411    * @throws IOException
1412    * @return True iff the candidate has been updated in the state.
1413    */
1414   private boolean rowAtOrBeforeFromStoreFile(final StoreFile f,
1415                                           final GetClosestRowBeforeTracker state)
1416       throws IOException {
1417     StoreFile.Reader r = f.getReader();
1418     if (r == null) {
1419       LOG.warn("StoreFile " + f + " has a null Reader");
1420       return false;
1421     }
1422     if (r.getEntries() == 0) {
1423       LOG.warn("StoreFile " + f + " is an empty store file");
1424       return false;
1425     }
1426     // TODO: Cache these keys rather than make each time?
1427     byte [] fk = r.getFirstKey();
1428     if (fk == null) return false;
1429     KeyValue firstKV = KeyValue.createKeyValueFromKey(fk, 0, fk.length);
1430     byte [] lk = r.getLastKey();
1431     KeyValue lastKV = KeyValue.createKeyValueFromKey(lk, 0, lk.length);
1432     KeyValue firstOnRow = state.getTargetKey();
1433     if (this.comparator.compareRows(lastKV, firstOnRow) < 0) {
1434       // If last key in file is not of the target table, no candidates in this
1435       // file.  Return.
1436       if (!state.isTargetTable(lastKV)) return false;
1437       // If the row we're looking for is past the end of file, set search key to
1438       // last key. TODO: Cache last and first key rather than make each time.
1439       firstOnRow = new KeyValue(lastKV.getRow(), HConstants.LATEST_TIMESTAMP);
1440     }
1441     // Get a scanner that caches blocks and that uses pread.
1442     HFileScanner scanner = r.getScanner(true, true, false);
1443     // Seek scanner.  If can't seek it, return.
1444     if (!seekToScanner(scanner, firstOnRow, firstKV)) return false;
1445     // If we found a candidate on firstOnRow itself, just return. In practice this
1446     // is very unlikely, since firstOnRow rarely matches an actual row in the table.
1447     if (walkForwardInSingleRow(scanner, firstOnRow, state)) return true;
1448     // If here, need to start backing up.
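         // seekBefore() positions the scanner on the last key strictly before
         // firstOnRow; from there we walk forward within that row looking for a
         // better candidate, stepping back another row on each pass of the loop.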
1449     while (scanner.seekBefore(firstOnRow.getBuffer(), firstOnRow.getKeyOffset(),
1450        firstOnRow.getKeyLength())) {
1451       KeyValue kv = scanner.getKeyValue();
1452       if (!state.isTargetTable(kv)) break;
1453       if (!state.isBetterCandidate(kv)) break;
1454       // Make new first on row.
1455       firstOnRow = new KeyValue(kv.getRow(), HConstants.LATEST_TIMESTAMP);
1456       // Seek scanner.  If can't seek it, break.
1457       if (!seekToScanner(scanner, firstOnRow, firstKV)) return false;
1458       // If we find something, return.
1459       if (walkForwardInSingleRow(scanner, firstOnRow, state)) return true;
1460     }
1461     return false;
1462   }
1463 
1464   /*
1465    * Seek the file scanner to firstOnRow or first entry in file.
1466    * @param scanner
1467    * @param firstOnRow
1468    * @param firstKV
1469    * @return True if we successfully seeked the scanner.
1470    * @throws IOException
1471    */
1472   private boolean seekToScanner(final HFileScanner scanner,
1473                                 final KeyValue firstOnRow,
1474                                 final KeyValue firstKV)
1475       throws IOException {
1476     KeyValue kv = firstOnRow;
1477     // If firstOnRow is on the same row as firstKV, seek to firstKV instead.
1478     if (this.comparator.compareRows(firstKV, firstOnRow) == 0) kv = firstKV;
1479     int result = scanner.seekTo(kv.getBuffer(), kv.getKeyOffset(),
1480       kv.getKeyLength());
1481     return result >= 0;
1482   }
1483 
1484   /*
1485    * When we come in here, we are probably at the kv just before we break into
1486    * the row that firstOnRow is on.  Usually we need to advance once to get
1487    * onto the row we are interested in.
1488    * @param scanner
1489    * @param firstOnRow
1490    * @param state
1491    * @return True if we found a candidate.
1492    * @throws IOException
1493    */
1494   private boolean walkForwardInSingleRow(final HFileScanner scanner,
1495                                          final KeyValue firstOnRow,
1496                                          final GetClosestRowBeforeTracker state)
1497       throws IOException {
1498     boolean foundCandidate = false;
1499     do {
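           // Note: each 'continue' below jumps to the loop condition, which
           // advances the scanner via next() before the next pass.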
1500       KeyValue kv = scanner.getKeyValue();
1501       // If we are not in the row, skip.
1502       if (this.comparator.compareRows(kv, firstOnRow) < 0) continue;
1503       // Did we go beyond the target row? If so break.
1504       if (state.isTooFar(kv, firstOnRow)) break;
1505       if (state.isExpired(kv)) {
1506         continue;
1507       }
1508       // If we added something, this row is a contender. break.
1509       if (state.handle(kv)) {
1510         foundCandidate = true;
1511         break;
1512       }
1513     } while(scanner.next());
1514     return foundCandidate;
1515   }
1516 
1517   public boolean canSplit() {
1518     this.lock.readLock().lock();
1519     try {
1520       // Not splittable if we find a reference store file present in the store.
1521       boolean result = !hasReferences();
1522       if (!result && LOG.isDebugEnabled()) {
1523         LOG.debug("Cannot split region because reference files are present");
1524       }
1525       return result;
1526     } finally {
1527       this.lock.readLock().unlock();
1528     }
1529   }
1530 
1531   @Override
1532   public byte[] getSplitPoint() {
1533     this.lock.readLock().lock();
1534     try {
1535       // Should already be enforced by the split policy!
1536       assert !this.getRegionInfo().isMetaRegion();
1537       // Not splittable if we find a reference store file present in the store.
1538       if (hasReferences()) {
1539         assert false : "getSplitPoint() called on a region that can't split!";
1540         return null;
1541       }
1542       return this.storeEngine.getStoreFileManager().getSplitPoint();
1543     } catch(IOException e) {
1544       LOG.warn("Failed getting split point for " + this, e);
1545     } finally {
1546       this.lock.readLock().unlock();
1547     }
1548     return null;
1549   }
1550 
1551   @Override
1552   public long getLastCompactSize() {
1553     return this.lastCompactSize;
1554   }
1555 
1556   @Override
1557   public long getSize() {
1558     return storeSize;
1559   }
1560 
1561   public void triggerMajorCompaction() {
1562     this.forceMajor = true;
1563   }
1564 
1565   boolean getForceMajorCompaction() {
1566     return this.forceMajor;
1567   }
1568 
1569   //////////////////////////////////////////////////////////////////////////////
1570   // File administration
1571   //////////////////////////////////////////////////////////////////////////////
1572 
1573   @Override
1574   public KeyValueScanner getScanner(Scan scan,
1575       final NavigableSet<byte []> targetCols) throws IOException {
1576     lock.readLock().lock();
1577     try {
1578       KeyValueScanner scanner = null;
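           // Give coprocessors a chance to supply their own scanner; fall back to
           // the default StoreScanner if none of them does.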
1579       if (this.getCoprocessorHost() != null) {
1580         scanner = this.getCoprocessorHost().preStoreScannerOpen(this, scan, targetCols);
1581       }
1582       if (scanner == null) {
1583         scanner = new StoreScanner(this, getScanInfo(), scan, targetCols);
1584       }
1585       return scanner;
1586     } finally {
1587       lock.readLock().unlock();
1588     }
1589   }
1590 
1591   @Override
1592   public String toString() {
1593     return this.getColumnFamilyName();
1594   }
1595 
1596   @Override
1597   // TODO: why is there this and also getNumberOfStorefiles?! Remove one.
1598   public int getStorefilesCount() {
1599     return this.storeEngine.getStoreFileManager().getStorefileCount();
1600   }
1601 
1602   @Override
1603   public long getStoreSizeUncompressed() {
1604     return this.totalUncompressedBytes;
1605   }
1606 
1607   @Override
1608   public long getStorefilesSize() {
1609     long size = 0;
1610     for (StoreFile s: this.storeEngine.getStoreFileManager().getStorefiles()) {
1611       StoreFile.Reader r = s.getReader();
1612       if (r == null) {
1613         LOG.warn("StoreFile " + s + " has a null Reader");
1614         continue;
1615       }
1616       size += r.length();
1617     }
1618     return size;
1619   }
1620 
1621   @Override
1622   public long getStorefilesIndexSize() {
1623     long size = 0;
1624     for (StoreFile s: this.storeEngine.getStoreFileManager().getStorefiles()) {
1625       StoreFile.Reader r = s.getReader();
1626       if (r == null) {
1627         LOG.warn("StoreFile " + s + " has a null Reader");
1628         continue;
1629       }
1630       size += r.indexSize();
1631     }
1632     return size;
1633   }
1634 
1635   @Override
1636   public long getTotalStaticIndexSize() {
1637     long size = 0;
1638     for (StoreFile s : this.storeEngine.getStoreFileManager().getStorefiles()) {
1639       size += s.getReader().getUncompressedDataIndexSize();
1640     }
1641     return size;
1642   }
1643 
1644   @Override
1645   public long getTotalStaticBloomSize() {
1646     long size = 0;
1647     for (StoreFile s : this.storeEngine.getStoreFileManager().getStorefiles()) {
1648       StoreFile.Reader r = s.getReader();
1649       size += r.getTotalBloomSize();
1650     }
1651     return size;
1652   }
1653 
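       // The memstore size reported below is its heap footprint, not a count of cells.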
1654   @Override
1655   public long getMemStoreSize() {
1656     return this.memstore.heapSize();
1657   }
1658 
1659   @Override
1660   public int getCompactPriority() {
1661     return this.storeEngine.getStoreFileManager().getStoreCompactionPriority();
1662   }
1663 
1664   @Override
1665   public boolean throttleCompaction(long compactionSize) {
1666     return storeEngine.getCompactionPolicy().throttleCompaction(compactionSize);
1667   }
1668 
1669   public HRegion getHRegion() {
1670     return this.region;
1671   }
1672 
1673   @Override
1674   public RegionCoprocessorHost getCoprocessorHost() {
1675     return this.region.getCoprocessorHost();
1676   }
1677 
1678   @Override
1679   public HRegionInfo getRegionInfo() {
1680     return this.fs.getRegionInfo();
1681   }
1682 
1683   @Override
1684   public boolean areWritesEnabled() {
1685     return this.region.areWritesEnabled();
1686   }
1687 
1688   @Override
1689   public long getSmallestReadPoint() {
1690     return this.region.getSmallestReadPoint();
1691   }
1692 
1693   /**
1694    * Used in tests. TODO: Remove
1695    *
1696    * Updates the value for the given row/family/qualifier. This function will always be seen as
1697    * atomic by other readers because it only puts a single KV to memstore. Thus no read/write
1698    * control is necessary.
1699    * @param row row to update
1700    * @param f family to update
1701    * @param qualifier qualifier to update
1702    * @param newValue the new value to set into memstore
1703    * @return memstore size delta
1704    * @throws IOException
1705    */
1706   public long updateColumnValue(byte [] row, byte [] f,
1707                                 byte [] qualifier, long newValue)
1708       throws IOException {
1709 
1710     this.lock.readLock().lock();
1711     try {
1712       long now = EnvironmentEdgeManager.currentTimeMillis();
1713 
1714       return this.memstore.updateColumnValue(row,
1715           f,
1716           qualifier,
1717           newValue,
1718           now);
1719 
1720     } finally {
1721       this.lock.readLock().unlock();
1722     }
1723   }
1724 
1725   @Override
1726   public long upsert(Iterable<? extends Cell> cells, long readpoint) throws IOException {
1727     this.lock.readLock().lock();
1728     try {
1729       return this.memstore.upsert(cells, readpoint);
1730     } finally {
1731       this.lock.readLock().unlock();
1732     }
1733   }
1734 
1735   public StoreFlusher getStoreFlusher(long cacheFlushId) {
1736     return new StoreFlusherImpl(cacheFlushId);
1737   }
1738 
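       /*
        * A sketch of how a StoreFlusher is expected to be driven (the actual caller
        * lives outside this class, typically the region-level flush path), assuming
        * an HStore instance named store and a MonitoredTask named status:
        *
        *   StoreFlusher flusher = store.getStoreFlusher(cacheFlushId);
        *   flusher.prepare();                      // snapshot the memstore
        *   flusher.flushCache(status);             // write the snapshot to a tmp store file
        *   boolean added = flusher.commit(status); // move the file into place, update store files
        */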
1739   private class StoreFlusherImpl implements StoreFlusher {
1740 
1741     private long cacheFlushId;
1742     private SortedSet<KeyValue> snapshot;
1743     private StoreFile storeFile;
1744     private Path storeFilePath;
1745     private TimeRangeTracker snapshotTimeRangeTracker;
1746     private AtomicLong flushedSize;
1747 
1748     private StoreFlusherImpl(long cacheFlushId) {
1749       this.cacheFlushId = cacheFlushId;
1750       this.flushedSize = new AtomicLong();
1751     }
1752 
1753     @Override
1754     public void prepare() {
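           // Move the current memstore contents into a snapshot; flushCache() will
           // write this snapshot out while new writes go to a fresh memstore set.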
1755       memstore.snapshot();
1756       this.snapshot = memstore.getSnapshot();
1757       this.snapshotTimeRangeTracker = memstore.getSnapshotTimeRangeTracker();
1758     }
1759 
1760     @Override
1761     public void flushCache(MonitoredTask status) throws IOException {
1762       storeFilePath = HStore.this.flushCache(
1763         cacheFlushId, snapshot, snapshotTimeRangeTracker, flushedSize, status);
1764     }
1765 
1766     @Override
1767     public boolean commit(MonitoredTask status) throws IOException {
1768       if (storeFilePath == null) {
1769         return false;
1770       }
1771       storeFile = HStore.this.commitFile(storeFilePath, cacheFlushId,
1772                                snapshotTimeRangeTracker, flushedSize, status);
1773       if (HStore.this.getCoprocessorHost() != null) {
1774         HStore.this.getCoprocessorHost().postFlush(HStore.this, storeFile);
1775       }
1776 
1777       // Add new file to store files.  Clear snapshot too while we have
1778       // the Store write lock.
1779       return HStore.this.updateStorefiles(storeFile, snapshot);
1780     }
1781   }
1782 
1783   @Override
1784   public boolean needsCompaction() {
1785     return storeEngine.getCompactionPolicy().needsCompaction(
1786         this.storeEngine.getStoreFileManager().getStorefiles(), filesCompacting);
1787   }
1788 
1789   @Override
1790   public CacheConfig getCacheConfig() {
1791     return this.cacheConf;
1792   }
1793 
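       // Rough heap accounting for an HStore instance. The reference, long, int and
       // boolean counts below mirror this class's fields and need to be kept in sync
       // when fields are added or removed; DEEP_OVERHEAD additionally counts the
       // fixed-size structures the store always allocates.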
1794   public static final long FIXED_OVERHEAD =
1795       ClassSize.align((17 * ClassSize.REFERENCE) + (5 * Bytes.SIZEOF_LONG)
1796               + (2 * Bytes.SIZEOF_INT) + Bytes.SIZEOF_BOOLEAN);
1797 
1798   public static final long DEEP_OVERHEAD = ClassSize.align(FIXED_OVERHEAD
1799       + ClassSize.OBJECT + ClassSize.REENTRANT_LOCK
1800       + ClassSize.CONCURRENT_SKIPLISTMAP
1801       + ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY + ClassSize.OBJECT
1802       + ScanInfo.FIXED_OVERHEAD);
1803 
1804   @Override
1805   public long heapSize() {
1806     return DEEP_OVERHEAD + this.memstore.heapSize();
1807   }
1808 
1809   public KeyValue.KVComparator getComparator() {
1810     return comparator;
1811   }
1812 
1813   @Override
1814   public ScanInfo getScanInfo() {
1815     return scanInfo;
1816   }
1817 
1818   /**
1819    * Set scan info; used by tests.
1820    * @param scanInfo new scan info to use for tests
1821    */
1822   void setScanInfo(ScanInfo scanInfo) {
1823     this.scanInfo = scanInfo;
1824   }
1825 
1826   @Override
1827   public boolean hasTooManyStoreFiles() {
1828     return getStorefilesCount() > this.blockingFileCount;
1829   }
1830 }