/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.NavigableSet;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionThroughputController;
import org.apache.hadoop.hbase.security.User;

/**
 * Interface for objects that hold a column family in a Region. It is a memstore and a set of zero
 * or more StoreFiles, which stretch backwards over time.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface Store extends HeapSize, StoreConfigInformation {

  /* The default priority for user-specified compaction requests.
   * The user gets top priority unless we have blocking compactions. (Pri <= 0)
   */
  int PRIORITY_USER = 1;
  int NO_PRIORITY = Integer.MIN_VALUE;

  // General Accessors
  KeyValue.KVComparator getComparator();

  Collection<StoreFile> getStorefiles();

  /**
   * Close all the readers. We don't need to worry about subsequent requests because the HRegion
   * holds a write lock that will prevent any more reads or writes.
   * @return the {@link StoreFile StoreFiles} that were previously being used.
   * @throws IOException on failure
   */
  Collection<StoreFile> close() throws IOException;

  /**
   * Return a scanner for both the memstore and the HStore files. Assumes we are not in a
   * compaction.
   * @param scan Scan to apply when scanning the stores
   * @param targetCols columns to scan
   * @param readPt the MVCC read point to use for the scan
   * @return a scanner over the current key values
   * @throws IOException on failure
   */
  KeyValueScanner getScanner(Scan scan, final NavigableSet<byte[]> targetCols, long readPt)
      throws IOException;

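  // Illustrative usage sketch, not part of the interface. It assumes an existing Store instance
  // "store", an MVCC read point "readPt", and imports of java.util.TreeSet and
  // org.apache.hadoop.hbase.util.Bytes:
  //
  //   Scan scan = new Scan(Bytes.toBytes("row-0"));                    // start scanning at "row-0"
  //   NavigableSet<byte[]> cols = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
  //   cols.add(Bytes.toBytes("q1"));                                   // restrict to qualifier "q1"
  //   KeyValueScanner scanner = store.getScanner(scan, cols, readPt);  // memstore + store files
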
  /**
   * Get all scanners with no filtering based on TTL (that happens further down
   * the line).
   * @param cacheBlocks whether the blocks that are read should be cached
   * @param isGet whether the scan is for a Get
   * @param usePread whether to use positional read (pread) rather than streaming reads
   * @param isCompaction whether the scanners are being opened for a compaction
   * @param matcher the query matcher to use, if any
   * @param startRow the start row of the scan
   * @param stopRow the stop row of the scan
   * @param readPt the MVCC read point to use
   * @return all scanners for this store
   */
  List<KeyValueScanner> getScanners(
    boolean cacheBlocks,
    boolean isGet,
    boolean usePread,
    boolean isCompaction,
    ScanQueryMatcher matcher,
    byte[] startRow,
    byte[] stopRow,
    long readPt
  ) throws IOException;

  ScanInfo getScanInfo();

  /**
   * Adds or replaces the specified KeyValues.
   * <p>
   * For each KeyValue specified, if a cell with the same row, family, and qualifier exists in
   * MemStore, it will be replaced. Otherwise, it will just be inserted into the MemStore.
   * <p>
   * This operation is atomic on each KeyValue (row/family/qualifier) but not necessarily atomic
   * across all of them.
   * @param cells the cells to add or replace
   * @param readpoint readpoint below which we can safely remove duplicate KVs
   * @return memstore size delta
   * @throws IOException on failure
   */
  long upsert(Iterable<Cell> cells, long readpoint) throws IOException;

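  // Illustrative usage sketch, not part of the interface. The Store instance "store" and the
  // List<Cell> "cells" are assumptions; using the store's smallest read point as the readpoint
  // is also an assumption made for the example:
  //
  //   long readpoint = store.getSmallestReadPoint();
  //   long sizeDelta = store.upsert(cells, readpoint);  // memstore size delta, in bytes
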
  /**
   * Adds a value to the memstore.
   * @param kv the KeyValue to add
   * @return memstore size delta
   */
  long add(KeyValue kv);

  /**
   * @return The time of the oldest edit currently in the memstore
   */
  long timeOfOldestEdit();

  /**
   * Removes a kv from the memstore. The KeyValue is removed only if its key and memstoreTS match
   * the key and memstoreTS of the kv parameter.
   * @param kv the KeyValue to remove
   */
  void rollback(final KeyValue kv);

  /**
   * Find the key that matches <i>row</i> exactly, or the one that immediately precedes it.
   * WARNING: Only use this method on a table where writes occur with strictly increasing
   * timestamps. This method assumes this pattern of writes in order to make it reasonably
   * performant. Our search also depends on the assumption that deletes are for cells in the
   * container that follows (whether a memstore snapshot or a storefile), not for the current
   * container: i.e. we will see deletes before we come across the cells we are to delete. The
   * presumption is that the memstore#kvset is processed before memstore#snapshot, and so on.
   * @param row The row key of the targeted row.
   * @return Found KeyValue or null if none found.
   * @throws IOException on failure
   */
  KeyValue getRowKeyAtOrBefore(final byte[] row) throws IOException;

  FileSystem getFileSystem();


  /**
   * @param maxKeyCount estimated maximum number of keys to be written
   * @param compression Compression algorithm to use
   * @param isCompaction whether we are creating a new file in a compaction
   * @param includeMVCCReadpoint whether we should write out the MVCC readpoint
   * @param includesTags whether tags will be included in the new file
   * @return Writer for a new StoreFile in the tmp dir.
   */
  StoreFile.Writer createWriterInTmp(
      long maxKeyCount,
      Compression.Algorithm compression,
      boolean isCompaction,
      boolean includeMVCCReadpoint,
      boolean includesTags
  ) throws IOException;

  /**
   * @param maxKeyCount estimated maximum number of keys to be written
   * @param compression Compression algorithm to use
   * @param isCompaction whether we are creating a new file in a compaction
   * @param includeMVCCReadpoint whether we should write out the MVCC readpoint
   * @param includesTags whether tags will be included in the new file
   * @param shouldDropBehind should the writer drop caches behind writes
   * @return Writer for a new StoreFile in the tmp dir.
   */
  StoreFile.Writer createWriterInTmp(
      long maxKeyCount,
      Compression.Algorithm compression,
      boolean isCompaction,
      boolean includeMVCCReadpoint,
      boolean includesTags,
      boolean shouldDropBehind
  ) throws IOException;

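  // Illustrative usage sketch, not part of the interface. The Store instance "store", the key
  // count estimate and the KeyValue "kv" are assumptions made for the example:
  //
  //   StoreFile.Writer writer =
  //       store.createWriterInTmp(1000, Compression.Algorithm.NONE, false, true, false);
  //   writer.append(kv);   // cells must be appended in sorted order
  //   writer.close();      // finishes the new file in the tmp dir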

  // Compaction oriented methods

  boolean throttleCompaction(long compactionSize);

  /**
   * Getter for the CompactionProgress object.
   * @return CompactionProgress object; can be null
   */
  CompactionProgress getCompactionProgress();

  CompactionContext requestCompaction() throws IOException;

  /**
   * @deprecated use {@link #requestCompaction(int, CompactionRequest, User)} instead
   */
  @Deprecated
  CompactionContext requestCompaction(int priority, CompactionRequest baseRequest)
      throws IOException;

  CompactionContext requestCompaction(int priority, CompactionRequest baseRequest, User user)
      throws IOException;

  void cancelRequestedCompaction(CompactionContext compaction);

  /**
   * @deprecated use {@link #compact(CompactionContext, CompactionThroughputController, User)}
   *             instead
   */
  @Deprecated
  List<StoreFile> compact(CompactionContext compaction,
      CompactionThroughputController throughputController) throws IOException;

  List<StoreFile> compact(CompactionContext compaction,
      CompactionThroughputController throughputController, User user) throws IOException;

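  // Illustrative compaction flow sketch, not part of the interface. The Store instance "store",
  // the CompactionThroughputController "controller", the User "user" and the scheduling flag
  // "shouldRun" are assumptions made for the example:
  //
  //   CompactionContext compaction = store.requestCompaction();
  //   if (compaction != null) {
  //     if (shouldRun) {
  //       List<StoreFile> newFiles = store.compact(compaction, controller, user);
  //     } else {
  //       store.cancelRequestedCompaction(compaction);  // we are not going to run it after all
  //     }
  //   }
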
  /**
   * @return true if we should run a major compaction.
   */
  boolean isMajorCompaction() throws IOException;

  void triggerMajorCompaction();

  /**
   * See if there are too many store files in this store.
   * @return true if the number of store files is greater than the number defined in
   *         minFilesToCompact
   */
  boolean needsCompaction();

  int getCompactPriority();

  StoreFlushContext createFlushContext(long cacheFlushId);

  /**
   * Call to complete a compaction. It's for the case where we find in the WAL a compaction that
   * was not finished. We could find one when recovering a WAL after a regionserver crash. See
   * HBASE-2331.
   * @param compaction the descriptor of the compaction found in the WAL
   */
  void completeCompactionMarker(CompactionDescriptor compaction)
      throws IOException;

  // Split oriented methods

  boolean canSplit();

  /**
   * Determines if the Store should be split.
   * @return the split point row key if the store should be split, null otherwise.
   */
  byte[] getSplitPoint();

  // Bulk Load methods

  /**
   * This throws a WrongRegionException if the HFile does not fit in this region, or an
   * InvalidHFileException if the HFile is not valid.
   */
  void assertBulkLoadHFileOk(Path srcPath) throws IOException;

  /**
   * This method should only be called from HRegion. It is assumed that the ranges of values in
   * the HFile fit within the store's assigned region. (assertBulkLoadHFileOk checks this)
   *
   * @param srcPathStr path to the HFile to load
   * @param sequenceId sequence Id associated with the HFile
   */
  void bulkLoadHFile(String srcPathStr, long sequenceId) throws IOException;

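  // Illustrative call-order sketch, not part of the interface; in practice HRegion drives this.
  // The Store instance "store", the Path "srcPath" and the sequence id "seqId" are assumptions:
  //
  //   store.assertBulkLoadHFileOk(srcPath);            // validate the HFile and its key range
  //   store.bulkLoadHFile(srcPath.toString(), seqId);  // move it into place in the store
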
  // General accessors into the state of the store
  // TODO abstract some of this out into a metrics class

  /**
   * @return <tt>true</tt> if the store has any underlying reference files to older HFiles
   */
  boolean hasReferences();

  /**
   * @return The size of this store's memstore, in bytes
   */
  long getMemStoreSize();

  /**
   * @return The amount of memory we could flush from this memstore; usually this is equal to
   * {@link #getMemStoreSize()} unless we are carrying snapshots, in which case it will be the
   * size of the outstanding snapshots.
   */
  long getFlushableSize();

  HColumnDescriptor getFamily();

  /**
   * @return The maximum sequence id in all store files.
   */
  long getMaxSequenceId();

  /**
   * @return The maximum memstoreTS in all store files.
   */
  long getMaxMemstoreTS();

  /**
   * @return the data block encoder
   */
  HFileDataBlockEncoder getDataBlockEncoder();

  /** @return aggregate size of all HStore files used in the last compaction */
  long getLastCompactSize();

  /** @return aggregate size of the HStore */
  long getSize();

  /**
   * @return Count of store files
   */
  int getStorefilesCount();

  /**
   * @return Max age of store files in this store
   */
  long getMaxStoreFileAge();

  /**
   * @return Min age of store files in this store
   */
  long getMinStoreFileAge();

  /**
   * @return Average age of store files in this store, 0 if no store files
   */
  long getAvgStoreFileAge();

  /**
   * @return Number of reference files in this store
   */
  long getNumReferenceFiles();

  /**
   * @return Number of HFiles in this store
   */
  long getNumHFiles();

  /**
   * @return The size of the store files, in bytes, uncompressed.
   */
  long getStoreSizeUncompressed();

  /**
   * @return The size of the store files, in bytes.
   */
  long getStorefilesSize();

  /**
   * @return The size of the store file indexes, in bytes.
   */
  long getStorefilesIndexSize();

  /**
   * Returns the total size of all index blocks in the data block indexes, including the root
   * level, intermediate levels, and the leaf level for multi-level indexes, or just the root
   * level for single-level indexes.
   * @return the total size of block indexes in the store
   */
  long getTotalStaticIndexSize();

  /**
   * Returns the total byte size of all Bloom filter bit arrays. For compound Bloom filters even
   * the Bloom blocks currently not loaded into the block cache are counted.
   * @return the total size of all Bloom filters in the store
   */
  long getTotalStaticBloomSize();

  // Test-helper methods

  /**
   * Used for tests.
   * @return cache configuration for this Store.
   */
  CacheConfig getCacheConfig();

  /**
   * @return the parent region info hosting this store
   */
  HRegionInfo getRegionInfo();

  RegionCoprocessorHost getCoprocessorHost();

  boolean areWritesEnabled();

  /**
   * @return The smallest mvcc readPoint across all the scanners in this region. Writes older
   * than this readPoint are included in every read operation.
   */
  long getSmallestReadPoint();

  String getColumnFamilyName();

  TableName getTableName();

  /**
   * @return The number of cells flushed to disk
   */
  long getFlushedCellsCount();

  /**
   * @return The total size of data flushed to disk, in bytes
   */
  long getFlushedCellsSize();

  /**
   * @return The number of cells processed during minor compactions
   */
  long getCompactedCellsCount();

  /**
   * @return The total amount of data processed during minor compactions, in bytes
   */
  long getCompactedCellsSize();

  /**
   * @return The number of cells processed during major compactions
   */
  long getMajorCompactedCellsCount();

  /**
   * @return The total amount of data processed during major compactions, in bytes
   */
  long getMajorCompactedCellsSize();

  /**
   * @param o Observer who wants to know about changes in set of Readers
   */
  void addChangedReaderObserver(ChangedReadersObserver o);

  /**
   * @param o Observer no longer interested in changes in set of Readers.
   */
  void deleteChangedReaderObserver(ChangedReadersObserver o);

  /**
   * @return Whether this store has too many store files.
   */
  boolean hasTooManyStoreFiles();

  /**
   * This value can represent the degree of emergency of compaction for this store. It should be
   * greater than or equal to 0.0; any value greater than 1.0 means we have too many store files.
   * <ul>
   * <li>if getStorefilesCount &lt;= getMinFilesToCompact, return 0.0</li>
   * <li>return (getStorefilesCount - getMinFilesToCompact) / (blockingFileCount -
   * getMinFilesToCompact)</li>
   * </ul>
   * <p>
   * For striped stores, this value should be calculated from the files in each stripe separately,
   * and the maximum value returned.
   * <p>
   * It is similar to {@link #getCompactPriority()} except that it is more suitable to use in a
   * linear formula. (A worked example is sketched in the comment below.)
   */
  double getCompactionPressure();
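
  // Worked example of the formula above (the configuration values are assumptions): with
  // getMinFilesToCompact = 3, blockingFileCount = 10 and getStorefilesCount = 7, the pressure is
  // (7 - 3) / (10 - 3) = 4 / 7, roughly 0.57; at 10 store files it reaches 1.0, i.e. the store
  // has hit its blocking file count.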
}