/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.NavigableSet;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionThroughputController;
import org.apache.hadoop.hbase.security.User;

/**
 * Interface for objects that hold a column family in a Region. It is a memstore and a set of zero
 * or more StoreFiles, which stretch backwards over time.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface Store extends HeapSize, StoreConfigInformation {

  /*
   * The default priority for user-specified compaction requests.
   * The user gets top priority unless we have blocking compactions. (Pri <= 0)
   */
  int PRIORITY_USER = 1;
  int NO_PRIORITY = Integer.MIN_VALUE;

  // General Accessors
  KeyValue.KVComparator getComparator();

  Collection<StoreFile> getStorefiles();

  /**
   * Close all the readers. We don't need to worry about subsequent requests because the HRegion
   * holds a write lock that will prevent any more reads or writes.
   * @return the {@link StoreFile StoreFiles} that were previously being used.
   * @throws IOException on failure
   */
  Collection<StoreFile> close() throws IOException;

  /**
   * Return a scanner for both the memstore and the HStore files. Assumes we are not in a
   * compaction.
   * @param scan Scan to apply when scanning the stores
   * @param targetCols columns to scan
   * @param readPt the read point to use for the scan
   * @return a scanner over the current key values
   * @throws IOException on failure
   */
  KeyValueScanner getScanner(Scan scan, final NavigableSet<byte[]> targetCols, long readPt)
      throws IOException;

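  /*
   * Illustrative sketch only (not part of the interface): one way a caller might use
   * getScanner(...). The "store" variable, the use of Bytes.toBytes for row keys, passing null
   * for targetCols to scan all columns, and the peek()/next()/close() calls on KeyValueScanner
   * are assumptions made for this example.
   *
   *   Scan scan = new Scan(Bytes.toBytes("startRow"), Bytes.toBytes("stopRow"));
   *   KeyValueScanner scanner = store.getScanner(scan, null, store.getSmallestReadPoint());
   *   try {
   *     // Iterate the merged view of the memstore and the store files.
   *     while (scanner.peek() != null) {
   *       Cell cell = scanner.next();
   *       // ... process cell ...
   *     }
   *   } finally {
   *     scanner.close();
   *   }
   */
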
  /**
   * Get all scanners with no filtering based on TTL (that happens further down
   * the line).
   * @param cacheBlocks whether blocks read by these scanners should be added to the block cache
   * @param isGet whether the scanners are being opened for a Get
   * @param usePread whether to use positional read (pread) rather than streaming read
   * @param isCompaction whether the scanners are being opened for a compaction
   * @param matcher the query matcher to use, if any
   * @param startRow the first row to scan
   * @param stopRow the row at which to stop scanning
   * @param readPt the read point to use for the scan
   * @return all scanners for this store
   */
  List<KeyValueScanner> getScanners(
    boolean cacheBlocks,
    boolean isGet,
    boolean usePread,
    boolean isCompaction,
    ScanQueryMatcher matcher,
    byte[] startRow,
    byte[] stopRow,
    long readPt
  ) throws IOException;

  ScanInfo getScanInfo();

  /**
   * Adds or replaces the specified KeyValues.
   * <p>
   * For each KeyValue specified, if a cell with the same row, family, and qualifier exists in
   * MemStore, it will be replaced. Otherwise, it will just be inserted into MemStore.
   * <p>
   * This operation is atomic on each KeyValue (row/family/qualifier) but not necessarily atomic
   * across all of them.
   * @param cells the cells to add or replace
   * @param readpoint readpoint below which we can safely remove duplicate KVs
   * @return memstore size delta
   * @throws IOException on failure
   */
  long upsert(Iterable<Cell> cells, long readpoint) throws IOException;

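  /*
   * Illustrative sketch only: upserting a batch of cells. The "store" variable, the hypothetical
   * buildIncrementCells() helper, and the use of getSmallestReadPoint() as the readpoint argument
   * are assumptions for this example; the readpoint is the point below which duplicate versions
   * can safely be removed.
   *
   *   List<Cell> cells = buildIncrementCells(); // hypothetical helper producing the new cells
   *   long sizeDelta = store.upsert(cells, store.getSmallestReadPoint());
   *   // sizeDelta is the change in memstore size caused by the upsert.
   */
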
  /**
   * Adds a value to the memstore
   * @param kv the KeyValue to add
   * @return memstore size delta
   */
  long add(KeyValue kv);

  /**
   * When was the oldest edit done in the memstore
   */
  long timeOfOldestEdit();

  /**
   * Removes a kv from the memstore. The KeyValue is removed only if its key & memstoreTS match the
   * key & memstoreTS value of the kv parameter.
   * @param kv the KeyValue to roll back
   */
  void rollback(final KeyValue kv);

  /**
   * Find the key that matches <i>row</i> exactly, or the one that immediately precedes it.
   * WARNING: Only use this method on a table where writes occur with strictly increasing
   * timestamps. This method assumes this pattern of writes in order to make it reasonably
   * performant. The search also relies on the assumption that deletes are for cells in the
   * container that follows, whether a memstore snapshot or a storefile, not for the current
   * container: i.e. we will see a delete before we come across the cell it deletes. The
   * presumption is that the memstore#kvset is processed before memstore#snapshot, and so on.
   * @param row The row key of the targeted row.
   * @return Found keyvalue or null if none found.
   * @throws IOException on failure
   */
  KeyValue getRowKeyAtOrBefore(final byte[] row) throws IOException;

  FileSystem getFileSystem();

  /**
   * @param maxKeyCount estimated maximum number of KeyValues to be written
   * @param compression Compression algorithm to use
   * @param isCompaction whether we are creating a new file in a compaction
   * @param includeMVCCReadpoint whether we should write out the MVCC readpoint
   * @param includesTags whether the new file will contain tags
   * @return Writer for a new StoreFile in the tmp dir.
   */
  StoreFile.Writer createWriterInTmp(
      long maxKeyCount,
      Compression.Algorithm compression,
      boolean isCompaction,
      boolean includeMVCCReadpoint,
      boolean includesTags
  ) throws IOException;

  /**
   * @param maxKeyCount estimated maximum number of KeyValues to be written
   * @param compression Compression algorithm to use
   * @param isCompaction whether we are creating a new file in a compaction
   * @param includeMVCCReadpoint whether we should write out the MVCC readpoint
   * @param includesTags whether the new file will contain tags
   * @param shouldDropBehind should the writer drop caches behind writes
   * @return Writer for a new StoreFile in the tmp dir.
   */
  StoreFile.Writer createWriterInTmp(
    long maxKeyCount,
    Compression.Algorithm compression,
    boolean isCompaction,
    boolean includeMVCCReadpoint,
    boolean includesTags,
    boolean shouldDropBehind
  ) throws IOException;

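  /*
   * Illustrative sketch only: writing a new file via a tmp-dir writer. The "store",
   * "estimatedKeyCount" and "sortedKvs" names, the choice of compression, and the append(Cell)
   * and close() calls on StoreFile.Writer are assumptions for this example; the real flush and
   * compaction code paths also commit the tmp file into the store afterwards.
   *
   *   boolean isCompaction = false;
   *   boolean includeMVCCReadpoint = true;
   *   boolean includesTags = false;
   *   StoreFile.Writer writer = store.createWriterInTmp(
   *       estimatedKeyCount, Compression.Algorithm.NONE, isCompaction, includeMVCCReadpoint,
   *       includesTags);
   *   try {
   *     for (KeyValue kv : sortedKvs) { // hypothetical source, must be in comparator order
   *       writer.append(kv);
   *     }
   *   } finally {
   *     writer.close();
   *   }
   */
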

  // Compaction oriented methods

  boolean throttleCompaction(long compactionSize);

  /**
   * getter for CompactionProgress object
   * @return CompactionProgress object; can be null
   */
  CompactionProgress getCompactionProgress();

  CompactionContext requestCompaction() throws IOException;

  /**
   * @deprecated see requestCompaction(int, CompactionRequest, User)
   */
  @Deprecated
  CompactionContext requestCompaction(int priority, CompactionRequest baseRequest)
      throws IOException;

  CompactionContext requestCompaction(int priority, CompactionRequest baseRequest, User user)
      throws IOException;

  void cancelRequestedCompaction(CompactionContext compaction);

  /**
   * @deprecated see compact(CompactionContext, CompactionThroughputController, User)
   */
  @Deprecated
  List<StoreFile> compact(CompactionContext compaction,
      CompactionThroughputController throughputController) throws IOException;

  List<StoreFile> compact(CompactionContext compaction,
      CompactionThroughputController throughputController, User user) throws IOException;

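  /*
   * Illustrative sketch only: the request/compact/cancel sequence as a caller (normally the
   * region server's compaction machinery) might drive it. The "store", "throughputController"
   * and "user" variables, and passing null as the base request, are assumptions for this example.
   *
   *   CompactionContext compaction = store.requestCompaction(Store.PRIORITY_USER, null, user);
   *   if (compaction == null) {
   *     // Nothing was selected for compaction right now.
   *   } else {
   *     try {
   *       List<StoreFile> newFiles = store.compact(compaction, throughputController, user);
   *     } catch (IOException e) {
   *       store.cancelRequestedCompaction(compaction);
   *       throw e;
   *     }
   *   }
   */
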
  /**
   * @return true if we should run a major compaction.
   */
  boolean isMajorCompaction() throws IOException;

  void triggerMajorCompaction();

  /**
   * See if there are too many store files in this store
   * @return true if number of store files is greater than the number defined in minFilesToCompact
   */
  boolean needsCompaction();

  int getCompactPriority();

  StoreFlushContext createFlushContext(long cacheFlushId);

  /**
   * Call to complete a compaction. It's for the case where we find in the WAL a compaction
   * that was not finished. We could find one recovering a WAL after a regionserver crash.
   * See HBASE-2331.
   * @param compaction the compaction descriptor recovered from the WAL
   */
  void completeCompactionMarker(CompactionDescriptor compaction)
      throws IOException;

  // Split oriented methods

  boolean canSplit();

  /**
   * Determines if the Store should be split.
   * @return the split row if the store should be split, null otherwise.
   */
  byte[] getSplitPoint();

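  /*
   * Illustrative sketch only: how split-readiness might be checked. The "store" variable and the
   * handling of a null split point are assumptions for this example.
   *
   *   if (store.canSplit()) {
   *     byte[] splitRow = store.getSplitPoint();
   *     if (splitRow != null) {
   *       // Hand splitRow to the region split machinery.
   *     }
   *   }
   */
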
  // Bulk Load methods

  /**
   * This throws a WrongRegionException if the HFile does not fit in this region, or an
   * InvalidHFileException if the HFile is not valid.
   */
  void assertBulkLoadHFileOk(Path srcPath) throws IOException;

  /**
   * This method should only be called from HRegion. It is assumed that the ranges of values in the
   * HFile fit within the store's assigned region. (assertBulkLoadHFileOk checks this)
   *
   * @param srcPathStr path to the HFile to be bulk loaded
   * @param sequenceId sequence Id associated with the HFile
   */
  void bulkLoadHFile(String srcPathStr, long sequenceId) throws IOException;

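  /*
   * Illustrative sketch only: validating and then bulk loading an HFile. The "store" variable,
   * the example path, and the source of "sequenceId" are assumptions for this example.
   *
   *   Path hfilePath = new Path("/staging/cf/hfile"); // hypothetical staging location
   *   store.assertBulkLoadHFileOk(hfilePath);         // throws if the file does not fit or parse
   *   store.bulkLoadHFile(hfilePath.toString(), sequenceId);
   */
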
  // General accessors into the state of the store
  // TODO abstract some of this out into a metrics class

  /**
   * @return <tt>true</tt> if the store has any underlying reference files to older HFiles
   */
  boolean hasReferences();

  /**
   * @return The size of this store's memstore, in bytes
   */
  long getMemStoreSize();

  /**
   * @return The amount of memory we could flush from this memstore; usually this is equal to
   * {@link #getMemStoreSize()} unless we are carrying snapshots and then it will be the size of
   * outstanding snapshots.
   */
  long getFlushableSize();

  HColumnDescriptor getFamily();

  /**
   * @return The maximum memstoreTS in all store files.
   */
  long getMaxMemstoreTS();

  /**
   * @return the data block encoder
   */
  HFileDataBlockEncoder getDataBlockEncoder();

  /** @return aggregate size of all HStores used in the last compaction */
  long getLastCompactSize();

  /** @return aggregate size of HStore */
  long getSize();

  /**
   * @return Count of store files
   */
  int getStorefilesCount();

  /**
   * @return Max age of store files in this store
   */
  long getMaxStoreFileAge();

  /**
   * @return Min age of store files in this store
   */
  long getMinStoreFileAge();

  /**
   * @return Average age of store files in this store, 0 if no store files
   */
  long getAvgStoreFileAge();

  /**
   * @return Number of reference files in this store
   */
  long getNumReferenceFiles();

  /**
   * @return Number of HFiles in this store
   */
  long getNumHFiles();

  /**
   * @return The size of the store files, in bytes, uncompressed.
   */
  long getStoreSizeUncompressed();

  /**
   * @return The size of the store files, in bytes.
   */
  long getStorefilesSize();

  /**
   * @return The size of the store file indexes, in bytes.
   */
  long getStorefilesIndexSize();

  /**
   * Returns the total size of all index blocks in the data block indexes, including the root level,
   * intermediate levels, and the leaf level for multi-level indexes, or just the root level for
   * single-level indexes.
   * @return the total size of block indexes in the store
   */
  long getTotalStaticIndexSize();

  /**
   * Returns the total byte size of all Bloom filter bit arrays. For compound Bloom filters even the
   * Bloom blocks currently not loaded into the block cache are counted.
   * @return the total size of all Bloom filters in the store
   */
  long getTotalStaticBloomSize();

  // Test-helper methods

  /**
   * Used for tests.
   * @return cache configuration for this Store.
   */
  CacheConfig getCacheConfig();

  /**
   * @return the parent region info hosting this store
   */
  HRegionInfo getRegionInfo();

  RegionCoprocessorHost getCoprocessorHost();

  boolean areWritesEnabled();

  /**
   * @return The smallest mvcc readPoint across all the scanners in this
   * region. Writes older than this readPoint are included in every
   * read operation.
   */
  long getSmallestReadPoint();

  String getColumnFamilyName();

  TableName getTableName();

  /**
   * @return The number of cells flushed to disk
   */
  long getFlushedCellsCount();

  /**
   * @return The total size of data flushed to disk, in bytes
   */
  long getFlushedCellsSize();

  /**
   * @return The number of cells processed during minor compactions
   */
  long getCompactedCellsCount();

  /**
   * @return The total amount of data processed during minor compactions, in bytes
   */
  long getCompactedCellsSize();

  /**
   * @return The number of cells processed during major compactions
   */
  long getMajorCompactedCellsCount();

  /**
   * @return The total amount of data processed during major compactions, in bytes
   */
  long getMajorCompactedCellsSize();

  /*
   * @param o Observer who wants to know about changes in set of Readers
   */
  void addChangedReaderObserver(ChangedReadersObserver o);

  /*
   * @param o Observer no longer interested in changes in set of Readers.
   */
  void deleteChangedReaderObserver(ChangedReadersObserver o);

  /**
   * @return Whether this store has too many store files.
   */
  boolean hasTooManyStoreFiles();

  /**
   * This value can represent the degree of emergency of compaction for this store. It should be
   * greater than or equal to 0.0; any value greater than 1.0 means we have too many store files.
   * <ul>
   * <li>if getStorefilesCount &lt;= getMinFilesToCompact, return 0.0</li>
   * <li>return (getStorefilesCount - getMinFilesToCompact) / (blockingFileCount -
   * getMinFilesToCompact)</li>
   * </ul>
   * <p>
   * For striped stores, this value should be calculated for the files in each stripe separately,
   * with the maximum value returned.
   * <p>
   * It is similar to {@link #getCompactPriority()} except that it is more suitable for use in a
   * linear formula.
   */
  double getCompactionPressure();
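
  /*
   * Illustrative sketch only: the basic (non-striped) calculation described above, written out.
   * The "storefilesCount", "minFilesToCompact" and "blockingFileCount" variable names are
   * assumptions for this example; the latter two come from store/compaction configuration.
   *
   *   double pressure;
   *   if (storefilesCount <= minFilesToCompact) {
   *     pressure = 0.0;
   *   } else {
   *     pressure = (double) (storefilesCount - minFilesToCompact)
   *         / (blockingFileCount - minFilesToCompact);
   *   }
   *   // pressure >= 1.0 means the store is at or above the blocking file count,
   *   // i.e. it has too many store files.
   */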
}