/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.compactions;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.regionserver.StoreConfigInformation;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.ReflectionUtils;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.PeekingIterator;

/**
 * HBASE-15181 This is a simple implementation of date-based tiered compaction, similar to
 * Cassandra's, with the following benefits:
 * 1. Improves date-range-based scans by structuring store files in a date-based tiered layout.
 * 2. Reduces compaction overhead.
 * 3. Improves TTL efficiency.
 * It is a perfect fit for use cases that:
 * 1. write and scan data mostly by date, with a focus on the most recent data.
 * 2. never or rarely delete data.
 * Out-of-order writes are handled gracefully. Time range overlapping among store files is
 * tolerated and the performance impact is minimized. Configuration can be set at the hbase-site
 * level or overridden per table or per column family via the hbase shell.
 * Design spec is at
 * https://docs.google.com/document/d/1_AmlNb2N8Us1xICsTeGDLKIqL6T-oHoRLZ323MG_uy8/
 */
public class DateTieredCompactionPolicy extends RatioBasedCompactionPolicy {
  private static final Log LOG = LogFactory.getLog(DateTieredCompactionPolicy.class);

  private RatioBasedCompactionPolicy compactionPolicyPerWindow;

  public DateTieredCompactionPolicy(Configuration conf, StoreConfigInformation storeConfigInfo)
      throws IOException {
    super(conf, storeConfigInfo);
    try {
      compactionPolicyPerWindow =
          ReflectionUtils.instantiateWithCustomCtor(comConf.getCompactionPolicyForTieredWindow(),
            new Class[] { Configuration.class, StoreConfigInformation.class }, new Object[] { conf,
                storeConfigInfo });
    } catch (Exception e) {
      throw new IOException("Unable to load configured compaction policy '"
          + comConf.getCompactionPolicyForTieredWindow() + "'", e);
    }
  }

  @Override
  public boolean isMajorCompaction(Collection<StoreFile> filesToCompact) throws IOException {
    // TODO: major compaction with tiered output. Never do major compaction unless forced for now.
    return false;
  }

  /**
   * Heuristics for guessing whether we need compaction.
   */
  @Override
  public boolean needsCompaction(final Collection<StoreFile> storeFiles,
      final List<StoreFile> filesCompacting) {
    return needsCompaction(storeFiles, filesCompacting, System.currentTimeMillis());
  }

  @VisibleForTesting
  public boolean needsCompaction(final Collection<StoreFile> storeFiles,
      final List<StoreFile> filesCompacting, long now) {
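    // A dry run of the selection below: the inherited filterBulk and skipLargeFiles prune
    // bulk-loaded files excluded from minor compaction and files over the max compaction size;
    // compaction is needed iff the policy still finds a bucket of candidates afterwards.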
    ArrayList<StoreFile> candidates = new ArrayList<StoreFile>(storeFiles);
    candidates = filterBulk(candidates);
    candidates = skipLargeFiles(candidates);
    try {
      candidates = applyCompactionPolicy(candidates, true, false, now);
    } catch (Exception e) {
      LOG.error("Cannot check for compaction: ", e);
      return false;
    }
    return candidates != null;
  }

  /**
   * Input candidates are sorted from oldest to newest by seqId. May return null if no suitable
   * candidates are found.
   */
  @Override
  public ArrayList<StoreFile> applyCompactionPolicy(ArrayList<StoreFile> candidates,
      boolean mayUseOffPeak, boolean mayBeStuck) throws IOException {
    return applyCompactionPolicy(candidates, mayUseOffPeak, mayBeStuck,
      System.currentTimeMillis());
  }

  @VisibleForTesting
  public ArrayList<StoreFile> applyCompactionPolicy(ArrayList<StoreFile> candidates,
      boolean mayUseOffPeak, boolean mayBeStuck, long now) throws IOException {
    Iterable<StoreFile> candidatesInWindow =
      filterOldStoreFiles(Lists.newArrayList(candidates), comConf.getMaxStoreFileAgeMillis(), now);

    List<ArrayList<StoreFile>> buckets =
        partitionFilesToBuckets(candidatesInWindow, comConf.getBaseWindowMillis(),
          comConf.getWindowsPerTier(), now);
    LOG.debug("Compaction buckets are: " + buckets);
    if (buckets.size() >= storeConfigInfo.getBlockingFileCount()) {
      LOG.warn("Number of compaction buckets: " + buckets.size()
        + " exceeds blocking file count setting: "
        + storeConfigInfo.getBlockingFileCount()
        + "; either increase hbase.hstore.blockingStoreFiles or "
        + "reduce the number of tiered compaction windows");
    }

    return newestBucket(buckets, comConf.getIncomingWindowMin(), now, comConf.getBaseWindowMillis(),
      mayUseOffPeak);
  }

  /**
   * Returns the newest bucket that has enough files to compact.
   * @param buckets the list of buckets, sorted from newest to oldest, from which to return the
   *          newest bucket within thresholds.
   * @param incomingWindowThreshold minimum number of store files a bucket in the incoming window
   *          must have to qualify.
   * @param now current time, used to locate the incoming window.
   * @param baseWindowMillis size of the incoming window in milliseconds.
   * @param mayUseOffPeak whether off-peak compaction settings may be used.
   * @return a bucket (a list of store files within a window to be compacted), or null if no
   *         bucket qualifies.
   * @throws IOException if the per-window compaction policy fails.
   */
  private ArrayList<StoreFile> newestBucket(List<ArrayList<StoreFile>> buckets,
      int incomingWindowThreshold, long now, long baseWindowMillis, boolean mayUseOffPeak)
      throws IOException {
    Window incomingWindow = getInitialWindow(now, baseWindowMillis);
    for (ArrayList<StoreFile> bucket : buckets) {
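      // A bucket whose newest file is still in the incoming window may be receiving flushes,
      // so it must accumulate incomingWindowThreshold files first; older buckets use the
      // regular minFilesToCompact setting.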
      int minThreshold = incomingWindow.compareToTimestamp(bucket.get(0).getMaximumTimestamp())
          <= 0 ? incomingWindowThreshold : comConf.minFilesToCompact;
      compactionPolicyPerWindow.setMinThreshold(minThreshold);
      ArrayList<StoreFile> candidates = compactionPolicyPerWindow.applyCompactionPolicy(bucket,
        mayUseOffPeak, false);
      if (candidates != null && !candidates.isEmpty()) {
        return candidates;
      }
    }
    return null;
  }

  /**
   * We receive store files sorted by seqId in ascending order, then scan the list. If the
   * current file has a maxTimestamp older than the last known maximum, treat the file as if it
   * carried the last known maximum, so that both seqId and timestamp are in the same order. Files
   * carrying the same maxTimestamp remain ordered by seqId. We then reverse the list so files are
   * ordered by seqId and maxTimestamp in descending order, and build the time windows. This puts
   * all out-of-order data into the same compaction window, guaranteeing contiguous compaction
   * based on sequence id.
   */
  private static List<ArrayList<StoreFile>> partitionFilesToBuckets(Iterable<StoreFile> storeFiles,
      long baseWindowSizeMillis, int windowsPerTier, long now) {
    List<ArrayList<StoreFile>> buckets = Lists.newArrayList();
    Window window = getInitialWindow(now, baseWindowSizeMillis);

    List<Pair<StoreFile, Long>> storefileMaxTimestampPairs =
        Lists.newArrayListWithCapacity(Iterables.size(storeFiles));
    long maxTimestampSeen = Long.MIN_VALUE;
    for (StoreFile storeFile : storeFiles) {
      // If there is out-of-order data, treat it as if it had the newest timestamp seen so far,
      // so it lands in the same window as the preceding file.
      maxTimestampSeen = Math.max(maxTimestampSeen, storeFile.getMaximumTimestamp());
      storefileMaxTimestampPairs.add(new Pair<StoreFile, Long>(storeFile, maxTimestampSeen));
    }

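    // e.g. per-file maxTimestamps of [5, 10, 8] (in seqId order) become running maxima
    // [5, 10, 10], so after the reverse below the list is non-increasing in both seqId
    // and effective timestamp.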
    Collections.reverse(storefileMaxTimestampPairs);
    PeekingIterator<Pair<StoreFile, Long>> it =
        Iterators.peekingIterator(storefileMaxTimestampPairs.iterator());

    while (it.hasNext()) {
      int compResult = window.compareToTimestamp(it.peek().getSecond());
      if (compResult > 0) {
        // If the file is too old for the window, switch to the next window
        window = window.nextWindow(windowsPerTier);
      } else {
        // The file is within the target window
        ArrayList<StoreFile> bucket = Lists.newArrayList();
        // Add all files in the same window to the current bucket. For the incoming window
        // we tolerate files with future data, although it is sub-optimal.
        while (it.hasNext() && window.compareToTimestamp(it.peek().getSecond()) <= 0) {
          bucket.add(it.next().getFirst());
        }
        if (!bucket.isEmpty()) {
          buckets.add(bucket);
        }
      }
    }

    return buckets;
  }

  /**
   * Removes all store files with a max timestamp older than (current - maxAge).
   * @param storeFiles all store files to consider
   * @param maxAge the age in milliseconds at which a store file stops participating in compaction.
   * @param now current time. Store files with a max timestamp less than (now - maxAge) are
   *          filtered out.
   * @return the store files with those older than maxAge excluded
   */
  private static Iterable<StoreFile> filterOldStoreFiles(List<StoreFile> storeFiles, long maxAge,
      long now) {
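    // With a max age of zero, the cutoff is 'now' itself, so only files whose newest data
    // is at or after 'now' could pass; short-circuit to an empty list instead.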
    if (maxAge == 0) {
      return ImmutableList.of();
    }
    final long cutoff = now - maxAge;
    return Iterables.filter(storeFiles, new Predicate<StoreFile>() {
      @Override
      public boolean apply(StoreFile storeFile) {
        // This is for findbugs' issue with Guava. We know this won't happen.
        if (storeFile == null) {
          return false;
        }
        return storeFile.getMaximumTimestamp() >= cutoff;
      }
    });
  }

  /**
   * Returns the initial (incoming) window, covering the current time. Moving backwards from it
   * partitions the time from epoch to now into tiers of exponentially sized windows.
   */
  private static Window getInitialWindow(long now, long timeUnit) {
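    // e.g. now = 95, timeUnit = 10 -> Window(10, 9), covering timestamps [90, 99]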
    return new Window(timeUnit, now / timeUnit);
  }

  private static class Window {
    /**
     * How big a range of timestamps fits inside the window, in milliseconds.
     */
    private final long windowMillis;
    /**
     * A timestamp t is within the window iff t / windowMillis == divPosition.
     */
    private final long divPosition;

    public Window(long baseWindowMillis, long divPosition) {
      this.windowMillis = baseWindowMillis;
      this.divPosition = divPosition;
    }

    /**
     * Compares the window to a timestamp.
     * @param timestamp the timestamp to compare.
     * @return a negative integer, zero, or a positive integer as the window lies before, covers,
     *         or lies after the timestamp.
     */
    public int compareToTimestamp(long timestamp) {
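      // e.g. windowMillis = 10, divPosition = 9: timestamp 95 -> 0 (covered),
      // 105 -> -1 (window lies before it), 85 -> 1 (window lies after it)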
      long pos = timestamp / windowMillis;
      return divPosition == pos ? 0 : divPosition < pos ? -1 : 1;
    }

    /**
     * Moves to the next window, either in the same tier or in the next tier, which represents an
     * earlier time span.
     * @param windowsPerTier The number of contiguous windows that will have the same size. Windows
     *          following those will be <code>windowsPerTier</code> times as big.
     * @return The next window
     */
    public Window nextWindow(int windowsPerTier) {
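      // e.g. windowsPerTier = 4: (size 10, pos 7) -> (10, 6) -> (10, 5) -> (10, 4) -> (40, 0);
      // window size grows by windowsPerTier each time a tier boundary is crossed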
      if (divPosition % windowsPerTier > 0) {
        return new Window(windowMillis, divPosition - 1);
      } else {
        return new Window(windowMillis * windowsPerTier, divPosition / windowsPerTier - 1);
      }
    }
  }
}