View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver.compactions;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.List;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.classification.InterfaceAudience;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.hbase.regionserver.StoreConfigInformation;
31  import org.apache.hadoop.hbase.regionserver.StoreFile;
32  
33  /**
34   * Class to pick which files if any to compact together.
35   *
36   * This class will search all possibilities for different and if it gets stuck it will choose
37   * the smallest set of files to compact.
38   */
39  @InterfaceAudience.Private
40  public class ExploringCompactionPolicy extends RatioBasedCompactionPolicy {
41    private static final Log LOG = LogFactory.getLog(ExploringCompactionPolicy.class);
42  
43    /**
44     * Constructor for ExploringCompactionPolicy.
45     * @param conf The configuration object
46     * @param storeConfigInfo An object to provide info about the store.
47     */
48    public ExploringCompactionPolicy(final Configuration conf,
49                                     final StoreConfigInformation storeConfigInfo) {
50      super(conf, storeConfigInfo);
51    }
52  
53    @Override
54    final ArrayList<StoreFile> applyCompactionPolicy(final ArrayList<StoreFile> candidates,
55      final boolean mayUseOffPeak, final boolean mightBeStuck) throws IOException {
56      // Start off choosing nothing.
57      List<StoreFile> bestSelection = new ArrayList<StoreFile>(0);
58      List<StoreFile> smallest = new ArrayList<StoreFile>(0);
59      long bestSize = 0;
60      long smallestSize = Long.MAX_VALUE;
61  
62      int opts = 0, optsInRatio = 0, bestStart = -1; // for debug logging
63      // Consider every starting place.
64      for (int start = 0; start < candidates.size(); start++) {
65        // Consider every different sub list permutation in between start and end with min files.
66        for (int currentEnd = start + comConf.getMinFilesToCompact() - 1;
67            currentEnd < candidates.size(); currentEnd++) {
68          List<StoreFile> potentialMatchFiles = candidates.subList(start, currentEnd + 1);
69  
70          // Sanity checks
71          if (potentialMatchFiles.size() < comConf.getMinFilesToCompact()) {
72            continue;
73          }
74          if (potentialMatchFiles.size() > comConf.getMaxFilesToCompact()) {
75            continue;
76          }
77  
78          // Compute the total size of files that will
79          // have to be read if this set of files is compacted.
80          long size = getTotalStoreSize(potentialMatchFiles);
81  
82          // Store the smallest set of files.  This stored set of files will be used
83          // if it looks like the algorithm is stuck.
84          if (size < smallestSize) {
85            smallest = potentialMatchFiles;
86            smallestSize = size;
87          }
88  
89          if (size > comConf.getMaxCompactSize()) {
90            continue;
91          }
92  
93          ++opts;
94          if (size >= comConf.getMinCompactSize()
95              && !filesInRatio(potentialMatchFiles, mayUseOffPeak)) {
96            continue;
97          }
98  
99          ++optsInRatio;
100         if (isBetterSelection(bestSelection, bestSize, potentialMatchFiles, size, mightBeStuck)) {
101           bestSelection = potentialMatchFiles;
102           bestSize = size;
103           bestStart = start;
104         }
105       }
106     }
107     if (bestSelection.size() == 0 && mightBeStuck) {
108       LOG.debug("Exploring compaction algorithm has selected " + smallest.size()
109           + " files of size "+ smallestSize + " because the store might be stuck");
110       return new ArrayList<StoreFile>(smallest);
111     }
112     LOG.debug("Exploring compaction algorithm has selected " + bestSelection.size()
113         + " files of size " + bestSize + " starting at candidate #" + bestStart +
114         " after considering " + opts + " permutations with " + optsInRatio + " in ratio");
115     return new ArrayList<StoreFile>(bestSelection);
116   }
117 
118   private boolean isBetterSelection(List<StoreFile> bestSelection,
119       long bestSize, List<StoreFile> selection, long size, boolean mightBeStuck) {
120     if (mightBeStuck && bestSize > 0 && size > 0) {
121       // Keep the selection that removes most files for least size. That penaltizes adding
122       // large files to compaction, but not small files, so we don't become totally inefficient
123       // (might want to tweak that in future). Also, given the current order of looking at
124       // permutations, prefer earlier files and smaller selection if the difference is small.
125       final double REPLACE_IF_BETTER_BY = 1.05;
126       double thresholdQuality = ((double)bestSelection.size() / bestSize) * REPLACE_IF_BETTER_BY;
127       return thresholdQuality < ((double)selection.size() / size);
128     }
129     // Keep if this gets rid of more files.  Or the same number of files for less io.
130     return selection.size() > bestSelection.size()
131       || (selection.size() == bestSelection.size() && size < bestSize);
132   }
133 
134   /**
135    * Find the total size of a list of store files.
136    * @param potentialMatchFiles StoreFile list.
137    * @return Sum of StoreFile.getReader().length();
138    */
139   private long getTotalStoreSize(final List<StoreFile> potentialMatchFiles) {
140     long size = 0;
141 
142     for (StoreFile s:potentialMatchFiles) {
143       size += s.getReader().length();
144     }
145     return size;
146   }
147 
148   /**
149    * Check that all files satisfy the constraint
150    *      FileSize(i) <= ( Sum(0,N,FileSize(_)) - FileSize(i) ) * Ratio.
151    *
152    * @param files List of store files to consider as a compaction candidate.
153    * @param isOffPeak should the offPeak compaction ratio be used ?
154    * @return a boolean if these files satisfy the ratio constraints.
155    */
156   private boolean filesInRatio(final List<StoreFile> files, final boolean isOffPeak) {
157     if (files.size() < 2) {
158       return  true;
159     }
160     final double currentRatio =
161         isOffPeak ? comConf.getCompactionRatioOffPeak() : comConf.getCompactionRatio();
162 
163     long totalFileSize = getTotalStoreSize(files);
164 
165     for (StoreFile file : files) {
166       long singleFileSize = file.getReader().length();
167       long sumAllOtherFileSizes = totalFileSize - singleFileSize;
168 
169       if (singleFileSize > sumAllOtherFileSizes * currentRatio) {
170         return false;
171       }
172     }
173     return true;
174   }
175 }