/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.compactions;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.PeekingIterator;
import com.google.common.math.LongMath;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.StoreConfigInformation;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.ReflectionUtils;

/**
 * HBASE-15181 This is a simple implementation of date-based tiered compaction similar to
 * Cassandra's for the following benefits:
 * <ol>
 * <li>Improve date-range-based scan by structuring store files in date-based tiered layout.</li>
 * <li>Reduce compaction overhead.</li>
 * <li>Improve TTL efficiency.</li>
 * </ol>
 * Perfect fit for the use cases that:
 * <ol>
 * <li>has mostly date-based data write and scan and a focus on the most recent data.</li>
 * </ol>
 * Out-of-order writes are handled gracefully. Time range overlapping among store files is
 * tolerated and the performance impact is minimized. Configuration can be set at hbase-site
 * or overridden at per-table or per-column-family level by hbase shell.
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
public class DateTieredCompactionPolicy extends SortedCompactionPolicy {

  private static final Log LOG = LogFactory.getLog(DateTieredCompactionPolicy.class);

  private final RatioBasedCompactionPolicy compactionPolicyPerWindow;

  private final CompactionWindowFactory windowFactory;

  public DateTieredCompactionPolicy(Configuration conf, StoreConfigInformation storeConfigInfo)
      throws IOException {
    super(conf, storeConfigInfo);
    try {
      compactionPolicyPerWindow = ReflectionUtils.instantiateWithCustomCtor(
        comConf.getCompactionPolicyForDateTieredWindow(),
        new Class[] { Configuration.class, StoreConfigInformation.class },
        new Object[] { conf, storeConfigInfo });
    } catch (Exception e) {
      throw new IOException("Unable to load configured compaction policy '"
          + comConf.getCompactionPolicyForDateTieredWindow() + "'", e);
    }
    try {
      windowFactory = ReflectionUtils.instantiateWithCustomCtor(
        comConf.getDateTieredCompactionWindowFactory(),
        new Class[] { CompactionConfiguration.class }, new Object[] { comConf });
    } catch (Exception e) {
      throw new IOException("Unable to load configured window factory '"
          + comConf.getDateTieredCompactionWindowFactory() + "'", e);
    }
  }
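
  /*
   * Usage sketch (illustrative only; assumes a StoreConfigInformation is available from the
   * enclosing store):
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   StoreConfigInformation storeConfigInfo = ...; // provided by the enclosing HStore
   *   DateTieredCompactionPolicy policy = new DateTieredCompactionPolicy(conf, storeConfigInfo);
   *
   * Both the per-window policy and the window factory are resolved reflectively from the
   * configuration, so deployments can swap either without modifying this class.
   */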
  /**
   * Heuristics for guessing whether we need minor compaction.
   */
  @Override
  @VisibleForTesting
  public boolean needsCompaction(final Collection<StoreFile> storeFiles,
      final List<StoreFile> filesCompacting) {
    ArrayList<StoreFile> candidates = new ArrayList<StoreFile>(storeFiles);
    try {
      return !selectMinorCompaction(candidates, false, true).getFiles().isEmpty();
    } catch (Exception e) {
      LOG.error("Cannot check for compaction: ", e);
      return false;
    }
  }

  @Override
  public boolean shouldPerformMajorCompaction(final Collection<StoreFile> filesToCompact)
      throws IOException {
    long mcTime = getNextMajorCompactTime(filesToCompact);
    if (filesToCompact == null || mcTime == 0) {
      return false;
    }

    // TODO: Use better method for determining stamp of last major (HBASE-2990)
    long lowTimestamp = StoreUtils.getLowestTimestamp(filesToCompact);
    long now = EnvironmentEdgeManager.currentTimeMillis();
    if (lowTimestamp <= 0L || lowTimestamp >= (now - mcTime)) {
      return false;
    }

    long cfTtl = this.storeConfigInfo.getStoreFileTtl();
    HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
    List<Long> boundaries = getCompactBoundariesForMajor(filesToCompact, now);
    boolean[] filesInWindow = new boolean[boundaries.size()];
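    // One flag per window: if a file spans a window boundary, or two files land in the same
    // window, the store is no longer perfectly tiered and a major compaction is warranted.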

    for (StoreFile file : filesToCompact) {
      Long minTimestamp = file.getMinimumTimestamp();
      long oldest = (minTimestamp == null) ? Long.MIN_VALUE : now - minTimestamp.longValue();
      if (cfTtl != Long.MAX_VALUE && oldest >= cfTtl) {
        LOG.debug("Major compaction triggered on store " + this + "; for TTL maintenance");
        return true;
      }
      if (!file.isMajorCompaction() || file.isBulkLoadResult()) {
        LOG.debug("Major compaction triggered on store " + this
            + ", because there are new files and the time since the last major compaction is "
            + (now - lowTimestamp) + "ms");
        return true;
      }
      int lowerWindowIndex = Collections.binarySearch(boundaries,
          minTimestamp == null ? Long.MAX_VALUE : minTimestamp);
      int upperWindowIndex = Collections.binarySearch(boundaries,
          file.getMaximumTimestamp() == null ? Long.MAX_VALUE : file.getMaximumTimestamp());
      // Handle negative values from binarySearch: a non-match returns -(insertionPoint) - 1,
      // which maps back to the window whose start precedes the timestamp.
      lowerWindowIndex = (lowerWindowIndex < 0) ? Math.abs(lowerWindowIndex + 2) : lowerWindowIndex;
      upperWindowIndex = (upperWindowIndex < 0) ? Math.abs(upperWindowIndex + 2) : upperWindowIndex;

      if (lowerWindowIndex != upperWindowIndex) {
        LOG.debug("Major compaction triggered on store " + this + "; because file "
            + file.getPath() + " has data with timestamps crossing window boundaries");
        return true;
      } else if (filesInWindow[upperWindowIndex]) {
        LOG.debug("Major compaction triggered on store " + this
            + "; because there is more than one file in some window");
        return true;
      } else {
        filesInWindow[upperWindowIndex] = true;
      }

      hdfsBlocksDistribution.add(file.getHDFSBlockDistribution());
    }

    float blockLocalityIndex = hdfsBlocksDistribution
        .getBlockLocalityIndex(HRegionServer.getHostname(comConf.conf));
    if (blockLocalityIndex < comConf.getMinLocalityToForceCompact()) {
      LOG.debug("Major compaction triggered on store " + this
          + "; to make hdfs blocks local, current blockLocalityIndex is "
          + blockLocalityIndex + " (min " + comConf.getMinLocalityToForceCompact() + ")");
      return true;
    }

    LOG.debug("Skipping major compaction of " + this
        + ", because the files are already major compacted");
    return false;
  }

  @Override
  protected CompactionRequest getCompactionRequest(ArrayList<StoreFile> candidateSelection,
      boolean tryingMajor, boolean isUserCompaction, boolean mayUseOffPeak, boolean mayBeStuck)
      throws IOException {
    CompactionRequest result = tryingMajor ? selectMajorCompaction(candidateSelection)
        : selectMinorCompaction(candidateSelection, mayUseOffPeak, mayBeStuck);
    ArrayList<StoreFile> filesToCompact = Lists.newArrayList(result.getFiles());
    removeExcessFiles(filesToCompact, isUserCompaction, tryingMajor);
    result.updateFiles(filesToCompact);
    result.setOffPeak(!filesToCompact.isEmpty() && !tryingMajor && mayUseOffPeak);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Generated compaction request: " + result);
    }
    return result;
  }

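  // Major compaction: compact all candidates, passing per-window boundaries so the output is
  // split back into one file per window rather than a single monolithic file.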
  public CompactionRequest selectMajorCompaction(ArrayList<StoreFile> candidateSelection) {
    long now = EnvironmentEdgeManager.currentTimeMillis();
    return new DateTieredCompactionRequest(candidateSelection,
        this.getCompactBoundariesForMajor(candidateSelection, now));
  }

  /**
   * We receive store files sorted in ascending order by seqId, then scan the list. If the current
   * file has a maxTimestamp older than the last known maximum, treat this file as if it carried
   * the last known maximum. This way both seqId and timestamp are in the same order, and
   * out-of-order data lands in the same compaction window as its neighbors, guaranteeing
   * contiguous compaction based on sequence id.
   */
  public CompactionRequest selectMinorCompaction(ArrayList<StoreFile> candidateSelection,
      boolean mayUseOffPeak, boolean mayBeStuck) throws IOException {
    long now = EnvironmentEdgeManager.currentTimeMillis();
    long oldestToCompact = getOldestToCompact(comConf.getDateTieredMaxStoreFileAgeMillis(), now);

    List<Pair<StoreFile, Long>> storefileMaxTimestampPairs =
        Lists.newArrayListWithCapacity(candidateSelection.size());
    long maxTimestampSeen = Long.MIN_VALUE;
    for (StoreFile storeFile : candidateSelection) {
      // If there is out-of-order data, we put it in the same window as the last file in
      // increasing order.
      maxTimestampSeen = Math.max(maxTimestampSeen,
          storeFile.getMaximumTimestamp() == null ? Long.MIN_VALUE
              : storeFile.getMaximumTimestamp());
      storefileMaxTimestampPairs.add(new Pair<StoreFile, Long>(storeFile, maxTimestampSeen));
    }
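    // Example (illustrative): files in seqId order with maxTimestamps [5, 3, 9] become
    // [(f1,5), (f2,5), (f3,9)], so out-of-order f2 is swept into f1's window.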
    Collections.reverse(storefileMaxTimestampPairs);

    CompactionWindow window = getIncomingWindow(now);
    int minThreshold = comConf.getDateTieredIncomingWindowMin();
    PeekingIterator<Pair<StoreFile, Long>> it =
        Iterators.peekingIterator(storefileMaxTimestampPairs.iterator());
    while (it.hasNext()) {
      if (window.compareToTimestamp(oldestToCompact) < 0) {
        break;
      }
      int compResult = window.compareToTimestamp(it.peek().getSecond());
      if (compResult > 0) {
        // If the file is too old for the window, switch to the next earlier window.
        window = window.nextEarlierWindow();
        minThreshold = comConf.getMinFilesToCompact();
      } else {
        // The file is within the target window.
        ArrayList<StoreFile> fileList = Lists.newArrayList();
        // Add all files in the same window to the current list. For the incoming window
        // we tolerate files with future data although it is sub-optimal.
        while (it.hasNext() && window.compareToTimestamp(it.peek().getSecond()) <= 0) {
          fileList.add(it.next().getFirst());
        }
        if (fileList.size() >= minThreshold) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Processing files: " + fileList + " for window: " + window);
          }
          DateTieredCompactionRequest request = generateCompactionRequest(fileList, window,
              mayUseOffPeak, mayBeStuck, minThreshold);
          if (request != null) {
            return request;
          }
        }
      }
    }

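    // No window yielded a viable selection; return an empty request.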
    return new CompactionRequest(Collections.<StoreFile> emptyList());
  }

  private DateTieredCompactionRequest generateCompactionRequest(ArrayList<StoreFile> storeFiles,
      CompactionWindow window, boolean mayUseOffPeak, boolean mayBeStuck, int minThreshold)
      throws IOException {
    // The files have to be in ascending order for ratio-based compaction to work right
    // and for removeExcessFiles to exclude the youngest files.
    Collections.reverse(storeFiles);

    // Compact everything in the window if we have more files than comConf.maxBlockingFiles.
    compactionPolicyPerWindow.setMinThreshold(minThreshold);
    ArrayList<StoreFile> storeFileSelection = mayBeStuck ? storeFiles
        : compactionPolicyPerWindow.applyCompactionPolicy(storeFiles, mayUseOffPeak, false);
    if (storeFileSelection != null && !storeFileSelection.isEmpty()) {
      // If any file in the window is excluded from compaction,
      // only one file will be output from compaction.
      boolean singleOutput = storeFiles.size() != storeFileSelection.size()
          || comConf.useDateTieredSingleOutputForMinorCompaction();
      List<Long> boundaries = getCompactionBoundariesForMinor(window, singleOutput);
      return new DateTieredCompactionRequest(storeFileSelection, boundaries);
    }
    return null;
  }

  /**
   * Return a list of boundaries for multiple compaction output in ascending order.
   */
  private List<Long> getCompactBoundariesForMajor(Collection<StoreFile> filesToCompact, long now) {
    long minTimestamp = Long.MAX_VALUE;
    for (StoreFile file : filesToCompact) {
      minTimestamp = Math.min(minTimestamp,
          file.getMinimumTimestamp() == null ? Long.MAX_VALUE : file.getMinimumTimestamp());
    }

    List<Long> boundaries = new ArrayList<Long>();

    // Add startMillis of all windows between now and the min timestamp
    for (CompactionWindow window = getIncomingWindow(now);
        window.compareToTimestamp(minTimestamp) > 0;
        window = window.nextEarlierWindow()) {
      boundaries.add(window.startMillis());
    }
    boundaries.add(Long.MIN_VALUE);
    Collections.reverse(boundaries);
    return boundaries;
  }
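  // Example (illustrative): with window starts t3 (incoming) > t2 > t1 > minTimestamp, the loop
  // collects [t3, t2, t1]; adding Long.MIN_VALUE and reversing gives [Long.MIN_VALUE, t1, t2, t3].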

  /**
   * Return a list of boundaries for multiple compaction output for a minor compaction window.
   */
  private static List<Long> getCompactionBoundariesForMinor(CompactionWindow window,
      boolean singleOutput) {
    List<Long> boundaries = new ArrayList<Long>();
    boundaries.add(Long.MIN_VALUE);
    if (!singleOutput) {
      boundaries.add(window.startMillis());
    }
    return boundaries;
  }
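  // With singleOutput the boundaries are just [Long.MIN_VALUE], producing one output file;
  // otherwise [Long.MIN_VALUE, window.startMillis()] splits the output at the window's start.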

  private CompactionWindow getIncomingWindow(long now) {
    return windowFactory.newIncomingWindow(now);
  }

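  // checkedSubtract guards against underflow when maxAgeMillis is very large (e.g.
  // Long.MAX_VALUE, meaning no age limit): now - maxAgeMillis would wrap around, so instead
  // every file is treated as eligible for minor compaction.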
  private static long getOldestToCompact(long maxAgeMillis, long now) {
    try {
      return LongMath.checkedSubtract(now, maxAgeMillis);
    } catch (ArithmeticException ae) {
      LOG.warn("Value for " + CompactionConfiguration.DATE_TIERED_MAX_AGE_MILLIS_KEY + ": "
          + maxAgeMillis + ". All the files will be eligible for minor compaction.");
      return Long.MIN_VALUE;
    }
  }
}