/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.mapreduce.JobUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
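
/*
 * CompactionTool executes a compaction on the specified path, which may be a:
 * <ul>
 *  <li>table folder (all regions and families will be compacted)
 *  <li>region folder (all families in the region will be compacted)
 *  <li>family folder (the store files will be compacted)
 * </ul>
 */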
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
public class CompactionTool extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(CompactionTool.class);

  private final static String CONF_TMP_DIR = "hbase.tmp.dir";
  private final static String CONF_COMPACT_ONCE = "hbase.compactiontool.compact.once";
  private final static String CONF_COMPACT_MAJOR = "hbase.compactiontool.compact.major";
  private final static String CONF_DELETE_COMPACTED = "hbase.compactiontool.delete";
  private final static String CONF_COMPLETE_COMPACTION = "hbase.hstore.compaction.complete";
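
  /**
   * Class responsible for executing the compaction on the specified path.
   * The path can be a table, region or family directory.
   */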
  private static class CompactionWorker {
    private final boolean keepCompactedFiles;
    private final boolean deleteCompacted;
    private final Configuration conf;
    private final FileSystem fs;
    private final Path tmpDir;

    public CompactionWorker(final FileSystem fs, final Configuration conf) {
      this.conf = conf;
      this.keepCompactedFiles = !conf.getBoolean(CONF_COMPLETE_COMPACTION, true);
      this.deleteCompacted = conf.getBoolean(CONF_DELETE_COMPACTED, false);
      this.tmpDir = new Path(conf.get(CONF_TMP_DIR));
      this.fs = fs;
    }
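
    /**
     * Execute the compaction on the specified path.
     *
     * @param path Directory path on which to run compaction.
     * @param compactOnce Execute just a single step of compaction.
     * @param major Request a major compaction.
     */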
    public void compact(final Path path, final boolean compactOnce, final boolean major)
        throws IOException {
      if (isFamilyDir(fs, path)) {
        Path regionDir = path.getParent();
        Path tableDir = regionDir.getParent();
        HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
        HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
        compactStoreFiles(tableDir, htd, hri, path.getName(), compactOnce, major);
      } else if (isRegionDir(fs, path)) {
        Path tableDir = path.getParent();
        HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
        compactRegion(tableDir, htd, path, compactOnce, major);
      } else if (isTableDir(fs, path)) {
        compactTable(path, compactOnce, major);
      } else {
        throw new IOException(
          "Specified path is not a table, region or family directory. path=" + path);
      }
    }

    private void compactTable(final Path tableDir, final boolean compactOnce, final boolean major)
        throws IOException {
      HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
      for (Path regionDir: FSUtils.getRegionDirs(fs, tableDir)) {
        compactRegion(tableDir, htd, regionDir, compactOnce, major);
      }
    }

    private void compactRegion(final Path tableDir, final HTableDescriptor htd,
        final Path regionDir, final boolean compactOnce, final boolean major)
        throws IOException {
      HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
      for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
        compactStoreFiles(tableDir, htd, hri, familyDir.getName(), compactOnce, major);
      }
    }
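
    /**
     * Execute the actual compaction job.
     * If the compact-once flag is not specified, execute the compaction until
     * no more compactions are needed. Uses the Configuration settings provided.
     */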
    private void compactStoreFiles(final Path tableDir, final HTableDescriptor htd,
        final HRegionInfo hri, final String familyName, final boolean compactOnce,
        final boolean major) throws IOException {
      HStore store = getStore(conf, fs, tableDir, htd, hri, familyName, tmpDir);
      LOG.info("Compact table=" + htd.getTableName() +
        " region=" + hri.getRegionNameAsString() +
        " family=" + familyName);
      if (major) {
        store.triggerMajorCompaction();
      }
      do {
        CompactionContext compaction = store.requestCompaction(Store.PRIORITY_USER, null);
        if (compaction == null) break;
        List<StoreFile> storeFiles = store.compact(compaction);
        if (storeFiles != null && !storeFiles.isEmpty()) {
          if (keepCompactedFiles && deleteCompacted) {
            for (StoreFile storeFile: storeFiles) {
              fs.delete(storeFile.getPath(), false);
            }
          }
        }
      } while (store.needsCompaction() && !compactOnce);
    }
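
    /**
     * Create a "mock" HStore that uses the tmpDir specified by the user and
     * the store dir to compact as source.
     */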
    private static HStore getStore(final Configuration conf, final FileSystem fs,
        final Path tableDir, final HTableDescriptor htd, final HRegionInfo hri,
        final String familyName, final Path tempDir) throws IOException {
      HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, hri) {
        @Override
        public Path getTempDir() {
          return tempDir;
        }
      };
      HRegion region = new HRegion(regionFs, null, conf, htd, null);
      return new HStore(region, htd.getFamily(Bytes.toBytes(familyName)), conf);
    }
  }

  private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
    Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
    return fs.exists(regionInfo);
  }

  private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
    return FSTableDescriptors.getTableInfoPath(fs, path) != null;
  }

  private static boolean isFamilyDir(final FileSystem fs, final Path path) throws IOException {
    return isRegionDir(fs, path.getParent());
  }

  private static class CompactionMapper
      extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
    private CompactionWorker compactor = null;
    private boolean compactOnce = false;
    private boolean major = false;

    @Override
    public void setup(Context context) {
      Configuration conf = context.getConfiguration();
      compactOnce = conf.getBoolean(CONF_COMPACT_ONCE, false);
      major = conf.getBoolean(CONF_COMPACT_MAJOR, false);

      try {
        FileSystem fs = FileSystem.get(conf);
        this.compactor = new CompactionWorker(fs, conf);
      } catch (IOException e) {
        throw new RuntimeException("Could not get the input FileSystem", e);
      }
    }

    @Override
    public void map(LongWritable key, Text value, Context context)
        throws InterruptedException, IOException {
      Path path = new Path(value.toString());
      this.compactor.compact(path, compactOnce, major);
    }
  }
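
  /**
   * Input format that uses store files block location as input split locality.
   */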
  private static class CompactionInputFormat extends TextInputFormat {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
      return true;
    }
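
    /**
     * Returns a split for each store file directory using the block location
     * of each file as locality reference.
     */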
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
      List<InputSplit> splits = new ArrayList<InputSplit>();
      List<FileStatus> files = listStatus(job);

      Text key = new Text();
      for (FileStatus file: files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        LineReader reader = new LineReader(fs.open(path));
        long pos = 0;
        int n;
        try {
          while ((n = reader.readLine(key)) > 0) {
            // Locality comes from the store dir named on this line,
            // not from the input list file itself.
            String[] hosts = getStoreDirHosts(fs, new Path(key.toString()));
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          reader.close();
        }
      }

      return splits;
    }
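
    /**
     * Returns the top hosts of the store files, used as split locations.
     */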
    private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
        throws IOException {
      FileStatus[] files = FSUtils.listStatus(fs, path);
      if (files == null) {
        return new String[] {};
      }

      HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
      for (FileStatus hfileStatus: files) {
        HDFSBlocksDistribution storeFileBlocksDistribution =
          FSUtils.computeHDFSBlocksDistribution(fs, hfileStatus, 0, hfileStatus.getLen());
        hdfsBlocksDistribution.add(storeFileBlocksDistribution);
      }

      List<String> hosts = hdfsBlocksDistribution.getTopHosts();
      return hosts.toArray(new String[hosts.size()]);
    }
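
    /**
     * Create the input file for the given directories to compact.
     * The file is a TextFile with each line corresponding to a
     * store files directory to compact.
     */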
    public static void createInputFile(final FileSystem fs, final Path path,
        final Set<Path> toCompactDirs) throws IOException {
      // Extract the list of store dirs
      List<Path> storeDirs = new LinkedList<Path>();
      for (Path compactDir: toCompactDirs) {
        if (isFamilyDir(fs, compactDir)) {
          storeDirs.add(compactDir);
        } else if (isRegionDir(fs, compactDir)) {
          for (Path familyDir: FSUtils.getFamilyDirs(fs, compactDir)) {
            storeDirs.add(familyDir);
          }
        } else if (isTableDir(fs, compactDir)) {
          // Lookup regions
          for (Path regionDir: FSUtils.getRegionDirs(fs, compactDir)) {
            for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
              storeDirs.add(familyDir);
            }
          }
        } else {
          throw new IOException(
            "Specified path is not a table, region or family directory. path=" + compactDir);
        }
      }

      // Write a file with the list of store dirs to compact
      FSDataOutputStream stream = fs.create(path);
      LOG.info("Create input file=" + path + " with " + storeDirs.size() + " dirs to compact.");
      try {
        final byte[] newLine = Bytes.toBytes("\n");
        for (Path storeDir: storeDirs) {
          stream.write(Bytes.toBytes(storeDir.toString()));
          stream.write(newLine);
        }
      } finally {
        stream.close();
      }
    }
  }
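
  /**
   * Execute compaction, using a Map-Reduce job.
   */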
  private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
      final boolean compactOnce, final boolean major) throws Exception {
    Configuration conf = getConf();
    conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);
    conf.setBoolean(CONF_COMPACT_MAJOR, major);

    Job job = new Job(conf);
    job.setJobName("CompactionTool");
    job.setJarByClass(CompactionTool.class);
    job.setMapperClass(CompactionMapper.class);
    job.setInputFormatClass(CompactionInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setMapSpeculativeExecution(false);
    job.setNumReduceTasks(0);

    // Add dependencies (including HBase ones)
    TableMapReduceUtil.addDependencyJars(job);

    Path stagingDir = JobUtil.getStagingDir(conf);
    try {
      // Create input file with the store dirs
      Path inputPath = new Path(stagingDir, "compact-" + EnvironmentEdgeManager.currentTime());
      CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
      CompactionInputFormat.addInputPath(job, inputPath);

      // Initialize credential for secure cluster
      TableMapReduceUtil.initCredentials(job);

      // Start the MR Job and wait
      return job.waitForCompletion(true) ? 0 : 1;
    } finally {
      fs.delete(stagingDir, true);
    }
  }
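
  /**
   * Execute compaction, from this client, one path at a time.
   */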
  private int doClient(final FileSystem fs, final Set<Path> toCompactDirs,
      final boolean compactOnce, final boolean major) throws IOException {
    CompactionWorker worker = new CompactionWorker(fs, getConf());
    for (Path path: toCompactDirs) {
      worker.compact(path, compactOnce, major);
    }
    return 0;
  }

  @Override
  public int run(String[] args) throws Exception {
    Set<Path> toCompactDirs = new HashSet<Path>();
    boolean compactOnce = false;
    boolean major = false;
    boolean mapred = false;

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    try {
      for (int i = 0; i < args.length; ++i) {
        String opt = args[i];
        if (opt.equals("-compactOnce")) {
          compactOnce = true;
        } else if (opt.equals("-major")) {
          major = true;
        } else if (opt.equals("-mapred")) {
          mapred = true;
        } else if (!opt.startsWith("-")) {
          Path path = new Path(opt);
          FileStatus status = fs.getFileStatus(path);
          if (!status.isDirectory()) {
            printUsage("Specified path is not a directory. path=" + path);
            return 1;
          }
          toCompactDirs.add(path);
        } else {
          printUsage();
        }
      }
    } catch (Exception e) {
      printUsage(e.getMessage());
      return 1;
    }

    if (toCompactDirs.size() == 0) {
      printUsage("No directories to compact specified.");
      return 1;
    }

    // Execute compaction!
    if (mapred) {
      return doMapReduce(fs, toCompactDirs, compactOnce, major);
    } else {
      return doClient(fs, toCompactDirs, compactOnce, major);
    }
  }

  private void printUsage() {
    printUsage(null);
  }

  private void printUsage(final String message) {
    if (message != null && message.length() > 0) {
      System.err.println(message);
    }
    System.err.println("Usage: java " + this.getClass().getName() + " \\");
    System.err.println("  [-compactOnce] [-major] [-mapred] [-D<property=value>]* files...");
    System.err.println();
    System.err.println("Options:");
    System.err.println(" mapred         Use MapReduce to run compaction.");
    System.err.println(" compactOnce    Execute just one compaction step. (default: while needed)");
    System.err.println(" major          Trigger major compaction.");
    System.err.println();
    System.err.println("Note: -D properties will be applied to the conf used.");
    System.err.println("For example:");
    System.err.println(" To preserve input files, pass -D" + CONF_COMPLETE_COMPACTION + "=false");
    System.err.println(" To stop deletion of compacted files, pass -D" + CONF_DELETE_COMPACTED + "=false");
    System.err.println(" To set the tmp dir, pass -D" + CONF_TMP_DIR + "=ALTERNATE_DIR");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" To compact the full 'TestTable' using MapReduce:");
    System.err.println(" $ bin/hbase " + this.getClass().getName() +
      " -mapred hdfs:///hbase/data/default/TestTable");
    System.err.println();
    System.err.println(" To compact column family 'x' of the table 'TestTable' region 'abc':");
    System.err.println(" $ bin/hbase " + this.getClass().getName() +
      " hdfs:///hbase/data/default/TestTable/abc/x");
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(), args));
  }
}