/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 package org.apache.hadoop.hbase.regionserver;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.HashSet;
24 import java.util.LinkedList;
25 import java.util.List;
26 import java.util.Set;
27
28 import org.apache.commons.logging.Log;
29 import org.apache.commons.logging.LogFactory;
30
31 import org.apache.hadoop.classification.InterfaceAudience;
32 import org.apache.hadoop.conf.Configuration;
33 import org.apache.hadoop.conf.Configured;
34 import org.apache.hadoop.fs.FileStatus;
35 import org.apache.hadoop.fs.FileSystem;
36 import org.apache.hadoop.fs.FSDataOutputStream;
37 import org.apache.hadoop.fs.Path;
38 import org.apache.hadoop.io.LongWritable;
39 import org.apache.hadoop.io.NullWritable;
40 import org.apache.hadoop.io.Text;
41 import org.apache.hadoop.io.Writable;
42 import org.apache.hadoop.util.LineReader;
43 import org.apache.hadoop.util.Tool;
44 import org.apache.hadoop.util.ToolRunner;
45
46 import org.apache.hadoop.mapreduce.InputSplit;
47 import org.apache.hadoop.mapreduce.Job;
48 import org.apache.hadoop.mapreduce.JobContext;
49 import org.apache.hadoop.mapreduce.Mapper;
50 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
51 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
52 import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
53
54 import org.apache.hadoop.hbase.HBaseConfiguration;
55 import org.apache.hadoop.hbase.HColumnDescriptor;
56 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
57 import org.apache.hadoop.hbase.HTableDescriptor;
58 import org.apache.hadoop.hbase.HRegionInfo;
59 import org.apache.hadoop.hbase.regionserver.HRegion;
60 import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
61 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
62 import org.apache.hadoop.hbase.util.Bytes;
63 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
64 import org.apache.hadoop.hbase.util.FSTableDescriptors;
65 import org.apache.hadoop.hbase.util.FSUtils;
66
67
68
69
70
71
72
73
74
75 @InterfaceAudience.Public
76 public class CompactionTool extends Configured implements Tool {
77 private static final Log LOG = LogFactory.getLog(CompactionTool.class);
78
79 private final static String CONF_TMP_DIR = "hbase.tmp.dir";
80 private final static String CONF_COMPACT_ONCE = "hbase.compactiontool.compact.once";
81 private final static String CONF_DELETE_COMPACTED = "hbase.compactiontool.delete";
82 private final static String CONF_COMPLETE_COMPACTION = "hbase.hstore.compaction.complete";
83
84
85
86
87
  /**
   * Runs the compaction in-process against a table, region or family
   * directory, driving the regionserver {@link Store} machinery directly.
   */
  private static class CompactionWorker {
    private final boolean keepCompactedFiles;
    private final boolean deleteCompacted;
    private final Configuration conf;
    private final FileSystem fs;
    private final Path tmpDir;

    public CompactionWorker(final FileSystem fs, final Configuration conf) {
      this.conf = conf;
      // hbase.hstore.compaction.complete=false makes the Store keep its input
      // files; only in that mode is it safe to delete the compaction output
      // (see compactStoreFiles below).
      this.keepCompactedFiles = !conf.getBoolean(CONF_COMPLETE_COMPACTION, true);
      this.deleteCompacted = conf.getBoolean(CONF_DELETE_COMPACTED, false);
      // NOTE(review): assumes hbase.tmp.dir is always defined; a null value
      // would make this Path constructor throw — confirm default config.
      this.tmpDir = new Path(conf.get(CONF_TMP_DIR));
      this.fs = fs;
    }

    /**
     * Execute the compaction on the specified path. The path may point at a
     * family, region or table directory; anything else is rejected.
     *
     * @param path directory to compact (family, region or table dir)
     * @param compactOnce execute just a single step of compaction
     * @throws IOException if the path is none of the recognized directory kinds
     */
    public void compact(final Path path, final boolean compactOnce) throws IOException {
      if (isFamilyDir(fs, path)) {
        // Layout is tableDir/regionDir/familyDir: walk up to find the table.
        Path regionDir = path.getParent();
        Path tableDir = regionDir.getParent();
        HTableDescriptor htd = FSTableDescriptors.getTableDescriptor(fs, tableDir);
        HRegion region = loadRegion(fs, conf, htd, regionDir);
        compactStoreFiles(region, path, compactOnce);
      } else if (isRegionDir(fs, path)) {
        Path tableDir = path.getParent();
        HTableDescriptor htd = FSTableDescriptors.getTableDescriptor(fs, tableDir);
        compactRegion(htd, path, compactOnce);
      } else if (isTableDir(fs, path)) {
        compactTable(path, compactOnce);
      } else {
        throw new IOException(
          "Specified path is not a table, region or family directory. path=" + path);
      }
    }

    /** Compact every region of the table rooted at tableDir. */
    private void compactTable(final Path tableDir, final boolean compactOnce)
        throws IOException {
      HTableDescriptor htd = FSTableDescriptors.getTableDescriptor(fs, tableDir);
      LOG.info("Compact table=" + htd.getNameAsString());
      for (Path regionDir: FSUtils.getRegionDirs(fs, tableDir)) {
        compactRegion(htd, regionDir, compactOnce);
      }
    }

    /** Compact every family of the region rooted at regionDir. */
    private void compactRegion(final HTableDescriptor htd, final Path regionDir,
        final boolean compactOnce) throws IOException {
      HRegion region = loadRegion(fs, conf, htd, regionDir);
      LOG.info("Compact table=" + htd.getNameAsString() +
        " region=" + region.getRegionNameAsString());
      for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
        compactStoreFiles(region, familyDir, compactOnce);
      }
    }

    /**
     * Execute the actual compaction job for one store (family) directory.
     * Unless compactOnce is set, keeps requesting/executing compactions
     * until the Store reports no more compaction is needed.
     */
    private void compactStoreFiles(final HRegion region, final Path familyDir,
        final boolean compactOnce) throws IOException {
      LOG.info("Compact table=" + region.getTableDesc().getNameAsString() +
        " region=" + region.getRegionNameAsString() +
        " family=" + familyDir.getName());
      Store store = getStore(region, familyDir);
      do {
        CompactionRequest cr = store.requestCompaction();
        StoreFile storeFile = store.compact(cr);
        if (storeFile != null) {
          // Only remove the output when the Store also kept the input files
          // (keepCompactedFiles); otherwise we would delete live data.
          if (keepCompactedFiles && deleteCompacted) {
            fs.delete(storeFile.getPath(), false);
          }
        }
      } while (store.needsCompaction() && !compactOnce);
    }

    /**
     * Create a Store instance for the given family directory. The anonymous
     * subclass pins the store home dir to storeDir and lists its files
     * directly, so the Store operates on the user-supplied location instead
     * of a regionserver-managed one.
     */
    private Store getStore(final HRegion region, final Path storeDir) throws IOException {
      byte[] familyName = Bytes.toBytes(storeDir.getName());
      HColumnDescriptor hcd = region.getTableDesc().getFamily(familyName);
      return new Store(tmpDir, region, hcd, fs, conf) {
        @Override
        public FileStatus[] getStoreFiles() throws IOException {
          return this.fs.listStatus(getHomedir());
        }

        @Override
        Path createStoreHomeDir(FileSystem fs, Path homedir) throws IOException {
          return storeDir;
        }
      };
    }

    /** Load an HRegion instance from its on-disk region directory. */
    private static HRegion loadRegion(final FileSystem fs, final Configuration conf,
        final HTableDescriptor htd, final Path regionDir) throws IOException {
      // rootDir is two levels up: rootDir/tableDir/regionDir.
      Path rootDir = regionDir.getParent().getParent();
      HRegionInfo hri = HRegion.loadDotRegionInfoFileContent(fs, regionDir);
      return HRegion.createHRegion(hri, rootDir, conf, htd, null, false, true);
    }
  }
198
199 private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
200 Path regionInfo = new Path(path, HRegion.REGIONINFO_FILE);
201 return fs.exists(regionInfo);
202 }
203
204 private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
205 return FSTableDescriptors.getTableInfoPath(fs, path) != null;
206 }
207
208 private static boolean isFamilyDir(final FileSystem fs, final Path path) throws IOException {
209 return isRegionDir(fs, path.getParent());
210 }
211
212 private static class CompactionMapper
213 extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
214 private CompactionWorker compactor = null;
215 private boolean compactOnce = false;
216
217 @Override
218 public void setup(Context context) {
219 Configuration conf = context.getConfiguration();
220 compactOnce = conf.getBoolean(CONF_COMPACT_ONCE, false);
221
222 try {
223 FileSystem fs = FileSystem.get(conf);
224 this.compactor = new CompactionWorker(fs, conf);
225 } catch (IOException e) {
226 throw new RuntimeException("Could not get the input FileSystem", e);
227 }
228 }
229
230 @Override
231 public void map(LongWritable key, Text value, Context context)
232 throws InterruptedException, IOException {
233 Path path = new Path(value.toString());
234 this.compactor.compact(path, compactOnce);
235 }
236 }
237
238
239
240
241 private static class CompactionInputFormat extends TextInputFormat {
242 @Override
243 protected boolean isSplitable(JobContext context, Path file) {
244 return true;
245 }
246
247
248
249
250
251 @Override
252 public List<InputSplit> getSplits(JobContext job) throws IOException {
253 List<InputSplit> splits = new ArrayList<InputSplit>();
254 List<FileStatus> files = listStatus(job);
255
256 Text key = new Text();
257 for (FileStatus file: files) {
258 Path path = file.getPath();
259 FileSystem fs = path.getFileSystem(job.getConfiguration());
260 LineReader reader = new LineReader(fs.open(path));
261 long pos = 0;
262 int n;
263 try {
264 while ((n = reader.readLine(key)) > 0) {
265 String[] hosts = getStoreDirHosts(fs, path);
266 splits.add(new FileSplit(path, pos, n, hosts));
267 pos += n;
268 }
269 } finally {
270 reader.close();
271 }
272 }
273
274 return splits;
275 }
276
277
278
279
280 private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
281 throws IOException {
282 FileStatus[] files = FSUtils.listStatus(fs, path, null);
283 if (files == null) {
284 return new String[] {};
285 }
286
287 HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
288 for (FileStatus hfileStatus: files) {
289 HDFSBlocksDistribution storeFileBlocksDistribution =
290 FSUtils.computeHDFSBlocksDistribution(fs, hfileStatus, 0, hfileStatus.getLen());
291 hdfsBlocksDistribution.add(storeFileBlocksDistribution);
292 }
293
294 List<String> hosts = hdfsBlocksDistribution.getTopHosts();
295 return hosts.toArray(new String[hosts.size()]);
296 }
297
298
299
300
301
302
303 public static void createInputFile(final FileSystem fs, final Path path,
304 final Set<Path> toCompactDirs) throws IOException {
305
306 List<Path> storeDirs = new LinkedList<Path>();
307 for (Path compactDir: toCompactDirs) {
308 if (isFamilyDir(fs, compactDir)) {
309 storeDirs.add(compactDir);
310 } else if (isRegionDir(fs, compactDir)) {
311 for (Path familyDir: FSUtils.getFamilyDirs(fs, compactDir)) {
312 storeDirs.add(familyDir);
313 }
314 } else if (isTableDir(fs, compactDir)) {
315
316 for (Path regionDir: FSUtils.getRegionDirs(fs, compactDir)) {
317 for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
318 storeDirs.add(familyDir);
319 }
320 }
321 } else {
322 throw new IOException(
323 "Specified path is not a table, region or family directory. path=" + compactDir);
324 }
325 }
326
327
328 FSDataOutputStream stream = fs.create(path);
329 LOG.info("Create input file=" + path + " with " + storeDirs.size() + " dirs to compact.");
330 try {
331 final byte[] newLine = Bytes.toBytes("\n");
332 for (Path storeDir: storeDirs) {
333 stream.write(Bytes.toBytes(storeDir.toString()));
334 stream.write(newLine);
335 }
336 } finally {
337 stream.close();
338 }
339 }
340 }
341
342
343
344
345 private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
346 final boolean compactOnce) throws Exception {
347 Configuration conf = getConf();
348 conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);
349
350 Job job = new Job(conf);
351 job.setJobName("CompactionTool");
352 job.setJarByClass(CompactionTool.class);
353 job.setMapperClass(CompactionMapper.class);
354 job.setInputFormatClass(CompactionInputFormat.class);
355 job.setOutputFormatClass(NullOutputFormat.class);
356 job.setMapSpeculativeExecution(false);
357 job.setNumReduceTasks(0);
358
359 String stagingName = "compact-" + EnvironmentEdgeManager.currentTimeMillis();
360 Path stagingDir = new Path(conf.get(CONF_TMP_DIR), stagingName);
361 fs.mkdirs(stagingDir);
362 try {
363
364 Path inputPath = new Path(stagingDir, stagingName);
365 CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
366 CompactionInputFormat.addInputPath(job, inputPath);
367
368
369 TableMapReduceUtil.initCredentials(job);
370
371
372 return job.waitForCompletion(true) ? 0 : 1;
373 } finally {
374 fs.delete(stagingDir, true);
375 }
376 }
377
378
379
380
381 private int doClient(final FileSystem fs, final Set<Path> toCompactDirs,
382 final boolean compactOnce) throws IOException {
383 CompactionWorker worker = new CompactionWorker(fs, getConf());
384 for (Path path: toCompactDirs) {
385 worker.compact(path, compactOnce);
386 }
387 return 0;
388 }
389
390 @Override
391 public int run(String[] args) throws Exception {
392 Set<Path> toCompactDirs = new HashSet<Path>();
393 boolean compactOnce = false;
394 boolean mapred = false;
395
396 Configuration conf = getConf();
397 FileSystem fs = FileSystem.get(conf);
398
399 try {
400 for (int i = 0; i < args.length; ++i) {
401 String opt = args[i];
402 if (opt.equals("-compactOnce")) {
403 compactOnce = true;
404 } else if (opt.equals("-mapred")) {
405 mapred = true;
406 } else if (!opt.startsWith("-")) {
407 Path path = new Path(opt);
408 FileStatus status = fs.getFileStatus(path);
409 if (!status.isDir()) {
410 printUsage("Specified path is not a directory. path=" + path);
411 return 1;
412 }
413 toCompactDirs.add(path);
414 } else {
415 printUsage();
416 }
417 }
418 } catch (Exception e) {
419 printUsage(e.getMessage());
420 return 1;
421 }
422
423 if (toCompactDirs.size() == 0) {
424 printUsage("No directories to compact specified.");
425 return 1;
426 }
427
428
429 if (mapred) {
430 return doMapReduce(fs, toCompactDirs, compactOnce);
431 } else {
432 return doClient(fs, toCompactDirs, compactOnce);
433 }
434 }
435
  /** Print the tool usage without a leading error message. */
  private void printUsage() {
    printUsage(null);
  }
439
440 private void printUsage(final String message) {
441 if (message != null && message.length() > 0) {
442 System.err.println(message);
443 }
444 System.err.println("Usage: java " + this.getClass().getName() + " \\");
445 System.err.println(" [-compactOnce] [-mapred] [-D<property=value>]* files...");
446 System.err.println();
447 System.err.println("Options:");
448 System.err.println(" mapred Use MapReduce to run compaction.");
449 System.err.println(" compactOnce Execute just one compaction step. (default: while needed)");
450 System.err.println();
451 System.err.println("Note: -D properties will be applied to the conf used. ");
452 System.err.println("For example: ");
453 System.err.println(" To preserve input files, pass -D"+CONF_COMPLETE_COMPACTION+"=false");
454 System.err.println(" To stop delete of compacted file, pass -D"+CONF_DELETE_COMPACTED+"=false");
455 System.err.println(" To set tmp dir, pass -D"+CONF_TMP_DIR+"=ALTERNATE_DIR");
456 System.err.println();
457 System.err.println("Examples:");
458 System.err.println(" To compact the full 'TestTable' using MapReduce:");
459 System.err.println(" $ bin/hbase " + this.getClass().getName() + " -mapred hdfs:///hbase/TestTable");
460 System.err.println();
461 System.err.println(" To compact column family 'x' of the table 'TestTable' region 'abc':");
462 System.err.println(" $ bin/hbase " + this.getClass().getName() + " hdfs:///hbase/TestTable/abc/x");
463 }
464
465 public static void main(String[] args) throws Exception {
466 System.exit(ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(), args));
467 }
468 }