/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.mapreduce.JobUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;

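/*
 * CompactionTool executes a compaction on the files under the specified path.
 * The path can be:
 *  - a table directory (all regions and families will be compacted)
 *  - a region directory (all families in the region will be compacted)
 *  - a family directory (the store files of the family will be compacted)
 */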
@InterfaceAudience.Public
public class CompactionTool extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(CompactionTool.class);

  private final static String CONF_TMP_DIR = "hbase.tmp.dir";
  private final static String CONF_COMPACT_ONCE = "hbase.compactiontool.compact.once";
  private final static String CONF_DELETE_COMPACTED = "hbase.compactiontool.delete";
  private final static String CONF_COMPLETE_COMPACTION = "hbase.hstore.compaction.complete";

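  /**
   * Worker that executes the compaction on the specified path.
   * The path can point to a table, region or family directory.
   */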
  private static class CompactionWorker {
    private final boolean keepCompactedFiles;
    private final boolean deleteCompacted;
    private final Configuration conf;
    private final FileSystem fs;
    private final Path tmpDir;

    public CompactionWorker(final FileSystem fs, final Configuration conf) {
      this.conf = conf;
      this.keepCompactedFiles = !conf.getBoolean(CONF_COMPLETE_COMPACTION, true);
      this.deleteCompacted = conf.getBoolean(CONF_DELETE_COMPACTED, false);
      this.tmpDir = new Path(conf.get(CONF_TMP_DIR));
      this.fs = fs;
    }

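    /**
     * Execute the compaction on the specified path.
     *
     * @param path Directory path on which to run compaction
     *             (table, region or family directory).
     * @param compactOnce Execute just a single step of compaction.
     */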
    public void compact(final Path path, final boolean compactOnce) throws IOException {
      if (isFamilyDir(fs, path)) {
        Path regionDir = path.getParent();
        Path tableDir = regionDir.getParent();
        HTableDescriptor htd = FSTableDescriptors.getTableDescriptor(fs, tableDir);
        HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
        compactStoreFiles(tableDir, htd, hri, path.getName(), compactOnce);
      } else if (isRegionDir(fs, path)) {
        Path tableDir = path.getParent();
        HTableDescriptor htd = FSTableDescriptors.getTableDescriptor(fs, tableDir);
        compactRegion(tableDir, htd, path, compactOnce);
      } else if (isTableDir(fs, path)) {
        compactTable(path, compactOnce);
      } else {
        throw new IOException(
          "Specified path is not a table, region or family directory. path=" + path);
      }
    }

    private void compactTable(final Path tableDir, final boolean compactOnce)
        throws IOException {
      HTableDescriptor htd = FSTableDescriptors.getTableDescriptor(fs, tableDir);
      for (Path regionDir: FSUtils.getRegionDirs(fs, tableDir)) {
        compactRegion(tableDir, htd, regionDir, compactOnce);
      }
    }

    private void compactRegion(final Path tableDir, final HTableDescriptor htd,
        final Path regionDir, final boolean compactOnce) throws IOException {
      HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
      for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
        compactStoreFiles(tableDir, htd, hri, familyDir.getName(), compactOnce);
      }
    }

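    /**
     * Execute the actual compaction job on a single store.
     * If compactOnce is false, keep compacting until the store
     * no longer needs compaction.
     */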
    private void compactStoreFiles(final Path tableDir, final HTableDescriptor htd,
        final HRegionInfo hri, final String familyName, final boolean compactOnce)
        throws IOException {
      HStore store = getStore(conf, fs, tableDir, htd, hri, familyName, tmpDir);
      LOG.info("Compact table=" + htd.getNameAsString() +
        " region=" + hri.getRegionNameAsString() +
        " family=" + familyName);
      do {
        CompactionContext compaction = store.requestCompaction();
        if (compaction == null) break;
        List<StoreFile> storeFiles = store.compact(compaction);
        if (storeFiles != null && !storeFiles.isEmpty()) {
          if (keepCompactedFiles && deleteCompacted) {
            // The compaction was not "completed" so the input files were kept
            // in place; delete them explicitly, as requested by the user.
            for (StoreFile storeFile: storeFiles) {
              fs.delete(storeFile.getPath(), false);
            }
          }
        }
      } while (store.needsCompaction() && !compactOnce);
    }

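    /**
     * Create a "mock" HStore that uses the tmpDir specified by the user and
     * the store dir to compact as source.
     */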
    private static HStore getStore(final Configuration conf, final FileSystem fs,
        final Path tableDir, final HTableDescriptor htd, final HRegionInfo hri,
        final String familyName, final Path tempDir) throws IOException {
      HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, hri) {
        @Override
        public Path getTempDir() {
          return tempDir;
        }
      };
      HRegion region = new HRegion(regionFs, null, conf, htd, null);
      return new HStore(region, htd.getFamily(Bytes.toBytes(familyName)), conf);
    }
  }

  private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
    Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
    return fs.exists(regionInfo);
  }

  private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
    return FSTableDescriptors.getTableInfoPath(fs, path) != null;
  }

  private static boolean isFamilyDir(final FileSystem fs, final Path path) throws IOException {
    return isRegionDir(fs, path.getParent());
  }

  private static class CompactionMapper
      extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
    private CompactionWorker compactor = null;
    private boolean compactOnce = false;

    @Override
    public void setup(Context context) {
      Configuration conf = context.getConfiguration();
      compactOnce = conf.getBoolean(CONF_COMPACT_ONCE, false);

      try {
        FileSystem fs = FileSystem.get(conf);
        this.compactor = new CompactionWorker(fs, conf);
      } catch (IOException e) {
        throw new RuntimeException("Could not get the input FileSystem", e);
      }
    }

    @Override
    public void map(LongWritable key, Text value, Context context)
        throws InterruptedException, IOException {
      Path path = new Path(value.toString());
      this.compactor.compact(path, compactOnce);
    }
  }

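  /**
   * Input format that uses store file block locations as input split locality.
   */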
  private static class CompactionInputFormat extends TextInputFormat {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
      return true;
    }

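    /**
     * Returns one split per line of the input file. Each line names a
     * store directory to compact; the split locality is taken from the
     * block locations of the files under that directory.
     */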
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
      List<InputSplit> splits = new ArrayList<InputSplit>();
      List<FileStatus> files = listStatus(job);

      Text key = new Text();
      for (FileStatus file: files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        LineReader reader = new LineReader(fs.open(path));
        long pos = 0;
        int n;
        try {
          while ((n = reader.readLine(key)) > 0) {
            // Compute split locality from the store directory named on this
            // line, not from the input file itself.
            String[] hosts = getStoreDirHosts(fs, new Path(key.toString()));
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          reader.close();
        }
      }

      return splits;
    }

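    /**
     * Returns the hosts hosting most of the blocks of the files under the
     * given store directory; used as the split locality.
     */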
    private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
        throws IOException {
      FileStatus[] files = FSUtils.listStatus(fs, path);
      if (files == null) {
        return new String[] {};
      }

      HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
      for (FileStatus hfileStatus: files) {
        HDFSBlocksDistribution storeFileBlocksDistribution =
          FSUtils.computeHDFSBlocksDistribution(fs, hfileStatus, 0, hfileStatus.getLen());
        hdfsBlocksDistribution.add(storeFileBlocksDistribution);
      }

      List<String> hosts = hdfsBlocksDistribution.getTopHosts();
      return hosts.toArray(new String[hosts.size()]);
    }

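    /**
     * Create the input file for the given directories to compact.
     * The file is a text file with one store directory per line;
     * each mapper picks up a line and compacts that store.
     */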
    public static void createInputFile(final FileSystem fs, final Path path,
        final Set<Path> toCompactDirs) throws IOException {
      // Extract the list of store dirs to compact
      List<Path> storeDirs = new LinkedList<Path>();
      for (Path compactDir: toCompactDirs) {
        if (isFamilyDir(fs, compactDir)) {
          storeDirs.add(compactDir);
        } else if (isRegionDir(fs, compactDir)) {
          for (Path familyDir: FSUtils.getFamilyDirs(fs, compactDir)) {
            storeDirs.add(familyDir);
          }
        } else if (isTableDir(fs, compactDir)) {
          // Lookup regions and families of the specified table
          for (Path regionDir: FSUtils.getRegionDirs(fs, compactDir)) {
            for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
              storeDirs.add(familyDir);
            }
          }
        } else {
          throw new IOException(
            "Specified path is not a table, region or family directory. path=" + compactDir);
        }
      }

      // Write the file with the list of store dirs, one per line
      FSDataOutputStream stream = fs.create(path);
      LOG.info("Create input file=" + path + " with " + storeDirs.size() + " dirs to compact.");
      try {
        final byte[] newLine = Bytes.toBytes("\n");
        for (Path storeDir: storeDirs) {
          stream.write(Bytes.toBytes(storeDir.toString()));
          stream.write(newLine);
        }
      } finally {
        stream.close();
      }
    }
  }

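  /**
   * Execute compaction, using a Map-Reduce job: one mapper per store
   * directory listed in the generated input file.
   */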
  private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
      final boolean compactOnce) throws Exception {
    Configuration conf = getConf();
    conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);

    Job job = new Job(conf);
    job.setJobName("CompactionTool");
    job.setJarByClass(CompactionTool.class);
    job.setMapperClass(CompactionMapper.class);
    job.setInputFormatClass(CompactionInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setMapSpeculativeExecution(false);
    job.setNumReduceTasks(0);

    Path stagingDir = JobUtil.getStagingDir(conf);
    try {
      // Create the input file with the store dirs to compact
      Path inputPath = new Path(stagingDir, "compact-" + EnvironmentEdgeManager.currentTimeMillis());
      CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
      CompactionInputFormat.addInputPath(job, inputPath);

      // Initialize credentials for a secure cluster
      TableMapReduceUtil.initCredentials(job);

      // Start the MR job and wait for completion
      return job.waitForCompletion(true) ? 0 : 1;
    } finally {
      fs.delete(stagingDir, true);
    }
  }

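  /**
   * Execute compaction from this client process, one path at a time.
   */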
  private int doClient(final FileSystem fs, final Set<Path> toCompactDirs,
      final boolean compactOnce) throws IOException {
    CompactionWorker worker = new CompactionWorker(fs, getConf());
    for (Path path: toCompactDirs) {
      worker.compact(path, compactOnce);
    }
    return 0;
  }

377
378 @Override
379 public int run(String[] args) throws Exception {
380 Set<Path> toCompactDirs = new HashSet<Path>();
381 boolean compactOnce = false;
382 boolean mapred = false;
383
384 Configuration conf = getConf();
385 FileSystem fs = FileSystem.get(conf);
386
387 try {
388 for (int i = 0; i < args.length; ++i) {
389 String opt = args[i];
390 if (opt.equals("-compactOnce")) {
391 compactOnce = true;
392 } else if (opt.equals("-mapred")) {
393 mapred = true;
394 } else if (!opt.startsWith("-")) {
395 Path path = new Path(opt);
396 FileStatus status = fs.getFileStatus(path);
397 if (!status.isDir()) {
398 printUsage("Specified path is not a directory. path=" + path);
399 return 1;
400 }
401 toCompactDirs.add(path);
402 } else {
403 printUsage();
404 }
405 }
406 } catch (Exception e) {
407 printUsage(e.getMessage());
408 return 1;
409 }
410
411 if (toCompactDirs.size() == 0) {
412 printUsage("No directories to compact specified.");
413 return 1;
414 }
415
416
417 if (mapred) {
418 return doMapReduce(fs, toCompactDirs, compactOnce);
419 } else {
420 return doClient(fs, toCompactDirs, compactOnce);
421 }
422 }
423
424 private void printUsage() {
425 printUsage(null);
426 }
427
  private void printUsage(final String message) {
    if (message != null && message.length() > 0) {
      System.err.println(message);
    }
    System.err.println("Usage: java " + this.getClass().getName() + " \\");
    System.err.println("  [-compactOnce] [-mapred] [-D<property=value>]* files...");
    System.err.println();
    System.err.println("Options:");
    System.err.println(" mapred         Use MapReduce to run compaction.");
    System.err.println(" compactOnce    Execute just one compaction step. (default: while needed)");
    System.err.println();
    System.err.println("Note: -D properties will be applied to the conf used.");
    System.err.println("For example:");
    System.err.println(" To preserve input files, pass -D" + CONF_COMPLETE_COMPACTION + "=false");
    System.err.println(" To stop deletion of compacted files, pass -D" + CONF_DELETE_COMPACTED + "=false");
    System.err.println(" To set the tmp dir, pass -D" + CONF_TMP_DIR + "=ALTERNATE_DIR");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" To compact the full 'TestTable' using MapReduce:");
    System.err.println(" $ bin/hbase " + this.getClass().getName() + " -mapred hdfs:///hbase/TestTable");
    System.err.println();
    System.err.println(" To compact column family 'x' of the table 'TestTable' region 'abc':");
    System.err.println(" $ bin/hbase " + this.getClass().getName() + " hdfs:///hbase/TestTable/abc/x");
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(), args));
  }
}