/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;

/**
 * The CompactionTool allows you to execute a compaction on a:
 * <ul>
 *  <li>table folder (all regions and families will be compacted)
 *  <li>region folder (all families in the region will be compacted)
 *  <li>family folder (the store files will be compacted)
 * </ul>
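 * <p>
 * Usage (as also printed by printUsage() below):
 * <pre>
 *   $ bin/hbase org.apache.hadoop.hbase.regionserver.CompactionTool \
 *       [-compactOnce] [-mapred] [-D&lt;property=value&gt;]* files...
 * </pre>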
 */
@InterfaceAudience.Public
public class CompactionTool extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(CompactionTool.class);

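  // Configuration keys honored by this tool (settable with -D on the command line):
  //  - hbase.tmp.dir: temporary directory used for the mock Store and the MR staging files
  //  - hbase.compactiontool.compact.once: run a single compaction step instead of looping
  //  - hbase.compactiontool.delete: delete the compaction output when the inputs are kept
  //  - hbase.hstore.compaction.complete: when false, the original store files are preserved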
  private final static String CONF_TMP_DIR = "hbase.tmp.dir";
  private final static String CONF_COMPACT_ONCE = "hbase.compactiontool.compact.once";
  private final static String CONF_DELETE_COMPACTED = "hbase.compactiontool.delete";
  private final static String CONF_COMPLETE_COMPACTION = "hbase.hstore.compaction.complete";

  /**
   * Class responsible for executing the compaction on the specified path.
   * The path can be a table, region or family directory.
   */
  private static class CompactionWorker {
    private final boolean keepCompactedFiles;
    private final boolean deleteCompacted;
    private final Configuration conf;
    private final FileSystem fs;
    private final Path tmpDir;

    public CompactionWorker(final FileSystem fs, final Configuration conf) {
      this.conf = conf;
      this.keepCompactedFiles = !conf.getBoolean(CONF_COMPLETE_COMPACTION, true);
      this.deleteCompacted = conf.getBoolean(CONF_DELETE_COMPACTED, false);
      this.tmpDir = new Path(conf.get(CONF_TMP_DIR));
      this.fs = fs;
    }

    /**
     * Execute the compaction on the specified path.
     *
     * @param path Directory path on which to run the compaction.
     * @param compactOnce Execute just a single step of compaction.
     */
    public void compact(final Path path, final boolean compactOnce) throws IOException {
      if (isFamilyDir(fs, path)) {
        Path regionDir = path.getParent();
        Path tableDir = regionDir.getParent();
        HTableDescriptor htd = FSTableDescriptors.getTableDescriptor(fs, tableDir);
        HRegion region = loadRegion(fs, conf, htd, regionDir);
        compactStoreFiles(region, path, compactOnce);
      } else if (isRegionDir(fs, path)) {
        Path tableDir = path.getParent();
        HTableDescriptor htd = FSTableDescriptors.getTableDescriptor(fs, tableDir);
        compactRegion(htd, path, compactOnce);
      } else if (isTableDir(fs, path)) {
        compactTable(path, compactOnce);
      } else {
        throw new IOException(
          "Specified path is not a table, region or family directory. path=" + path);
      }
    }

    private void compactTable(final Path tableDir, final boolean compactOnce)
        throws IOException {
      HTableDescriptor htd = FSTableDescriptors.getTableDescriptor(fs, tableDir);
      LOG.info("Compact table=" + htd.getNameAsString());
      for (Path regionDir: FSUtils.getRegionDirs(fs, tableDir)) {
        compactRegion(htd, regionDir, compactOnce);
      }
    }

    private void compactRegion(final HTableDescriptor htd, final Path regionDir,
        final boolean compactOnce) throws IOException {
      HRegion region = loadRegion(fs, conf, htd, regionDir);
      LOG.info("Compact table=" + htd.getNameAsString() +
        " region=" + region.getRegionNameAsString());
      for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
        compactStoreFiles(region, familyDir, compactOnce);
      }
    }

    /**
     * Execute the actual compaction job.
     * If the compact-once flag is not specified, keep compacting until
     * no more compactions are needed, using the Configuration settings provided.
     */
    private void compactStoreFiles(final HRegion region, final Path familyDir,
        final boolean compactOnce) throws IOException {
      LOG.info("Compact table=" + region.getTableDesc().getNameAsString() +
        " region=" + region.getRegionNameAsString() +
        " family=" + familyDir.getName());
      Store store = getStore(region, familyDir);
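      // Keep requesting and executing compactions until the store no longer
      // needs one (or stop after the first pass if compactOnce is set). If the
      // input files were preserved and deletion was requested, remove the newly
      // written compacted file so only the original files remain.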
      do {
        CompactionRequest cr = store.requestCompaction();
        StoreFile storeFile = store.compact(cr);
        if (storeFile != null) {
          if (keepCompactedFiles && deleteCompacted) {
            fs.delete(storeFile.getPath(), false);
          }
        }
      } while (store.needsCompaction() && !compactOnce);
    }

    /**
     * Create a "mock" HStore that uses the tmpDir specified by the user and
     * the store dir to compact as source.
     */
    private Store getStore(final HRegion region, final Path storeDir) throws IOException {
      byte[] familyName = Bytes.toBytes(storeDir.getName());
      HColumnDescriptor hcd = region.getTableDesc().getFamily(familyName);
      // Create a Store with the hbase.rootdir check blanked out and return our
      // list of files instead of having the Store search its home dir.
      return new Store(tmpDir, region, hcd, fs, conf) {
        @Override
        public FileStatus[] getStoreFiles() throws IOException {
          return this.fs.listStatus(getHomedir());
        }

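        // Return the existing family directory as the store home instead of
        // creating a new one under tmpDir: its files are the compaction source.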
        @Override
        Path createStoreHomeDir(FileSystem fs, Path homedir) throws IOException {
          return storeDir;
        }
      };
    }

    private static HRegion loadRegion(final FileSystem fs, final Configuration conf,
        final HTableDescriptor htd, final Path regionDir) throws IOException {
      Path rootDir = regionDir.getParent().getParent();
      HRegionInfo hri = HRegion.loadDotRegionInfoFileContent(fs, regionDir);
      return HRegion.createHRegion(hri, rootDir, conf, htd, null, false, true);
    }
  }

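  // Path-type detection: a region dir contains the region-info file, a table
  // dir contains a table descriptor, and a family dir is a direct child of a
  // region dir.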
  private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
    Path regionInfo = new Path(path, HRegion.REGIONINFO_FILE);
    return fs.exists(regionInfo);
  }

  private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
    return FSTableDescriptors.getTableInfoPath(fs, path) != null;
  }

  private static boolean isFamilyDir(final FileSystem fs, final Path path) throws IOException {
    return isRegionDir(fs, path.getParent());
  }

  private static class CompactionMapper
      extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
    private CompactionWorker compactor = null;
    private boolean compactOnce = false;

    @Override
    public void setup(Context context) {
      Configuration conf = context.getConfiguration();
      compactOnce = conf.getBoolean(CONF_COMPACT_ONCE, false);

      try {
        FileSystem fs = FileSystem.get(conf);
        this.compactor = new CompactionWorker(fs, conf);
      } catch (IOException e) {
        throw new RuntimeException("Could not get the input FileSystem", e);
      }
    }

    @Override
    public void map(LongWritable key, Text value, Context context)
        throws InterruptedException, IOException {
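      // Each input line (written by CompactionInputFormat.createInputFile())
      // is the path of a single store directory to compact.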
      Path path = new Path(value.toString());
      this.compactor.compact(path, compactOnce);
    }
  }

  /**
   * Input format that uses the store files' block locations as input split locality.
   */
  private static class CompactionInputFormat extends TextInputFormat {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
      return true;
    }

    /**
     * Returns a split for each store directory, using the block locations
     * of its files as the locality reference.
     */
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
      List<InputSplit> splits = new ArrayList<InputSplit>();
      List<FileStatus> files = listStatus(job);

      Text key = new Text();
      for (FileStatus file: files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        LineReader reader = new LineReader(fs.open(path));
        long pos = 0;
        int n;
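        // One split per line of the input file; each split's locality is the
        // set of top hosts for the store directory named on that line.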
        try {
          while ((n = reader.readLine(key)) > 0) {
            String[] hosts = getStoreDirHosts(fs, new Path(key.toString()));
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          reader.close();
        }
      }

      return splits;
    }

    /**
     * Return the top hosts of the store files, used by the split.
     */
    private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
        throws IOException {
      FileStatus[] files = FSUtils.listStatus(fs, path, null);
      if (files == null) {
        return new String[] {};
      }

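      // Aggregate the block distribution of every file in the store directory
      // and return the hosts that hold the most data.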
      HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
      for (FileStatus hfileStatus: files) {
        HDFSBlocksDistribution storeFileBlocksDistribution =
          FSUtils.computeHDFSBlocksDistribution(fs, hfileStatus, 0, hfileStatus.getLen());
        hdfsBlocksDistribution.add(storeFileBlocksDistribution);
      }

      List<String> hosts = hdfsBlocksDistribution.getTopHosts();
      return hosts.toArray(new String[hosts.size()]);
    }

    /**
     * Create the input file for the given directories to compact.
     * The file is a text file with each line corresponding to a
     * store directory to compact.
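     * <p>
     * Example contents, one store directory per line (paths are illustrative):
     * <pre>
     *   hdfs://namenode:8020/hbase/TestTable/3f4ea5ea14653cee6006f13c7d06d10b/x
     *   hdfs://namenode:8020/hbase/TestTable/3f4ea5ea14653cee6006f13c7d06d10b/y
     * </pre>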
     */
    public static void createInputFile(final FileSystem fs, final Path path,
        final Set<Path> toCompactDirs) throws IOException {
      // Extract the list of store dirs
      List<Path> storeDirs = new LinkedList<Path>();
      for (Path compactDir: toCompactDirs) {
        if (isFamilyDir(fs, compactDir)) {
          storeDirs.add(compactDir);
        } else if (isRegionDir(fs, compactDir)) {
          for (Path familyDir: FSUtils.getFamilyDirs(fs, compactDir)) {
            storeDirs.add(familyDir);
          }
        } else if (isTableDir(fs, compactDir)) {
          // Lookup regions
          for (Path regionDir: FSUtils.getRegionDirs(fs, compactDir)) {
            for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
              storeDirs.add(familyDir);
            }
          }
        } else {
          throw new IOException(
            "Specified path is not a table, region or family directory. path=" + compactDir);
        }
      }

      // Write Input File
      FSDataOutputStream stream = fs.create(path);
      LOG.info("Create input file=" + path + " with " + storeDirs.size() + " dirs to compact.");
      try {
        final byte[] newLine = Bytes.toBytes("\n");
        for (Path storeDir: storeDirs) {
          stream.write(Bytes.toBytes(storeDir.toString()));
          stream.write(newLine);
        }
      } finally {
        stream.close();
      }
    }
  }

  /**
   * Execute compaction, using a MapReduce job.
   */
  private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
      final boolean compactOnce) throws Exception {
    Configuration conf = getConf();
    conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);

    Job job = new Job(conf);
    job.setJobName("CompactionTool");
    job.setJarByClass(CompactionTool.class);
    job.setMapperClass(CompactionMapper.class);
    job.setInputFormatClass(CompactionInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);
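    // Disable speculative execution: duplicate map tasks would issue
    // concurrent compactions against the same store directories.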
    job.setMapSpeculativeExecution(false);
    job.setNumReduceTasks(0);

    String stagingName = "compact-" + EnvironmentEdgeManager.currentTimeMillis();
    Path stagingDir = new Path(conf.get(CONF_TMP_DIR), stagingName);
    fs.mkdirs(stagingDir);
    try {
      // Create input file with the store dirs
      Path inputPath = new Path(stagingDir, stagingName);
      CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
      CompactionInputFormat.addInputPath(job, inputPath);

      // Initialize credentials for secure clusters
      TableMapReduceUtil.initCredentials(job);

      // Start the MR Job and wait
      return job.waitForCompletion(true) ? 0 : 1;
    } finally {
      fs.delete(stagingDir, true);
    }
  }

  /**
   * Execute compaction, from this client, one path at a time.
   */
  private int doClient(final FileSystem fs, final Set<Path> toCompactDirs,
      final boolean compactOnce) throws IOException {
    CompactionWorker worker = new CompactionWorker(fs, getConf());
    for (Path path: toCompactDirs) {
      worker.compact(path, compactOnce);
    }
    return 0;
  }

  @Override
  public int run(String[] args) throws Exception {
    Set<Path> toCompactDirs = new HashSet<Path>();
    boolean compactOnce = false;
    boolean mapred = false;

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    try {
      for (int i = 0; i < args.length; ++i) {
        String opt = args[i];
        if (opt.equals("-compactOnce")) {
          compactOnce = true;
        } else if (opt.equals("-mapred")) {
          mapred = true;
        } else if (!opt.startsWith("-")) {
          Path path = new Path(opt);
          FileStatus status = fs.getFileStatus(path);
          if (!status.isDir()) {
            printUsage("Specified path is not a directory. path=" + path);
            return 1;
          }
          toCompactDirs.add(path);
        } else {
          printUsage();
          return 1;
        }
      }
    } catch (Exception e) {
      printUsage(e.getMessage());
      return 1;
    }

    if (toCompactDirs.size() == 0) {
      printUsage("No directories to compact specified.");
      return 1;
    }

    // Execute compaction!
    if (mapred) {
      return doMapReduce(fs, toCompactDirs, compactOnce);
    } else {
      return doClient(fs, toCompactDirs, compactOnce);
    }
  }

  private void printUsage() {
    printUsage(null);
  }

  private void printUsage(final String message) {
    if (message != null && message.length() > 0) {
      System.err.println(message);
    }
    System.err.println("Usage: java " + this.getClass().getName() + " \\");
    System.err.println("  [-compactOnce] [-mapred] [-D<property=value>]* files...");
    System.err.println();
    System.err.println("Options:");
    System.err.println(" -mapred        Use MapReduce to run compaction.");
    System.err.println(" -compactOnce   Execute just one compaction step. (default: while needed)");
    System.err.println();
    System.err.println("Note: -D properties will be applied to the conf used.");
    System.err.println("For example:");
    System.err.println(" To preserve input files, pass -D" + CONF_COMPLETE_COMPACTION + "=false");
    System.err.println(" To stop deletion of compacted files, pass -D" + CONF_DELETE_COMPACTED + "=false");
    System.err.println(" To set the tmp dir, pass -D" + CONF_TMP_DIR + "=ALTERNATE_DIR");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" To compact the full 'TestTable' using MapReduce:");
    System.err.println(" $ bin/hbase " + this.getClass().getName() + " -mapred hdfs:///hbase/TestTable");
    System.err.println();
    System.err.println(" To compact column family 'x' of the table 'TestTable' region 'abc':");
    System.err.println(" $ bin/hbase " + this.getClass().getName() + " hdfs:///hbase/TestTable/abc/x");
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(), args));
  }
}