
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.mapreduce.JobUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;

/**
 * The CompactionTool lets you run a compaction by specifying one of:
 * <ul>
 *  <li>a table folder (all regions and families will be compacted)
 *  <li>a region folder (all families in the region will be compacted)
 *  <li>a family folder (the store files will be compacted)
 * </ul>
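 * <p>A sample invocation, compacting a whole table via MapReduce
 * (the table path is illustrative; see {@link #printUsage} for more):
 * <pre>
 * $ bin/hbase org.apache.hadoop.hbase.regionserver.CompactionTool \
 *     -mapred hdfs:///hbase/TestTable
 * </pre>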
 */
@InterfaceAudience.Public
public class CompactionTool extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(CompactionTool.class);

  private final static String CONF_TMP_DIR = "hbase.tmp.dir";
  private final static String CONF_COMPACT_ONCE = "hbase.compactiontool.compact.once";
  private final static String CONF_DELETE_COMPACTED = "hbase.compactiontool.delete";
  private final static String CONF_COMPLETE_COMPACTION = "hbase.hstore.compaction.complete";

  /**
   * Class responsible for executing the compaction on the specified path.
   * The path can be a table, region or family directory.
   */
  private static class CompactionWorker {
    private final boolean keepCompactedFiles;
    private final boolean deleteCompacted;
    private final Configuration conf;
    private final FileSystem fs;
    private final Path tmpDir;

    public CompactionWorker(final FileSystem fs, final Configuration conf) {
      this.conf = conf;
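      // When "hbase.hstore.compaction.complete" is set to false, the original
      // store files are preserved rather than replaced (printUsage() below
      // shows the matching -D flag).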
      this.keepCompactedFiles = !conf.getBoolean(CONF_COMPLETE_COMPACTION, true);
      this.deleteCompacted = conf.getBoolean(CONF_DELETE_COMPACTED, false);
      this.tmpDir = new Path(conf.get(CONF_TMP_DIR));
      this.fs = fs;
    }

    /**
     * Execute the compaction on the specified path.
     *
     * @param path Directory path on which to run the compaction.
     * @param compactOnce Execute just a single step of compaction.
     */
    public void compact(final Path path, final boolean compactOnce) throws IOException {
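      // Dispatch on the directory type: a family dir compacts a single store;
      // region and table dirs recurse down to their family dirs.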
      if (isFamilyDir(fs, path)) {
        Path regionDir = path.getParent();
        Path tableDir = regionDir.getParent();
        HTableDescriptor htd = FSTableDescriptors.getTableDescriptor(fs, tableDir);
        HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
        compactStoreFiles(tableDir, htd, hri, path.getName(), compactOnce);
      } else if (isRegionDir(fs, path)) {
        Path tableDir = path.getParent();
        HTableDescriptor htd = FSTableDescriptors.getTableDescriptor(fs, tableDir);
        compactRegion(tableDir, htd, path, compactOnce);
      } else if (isTableDir(fs, path)) {
        compactTable(path, compactOnce);
      } else {
        throw new IOException(
          "Specified path is not a table, region or family directory. path=" + path);
      }
    }

    private void compactTable(final Path tableDir, final boolean compactOnce)
        throws IOException {
      HTableDescriptor htd = FSTableDescriptors.getTableDescriptor(fs, tableDir);
      for (Path regionDir: FSUtils.getRegionDirs(fs, tableDir)) {
        compactRegion(tableDir, htd, regionDir, compactOnce);
      }
    }

    private void compactRegion(final Path tableDir, final HTableDescriptor htd,
        final Path regionDir, final boolean compactOnce) throws IOException {
      HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
      for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
        compactStoreFiles(tableDir, htd, hri, familyDir.getName(), compactOnce);
      }
    }

    /**
     * Execute the actual compaction job.
     * If the compact-once flag is not set, the compaction is repeated until
     * no more compactions are needed. Uses the Configuration settings provided.
     */
    private void compactStoreFiles(final Path tableDir, final HTableDescriptor htd,
        final HRegionInfo hri, final String familyName, final boolean compactOnce)
        throws IOException {
      HStore store = getStore(conf, fs, tableDir, htd, hri, familyName, tmpDir);
      LOG.info("Compact table=" + htd.getNameAsString() +
        " region=" + hri.getRegionNameAsString() +
        " family=" + familyName);
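      // Keep requesting and running compactions until the store no longer
      // needs one, or stop after a single pass when compactOnce is set.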
      do {
        CompactionContext compaction = store.requestCompaction();
        if (compaction == null) break;
        List<StoreFile> storeFiles = store.compact(compaction);
        if (storeFiles != null && !storeFiles.isEmpty()) {
          if (keepCompactedFiles && deleteCompacted) {
            for (StoreFile storeFile: storeFiles) {
              fs.delete(storeFile.getPath(), false);
            }
          }
        }
      } while (store.needsCompaction() && !compactOnce);
    }

    /**
     * Create a "mock" HStore that uses the tmpDir specified by the user and
     * the store dir to compact as its source.
     */
    private static HStore getStore(final Configuration conf, final FileSystem fs,
        final Path tableDir, final HTableDescriptor htd, final HRegionInfo hri,
        final String familyName, final Path tempDir) throws IOException {
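      // Override the region filesystem so the temporary compaction output is
      // written under the user-supplied tmpDir instead of the region dir.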
      HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, hri) {
        @Override
        public Path getTempDir() {
          return tempDir;
        }
      };
      HRegion region = new HRegion(regionFs, null, conf, htd, null);
      return new HStore(region, htd.getFamily(Bytes.toBytes(familyName)), conf);
    }
  }

  private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
    Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
    return fs.exists(regionInfo);
  }

  private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
    return FSTableDescriptors.getTableInfoPath(fs, path) != null;
  }

  private static boolean isFamilyDir(final FileSystem fs, final Path path) throws IOException {
    return isRegionDir(fs, path.getParent());
  }

  private static class CompactionMapper
      extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
    private CompactionWorker compactor = null;
    private boolean compactOnce = false;

    @Override
    public void setup(Context context) {
      Configuration conf = context.getConfiguration();
      compactOnce = conf.getBoolean(CONF_COMPACT_ONCE, false);

      try {
        FileSystem fs = FileSystem.get(conf);
        this.compactor = new CompactionWorker(fs, conf);
      } catch (IOException e) {
        throw new RuntimeException("Could not get the input FileSystem", e);
      }
    }

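    // Each input value is one store-directory path, as written by
    // CompactionInputFormat.createInputFile(); run the compaction on it.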
    @Override
    public void map(LongWritable key, Text value, Context context)
        throws InterruptedException, IOException {
      Path path = new Path(value.toString());
      this.compactor.compact(path, compactOnce);
    }
  }

  /**
   * Input format that uses the store files' block locations as input split locality.
   */
  private static class CompactionInputFormat extends TextInputFormat {
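    // The input file contains one store-directory path per line, so it is
    // safe to split it: getSplits() emits one FileSplit per line.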
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
      return true;
    }

    /**
     * Returns a split for each store-file directory, using the block locations
     * of its files as the locality reference.
     */
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
      List<InputSplit> splits = new ArrayList<InputSplit>();
      List<FileStatus> files = listStatus(job);

      Text key = new Text();
      for (FileStatus file: files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        LineReader reader = new LineReader(fs.open(path));
        long pos = 0;
        int n;
        try {
          while ((n = reader.readLine(key)) > 0) {
            String[] hosts = getStoreDirHosts(fs, new Path(key.toString()));
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          reader.close();
        }
      }

      return splits;
    }

    /**
     * Return the top hosts of the store files, used by the split.
     */
    private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
        throws IOException {
      FileStatus[] files = FSUtils.listStatus(fs, path);
      if (files == null) {
        return new String[] {};
      }

      HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
      for (FileStatus hfileStatus: files) {
        HDFSBlocksDistribution storeFileBlocksDistribution =
          FSUtils.computeHDFSBlocksDistribution(fs, hfileStatus, 0, hfileStatus.getLen());
        hdfsBlocksDistribution.add(storeFileBlocksDistribution);
      }

      List<String> hosts = hdfsBlocksDistribution.getTopHosts();
      return hosts.toArray(new String[hosts.size()]);
    }

    /**
     * Create the input file for the given directories to compact.
     * The file is a text file with each line corresponding to a
     * store-file directory to compact.
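     * <p>Each line is the full path of a family directory, i.e. of the form
     * {@code <table-dir>/<region>/<family>} (layout shown for illustration).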
     */
    public static void createInputFile(final FileSystem fs, final Path path,
        final Set<Path> toCompactDirs) throws IOException {
      // Extract the list of store dirs
      List<Path> storeDirs = new LinkedList<Path>();
      for (Path compactDir: toCompactDirs) {
        if (isFamilyDir(fs, compactDir)) {
          storeDirs.add(compactDir);
        } else if (isRegionDir(fs, compactDir)) {
          for (Path familyDir: FSUtils.getFamilyDirs(fs, compactDir)) {
            storeDirs.add(familyDir);
          }
        } else if (isTableDir(fs, compactDir)) {
          // Lookup regions
          for (Path regionDir: FSUtils.getRegionDirs(fs, compactDir)) {
            for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
              storeDirs.add(familyDir);
            }
          }
        } else {
          throw new IOException(
            "Specified path is not a table, region or family directory. path=" + compactDir);
        }
      }

      // Write Input File
      FSDataOutputStream stream = fs.create(path);
      LOG.info("Create input file=" + path + " with " + storeDirs.size() + " dirs to compact.");
      try {
        final byte[] newLine = Bytes.toBytes("\n");
        for (Path storeDir: storeDirs) {
          stream.write(Bytes.toBytes(storeDir.toString()));
          stream.write(newLine);
        }
      } finally {
        stream.close();
      }
    }
  }

  /**
   * Execute the compaction using a MapReduce job.
   */
  private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
      final boolean compactOnce) throws Exception {
    Configuration conf = getConf();
    conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);

    Job job = new Job(conf);
    job.setJobName("CompactionTool");
    job.setJarByClass(CompactionTool.class);
    job.setMapperClass(CompactionMapper.class);
    job.setInputFormatClass(CompactionInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setMapSpeculativeExecution(false);
    job.setNumReduceTasks(0);
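    // CompactionInputFormat generates one split per store directory, so each
    // map task compacts a single family, scheduled close to its HDFS blocks.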

    Path stagingDir = JobUtil.getStagingDir(conf);
    try {
      // Create input file with the store dirs
      Path inputPath = new Path(stagingDir, "compact-" + EnvironmentEdgeManager.currentTimeMillis());
      CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
      CompactionInputFormat.addInputPath(job, inputPath);

      // Initialize credential for secure cluster
      TableMapReduceUtil.initCredentials(job);

      // Start the MR Job and wait
      return job.waitForCompletion(true) ? 0 : 1;
    } finally {
      fs.delete(stagingDir, true);
    }
  }

  /**
   * Execute the compaction from this client, one path at a time.
   */
  private int doClient(final FileSystem fs, final Set<Path> toCompactDirs,
      final boolean compactOnce) throws IOException {
    CompactionWorker worker = new CompactionWorker(fs, getConf());
    for (Path path: toCompactDirs) {
      worker.compact(path, compactOnce);
    }
    return 0;
  }

  @Override
  public int run(String[] args) throws Exception {
    Set<Path> toCompactDirs = new HashSet<Path>();
    boolean compactOnce = false;
    boolean mapred = false;

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    try {
      for (int i = 0; i < args.length; ++i) {
        String opt = args[i];
        if (opt.equals("-compactOnce")) {
          compactOnce = true;
        } else if (opt.equals("-mapred")) {
          mapred = true;
        } else if (!opt.startsWith("-")) {
          Path path = new Path(opt);
          FileStatus status = fs.getFileStatus(path);
          if (!status.isDir()) {
            printUsage("Specified path is not a directory. path=" + path);
            return 1;
          }
          toCompactDirs.add(path);
        } else {
          printUsage();
        }
      }
    } catch (Exception e) {
      printUsage(e.getMessage());
      return 1;
    }

    if (toCompactDirs.size() == 0) {
      printUsage("No directories to compact specified.");
      return 1;
    }

    // Execute compaction!
    if (mapred) {
      return doMapReduce(fs, toCompactDirs, compactOnce);
    } else {
      return doClient(fs, toCompactDirs, compactOnce);
    }
  }

  private void printUsage() {
    printUsage(null);
  }

  private void printUsage(final String message) {
    if (message != null && message.length() > 0) {
      System.err.println(message);
    }
    System.err.println("Usage: java " + this.getClass().getName() + " \\");
    System.err.println("  [-compactOnce] [-mapred] [-D<property=value>]* files...");
    System.err.println();
    System.err.println("Options:");
    System.err.println(" -mapred        Use MapReduce to run compaction.");
    System.err.println(" -compactOnce   Execute just one compaction step. (default: while needed)");
    System.err.println();
    System.err.println("Note: -D properties will be applied to the conf used.");
    System.err.println("For example:");
    System.err.println(" To preserve input files, pass -D" + CONF_COMPLETE_COMPACTION + "=false");
    System.err.println(" To stop the deletion of compacted files, pass -D" + CONF_DELETE_COMPACTED + "=false");
    System.err.println(" To set the tmp dir, pass -D" + CONF_TMP_DIR + "=ALTERNATE_DIR");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" To compact the full 'TestTable' using MapReduce:");
    System.err.println(" $ bin/hbase " + this.getClass().getName() + " -mapred hdfs:///hbase/TestTable");
    System.err.println();
    System.err.println(" To compact column family 'x' of the table 'TestTable' region 'abc':");
    System.err.println(" $ bin/hbase " + this.getClass().getName() + " hdfs:///hbase/TestTable/abc/x");
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(), args));
  }
}