/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.UUID;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
import org.apache.hadoop.hbase.regionserver.Store;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Writes HFiles. Passed KeyValues must arrive in order.
 * Currently, it can only write files to a single column family at a time;
 * writing multiple column families requires coordinating keys across families.
 * Writes the current time as the sequence id for the file. Sets the major compacted
 * attribute on created hfiles. Calling write(null, null) will forcibly roll
 * all HFiles being written.
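 * <p>
 * This output format is normally configured through
 * {@link #configureIncrementalLoad(Job, HTable)} rather than wired up by hand.
 * A minimal sketch of direct use (the output path here is a placeholder):
 * <pre>{@code
 * job.setOutputFormatClass(HFileOutputFormat.class);
 * job.setOutputKeyClass(ImmutableBytesWritable.class);
 * job.setOutputValueClass(KeyValue.class);
 * FileOutputFormat.setOutputPath(job, new Path("/tmp/hfile-output"));
 * }</pre>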
 * @see KeyValueSortReducer
 */
public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
  static Log LOG = LogFactory.getLog(HFileOutputFormat.class);
  static final String COMPRESSION_CONF_KEY = "hbase.hfileoutputformat.families.compression";
  private static final String BLOOM_TYPE_CONF_KEY = "hbase.hfileoutputformat.families.bloomtype";
  private static final String DATABLOCK_ENCODING_CONF_KEY =
      "hbase.mapreduce.hfileoutputformat.datablock.encoding";
  private static final String BLOCK_SIZE_CONF_KEY = "hbase.mapreduce.hfileoutputformat.blocksize";

  public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
  throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These config values come from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE,
        HConstants.DEFAULT_MAX_FILE_SIZE);
    // Not a standard config; add it to hbase-*.xml to use something other than
    // the default compression.
    final String defaultCompression = conf.get("hfile.compression",
        Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean(
        "hbase.mapreduce.hfileoutputformat.compaction.exclude", false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
      encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
      try {
        encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding
            .valueOf(dataBlockEncodingStr));
      } catch (IllegalArgumentException ex) {
        throw new RuntimeException(
            "Invalid data block encoding type configured for the param "
                + DATABLOCK_ENCODING_CONF_KEY + " : "
                + dataBlockEncodingStr, ex);
      }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
      // Map of families to writers and how much has been output on the writer.
      private final Map<byte [], WriterLength> writers =
        new TreeMap<byte [], WriterLength>(Bytes.BYTES_COMPARATOR);
      private byte [] previousRow = HConstants.EMPTY_BYTE_ARRAY;
      private final byte [] now = Bytes.toBytes(System.currentTimeMillis());
      private boolean rollRequested = false;

      public void write(ImmutableBytesWritable row, KeyValue kv)
      throws IOException {
        // null input == user explicitly wants to flush
        if (row == null && kv == null) {
          rollWriters();
          return;
        }

        byte [] rowKey = kv.getRow();
        long length = kv.getLength();
        byte [] family = kv.getFamily();
        WriterLength wl = this.writers.get(family);

        // If this is a new column family, make sure its output directory exists
        if (wl == null) {
          fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
        }

        // If any of the HFiles for the column families has reached
        // maxsize, we need to roll all the writers
        if (wl != null && wl.written + length >= maxsize) {
          this.rollRequested = true;
        }

        // But a roll can only happen once the current row is finished
        if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
          rollWriters();
        }

        // create a new StoreFile writer, if necessary
        if (wl == null || wl.writer == null) {
          wl = getNewWriter(family, conf);
        }

        // we now have the proper StoreFile writer. full steam ahead
        kv.updateLatestStamp(this.now);
        wl.writer.append(kv);
        wl.written += length;

        // Remember the row so we know when it transitions.
        this.previousRow = rowKey;
      }

      private void rollWriters() throws IOException {
        for (WriterLength wl : this.writers.values()) {
          if (wl.writer != null) {
            LOG.info("Writer=" + wl.writer.getPath() +
                ((wl.written == 0)? "": ", wrote=" + wl.written));
            close(wl.writer);
          }
          wl.writer = null;
          wl.written = 0;
        }
        this.rollRequested = false;
      }

      /* Create a new StoreFile.Writer.
       * @param family
       * @return A WriterLength, containing a new StoreFile.Writer.
       * @throws IOException
       */
      private WriterLength getNewWriter(byte[] family, Configuration conf)
          throws IOException {
        WriterLength wl = new WriterLength();
        Path familydir = new Path(outputdir, Bytes.toString(family));
        String compression = compressionMap.get(family);
        compression = compression == null ? defaultCompression : compression;
        String bloomTypeStr = bloomTypeMap.get(family);
        BloomType bloomType = BloomType.NONE;
        if (bloomTypeStr != null) {
          bloomType = BloomType.valueOf(bloomTypeStr);
        }
        String blockSizeString = blockSizeMap.get(family);
        int blockSize = blockSizeString == null ? HFile.DEFAULT_BLOCKSIZE
            : Integer.parseInt(blockSizeString);
        Configuration tempConf = new Configuration(conf);
        tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
        wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blockSize)
            .withOutputDir(familydir)
            .withCompression(AbstractHFileWriter.compressionByName(compression))
            .withBloomType(bloomType)
            .withComparator(KeyValue.COMPARATOR)
            .withDataBlockEncoder(encoder)
            .withChecksumType(Store.getChecksumType(conf))
            .withBytesPerChecksum(Store.getBytesPerChecksum(conf))
            .build();

        this.writers.put(family, wl);
        return wl;
      }

      private void close(final StoreFile.Writer w) throws IOException {
        if (w != null) {
          w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
              Bytes.toBytes(System.currentTimeMillis()));
          w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
              Bytes.toBytes(context.getTaskAttemptID().toString()));
          w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY,
              Bytes.toBytes(true));
          w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
              Bytes.toBytes(compactionExclude));
          w.appendTrackedTimestampsToMetadata();
          w.close();
        }
      }

      public void close(TaskAttemptContext c)
      throws IOException, InterruptedException {
        for (WriterLength wl: this.writers.values()) {
          close(wl.writer);
        }
      }
    };
  }

  /*
   * Data structure to hold a Writer and amount of data written on it.
   */
  static class WriterLength {
    long written = 0;
    StoreFile.Writer writer = null;
  }

  /**
   * Return the start keys of all of the regions in this table,
   * as a list of ImmutableBytesWritable.
   */
  private static List<ImmutableBytesWritable> getRegionStartKeys(HTable table)
  throws IOException {
    byte[][] byteKeys = table.getStartKeys();
    ArrayList<ImmutableBytesWritable> ret =
      new ArrayList<ImmutableBytesWritable>(byteKeys.length);
    for (byte[] byteKey : byteKeys) {
      ret.add(new ImmutableBytesWritable(byteKey));
    }
    return ret;
  }

  /**
   * Write out a SequenceFile, readable by TotalOrderPartitioner, that contains
   * the split points in startKeys.
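   * The file holds one {@link ImmutableBytesWritable} region start key per entry,
   * with a {@link NullWritable} value; the empty start key of the first region is
   * omitted.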
   * @param partitionsPath output path for SequenceFile
   * @param startKeys the region start keys
   */
  private static void writePartitions(Configuration conf, Path partitionsPath,
      List<ImmutableBytesWritable> startKeys) throws IOException {
    if (startKeys.isEmpty()) {
      throw new IllegalArgumentException("No regions passed");
    }

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted =
      new TreeSet<ImmutableBytesWritable>(startKeys);

    ImmutableBytesWritable first = sorted.first();
    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
      throw new IllegalArgumentException(
          "First region of table should have empty start key. Instead has: "
          + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);

    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs,
        conf, partitionsPath, ImmutableBytesWritable.class, NullWritable.class);

    try {
      for (ImmutableBytesWritable startKey : sorted) {
        writer.append(startKey, NullWritable.get());
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Configure a MapReduce Job to perform an incremental load into the given
   * table. This
   * <ul>
   *   <li>Inspects the table to configure a total order partitioner</li>
   *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
   *   <li>Sets the number of reduce tasks to match the current number of regions</li>
   *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
   *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
   *     PutSortReducer)</li>
   * </ul>
   * The user should be sure to set the map output value class to either KeyValue or Put before
   * running this function.
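   * <p>
   * A driver sketch, assuming a mapper that emits {@link KeyValue}s (the mapper
   * class, paths and table name below are placeholders, not part of this API):
   * <pre>{@code
   * Job job = new Job(conf, "bulk load");
   * job.setMapperClass(MyKeyValueEmittingMapper.class);    // hypothetical mapper
   * job.setMapOutputKeyClass(ImmutableBytesWritable.class);
   * job.setMapOutputValueClass(KeyValue.class);             // selects KeyValueSortReducer
   * FileInputFormat.addInputPath(job, new Path("/input"));
   * FileOutputFormat.setOutputPath(job, new Path("/tmp/hfile-output"));
   * HFileOutputFormat.configureIncrementalLoad(job, new HTable(conf, "mytable"));
   * job.waitForCompletion(true);
   * }</pre>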
   */
  public static void configureIncrementalLoad(Job job, HTable table)
  throws IOException {
    Configuration conf = job.getConfiguration();
    Class<? extends Partitioner> topClass;
    try {
      topClass = getTotalOrderPartitionerClass();
    } catch (ClassNotFoundException e) {
      throw new IOException("Failed getting TotalOrderPartitioner", e);
    }
    job.setPartitionerClass(topClass);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(PutSortReducer.class);
    } else {
      LOG.warn("Unknown map output value type: " + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " +
        "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(),
        "partitions_" + UUID.randomUUID());
    LOG.info("Writing partition information to " + partitionsPath);

    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    // makeQualified returns a new Path rather than mutating this one; keep the
    // qualified path for the DistributedCache URI built below.
    partitionsPath = partitionsPath.makeQualified(fs);
    URI cacheUri;
    try {
      // Below we make explicit reference to the bundled TOP. It's cheating a bit:
      // we assume the DEFAULT_PATH defined in the hbase-bundled TOP is the same
      // as in hadoop (whether 0.20 or 0.22, etc.)
      cacheUri = new URI(partitionsPath.toString() + "#" +
        org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
      throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    // Serialize per-family compression, bloom type and block size settings
    // based on the table's column families
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    LOG.info("Incremental table output configured.");
  }

  /**
   * If running on Hadoop newer than 0.20, use Hadoop's own TotalOrderPartitioner.
   * If on 0.20, use the TOP that ships with HBase under hadoopbackport.
   * This method exists so HBase can run against different versions of Hadoop:
   * on 0.20.x we have to use the TOP bundled with HBase, otherwise we use the
   * one in Hadoop.
   * @return The TotalOrderPartitioner class to use
   * @throws ClassNotFoundException If no TotalOrderPartitioner can be found.
   */
  private static Class<? extends Partitioner> getTotalOrderPartitionerClass()
  throws ClassNotFoundException {
    Class<? extends Partitioner> clazz = null;
    try {
      clazz = (Class<? extends Partitioner>) Class.forName(
          "org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner");
    } catch (ClassNotFoundException e) {
      clazz = (Class<? extends Partitioner>) Class.forName(
          "org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner");
    }
    return clazz;
  }

  /**
   * Run inside the task to deserialize column family to compression algorithm
   * map from the configuration.
   *
   * Package-private for unit tests only.
   *
   * @return a map from column family to the name of the configured compression
   *         algorithm
   */
  static Map<byte[], String> createFamilyCompressionMap(Configuration conf) {
    return createFamilyConfValueMap(conf, COMPRESSION_CONF_KEY);
  }

  private static Map<byte[], String> createFamilyBloomMap(Configuration conf) {
    return createFamilyConfValueMap(conf, BLOOM_TYPE_CONF_KEY);
  }

  private static Map<byte[], String> createFamilyBlockSizeMap(Configuration conf) {
    return createFamilyConfValueMap(conf, BLOCK_SIZE_CONF_KEY);
  }

  /**
   * Run inside the task to deserialize column family to given conf value map.
   *
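   * The serialized value is expected to look like {@code family1=value1&family2=value2},
   * with family names and values URL-encoded, as written by
   * {@link #configureCompression(HTable, Configuration)} and the related configure methods.
   *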
   * @param conf
   * @param confName
   * @return a map of column family to the given configuration value
   */
  private static Map<byte[], String> createFamilyConfValueMap(Configuration conf, String confName) {
    Map<byte[], String> confValMap = new TreeMap<byte[], String>(Bytes.BYTES_COMPARATOR);
    String confVal = conf.get(confName, "");
    for (String familyConf : confVal.split("&")) {
      String[] familySplit = familyConf.split("=");
      if (familySplit.length != 2) {
        continue;
      }
      try {
        confValMap.put(URLDecoder.decode(familySplit[0], "UTF-8").getBytes(),
            URLDecoder.decode(familySplit[1], "UTF-8"));
      } catch (UnsupportedEncodingException e) {
        // will not happen with UTF-8 encoding
        throw new AssertionError(e);
      }
    }
    return confValMap;
  }


  /**
   * Serialize the column family to compression algorithm map into the configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * Package-private for unit tests only.
   *
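   * As an illustration (the family names are made up), two families {@code cf1} using gz
   * compression and {@code cf2} using none would be serialized as {@code cf1=gz&cf2=none}
   * under the key {@code hbase.hfileoutputformat.families.compression}.
   *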
   * @throws IOException
   *           on failure to read column family descriptors
   */
  static void configureCompression(HTable table, Configuration conf) throws IOException {
    StringBuilder compressionConfigValue = new StringBuilder();
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    if (tableDescriptor == null) {
      // could happen with mock table instance
      return;
    }
    Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
    int i = 0;
    for (HColumnDescriptor familyDescriptor : families) {
      if (i++ > 0) {
        compressionConfigValue.append('&');
      }
      compressionConfigValue.append(URLEncoder.encode(familyDescriptor.getNameAsString(), "UTF-8"));
      compressionConfigValue.append('=');
      compressionConfigValue.append(URLEncoder.encode(familyDescriptor.getCompression().getName(), "UTF-8"));
    }
    conf.set(COMPRESSION_CONF_KEY, compressionConfigValue.toString());
  }

  private static void configureBlockSize(HTable table, Configuration conf) throws IOException {
    StringBuilder blockSizeConfigValue = new StringBuilder();
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    if (tableDescriptor == null) {
      // could happen with mock table instance
      return;
    }
    Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
    int i = 0;
    for (HColumnDescriptor familyDescriptor : families) {
      if (i++ > 0) {
        blockSizeConfigValue.append('&');
      }
      blockSizeConfigValue.append(URLEncoder.encode(
          familyDescriptor.getNameAsString(), "UTF-8"));
      blockSizeConfigValue.append('=');
      blockSizeConfigValue.append(URLEncoder.encode(
          String.valueOf(familyDescriptor.getBlocksize()), "UTF-8"));
    }
    conf.set(BLOCK_SIZE_CONF_KEY, blockSizeConfigValue.toString());
  }

  /**
   * Serialize the column family to bloom type map into the configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * @throws IOException
   *           on failure to read column family descriptors
   */
  static void configureBloomType(HTable table, Configuration conf) throws IOException {
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    if (tableDescriptor == null) {
      // could happen with mock table instance
      return;
    }
    StringBuilder bloomTypeConfigValue = new StringBuilder();
    Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
    int i = 0;
    for (HColumnDescriptor familyDescriptor : families) {
      if (i++ > 0) {
        bloomTypeConfigValue.append('&');
      }
      bloomTypeConfigValue.append(URLEncoder.encode(familyDescriptor.getNameAsString(), "UTF-8"));
      bloomTypeConfigValue.append('=');
      String bloomType = familyDescriptor.getBloomFilterType().toString();
      if (bloomType == null) {
        bloomType = HColumnDescriptor.DEFAULT_BLOOMFILTER;
      }
      bloomTypeConfigValue.append(URLEncoder.encode(bloomType, "UTF-8"));
    }
    conf.set(BLOOM_TYPE_CONF_KEY, bloomTypeConfigValue.toString());
  }
}