
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import com.google.common.annotations.VisibleForTesting;
22  import java.io.IOException;
23  import java.io.UnsupportedEncodingException;
24  import java.net.URLDecoder;
25  import java.net.URLEncoder;
26  import java.util.ArrayList;
27  import java.util.Collection;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.TreeMap;
31  import java.util.TreeSet;
32  import java.util.UUID;
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.classification.InterfaceAudience;
36  import org.apache.hadoop.classification.InterfaceStability;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.hbase.HColumnDescriptor;
41  import org.apache.hadoop.hbase.HConstants;
42  import org.apache.hadoop.hbase.HTableDescriptor;
43  import org.apache.hadoop.hbase.KeyValue;
44  import org.apache.hadoop.hbase.client.HTable;
45  import org.apache.hadoop.hbase.client.Put;
46  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
47  import org.apache.hadoop.hbase.io.compress.Compression;
48  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
49  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
50  import org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter;
51  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
52  import org.apache.hadoop.hbase.io.hfile.HFileContext;
53  import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
54  import org.apache.hadoop.hbase.regionserver.BloomType;
55  import org.apache.hadoop.hbase.regionserver.HStore;
56  import org.apache.hadoop.hbase.regionserver.StoreFile;
57  import org.apache.hadoop.hbase.util.Bytes;
58  import org.apache.hadoop.io.NullWritable;
59  import org.apache.hadoop.io.SequenceFile;
60  import org.apache.hadoop.io.Text;
61  import org.apache.hadoop.mapreduce.Job;
62  import org.apache.hadoop.mapreduce.RecordWriter;
63  import org.apache.hadoop.mapreduce.TaskAttemptContext;
64  import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
65  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
66  import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
67  
68  /**
69   * Writes HFiles. Passed KeyValues must arrive in order.
70   * Writes current time as the sequence id for the file. Sets the major compacted
71   * attribute on created hfiles. Calling write(null,null) will forcibly roll
72   * all HFiles being written.
73   * <p>
74   * Using this class as part of a MapReduce job is best done
75   * using {@link #configureIncrementalLoad(Job, HTable)}.
76   * @see KeyValueSortReducer
77   */
78  @InterfaceAudience.Public
79  @InterfaceStability.Stable
80  public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
81    static Log LOG = LogFactory.getLog(HFileOutputFormat.class);
82  
83    // The following constants are private since they are used only by
84    // HFileOutputFormat to transfer data internally, via the conf, between
85    // job setup and the reducer run.
86    // They should not be changed by the client.
87    private static final String COMPRESSION_FAMILIES_CONF_KEY =
88        "hbase.hfileoutputformat.families.compression";
89    private static final String BLOOM_TYPE_FAMILIES_CONF_KEY =
90        "hbase.hfileoutputformat.families.bloomtype";
91    private static final String BLOCK_SIZE_FAMILIES_CONF_KEY =
92        "hbase.mapreduce.hfileoutputformat.blocksize";
93    private static final String DATABLOCK_ENCODING_FAMILIES_CONF_KEY =
94        "hbase.mapreduce.hfileoutputformat.families.datablock.encoding";
95  
96    // This constant is public since clients set it on their conf object
97    // when configuring a job and therefore need to refer to this symbol.
98    // It is present for backwards-compatibility reasons. Use it only to
99    // override the auto-detection of data block encoding.
100   public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
101       "hbase.mapreduce.hfileoutputformat.datablock.encoding";
102 
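      /*
       * Illustrative only (not part of the original source): a minimal sketch of how a
       * client's job setup might use the override key above, assuming FAST_DIFF is the
       * encoding wanted for every family written by the job:
       *
       *   Configuration conf = job.getConfiguration();
       *   conf.set(HFileOutputFormat.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY,
       *       DataBlockEncoding.FAST_DIFF.toString());
       *
       * When this key is set, getRecordWriter() below applies the single encoding to
       * every column family instead of the per-family values serialized by
       * configureDataBlockEncoding().
       */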
103   public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
104   throws IOException, InterruptedException {
105     // Get the path of the temporary output file
106     final Path outputPath = FileOutputFormat.getOutputPath(context);
107     final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
108     final Configuration conf = context.getConfiguration();
109     final FileSystem fs = outputdir.getFileSystem(conf);
110     // These configuration values come from hbase-*.xml
111     final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE,
112         HConstants.DEFAULT_MAX_FILE_SIZE);
113     // Invented config; add "hfile.compression" to hbase-*.xml to use a compression other than the default.
114     final String defaultCompressionStr = conf.get("hfile.compression",
115         Compression.Algorithm.NONE.getName());
116     final Algorithm defaultCompression = AbstractHFileWriter
117         .compressionByName(defaultCompressionStr);
118     final boolean compactionExclude = conf.getBoolean(
119         "hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
120 
121     // create a map from column family to the compression algorithm
122     final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
123     final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
124     final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);
125 
126     String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
127     final Map<byte[], DataBlockEncoding> datablockEncodingMap
128         = createFamilyDataBlockEncodingMap(conf);
129     final DataBlockEncoding overriddenEncoding;
130     if (dataBlockEncodingStr != null) {
131       overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
132     } else {
133       overriddenEncoding = null;
134     }
135 
136     return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
137       // Map of families to writers and how much has been output on the writer.
138       private final Map<byte [], WriterLength> writers =
139         new TreeMap<byte [], WriterLength>(Bytes.BYTES_COMPARATOR);
140       private byte [] previousRow = HConstants.EMPTY_BYTE_ARRAY;
141       private final byte [] now = Bytes.toBytes(System.currentTimeMillis());
142       private boolean rollRequested = false;
143 
144       public void write(ImmutableBytesWritable row, KeyValue kv)
145       throws IOException {
146         // null input == user explicitly wants to flush
147         if (row == null && kv == null) {
148           rollWriters();
149           return;
150         }
151 
152         byte [] rowKey = kv.getRow();
153         long length = kv.getLength();
154         byte [] family = kv.getFamily();
155         WriterLength wl = this.writers.get(family);
156 
157         // If this is a new column family, make sure the family directory exists
158         if (wl == null) {
159           fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
160         }
161 
162         // If any of the HFiles for the column families has reached
163         // maxsize, we need to roll all the writers
164         if (wl != null && wl.written + length >= maxsize) {
165           this.rollRequested = true;
166         }
167 
168         // Rolling can only happen once the current row is finished, though
169         if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
170           rollWriters();
171         }
172 
173         // create a new StoreFile.Writer (HFile writer), if necessary
174         if (wl == null || wl.writer == null) {
175           wl = getNewWriter(family, conf);
176         }
177 
178         // we now have the proper HFile writer. full steam ahead
179         kv.updateLatestStamp(this.now);
180         wl.writer.append(kv);
181         wl.written += length;
182 
183         // Copy the row so we can detect a row transition.
184         this.previousRow = rowKey;
185       }
186 
187       private void rollWriters() throws IOException {
188         for (WriterLength wl : this.writers.values()) {
189           if (wl.writer != null) {
190             LOG.info("Writer=" + wl.writer.getPath() +
191                 ((wl.written == 0)? "": ", wrote=" + wl.written));
192             close(wl.writer);
193           }
194           wl.writer = null;
195           wl.written = 0;
196         }
197         this.rollRequested = false;
198       }
199 
200       /* Create a new StoreFile.Writer.
201        * @param family column family the new writer is for
202        * @return A WriterLength, containing a new StoreFile.Writer.
203        * @throws IOException
204        */
205       private WriterLength getNewWriter(byte[] family, Configuration conf)
206           throws IOException {
207         WriterLength wl = new WriterLength();
208         Path familydir = new Path(outputdir, Bytes.toString(family));
209         Algorithm compression = compressionMap.get(family);
210         compression = compression == null ? defaultCompression : compression;
211         BloomType bloomType = bloomTypeMap.get(family);
212         bloomType = bloomType == null ? BloomType.NONE : bloomType;
213         Integer blockSize = blockSizeMap.get(family);
214         blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
215         DataBlockEncoding encoding = overriddenEncoding;
216         encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
217         encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
218         Configuration tempConf = new Configuration(conf);
219         tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
220         HFileContextBuilder contextBuilder = new HFileContextBuilder()
221                                     .withCompression(compression)
222                                     .withChecksumType(HStore.getChecksumType(conf))
223                                     .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
224                                     .withBlockSize(blockSize);
225         contextBuilder.withDataBlockEncoding(encoding);
226         HFileContext hFileContext = contextBuilder.build();
227
228         wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
229             .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
230             .withFileContext(hFileContext)
231             .build();
232 
233         this.writers.put(family, wl);
234         return wl;
235       }
236 
237       private void close(final StoreFile.Writer w) throws IOException {
238         if (w != null) {
239           w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
240               Bytes.toBytes(System.currentTimeMillis()));
241           w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
242               Bytes.toBytes(context.getTaskAttemptID().toString()));
243           w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY,
244               Bytes.toBytes(true));
245           w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
246               Bytes.toBytes(compactionExclude));
247           w.appendTrackedTimestampsToMetadata();
248           w.close();
249         }
250       }
251 
252       public void close(TaskAttemptContext c)
253       throws IOException, InterruptedException {
254         for (WriterLength wl: this.writers.values()) {
255           close(wl.writer);
256         }
257       }
258     };
259   }
260 
261   /*
262    * Data structure to hold a Writer and amount of data written on it.
263    */
264   static class WriterLength {
265     long written = 0;
266     StoreFile.Writer writer = null;
267   }
268 
269   /**
270    * Return the start keys of all of the regions in this table,
271    * as a list of ImmutableBytesWritable.
272    */
273   private static List<ImmutableBytesWritable> getRegionStartKeys(HTable table)
274   throws IOException {
275     byte[][] byteKeys = table.getStartKeys();
276     ArrayList<ImmutableBytesWritable> ret =
277       new ArrayList<ImmutableBytesWritable>(byteKeys.length);
278     for (byte[] byteKey : byteKeys) {
279       ret.add(new ImmutableBytesWritable(byteKey));
280     }
281     return ret;
282   }
283 
284   /**
285    * Write out a {@link SequenceFile} that can be read by
286    * {@link TotalOrderPartitioner} that contains the split points in startKeys.
287    */
288   private static void writePartitions(Configuration conf, Path partitionsPath,
289       List<ImmutableBytesWritable> startKeys) throws IOException {
290     LOG.info("Writing partition information to " + partitionsPath);
291     if (startKeys.isEmpty()) {
292       throw new IllegalArgumentException("No regions passed");
293     }
294 
295     // We're generating a list of split points, and we don't ever
296     // have keys < the first region (which has an empty start key)
297     // so we need to remove it. Otherwise we would end up with an
298     // empty reducer with index 0
299     TreeSet<ImmutableBytesWritable> sorted =
300       new TreeSet<ImmutableBytesWritable>(startKeys);
301 
302     ImmutableBytesWritable first = sorted.first();
303     if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
304       throw new IllegalArgumentException(
305           "First region of table should have empty start key. Instead has: "
306           + Bytes.toStringBinary(first.get()));
307     }
308     sorted.remove(first);
309 
310     // Write the actual file
311     FileSystem fs = partitionsPath.getFileSystem(conf);
312     SequenceFile.Writer writer = SequenceFile.createWriter(fs,
313         conf, partitionsPath, ImmutableBytesWritable.class, NullWritable.class);
314 
315     try {
316       for (ImmutableBytesWritable startKey : sorted) {
317         writer.append(startKey, NullWritable.get());
318       }
319     } finally {
320       writer.close();
321     }
322   }
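      /*
       * Illustrative only (not part of the original source): a worked example of the
       * split-point handling above. For a hypothetical table with three regions whose
       * start keys are "" (empty), "b" and "m", getRegionStartKeys() returns all three
       * keys, writePartitions() drops the empty first key and writes {"b", "m"} as the
       * split points, and configureIncrementalLoad() below sets three reduce tasks.
       * TotalOrderPartitioner then sends rows < "b" to reducer 0, rows in ["b", "m") to
       * reducer 1 and rows >= "m" to reducer 2, so each reducer writes HFiles that fall
       * entirely within one region.
       */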
323 
324   /**
325    * Configure a MapReduce Job to perform an incremental load into the given
326    * table. This
327    * <ul>
328    *   <li>Inspects the table to configure a total order partitioner</li>
329    *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
330    *   <li>Sets the number of reduce tasks to match the current number of regions</li>
331    *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
332    *   <li>Sets the reducer up to perform the appropriate sorting (KeyValueSortReducer,
333    *     PutSortReducer or TextSortReducer)</li>
334    * </ul>
335    * The user should be sure to set the map output value class to KeyValue, Put or Text before
336    * running this function; an illustrative driver sketch follows this method.
337    */
338   public static void configureIncrementalLoad(Job job, HTable table)
339   throws IOException {
340     Configuration conf = job.getConfiguration();
341 
342     job.setOutputKeyClass(ImmutableBytesWritable.class);
343     job.setOutputValueClass(KeyValue.class);
344     job.setOutputFormatClass(HFileOutputFormat.class);
345 
346     // Based on the configured map output class, set the correct reducer to properly
347     // sort the incoming values.
348     // TODO it would be nice to pick one or the other of these formats.
349     if (KeyValue.class.equals(job.getMapOutputValueClass())) {
350       job.setReducerClass(KeyValueSortReducer.class);
351     } else if (Put.class.equals(job.getMapOutputValueClass())) {
352       job.setReducerClass(PutSortReducer.class);
353     } else if (Text.class.equals(job.getMapOutputValueClass())) {
354       job.setReducerClass(TextSortReducer.class);
355     } else {
356       LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
357     }
358 
359     conf.setStrings("io.serializations", conf.get("io.serializations"),
360         MutationSerialization.class.getName(), ResultSerialization.class.getName(),
361         KeyValueSerialization.class.getName());
362 
363     // Use the table's region boundaries as the TotalOrderPartitioner split points.
364     LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName()));
365     List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
366     LOG.info("Configuring " + startKeys.size() + " reduce partitions " +
367         "to match current region count");
368     job.setNumReduceTasks(startKeys.size());
369 
370     configurePartitioner(job, startKeys);
371     // Set compression algorithms based on column families
372     configureCompression(table, conf);
373     configureBloomType(table, conf);
374     configureBlockSize(table, conf);
375     configureDataBlockEncoding(table, conf);
376 
377     TableMapReduceUtil.addDependencyJars(job);
378     TableMapReduceUtil.initCredentials(job);
379     LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured.");
380   }
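      /*
       * Illustrative only (not part of the original source): a minimal driver sketch for
       * the method above. MyBulkLoadMapper and the table/path literals are hypothetical
       * placeholders; the mapper is assumed to emit (ImmutableBytesWritable, Put) pairs.
       *
       *   Configuration conf = HBaseConfiguration.create();
       *   HTable table = new HTable(conf, "my_table");
       *   Job job = Job.getInstance(conf, "prepare-hfiles");
       *   job.setJarByClass(MyBulkLoadMapper.class);
       *   job.setMapperClass(MyBulkLoadMapper.class);
       *   job.setMapOutputKeyClass(ImmutableBytesWritable.class);
       *   job.setMapOutputValueClass(Put.class);   // PutSortReducer is selected above
       *   FileInputFormat.addInputPath(job, new Path("/input"));
       *   FileOutputFormat.setOutputPath(job, new Path("/hfile-output"));
       *   HFileOutputFormat.configureIncrementalLoad(job, table);
       *   job.waitForCompletion(true);
       *
       * The HFiles written under /hfile-output can afterwards be loaded into the table
       * with the LoadIncrementalHFiles (completebulkload) tool.
       */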
381 
382   /**
383    * Runs inside the task to deserialize column family to compression algorithm
384    * map from the configuration.
385    *
386    * @param conf to read the serialized values from
387    * @return a map from column family to the configured compression algorithm
388    */
389   @VisibleForTesting
390   static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
391       conf) {
392     Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
393         COMPRESSION_FAMILIES_CONF_KEY);
394     Map<byte[], Algorithm> compressionMap = new TreeMap<byte[],
395         Algorithm>(Bytes.BYTES_COMPARATOR);
396     for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
397       Algorithm algorithm = AbstractHFileWriter.compressionByName
398           (e.getValue());
399       compressionMap.put(e.getKey(), algorithm);
400     }
401     return compressionMap;
402   }
403 
404   /**
405    * Runs inside the task to deserialize column family to bloom filter type
406    * map from the configuration.
407    *
408    * @param conf to read the serialized values from
409    * @return a map from column family to the configured bloom filter type
410    */
411   @VisibleForTesting
412   static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
413     Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
414         BLOOM_TYPE_FAMILIES_CONF_KEY);
415     Map<byte[], BloomType> bloomTypeMap = new TreeMap<byte[],
416         BloomType>(Bytes.BYTES_COMPARATOR);
417     for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
418       BloomType bloomType = BloomType.valueOf(e.getValue());
419       bloomTypeMap.put(e.getKey(), bloomType);
420     }
421     return bloomTypeMap;
422   }
423 
424   /**
425    * Runs inside the task to deserialize column family to block size
426    * map from the configuration.
427    *
428    * @param conf to read the serialized values from
429    * @return a map from column family to the configured block size
430    */
431   @VisibleForTesting
432   static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
433     Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
434         BLOCK_SIZE_FAMILIES_CONF_KEY);
435     Map<byte[], Integer> blockSizeMap = new TreeMap<byte[],
436         Integer>(Bytes.BYTES_COMPARATOR);
437     for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
438       Integer blockSize = Integer.parseInt(e.getValue());
439       blockSizeMap.put(e.getKey(), blockSize);
440     }
441     return blockSizeMap;
442   }
443 
444   /**
445    * Runs inside the task to deserialize column family to data block encoding
446    * type map from the configuration.
447    *
448    * @param conf to read the serialized values from
449    * @return a map from column family to the data block encoding
450    *         (DataBlockEncoding) configured for the family
451    */
452   @VisibleForTesting
453   static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
454       Configuration conf) {
455     Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
456         DATABLOCK_ENCODING_FAMILIES_CONF_KEY);
457     Map<byte[], DataBlockEncoding> encoderMap = new TreeMap<byte[],
458         DataBlockEncoding>(Bytes.BYTES_COMPARATOR);
459     for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
460       encoderMap.put(e.getKey(), DataBlockEncoding.valueOf((e.getValue())));
461     }
462     return encoderMap;
463   }
464 
465 
466   /**
467    * Runs inside the task to deserialize a column family to configuration value map for the given key.
468    *
469    * @param conf to read the serialized values from
470    * @param confName conf key to read from the configuration
471    * @return a map of column family to the given configuration value
472    */
473   private static Map<byte[], String> createFamilyConfValueMap(Configuration conf, String confName) {
474     Map<byte[], String> confValMap = new TreeMap<byte[], String>(Bytes.BYTES_COMPARATOR);
475     String confVal = conf.get(confName, "");
476     for (String familyConf : confVal.split("&")) {
477       String[] familySplit = familyConf.split("=");
478       if (familySplit.length != 2) {
479         continue;
480       }
481       try {
482         confValMap.put(URLDecoder.decode(familySplit[0], "UTF-8").getBytes(),
483             URLDecoder.decode(familySplit[1], "UTF-8"));
484       } catch (UnsupportedEncodingException e) {
485         // will not happen with UTF-8 encoding
486         throw new AssertionError(e);
487       }
488     }
489     return confValMap;
490   }
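      /*
       * Illustrative only (not part of the original source): the per-family settings are
       * serialized as URL-encoded "family=value" pairs joined by '&'. For a hypothetical
       * table with families "cf1" (GZ compression) and "cf2" (no compression),
       * configureCompression() further down would set:
       *
       *   hbase.hfileoutputformat.families.compression = cf1=gz&cf2=none
       *
       * createFamilyConfValueMap() above splits the value on '&' and '=', URL-decodes
       * both sides and returns the resulting byte[]-keyed map.
       */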
491 
492   /**
493    * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
494    * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
495    */
496   static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
497       throws IOException {
498 
499     // create the partitions file
500     FileSystem fs = FileSystem.get(job.getConfiguration());
501     Path partitionsPath = new Path("/tmp", "partitions_" + UUID.randomUUID());
502     fs.makeQualified(partitionsPath);
503     fs.deleteOnExit(partitionsPath);
504     writePartitions(job.getConfiguration(), partitionsPath, splitPoints);
505 
506     // configure job to use it
507     job.setPartitionerClass(TotalOrderPartitioner.class);
508     TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
509   }
510 
511   /**
512    * Serialize column family to compression algorithm map to configuration.
513    * Invoked while configuring the MR job for incremental load.
514    *
515    * @param table to read the properties from
516    * @param conf to persist serialized values into
517    * @throws IOException
518    *           on failure to read column family descriptors
519    */
520   @edu.umd.cs.findbugs.annotations.SuppressWarnings(
521       value="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
522   @VisibleForTesting
523   static void configureCompression(HTable table, Configuration conf) throws IOException {
524     StringBuilder compressionConfigValue = new StringBuilder();
525     HTableDescriptor tableDescriptor = table.getTableDescriptor();
526     if(tableDescriptor == null){
527       // could happen with mock table instance
528       return;
529     }
530     Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
531     int i = 0;
532     for (HColumnDescriptor familyDescriptor : families) {
533       if (i++ > 0) {
534         compressionConfigValue.append('&');
535       }
536       compressionConfigValue.append(URLEncoder.encode(familyDescriptor.getNameAsString(), "UTF-8"));
537       compressionConfigValue.append('=');
538       compressionConfigValue.append(URLEncoder.encode(familyDescriptor.getCompression().getName(), "UTF-8"));
539     }
540     // Store the serialized map in the configuration
541     conf.set(COMPRESSION_FAMILIES_CONF_KEY, compressionConfigValue.toString());
542   }
543 
544   /**
545    * Serialize column family to block size map to configuration.
546    * Invoked while configuring the MR job for incremental load.
547    *
548    * @param table to read the properties from
549    * @param conf to persist serialized values into
550    * @throws IOException
551    *           on failure to read column family descriptors
552    */
553   @VisibleForTesting
554   static void configureBlockSize(HTable table, Configuration conf) throws IOException {
555     StringBuilder blockSizeConfigValue = new StringBuilder();
556     HTableDescriptor tableDescriptor = table.getTableDescriptor();
557     if (tableDescriptor == null) {
558       // could happen with mock table instance
559       return;
560     }
561     Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
562     int i = 0;
563     for (HColumnDescriptor familyDescriptor : families) {
564       if (i++ > 0) {
565         blockSizeConfigValue.append('&');
566       }
567       blockSizeConfigValue.append(URLEncoder.encode(
568           familyDescriptor.getNameAsString(), "UTF-8"));
569       blockSizeConfigValue.append('=');
570       blockSizeConfigValue.append(URLEncoder.encode(
571           String.valueOf(familyDescriptor.getBlocksize()), "UTF-8"));
572     }
573     // Store the serialized map in the configuration
574     conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY, blockSizeConfigValue.toString());
575   }
576 
577   /**
578    * Serialize column family to bloom type map to configuration.
579    * Invoked while configuring the MR job for incremental load.
580    *
581    * @param table to read the properties from
582    * @param conf to persist serialized values into
583    * @throws IOException
584    *           on failure to read column family descriptors
585    */
586   @VisibleForTesting
587   static void configureBloomType(HTable table, Configuration conf) throws IOException {
588     HTableDescriptor tableDescriptor = table.getTableDescriptor();
589     if (tableDescriptor == null) {
590       // could happen with mock table instance
591       return;
592     }
593     StringBuilder bloomTypeConfigValue = new StringBuilder();
594     Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
595     int i = 0;
596     for (HColumnDescriptor familyDescriptor : families) {
597       if (i++ > 0) {
598         bloomTypeConfigValue.append('&');
599       }
600       bloomTypeConfigValue.append(URLEncoder.encode(familyDescriptor.getNameAsString(), "UTF-8"));
601       bloomTypeConfigValue.append('=');
602       String bloomType = familyDescriptor.getBloomFilterType().toString();
603       if (bloomType == null) {
604         bloomType = HColumnDescriptor.DEFAULT_BLOOMFILTER;
605       }
606       bloomTypeConfigValue.append(URLEncoder.encode(bloomType, "UTF-8"));
607     }
608     conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY, bloomTypeConfigValue.toString());
609   }
610 
611   /**
612    * Serialize column family to data block encoding map to configuration.
613    * Invoked while configuring the MR job for incremental load.
614    *
615    * @param table to read the properties from
616    * @param conf to persist serialized values into
617    * @throws IOException
618    *           on failure to read column family descriptors
619    */
620   @VisibleForTesting
621   static void configureDataBlockEncoding(HTable table,
622       Configuration conf) throws IOException {
623     HTableDescriptor tableDescriptor = table.getTableDescriptor();
624     if (tableDescriptor == null) {
625       // could happen with mock table instance
626       return;
627     }
628     StringBuilder dataBlockEncodingConfigValue = new StringBuilder();
629     Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
630     int i = 0;
631     for (HColumnDescriptor familyDescriptor : families) {
632       if (i++ > 0) {
633         dataBlockEncodingConfigValue.append('&');
634       }
635       dataBlockEncodingConfigValue.append(
636           URLEncoder.encode(familyDescriptor.getNameAsString(), "UTF-8"));
637       dataBlockEncodingConfigValue.append('=');
638       DataBlockEncoding encoding = familyDescriptor.getDataBlockEncoding();
639       if (encoding == null) {
640         encoding = DataBlockEncoding.NONE;
641       }
642       dataBlockEncodingConfigValue.append(URLEncoder.encode(encoding.toString(),
643           "UTF-8"));
644     }
645     conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
646         dataBlockEncodingConfigValue.toString());
647   }
648 }