/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.google.common.base.Preconditions;

/**
 * Writes HFiles. Passed KeyValues must arrive in order.
 * Currently this can only write files to a single column family at a
 * time; supporting multiple column families would require coordinating
 * keys across families. Writes the current time as the sequence id for
 * the file, and sets the major-compaction attribute on created hfiles.
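 * <p>
 * A minimal sketch of how a reduce task feeds this format (names here are
 * illustrative; rows and KeyValues must already be in sorted order):
 * <pre>
 *   // inside a Reducer whose output types are (ImmutableBytesWritable, KeyValue)
 *   context.write(new ImmutableBytesWritable(kv.getRow()), kv);
 * </pre>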
 * @see KeyValueSortReducer
 */
public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
  static Log LOG = LogFactory.getLog(HFileOutputFormat.class);

  public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
  throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These settings come from hbase-*.xml.
    final long maxsize = conf.getLong("hbase.hregion.max.filesize", 268435456); // 256MB default
    final int blocksize = conf.getInt("hfile.min.blocksize.size", 65536); // 64KB default
    // Invented config.  Add it to hbase-*.xml if using other than the default compression.
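    // (e.g. set hfile.compression to "gz" for gzip, matching
    // Compression.Algorithm.GZ.getName())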
    final String compression = conf.get("hfile.compression",
      Compression.Algorithm.NONE.getName());

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
      // Map of families to writers and how much has been output on the writer.
      private final Map<byte [], WriterLength> writers =
        new TreeMap<byte [], WriterLength>(Bytes.BYTES_COMPARATOR);
      private byte [] previousRow = HConstants.EMPTY_BYTE_ARRAY;
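      // Timestamp substituted for KeyValues that carry LATEST_TIMESTAMP.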
      private final byte [] now = Bytes.toBytes(System.currentTimeMillis());

      public void write(ImmutableBytesWritable row, KeyValue kv)
      throws IOException {
        long length = kv.getLength();
        byte [] family = kv.getFamily();
        WriterLength wl = this.writers.get(family);
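        // Roll to a new writer if we have none yet, or if appending this
        // KeyValue would push the file past maxsize and we are on a row
        // boundary (&& binds tighter than ||, so the size and row checks
        // are evaluated together).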
        if (wl == null || ((length + wl.written) >= maxsize) &&
            Bytes.compareTo(this.previousRow, 0, this.previousRow.length,
              kv.getBuffer(), kv.getRowOffset(), kv.getRowLength()) != 0) {
          // Get a new writer.
          Path basedir = new Path(outputdir, Bytes.toString(family));
          if (wl == null) {
            wl = new WriterLength();
            this.writers.put(family, wl);
            if (this.writers.size() > 1) throw new IOException("One family only");
            // First file for this family.  Ensure the family dir exists.
            if (!fs.exists(basedir)) fs.mkdirs(basedir);
          }
          wl.writer = getNewWriter(wl.writer, basedir);
          LOG.info("Writer=" + wl.writer.getPath() +
            ((wl.written == 0)? "": ", wrote=" + wl.written));
          wl.written = 0;
        }
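        // If the KeyValue carries LATEST_TIMESTAMP, stamp it with 'now'.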
        kv.updateLatestStamp(this.now);
        wl.writer.append(kv);
        wl.written += length;
        // Copy the row so we can detect when the row changes.
        this.previousRow = kv.getRow();
      }

      /* Create a new HFile.Writer. Close current if there is one.
       * @param writer
       * @param familydir
       * @return A new HFile.Writer.
       * @throws IOException
       */
      private HFile.Writer getNewWriter(final HFile.Writer writer,
          final Path familydir)
      throws IOException {
        close(writer);
        return new HFile.Writer(fs, StoreFile.getUniqueFile(fs, familydir),
          blocksize, compression, KeyValue.KEY_COMPARATOR);
      }

      private void close(final HFile.Writer w) throws IOException {
        if (w != null) {
          w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
              Bytes.toBytes(System.currentTimeMillis()));
          w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
              Bytes.toBytes(context.getTaskAttemptID().toString()));
          w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY,
              Bytes.toBytes(true));
          w.close();
        }
      }

      public void close(TaskAttemptContext c)
      throws IOException, InterruptedException {
        for (Map.Entry<byte [], WriterLength> e: this.writers.entrySet()) {
          close(e.getValue().writer);
        }
      }
    };
  }

  /*
   * Data structure to hold a Writer and amount of data written on it.
   */
  static class WriterLength {
    long written = 0;
    HFile.Writer writer = null;
  }

  /**
   * Return the start keys of all of the regions in this table,
   * as a list of ImmutableBytesWritable.
   */
  private static List<ImmutableBytesWritable> getRegionStartKeys(HTable table)
  throws IOException {
    byte[][] byteKeys = table.getStartKeys();
    ArrayList<ImmutableBytesWritable> ret =
      new ArrayList<ImmutableBytesWritable>(byteKeys.length);
    for (byte[] byteKey : byteKeys) {
      ret.add(new ImmutableBytesWritable(byteKey));
    }
    return ret;
  }

  /**
   * Write out a SequenceFile, readable by TotalOrderPartitioner, that
   * contains the split points in startKeys.
   * @param partitionsPath output path for SequenceFile
   * @param startKeys the region start keys
   */
  private static void writePartitions(Configuration conf, Path partitionsPath,
      List<ImmutableBytesWritable> startKeys) throws IOException {
    Preconditions.checkArgument(!startKeys.isEmpty(), "No regions passed");

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0.
    TreeSet<ImmutableBytesWritable> sorted =
      new TreeSet<ImmutableBytesWritable>(startKeys);

    ImmutableBytesWritable first = sorted.first();
    Preconditions.checkArgument(
        first.equals(HConstants.EMPTY_BYTE_ARRAY),
        "First region of table should have empty start key. Instead has: %s",
        Bytes.toStringBinary(first.get()));
    sorted.remove(first);

    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs,
        conf, partitionsPath, ImmutableBytesWritable.class, NullWritable.class);

    try {
      for (ImmutableBytesWritable startKey : sorted) {
        writer.append(startKey, NullWritable.get());
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Configure a MapReduce Job to perform an incremental load into the given
   * table. This
   * <ul>
   *   <li>Inspects the table to configure a total order partitioner</li>
   *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
   *   <li>Sets the number of reduce tasks to match the current number of regions</li>
   *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
   *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
   *     PutSortReducer)</li>
   * </ul>
   * The user should be sure to set the map output value class to either KeyValue or Put before
   * running this function, as in the sketch below.
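   * <p>
   * A driver sketch (MyPutMapper and the paths are hypothetical placeholders;
   * conf is an HBase-aware Configuration, table an open HTable):
   * <pre>
   *   Job job = new Job(conf, "incremental bulk load");
   *   job.setJarByClass(MyPutMapper.class);
   *   job.setMapperClass(MyPutMapper.class);   // emits (ImmutableBytesWritable, Put)
   *   job.setMapOutputKeyClass(ImmutableBytesWritable.class);
   *   job.setMapOutputValueClass(Put.class);
   *   FileInputFormat.addInputPath(job, new Path("/input"));
   *   FileOutputFormat.setOutputPath(job, new Path("/bulkload-output"));
   *   HFileOutputFormat.configureIncrementalLoad(job, table);
   *   job.waitForCompletion(true);
   * </pre>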
   */
  public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(PutSortReducer.class);
    } else {
      LOG.warn("Unknown map output value type: " + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " +
        "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(),
        "partitions_" + System.currentTimeMillis());
    LOG.info("Writing partition information to " + partitionsPath);

    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    // makeQualified returns a new Path rather than mutating; keep the result.
    partitionsPath = partitionsPath.makeQualified(fs);
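    // Ship the partitions file via the DistributedCache; the "#" fragment
    // names the symlink so TotalOrderPartitioner finds it at its default path.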
    URI cacheUri;
    try {
      cacheUri = new URI(partitionsPath.toString() + "#" +
          TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
      throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    LOG.info("Incremental table output configured.");
  }

}