View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.mapreduce.lib.output;
20  
import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.util.ReflectionUtils;
41  
42  /** 
43   * An {@link org.apache.hadoop.mapreduce.OutputFormat} that writes 
44   * {@link MapFile}s.
45   */
46  @InterfaceAudience.Public
47  @InterfaceStability.Stable
48  public class MapFileOutputFormat 
49      extends FileOutputFormat<WritableComparable<?>, Writable> {
50  
51    public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(
52        TaskAttemptContext context) throws IOException {
53      Configuration conf = context.getConfiguration();
54      CompressionCodec codec = null;
55      CompressionType compressionType = CompressionType.NONE;
56      if (getCompressOutput(context)) {
57        // find the kind of compression to do
58        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);
59  
60        // find the right codec
61        Class<?> codecClass = getOutputCompressorClass(context,
62  	                          DefaultCodec.class);
63        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
64      }
65  
66      Path file = getDefaultWorkFile(context, "");
67      FileSystem fs = file.getFileSystem(conf);
68      // ignore the progress parameter, since MapFile is local
69      final MapFile.Writer out =
70        new MapFile.Writer(conf, fs, file.toString(),
71          context.getOutputKeyClass().asSubclass(WritableComparable.class),
72          context.getOutputValueClass().asSubclass(Writable.class),
73          compressionType, codec, context);
74  
75      return new RecordWriter<WritableComparable<?>, Writable>() {
76          public void write(WritableComparable<?> key, Writable value)
77              throws IOException {
78            out.append(key, value);
79          }
80  
81          public void close(TaskAttemptContext context) throws IOException { 
82            out.close();
83          }
84        };
85    }
86  
87    /** Open the output generated by this format. */
88    public static MapFile.Reader[] getReaders(Path dir,
89        Configuration conf) throws IOException {
90      FileSystem fs = dir.getFileSystem(conf);
91      Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));
92  
93      // sort names, so that hash partitioning works
94      Arrays.sort(names);
95      
96      MapFile.Reader[] parts = new MapFile.Reader[names.length];
97      for (int i = 0; i < names.length; i++) {
98        parts[i] = new MapFile.Reader(fs, names[i].toString(), conf);
99      }
100     return parts;
101   }
102     
103   /** Get an entry from output generated by this class. */
104   public static <K extends WritableComparable<?>, V extends Writable>
105       Writable getEntry(MapFile.Reader[] readers, 
106       Partitioner<K, V> partitioner, K key, V value) throws IOException {
107     int part = partitioner.getPartition(key, value, readers.length);
108     return readers[part].get(key, value);
109   }
110 }
111