View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapreduce;
21  
22  import java.util.List;
23  import java.util.TreeSet;
24  
25  import org.apache.hadoop.hbase.KeyValue;
26  import org.apache.hadoop.hbase.client.Put;
27  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
28  import org.apache.hadoop.mapreduce.Reducer;
29  
30  /**
31   * Emits sorted Puts.
32   * Reads in all Puts from passed Iterator, sorts them, then emits
33   * Puts in sorted order.  If lots of columns per row, it will use lots of
34   * memory sorting.
35   * @see HFileOutputFormat
36   * @see KeyValueSortReducer
37   */
38  public class PutSortReducer extends
39      Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue> {
40    
41    @Override
42    protected void reduce(
43        ImmutableBytesWritable row,
44        java.lang.Iterable<Put> puts,
45        Reducer<ImmutableBytesWritable, Put,
46                ImmutableBytesWritable, KeyValue>.Context context)
47        throws java.io.IOException, InterruptedException
48    {
49      TreeSet<KeyValue> map = new TreeSet<KeyValue>(KeyValue.COMPARATOR);
50    
51      for (Put p : puts) {
52        for (List<KeyValue> kvs : p.getFamilyMap().values()) {
53          for (KeyValue kv : kvs) {
54            map.add(kv.clone());
55          }
56        }
57      }
58      context.setStatus("Read " + map.getClass());
59      int index = 0;
60      for (KeyValue kv : map) {
61        context.write(row, kv);
62        if (index > 0 && index % 100 == 0)
63          context.setStatus("Wrote " + index);
64      }
65    }
66  }