package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.StringUtils;
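/**
 * Emits sorted KeyValues. Reads in all Puts for a row from the passed
 * Iterator, sorts the contained KeyValues, and emits them in sorted order.
 * If a row has lots of columns, sorting can use lots of memory, so the
 * "putsortreducer.row.threshold" setting bounds how much is buffered at once.
 * @see HFileOutputFormat
 * @see KeyValueSortReducer
 */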
public class PutSortReducer extends
    Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue> {

  @Override
  protected void reduce(
      ImmutableBytesWritable row,
      Iterable<Put> puts,
      Reducer<ImmutableBytesWritable, Put,
          ImmutableBytesWritable, KeyValue>.Context context)
      throws IOException, InterruptedException
  {
    // although reduce() is called once per row, handle the pathological case
    // of a single row whose Puts do not all fit in memory at once
    long threshold = context.getConfiguration().getLong(
        "putsortreducer.row.threshold", 2L * (1<<30));
    Iterator<Put> iter = puts.iterator();
    while (iter.hasNext()) {
      TreeSet<KeyValue> map = new TreeSet<KeyValue>(KeyValue.COMPARATOR);
      long curSize = 0;
      // stop at the end of the input or once the RAM threshold is reached
      while (iter.hasNext() && curSize < threshold) {
        Put p = iter.next();
        for (List<KeyValue> kvs : p.getFamilyMap().values()) {
          for (KeyValue kv : kvs) {
            map.add(kv);
            curSize += kv.getLength();
          }
        }
      }
      context.setStatus("Read " + map.size() + " entries of " + map.getClass()
          + " (" + StringUtils.humanReadableInt(curSize) + ")");
      int index = 0;
      for (KeyValue kv : map) {
        context.write(row, kv);
        // report progress every 100 KeyValues written
        if (++index % 100 == 0)
          context.setStatus("Wrote " + index);
      }

      // if there are more Puts for this row still to process
      if (iter.hasNext()) {
        // force a flush, because we cannot guarantee intra-row sorted order
        // across separately sorted batches
        context.write(null, null);
      }
    }
  }
}
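
// A minimal usage sketch (an assumption about typical wiring, not part of the
// original class): for HFile bulk loads this reducer is paired with
// HFileOutputFormat, whose configureIncrementalLoad() helper normally sets it
// up (along with total-order partitioning) when the map output value is Put.
//
//   Job job = new Job(conf, "hfile-bulk-load");
//   job.setMapOutputKeyClass(ImmutableBytesWritable.class);
//   job.setMapOutputValueClass(Put.class);
//   job.setReducerClass(PutSortReducer.class);
//   job.setOutputFormatClass(HFileOutputFormat.class);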