package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

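/**
 * A map-only job that counts the rows of an HBase table. The count is
 * accumulated in a MapReduce counter rather than written to any output.
 */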
public class RowCounter {

  /** Name of this 'program'. */
  static final String NAME = "rowcounter";

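  /**
   * Mapper that runs the count: it increments the ROWS counter once per input
   * row and emits no output records.
   */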
  static class RowCounterMapper
  extends TableMapper<ImmutableBytesWritable, Result> {

    /** Counter enumeration to count the actual rows. */
    public static enum Counters {ROWS}

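    /**
     * Counts the current row.
     *
     * @param row The current table row key.
     * @param values The columns of the current row.
     * @param context The current task context, which holds the counter.
     * @throws IOException When something goes wrong while counting.
     */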
    @Override
    public void map(ImmutableBytesWritable row, Result values,
        Context context)
    throws IOException {
      context.getCounter(Counters.ROWS).increment(1);
    }
  }

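  /**
   * Sets up the actual job.
   *
   * @param conf The current configuration.
   * @param args The command line parameters: the table name, an optional
   *   {@code --range=[startKey],[endKey]} switch, and optional column names.
   * @return The newly created job, or {@code null} if the arguments were invalid.
   * @throws IOException When setting up the job fails.
   */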
  public static Job createSubmittableJob(Configuration conf, String[] args)
  throws IOException {
    String tableName = args[0];
    String startKey = null;
    String endKey = null;
    StringBuilder sb = new StringBuilder();

    final String rangeSwitch = "--range=";

    // First argument is the table name; everything else is either the
    // optional --range switch or a column/family to restrict the scan to.
    for (int i = 1; i < args.length; i++) {
      if (args[i].startsWith(rangeSwitch)) {
        String[] startEnd = args[i].substring(rangeSwitch.length()).split(",", 2);
        if (startEnd.length != 2 || startEnd[1].contains(",")) {
          printUsage("Please specify range in such format as \"--range=a,b\" " +
              "or, with only one boundary, \"--range=,b\" or \"--range=a,\"");
          return null;
        }
        startKey = startEnd[0];
        endKey = startEnd[1];
      }
      else {
        // if no switch, assume column names
        sb.append(args[i]);
        sb.append(" ");
      }
    }

    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(RowCounter.class);
    Scan scan = new Scan();
    // Avoid polluting the region server block cache with a one-off scan.
    scan.setCacheBlocks(false);
    if (startKey != null && !startKey.equals("")) {
      scan.setStartRow(Bytes.toBytes(startKey));
    }
    if (endKey != null && !endKey.equals("")) {
      scan.setStopRow(Bytes.toBytes(endKey));
    }
    // Only the first KeyValue of each row is needed to count it.
    scan.setFilter(new FirstKeyOnlyFilter());
    if (sb.length() > 0) {
      for (String columnName : sb.toString().trim().split(" ")) {
        String[] fields = columnName.split(":");
        if (fields.length == 1) {
          scan.addFamily(Bytes.toBytes(fields[0]));
        } else {
          scan.addColumn(Bytes.toBytes(fields[0]), Bytes.toBytes(fields[1]));
        }
      }
    }
    // Map-only job; the counter holds the result, so no output is written.
    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initTableMapperJob(tableName, scan,
      RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(0);
    return job;
  }

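  /**
   * Prints the error message followed by the general usage text.
   *
   * @param errorMessage The message describing what was wrong with the arguments.
   */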
  private static void printUsage(String errorMessage) {
    System.err.println("ERROR: " + errorMessage);
    printUsage();
  }

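  /**
   * Prints the usage and performance hints to standard error.
   */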
  private static void printUsage() {
    System.err.println("Usage: RowCounter [options] <tablename> " +
      "[--range=[startKey],[endKey]] [<column1> <column2>...]");
    System.err.println("For performance consider the following options:\n"
      + "-Dhbase.client.scanner.caching=100\n"
      + "-Dmapred.map.tasks.speculative.execution=false");
  }

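  /**
   * Main entry point.
   *
   * @param args The command line parameters.
   * @throws Exception When running the job fails.
   */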
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 1) {
      printUsage("Wrong number of parameters: " + otherArgs.length);
      System.exit(-1);
    }
    Job job = createSubmittableJob(conf, otherArgs);
    if (job == null) {
      System.exit(-1);
    }
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}