1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import java.io.IOException;
22
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.classification.InterfaceAudience;
26 import org.apache.hadoop.classification.InterfaceStability;
27 import org.apache.hadoop.conf.Configuration;
28 import org.apache.hadoop.fs.Path;
29 import org.apache.hadoop.hbase.Cell;
30 import org.apache.hadoop.hbase.CellUtil;
31 import org.apache.hadoop.hbase.HBaseConfiguration;
32 import org.apache.hadoop.hbase.client.Result;
33 import org.apache.hadoop.hbase.client.Scan;
34 import org.apache.hadoop.hbase.filter.CompareFilter;
35 import org.apache.hadoop.hbase.filter.Filter;
36 import org.apache.hadoop.hbase.filter.PrefixFilter;
37 import org.apache.hadoop.hbase.filter.RegexStringComparator;
38 import org.apache.hadoop.hbase.filter.RowFilter;
39 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
40 import org.apache.hadoop.hbase.util.Bytes;
41 import org.apache.hadoop.io.IntWritable;
42 import org.apache.hadoop.io.Text;
43 import org.apache.hadoop.mapreduce.Job;
44 import org.apache.hadoop.mapreduce.Reducer;
45 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
46 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
47 import org.apache.hadoop.util.GenericOptionsParser;
48
49 import com.google.common.base.Preconditions;
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69 @InterfaceAudience.Public
70 @InterfaceStability.Stable
71 public class CellCounter {
72 private static final Log LOG =
73 LogFactory.getLog(CellCounter.class.getName());
74
75
76
77
78
79 static final String NAME = "CellCounter";
80
81
82
83
84 static class CellCounterMapper
85 extends TableMapper<Text, IntWritable> {
86
87
88
89 public static enum Counters {
90 ROWS
91 }
92
93
94
95
96
97
98
99
100
101
102
103
104 @Override
105 public void map(ImmutableBytesWritable row, Result values,
106 Context context)
107 throws IOException {
108 Preconditions.checkState(values != null,
109 "values passed to the map is null");
110 String currentFamilyName = null;
111 String currentQualifierName = null;
112 String currentRowKey = null;
113 Configuration config = context.getConfiguration();
114 String separator = config.get("ReportSeparator",":");
115 try {
116 context.getCounter(Counters.ROWS).increment(1);
117 context.write(new Text("Total ROWS"), new IntWritable(1));
118
119 for (Cell value : values.listCells()) {
120 currentRowKey = Bytes.toStringBinary(CellUtil.cloneRow(value));
121 String thisRowFamilyName = Bytes.toStringBinary(CellUtil.cloneFamily(value));
122 if (thisRowFamilyName != null &&
123 !thisRowFamilyName.equals(currentFamilyName)) {
124 currentFamilyName = thisRowFamilyName;
125 context.getCounter("CF", thisRowFamilyName).increment(1);
126 context.write(new Text("Total Families Across all Rows"),
127 new IntWritable(1));
128 context.write(new Text(thisRowFamilyName), new IntWritable(1));
129 }
130 String thisRowQualifierName = thisRowFamilyName + separator
131 + Bytes.toStringBinary(CellUtil.cloneQualifier(value));
132 if (thisRowQualifierName != null &&
133 !thisRowQualifierName.equals(currentQualifierName)) {
134 currentQualifierName = thisRowQualifierName;
135 context.getCounter("CFQL", thisRowQualifierName).increment(1);
136 context.write(new Text("Total Qualifiers across all Rows"),
137 new IntWritable(1));
138 context.write(new Text(thisRowQualifierName), new IntWritable(1));
139
140 context.getCounter("QL_VERSIONS", currentRowKey + separator +
141 thisRowQualifierName).increment(1);
142 context.write(new Text(currentRowKey + separator
143 + thisRowQualifierName + "_Versions"), new IntWritable(1));
144
145 } else {
146
147 currentQualifierName = thisRowQualifierName;
148 context.getCounter("QL_VERSIONS", currentRowKey + separator +
149 thisRowQualifierName).increment(1);
150 context.write(new Text(currentRowKey + separator
151 + thisRowQualifierName + "_Versions"), new IntWritable(1));
152 }
153 }
154 } catch (InterruptedException e) {
155 e.printStackTrace();
156 }
157 }
158 }
159
160 static class IntSumReducer<Key> extends Reducer<Key, IntWritable,
161 Key, IntWritable> {
162
163 private IntWritable result = new IntWritable();
164 public void reduce(Key key, Iterable<IntWritable> values,
165 Context context)
166 throws IOException, InterruptedException {
167 int sum = 0;
168 for (IntWritable val : values) {
169 sum += val.get();
170 }
171 result.set(sum);
172 context.write(key, result);
173 }
174 }
175
176
177
178
179
180
181
182
183
184 public static Job createSubmittableJob(Configuration conf, String[] args)
185 throws IOException {
186 String tableName = args[0];
187 Path outputDir = new Path(args[1]);
188 String reportSeparatorString = (args.length > 2) ? args[2]: ":";
189 conf.set("ReportSeparator", reportSeparatorString);
190 Job job = new Job(conf, NAME + "_" + tableName);
191 job.setJarByClass(CellCounter.class);
192 Scan scan = getConfiguredScanForJob(conf, args);
193 TableMapReduceUtil.initTableMapperJob(tableName, scan,
194 CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
195 job.setNumReduceTasks(1);
196 job.setMapOutputKeyClass(Text.class);
197 job.setMapOutputValueClass(IntWritable.class);
198 job.setOutputFormatClass(TextOutputFormat.class);
199 job.setOutputKeyClass(Text.class);
200 job.setOutputValueClass(IntWritable.class);
201 FileOutputFormat.setOutputPath(job, outputDir);
202 job.setReducerClass(IntSumReducer.class);
203 return job;
204 }
205
206 private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
207 Scan s = new Scan();
208
209 s.setMaxVersions(Integer.MAX_VALUE);
210 s.setCacheBlocks(false);
211
212 if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
213 s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
214 }
215
216 Filter rowFilter = getRowFilter(args);
217 if (rowFilter!= null) {
218 LOG.info("Setting Row Filter for counter.");
219 s.setFilter(rowFilter);
220 }
221 return s;
222 }
223
224
225 private static Filter getRowFilter(String[] args) {
226 Filter rowFilter = null;
227 String filterCriteria = (args.length > 3) ? args[3]: null;
228 if (filterCriteria == null) return null;
229 if (filterCriteria.startsWith("^")) {
230 String regexPattern = filterCriteria.substring(1, filterCriteria.length());
231 rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regexPattern));
232 } else {
233 rowFilter = new PrefixFilter(Bytes.toBytes(filterCriteria));
234 }
235 return rowFilter;
236 }
237
238
239
240
241
242
243
244 public static void main(String[] args) throws Exception {
245 Configuration conf = HBaseConfiguration.create();
246 String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
247 if (otherArgs.length < 1) {
248 System.err.println("ERROR: Wrong number of parameters: " + args.length);
249 System.err.println("Usage: CellCounter <tablename> <outputDir> <reportSeparator> " +
250 "[^[regex pattern] or [Prefix] for row filter]] ");
251 System.err.println(" Note: -D properties will be applied to the conf used. ");
252 System.err.println(" Additionally, the following SCAN properties can be specified");
253 System.err.println(" to get fine grained control on what is counted..");
254 System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
255 System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
256 "string : used to separate the rowId/column family name and qualifier name.");
257 System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
258 "operation to a limited subset of rows from the table based on regex or prefix pattern.");
259 System.exit(-1);
260 }
261 Job job = createSubmittableJob(conf, otherArgs);
262 System.exit(job.waitForCompletion(true) ? 0 : 1);
263 }
264 }