1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import java.io.IOException;
21 import java.text.ParseException;
22 import java.text.SimpleDateFormat;
23 import java.util.Map;
24 import java.util.TreeMap;
25
26 import org.apache.hadoop.classification.InterfaceAudience;
27 import org.apache.hadoop.classification.InterfaceStability;
28 import org.apache.hadoop.conf.Configuration;
29 import org.apache.hadoop.conf.Configured;
30 import org.apache.hadoop.fs.Path;
31 import org.apache.hadoop.hbase.HBaseConfiguration;
32 import org.apache.hadoop.hbase.KeyValue;
33 import org.apache.hadoop.hbase.client.Delete;
34 import org.apache.hadoop.hbase.client.HTable;
35 import org.apache.hadoop.hbase.client.Mutation;
36 import org.apache.hadoop.hbase.client.Put;
37 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
38 import org.apache.hadoop.hbase.regionserver.wal.HLog;
39 import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
40 import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
41 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
42 import org.apache.hadoop.hbase.util.Bytes;
43 import org.apache.hadoop.mapreduce.Job;
44 import org.apache.hadoop.mapreduce.Mapper;
45 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
46 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
47 import org.apache.hadoop.util.GenericOptionsParser;
48 import org.apache.hadoop.util.Tool;
49 import org.apache.hadoop.util.ToolRunner;
50
51
52
53
54
55
56
57
58
59
60
61 @InterfaceAudience.Public
62 @InterfaceStability.Stable
63 public class WALPlayer extends Configured implements Tool {
64 final static String NAME = "WALPlayer";
65 final static String BULK_OUTPUT_CONF_KEY = "hlog.bulk.output";
66 final static String HLOG_INPUT_KEY = "hlog.input.dir";
67 final static String TABLES_KEY = "hlog.input.tables";
68 final static String TABLE_MAP_KEY = "hlog.input.tablesmap";
69
70
71
72
73
74 static class HLogKeyValueMapper
75 extends Mapper<HLogKey, WALEdit, ImmutableBytesWritable, KeyValue> {
76 private byte[] table;
77
78 @Override
79 public void map(HLogKey key, WALEdit value,
80 Context context)
81 throws IOException {
82 try {
83
84 if (Bytes.equals(table, key.getTablename())) {
85 for (KeyValue kv : value.getKeyValues()) {
86 if (HLogUtil.isMetaFamily(kv.getFamily())) continue;
87 context.write(new ImmutableBytesWritable(kv.getRow()), kv);
88 }
89 }
90 } catch (InterruptedException e) {
91 e.printStackTrace();
92 }
93 }
94
95 @Override
96 public void setup(Context context) throws IOException {
97
98 String tables[] = context.getConfiguration().getStrings(TABLES_KEY);
99 if (tables == null || tables.length != 1) {
100
101 throw new IOException("Exactly one table must be specified for bulk HFile case.");
102 }
103 table = Bytes.toBytes(tables[0]);
104 }
105 }
106
107
108
109
110
111 static class HLogMapper
112 extends Mapper<HLogKey, WALEdit, ImmutableBytesWritable, Mutation> {
113 private Map<byte[], byte[]> tables = new TreeMap<byte[], byte[]>(Bytes.BYTES_COMPARATOR);
114
115 @Override
116 public void map(HLogKey key, WALEdit value,
117 Context context)
118 throws IOException {
119 try {
120 if (tables.isEmpty() || tables.containsKey(key.getTablename())) {
121 byte[] targetTable = tables.isEmpty() ?
122 key.getTablename() :
123 tables.get(key.getTablename());
124 ImmutableBytesWritable tableOut = new ImmutableBytesWritable(targetTable);
125 Put put = null;
126 Delete del = null;
127 KeyValue lastKV = null;
128 for (KeyValue kv : value.getKeyValues()) {
129
130 if (HLogUtil.isMetaFamily(kv.getFamily())) continue;
131
132
133
134
135
136 if (lastKV == null || lastKV.getType() != kv.getType() || !lastKV.matchingRow(kv)) {
137
138 if (put != null) context.write(tableOut, put);
139 if (del != null) context.write(tableOut, del);
140
141 if (kv.isDelete()) {
142 del = new Delete(kv.getRow());
143 } else {
144 put = new Put(kv.getRow());
145 }
146 }
147 if (kv.isDelete()) {
148 del.addDeleteMarker(kv);
149 } else {
150 put.add(kv);
151 }
152 lastKV = kv;
153 }
154
155 if (put != null) context.write(tableOut, put);
156 if (del != null) context.write(tableOut, del);
157 }
158 } catch (InterruptedException e) {
159 e.printStackTrace();
160 }
161 }
162
163 @Override
164 public void setup(Context context) throws IOException {
165 String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY);
166 String[] tablesToUse = context.getConfiguration().getStrings(TABLES_KEY);
167 if (tablesToUse == null || tableMap == null || tablesToUse.length != tableMap.length) {
168
169 throw new IOException("No tables or incorrect table mapping specified.");
170 }
171 int i = 0;
172 for (String table : tablesToUse) {
173 tables.put(Bytes.toBytes(table), Bytes.toBytes(tableMap[i++]));
174 }
175 }
176 }
177
178
179
180
/**
 * Creates a player backed by the given configuration.
 *
 * @param conf configuration handed to the {@code Configured} superclass
 */
public WALPlayer(Configuration conf)
{
  super(conf);
}
184
185 void setupTime(Configuration conf, String option) throws IOException {
186 String val = conf.get(option);
187 if (val == null) return;
188 long ms;
189 try {
190
191 ms = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SS").parse(val).getTime();
192 } catch (ParseException pe) {
193 try {
194
195 ms = Long.parseLong(val);
196 } catch (NumberFormatException nfe) {
197 throw new IOException(option
198 + " must be specified either in the form 2001-02-20T16:35:06.99 "
199 + "or as number of milliseconds");
200 }
201 }
202 conf.setLong(option, ms);
203 }
204
205
206
207
208
209
210
211
212 public Job createSubmittableJob(String[] args)
213 throws IOException {
214 Configuration conf = getConf();
215 setupTime(conf, HLogInputFormat.START_TIME_KEY);
216 setupTime(conf, HLogInputFormat.END_TIME_KEY);
217 Path inputDir = new Path(args[0]);
218 String[] tables = args[1].split(",");
219 String[] tableMap;
220 if (args.length > 2) {
221 tableMap = args[2].split(",");
222 if (tableMap.length != tables.length) {
223 throw new IOException("The same number of tables and mapping must be provided.");
224 }
225 } else {
226
227 tableMap = tables;
228 }
229 conf.setStrings(TABLES_KEY, tables);
230 conf.setStrings(TABLE_MAP_KEY, tableMap);
231 Job job = new Job(conf, NAME + "_" + inputDir);
232 job.setJarByClass(WALPlayer.class);
233 FileInputFormat.setInputPaths(job, inputDir);
234 job.setInputFormatClass(HLogInputFormat.class);
235 job.setMapOutputKeyClass(ImmutableBytesWritable.class);
236 String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
237 if (hfileOutPath != null) {
238
239 if (tables.length != 1) {
240 throw new IOException("Exactly one table must be specified for the bulk export option");
241 }
242 HTable table = new HTable(conf, tables[0]);
243 job.setMapperClass(HLogKeyValueMapper.class);
244 job.setReducerClass(KeyValueSortReducer.class);
245 Path outputDir = new Path(hfileOutPath);
246 FileOutputFormat.setOutputPath(job, outputDir);
247 job.setMapOutputValueClass(KeyValue.class);
248 HFileOutputFormat.configureIncrementalLoad(job, table);
249 TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
250 com.google.common.base.Preconditions.class);
251 } else {
252
253 job.setMapperClass(HLogMapper.class);
254 job.setOutputFormatClass(MultiTableOutputFormat.class);
255 TableMapReduceUtil.addDependencyJars(job);
256
257 job.setNumReduceTasks(0);
258 }
259 return job;
260 }
261
262
263
264
265 private void usage(final String errorMsg) {
266 if (errorMsg != null && errorMsg.length() > 0) {
267 System.err.println("ERROR: " + errorMsg);
268 }
269 System.err.println("Usage: " + NAME + " [options] <wal inputdir> <tables> [<tableMappings>]");
270 System.err.println("Read all WAL entries for <tables>.");
271 System.err.println("If no tables (\"\") are specific, all tables are imported.");
272 System.err.println("(Careful, even -ROOT- and .META. entries will be imported in that case.)");
273 System.err.println("Otherwise <tables> is a comma separated list of tables.\n");
274 System.err.println("The WAL entries can be mapped to new set of tables via <tableMapping>.");
275 System.err.println("<tableMapping> is a command separated list of targettables.");
276 System.err.println("If specified, each table in <tables> must have a mapping.\n");
277 System.err.println("By default " + NAME + " will load data directly into HBase.");
278 System.err.println("To generate HFiles for a bulk data load instead, pass the option:");
279 System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
280 System.err.println(" (Only one table can be specified, and no mapping is allowed!)");
281 System.err.println("Other options: (specify time range to WAL edit to consider)");
282 System.err.println(" -D" + HLogInputFormat.START_TIME_KEY + "=[date|ms]");
283 System.err.println(" -D" + HLogInputFormat.END_TIME_KEY + "=[date|ms]");
284 System.err.println("For performance also consider the following options:\n"
285 + " -Dmapred.map.tasks.speculative.execution=false\n"
286 + " -Dmapred.reduce.tasks.speculative.execution=false");
287 }
288
289
290
291
292
293
294
295 public static void main(String[] args) throws Exception {
296 int ret = ToolRunner.run(new WALPlayer(HBaseConfiguration.create()), args);
297 System.exit(ret);
298 }
299
300 @Override
301 public int run(String[] args) throws Exception {
302 String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
303 if (otherArgs.length < 2) {
304 usage("Wrong number of arguments: " + otherArgs.length);
305 System.exit(-1);
306 }
307 Job job = createSubmittableJob(otherArgs);
308 return job.waitForCompletion(true) ? 0 : 1;
309 }
310 }