package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * A tool to replay WAL files as a M/R job.
 * The WAL can be replayed for a set of tables or all tables,
 * and a time range can be provided (in milliseconds).
 * The WAL is filtered to the passed set of tables and the output
 * can optionally be mapped to another set of tables.
 *
 * WAL replay can also generate HFiles for later bulk importing;
 * in that case the WAL is replayed for a single table only.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class WALPlayer extends Configured implements Tool {
  final static Log LOG = LogFactory.getLog(WALPlayer.class);
  final static String NAME = "WALPlayer";
  final static String BULK_OUTPUT_CONF_KEY = "wal.bulk.output";
  final static String TABLES_KEY = "wal.input.tables";
  final static String TABLE_MAP_KEY = "wal.input.tablesmap";

  // Register the old "hlog.*" configuration keys as deprecated aliases so that
  // existing configurations keep working with the new "wal.*" keys.
  static {
    Configuration.addDeprecation("hlog.bulk.output", BULK_OUTPUT_CONF_KEY);
    Configuration.addDeprecation("hlog.input.tables", TABLES_KEY);
    Configuration.addDeprecation("hlog.input.tablesmap", TABLE_MAP_KEY);
    Configuration.addDeprecation(HLogInputFormat.START_TIME_KEY, WALInputFormat.START_TIME_KEY);
    Configuration.addDeprecation(HLogInputFormat.END_TIME_KEY, WALInputFormat.END_TIME_KEY);
  }

  /**
   * A mapper that just writes out KeyValues.
   * This one can be used together with {@link KeyValueSortReducer}.
   */
  static class WALKeyValueMapper
      extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue> {
    private byte[] table;

    @Override
    public void map(WALKey key, WALEdit value, Context context)
        throws IOException {
      try {
        // skip all other tables
        if (Bytes.equals(table, key.getTablename().getName())) {
          for (Cell cell : value.getCells()) {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            if (WALEdit.isMetaEditFamily(kv.getFamily())) continue;
            context.write(new ImmutableBytesWritable(kv.getRow()), kv);
          }
        }
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }

    @Override
    public void setup(Context context) throws IOException {
      // only a single table is supported when HFiles are generated with HFileOutputFormat
      String[] tables = context.getConfiguration().getStrings(TABLES_KEY);
      if (tables == null || tables.length != 1) {
        throw new IOException("Exactly one table must be specified for the bulk HFile case.");
      }
      table = Bytes.toBytes(tables[0]);
    }
  }

  /**
   * A mapper that writes out {@link Mutation} to be directly applied to
   * a running HBase instance.
   */
  protected static class WALMapper
      extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation> {
    private Map<TableName, TableName> tables = new TreeMap<TableName, TableName>();

    @Override
    public void map(WALKey key, WALEdit value, Context context)
        throws IOException {
      try {
        if (tables.isEmpty() || tables.containsKey(key.getTablename())) {
          TableName targetTable = tables.isEmpty() ?
              key.getTablename() :
              tables.get(key.getTablename());
          ImmutableBytesWritable tableOut = new ImmutableBytesWritable(targetTable.getName());
          Put put = null;
          Delete del = null;
          Cell lastCell = null;
          for (Cell cell : value.getCells()) {
            // filter out WAL meta entries
            if (WALEdit.isMetaEditFamily(cell.getFamily())) continue;

            // allow a subclass to filter out this cell
            if (filter(context, cell)) {
              // A WALEdit may contain multiple operations (HBASE-3584) and/or
              // multiple rows (HBASE-5229).
              // Aggregate as much as possible into a single Put/Delete
              // operation before writing to the context.
              if (lastCell == null || lastCell.getTypeByte() != cell.getTypeByte()
                  || !CellUtil.matchingRow(lastCell, cell)) {
                // row or type changed, write out the aggregated mutation
                if (put != null) context.write(tableOut, put);
                if (del != null) context.write(tableOut, del);
                if (CellUtil.isDelete(cell)) {
                  del = new Delete(cell.getRow());
                } else {
                  put = new Put(cell.getRow());
                }
              }
              if (CellUtil.isDelete(cell)) {
                del.addDeleteMarker(cell);
              } else {
                put.add(cell);
              }
            }
            lastCell = cell;
          }
          // write residual mutations
          if (put != null) context.write(tableOut, put);
          if (del != null) context.write(tableOut, del);
        }
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }

    /**
     * Subclasses can override this to control which cells are written out.
     * @param cell the cell being considered
     * @return true if the cell should be applied to the output table
     */
    protected boolean filter(Context context, final Cell cell) {
      return true;
    }

    @Override
    public void setup(Context context) throws IOException {
      String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY);
      String[] tablesToUse = context.getConfiguration().getStrings(TABLES_KEY);
      if (tablesToUse == null && tableMap == null) {
        // then the user wants all tables
      } else if (tablesToUse == null || tableMap == null || tablesToUse.length != tableMap.length) {
        // this can only happen when WALMapper is used directly by a class other than WALPlayer
        throw new IOException("No tables or incorrect table mapping specified.");
      }
      int i = 0;
      if (tablesToUse != null) {
        for (String table : tablesToUse) {
          tables.put(TableName.valueOf(table),
            TableName.valueOf(tableMap[i++]));
        }
      }
    }
  }
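
  // A subclass of WALMapper can override filter() to skip cells before they are applied.
  // A minimal hypothetical sketch (class name and column family below are illustrative only):
  //
  //   public static class FamilyFilteringWALMapper extends WALMapper {
  //     @Override
  //     protected boolean filter(Context context, Cell cell) {
  //       // drop everything written to the (hypothetical) "audit" family
  //       return !CellUtil.matchingFamily(cell, Bytes.toBytes("audit"));
  //     }
  //   }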

  /**
   * @param conf the {@link Configuration} to use
   */
  public WALPlayer(Configuration conf) {
    super(conf);
  }

  void setupTime(Configuration conf, String option) throws IOException {
    String val = conf.get(option);
    if (null == val) return;
    long ms;
    try {
      // first try to parse the user-friendly date form
      ms = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SS").parse(val).getTime();
    } catch (ParseException pe) {
      try {
        // then see if just a number of milliseconds was specified
        ms = Long.parseLong(val);
      } catch (NumberFormatException nfe) {
        throw new IOException(option
            + " must be specified either in the form 2001-02-20T16:35:06.99 "
            + "or as number of milliseconds");
      }
    }
    conf.setLong(option, ms);
  }

  /**
   * Sets up the actual job.
   *
   * @param args the command line parameters
   * @return the newly created job
   * @throws IOException when setting up the job fails
   */
  public Job createSubmittableJob(String[] args)
      throws IOException {
    Configuration conf = getConf();
    setupTime(conf, HLogInputFormat.START_TIME_KEY);
    setupTime(conf, HLogInputFormat.END_TIME_KEY);
    Path inputDir = new Path(args[0]);
    String[] tables = args[1].split(",");
    String[] tableMap;
    if (args.length > 2) {
      tableMap = args[2].split(",");
      if (tableMap.length != tables.length) {
        throw new IOException("The same number of tables and table mappings must be provided.");
      }
    } else {
      // if no mapping is specified, map each table to itself
      tableMap = tables;
    }
    conf.setStrings(TABLES_KEY, tables);
    conf.setStrings(TABLE_MAP_KEY, tableMap);
    Job job = new Job(conf, NAME + "_" + inputDir);
    job.setJarByClass(WALPlayer.class);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(WALInputFormat.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
      // the bulk HFile case
      if (tables.length != 1) {
        throw new IOException("Exactly one table must be specified for the bulk export option");
      }
      HTable table = new HTable(conf, TableName.valueOf(tables[0]));
      job.setMapperClass(WALKeyValueMapper.class);
      job.setReducerClass(KeyValueSortReducer.class);
      Path outputDir = new Path(hfileOutPath);
      FileOutputFormat.setOutputPath(job, outputDir);
      job.setMapOutputValueClass(KeyValue.class);
      HFileOutputFormat2.configureIncrementalLoad(job, table, table);
      TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
          com.google.common.base.Preconditions.class);
    } else {
      // output to a live cluster
      job.setMapperClass(WALMapper.class);
      job.setOutputFormatClass(MultiTableOutputFormat.class);
      TableMapReduceUtil.addDependencyJars(job);
      TableMapReduceUtil.initCredentials(job);
      // no reducers
      job.setNumReduceTasks(0);
    }
    return job;
  }
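
  // Note: in the bulk HFile case the job only writes HFiles under the configured output
  // directory; they still have to be loaded into the target table afterwards, for example
  // with the LoadIncrementalHFiles ("completebulkload") tool shipped with HBase.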

  /**
   * Prints usage information for this tool.
   * @param errorMsg error message to print first; may be null
   */
  private void usage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
    }
    System.err.println("Usage: " + NAME + " [options] <wal inputdir> <tables> [<tableMappings>]");
    System.err.println("Read all WAL entries for <tables>.");
    System.err.println("If no tables (\"\") are specified, all tables are imported.");
    System.err.println("(Careful, even -ROOT- and hbase:meta entries will be imported in that case.)");
    System.err.println("Otherwise <tables> is a comma separated list of tables.\n");
    System.err.println("The WAL entries can be mapped to a new set of tables via <tableMappings>.");
    System.err.println("<tableMappings> is a comma separated list of target tables.");
    System.err.println("If specified, each table in <tables> must have a mapping.\n");
    System.err.println("By default " + NAME + " will load data directly into HBase.");
    System.err.println("To generate HFiles for a bulk data load instead, pass the option:");
    System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
    System.err.println(" (Only one table can be specified, and no mapping is allowed!)");
    System.err.println("Other options: (specify the time range of WAL edits to consider)");
    System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
    System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
    System.err.println("For performance also consider the following options:\n"
        + " -Dmapreduce.map.speculative=false\n"
        + " -Dmapreduce.reduce.speculative=false");
  }
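
  // Example invocations (the WAL directory and table names below are purely illustrative):
  //   hbase org.apache.hadoop.hbase.mapreduce.WALPlayer /hbase/oldWALs tableA
  //   hbase org.apache.hadoop.hbase.mapreduce.WALPlayer /hbase/oldWALs tableA tableA_copy
  //   hbase org.apache.hadoop.hbase.mapreduce.WALPlayer -Dwal.bulk.output=/tmp/walplayer-out \
  //       /hbase/oldWALs tableA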

  /**
   * Main entry point.
   *
   * @param args the command line parameters
   * @throws Exception when running the job fails
   */
  public static void main(String[] args) throws Exception {
    int ret = ToolRunner.run(new WALPlayer(HBaseConfiguration.create()), args);
    System.exit(ret);
  }

  @Override
  public int run(String[] args) throws Exception {
    String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    if (otherArgs.length < 2) {
      usage("Wrong number of arguments: " + otherArgs.length);
      System.exit(-1);
    }
    Job job = createSubmittableJob(otherArgs);
    return job.waitForCompletion(true) ? 0 : 1;
  }
}