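/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */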
package org.apache.hadoop.hbase.mapreduce.replication;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HConnectable;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationFactory;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.hadoop.hbase.replication.ReplicationPeerZKImpl;
import org.apache.hadoop.hbase.replication.ReplicationPeers;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

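/**
 * This map-only job compares the data from a local table with a remote one.
 * Every cell is compared and must have exactly the same keys (even timestamp)
 * as well as the same value. It is possible to restrict the job by time range
 * and families. The peer id that's provided must match the one given when the
 * replication stream was set up.
 * <p>
 * Two counters are provided, Verifier.Counters.GOODROWS and BADROWS. The
 * reason a row is different is shown in the map's log.
 */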
public class VerifyReplication extends Configured implements Tool {

  private static final Log LOG =
      LogFactory.getLog(VerifyReplication.class);

  public final static String NAME = "verifyrep";
  static long startTime = 0;
  static long endTime = Long.MAX_VALUE;
  static int versions = -1;
  static String tableName = null;
  static String families = null;
  static String peerId = null;

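  /**
   * Map-only comparator for 2 tables.
   */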
  public static class Verifier
      extends TableMapper<ImmutableBytesWritable, Put> {

    public enum Counters {
      GOODROWS, BADROWS, ONLY_IN_SOURCE_TABLE_ROWS, ONLY_IN_PEER_TABLE_ROWS,
      CONTENT_DIFFERENT_ROWS
    }

    private ResultScanner replicatedScanner;
    private Result currentCompareRowInPeerTable;

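    /**
     * Map method that compares every scanned row with the equivalent from
     * a distant cluster.
     * @param row The current table row key.
     * @param value The columns.
     * @param context The current context.
     * @throws IOException When something is broken with the data.
     */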
    @Override
    public void map(ImmutableBytesWritable row, final Result value,
        Context context)
        throws IOException {
      if (replicatedScanner == null) {
        Configuration conf = context.getConfiguration();
        final Scan scan = new Scan();
        scan.setCaching(conf.getInt(TableInputFormat.SCAN_CACHEDROWS, 1));
        long startTime = conf.getLong(NAME + ".startTime", 0);
        long endTime = conf.getLong(NAME + ".endTime", Long.MAX_VALUE);
        String families = conf.get(NAME + ".families", null);
        // Read the scan settings from the job configuration; the static
        // fields of this class are not set in the mapper's JVM.
        int versions = conf.getInt(NAME + ".versions", -1);
        if (families != null) {
          String[] fams = families.split(",");
          for (String fam : fams) {
            scan.addFamily(Bytes.toBytes(fam));
          }
        }
        scan.setTimeRange(startTime, endTime);
        if (versions >= 0) {
          scan.setMaxVersions(versions);
        }

        final TableSplit tableSplit = (TableSplit)(context.getInputSplit());
        HConnectionManager.execute(new HConnectable<Void>(conf) {
          @Override
          public Void connect(HConnection conn) throws IOException {
            String zkClusterKey = conf.get(NAME + ".peerQuorumAddress");
            Configuration peerConf = HBaseConfiguration.create(conf);
            ZKUtil.applyClusterKeyToConf(peerConf, zkClusterKey);

            TableName tableName = TableName.valueOf(conf.get(NAME + ".tableName"));

            Table replicatedTable = new HTable(peerConf, tableName);
            // Restrict the peer-side scan to the key range covered by this split.
            scan.setStartRow(value.getRow());
            scan.setStopRow(tableSplit.getEndRow());
            replicatedScanner = replicatedTable.getScanner(scan);
            return null;
          }
        });
        currentCompareRowInPeerTable = replicatedScanner.next();
      }
      while (true) {
        if (currentCompareRowInPeerTable == null) {
          // The peer scanner is exhausted: this row only exists in the source table.
          logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
          break;
        }
        int rowCmpRet = Bytes.compareTo(value.getRow(), currentCompareRowInPeerTable.getRow());
        if (rowCmpRet == 0) {
          // The row keys match, so compare the content of the two rows.
          try {
            Result.compareResults(value, currentCompareRowInPeerTable);
            context.getCounter(Counters.GOODROWS).increment(1);
          } catch (Exception e) {
            logFailRowAndIncreaseCounter(context, Counters.CONTENT_DIFFERENT_ROWS, value);
          }
          currentCompareRowInPeerTable = replicatedScanner.next();
          break;
        } else if (rowCmpRet < 0) {
          // The row only exists in the source table.
          logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
          break;
        } else {
          // The row only exists in the peer table.
          logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
              currentCompareRowInPeerTable);
          currentCompareRowInPeerTable = replicatedScanner.next();
        }
      }
    }

    private void logFailRowAndIncreaseCounter(Context context, Counters counter, Result row) {
      context.getCounter(counter).increment(1);
      context.getCounter(Counters.BADROWS).increment(1);
      LOG.error(counter.toString() + ", rowkey=" + Bytes.toString(row.getRow()));
    }

    @Override
    protected void cleanup(Context context) {
      if (replicatedScanner != null) {
        try {
          // Any rows remaining in the peer scanner exist only in the peer table.
          while (currentCompareRowInPeerTable != null) {
            logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
                currentCompareRowInPeerTable);
            currentCompareRowInPeerTable = replicatedScanner.next();
          }
        } catch (Exception e) {
          LOG.error("Failed to scan the peer table in cleanup", e);
        } finally {
          replicatedScanner.close();
          replicatedScanner = null;
        }
      }
    }
  }

  private static String getPeerQuorumAddress(final Configuration conf) throws IOException {
    ZooKeeperWatcher localZKW = null;
    ReplicationPeerZKImpl peer = null;
    try {
      localZKW = new ZooKeeperWatcher(conf, "VerifyReplication",
          new Abortable() {
            @Override public void abort(String why, Throwable e) {}
            @Override public boolean isAborted() {return false;}
          });

      ReplicationPeers rp = ReplicationFactory.getReplicationPeers(localZKW, conf, localZKW);
      rp.init();

      Pair<ReplicationPeerConfig, Configuration> pair = rp.getPeerConf(peerId);
      if (pair == null) {
        throw new IOException("Couldn't get peer conf!");
      }
      Configuration peerConf = pair.getSecond();
      return ZKUtil.getZooKeeperClusterKey(peerConf);
    } catch (ReplicationException e) {
      throw new IOException(
          "An error occurred while trying to connect to the remote peer cluster", e);
    } finally {
      if (peer != null) {
        peer.close();
      }
      if (localZKW != null) {
        localZKW.close();
      }
    }
  }
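
  /**
   * Sets up the actual job.
   *
   * @param conf The current configuration.
   * @param args The command line parameters.
   * @return The newly created job.
   * @throws IOException When setting up the job fails.
   */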
  public static Job createSubmittableJob(Configuration conf, String[] args)
      throws IOException {
    if (!doCommandLine(args)) {
      return null;
    }
    if (!conf.getBoolean(HConstants.REPLICATION_ENABLE_KEY,
        HConstants.REPLICATION_ENABLE_DEFAULT)) {
      throw new IOException("Replication needs to be enabled to verify it.");
    }
    conf.set(NAME + ".peerId", peerId);
    conf.set(NAME + ".tableName", tableName);
    conf.setLong(NAME + ".startTime", startTime);
    conf.setLong(NAME + ".endTime", endTime);
    // Pass the requested number of versions to the mappers; the static field
    // is not visible in the mapper's JVM.
    conf.setInt(NAME + ".versions", versions);
    if (families != null) {
      conf.set(NAME + ".families", families);
    }

    String peerQuorumAddress = getPeerQuorumAddress(conf);
    conf.set(NAME + ".peerQuorumAddress", peerQuorumAddress);
    LOG.info("Peer Quorum Address: " + peerQuorumAddress);

    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(VerifyReplication.class);

    Scan scan = new Scan();
    scan.setTimeRange(startTime, endTime);
    if (versions >= 0) {
      scan.setMaxVersions(versions);
    }
    if (families != null) {
      String[] fams = families.split(",");
      for (String fam : fams) {
        scan.addFamily(Bytes.toBytes(fam));
      }
    }
    TableMapReduceUtil.initTableMapperJob(tableName, scan,
        Verifier.class, null, null, job);

    // Obtain an authentication token for the peer cluster as well.
    TableMapReduceUtil.initCredentialsForCluster(job, peerQuorumAddress);

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    return job;
  }

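  /**
   * Parses the command line parameters and initializes the job settings.
   *
   * @param args The command line parameters.
   * @return true if the arguments could be parsed, false otherwise.
   */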
  private static boolean doCommandLine(final String[] args) {
    if (args.length < 2) {
      printUsage(null);
      return false;
    }
    try {
      for (int i = 0; i < args.length; i++) {
        String cmd = args[i];
        if (cmd.equals("-h") || cmd.startsWith("--h")) {
          printUsage(null);
          return false;
        }

        final String startTimeArgKey = "--starttime=";
        if (cmd.startsWith(startTimeArgKey)) {
          startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
          continue;
        }

        final String endTimeArgKey = "--endtime=";
        if (cmd.startsWith(endTimeArgKey)) {
          endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
          continue;
        }

        final String versionsArgKey = "--versions=";
        if (cmd.startsWith(versionsArgKey)) {
          versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
          continue;
        }

        final String familiesArgKey = "--families=";
        if (cmd.startsWith(familiesArgKey)) {
          families = cmd.substring(familiesArgKey.length());
          continue;
        }

        if (i == args.length - 2) {
          peerId = cmd;
        }

        if (i == args.length - 1) {
          tableName = cmd;
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
      printUsage("Can't start because " + e.getMessage());
      return false;
    }
    return true;
  }

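  /**
   * @param errorMsg Error message. Can be null.
   */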
  private static void printUsage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
    }
    System.err.println("Usage: verifyrep [--starttime=X]" +
        " [--endtime=Y] [--versions=N] [--families=A] <peerid> <tablename>");
    System.err.println();
    System.err.println("Options:");
    System.err.println(" starttime    beginning of the time range");
    System.err.println("              without endtime means from starttime to forever");
    System.err.println(" endtime      end of the time range");
    System.err.println(" versions     number of cell versions to verify");
    System.err.println(" families     comma-separated list of families to verify");
    System.err.println();
    System.err.println("Args:");
    System.err.println(" peerid       Id of the peer used for verification, must match the one given for replication");
    System.err.println(" tablename    Name of the table to verify");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" To verify the data replicated from TestTable for a 1 hour window with peer #5 ");
    System.err.println(" $ bin/hbase " +
        "org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication" +
        " --starttime=1265875194289 --endtime=1265878794289 5 TestTable ");
  }

  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = createSubmittableJob(conf, args);
    if (job != null) {
      return job.waitForCompletion(true) ? 0 : 1;
    }
    return 1;
  }

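  /**
   * Main entry point.
   *
   * @param args The command line parameters.
   * @throws Exception When running the job fails.
   */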
  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(), args);
    System.exit(res);
  }
}