1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce.replication;
20
21 import java.io.IOException;
22
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.conf.Configuration;
26 import org.apache.hadoop.conf.Configured;
27 import org.apache.hadoop.hbase.*;
28 import org.apache.hadoop.hbase.client.HConnectable;
29 import org.apache.hadoop.hbase.client.HConnection;
30 import org.apache.hadoop.hbase.client.HConnectionManager;
31 import org.apache.hadoop.hbase.client.HTable;
32 import org.apache.hadoop.hbase.client.Put;
33 import org.apache.hadoop.hbase.client.Result;
34 import org.apache.hadoop.hbase.client.ResultScanner;
35 import org.apache.hadoop.hbase.client.Scan;
36 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
37 import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
38 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
39 import org.apache.hadoop.hbase.mapreduce.TableMapper;
40 import org.apache.hadoop.hbase.mapreduce.TableSplit;
41 import org.apache.hadoop.hbase.replication.ReplicationException;
42 import org.apache.hadoop.hbase.replication.ReplicationFactory;
43 import org.apache.hadoop.hbase.replication.ReplicationPeer;
44 import org.apache.hadoop.hbase.replication.ReplicationPeers;
45 import org.apache.hadoop.hbase.util.Bytes;
46 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
47 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
48 import org.apache.hadoop.mapreduce.Job;
49 import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
50 import org.apache.hadoop.util.Tool;
51 import org.apache.hadoop.util.ToolRunner;
52
53
54
55
56
57
58
59
60
61
62
63 public class VerifyReplication extends Configured implements Tool {
64
65 private static final Log LOG =
66 LogFactory.getLog(VerifyReplication.class);
67
68 public final static String NAME = "verifyrep";
69 static long startTime = 0;
70 static long endTime = Long.MAX_VALUE;
71 static int versions = -1;
72 static String tableName = null;
73 static String families = null;
74 static String peerId = null;
75
76
77
78
79 public static class Verifier
80 extends TableMapper<ImmutableBytesWritable, Put> {
81
82 public static enum Counters {
83 GOODROWS, BADROWS, ONLY_IN_SOURCE_TABLE_ROWS, ONLY_IN_PEER_TABLE_ROWS, CONTENT_DIFFERENT_ROWS}
84
85 private ResultScanner replicatedScanner;
86 private Result currentCompareRowInPeerTable;
87
88
89
90
91
92
93
94
95
96 @Override
97 public void map(ImmutableBytesWritable row, final Result value,
98 Context context)
99 throws IOException {
100 if (replicatedScanner == null) {
101 Configuration conf = context.getConfiguration();
102 final Scan scan = new Scan();
103 scan.setCaching(conf.getInt(TableInputFormat.SCAN_CACHEDROWS, 1));
104 long startTime = conf.getLong(NAME + ".startTime", 0);
105 long endTime = conf.getLong(NAME + ".endTime", Long.MAX_VALUE);
106 String families = conf.get(NAME + ".families", null);
107 if(families != null) {
108 String[] fams = families.split(",");
109 for(String fam : fams) {
110 scan.addFamily(Bytes.toBytes(fam));
111 }
112 }
113 scan.setTimeRange(startTime, endTime);
114 if (versions >= 0) {
115 scan.setMaxVersions(versions);
116 }
117
118 final TableSplit tableSplit = (TableSplit)(context.getInputSplit());
119 HConnectionManager.execute(new HConnectable<Void>(conf) {
120 @Override
121 public Void connect(HConnection conn) throws IOException {
122 String zkClusterKey = conf.get(NAME + ".peerQuorumAddress");
123 Configuration peerConf = HBaseConfiguration.create(conf);
124 ZKUtil.applyClusterKeyToConf(peerConf, zkClusterKey);
125
126 HTable replicatedTable = new HTable(peerConf, conf.get(NAME + ".tableName"));
127 scan.setStartRow(tableSplit.getStartRow());
128 scan.setStopRow(tableSplit.getEndRow());
129 replicatedScanner = replicatedTable.getScanner(scan);
130 return null;
131 }
132 });
133 currentCompareRowInPeerTable = replicatedScanner.next();
134 }
135 while (true) {
136 if (currentCompareRowInPeerTable == null) {
137
138 logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
139 break;
140 }
141 int rowCmpRet = Bytes.compareTo(value.getRow(), currentCompareRowInPeerTable.getRow());
142 if (rowCmpRet == 0) {
143
144 try {
145 Result.compareResults(value, currentCompareRowInPeerTable);
146 context.getCounter(Counters.GOODROWS).increment(1);
147 } catch (Exception e) {
148 logFailRowAndIncreaseCounter(context, Counters.CONTENT_DIFFERENT_ROWS, value);
149 }
150 currentCompareRowInPeerTable = replicatedScanner.next();
151 break;
152 } else if (rowCmpRet < 0) {
153
154 logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
155 break;
156 } else {
157
158 logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
159 currentCompareRowInPeerTable);
160 currentCompareRowInPeerTable = replicatedScanner.next();
161 }
162 }
163 }
164
165 private void logFailRowAndIncreaseCounter(Context context, Counters counter, Result row) {
166 context.getCounter(counter).increment(1);
167 context.getCounter(Counters.BADROWS).increment(1);
168 LOG.error(counter.toString() + ", rowkey=" + Bytes.toString(row.getRow()));
169 }
170
171 protected void cleanup(Context context) {
172 if (replicatedScanner != null) {
173 try {
174 while (currentCompareRowInPeerTable != null) {
175 logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
176 currentCompareRowInPeerTable);
177 currentCompareRowInPeerTable = replicatedScanner.next();
178 }
179 } catch (Exception e) {
180 LOG.error("fail to scan peer table in cleanup", e);
181 } finally {
182 replicatedScanner.close();
183 replicatedScanner = null;
184 }
185 }
186 }
187 }
188
189 private static String getPeerQuorumAddress(final Configuration conf) throws IOException {
190 ZooKeeperWatcher localZKW = null;
191 ReplicationPeer peer = null;
192 try {
193 localZKW = new ZooKeeperWatcher(conf, "VerifyReplication",
194 new Abortable() {
195 @Override public void abort(String why, Throwable e) {}
196 @Override public boolean isAborted() {return false;}
197 });
198
199 ReplicationPeers rp = ReplicationFactory.getReplicationPeers(localZKW, conf, localZKW);
200 rp.init();
201
202 Configuration peerConf = rp.getPeerConf(peerId);
203 if (peerConf == null) {
204 throw new IOException("Couldn't get peer conf!");
205 }
206
207 return ZKUtil.getZooKeeperClusterKey(peerConf);
208 } catch (ReplicationException e) {
209 throw new IOException(
210 "An error occured while trying to connect to the remove peer cluster", e);
211 } finally {
212 if (peer != null) {
213 peer.close();
214 }
215 if (localZKW != null) {
216 localZKW.close();
217 }
218 }
219 }
220
221
222
223
224
225
226
227
228
229 public static Job createSubmittableJob(Configuration conf, String[] args)
230 throws IOException {
231 if (!doCommandLine(args)) {
232 return null;
233 }
234 if (!conf.getBoolean(HConstants.REPLICATION_ENABLE_KEY,
235 HConstants.REPLICATION_ENABLE_DEFAULT)) {
236 throw new IOException("Replication needs to be enabled to verify it.");
237 }
238 conf.set(NAME+".peerId", peerId);
239 conf.set(NAME+".tableName", tableName);
240 conf.setLong(NAME+".startTime", startTime);
241 conf.setLong(NAME+".endTime", endTime);
242 if (families != null) {
243 conf.set(NAME+".families", families);
244 }
245
246 String peerQuorumAddress = getPeerQuorumAddress(conf);
247 conf.set(NAME + ".peerQuorumAddress", peerQuorumAddress);
248 LOG.info("Peer Quorum Address: " + peerQuorumAddress);
249
250 Job job = new Job(conf, NAME + "_" + tableName);
251 job.setJarByClass(VerifyReplication.class);
252
253 Scan scan = new Scan();
254 scan.setTimeRange(startTime, endTime);
255 if (versions >= 0) {
256 scan.setMaxVersions(versions);
257 }
258 if(families != null) {
259 String[] fams = families.split(",");
260 for(String fam : fams) {
261 scan.addFamily(Bytes.toBytes(fam));
262 }
263 }
264 TableMapReduceUtil.initTableMapperJob(tableName, scan,
265 Verifier.class, null, null, job);
266
267
268 TableMapReduceUtil.initCredentialsForCluster(job, peerQuorumAddress);
269
270 job.setOutputFormatClass(NullOutputFormat.class);
271 job.setNumReduceTasks(0);
272 return job;
273 }
274
275 private static boolean doCommandLine(final String[] args) {
276 if (args.length < 2) {
277 printUsage(null);
278 return false;
279 }
280 try {
281 for (int i = 0; i < args.length; i++) {
282 String cmd = args[i];
283 if (cmd.equals("-h") || cmd.startsWith("--h")) {
284 printUsage(null);
285 return false;
286 }
287
288 final String startTimeArgKey = "--starttime=";
289 if (cmd.startsWith(startTimeArgKey)) {
290 startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
291 continue;
292 }
293
294 final String endTimeArgKey = "--endtime=";
295 if (cmd.startsWith(endTimeArgKey)) {
296 endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
297 continue;
298 }
299
300 final String versionsArgKey = "--versions=";
301 if (cmd.startsWith(versionsArgKey)) {
302 versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
303 continue;
304 }
305
306 final String familiesArgKey = "--families=";
307 if (cmd.startsWith(familiesArgKey)) {
308 families = cmd.substring(familiesArgKey.length());
309 continue;
310 }
311
312 if (i == args.length-2) {
313 peerId = cmd;
314 }
315
316 if (i == args.length-1) {
317 tableName = cmd;
318 }
319 }
320 } catch (Exception e) {
321 e.printStackTrace();
322 printUsage("Can't start because " + e.getMessage());
323 return false;
324 }
325 return true;
326 }
327
328
329
330
331 private static void printUsage(final String errorMsg) {
332 if (errorMsg != null && errorMsg.length() > 0) {
333 System.err.println("ERROR: " + errorMsg);
334 }
335 System.err.println("Usage: verifyrep [--starttime=X]" +
336 " [--stoptime=Y] [--families=A] <peerid> <tablename>");
337 System.err.println();
338 System.err.println("Options:");
339 System.err.println(" starttime beginning of the time range");
340 System.err.println(" without endtime means from starttime to forever");
341 System.err.println(" endtime end of the time range");
342 System.err.println(" versions number of cell versions to verify");
343 System.err.println(" families comma-separated list of families to copy");
344 System.err.println();
345 System.err.println("Args:");
346 System.err.println(" peerid Id of the peer used for verification, must match the one given for replication");
347 System.err.println(" tablename Name of the table to verify");
348 System.err.println();
349 System.err.println("Examples:");
350 System.err.println(" To verify the data replicated from TestTable for a 1 hour window with peer #5 ");
351 System.err.println(" $ bin/hbase " +
352 "org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication" +
353 " --starttime=1265875194289 --endtime=1265878794289 5 TestTable ");
354 }
355
356 @Override
357 public int run(String[] args) throws Exception {
358 Configuration conf = this.getConf();
359 Job job = createSubmittableJob(conf, args);
360 if (job != null) {
361 return job.waitForCompletion(true) ? 0 : 1;
362 }
363 return 1;
364 }
365
366
367
368
369
370
371
372 public static void main(String[] args) throws Exception {
373 int res = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(), args);
374 System.exit(res);
375 }
376 }