/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce.replication;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.HConnectable;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationFactory;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.hadoop.hbase.replication.ReplicationPeerZKImpl;
import org.apache.hadoop.hbase.replication.ReplicationPeers;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * This map-only job compares the data from a local table with a remote one.
 * Every cell is compared and must have exactly the same keys (even timestamp)
 * as well as the same value. It is possible to restrict the job by time range
 * and by families. The peer id that is provided must match the one given when
 * the replication stream was set up.
 * <p>
 * Two counters are provided, Verifier.Counters.GOODROWS and BADROWS. The
 * reason why a row is different is shown in the map's log.
 */
public class VerifyReplication extends Configured implements Tool {

  private static final Log LOG =
      LogFactory.getLog(VerifyReplication.class);

  public final static String NAME = "verifyrep";
  private final static String PEER_CONFIG_PREFIX = NAME + ".peer.";
  static long startTime = 0;
  static long endTime = Long.MAX_VALUE;
  static int batch = Integer.MAX_VALUE;
  static int versions = -1;
  static String tableName = null;
  static String families = null;
  static String peerId = null;

  /**
   * Map-only comparator for 2 tables.
   */
  public static class Verifier
      extends TableMapper<ImmutableBytesWritable, Put> {

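    /**
     * GOODROWS counts rows whose content matches exactly. Every other counter
     * also increments BADROWS, broken down by whether the row exists only in
     * the source table, only in the peer table, or in both with different
     * content.
     */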
    public static enum Counters {
      GOODROWS, BADROWS, ONLY_IN_SOURCE_TABLE_ROWS, ONLY_IN_PEER_TABLE_ROWS, CONTENT_DIFFERENT_ROWS
    }

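    // Scanner over the corresponding key range of the peer table, opened lazily
    // on the first map() call, and the peer-side row currently being compared.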
    private ResultScanner replicatedScanner;
    private Result currentCompareRowInPeerTable;

    /**
     * Map method that compares every scanned row with the equivalent from
     * a distant cluster.
     * @param row  The current table row key.
     * @param value  The columns.
     * @param context  The current context.
     * @throws IOException When something is broken with the data.
     */
    @Override
    public void map(ImmutableBytesWritable row, final Result value,
                    Context context)
        throws IOException {
      if (replicatedScanner == null) {
        Configuration conf = context.getConfiguration();
        final Scan scan = new Scan();
        // Read the batch size from the job configuration; static fields set in
        // the client JVM are not visible inside mapper JVMs.
        scan.setBatch(conf.getInt(NAME + ".batch", Integer.MAX_VALUE));
        scan.setCacheBlocks(false);
        scan.setCaching(conf.getInt(TableInputFormat.SCAN_CACHEDROWS, 1));
        long startTime = conf.getLong(NAME + ".startTime", 0);
        long endTime = conf.getLong(NAME + ".endTime", Long.MAX_VALUE);
        String families = conf.get(NAME + ".families", null);
        if (families != null) {
          String[] fams = families.split(",");
          for (String fam : fams) {
            scan.addFamily(Bytes.toBytes(fam));
          }
        }
        scan.setTimeRange(startTime, endTime);
        int versions = conf.getInt(NAME + ".versions", -1);
        LOG.info("Setting number of versions inside map as: " + versions);
        if (versions >= 0) {
          scan.setMaxVersions(versions);
        }

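        // Open a scanner over the same key range in the peer table, using the
        // peer cluster's configuration that was shipped with the job under the
        // "verifyrep.peer." prefix.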
        final TableSplit tableSplit = (TableSplit)(context.getInputSplit());
        HConnectionManager.execute(new HConnectable<Void>(conf) {
          @Override
          public Void connect(HConnection conn) throws IOException {
            String zkClusterKey = conf.get(NAME + ".peerQuorumAddress");
            Configuration peerConf = HBaseConfiguration.createClusterConf(conf,
                zkClusterKey, PEER_CONFIG_PREFIX);

            HTable replicatedTable = new HTable(peerConf, conf.get(NAME + ".tableName"));
            scan.setStartRow(tableSplit.getStartRow());
            scan.setStopRow(tableSplit.getEndRow());
            replicatedScanner = replicatedTable.getScanner(scan);
            return null;
          }
        });
        currentCompareRowInPeerTable = replicatedScanner.next();
      }
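      // Both scanners return rows in sorted order, so a merge-style walk can
      // classify each row as matching, source-only, or peer-only in one pass.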
      while (true) {
        if (currentCompareRowInPeerTable == null) {
          // reached the end of the peer table region: row only exists in the source table
          logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
          break;
        }
        int rowCmpRet = Bytes.compareTo(value.getRow(), currentCompareRowInPeerTable.getRow());
        if (rowCmpRet == 0) {
          // the row keys are the same: compare the content of the rows
          try {
            Result.compareResults(value, currentCompareRowInPeerTable);
            context.getCounter(Counters.GOODROWS).increment(1);
          } catch (Exception e) {
            logFailRowAndIncreaseCounter(context, Counters.CONTENT_DIFFERENT_ROWS, value);
            LOG.error("Exception while comparing row: " + e);
          }
          currentCompareRowInPeerTable = replicatedScanner.next();
          break;
        } else if (rowCmpRet < 0) {
          // the row only exists in the source table
          logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
          break;
        } else {
          // the row only exists in the peer table
          logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
              currentCompareRowInPeerTable);
          currentCompareRowInPeerTable = replicatedScanner.next();
        }
      }
    }

    private void logFailRowAndIncreaseCounter(Context context, Counters counter, Result row) {
      context.getCounter(counter).increment(1);
      context.getCounter(Counters.BADROWS).increment(1);
      LOG.error(counter.toString() + ", rowkey=" + Bytes.toString(row.getRow()));
    }

    @Override
    protected void cleanup(Context context) {
      if (replicatedScanner != null) {
        try {
          // any rows left in the peer scanner exist only in the peer table
          while (currentCompareRowInPeerTable != null) {
            logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
                currentCompareRowInPeerTable);
            currentCompareRowInPeerTable = replicatedScanner.next();
          }
        } catch (Exception e) {
          LOG.error("Failed to scan the peer table in cleanup", e);
        } finally {
          replicatedScanner.close();
          replicatedScanner = null;
        }
      }
    }
  }

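  /**
   * Looks up the given peer's cluster key and configuration in the local
   * cluster's ZooKeeper replication state.
   */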
  private static Pair<ReplicationPeerConfig, Configuration> getPeerQuorumConfig(
      final Configuration conf) throws IOException {
    ZooKeeperWatcher localZKW = null;
    ReplicationPeerZKImpl peer = null;
    try {
      localZKW = new ZooKeeperWatcher(conf, "VerifyReplication",
          new Abortable() {
            @Override public void abort(String why, Throwable e) {}
            @Override public boolean isAborted() {return false;}
          });

      ReplicationPeers rp = ReplicationFactory.getReplicationPeers(localZKW, conf, localZKW);
      rp.init();

      Pair<ReplicationPeerConfig, Configuration> pair = rp.getPeerConf(peerId);
      if (pair == null) {
        throw new IOException("Couldn't get peer conf!");
      }

      return pair;
    } catch (ReplicationException e) {
      throw new IOException(
          "An error occurred while trying to connect to the remote peer cluster", e);
    } finally {
      if (peer != null) {
        peer.close();
      }
      if (localZKW != null) {
        localZKW.close();
      }
    }
  }

  /**
   * Sets up the actual job.
   *
   * @param conf  The current configuration.
   * @param args  The command line parameters.
   * @return The newly created job.
   * @throws java.io.IOException When setting up the job fails.
   */
  public static Job createSubmittableJob(Configuration conf, String[] args)
      throws IOException {
    if (!doCommandLine(args)) {
      return null;
    }
    if (!conf.getBoolean(HConstants.REPLICATION_ENABLE_KEY,
        HConstants.REPLICATION_ENABLE_DEFAULT)) {
      throw new IOException("Replication needs to be enabled to verify it.");
    }
    conf.set(NAME + ".peerId", peerId);
    conf.set(NAME + ".tableName", tableName);
    conf.setLong(NAME + ".startTime", startTime);
    conf.setLong(NAME + ".endTime", endTime);
    if (families != null) {
      conf.set(NAME + ".families", families);
    }

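    // Resolve the peer's ZooKeeper quorum address and configuration from the
    // local cluster's replication state, and ship both to the mappers.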
    Pair<ReplicationPeerConfig, Configuration> peerConfigPair = getPeerQuorumConfig(conf);
    ReplicationPeerConfig peerConfig = peerConfigPair.getFirst();
    String peerQuorumAddress = peerConfig.getClusterKey();
    LOG.info("Peer Quorum Address: " + peerQuorumAddress + ", Peer Configuration: " +
        peerConfig.getConfiguration());
    conf.set(NAME + ".peerQuorumAddress", peerQuorumAddress);
    HBaseConfiguration.setWithPrefix(conf, PEER_CONFIG_PREFIX,
        peerConfig.getConfiguration().entrySet());

    conf.setInt(NAME + ".versions", versions);
    LOG.info("Number of versions: " + versions);
    // Ship the batch size through the configuration as well; the mapper cannot
    // see static fields that were set in the client JVM.
    conf.setInt(NAME + ".batch", batch);

    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(VerifyReplication.class);

    Scan scan = new Scan();
    scan.setTimeRange(startTime, endTime);
    if (versions >= 0) {
      scan.setMaxVersions(versions);
      LOG.info("Number of versions set to " + versions);
    }
    if (families != null) {
      String[] fams = families.split(",");
      for (String fam : fams) {
        scan.addFamily(Bytes.toBytes(fam));
      }
    }
    TableMapReduceUtil.initTableMapperJob(tableName, scan,
        Verifier.class, null, null, job);

    Configuration peerClusterConf = peerConfigPair.getSecond();

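    // Acquire authentication tokens for the peer cluster too, so the mappers
    // can scan it when security is enabled.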
    TableMapReduceUtil.initCredentialsForCluster(job, peerClusterConf);

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    return job;
  }

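  // Parses the command-line flags into the static fields above; returns false
  // if the arguments are invalid and usage should be printed instead.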
  private static boolean doCommandLine(final String[] args) {
    if (args.length < 2) {
      printUsage(null);
      return false;
    }
    try {
      for (int i = 0; i < args.length; i++) {
        String cmd = args[i];
        if (cmd.equals("-h") || cmd.startsWith("--h")) {
          printUsage(null);
          return false;
        }

        final String startTimeArgKey = "--starttime=";
        if (cmd.startsWith(startTimeArgKey)) {
          startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
          continue;
        }

        final String endTimeArgKey = "--endtime=";
        if (cmd.startsWith(endTimeArgKey)) {
          endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
          continue;
        }

        final String versionsArgKey = "--versions=";
        if (cmd.startsWith(versionsArgKey)) {
          versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
          continue;
        }

        final String batchArgKey = "--batch=";
        if (cmd.startsWith(batchArgKey)) {
          batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
          continue;
        }

        final String familiesArgKey = "--families=";
        if (cmd.startsWith(familiesArgKey)) {
          families = cmd.substring(familiesArgKey.length());
          continue;
        }

        if (i == args.length - 2) {
          peerId = cmd;
        }

        if (i == args.length - 1) {
          tableName = cmd;
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
      printUsage("Can't start because " + e.getMessage());
      return false;
    }
    return true;
  }

  /*
   * @param errorMsg Error message. Can be null.
   */
  private static void printUsage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
    }
    System.err.println("Usage: verifyrep [--starttime=X]" +
        " [--endtime=Y] [--versions=N] [--batch=B] [--families=A] <peerid> <tablename>");
    System.err.println();
    System.err.println("Options:");
    System.err.println(" starttime    beginning of the time range");
    System.err.println("              without endtime means from starttime to forever");
    System.err.println(" endtime      end of the time range");
    System.err.println(" versions     number of cell versions to verify");
    System.err.println(" batch        batch size for the scan of the peer table");
    System.err.println(" families     comma-separated list of families to verify");
    System.err.println();
    System.err.println("Args:");
    System.err.println(" peerid       Id of the peer used for verification, must match the one given for replication");
    System.err.println(" tablename    Name of the table to verify");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" To verify the data replicated from TestTable for a 1 hour window with peer #5");
    System.err.println(" $ bin/hbase " +
        "org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication" +
        " --starttime=1265875194289 --endtime=1265878794289 5 TestTable ");
  }

  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = createSubmittableJob(conf, args);
    if (job != null) {
      return job.waitForCompletion(true) ? 0 : 1;
    }
    return 1;
  }

  /**
   * Main entry point.
   *
   * @param args  The command line parameters.
   * @throws Exception When running the job fails.
   */
  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(), args);
    System.exit(res);
  }
}