/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.Iterator;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.google.common.base.Throwables;
import com.google.common.collect.Iterators;

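/**
 * SyncTable is the second phase of a two-step table comparison: HashTable is
 * run against the source table first, writing hashes of key ranges to an
 * output dir. This job scans the target table, hashes the same ranges, and
 * rescans only the ranges whose hashes differ, emitting the Puts and Deletes
 * needed to bring the target in sync with the source (or, when dryrun is set,
 * only counters).
 *
 * An illustrative invocation (paths and cluster keys are placeholders):
 * <pre>
 * $ bin/hbase org.apache.hadoop.hbase.mapreduce.HashTable tableA hdfs://nn:9000/hashes/tableA
 * $ bin/hbase org.apache.hadoop.hbase.mapreduce.SyncTable --dryrun=true \
 *     hdfs://nn:9000/hashes/tableA tableA tableA
 * </pre>
 */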
public class SyncTable extends Configured implements Tool {

  private static final Log LOG = LogFactory.getLog(SyncTable.class);

  static final String SOURCE_HASH_DIR_CONF_KEY = "sync.table.source.hash.dir";
  static final String SOURCE_TABLE_CONF_KEY = "sync.table.source.table.name";
  static final String TARGET_TABLE_CONF_KEY = "sync.table.target.table.name";
  static final String SOURCE_ZK_CLUSTER_CONF_KEY = "sync.table.source.zk.cluster";
  static final String TARGET_ZK_CLUSTER_CONF_KEY = "sync.table.target.zk.cluster";
  static final String DRY_RUN_CONF_KEY = "sync.table.dry.run";

  Path sourceHashDir;
  String sourceTableName;
  String targetTableName;

  String sourceZkCluster;
  String targetZkCluster;
  boolean dryRun;

  Counters counters;

  public SyncTable(Configuration conf) {
    super(conf);
  }

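  /**
   * Validate the HashTable output under sourceHashDir against its manifest,
   * then configure a map-only job that scans the target table with the same
   * Scan the source hashes were computed from.
   */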
  public Job createSubmittableJob(String[] args) throws IOException {
    FileSystem fs = sourceHashDir.getFileSystem(getConf());
    if (!fs.exists(sourceHashDir)) {
      throw new IOException("Source hash dir not found: " + sourceHashDir);
    }

    HashTable.TableHash tableHash = HashTable.TableHash.read(getConf(), sourceHashDir);
    LOG.info("Read source hash manifest: " + tableHash);
    LOG.info("Read " + tableHash.partitions.size() + " partition keys");
    if (!tableHash.tableName.equals(sourceTableName)) {
      LOG.warn("Table name mismatch - manifest indicates hash was taken from: "
          + tableHash.tableName + " but job is reading from: " + sourceTableName);
    }
    if (tableHash.numHashFiles != tableHash.partitions.size() + 1) {
      throw new RuntimeException("Hash data appears corrupt. The number of hash files created"
          + " should be 1 more than the number of partition keys. However, the manifest file"
          + " says numHashFiles=" + tableHash.numHashFiles + " but the number of partition keys"
          + " found in the partitions file is " + tableHash.partitions.size());
    }

    Path dataDir = new Path(sourceHashDir, HashTable.HASH_DATA_DIR);
    int dataSubdirCount = 0;
    for (FileStatus file : fs.listStatus(dataDir)) {
      if (file.getPath().getName().startsWith(HashTable.OUTPUT_DATA_FILE_PREFIX)) {
        dataSubdirCount++;
      }
    }

    if (dataSubdirCount != tableHash.numHashFiles) {
      throw new RuntimeException("Hash data appears corrupt. The number of hash files created"
          + " should be 1 more than the number of partition keys. However, the number of data dirs"
          + " found is " + dataSubdirCount + " but the number of partition keys"
          + " found in the partitions file is " + tableHash.partitions.size());
    }

    Job job = Job.getInstance(getConf(), getConf().get("mapreduce.job.name",
        "syncTable_" + sourceTableName + "-" + targetTableName));
    Configuration jobConf = job.getConfiguration();
    job.setJarByClass(HashTable.class);
    jobConf.set(SOURCE_HASH_DIR_CONF_KEY, sourceHashDir.toString());
    jobConf.set(SOURCE_TABLE_CONF_KEY, sourceTableName);
    jobConf.set(TARGET_TABLE_CONF_KEY, targetTableName);
    if (sourceZkCluster != null) {
      jobConf.set(SOURCE_ZK_CLUSTER_CONF_KEY, sourceZkCluster);
    }
    if (targetZkCluster != null) {
      jobConf.set(TARGET_ZK_CLUSTER_CONF_KEY, targetZkCluster);
    }
    jobConf.setBoolean(DRY_RUN_CONF_KEY, dryRun);

    TableMapReduceUtil.initTableMapperJob(targetTableName, tableHash.initScan(),
        SyncMapper.class, null, null, job);

    job.setNumReduceTasks(0);

    if (dryRun) {
      job.setOutputFormatClass(NullOutputFormat.class);
    } else {
      // No reducers.  Just write straight to table.  Call initTableReducerJob
      // because it sets up the TableOutputFormat.
      TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null,
          targetZkCluster, null, null);

      // would be nice to add an option for bulk load instead
    }

    return job;
  }

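  /**
   * Map-only task: re-hashes the target table in the same batches recorded by
   * HashTable and rescans any batch whose hash differs from the source's.
   */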
  public static class SyncMapper extends TableMapper<ImmutableBytesWritable, Mutation> {
    Path sourceHashDir;

    HConnection sourceConnection;
    HConnection targetConnection;
    HTableInterface sourceTable;
    HTableInterface targetTable;
    boolean dryRun;

    HashTable.TableHash sourceTableHash;
    HashTable.TableHash.Reader sourceHashReader;
    ImmutableBytesWritable currentSourceHash;
    ImmutableBytesWritable nextSourceKey;
    HashTable.ResultHasher targetHasher;

    Throwable mapperException;

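    // BATCHES and the HASHES_* counters track hash-batch comparisons; the
    // *MISSINGROWS/*MISSINGCELLS, DIFFERENTCELLVALUES, and MATCHING* counters
    // are only updated for ranges that had to be rescanned cell by cell.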
    public static enum Counter {BATCHES, HASHES_MATCHED, HASHES_NOT_MATCHED, SOURCEMISSINGROWS,
      SOURCEMISSINGCELLS, TARGETMISSINGROWS, TARGETMISSINGCELLS, ROWSWITHDIFFS, DIFFERENTCELLVALUES,
      MATCHINGROWS, MATCHINGCELLS, EMPTY_BATCHES, RANGESMATCHED, RANGESNOTMATCHED};

    @Override
    protected void setup(Context context) throws IOException {
      Configuration conf = context.getConfiguration();
      sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY));
      sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY);
      targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY);
      sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY);
      targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY);
      dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);

      sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir);
      LOG.info("Read source hash manifest: " + sourceTableHash);
      LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys");

      TableSplit split = (TableSplit) context.getInputSplit();
      ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow());

      sourceHashReader = sourceTableHash.newReader(conf, splitStartKey);
      findNextKeyHashPair();

      // create a hasher, but don't start it right away
      // instead, find the first hash batch at or after the start row
      // and skip any rows that come before.  they will be caught by the previous task
      targetHasher = new HashTable.ResultHasher();
    }

    private static HConnection openConnection(Configuration conf, String zkClusterConfKey)
        throws IOException {
      Configuration clusterConf = new Configuration(conf);
      String zkCluster = conf.get(zkClusterConfKey);
      if (zkCluster != null) {
        ZKUtil.applyClusterKeyToConf(clusterConf, zkCluster);
      }
      return HConnectionManager.createConnection(clusterConf);
    }

    private static HTableInterface openTable(HConnection connection, Configuration conf,
        String tableNameConfKey) throws IOException {
      return connection.getTable(TableName.valueOf(conf.get(tableNameConfKey)));
    }

    /**
     * Attempt to read the next source key/hash pair.
     * If there are no more, set nextSourceKey to null.
     */
    private void findNextKeyHashPair() throws IOException {
      boolean hasNext = sourceHashReader.next();
      if (hasNext) {
        nextSourceKey = sourceHashReader.getCurrentKey();
      } else {
        // no more keys - last hash goes to the end
        nextSourceKey = null;
      }
    }

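    /**
     * Rows arrive in sorted order from the target-table scan. Complete any
     * hash batches that end at or before this row's key, then fold the row
     * into the current batch's hash.
     */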
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      try {
        // first, finish any hash batches that end before the scanned row
        while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) {
          moveToNextBatch(context);
        }

        // next, add the scanned row (as long as we've reached the first batch)
        if (targetHasher.isBatchStarted()) {
          targetHasher.hashResult(value);
        }
      } catch (Throwable t) {
        mapperException = t;
        Throwables.propagateIfInstanceOf(t, IOException.class);
        Throwables.propagateIfInstanceOf(t, InterruptedException.class);
        Throwables.propagate(t);
      }
    }

    /**
     * If there is an open hash batch, complete it and sync if there are diffs.
     * Start a new batch, and seek to read the next source key/hash pair.
     */
    private void moveToNextBatch(Context context) throws IOException, InterruptedException {
      if (targetHasher.isBatchStarted()) {
        finishBatchAndCompareHashes(context);
      }
      targetHasher.startBatch(nextSourceKey);
      currentSourceHash = sourceHashReader.getCurrentHash();

      findNextKeyHashPair();
    }

    /**
     * Finish the currently open hash batch.
     * Compare the target hash to the given source hash.
     * If they do not match, then sync the covered key range.
     */
    private void finishBatchAndCompareHashes(Context context)
        throws IOException, InterruptedException {
      targetHasher.finishBatch();
      context.getCounter(Counter.BATCHES).increment(1);
      if (targetHasher.getBatchSize() == 0) {
        context.getCounter(Counter.EMPTY_BATCHES).increment(1);
      }
      ImmutableBytesWritable targetHash = targetHasher.getBatchHash();
      if (targetHash.equals(currentSourceHash)) {
        context.getCounter(Counter.HASHES_MATCHED).increment(1);
      } else {
        context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1);

        ImmutableBytesWritable stopRow = nextSourceKey == null
                                          ? new ImmutableBytesWritable(sourceTableHash.stopRow)
                                          : nextSourceKey;

        if (LOG.isDebugEnabled()) {
          LOG.debug("Hash mismatch.  Key range: " + toHex(targetHasher.getBatchStartKey())
              + " to " + toHex(stopRow)
              + " sourceHash: " + toHex(currentSourceHash)
              + " targetHash: " + toHex(targetHash));
        }

        syncRange(context, targetHasher.getBatchStartKey(), stopRow);
      }
    }

    private static String toHex(ImmutableBytesWritable bytes) {
      return Bytes.toHex(bytes.get(), bytes.getOffset(), bytes.getLength());
    }

    private static final CellScanner EMPTY_CELL_SCANNER
      = new CellScanner(Iterators.<Result>emptyIterator());

    /**
     * Rescan the given range directly from the source and target tables.
     * Count and log differences, and if this is not a dry run, output Puts and Deletes
     * to make the target table match the source table for this range.
     */
    private void syncRange(Context context, ImmutableBytesWritable startRow,
        ImmutableBytesWritable stopRow) throws IOException, InterruptedException {

      Scan scan = sourceTableHash.initScan();
      scan.setStartRow(startRow.copyBytes());
      scan.setStopRow(stopRow.copyBytes());

      ResultScanner sourceScanner = sourceTable.getScanner(scan);
      CellScanner sourceCells = new CellScanner(sourceScanner.iterator());

      ResultScanner targetScanner = targetTable.getScanner(scan);
      CellScanner targetCells = new CellScanner(targetScanner.iterator());

      boolean rangeMatched = true;
      byte[] nextSourceRow = sourceCells.nextRow();
      byte[] nextTargetRow = targetCells.nextRow();
      while (nextSourceRow != null || nextTargetRow != null) {
        boolean rowMatched;
        int rowComparison = compareRowKeys(nextSourceRow, nextTargetRow);
        if (rowComparison < 0) {
          if (LOG.isInfoEnabled()) {
            LOG.info("Target missing row: " + Bytes.toHex(nextSourceRow));
          }
          context.getCounter(Counter.TARGETMISSINGROWS).increment(1);

          rowMatched = syncRowCells(context, nextSourceRow, sourceCells, EMPTY_CELL_SCANNER);
          nextSourceRow = sourceCells.nextRow();  // advance only source to next row
        } else if (rowComparison > 0) {
          if (LOG.isInfoEnabled()) {
            LOG.info("Source missing row: " + Bytes.toHex(nextTargetRow));
          }
          context.getCounter(Counter.SOURCEMISSINGROWS).increment(1);

          rowMatched = syncRowCells(context, nextTargetRow, EMPTY_CELL_SCANNER, targetCells);
          nextTargetRow = targetCells.nextRow();  // advance only target to next row
        } else {
          // current row is the same on both sides, compare cell by cell
          rowMatched = syncRowCells(context, nextSourceRow, sourceCells, targetCells);
          nextSourceRow = sourceCells.nextRow();
          nextTargetRow = targetCells.nextRow();
        }

        if (!rowMatched) {
          rangeMatched = false;
        }
      }

      sourceScanner.close();
      targetScanner.close();

      context.getCounter(rangeMatched ? Counter.RANGESMATCHED : Counter.RANGESNOTMATCHED)
        .increment(1);
    }

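    /**
     * Iterates the Results of a scanner as rows and cells: nextRow() advances
     * to the next distinct row key, and nextCellInRow() walks that row's
     * cells, transparently crossing Result boundaries when scan batching has
     * split a row into multiple Results.
     */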
    private static class CellScanner {
      private final Iterator<Result> results;

      private byte[] currentRow;
      private Result currentRowResult;
      private int nextCellInRow;

      private Result nextRowResult;

      public CellScanner(Iterator<Result> results) {
        this.results = results;
      }

      /**
       * Advance to the next row and return its row key.
       * Returns null iff there are no more rows.
       */
      public byte[] nextRow() {
        if (nextRowResult == null) {
          // no cached row - check scanner for more
          while (results.hasNext()) {
            nextRowResult = results.next();
            Cell nextCell = nextRowResult.rawCells()[0];
            if (currentRow == null
                || !Bytes.equals(currentRow, 0, currentRow.length, nextCell.getRowArray(),
                nextCell.getRowOffset(), nextCell.getRowLength())) {
              // found next row
              break;
            } else {
              // found another result from current row, keep scanning
              nextRowResult = null;
            }
          }

          if (nextRowResult == null) {
            // end of data, no more rows
            currentRowResult = null;
            currentRow = null;
            return null;
          }
        }

        // advance to cached result for next row
        currentRowResult = nextRowResult;
        nextCellInRow = 0;
        currentRow = currentRowResult.getRow();
        nextRowResult = null;
        return currentRow;
      }

      /**
       * Returns the next Cell in the current row or null iff none remain.
       */
      public Cell nextCellInRow() {
        if (currentRowResult == null) {
          // nothing left in current row
          return null;
        }

        Cell nextCell = currentRowResult.rawCells()[nextCellInRow];
        nextCellInRow++;
        if (nextCellInRow == currentRowResult.size()) {
          if (results.hasNext()) {
            Result result = results.next();
            Cell cell = result.rawCells()[0];
            if (Bytes.equals(currentRow, 0, currentRow.length, cell.getRowArray(),
                cell.getRowOffset(), cell.getRowLength())) {
              // result is part of current row
              currentRowResult = result;
              nextCellInRow = 0;
            } else {
              // result is part of next row, cache it
              nextRowResult = result;
              // current row is complete
              currentRowResult = null;
            }
          } else {
            // end of data
            currentRowResult = null;
          }
        }
        return nextCell;
      }
    }

    /**
     * Compare the cells for the given row from the source and target tables.
     * Count and log any differences.
     * If not a dry run, output a Put and/or Delete needed to sync the target table
     * to match the source table.
     */
    private boolean syncRowCells(Context context, byte[] rowKey, CellScanner sourceCells,
        CellScanner targetCells) throws IOException, InterruptedException {
      Put put = null;
      Delete delete = null;
      long matchingCells = 0;
      boolean matchingRow = true;
      Cell sourceCell = sourceCells.nextCellInRow();
      Cell targetCell = targetCells.nextCellInRow();
      while (sourceCell != null || targetCell != null) {

        int cellKeyComparison = compareCellKeysWithinRow(sourceCell, targetCell);
        if (cellKeyComparison < 0) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Target missing cell: " + sourceCell);
          }
          context.getCounter(Counter.TARGETMISSINGCELLS).increment(1);
          matchingRow = false;

          if (!dryRun) {
            if (put == null) {
              put = new Put(rowKey);
            }
            put.add(sourceCell);
          }

          sourceCell = sourceCells.nextCellInRow();
        } else if (cellKeyComparison > 0) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Source missing cell: " + targetCell);
          }
          context.getCounter(Counter.SOURCEMISSINGCELLS).increment(1);
          matchingRow = false;

          if (!dryRun) {
            if (delete == null) {
              delete = new Delete(rowKey);
            }
            // add a tombstone to exactly match the target cell that is missing on the source
            delete.deleteColumn(CellUtil.cloneFamily(targetCell),
                CellUtil.cloneQualifier(targetCell), targetCell.getTimestamp());
          }

          targetCell = targetCells.nextCellInRow();
        } else {
          // the cell keys are equal, now check values
          if (CellUtil.matchingValue(sourceCell, targetCell)) {
            matchingCells++;
          } else {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Different values: ");
              LOG.debug("  source cell: " + sourceCell
                  + " value: " + Bytes.toHex(sourceCell.getValueArray(),
                      sourceCell.getValueOffset(), sourceCell.getValueLength()));
              LOG.debug("  target cell: " + targetCell
                  + " value: " + Bytes.toHex(targetCell.getValueArray(),
                      targetCell.getValueOffset(), targetCell.getValueLength()));
            }
            context.getCounter(Counter.DIFFERENTCELLVALUES).increment(1);
            matchingRow = false;

            if (!dryRun) {
              // overwrite target cell
              if (put == null) {
                put = new Put(rowKey);
              }
              put.add(sourceCell);
            }
          }
          sourceCell = sourceCells.nextCellInRow();
          targetCell = targetCells.nextCellInRow();
        }

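        // when scan batching is on, rows can be wider than a single batch;
        // flush accumulated mutations once they reach the batch size so one
        // wide row doesn't buffer an unbounded Put/Delete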
        if (!dryRun && sourceTableHash.scanBatch > 0) {
          if (put != null && put.size() >= sourceTableHash.scanBatch) {
            context.write(new ImmutableBytesWritable(rowKey), put);
            put = null;
          }
          if (delete != null && delete.size() >= sourceTableHash.scanBatch) {
            context.write(new ImmutableBytesWritable(rowKey), delete);
            delete = null;
          }
        }
      }

      if (!dryRun) {
        if (put != null) {
          context.write(new ImmutableBytesWritable(rowKey), put);
        }
        if (delete != null) {
          context.write(new ImmutableBytesWritable(rowKey), delete);
        }
      }

      if (matchingCells > 0) {
        context.getCounter(Counter.MATCHINGCELLS).increment(matchingCells);
      }
      if (matchingRow) {
        context.getCounter(Counter.MATCHINGROWS).increment(1);
        return true;
      } else {
        context.getCounter(Counter.ROWSWITHDIFFS).increment(1);
        return false;
      }
    }

    /**
     * Compare the given row keys.
     * Nulls sort after non-nulls.
     */
    private static int compareRowKeys(byte[] r1, byte[] r2) {
      if (r1 == null) {
        return 1;  // source missing row
      } else if (r2 == null) {
        return -1; // target missing row
      } else {
        return CellComparator.compareRows(r1, 0, r1.length, r2, 0, r2.length);
      }
    }

    /**
     * Compare families, qualifiers, and timestamps of the given Cells.
     * They are assumed to be of the same row.
     * Nulls are after non-nulls.
     */
    private static int compareCellKeysWithinRow(Cell c1, Cell c2) {
      if (c1 == null) {
        return 1; // source missing cell
      }
      if (c2 == null) {
        return -1; // target missing cell
      }

      int result = CellComparator.compareFamilies(c1, c2);
      if (result != 0) {
        return result;
      }

      result = CellComparator.compareQualifiers(c1, c2);
      if (result != 0) {
        return result;
      }

      // note timestamp comparison is inverted - more recent cells first
      return CellComparator.compareTimestamps(c1, c2);
    }

    @Override
    protected void cleanup(Context context)
        throws IOException, InterruptedException {
      if (mapperException == null) {
        try {
          finishRemainingHashRanges(context);
        } catch (Throwable t) {
          mapperException = t;
        }
      }

      try {
        sourceTable.close();
        targetTable.close();
        sourceConnection.close();
        targetConnection.close();
      } catch (Throwable t) {
        if (mapperException == null) {
          mapperException = t;
        } else {
          LOG.error("Suppressing exception from closing tables", t);
        }
      }

      // propagate first exception
      if (mapperException != null) {
        Throwables.propagateIfInstanceOf(mapperException, IOException.class);
        Throwables.propagateIfInstanceOf(mapperException, InterruptedException.class);
        Throwables.propagate(mapperException);
      }
    }

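    /**
     * Complete any hash batches that remain open at the end of this split.
     * The final batch may extend past the split's end row, in which case the
     * tail of the range is scanned from the target table before comparing
     * hashes.
     */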
    private void finishRemainingHashRanges(Context context) throws IOException,
        InterruptedException {
      TableSplit split = (TableSplit) context.getInputSplit();
      byte[] splitEndRow = split.getEndRow();
      boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);

      // if there are more hash batches that begin before the end of this split, move to them
      while (nextSourceKey != null
          && (nextSourceKey.compareTo(splitEndRow) < 0 || reachedEndOfTable)) {
        moveToNextBatch(context);
      }

      if (targetHasher.isBatchStarted()) {
        // need to complete the final open hash batch

        if ((nextSourceKey != null && nextSourceKey.compareTo(splitEndRow) > 0)
            || (nextSourceKey == null && !Bytes.equals(splitEndRow, sourceTableHash.stopRow))) {
          // the open hash range continues past the end of this region
          // add a scan to complete the current hash range
          Scan scan = sourceTableHash.initScan();
          scan.setStartRow(splitEndRow);
          if (nextSourceKey == null) {
            scan.setStopRow(sourceTableHash.stopRow);
          } else {
            scan.setStopRow(nextSourceKey.copyBytes());
          }

          ResultScanner targetScanner = targetTable.getScanner(scan);
          for (Result row : targetScanner) {
            targetHasher.hashResult(row);
          }
        } // else current batch ends exactly at split end row

        finishBatchAndCompareHashes(context);
      }
    }
  }

  private static final int NUM_ARGS = 3;
  private static void printUsage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
      System.err.println();
    }
    System.err.println("Usage: SyncTable [options] <sourcehashdir> <sourcetable> <targettable>");
    System.err.println();
    System.err.println("Options:");

    System.err.println(" sourcezkcluster  ZK cluster key of the source table");
    System.err.println("                  (defaults to cluster in classpath's config)");
    System.err.println(" targetzkcluster  ZK cluster key of the target table");
    System.err.println("                  (defaults to cluster in classpath's config)");
    System.err.println(" dryrun           if true, output counters but no writes");
    System.err.println("                  (defaults to false)");
    System.err.println();
    System.err.println("Args:");
    System.err.println(" sourcehashdir    path to HashTable output dir for source table");
    System.err.println("                  (as written by a prior HashTable run)");
    System.err.println(" sourcetable      Name of the source table to sync from");
    System.err.println(" targettable      Name of the target table to sync to");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" For a dry run SyncTable of tableA from a remote source cluster");
    System.err.println(" to a local target cluster:");
    System.err.println(" $ bin/hbase " +
        "org.apache.hadoop.hbase.mapreduce.SyncTable --dryrun=true"
        + " --sourcezkcluster=zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase"
        + " hdfs://nn:9000/hashes/tableA tableA tableA");
  }

  private boolean doCommandLine(final String[] args) {
    if (args.length < NUM_ARGS) {
      printUsage(null);
      return false;
    }
    try {
      sourceHashDir = new Path(args[args.length - 3]);
      sourceTableName = args[args.length - 2];
      targetTableName = args[args.length - 1];

      for (int i = 0; i < args.length - NUM_ARGS; i++) {
        String cmd = args[i];
        if (cmd.equals("-h") || cmd.startsWith("--h")) {
          printUsage(null);
          return false;
        }

        final String sourceZkClusterKey = "--sourcezkcluster=";
        if (cmd.startsWith(sourceZkClusterKey)) {
          sourceZkCluster = cmd.substring(sourceZkClusterKey.length());
          continue;
        }

        final String targetZkClusterKey = "--targetzkcluster=";
        if (cmd.startsWith(targetZkClusterKey)) {
          targetZkCluster = cmd.substring(targetZkClusterKey.length());
          continue;
        }

        final String dryRunKey = "--dryrun=";
        if (cmd.startsWith(dryRunKey)) {
          dryRun = Boolean.parseBoolean(cmd.substring(dryRunKey.length()));
          continue;
        }

        printUsage("Invalid argument '" + cmd + "'");
        return false;
      }
    } catch (Exception e) {
      e.printStackTrace();
      printUsage("Can't start because " + e.getMessage());
      return false;
    }
    return true;
  }

  /**
   * Main entry point.
   */
  public static void main(String[] args) throws Exception {
    int ret = ToolRunner.run(new SyncTable(HBaseConfiguration.create()), args);
    System.exit(ret);
  }

  @Override
  public int run(String[] args) throws Exception {
    String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    if (!doCommandLine(otherArgs)) {
      return 1;
    }

    Job job = createSubmittableJob(otherArgs);
    if (!job.waitForCompletion(true)) {
      LOG.error("Map-reduce job failed!");
      return 1;
    }
    counters = job.getCounters();
    return 0;
  }
}