/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.replication.regionserver;

import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.replication.ChainWALEntryFilter;
import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationPeers;
import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
import org.apache.hadoop.hbase.replication.ReplicationQueues;
import org.apache.hadoop.hbase.replication.SystemTableWALEntryFilter;
import org.apache.hadoop.hbase.replication.WALEntryFilter;
import org.apache.hadoop.hbase.util.Threads;

import com.google.common.collect.Lists;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.Service;

/**
 * Class that handles the source of a replication stream.
 * Currently does not handle more than 1 slave cluster.
 * For each slave cluster it selects a random number of peers
 * using a replication ratio. For example, if the replication ratio is 0.1
 * and the slave cluster has 100 region servers, 10 will be selected.
 * <p/>
 * A stream is considered down when we cannot contact a region server on the
 * peer cluster for more than 55 seconds by default.
 */
@InterfaceAudience.Private
public class ReplicationSource extends Thread
    implements ReplicationSourceInterface {

  public static final Log LOG = LogFactory.getLog(ReplicationSource.class);
  // Queue of logs to process
  private PriorityBlockingQueue<Path> queue;
  private ReplicationQueues replicationQueues;
  private ReplicationPeers replicationPeers;

  private Configuration conf;
  private ReplicationQueueInfo replicationQueueInfo;
  // id of the peer cluster this source replicates to
  private String peerId;
  // The manager of all sources to which we ping back our progress
  private ReplicationSourceManager manager;
  // Should we stop everything?
  private Stoppable stopper;
  // How long should we sleep for each retry
  private long sleepForRetries;
  // Max size in bytes of entriesArray
  private long replicationQueueSizeCapacity;
  // Max number of entries in entriesArray
  private int replicationQueueNbCapacity;
  // Our reader for the current log
  private HLog.Reader reader;
  // Last position in the log that we sent to ZooKeeper
  private long lastLoggedPosition = -1;
  // Path of the current log
  private volatile Path currentPath;
  private FileSystem fs;
  // id of this cluster
  private UUID clusterId;
  // id of the other cluster
  private UUID peerClusterId;
  // total number of edits we replicated
  private long totalReplicatedEdits = 0;
  // total number of operations (Puts/Deletes) we replicated
  private long totalReplicatedOperations = 0;
  // The znode of the replication queue we currently work with
  private String peerClusterZnode;
  // Maximum number of retries before taking bold actions
  private int maxRetriesMultiplier;
  // Current number of operations (Put/Delete) that we need to replicate
  private int currentNbOperations = 0;
  // Current size of data we need to replicate
  private int currentSize = 0;
  // Indicates if this particular source is running
  private volatile boolean running = true;
  // Metrics for this source
  private MetricsSource metrics;
  // Handle on the log reader helper
  private ReplicationHLogReaderManager repLogReader;
  // WARN threshold for the number of queued logs, defaults to 2
  private int logQueueWarnThreshold;
  // ReplicationEndpoint which will handle the actual replication
  private ReplicationEndpoint replicationEndpoint;
  // A filter (or a chain of filters) for the WAL entries.
  private WALEntryFilter walEntryFilter;
  // Throttler for the replication bandwidth used by this source
  private ReplicationThrottler throttler;

  /**
   * Instantiation method used by region servers
   *
   * @param conf configuration to use
   * @param fs file system to use
   * @param manager replication manager to ping to
   * @param replicationQueues the replication queues this source reads from
   * @param replicationPeers the replication peers this source replicates to
   * @param stopper the stoppable used to stop the region server
   * @param peerClusterZnode the name of our znode
   * @param clusterId unique UUID for the cluster
   * @param replicationEndpoint the replication endpoint implementation
   * @param metrics metrics for replication source
   * @throws IOException
   */
  @Override
  public void init(final Configuration conf, final FileSystem fs,
      final ReplicationSourceManager manager, final ReplicationQueues replicationQueues,
      final ReplicationPeers replicationPeers, final Stoppable stopper,
      final String peerClusterZnode, final UUID clusterId, ReplicationEndpoint replicationEndpoint,
      final MetricsSource metrics)
          throws IOException {
    this.stopper = stopper;
    this.conf = conf;
    decorateConf();
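    // A batch of entries to ship is capped at 64 MB or 25,000 entries by default.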
    this.replicationQueueSizeCapacity =
        this.conf.getLong("replication.source.size.capacity", 1024*1024*64);
    this.replicationQueueNbCapacity =
        this.conf.getInt("replication.source.nb.capacity", 25000);
    this.maxRetriesMultiplier = this.conf.getInt("replication.source.maxretriesmultiplier", 10);
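    // WALs are processed oldest first, ordered by the timestamp suffix of their names
    // (see LogsComparator).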
    this.queue =
        new PriorityBlockingQueue<Path>(
            this.conf.getInt("hbase.regionserver.maxlogs", 32),
            new LogsComparator());
    long bandwidth = this.conf.getLong("replication.source.per.peer.node.bandwidth", 0);
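    // A bandwidth of 0 disables throttling; the throttler budgets bandwidth/10 bytes
    // per 100 ms cycle.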
    this.throttler = new ReplicationThrottler((double) bandwidth / 10.0);
    this.replicationQueues = replicationQueues;
    this.replicationPeers = replicationPeers;
    this.manager = manager;
    this.sleepForRetries =
        this.conf.getLong("replication.source.sleepforretries", 1000);
    this.fs = fs;
    this.metrics = metrics;
    this.repLogReader = new ReplicationHLogReaderManager(this.fs, this.conf);
    this.clusterId = clusterId;

    this.peerClusterZnode = peerClusterZnode;
    this.replicationQueueInfo = new ReplicationQueueInfo(peerClusterZnode);
    // ReplicationQueueInfo parses the peerId out of the znode for us
    this.peerId = this.replicationQueueInfo.getPeerId();
    this.logQueueWarnThreshold = this.conf.getInt("replication.source.log.queue.warn", 2);
    this.replicationEndpoint = replicationEndpoint;
  }

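  // If a replication-specific RPC codec is configured, use it for the connections made
  // by this source.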
  private void decorateConf() {
    String replicationCodec = this.conf.get(HConstants.REPLICATION_CODEC_CONF_KEY);
    if (StringUtils.isNotEmpty(replicationCodec)) {
      this.conf.set(HConstants.RPC_CODEC_CONF_KEY, replicationCodec);
    }
  }

  @Override
  public void enqueueLog(Path log) {
    this.queue.put(log);
    int queueSize = queue.size();
    this.metrics.setSizeOfLogQueue(queueSize);
    // This will log a warning for each new log that gets created above the warn threshold
    if (queueSize > this.logQueueWarnThreshold) {
      LOG.warn("Queue size: " + queueSize +
        " exceeds value of replication.source.log.queue.warn: " + logQueueWarnThreshold);
    }
  }

  private void uninitialize() {
    LOG.debug("Source exiting " + this.peerId);
    metrics.clear();
    if (replicationEndpoint.state() == Service.State.STARTING
        || replicationEndpoint.state() == Service.State.RUNNING) {
      replicationEndpoint.stopAndWait();
    }
  }

  @Override
  public void run() {
    // We were stopped while looping to connect to sinks, just abort
    if (!this.isActive()) {
      uninitialize();
      return;
    }

    try {
      // start the endpoint, connect to the cluster
      Service.State state = replicationEndpoint.start().get();
      if (state != Service.State.RUNNING) {
        LOG.warn("ReplicationEndpoint was not started. Exiting");
        uninitialize();
        return;
      }
    } catch (Exception ex) {
      LOG.warn("Error starting ReplicationEndpoint, exiting", ex);
      throw new RuntimeException(ex);
    }

    // get the WALEntryFilter from ReplicationEndpoint and add it to default filters
    ArrayList<WALEntryFilter> filters = Lists.newArrayList(
      (WALEntryFilter) new SystemTableWALEntryFilter());
    WALEntryFilter filterFromEndpoint = this.replicationEndpoint.getWALEntryfilter();
    if (filterFromEndpoint != null) {
      filters.add(filterFromEndpoint);
    }
    this.walEntryFilter = new ChainWALEntryFilter(filters);

    int sleepMultiplier = 1;
    // delay this until we are in an asynchronous thread
    while (this.isActive() && this.peerClusterId == null) {
      this.peerClusterId = replicationEndpoint.getPeerUUID();
      if (this.isActive() && this.peerClusterId == null) {
        if (sleepForRetries("Cannot contact the peer's zk ensemble", sleepMultiplier)) {
          sleepMultiplier++;
        }
      }
    }
    // We were stopped while looping to contact peer's zk ensemble, just abort
    if (!this.isActive()) {
      uninitialize();
      return;
    }

    // resetting to 1 to reuse later
    sleepMultiplier = 1;

    // In a rare case the ZooKeeper setting may be messed up. That leads to an incorrect
    // peerClusterId value, which is the same as the source clusterId
    if (clusterId.equals(peerClusterId) && !replicationEndpoint.canReplicateToSameCluster()) {
      this.terminate("ClusterId " + clusterId + " is replicating to itself: peerClusterId "
          + peerClusterId + " which is not allowed by ReplicationEndpoint:"
          + replicationEndpoint.getClass().getName(), null, false);
    }
    LOG.info("Replicating " + clusterId + " -> " + peerClusterId);

    // If this is recovered, the queue is already full and the first log
    // normally has a position (unless the RS failed between 2 logs)
    if (this.replicationQueueInfo.isQueueRecovered()) {
      try {
        this.repLogReader.setPosition(this.replicationQueues.getLogPosition(this.peerClusterZnode,
          this.queue.peek().getName()));
        if (LOG.isTraceEnabled()) {
          LOG.trace("Recovered queue started with log " + this.queue.peek() +
              " at position " + this.repLogReader.getPosition());
        }
      } catch (ReplicationException e) {
        this.terminate("Couldn't get the position of this recovered queue " +
            this.peerClusterZnode, e);
      }
    }
    // Loop until we close down
    while (isActive()) {
      // Sleep until replication is enabled again
      if (!isPeerEnabled()) {
        if (sleepForRetries("Replication is disabled", sleepMultiplier)) {
          sleepMultiplier++;
        }
        continue;
      }
      // Note that in the current scenario, oldPath will be null when a log roll happens.
      Path oldPath = getCurrentPath();
      // Get a new path
      boolean hasCurrentPath = getNextPath();
      if (getCurrentPath() != null && oldPath == null) {
        sleepMultiplier = 1; // reset the sleepMultiplier on a path change
      }
      if (!hasCurrentPath) {
        if (sleepForRetries("No log to process", sleepMultiplier)) {
          sleepMultiplier++;
        }
        continue;
      }
      boolean currentWALisBeingWrittenTo = false;
      // For WAL files we own (rather than recovered), take a snapshot of whether the
      // current WAL file (this.currentPath) is in use (for writing) NOW!
      // Since the new WAL paths are enqueued only after the prev WAL file
      // is 'closed', presence of an element in the queue means that
      // the previous WAL file was closed, else the file is in use (currentPath)
      // We take the snapshot now so that we are protected against races
      // where a new file gets enqueued while the current file is being processed
      // (and where we just finished reading the current file).
      if (!this.replicationQueueInfo.isQueueRecovered() && queue.size() == 0) {
        currentWALisBeingWrittenTo = true;
      }
      // Open a reader on it
      if (!openReader(sleepMultiplier)) {
        // Reset the sleep multiplier, else it'd be reused for the next file
        sleepMultiplier = 1;
        continue;
      }

      // If we got a null reader but didn't continue, then sleep and continue
      if (this.reader == null) {
        if (sleepForRetries("Unable to open a reader", sleepMultiplier)) {
          sleepMultiplier++;
        }
        continue;
      }

      boolean gotIOE = false;
      currentNbOperations = 0;
      List<HLog.Entry> entries = new ArrayList<HLog.Entry>(1);
      currentSize = 0;
      try {
        if (readAllEntriesToReplicateOrNextFile(currentWALisBeingWrittenTo, entries)) {
          continue;
        }
      } catch (IOException ioe) {
        LOG.warn(this.peerClusterZnode + " Got: ", ioe);
        gotIOE = true;
        if (ioe.getCause() instanceof EOFException) {

          boolean considerDumping = false;
          if (this.replicationQueueInfo.isQueueRecovered()) {
            try {
              FileStatus stat = this.fs.getFileStatus(this.currentPath);
              if (stat.getLen() == 0) {
                LOG.warn(this.peerClusterZnode + " Got EOF and the file was empty");
              }
              considerDumping = true;
            } catch (IOException e) {
              LOG.warn(this.peerClusterZnode + " Got exception while getting file size: ", e);
            }
          }

          if (considerDumping &&
              sleepMultiplier == this.maxRetriesMultiplier &&
              processEndOfFile()) {
            continue;
          }
        }
      } finally {
        try {
          this.reader = null;
          this.repLogReader.closeReader();
        } catch (IOException e) {
          gotIOE = true;
          LOG.warn("Unable to finalize the tailing of a file", e);
        }
      }

      // If we didn't get anything to replicate, or if we hit an IOE,
      // wait a bit and retry.
      // But if we need to stop, don't bother sleeping
      if (this.isActive() && (gotIOE || entries.isEmpty())) {
        if (this.lastLoggedPosition != this.repLogReader.getPosition()) {
          this.manager.logPositionAndCleanOldLogs(this.currentPath,
              this.peerClusterZnode, this.repLogReader.getPosition(),
              this.replicationQueueInfo.isQueueRecovered(), currentWALisBeingWrittenTo);
          this.lastLoggedPosition = this.repLogReader.getPosition();
        }
        // Reset the sleep multiplier if nothing has actually gone wrong
        if (!gotIOE) {
          sleepMultiplier = 1;
          // if there was nothing to ship and it's not an error
          // set "ageOfLastShippedOp" to <now> to indicate that we're current
          this.metrics.setAgeOfLastShippedOp(System.currentTimeMillis());
        }
        if (sleepForRetries("Nothing to replicate", sleepMultiplier)) {
          sleepMultiplier++;
        }
        continue;
      }
      sleepMultiplier = 1;
      shipEdits(currentWALisBeingWrittenTo, entries);
    }
    uninitialize();
  }

  /**
   * Read all the entries from the current log file and retain those
   * that need to be replicated. Otherwise, process the end of the current file.
   * @param currentWALisBeingWrittenTo whether the current WAL is still being written to
   * @param entries resulting entries to be replicated
   * @return true if we got nothing and went to the next file, false if we got
   * entries
   * @throws IOException
   */
  protected boolean readAllEntriesToReplicateOrNextFile(boolean currentWALisBeingWrittenTo,
      List<HLog.Entry> entries) throws IOException {
    long seenEntries = 0;
    if (LOG.isTraceEnabled()) {
      LOG.trace("Seeking in " + this.currentPath + " at position "
          + this.repLogReader.getPosition());
    }
    this.repLogReader.seek();
    long positionBeforeRead = this.repLogReader.getPosition();
    HLog.Entry entry =
        this.repLogReader.readNextAndSetPosition();
    while (entry != null) {
      this.metrics.incrLogEditsRead();
      seenEntries++;

      // don't replicate if the log entries have already been consumed by the cluster
      if (replicationEndpoint.canReplicateToSameCluster()
          || !entry.getKey().getClusterIds().contains(peerClusterId)) {
        // Remove all KVs that should not be replicated
        entry = walEntryFilter.filter(entry);
        WALEdit edit = null;
        HLogKey logKey = null;
        if (entry != null) {
          edit = entry.getEdit();
          logKey = entry.getKey();
        }

        if (edit != null && edit.size() != 0) {
          // Mark that the current cluster has the change
          logKey.addClusterId(clusterId);
          currentNbOperations += countDistinctRowKeys(edit);
          entries.add(entry);
          currentSize += entry.getEdit().heapSize();
        } else {
          this.metrics.incrLogEditsFiltered();
        }
      }
      // Stop if too many entries or too big
      if (currentSize >= this.replicationQueueSizeCapacity ||
          entries.size() >= this.replicationQueueNbCapacity) {
        break;
      }
      try {
        entry = this.repLogReader.readNextAndSetPosition();
      } catch (IOException ie) {
        LOG.debug("Break on IOE: " + ie.getMessage());
        break;
      }
    }
    metrics.incrLogReadInBytes(this.repLogReader.getPosition() - positionBeforeRead);
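    // The current WAL is still being written to, so an apparent end of file is not
    // conclusive; stay on this file rather than processing an end-of-file.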
    if (currentWALisBeingWrittenTo) {
      return false;
    }
    // If we didn't get anything and the queue has an object, it means we
    // hit the end of the file for sure
    return seenEntries == 0 && processEndOfFile();
  }

  /**
   * Poll for the next path
   * @return true if a path was obtained, false if not
   */
  protected boolean getNextPath() {
    try {
      if (this.currentPath == null) {
        this.currentPath = queue.poll(this.sleepForRetries, TimeUnit.MILLISECONDS);
        this.metrics.setSizeOfLogQueue(queue.size());
        if (this.currentPath != null) {
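          // Logs older than the one we just dequeued are no longer needed by this
          // source, so ask the manager to clean them up.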
          this.manager.cleanOldLogs(this.currentPath.getName(),
              this.peerId,
              this.replicationQueueInfo.isQueueRecovered());
          if (LOG.isTraceEnabled()) {
            LOG.trace("New log: " + this.currentPath);
          }
        }
      }
    } catch (InterruptedException e) {
      LOG.warn("Interrupted while reading edits", e);
    }
    return this.currentPath != null;
  }

  /**
   * Open a reader on the current path
   *
   * @param sleepMultiplier by how many times the default sleeping time is augmented
   * @return true if we should continue with that file, false if we are done with it
   */
  protected boolean openReader(int sleepMultiplier) {
    try {
      try {
        if (LOG.isTraceEnabled()) {
          LOG.trace("Opening log " + this.currentPath);
        }
        this.reader = repLogReader.openReader(this.currentPath);
      } catch (FileNotFoundException fnfe) {
        if (this.replicationQueueInfo.isQueueRecovered()) {
          // We didn't find the log in the archive directory, look if it still
          // exists in the dead RS folder (there could be a chain of failures
          // to look at)
          List<String> deadRegionServers = this.replicationQueueInfo.getDeadRegionServers();
          LOG.info("NB dead servers : " + deadRegionServers.size());
          for (String curDeadServerName : deadRegionServers) {
            Path deadRsDirectory =
                new Path(manager.getLogDir().getParent(), curDeadServerName);
            Path[] locs = new Path[] {
                new Path(deadRsDirectory, currentPath.getName()),
                new Path(deadRsDirectory.suffix(HLog.SPLITTING_EXT),
                    currentPath.getName()),
            };
            for (Path possibleLogLocation : locs) {
              LOG.info("Possible location " + possibleLogLocation.toUri().toString());
              if (this.manager.getFs().exists(possibleLogLocation)) {
                // We found the right new location
                LOG.info("Log " + this.currentPath + " still exists at " +
                    possibleLogLocation);
                // Breaking here will make us sleep since reader is null
                return true;
              }
            }
          }
          // In the case of disaster/recovery, the HMaster may have been shut down or crashed
          // before flushing data from .logs to .oldlogs. Loop through the .logs folders and
          // check whether a match exists.
          if (stopper instanceof ReplicationSyncUp.DummyServer) {
            FileStatus[] rss = fs.listStatus(manager.getLogDir());
            for (FileStatus rs : rss) {
              Path p = rs.getPath();
              FileStatus[] logs = fs.listStatus(p);
              for (FileStatus log : logs) {
                p = new Path(p, log.getPath().getName());
                if (p.getName().equals(currentPath.getName())) {
                  currentPath = p;
                  LOG.info("Log " + this.currentPath + " exists under " + manager.getLogDir());
                  // Open the log at the new location
                  this.openReader(sleepMultiplier);
                  return true;
                }
              }
            }
          }

          // TODO What happens if the log was missing from every single location?
          // Although we need to check a couple of times as the log could have
          // been moved by the master between the checks
          // It can also happen if a recovered queue wasn't properly cleaned,
          // such that the znode pointing to a log exists but the log was
          // deleted a long time ago.
          // For the moment, we'll throw the IO and processEndOfFile
          throw new IOException("File from recovered queue is " +
              "nowhere to be found", fnfe);
        } else {
          // If the log was archived, continue reading from there
          Path archivedLogLocation =
              new Path(manager.getOldLogDir(), currentPath.getName());
          if (this.manager.getFs().exists(archivedLogLocation)) {
            currentPath = archivedLogLocation;
            LOG.info("Log " + this.currentPath + " was moved to " +
                archivedLogLocation);
            // Open the log at the new location
            this.openReader(sleepMultiplier);
          }
          // TODO What happens if the log is missing in both places?
        }
      }
    } catch (IOException ioe) {
      if (ioe instanceof EOFException && isCurrentLogEmpty()) return true;
      LOG.warn(this.peerClusterZnode + " Got: ", ioe);
      this.reader = null;
      if (ioe.getCause() instanceof NullPointerException) {
        // Workaround for race condition in HDFS-4380
        // which throws a NPE if we open a file before any data node has the most recent block
        // Just sleep and retry. Will require re-reading compressed HLogs for compressionContext.
        LOG.warn("Got NPE opening reader, will retry.");
      } else if (sleepMultiplier == this.maxRetriesMultiplier) {
        // TODO Need a better way to determine if a file is really gone but
        // TODO without scanning all logs dir
        LOG.warn("Waited too long for this file, considering dumping");
        return !processEndOfFile();
      }
    }
    return true;
  }

  /*
   * Checks whether the current log file is empty and this is not a recovered queue. This handles
   * the scenario where, in an idle cluster, there are no entries in the current log and we keep
   * trying to read it, getting an EOFException each time. In the case of a recovered queue the
   * last log file may legitimately be empty, and we don't want to retry it.
   */
  private boolean isCurrentLogEmpty() {
    return (this.repLogReader.getPosition() == 0 &&
        !this.replicationQueueInfo.isQueueRecovered() && queue.size() == 0);
  }

  /**
   * Do the sleeping logic
   * @param msg Why we sleep
   * @param sleepMultiplier by how many times the default sleeping time is augmented
   * @return True if <code>sleepMultiplier</code> is &lt; <code>maxRetriesMultiplier</code>
   */
  protected boolean sleepForRetries(String msg, int sleepMultiplier) {
    try {
      if (LOG.isTraceEnabled()) {
        LOG.trace(msg + ", sleeping " + sleepForRetries + " times " + sleepMultiplier);
      }
      Thread.sleep(this.sleepForRetries * sleepMultiplier);
    } catch (InterruptedException e) {
      LOG.debug("Interrupted while sleeping between retries");
      Thread.currentThread().interrupt();
    }
    return sleepMultiplier < maxRetriesMultiplier;
  }

  /**
   * Count the number of different row keys in the given edit, since a single WALEdit can
   * carry a mini-batch of operations. We assume that there is at least one KV in the WALEdit.
   * @param edit edit to count row keys from
   * @return number of different row keys
   */
  private int countDistinctRowKeys(WALEdit edit) {
    List<KeyValue> kvs = edit.getKeyValues();
    int distinctRowKeys = 1;
    KeyValue lastKV = kvs.get(0);
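    // A change of row key between consecutive KeyValues marks the start of a new operation.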
    for (int i = 0; i < edit.size(); i++) {
      if (!kvs.get(i).matchingRow(lastKV)) {
        distinctRowKeys++;
      }
      lastKV = kvs.get(i);
    }
    return distinctRowKeys;
  }

  /**
   * Do the shipping logic
   * @param currentWALisBeingWrittenTo was the current WAL being (seemingly)
   * written to when this method was called
   * @param entries the entries to ship to the peer cluster
   */
  protected void shipEdits(boolean currentWALisBeingWrittenTo, List<HLog.Entry> entries) {
    int sleepMultiplier = 0;
    if (entries.isEmpty()) {
      LOG.warn("Was given 0 edits to ship");
      return;
    }
    while (this.isActive()) {
      try {
        if (this.throttler.isEnabled()) {
          long sleepTicks = this.throttler.getNextSleepInterval(currentSize);
          if (sleepTicks > 0) {
            try {
              if (LOG.isTraceEnabled()) {
                LOG.trace("To sleep " + sleepTicks + "ms for throttling control");
              }
              Thread.sleep(sleepTicks);
            } catch (InterruptedException e) {
              LOG.debug("Interrupted while sleeping for throttling control");
              Thread.currentThread().interrupt();
              // The current thread might have been interrupted in order to terminate;
              // go straight back to the while() condition to confirm this
              continue;
            }
            // reset throttler's cycle start tick when sleep for throttling occurs
            this.throttler.resetStartTick();
          }
        }
        // create replicateContext here, so the entries can be GC'd upon return from this call stack
        ReplicationEndpoint.ReplicateContext replicateContext =
            new ReplicationEndpoint.ReplicateContext();
        replicateContext.setEntries(entries).setSize(currentSize);

        // send the edits to the endpoint. Will block until the edits are shipped and acknowledged
        boolean replicated = replicationEndpoint.replicate(replicateContext);

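        // The endpoint could not replicate this batch; loop around and retry the same entries.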
        if (!replicated) {
          continue;
        } else {
          sleepMultiplier = Math.max(sleepMultiplier - 1, 0);
        }

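        // Persist the new log position and let the manager clean up logs that are now
        // fully replicated.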
        if (this.lastLoggedPosition != this.repLogReader.getPosition()) {
          this.manager.logPositionAndCleanOldLogs(this.currentPath,
              this.peerClusterZnode, this.repLogReader.getPosition(),
              this.replicationQueueInfo.isQueueRecovered(), currentWALisBeingWrittenTo);
          this.lastLoggedPosition = this.repLogReader.getPosition();
        }
        if (this.throttler.isEnabled()) {
          this.throttler.addPushSize(currentSize);
        }
        this.totalReplicatedEdits += entries.size();
        this.totalReplicatedOperations += currentNbOperations;
        this.metrics.shipBatch(this.currentNbOperations, this.currentSize / 1024);
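        // Report replication lag based on the write time of the newest entry just shipped.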
        this.metrics.setAgeOfLastShippedOp(entries.get(entries.size() - 1).getKey().getWriteTime());
        if (LOG.isTraceEnabled()) {
          LOG.trace("Replicated " + this.totalReplicatedEdits + " entries in total, or "
              + this.totalReplicatedOperations + " operations");
        }
        break;
      } catch (Exception ex) {
        LOG.warn(replicationEndpoint.getClass().getName() + " threw unknown exception:", ex);
        if (sleepForRetries("ReplicationEndpoint threw exception", sleepMultiplier)) {
          sleepMultiplier++;
        }
      }
    }
  }

  /**
   * check whether the peer is enabled or not
   *
   * @return true if the peer is enabled, otherwise false
   */
  protected boolean isPeerEnabled() {
    return this.replicationPeers.getStatusOfPeer(this.peerId);
  }

  /**
   * If the queue isn't empty, switch to the next one.
   * Else if this is a recovered queue, it means we're done!
   * Else we'll just continue to try reading the log file.
   * @return true if we're done with the current file, false if we should
   * continue trying to read from it
   */
  protected boolean processEndOfFile() {
    if (this.queue.size() != 0) {
      if (LOG.isTraceEnabled()) {
        String filesize = "N/A";
        try {
          FileStatus stat = this.fs.getFileStatus(this.currentPath);
          filesize = stat.getLen() + "";
        } catch (IOException ex) {}
        LOG.trace("Reached the end of a log, stats: " + getStats() +
            ", and the length of the file is " + filesize);
      }
      this.currentPath = null;
      this.repLogReader.finishCurrentFile();
      this.reader = null;
      return true;
    } else if (this.replicationQueueInfo.isQueueRecovered()) {
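      // The recovered queue is fully drained: release it and let this source exit.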
      this.manager.closeRecoveredQueue(this);
      LOG.info("Finished recovering the queue with the following stats " + getStats());
      this.running = false;
      return true;
    }
    return false;
  }

  @Override
  public void startup() {
    String n = Thread.currentThread().getName();
    Thread.UncaughtExceptionHandler handler =
        new Thread.UncaughtExceptionHandler() {
          @Override
          public void uncaughtException(final Thread t, final Throwable e) {
            LOG.error("Unexpected exception in ReplicationSource," +
                " currentPath=" + currentPath, e);
          }
        };
    Threads.setDaemonThreadRunning(
        this, n + ".replicationSource," +
        this.peerClusterZnode, handler);
  }

  @Override
  public void terminate(String reason) {
    terminate(reason, null);
  }

  @Override
  public void terminate(String reason, Exception cause) {
    terminate(reason, cause, true);
  }

  public void terminate(String reason, Exception cause, boolean join) {
    if (cause == null) {
      LOG.info("Closing source "
          + this.peerClusterZnode + " because: " + reason);
    } else {
      LOG.error("Closing source " + this.peerClusterZnode
          + " because an error occurred: " + reason, cause);
    }
    this.running = false;
    this.interrupt();
    ListenableFuture<Service.State> future = null;
    if (this.replicationEndpoint != null) {
      future = this.replicationEndpoint.stop();
    }
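    // When asked to join, wait for both this thread and the endpoint shutdown to complete.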
    if (join) {
      Threads.shutdown(this, this.sleepForRetries);
      if (future != null) {
        try {
          future.get();
        } catch (Exception e) {
          LOG.warn("Got exception:", e);
        }
      }
    }
  }

  @Override
  public String getPeerClusterZnode() {
    return this.peerClusterZnode;
  }

  @Override
  public String getPeerClusterId() {
    return this.peerId;
  }

  @Override
  public Path getCurrentPath() {
    return this.currentPath;
  }

  private boolean isActive() {
    return !this.stopper.isStopped() && this.running && !isInterrupted();
  }

  /**
   * Comparator used to compare logs together based on their start time
   */
  public static class LogsComparator implements Comparator<Path> {

    @Override
    public int compare(Path o1, Path o2) {
      return Long.valueOf(getTS(o1)).compareTo(getTS(o2));
    }

    /**
     * Split a path to get the start time
     * For example: 10.20.20.171%3A60020.1277499063250
     * @param p path to split
     * @return start time
     */
    private long getTS(Path p) {
      String[] parts = p.getName().split("\\.");
      return Long.parseLong(parts[parts.length - 1]);
    }
  }

  @Override
  public String getStats() {
    long position = this.repLogReader.getPosition();
    return "Total replicated edits: " + totalReplicatedEdits +
      ", currently replicating from: " + this.currentPath +
      " at position: " + position;
  }

  /**
   * Get Replication Source Metrics
   * @return sourceMetrics
   */
  public MetricsSource getSourceMetrics() {
    return this.metrics;
  }
}