1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.replication.regionserver;
20  
21  import java.io.EOFException;
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.util.ArrayList;
25  import java.util.Comparator;
26  import java.util.List;
27  import java.util.UUID;
28  import java.util.concurrent.PriorityBlockingQueue;
29  import java.util.concurrent.TimeUnit;
30  
31  import org.apache.commons.lang.StringUtils;
32  import org.apache.commons.logging.Log;
33  import org.apache.commons.logging.LogFactory;
34  import org.apache.hadoop.hbase.classification.InterfaceAudience;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FileStatus;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.Path;
39  import org.apache.hadoop.hbase.HConstants;
40  import org.apache.hadoop.hbase.KeyValue;
41  import org.apache.hadoop.hbase.Stoppable;
42  import org.apache.hadoop.hbase.regionserver.wal.HLog;
43  import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
44  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
45  import org.apache.hadoop.hbase.replication.ChainWALEntryFilter;
46  import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
47  import org.apache.hadoop.hbase.replication.ReplicationException;
48  import org.apache.hadoop.hbase.replication.ReplicationPeers;
49  import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
50  import org.apache.hadoop.hbase.replication.ReplicationQueues;
51  import org.apache.hadoop.hbase.replication.SystemTableWALEntryFilter;
52  import org.apache.hadoop.hbase.replication.WALEntryFilter;
53  import org.apache.hadoop.hbase.util.Threads;
54  import com.google.common.collect.Lists;
55  import com.google.common.util.concurrent.ListenableFuture;
56  import com.google.common.util.concurrent.Service;
57  
58  /**
59   * Class that handles the source of a replication stream.
60   * Currently it does not handle more than one slave cluster.
61   * For each slave cluster it selects a random number of peers
62   * using a replication ratio. For example, if the replication ratio is 0.1
63   * and the slave cluster has 100 region servers, 10 will be selected.
64   * <p/>
65   * A stream is considered down when we cannot contact a region server on the
66   * peer cluster for more than 55 seconds by default.
67   * <p/>
68   *
69   */
70  @InterfaceAudience.Private
71  public class ReplicationSource extends Thread
72      implements ReplicationSourceInterface {
73  
74    public static final Log LOG = LogFactory.getLog(ReplicationSource.class);
75    // Queue of logs to process
76    private PriorityBlockingQueue<Path> queue;
77    private ReplicationQueues replicationQueues;
78    private ReplicationPeers replicationPeers;
79  
80    private Configuration conf;
81    private ReplicationQueueInfo replicationQueueInfo;
82    // id of the peer cluster this source replicates to
83    private String peerId;
84    // The manager of all sources, to which we report our progress
85    private ReplicationSourceManager manager;
86    // Should we stop everything?
87    private Stoppable stopper;
88    // How long should we sleep for each retry
89    private long sleepForRetries;
90    // Max size in bytes of entriesArray
91    private long replicationQueueSizeCapacity;
92    // Max number of entries in entriesArray
93    private int replicationQueueNbCapacity;
94    // Our reader for the current log
95    private HLog.Reader reader;
96    // Last position in the log that we sent to ZooKeeper
97    private long lastLoggedPosition = -1;
98    // Path of the current log
99    private volatile Path currentPath;
100   private FileSystem fs;
101   // id of this cluster
102   private UUID clusterId;
103   // id of the other cluster
104   private UUID peerClusterId;
105   // total number of edits we replicated
106   private long totalReplicatedEdits = 0;
107   // total number of operations (Put/Delete) we replicated
108   private long totalReplicatedOperations = 0;
109   // The znode of the replication queue we are reading from
110   private String peerClusterZnode;
111   // Maximum number of retries before taking bold actions
112   private int maxRetriesMultiplier;
113   // Current number of operations (Put/Delete) that we need to replicate
114   private int currentNbOperations = 0;
115   // Current size of data we need to replicate
116   private int currentSize = 0;
117   // Indicates if this particular source is running
118   private volatile boolean running = true;
119   // Metrics for this source
120   private MetricsSource metrics;
121   // Handle on the log reader helper
122   private ReplicationHLogReaderManager repLogReader;
123   //WARN threshold for the number of queued logs, defaults to 2
124   private int logQueueWarnThreshold;
125   // ReplicationEndpoint which will handle the actual replication
126   private ReplicationEndpoint replicationEndpoint;
127   // A filter (or a chain of filters) for the WAL entries.
128   private WALEntryFilter walEntryFilter;
129   // throttler
130   private ReplicationThrottler throttler;
131 
132   /**
133    * Instantiation method used by region servers
134    *
135    * @param conf configuration to use
136    * @param fs file system to use
137    * @param manager replication manager to ping to
138    * @param stopper     the stopper object used to check whether the region server is stopping
139    * @param peerClusterZnode the name of our znode
140    * @param clusterId unique UUID for the cluster
141    * @param replicationEndpoint the replication endpoint implementation
142    * @param metrics metrics for replication source
143    * @throws IOException
144    */
145   @Override
146   public void init(final Configuration conf, final FileSystem fs,
147       final ReplicationSourceManager manager, final ReplicationQueues replicationQueues,
148       final ReplicationPeers replicationPeers, final Stoppable stopper,
149       final String peerClusterZnode, final UUID clusterId, ReplicationEndpoint replicationEndpoint,
150       final MetricsSource metrics)
151           throws IOException {
152     this.stopper = stopper;
153     this.conf = conf;
154     decorateConf();
155     this.replicationQueueSizeCapacity =
156         this.conf.getLong("replication.source.size.capacity", 1024*1024*64);
157     this.replicationQueueNbCapacity =
158         this.conf.getInt("replication.source.nb.capacity", 25000);
159     this.maxRetriesMultiplier = this.conf.getInt("replication.source.maxretriesmultiplier", 10);
160     this.queue =
161         new PriorityBlockingQueue<Path>(
162             this.conf.getInt("hbase.regionserver.maxlogs", 32),
163             new LogsComparator());
164     long bandwidth = this.conf.getLong("replication.source.per.peer.node.bandwidth", 0);
165     this.throttler = new ReplicationThrottler((double)bandwidth/10.0);
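    // A bandwidth of 0 (the default) leaves throttling disabled. A non-zero value is divided by 10
    // because ReplicationThrottler meters shipped bytes in roughly 100 ms cycles; as an illustrative
    // figure, replication.source.per.peer.node.bandwidth=1048576 (1 MB/s) would allow about
    // 104857 bytes per cycle before shipping is delayed.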
166     this.replicationQueues = replicationQueues;
167     this.replicationPeers = replicationPeers;
168     this.manager = manager;
169     this.sleepForRetries =
170         this.conf.getLong("replication.source.sleepforretries", 1000);
171     this.fs = fs;
172     this.metrics = metrics;
173     this.repLogReader = new ReplicationHLogReaderManager(this.fs, this.conf);
174     this.clusterId = clusterId;
175 
176     this.peerClusterZnode = peerClusterZnode;
177     this.replicationQueueInfo = new ReplicationQueueInfo(peerClusterZnode);
178     // ReplicationQueueInfo parses the peerId out of the znode for us
179     this.peerId = this.replicationQueueInfo.getPeerId();
180     this.logQueueWarnThreshold = this.conf.getInt("replication.source.log.queue.warn", 2);
181     this.replicationEndpoint = replicationEndpoint;
182   }
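  // For reference, the tuning knobs read in init() above and their default values:
  //   replication.source.size.capacity            64 MB  (max batch size in bytes)
  //   replication.source.nb.capacity              25000  (max entries per batch)
  //   replication.source.maxretriesmultiplier     10     (cap on the retry backoff multiplier)
  //   replication.source.sleepforretries          1000   (base retry sleep in ms)
  //   replication.source.per.peer.node.bandwidth  0      (throttling disabled by default)
  //   replication.source.log.queue.warn           2      (queued-log count that triggers a WARN)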
183 
184   private void decorateConf() {
185     String replicationCodec = this.conf.get(HConstants.REPLICATION_CODEC_CONF_KEY);
186     if (StringUtils.isNotEmpty(replicationCodec)) {
187       this.conf.set(HConstants.RPC_CODEC_CONF_KEY, replicationCodec);
188     }
189   }
190 
191   @Override
192   public void enqueueLog(Path log) {
193     this.queue.put(log);
194     int queueSize = queue.size();
195     this.metrics.setSizeOfLogQueue(queueSize);
196     // This will log a warning for each new log that gets created above the warn threshold
197     if (queueSize > this.logQueueWarnThreshold) {
198       LOG.warn("Queue size: " + queueSize +
199         " exceeds value of replication.source.log.queue.warn: " + logQueueWarnThreshold);
200     }
201   }
202 
203   private void uninitialize() {
204     LOG.debug("Source exiting " + this.peerId);
205     metrics.clear();
206     if (replicationEndpoint.state() == Service.State.STARTING
207         || replicationEndpoint.state() == Service.State.RUNNING) {
208       replicationEndpoint.stopAndWait();
209     }
210   }
211 
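  // Rough sketch of the control flow in run() below:
  //   1. start the ReplicationEndpoint and wait until it is RUNNING;
  //   2. build the WAL entry filter chain (system-table filter plus the endpoint's own filter);
  //   3. loop until the peer UUID can be fetched, then refuse to replicate to ourselves
  //      unless the endpoint explicitly allows it;
  //   4. main loop: wait for the peer to be enabled, poll the next WAL from the queue,
  //      open a reader, read a batch of entries, and hand the batch to shipEdits().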
212   @Override
213   public void run() {
214     // We were stopped while looping to connect to sinks, just abort
215     if (!this.isActive()) {
216       uninitialize();
217       return;
218     }
219 
220     try {
221       // start the endpoint, connect to the cluster
222       Service.State state = replicationEndpoint.start().get();
223       if (state != Service.State.RUNNING) {
224         LOG.warn("ReplicationEndpoint was not started. Exiting");
225         uninitialize();
226         return;
227       }
228     } catch (Exception ex) {
229       LOG.warn("Error starting ReplicationEndpoint, exiting", ex);
230       throw new RuntimeException(ex);
231     }
232 
233     // get the WALEntryFilter from ReplicationEndpoint and add it to default filters
234     ArrayList<WALEntryFilter> filters = Lists.newArrayList(
235       (WALEntryFilter)new SystemTableWALEntryFilter());
236     WALEntryFilter filterFromEndpoint = this.replicationEndpoint.getWALEntryfilter();
237     if (filterFromEndpoint != null) {
238       filters.add(filterFromEndpoint);
239     }
240     this.walEntryFilter = new ChainWALEntryFilter(filters);
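    // With this chain, an entry passes through each filter in order: SystemTableWALEntryFilter drops
    // edits on system tables, and the endpoint-provided filter (if any) is applied after it.
    // ChainWALEntryFilter returns null as soon as any filter rejects the entry, which is why the
    // read loop below checks for a null entry after filtering.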
241 
242     int sleepMultiplier = 1;
243     // delay this until we are in an asynchronous thread
244     while (this.isActive() && this.peerClusterId == null) {
245       this.peerClusterId = replicationEndpoint.getPeerUUID();
246       if (this.isActive() && this.peerClusterId == null) {
247         if (sleepForRetries("Cannot contact the peer's zk ensemble", sleepMultiplier)) {
248           sleepMultiplier++;
249         }
250       }
251     }
252     // We were stopped while looping to contact peer's zk ensemble, just abort
253     if (!this.isActive()) {
254       uninitialize();
255       return;
256     }
257 
258     // resetting to 1 to reuse later
259     sleepMultiplier = 1;
260 
261     // In a rare case, the ZooKeeper setting may be messed up. That leads to an incorrect
262     // peerClusterId value, which is the same as the source clusterId
263     if (clusterId.equals(peerClusterId) && !replicationEndpoint.canReplicateToSameCluster()) {
264       this.terminate("ClusterId " + clusterId + " is replicating to itself: peerClusterId "
265           + peerClusterId + " which is not allowed by ReplicationEndpoint:"
266           + replicationEndpoint.getClass().getName(), null, false);
267     }
268     LOG.info("Replicating "+clusterId + " -> " + peerClusterId);
269 
270     // If this queue was recovered, it is already full and the first log
271     // normally has a position (unless the RS failed between 2 logs)
272     if (this.replicationQueueInfo.isQueueRecovered()) {
273       try {
274         this.repLogReader.setPosition(this.replicationQueues.getLogPosition(this.peerClusterZnode,
275           this.queue.peek().getName()));
276         if (LOG.isTraceEnabled()) {
277           LOG.trace("Recovered queue started with log " + this.queue.peek() +
278               " at position " + this.repLogReader.getPosition());
279         }
280       } catch (ReplicationException e) {
281         this.terminate("Couldn't get the position of this recovered queue " +
282             this.peerClusterZnode, e);
283       }
284     }
285     // Loop until we close down
286     while (isActive()) {
287       // Sleep until replication is enabled again
288       if (!isPeerEnabled()) {
289         if (sleepForRetries("Replication is disabled", sleepMultiplier)) {
290           sleepMultiplier++;
291         }
292         continue;
293       }
294       Path oldPath = getCurrentPath(); //note that in the current scenario,
295                                        //oldPath will be null when a log roll
296                                        //happens.
297       // Get a new path
298       boolean hasCurrentPath = getNextPath();
299       if (getCurrentPath() != null && oldPath == null) {
300         sleepMultiplier = 1; //reset the sleepMultiplier on a path change
301       }
302       if (!hasCurrentPath) {
303         if (sleepForRetries("No log to process", sleepMultiplier)) {
304           sleepMultiplier++;
305         }
306         continue;
307       }
308       boolean currentWALisBeingWrittenTo = false;
309       //For WAL files we own (rather than recovered), take a snapshot of whether the
310       //current WAL file (this.currentPath) is in use (for writing) NOW!
311       //Since the new WAL paths are enqueued only after the prev WAL file
312       //is 'closed', presence of an element in the queue means that
313       //the previous WAL file was closed, else the file is in use (currentPath)
314       //We take the snapshot now so that we are protected against races
315       //where a new file gets enqueued while the current file is being processed
316       //(and where we just finished reading the current file).
317       if (!this.replicationQueueInfo.isQueueRecovered() && queue.size() == 0) {
318         currentWALisBeingWrittenTo = true;
319       }
320       // Open a reader on it
321       if (!openReader(sleepMultiplier)) {
322         // Reset the sleep multiplier, else it'd be reused for the next file
323         sleepMultiplier = 1;
324         continue;
325       }
326 
327       // If we got a null reader but didn't continue, then sleep and continue
328       if (this.reader == null) {
329         if (sleepForRetries("Unable to open a reader", sleepMultiplier)) {
330           sleepMultiplier++;
331         }
332         continue;
333       }
334 
335       boolean gotIOE = false;
336       currentNbOperations = 0;
337       List<HLog.Entry> entries = new ArrayList<HLog.Entry>(1);
338       currentSize = 0;
339       try {
340         if (readAllEntriesToReplicateOrNextFile(currentWALisBeingWrittenTo, entries)) {
341           continue;
342         }
343       } catch (IOException ioe) {
344         LOG.warn(this.peerClusterZnode + " Got: ", ioe);
345         gotIOE = true;
346         if (ioe.getCause() instanceof EOFException) {
347 
348           boolean considerDumping = false;
349           if (this.replicationQueueInfo.isQueueRecovered()) {
350             try {
351               FileStatus stat = this.fs.getFileStatus(this.currentPath);
352               if (stat.getLen() == 0) {
353                 LOG.warn(this.peerClusterZnode + " Got EOF and the file was empty");
354               }
355               considerDumping = true;
356             } catch (IOException e) {
357               LOG.warn(this.peerClusterZnode + " Got exception while getting file size: ", e);
358             }
359           }
360 
361           if (considerDumping &&
362               sleepMultiplier == this.maxRetriesMultiplier &&
363               processEndOfFile()) {
364             continue;
365           }
366         }
367       } finally {
368         try {
369           this.reader = null;
370           this.repLogReader.closeReader();
371         } catch (IOException e) {
372           gotIOE = true;
373           LOG.warn("Unable to finalize the tailing of a file", e);
374         }
375       }
376 
377       // If we didn't get anything to replicate, or if we hit a IOE,
378       // wait a bit and retry.
379       // But if we need to stop, don't bother sleeping
380       if (this.isActive() && (gotIOE || entries.isEmpty())) {
381         if (this.lastLoggedPosition != this.repLogReader.getPosition()) {
382           this.manager.logPositionAndCleanOldLogs(this.currentPath,
383               this.peerClusterZnode, this.repLogReader.getPosition(),
384               this.replicationQueueInfo.isQueueRecovered(), currentWALisBeingWrittenTo);
385           this.lastLoggedPosition = this.repLogReader.getPosition();
386         }
387         // Reset the sleep multiplier if nothing has actually gone wrong
388         if (!gotIOE) {
389           sleepMultiplier = 1;
390           // if there was nothing to ship and it's not an error
391           // set "ageOfLastShippedOp" to <now> to indicate that we're current
392           this.metrics.setAgeOfLastShippedOp(System.currentTimeMillis());
393         }
394         if (sleepForRetries("Nothing to replicate", sleepMultiplier)) {
395           sleepMultiplier++;
396         }
397         continue;
398       }
399       sleepMultiplier = 1;
400       shipEdits(currentWALisBeingWrittenTo, entries);
401     }
402     uninitialize();
403   }
404 
405   /**
406    * Read all the entries from the current log file and retain those
407    * that need to be replicated. Otherwise, process the end of the current file.
408    * @param currentWALisBeingWrittenTo is the current WAL being written to
409    * @param entries resulting entries to be replicated
410    * @return true if we got nothing and went to the next file, false if we got
411    * entries
412    * @throws IOException
413    */
414   protected boolean readAllEntriesToReplicateOrNextFile(boolean currentWALisBeingWrittenTo,
415       List<HLog.Entry> entries) throws IOException {
416     long seenEntries = 0;
417     if (LOG.isTraceEnabled()) {
418       LOG.trace("Seeking in " + this.currentPath + " at position "
419           + this.repLogReader.getPosition());
420     }
421     this.repLogReader.seek();
422     long positionBeforeRead = this.repLogReader.getPosition();
423     HLog.Entry entry =
424         this.repLogReader.readNextAndSetPosition();
425     while (entry != null) {
426       this.metrics.incrLogEditsRead();
427       seenEntries++;
428 
429       // don't replicate if the log entries have already been consumed by the cluster
430       if (replicationEndpoint.canReplicateToSameCluster()
431           || !entry.getKey().getClusterIds().contains(peerClusterId)) {
432         // Remove all KVs that should not be replicated
433         entry = walEntryFilter.filter(entry);
434         WALEdit edit = null;
435         HLogKey logKey = null;
436         if (entry != null) {
437           edit = entry.getEdit();
438           logKey = entry.getKey();
439         }
440 
441         if (edit != null && edit.size() != 0) {
442           //Mark that the current cluster has the change
443           logKey.addClusterId(clusterId);
444           currentNbOperations += countDistinctRowKeys(edit);
445           entries.add(entry);
446           currentSize += entry.getEdit().heapSize();
447         } else {
448           this.metrics.incrLogEditsFiltered();
449         }
450       }
451       // Stop if too many entries or too big
452       if (currentSize >= this.replicationQueueSizeCapacity ||
453           entries.size() >= this.replicationQueueNbCapacity) {
454         break;
455       }
456       try {
457         entry = this.repLogReader.readNextAndSetPosition();
458       } catch (IOException ie) {
459         LOG.debug("Break on IOE: " + ie.getMessage());
460         break;
461       }
462     }
463     metrics.incrLogReadInBytes(this.repLogReader.getPosition() - positionBeforeRead);
464     if (currentWALisBeingWrittenTo) {
465       return false;
466     }
467     // If we didn't get anything and the queue has an object, it means we
468     // hit the end of the file for sure
469     return seenEntries == 0 && processEndOfFile();
470   }
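  // A batch produced by the method above is bounded by both knobs read in init(): it is closed as
  // soon as the accumulated heap size of the edits reaches replication.source.size.capacity
  // (64 MB by default) or the entry count reaches replication.source.nb.capacity (25000 by
  // default), whichever comes first; otherwise it ends at EOF of the current WAL.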
471 
472   /**
473    * Poll for the next path
474    * @return true if a path was obtained, false if not
475    */
476   protected boolean getNextPath() {
477     try {
478       if (this.currentPath == null) {
479         this.currentPath = queue.poll(this.sleepForRetries, TimeUnit.MILLISECONDS);
480         this.metrics.setSizeOfLogQueue(queue.size());
481         if (this.currentPath != null) {
482           this.manager.cleanOldLogs(this.currentPath.getName(),
483               this.peerId,
484               this.replicationQueueInfo.isQueueRecovered());
485           if (LOG.isTraceEnabled()) {
486             LOG.trace("New log: " + this.currentPath);
487           }
488         }
489       }
490     } catch (InterruptedException e) {
491       LOG.warn("Interrupted while reading edits", e);
492     }
493     return this.currentPath != null;
494   }
495 
496   /**
497    * Open a reader on the current path
498    *
499    * @param sleepMultiplier by how many times the default sleeping time is augmented
500    * @return true if we should continue with that file, false if we are done with it
501    */
502   protected boolean openReader(int sleepMultiplier) {
503     try {
504       try {
505         if (LOG.isTraceEnabled()) {
506           LOG.trace("Opening log " + this.currentPath);
507         }
508         this.reader = repLogReader.openReader(this.currentPath);
509       } catch (FileNotFoundException fnfe) {
510         if (this.replicationQueueInfo.isQueueRecovered()) {
511           // We didn't find the log in the archive directory, look if it still
512           // exists in the dead RS folder (there could be a chain of failures
513           // to look at)
514           List<String> deadRegionServers = this.replicationQueueInfo.getDeadRegionServers();
515           LOG.info("NB dead servers : " + deadRegionServers.size());
516           for (String curDeadServerName : deadRegionServers) {
517             Path deadRsDirectory =
518                 new Path(manager.getLogDir().getParent(), curDeadServerName);
519             Path[] locs = new Path[] {
520                 new Path(deadRsDirectory, currentPath.getName()),
521                 new Path(deadRsDirectory.suffix(HLog.SPLITTING_EXT),
522                                           currentPath.getName()),
523             };
524             for (Path possibleLogLocation : locs) {
525               LOG.info("Possible location " + possibleLogLocation.toUri().toString());
526               if (this.manager.getFs().exists(possibleLogLocation)) {
527                 // We found the right new location
528                 LOG.info("Log " + this.currentPath + " still exists at " +
529                     possibleLogLocation);
530                 // Breaking here will make us sleep since reader is null
531                 return true;
532               }
533             }
534           }
535           // In the case of disaster/recovery, the HMaster may be shut down or crashed before moving
536           // data from .logs to .oldlogs. Loop through the .logs folders and check whether a match exists
537           if (stopper instanceof ReplicationSyncUp.DummyServer) {
538             FileStatus[] rss = fs.listStatus(manager.getLogDir());
539             for (FileStatus rs : rss) {
540               Path p = rs.getPath();
541               FileStatus[] logs = fs.listStatus(p);
542               for (FileStatus log : logs) {
543                 p = new Path(p, log.getPath().getName());
544                 if (p.getName().equals(currentPath.getName())) {
545                   currentPath = p;
546                   LOG.info("Log " + this.currentPath + " exists under " + manager.getLogDir());
547                   // Open the log at the new location
548                   this.openReader(sleepMultiplier);
549                   return true;
550                 }
551               }
552             }
553           }
554 
555           // TODO What happens if the log was missing from every single location?
556           // Although we need to check a couple of times as the log could have
557           // been moved by the master between the checks
558           // It can also happen if a recovered queue wasn't properly cleaned,
559           // such that the znode pointing to a log exists but the log was
560           // deleted a long time ago.
561           // For the moment, we'll throw the IOException and let the catch below call processEndOfFile
562           throw new IOException("File from recovered queue is " +
563               "nowhere to be found", fnfe);
564         } else {
565           // If the log was archived, continue reading from there
566           Path archivedLogLocation =
567               new Path(manager.getOldLogDir(), currentPath.getName());
568           if (this.manager.getFs().exists(archivedLogLocation)) {
569             currentPath = archivedLogLocation;
570             LOG.info("Log " + this.currentPath + " was moved to " +
571                 archivedLogLocation);
572             // Open the log at the new location
573             this.openReader(sleepMultiplier);
574 
575           }
576           // TODO What happens if the log is missing in both places?
577         }
578       }
579     } catch (IOException ioe) {
580       if (ioe instanceof EOFException && isCurrentLogEmpty()) return true;
581       LOG.warn(this.peerClusterZnode + " Got: ", ioe);
582       this.reader = null;
583       if (ioe.getCause() instanceof NullPointerException) {
584         // Workaround for race condition in HDFS-4380
585         // which throws a NPE if we open a file before any data node has the most recent block
586         // Just sleep and retry. Will require re-reading compressed HLogs for compressionContext.
587         LOG.warn("Got NPE opening reader, will retry.");
588       } else if (sleepMultiplier == this.maxRetriesMultiplier) {
589         // TODO Need a better way to determine if a file is really gone but
590         // TODO without scanning all logs dir
591         LOG.warn("Waited too long for this file, considering dumping");
592         return !processEndOfFile();
593       }
594     }
595     return true;
596   }
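  // To summarize the lookup above when a WAL has gone missing: for a queue we own, the file is
  // retried from the archive (old logs) directory; for a recovered queue, each dead region server's
  // log directory and its "-splitting" variant are probed, and when running under
  // ReplicationSyncUp's DummyServer the whole .logs tree is scanned as a last resort.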
597 
598   /*
599    * Checks whether the current log file is empty and this is not a recovered queue. This is to
600    * handle the scenario where, in an idle cluster, there is no entry in the current log and we keep
601    * trying to read the log file and getting EOFException. In the case of a recovered queue the last
602    * log file may be empty, and we don't want to retry that.
603    */
604   private boolean isCurrentLogEmpty() {
605     return (this.repLogReader.getPosition() == 0 &&
606         !this.replicationQueueInfo.isQueueRecovered() && queue.size() == 0);
607   }
608 
609   /**
610    * Do the sleeping logic
611    * @param msg Why we sleep
612    * @param sleepMultiplier by how many times the default sleeping time is augmented
613    * @return True if <code>sleepMultiplier</code> is &lt; <code>maxRetriesMultiplier</code>
614    */
615   protected boolean sleepForRetries(String msg, int sleepMultiplier) {
616     try {
617       if (LOG.isTraceEnabled()) {
618         LOG.trace(msg + ", sleeping " + sleepForRetries + " times " + sleepMultiplier);
619       }
620       Thread.sleep(this.sleepForRetries * sleepMultiplier);
621     } catch (InterruptedException e) {
622       LOG.debug("Interrupted while sleeping between retries");
623       Thread.currentThread().interrupt();
624     }
625     return sleepMultiplier < maxRetriesMultiplier;
626   }
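  // With the defaults (replication.source.sleepforretries=1000 ms and
  // replication.source.maxretriesmultiplier=10) the backoff grows linearly: 1s, 2s, ... up to 10s,
  // i.e. roughly 1+2+...+10 = 55 seconds of accumulated waiting before the source takes the
  // "bold actions" mentioned above; this is presumably where the 55 second figure in the class
  // comment comes from.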
627 
628   /**
629    * Count the number of different row keys in the given edit; an edit can span
630    * multiple rows because of mini-batching. We assume that there's at least one KV in the WALEdit.
631    * @param edit edit to count row keys from
632    * @return number of different row keys
633    */
634   private int countDistinctRowKeys(WALEdit edit) {
635     List<KeyValue> kvs = edit.getKeyValues();
636     int distinctRowKeys = 1;
637     KeyValue lastKV = kvs.get(0);
638     for (int i = 0; i < edit.size(); i++) {
639       if (!kvs.get(i).matchingRow(lastKV)) {
640         distinctRowKeys++; lastKV = kvs.get(i);
641       }
642     }
643     return distinctRowKeys;
644   }
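  // For example, an edit carrying cells for rows [r1, r1, r2, r2, r3] yields 3 distinct row keys;
  // the count feeds currentNbOperations, which in turn feeds the shipped-operations metrics.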
645 
646   /**
647    * Do the shipping logic
648    * @param currentWALisBeingWrittenTo was the current WAL being (seemingly)
649    * written to when this method was called
650    */
651   protected void shipEdits(boolean currentWALisBeingWrittenTo, List<HLog.Entry> entries) {
652     int sleepMultiplier = 1;
653     if (entries.isEmpty()) {
654       LOG.warn("Was given 0 edits to ship");
655       return;
656     }
657     while (this.isActive()) {
658       try {
659         if (this.throttler.isEnabled()) {
660           long sleepTicks = this.throttler.getNextSleepInterval(currentSize);
661           if (sleepTicks > 0) {
662             try {
663               if (LOG.isTraceEnabled()) {
664                 LOG.trace("To sleep " + sleepTicks + "ms for throttling control");
665               }
666               Thread.sleep(sleepTicks);
667             } catch (InterruptedException e) {
668               LOG.debug("Interrupted while sleeping for throttling control");
669               Thread.currentThread().interrupt();
670               // the current thread might have been interrupted to terminate;
671               // go directly back to while() to confirm this
672               continue;
673             }
674             // reset the throttler's cycle start tick when a throttling sleep occurs
675             this.throttler.resetStartTick();
676           }
677         }
678         // create replicateContext here, so the entries can be GC'd upon return from this call stack
679         ReplicationEndpoint.ReplicateContext replicateContext = new ReplicationEndpoint.ReplicateContext();
680         replicateContext.setEntries(entries).setSize(currentSize);
681 
682         // send the edits to the endpoint. Will block until the edits are shipped and acknowledged
683         boolean replicated = replicationEndpoint.replicate(replicateContext);
684 
685         if (!replicated) {
686           continue;
687         }
688 
689         if (this.lastLoggedPosition != this.repLogReader.getPosition()) {
690           this.manager.logPositionAndCleanOldLogs(this.currentPath,
691               this.peerClusterZnode, this.repLogReader.getPosition(),
692               this.replicationQueueInfo.isQueueRecovered(), currentWALisBeingWrittenTo);
693           this.lastLoggedPosition = this.repLogReader.getPosition();
694         }
695         if (this.throttler.isEnabled()) {
696           this.throttler.addPushSize(currentSize);
697         }
698         this.totalReplicatedEdits += entries.size();
699         this.totalReplicatedOperations += currentNbOperations;
700         this.metrics.shipBatch(this.currentNbOperations, this.currentSize/1024);
701         this.metrics.setAgeOfLastShippedOp(entries.get(entries.size()-1).getKey().getWriteTime());
702         if (LOG.isTraceEnabled()) {
703           LOG.trace("Replicated " + this.totalReplicatedEdits + " entries in total, or "
704               + this.totalReplicatedOperations + " operations");
705         }
706         break;
707       } catch (Exception ex) {
708         LOG.warn(replicationEndpoint.getClass().getName() + " threw unknown exception:" + ex);
709         if (sleepForRetries("ReplicationEndpoint threw exception", sleepMultiplier)) {
710           sleepMultiplier++;
711         }
712       }
713     }
714   }
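  // Note that shipEdits() retries the same batch until replicate() both returns true and does not
  // throw, sleeping with backoff between attempts, and only then records the new log position in
  // ZooKeeper via logPositionAndCleanOldLogs(). A batch can therefore be delivered to the peer more
  // than once, but it is never acknowledged as shipped without a successful replicate() call.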
715 
716   /**
717    * check whether the peer is enabled or not
718    *
719    * @return true if the peer is enabled, otherwise false
720    */
721   protected boolean isPeerEnabled() {
722     return this.replicationPeers.getStatusOfPeer(this.peerId);
723   }
724 
725   /**
726    * If the queue isn't empty, switch to the next one
727    * Else if this is a recovered queue, it means we're done!
728    * Else we'll just continue to try reading the log file
729    * @return true if we're done with the current file, false if we should
730    * continue trying to read from it
731    */
732   protected boolean processEndOfFile() {
733     if (this.queue.size() != 0) {
734       if (LOG.isTraceEnabled()) {
735         String filesize = "N/A";
736         try {
737           FileStatus stat = this.fs.getFileStatus(this.currentPath);
738           filesize = stat.getLen()+"";
739         } catch (IOException ex) {}
740         LOG.trace("Reached the end of a log, stats: " + getStats() +
741             ", and the length of the file is " + filesize);
742       }
743       this.currentPath = null;
744       this.repLogReader.finishCurrentFile();
745       this.reader = null;
746       return true;
747     } else if (this.replicationQueueInfo.isQueueRecovered()) {
748       this.manager.closeRecoveredQueue(this);
749       LOG.info("Finished recovering the queue with the following stats " + getStats());
750       this.running = false;
751       return true;
752     }
753     return false;
754   }
755 
756   @Override
757   public void startup() {
758     String n = Thread.currentThread().getName();
759     Thread.UncaughtExceptionHandler handler =
760         new Thread.UncaughtExceptionHandler() {
761           @Override
762           public void uncaughtException(final Thread t, final Throwable e) {
763             LOG.error("Unexpected exception in ReplicationSource," +
764               " currentPath=" + currentPath, e);
765           }
766         };
767     Threads.setDaemonThreadRunning(
768         this, n + ".replicationSource," +
769         this.peerClusterZnode, handler);
770   }
771 
772   @Override
773   public void terminate(String reason) {
774     terminate(reason, null);
775   }
776 
777   @Override
778   public void terminate(String reason, Exception cause) {
779     terminate(reason, cause, true);
780   }
781 
782   public void terminate(String reason, Exception cause, boolean join) {
783     if (cause == null) {
784       LOG.info("Closing source "
785           + this.peerClusterZnode + " because: " + reason);
786 
787     } else {
788       LOG.error("Closing source " + this.peerClusterZnode
789           + " because an error occurred: " + reason, cause);
790     }
791     this.running = false;
792     this.interrupt();
793     ListenableFuture<Service.State> future = null;
794     if (this.replicationEndpoint != null) {
795       future = this.replicationEndpoint.stop();
796     }
797     if (join) {
798       Threads.shutdown(this, this.sleepForRetries);
799       if (future != null) {
800         try {
801           future.get();
802         } catch (Exception e) {
803           LOG.warn("Got exception:" + e);
804         }
805       }
806     }
807   }
808 
809   @Override
810   public String getPeerClusterZnode() {
811     return this.peerClusterZnode;
812   }
813 
814   @Override
815   public String getPeerClusterId() {
816     return this.peerId;
817   }
818 
819   @Override
820   public Path getCurrentPath() {
821     return this.currentPath;
822   }
823 
824   private boolean isActive() {
825     return !this.stopper.isStopped() && this.running && !isInterrupted();
826   }
827 
828   /**
829    * Comparator used to compare logs together based on their start time
830    */
831   public static class LogsComparator implements Comparator<Path> {
832 
833     @Override
834     public int compare(Path o1, Path o2) {
835       return Long.valueOf(getTS(o1)).compareTo(getTS(o2));
836     }
837 
838     /**
839      * Split a path to get the start time
840      * For example: 10.20.20.171%3A60020.1277499063250
841      * @param p path to split
842      * @return start time
843      */
844     private long getTS(Path p) {
845       String[] parts = p.getName().split("\\.");
846       return Long.parseLong(parts[parts.length-1]);
847     }
848   }
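  // Because WAL file names end in their creation timestamp, this comparator makes the
  // PriorityBlockingQueue used for the log queue hand files out oldest-first, i.e. in the order
  // the region server rolled them.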
849 
850   @Override
851   public String getStats() {
852     long position = this.repLogReader.getPosition();
853     return "Total replicated edits: " + totalReplicatedEdits +
854       ", currently replicating from: " + this.currentPath +
855       " at position: " + position;
856   }
857 
858   /**
859    * Get Replication Source Metrics
860    * @return sourceMetrics
861    */
862   public MetricsSource getSourceMetrics() {
863     return this.metrics;
864   }
865 }