
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver.wal;
20  
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  import java.lang.reflect.InvocationTargetException;
25  import java.lang.reflect.Method;
26  import java.net.URLEncoder;
27  import java.util.ArrayList;
28  import java.util.Arrays;
29  import java.util.Collections;
31  import java.util.LinkedList;
32  import java.util.List;
33  import java.util.Map;
34  import java.util.SortedMap;
35  import java.util.TreeMap;
36  import java.util.TreeSet;
37  import java.util.UUID;
38  import java.util.concurrent.ConcurrentSkipListMap;
39  import java.util.concurrent.CopyOnWriteArrayList;
40  import java.util.concurrent.atomic.AtomicBoolean;
41  import java.util.concurrent.atomic.AtomicInteger;
42  import java.util.concurrent.atomic.AtomicLong;
43  
44  import org.apache.commons.logging.Log;
45  import org.apache.commons.logging.LogFactory;
46  import org.apache.hadoop.classification.InterfaceAudience;
47  import org.apache.hadoop.conf.Configuration;
48  import org.apache.hadoop.fs.FSDataOutputStream;
49  import org.apache.hadoop.fs.FileStatus;
50  import org.apache.hadoop.fs.FileSystem;
51  import org.apache.hadoop.fs.Path;
52  import org.apache.hadoop.fs.Syncable;
53  import org.apache.hadoop.hbase.HBaseConfiguration;
54  import org.apache.hadoop.hbase.HConstants;
55  import org.apache.hadoop.hbase.HRegionInfo;
56  import org.apache.hadoop.hbase.HTableDescriptor;
57  import org.apache.hadoop.hbase.KeyValue;
58  import org.apache.hadoop.hbase.exceptions.FailedLogCloseException;
59  import org.apache.hadoop.hbase.util.Bytes;
60  import org.apache.hadoop.hbase.util.ClassSize;
61  import org.apache.hadoop.hbase.util.DrainBarrier;
62  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
63  import org.apache.hadoop.hbase.util.FSUtils;
64  import org.apache.hadoop.hbase.util.HasThread;
65  import org.apache.hadoop.hbase.util.Threads;
66  import org.apache.hadoop.util.StringUtils;
67  
68  /**
69   * HLog stores all the edits to the HStore.  It is the hbase write-ahead-log
70   * implementation.
71   *
72   * It performs logfile-rolling, so external callers are not aware that the
73   * underlying file is being rolled.
74   *
75   * <p>
76   * There is one HLog per RegionServer.  All edits for all Regions carried by
77   * a particular RegionServer are entered first in the HLog.
78   *
79   * <p>
80   * Each HRegion is identified by a unique long integer. HRegions do
81   * not need to declare themselves before using the HLog; they simply include
82   * their HRegion-id in the <code>append</code> or
83   * <code>completeCacheFlush</code> calls.
84   *
85   * <p>
86   * An HLog consists of multiple on-disk files, which have a chronological order.
87   * As data is flushed to other (better) on-disk structures, the log becomes
88   * obsolete. We can destroy all the log messages for a given HRegion-id up to
89   * the most-recent CACHEFLUSH message from that HRegion.
90   *
91   * <p>
92   * It's only practical to delete entire files. Thus, we delete an entire on-disk
93   * file F when all of the messages in F have a log-sequence-id that's older
94   * (smaller) than the most-recent CACHEFLUSH message for every HRegion that has
95   * a message in F.
96   *
97   * <p>
98   * Synchronized methods can never execute in parallel. However, between the
99   * start of a cache flush and the completion point, appends are allowed but log
100  * rolling is not. To prevent log rolling from taking place during this period, a
101  * separate reentrant lock is used.
102  *
103  * <p>To read an HLog, call {@link HLogFactory#createReader(org.apache.hadoop.fs.FileSystem,
104  * org.apache.hadoop.fs.Path, org.apache.hadoop.conf.Configuration)}.
105  *
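 * <p>A rough usage sketch for writing to the log (the <code>regionInfo</code>,
 * <code>tableName</code>, <code>edit</code>, <code>clusterId</code> and <code>tableDesc</code>
 * values below are illustrative placeholders, not members of this class):
 * <pre>
 *   HLog log = new FSHLog(fs, rootDir, "logs", conf);
 *   long txid = log.appendNoSync(regionInfo, tableName, edit, clusterId,
 *       System.currentTimeMillis(), tableDesc);
 *   log.sync(txid);   // block until the edit is durable
 *   log.close();      // release the writer
 * </pre>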
106  */
107 @InterfaceAudience.Private
108 class FSHLog implements HLog, Syncable {
109   static final Log LOG = LogFactory.getLog(FSHLog.class);
110   
111   private final FileSystem fs;
112   private final Path rootDir;
113   private final Path dir;
114   private final Configuration conf;
115   // Listeners that are called on WAL events.
116   private List<WALActionsListener> listeners =
117     new CopyOnWriteArrayList<WALActionsListener>();
118   private final long optionalFlushInterval;
119   private final long blocksize;
120   private final String prefix;
121   private final AtomicLong unflushedEntries = new AtomicLong(0);
122   private volatile long syncedTillHere = 0;
123   private long lastDeferredTxid;
124   private final Path oldLogDir;
125   private volatile boolean logRollRunning;
126   private boolean failIfLogDirExists;
127 
128   private WALCoprocessorHost coprocessorHost;
129 
130   private FSDataOutputStream hdfs_out; // FSDataOutputStream associated with the current SequenceFile.writer
131   // Minimum tolerable replicas; if the actual replica count is lower than this,
132   // rollWriter will be triggered
133   private int minTolerableReplication;
134   private Method getNumCurrentReplicas; // refers to DFSOutputStream.getNumCurrentReplicas
135   final static Object [] NO_ARGS = new Object []{};
136 
137   /** The barrier used to ensure that close() waits for all log rolls and flushes to finish. */
138   private DrainBarrier closeBarrier = new DrainBarrier();
139 
140   /**
141    * Current log file.
142    */
143   Writer writer;
144 
145   /**
146    * Map of all log files but the current one.
147    */
148   final SortedMap<Long, Path> outputfiles =
149     Collections.synchronizedSortedMap(new TreeMap<Long, Path>());
150 
151 
152   /**
153    * This lock synchronizes all operations on oldestUnflushedSeqNums and oldestFlushingSeqNums,
154    * with the exception of append's putIfAbsent into oldestUnflushedSeqNums.
155    * We only use these to find out the low bound seqNum, or to find regions with old seqNums to
156    * force flush them, so we don't care about these numbers messing with anything. */
157   private final Object oldestSeqNumsLock = new Object();
158 
159   /**
160    * This lock makes sure only one log roll runs at the same time. Should not be taken while
161    * any other lock is held. We don't just use synchronized because that results in bogus and
162    * tedious findbugs warning when it thinks synchronized controls writer thread safety */
163   private final Object rollWriterLock = new Object();
164 
165   /**
166    * Map of encoded region names to their most recent sequence/edit id in their memstore.
167    */
168   private final ConcurrentSkipListMap<byte [], Long> oldestUnflushedSeqNums =
169     new ConcurrentSkipListMap<byte [], Long>(Bytes.BYTES_COMPARATOR);
170   /**
171    * Map of encoded region names to their most recent sequence/edit id in their memstore;
172    * contains the regions that are currently flushing. That way we can store two numbers for
173    * flushing and non-flushing (oldestUnflushedSeqNums) memstore for the same region.
174    */
175   private final Map<byte[], Long> oldestFlushingSeqNums =
176     new TreeMap<byte[], Long>(Bytes.BYTES_COMPARATOR);
177 
178   private volatile boolean closed = false;
179 
180   private final AtomicLong logSeqNum = new AtomicLong(0);
181 
182   private boolean forMeta = false;
183 
184   // The timestamp (in ms) when the log file was created.
185   private volatile long filenum = -1;
186 
187   // Number of transactions in the current HLog.
188   private final AtomicInteger numEntries = new AtomicInteger(0);
189 
190   // If the live datanode count is lower than the default replication value,
191   // rollWriter will be triggered on each sync (so rolls would be requested
192   // repeatedly in a short time). This counter is a workaround to slow
193   // down the roll frequency triggered by checkLowReplication().
194   private AtomicInteger consecutiveLogRolls = new AtomicInteger(0);
195   private final int lowReplicationRollLimit;
196 
197   // If consecutiveLogRolls is larger than lowReplicationRollLimit,
198   // then disable the rolling in checkLowReplication().
199   // Enable it if the replications recover.
200   private volatile boolean lowReplicationRollEnabled = true;
201 
202   // If the log grows larger than this size, roll it. This is typically 0.95 times
203   // the default HDFS block size.
204   private final long logrollsize;
205 
206   // We synchronize on updateLock to prevent updates and to prevent a log roll
207   // during an update
208   // locked during appends
209   private final Object updateLock = new Object();
210   private final Object flushLock = new Object();
211 
212   private final boolean enabled;
213 
214   /*
215    * If there are more than this many logs, force a flush of the oldest region so its
216    * oldest edit goes to disk.  If too many logs pile up and we crash, replay will take forever.
217    * Keep the number of logs tidy.
218    */
219   private final int maxLogs;
220 
221   /**
222    * Thread that handles optional sync'ing
223    */
224   private final LogSyncer logSyncer;
225 
226   /** Number of log close errors tolerated before we abort */
227   private final int closeErrorsTolerated;
228 
229   private final AtomicInteger closeErrorCount = new AtomicInteger();
230   private final MetricsWAL metrics;
231 
232   /**
233    * Constructor.
234    *
235    * @param fs filesystem handle
236    * @param root path for stored and archived hlogs
237    * @param logDir dir where hlogs are stored
238    * @param conf configuration to use
239    * @throws IOException
240    */
241   public FSHLog(final FileSystem fs, final Path root, final String logDir,
242                 final Configuration conf)
243   throws IOException {
244     this(fs, root, logDir, HConstants.HREGION_OLDLOGDIR_NAME, 
245         conf, null, true, null, false);
246   }
247   
248   /**
249    * Constructor.
250    *
251    * @param fs filesystem handle
252    * @param root path for stored and archived hlogs
253    * @param logDir dir where hlogs are stored
254    * @param oldLogDir dir where hlogs are archived
255    * @param conf configuration to use
256    * @throws IOException
257    */
258   public FSHLog(final FileSystem fs, final Path root, final String logDir,
259                 final String oldLogDir, final Configuration conf)
260   throws IOException {
261     this(fs, root, logDir, oldLogDir, 
262         conf, null, true, null, false);
263   }
264 
265   /**
266    * Create an edit log at the given <code>dir</code> location.
267    *
268    * You should never have to load an existing log. If there is a log at
269    * startup, it should have already been processed and deleted by the time the
270    * HLog object is started up.
271    *
272    * @param fs filesystem handle
273    * @param root path for stored and archived hlogs
274    * @param logDir dir where hlogs are stored
275    * @param conf configuration to use
276    * @param listeners Listeners on WAL events. Listeners passed here will
277    * be registered before we do anything else; e.g. before the
278    * constructor calls {@link #rollWriter()}.
279    * @param prefix should always be hostname and port in distributed env and
280    *        it will be URL encoded before being used.
281    *        If prefix is null, "hlog" will be used
282    * @throws IOException
283    */
284   public FSHLog(final FileSystem fs, final Path root, final String logDir,
285       final Configuration conf, final List<WALActionsListener> listeners,
286       final String prefix) throws IOException {
287     this(fs, root, logDir, HConstants.HREGION_OLDLOGDIR_NAME, 
288         conf, listeners, true, prefix, false);
289   }
290 
291   /**
292    * Create an edit log at the given <code>dir</code> location.
293    *
294    * You should never have to load an existing log. If there is a log at
295    * startup, it should have already been processed and deleted by the time the
296    * HLog object is started up.
297    *
298    * @param fs filesystem handle
299    * @param root path for stored and archived hlogs
300    * @param logDir dir where hlogs are stored
301    * @param oldLogDir dir where hlogs are archived
302    * @param conf configuration to use
303    * @param listeners Listeners on WAL events. Listeners passed here will
304    * be registered before we do anything else; e.g. before the
305    * constructor calls {@link #rollWriter()}.
306    * @param failIfLogDirExists If true, an IOException will be thrown if the dir already exists.
307    * @param prefix should always be hostname and port in distributed env and
308    *        it will be URL encoded before being used.
309    *        If prefix is null, "hlog" will be used
310    * @param forMeta if this hlog is meant for meta updates
311    * @throws IOException
312    */
313   public FSHLog(final FileSystem fs, final Path root, final String logDir,
314       final String oldLogDir, final Configuration conf, 
315       final List<WALActionsListener> listeners,
316       final boolean failIfLogDirExists, final String prefix, boolean forMeta)
317   throws IOException {
318     super();
319     this.fs = fs;
320     this.rootDir = root;
321     this.dir = new Path(this.rootDir, logDir);
322     this.oldLogDir = new Path(this.rootDir, oldLogDir);
323     this.forMeta = forMeta;
324     this.conf = conf;
325    
326     if (listeners != null) {
327       for (WALActionsListener i: listeners) {
328         registerWALActionsListener(i);
329       }
330     }
331     
332     this.failIfLogDirExists = failIfLogDirExists;
333     
334     this.blocksize = this.conf.getLong("hbase.regionserver.hlog.blocksize",
335         getDefaultBlockSize());
336     // Roll at 95% of block size.
337     float multi = conf.getFloat("hbase.regionserver.logroll.multiplier", 0.95f);
338     this.logrollsize = (long)(this.blocksize * multi);
339     this.optionalFlushInterval =
340       conf.getLong("hbase.regionserver.optionallogflushinterval", 1 * 1000);
341     
342     this.maxLogs = conf.getInt("hbase.regionserver.maxlogs", 32);
343     this.minTolerableReplication = conf.getInt(
344         "hbase.regionserver.hlog.tolerable.lowreplication",
345         this.fs.getDefaultReplication());
346     this.lowReplicationRollLimit = conf.getInt(
347         "hbase.regionserver.hlog.lowreplication.rolllimit", 5);
348     this.enabled = conf.getBoolean("hbase.regionserver.hlog.enabled", true);
349     this.closeErrorsTolerated = conf.getInt(
350         "hbase.regionserver.logroll.errors.tolerated", 0);
351     
352     this.logSyncer = new LogSyncer(this.optionalFlushInterval);
353     
354     LOG.info("HLog configuration: blocksize=" +
355       StringUtils.byteDesc(this.blocksize) +
356       ", rollsize=" + StringUtils.byteDesc(this.logrollsize) +
357       ", enabled=" + this.enabled +
358       ", optionallogflushinternal=" + this.optionalFlushInterval + "ms");
359     // If prefix is null||empty then just name it hlog
360     this.prefix = prefix == null || prefix.isEmpty() ?
361         "hlog" : URLEncoder.encode(prefix, "UTF8");
362 
363     boolean dirExists = false;
364     if (failIfLogDirExists && (dirExists = this.fs.exists(dir))) {
365       throw new IOException("Target HLog directory already exists: " + dir);
366     }
367     if (!dirExists && !fs.mkdirs(dir)) {
368       throw new IOException("Unable to mkdir " + dir);
369     }
370 
371     if (!fs.exists(this.oldLogDir)) {
372       if (!fs.mkdirs(this.oldLogDir)) {
373         throw new IOException("Unable to mkdir " + this.oldLogDir);
374       }
375     }
376     // rollWriter sets this.hdfs_out if it can.
377     rollWriter();
378     
379     // handle the reflection necessary to call getNumCurrentReplicas()
380     this.getNumCurrentReplicas = getGetNumCurrentReplicas(this.hdfs_out);
381 
382     // When optionalFlushInterval is set as 0, don't start a thread for deferred log sync.
383     if (this.optionalFlushInterval > 0) {
384       Threads.setDaemonThreadRunning(logSyncer.getThread(), Thread.currentThread().getName()
385           + ".logSyncer");
386     } else {
387       LOG.info("hbase.regionserver.optionallogflushinterval is set as "
388           + this.optionalFlushInterval + ". Deferred log syncing won't work. "
389           + "Any Mutation, marked to be deferred synced, will be flushed immediately.");
390     }
391     coprocessorHost = new WALCoprocessorHost(this, conf);
392 
393     this.metrics = new MetricsWAL();
394   }
395   
396   // use reflection to search for getDefaultBlockSize(Path f)
397   // if the method doesn't exist, fall back to using getDefaultBlockSize()
398   private long getDefaultBlockSize() throws IOException {
399     Method m = null;
400     Class<? extends FileSystem> cls = this.fs.getClass();
401     try {
402       m = cls.getMethod("getDefaultBlockSize",
403           new Class<?>[] { Path.class });
404     } catch (NoSuchMethodException e) {
405       LOG.info("FileSystem doesn't support getDefaultBlockSize");
406     } catch (SecurityException e) {
407       LOG.info("Doesn't have access to getDefaultBlockSize on "
408           + "FileSystems", e);
409       m = null; // could happen on setAccessible()
410     }
411     if (null == m) {
412       return this.fs.getDefaultBlockSize();
413     } else {
414       try {
415         Object ret = m.invoke(this.fs, this.dir);
416         return ((Long)ret).longValue();
417       } catch (Exception e) {
418         throw new IOException(e);
419       }
420     }
421   }
422 
423   /**
424    * Find the 'getNumCurrentReplicas' method on the passed <code>os</code> stream.
425    * @return Method or null.
426    */
427   private Method getGetNumCurrentReplicas(final FSDataOutputStream os) {
428     Method m = null;
429     if (os != null) {
430       Class<? extends OutputStream> wrappedStreamClass = os.getWrappedStream()
431           .getClass();
432       try {
433         m = wrappedStreamClass.getDeclaredMethod("getNumCurrentReplicas",
434             new Class<?>[] {});
435         m.setAccessible(true);
436       } catch (NoSuchMethodException e) {
437         LOG.info("FileSystem's output stream doesn't support"
438             + " getNumCurrentReplicas; --HDFS-826 not available; fsOut="
439             + wrappedStreamClass.getName());
440       } catch (SecurityException e) {
441         LOG.info("Doesn't have access to getNumCurrentReplicas on "
442             + "FileSystems's output stream --HDFS-826 not available; fsOut="
443             + wrappedStreamClass.getName(), e);
444         m = null; // could happen on setAccessible()
445       }
446     }
447     if (m != null) {
448       LOG.info("Using getNumCurrentReplicas--HDFS-826");
449     }
450     return m;
451   }
452 
453   @Override
454   public void registerWALActionsListener(final WALActionsListener listener) {
455     this.listeners.add(listener);
456   }
457 
458   @Override
459   public boolean unregisterWALActionsListener(final WALActionsListener listener) {
460     return this.listeners.remove(listener);
461   }
462 
463   @Override
464   public long getFilenum() {
465     return this.filenum;
466   }
467 
468   @Override
469   public void setSequenceNumber(final long newvalue) {
470     for (long id = this.logSeqNum.get(); id < newvalue &&
471         !this.logSeqNum.compareAndSet(id, newvalue); id = this.logSeqNum.get()) {
472       // This could spin on occasion but better the occasional spin than locking
473       // every increment of sequence number.
474       LOG.debug("Changed sequenceid from " + id + " to " + newvalue);
475     }
476   }
477 
478   @Override
479   public long getSequenceNumber() {
480     return logSeqNum.get();
481   }
482 
483   /**
484    * Method used internally by this class and for tests only.
485    * @return The wrapped stream our writer is using; it's not the
486    * writer's 'out' FSDataOutputStream but the stream that this 'out' wraps
487    * (in HDFS it's an instance of DFSDataOutputStream).
488    * 
489    * usage: see TestLogRolling.java
490    */
491   OutputStream getOutputStream() {
492     return this.hdfs_out.getWrappedStream();
493   }
494 
495   @Override
496   public byte [][] rollWriter() throws FailedLogCloseException, IOException {
497     return rollWriter(false);
498   }
499 
500   @Override
501   public byte [][] rollWriter(boolean force)
502       throws FailedLogCloseException, IOException {
503     synchronized (rollWriterLock) {
504       // Return if nothing to flush.
505       if (!force && this.writer != null && this.numEntries.get() <= 0) {
506         return null;
507       }
508       byte [][] regionsToFlush = null;
509       try {
510         this.logRollRunning = true;
511         boolean isClosed = closed;
512         if (isClosed || !closeBarrier.beginOp()) {
513           LOG.debug("HLog " + (isClosed ? "closed" : "closing") + ". Skipping rolling of writer");
514           return regionsToFlush;
515         }
516         // Do all the preparation outside of the updateLock to block
517         // the incoming writes as little as possible
518         long currentFilenum = this.filenum;
519         Path oldPath = null;
520         if (currentFilenum > 0) {
521           //computeFilename  will take care of meta hlog filename
522           oldPath = computeFilename(currentFilenum);
523         }
524         this.filenum = System.currentTimeMillis();
525         Path newPath = computeFilename();
526 
527         // Tell our listeners that a new log is about to be created
528         if (!this.listeners.isEmpty()) {
529           for (WALActionsListener i : this.listeners) {
530             i.preLogRoll(oldPath, newPath);
531           }
532         }
533         FSHLog.Writer nextWriter = this.createWriterInstance(fs, newPath, conf);
534         // Can we get at the dfsclient outputstream?  If an instance of
535         // SFLW, it'll have done the necessary reflection to get at the
536         // protected field name.
537         FSDataOutputStream nextHdfsOut = null;
538         if (nextWriter instanceof SequenceFileLogWriter) {
539           nextHdfsOut = ((SequenceFileLogWriter)nextWriter).getWriterFSDataOutputStream();
540         }
541 
542         Path oldFile = null;
543         int oldNumEntries = 0;
544         synchronized (updateLock) {
545           // Clean up current writer.
546           oldNumEntries = this.numEntries.get();
547           oldFile = cleanupCurrentWriter(currentFilenum);
548           this.writer = nextWriter;
549           this.hdfs_out = nextHdfsOut;
550           this.numEntries.set(0);
551         }
552         LOG.info("Rolled log" + (oldFile != null ? " for file=" + FSUtils.getPath(oldFile)
553           + ", entries=" + oldNumEntries + ", filesize=" + this.fs.getFileStatus(oldFile).getLen()
554           : "" ) + "; new path=" + FSUtils.getPath(newPath));
555 
556         // Tell our listeners that a new log was created
557         if (!this.listeners.isEmpty()) {
558           for (WALActionsListener i : this.listeners) {
559             i.postLogRoll(oldPath, newPath);
560           }
561         }
562 
563         // Can we delete any of the old log files?
564         if (getNumLogFiles() > 0) {
565           cleanOldLogs();
566           regionsToFlush = getRegionsToForceFlush();
567         }
568       } finally {
569         this.logRollRunning = false;
570         closeBarrier.endOp();
571       }
572       return regionsToFlush;
573     }
574   }
575 
576   /**
577    * This method allows subclasses to inject different writers without having to
578    * extend other methods like rollWriter().
579    * 
580    * @param fs
581    * @param path
582    * @param conf
583    * @return Writer instance
584    * @throws IOException
585    */
586   protected Writer createWriterInstance(final FileSystem fs, final Path path,
587       final Configuration conf) throws IOException {
588     if (forMeta) {
589       //TODO: set a higher replication for the hlog files (HBASE-6773)
590     }
591     return HLogFactory.createWriter(fs, path, conf);
592   }
593 
594   /*
595    * Clean up old commit logs: archive every log file whose edits are all older
596    * than the oldest outstanding (unflushed or currently flushing) sequence number,
597    * since those edits have already been persisted to store files.  Forcing flushes
598    * when there are too many logs is handled separately in getRegionsToForceFlush().
599    * @throws IOException
600    */
601   private void cleanOldLogs() throws IOException {
602     long oldestOutstandingSeqNum = Long.MAX_VALUE;
603     synchronized (oldestSeqNumsLock) {
604       Long oldestFlushing = (oldestFlushingSeqNums.size() > 0)
605         ? Collections.min(oldestFlushingSeqNums.values()) : Long.MAX_VALUE;
606       Long oldestUnflushed = (oldestUnflushedSeqNums.size() > 0)
607         ? Collections.min(oldestUnflushedSeqNums.values()) : Long.MAX_VALUE;
608       oldestOutstandingSeqNum = Math.min(oldestFlushing, oldestUnflushed);
609     }
610 
611     // Get the set of all log files whose last sequence number is smaller than
612     // the oldest edit's sequence number.
613     TreeSet<Long> sequenceNumbers = new TreeSet<Long>(this.outputfiles.headMap(
614         oldestOutstandingSeqNum).keySet());
615     // Now remove old log files (if any)
616     if (LOG.isDebugEnabled()) {
617       if (sequenceNumbers.size() > 0) {
618         LOG.debug("Found " + sequenceNumbers.size() + " hlogs to remove" +
619           " out of total " + this.outputfiles.size() + ";" +
620           " oldest outstanding sequenceid is " + oldestOutstandingSeqNum);
621       }
622     }
623     for (Long seq : sequenceNumbers) {
624       archiveLogFile(this.outputfiles.remove(seq), seq);
625     }
626   }
627 
628   /**
629    * Return regions that have edits with a sequence number equal to or less than the given one.
630    * Static due to some old unit test.
631    * @param walSeqNum The sequence number to compare with.
632    * @param regionsToSeqNums Encoded region names to sequence ids
633    * @return All regions whose seqNum <= walSeqNum. Null if no regions found.
634    */
635   static byte[][] findMemstoresWithEditsEqualOrOlderThan(
636       final long walSeqNum, final Map<byte[], Long> regionsToSeqNums) {
637     List<byte[]> regions = null;
638     for (Map.Entry<byte[], Long> e : regionsToSeqNums.entrySet()) {
639       if (e.getValue().longValue() <= walSeqNum) {
640         if (regions == null) regions = new ArrayList<byte[]>();
641         regions.add(e.getKey());
642       }
643     }
644     return regions == null ? null : regions
645         .toArray(new byte[][] { HConstants.EMPTY_BYTE_ARRAY });
646   }
647 
648   private byte[][] getRegionsToForceFlush() throws IOException {
649     // If too many log files, figure which regions we need to flush.
650     // Array is an array of encoded region names.
651     byte [][] regions = null;
652     int logCount = getNumLogFiles();
653     if (logCount > this.maxLogs && logCount > 0) {
654       // This is an array of encoded region names.
655       synchronized (oldestSeqNumsLock) {
656         regions = findMemstoresWithEditsEqualOrOlderThan(this.outputfiles.firstKey(),
657           this.oldestUnflushedSeqNums);
658       }
659       if (regions != null) {
660         StringBuilder sb = new StringBuilder();
661         for (int i = 0; i < regions.length; i++) {
662           if (i > 0) sb.append(", ");
663           sb.append(Bytes.toStringBinary(regions[i]));
664         }
665         LOG.info("Too many hlogs: logs=" + logCount + ", maxlogs=" +
666            this.maxLogs + "; forcing flush of " + regions.length + " regions(s): " +
667            sb.toString());
668       }
669     }
670     return regions;
671   }
672 
673   /*
674    * Cleans up the current writer, closing it and adding it to outputfiles.
675    * Presumes we're operating inside an updateLock scope.
676    * @return Path to current writer or null if none.
677    * @throws IOException
678    */
679   Path cleanupCurrentWriter(final long currentfilenum) throws IOException {
680     Path oldFile = null;
681     if (this.writer != null) {
682       // Close the current writer, get a new one.
683       try {
684         // Wait till all current transactions are written to the hlog.
685         // No new transactions can occur because we have the updatelock.
686         if (this.unflushedEntries.get() != this.syncedTillHere) {
687           LOG.debug("cleanupCurrentWriter " +
688                    " waiting for transactions to get synced " +
689                    " total " + this.unflushedEntries.get() +
690                    " synced till here " + syncedTillHere);
691           sync();
692         }
693         this.writer.close();
694         this.writer = null;
695         closeErrorCount.set(0);
696       } catch (IOException e) {
697         LOG.error("Failed close of HLog writer", e);
698         int errors = closeErrorCount.incrementAndGet();
699         if (errors <= closeErrorsTolerated && !hasDeferredEntries()) {
700           LOG.warn("Riding over HLog close failure! error count="+errors);
701         } else {
702           if (hasDeferredEntries()) {
703             LOG.error("Aborting due to unflushed edits in HLog");
704           }
705           // Failed close of log file.  Means we're losing edits.  For now,
706           // shut ourselves down to minimize loss.  Alternative is to try and
707           // keep going.  See HBASE-930.
708           FailedLogCloseException flce =
709             new FailedLogCloseException("#" + currentfilenum);
710           flce.initCause(e);
711           throw flce;
712         }
713       }
714       if (currentfilenum >= 0) {
715         oldFile = computeFilename(currentfilenum);
716         this.outputfiles.put(Long.valueOf(this.logSeqNum.get()), oldFile);
717       }
718     }
719     return oldFile;
720   }
721 
722   private void archiveLogFile(final Path p, final Long seqno) throws IOException {
723     Path newPath = getHLogArchivePath(this.oldLogDir, p);
724     LOG.info("moving old hlog file " + FSUtils.getPath(p) +
725       " whose highest sequenceid is " + seqno + " to " +
726       FSUtils.getPath(newPath));
727 
728     // Tell our listeners that a log is going to be archived.
729     if (!this.listeners.isEmpty()) {
730       for (WALActionsListener i : this.listeners) {
731         i.preLogArchive(p, newPath);
732       }
733     }
734     if (!this.fs.rename(p, newPath)) {
735       throw new IOException("Unable to rename " + p + " to " + newPath);
736     }
737     // Tell our listeners that a log has been archived.
738     if (!this.listeners.isEmpty()) {
739       for (WALActionsListener i : this.listeners) {
740         i.postLogArchive(p, newPath);
741       }
742     }
743   }
744 
745   /**
746    * This is a convenience method that computes a new filename
747    * using the current HLog file-number.
748    * @return Path
749    */
750   protected Path computeFilename() {
751     return computeFilename(this.filenum);
752   }
753 
754   /**
755    * This is a convenience method that computes a new filename with a given
756    * file-number.
757    * @param filenum to use
758    * @return Path
759    */
760   protected Path computeFilename(long filenum) {
761     if (filenum < 0) {
762       throw new RuntimeException("hlog file number can't be < 0");
763     }
764     String child = prefix + "." + filenum;
765     if (forMeta) {
766       child += HLog.META_HLOG_FILE_EXTN;
767     }
768     return new Path(dir, child);
769   }
770 
771   @Override
772   public void closeAndDelete() throws IOException {
773     close();
774     if (!fs.exists(this.dir)) return;
775     FileStatus[] files = fs.listStatus(this.dir);
776     for(FileStatus file : files) {
777 
778       Path p = getHLogArchivePath(this.oldLogDir, file.getPath());
779       // Tell our listeners that a log is going to be archived.
780       if (!this.listeners.isEmpty()) {
781         for (WALActionsListener i : this.listeners) {
782           i.preLogArchive(file.getPath(), p);
783         }
784       }
785 
786       if (!fs.rename(file.getPath(),p)) {
787         throw new IOException("Unable to rename " + file.getPath() + " to " + p);
788       }
789       // Tell our listeners that a log was archived.
790       if (!this.listeners.isEmpty()) {
791         for (WALActionsListener i : this.listeners) {
792           i.postLogArchive(file.getPath(), p);
793         }
794       }
795     }
796     LOG.debug("Moved " + files.length + " log files to " +
797       FSUtils.getPath(this.oldLogDir));
798     if (!fs.delete(dir, true)) {
799       LOG.info("Unable to delete " + dir);
800     }
801   }
802 
803   @Override
804   public void close() throws IOException {
805     if (this.closed) {
806       return;
807     }
808     // When optionalFlushInterval is 0, the logSyncer is not started as a Thread.
809     if (this.optionalFlushInterval > 0) {
810       try {
811         logSyncer.close();
812         // Make sure we synced everything
813         logSyncer.join(this.optionalFlushInterval * 2);
814       } catch (InterruptedException e) {
815         LOG.error("Exception while waiting for syncer thread to die", e);
816         Thread.currentThread().interrupt();
817       }
818     }
819     try {
820       // Prevent all further flushing and rolling.
821       closeBarrier.stopAndDrainOps();
822     } catch (InterruptedException e) {
823       LOG.error("Exception while waiting for cache flushes and log rolls", e);
824       Thread.currentThread().interrupt();
825     }
826 
827     // Tell our listeners that the log is closing
828     if (!this.listeners.isEmpty()) {
829       for (WALActionsListener i : this.listeners) {
830         i.logCloseRequested();
831       }
832     }
833     synchronized (updateLock) {
834       this.closed = true;
835       if (LOG.isDebugEnabled()) {
836         LOG.debug("closing hlog writer in " + this.dir.toString());
837       }
838       if (this.writer != null) {
839         this.writer.close();
840         this.writer = null;
841       }
842     }
843   }
844 
845   /**
846    * @param now
847    * @param regionName
848    * @param tableName
849    * @param clusterId
850    * @return New log key.
851    */
852   protected HLogKey makeKey(byte[] regionName, byte[] tableName, long seqnum,
853       long now, UUID clusterId) {
854     return new HLogKey(regionName, tableName, seqnum, now, clusterId);
855   }
856 
857   @Override
858   public long append(HRegionInfo regionInfo, HLogKey logKey, WALEdit logEdit,
859                      HTableDescriptor htd, boolean doSync)
860   throws IOException {
861     if (this.closed) {
862       throw new IOException("Cannot append; log is closed");
863     }
864     long txid = 0;
865     synchronized (updateLock) {
866       long seqNum = obtainSeqNum();
867       logKey.setLogSeqNum(seqNum);
868       // The 'oldestUnflushedSeqNums' map holds the sequence number of the oldest
869       // write for each region (i.e. the first edit added to the particular
870       // memstore). When the cache is flushed, the entry for the
871       // region being flushed is removed if the sequence number of the flush
872       // is greater than or equal to the value in the map.
873       this.oldestUnflushedSeqNums.putIfAbsent(regionInfo.getEncodedNameAsBytes(),
874         Long.valueOf(seqNum));
875       doWrite(regionInfo, logKey, logEdit, htd);
876       txid = this.unflushedEntries.incrementAndGet();
877       this.numEntries.incrementAndGet();
878       if (htd.isDeferredLogFlush()) {
879         lastDeferredTxid = txid;
880       }
881     }
882 
883     // Sync if catalog region, and if not then check if that table supports
884     // deferred log flushing
885     if (doSync &&
886         (regionInfo.isMetaRegion() ||
887         !htd.isDeferredLogFlush())) {
888       // sync txn to file system
889       this.sync(txid);
890     }
891     return txid;
892   }
893 
894   @Override
895   public void append(HRegionInfo info, byte [] tableName, WALEdit edits,
896     final long now, HTableDescriptor htd)
897   throws IOException {
898     append(info, tableName, edits, HConstants.DEFAULT_CLUSTER_ID, now, htd);
899   }
900 
901   /**
902    * Append a set of edits to the log. Log edits are keyed by (encoded)
903    * regionName, rowname, and log-sequence-id.
904    *
905    * Later, if we sort by these keys, we obtain all the relevant edits for a
906    * given key-range of the HRegion (TODO). Any edits that do not have a
907    * matching COMPLETE_CACHEFLUSH message can be discarded.
908    *
909    * <p>
910    * Logs cannot be restarted once closed, or once the HLog process dies. Each
911    * time the HLog starts, it must create a new log. This means that other
912    * systems should process the log appropriately upon each startup (and prior
913    * to initializing HLog).
914    *
915    * synchronized prevents appends during the completion of a cache flush or for
916    * the duration of a log roll.
917    *
918    * @param info
919    * @param tableName
920    * @param edits
921    * @param clusterId The originating clusterId for this edit (for replication)
922    * @param now
923    * @param doSync shall we sync?
924    * @return txid of this transaction
925    * @throws IOException
926    */
927   private long append(HRegionInfo info, byte [] tableName, WALEdit edits, UUID clusterId,
928       final long now, HTableDescriptor htd, boolean doSync)
929     throws IOException {
930       if (edits.isEmpty()) return this.unflushedEntries.get();
931       if (this.closed) {
932         throw new IOException("Cannot append; log is closed");
933       }
934       long txid = 0;
935       synchronized (this.updateLock) {
936         long seqNum = obtainSeqNum();
937         // The 'oldestUnflushedSeqNums' map holds the sequence number of the oldest
938         // write for each region (i.e. the first edit added to the particular
939         // memstore). When the cache is flushed, the entry for the
940         // region being flushed is removed if the sequence number of the flush
941         // is greater than or equal to the value in the map.
942         // Use encoded name.  It's shorter, guaranteed unique and a subset of the
943         // actual name.
944         byte [] encodedRegionName = info.getEncodedNameAsBytes();
945         this.oldestUnflushedSeqNums.putIfAbsent(encodedRegionName, seqNum);
946         HLogKey logKey = makeKey(encodedRegionName, tableName, seqNum, now, clusterId);
947         doWrite(info, logKey, edits, htd);
948         this.numEntries.incrementAndGet();
949         txid = this.unflushedEntries.incrementAndGet();
950         if (htd.isDeferredLogFlush()) {
951           lastDeferredTxid = txid;
952         }
953       }
954       // Sync if catalog region, and if not then check if that table supports
955       // deferred log flushing
956       if (doSync && 
957           (info.isMetaRegion() ||
958           !htd.isDeferredLogFlush())) {
959         // sync txn to file system
960         this.sync(txid);
961       }
962       return txid;
963     }
964 
965   @Override
966   public long appendNoSync(HRegionInfo info, byte [] tableName, WALEdit edits, 
967     UUID clusterId, final long now, HTableDescriptor htd)
968     throws IOException {
969     return append(info, tableName, edits, clusterId, now, htd, false);
970   }
971 
972   @Override
973   public long append(HRegionInfo info, byte [] tableName, WALEdit edits, 
974     UUID clusterId, final long now, HTableDescriptor htd)
975     throws IOException {
976     return append(info, tableName, edits, clusterId, now, htd, true);
977   }
978 
979   /**
980    * This class is responsible for holding the HLog's appended Entry list
981    * and for syncing it at a configurable interval.
982    *
983    * Deferred log flushing works by piggybacking on this process: a deferred
984    * entry is appended but simply not sync'd right away. It can also be sync'd
985    * by other, non-deferred entries flushed outside of this thread.
986    */
987   class LogSyncer extends HasThread {
988 
989     private final long optionalFlushInterval;
990 
991     private final AtomicBoolean closeLogSyncer = new AtomicBoolean(false);
992 
993     // List of pending writes to the HLog. These correspond to transactions
994     // that have not yet returned to the client. We keep them cached here
995     // instead of writing them to HDFS piecemeal, because the HDFS write 
996     // method is pretty heavyweight as far as locking is concerned. The 
997     // goal is to increase the batchsize for writing-to-hdfs as well as
998     // sync-to-hdfs, so that we can get better system throughput.
999     private List<Entry> pendingWrites = new LinkedList<Entry>();
1000 
1001     LogSyncer(long optionalFlushInterval) {
1002       this.optionalFlushInterval = optionalFlushInterval;
1003     }
1004 
1005     @Override
1006     public void run() {
1007       try {
1008         // awaiting with a timeout doesn't always
1009         // throw exceptions on interrupt
1010         while(!this.isInterrupted() && !closeLogSyncer.get()) {
1011 
1012           try {
1013             if (unflushedEntries.get() <= syncedTillHere) {
1014               synchronized (closeLogSyncer) {
1015                 closeLogSyncer.wait(this.optionalFlushInterval);
1016               }
1017             }
1018             // Calling sync since we waited or had unflushed entries.
1019             // Entries appended but not sync'd are taken care of here AKA
1020             // deferred log flush
1021             sync();
1022           } catch (IOException e) {
1023             LOG.error("Error while syncing, requesting close of hlog ", e);
1024             requestLogRoll();
1025           }
1026         }
1027       } catch (InterruptedException e) {
1028         LOG.debug(getName() + " interrupted while waiting for sync requests");
1029       } finally {
1030         LOG.info(getName() + " exiting");
1031       }
1032     }
1033 
1034     // appends new writes to the pendingWrites. It is better to keep it in
1035     // our own queue rather than writing it to the HDFS output stream because
1036     // HDFSOutputStream.writeChunk is not lightweight at all.
1037     synchronized void append(Entry e) throws IOException {
1038       pendingWrites.add(e);
1039     }
1040 
1041     // Returns all currently pending writes. New writes
1042     // will accumulate in a new list.
1043     synchronized List<Entry> getPendingWrites() {
1044       List<Entry> save = this.pendingWrites;
1045       this.pendingWrites = new LinkedList<Entry>();
1046       return save;
1047     }
1048 
1049     // writes out pending entries to the HLog
1050     void hlogFlush(Writer writer, List<Entry> pending) throws IOException {
1051       if (pending == null) return;
1052 
1053       // write out all accumulated Entries to hdfs.
1054       for (Entry e : pending) {
1055         writer.append(e);
1056       }
1057     }
1058 
1059     void close() {
1060       synchronized (closeLogSyncer) {
1061         closeLogSyncer.set(true);
1062         closeLogSyncer.notifyAll();
1063       }
1064     }
1065   }
1066 
1067   // sync all known transactions
1068   private void syncer() throws IOException {
1069     syncer(this.unflushedEntries.get()); // sync all pending items
1070   }
1071 
1072   // sync all transactions up to the specified txid
1073   private void syncer(long txid) throws IOException {
1074     Writer tempWriter;
1075     synchronized (this.updateLock) {
1076       if (this.closed) return;
1077       // Guaranteed non-null.
1078       // Note that parallel sync can close tempWriter.
1079       // The current method of dealing with this is to catch exceptions.
1080       // See HBASE-4387, HBASE-5623, HBASE-7329.
1081       tempWriter = this.writer;
1082     }
1083     // if the transaction that we are interested in is already 
1084     // synced, then return immediately.
1085     if (txid <= this.syncedTillHere) {
1086       return;
1087     }
1088     try {
1089       long doneUpto;
1090       long now = EnvironmentEdgeManager.currentTimeMillis();
1091       // First flush all the pending writes to HDFS. Then 
1092       // issue the sync to HDFS. If sync is successful, then update
1093       // syncedTillHere to indicate that transactions up to this
1094       // number have been successfully synced.
1095       IOException ioe = null;
1096       List<Entry> pending = null;
1097       synchronized (flushLock) {
1098         if (txid <= this.syncedTillHere) {
1099           return;
1100         }
1101         doneUpto = this.unflushedEntries.get();
1102         pending = logSyncer.getPendingWrites();
1103         try {
1104           logSyncer.hlogFlush(tempWriter, pending);
1105         } catch(IOException io) {
1106           ioe = io;
1107           LOG.error("syncer encountered error, will retry. txid=" + txid, ioe);
1108         }
1109       }
1110       if (ioe != null && pending != null) {
1111         synchronized (this.updateLock) {
1112           synchronized (flushLock) {
1113             // HBASE-4387, HBASE-5623, retry with updateLock held
1114             tempWriter = this.writer;
1115             logSyncer.hlogFlush(tempWriter, pending);
1116           }
1117         }          
1118       }
1119       // another thread might have sync'ed already; avoid double-sync'ing
1120       if (txid <= this.syncedTillHere) {
1121         return;
1122       }
1123       try {
1124         if (tempWriter != null) tempWriter.sync();
1125       } catch(IOException ex) {
1126         synchronized (this.updateLock) {
1127           // HBASE-4387, HBASE-5623, retry with updateLock held
1128           // TODO: we don't actually need to do it for concurrent close - what is the point
1129           //       of syncing new unrelated writer? Keep behavior for now.
1130           tempWriter = this.writer;
1131           if (tempWriter != null) tempWriter.sync();
1132         }
1133       }
1134       this.syncedTillHere = Math.max(this.syncedTillHere, doneUpto);
1135 
1136       this.metrics.finishSync(EnvironmentEdgeManager.currentTimeMillis() - now);
1137       // TODO: preserving the old behavior for now, but this check is strange. It's not
1138       //       protected by any locks here, so for all we know rolling locks might start
1139       //       as soon as we enter the "if". Is this best-effort optimization check?
1140       if (!this.logRollRunning) {
1141         checkLowReplication();
1142         try {
1143           if (tempWriter.getLength() > this.logrollsize) {
1144             requestLogRoll();
1145           }
1146         } catch (IOException x) {
1147           LOG.debug("Log roll failed and will be retried. (This is not an error)");
1148         }
1149       }
1150     } catch (IOException e) {
1151       LOG.fatal("Could not sync. Requesting close of hlog", e);
1152       requestLogRoll();
1153       throw e;
1154     }
1155   }
1156 
1157   private void checkLowReplication() {
1158     // if the number of replicas in HDFS has fallen below the configured
1159     // value, then roll logs.
1160     try {
1161       int numCurrentReplicas = getLogReplication();
1162       if (numCurrentReplicas != 0
1163           && numCurrentReplicas < this.minTolerableReplication) {
1164         if (this.lowReplicationRollEnabled) {
1165           if (this.consecutiveLogRolls.get() < this.lowReplicationRollLimit) {
1166             LOG.warn("HDFS pipeline error detected. " + "Found "
1167                 + numCurrentReplicas + " replicas but expecting no less than "
1168                 + this.minTolerableReplication + " replicas. "
1169                 + " Requesting close of hlog.");
1170             requestLogRoll();
1171             // If rollWriter is requested, increase consecutiveLogRolls. Once it
1172             // is larger than lowReplicationRollLimit, disable the
1173             // LowReplication-Roller
1174             this.consecutiveLogRolls.getAndIncrement();
1175           } else {
1176             LOG.warn("Too many consecutive RollWriter requests, it's a sign of "
1177                 + "the total number of live datanodes is lower than the tolerable replicas.");
1178             this.consecutiveLogRolls.set(0);
1179             this.lowReplicationRollEnabled = false;
1180           }
1181         }
1182       } else if (numCurrentReplicas >= this.minTolerableReplication) {
1183 
1184         if (!this.lowReplicationRollEnabled) {
1185           // The new writer's replica count is always the default value,
1186           // so we should not enable the LowReplication-Roller yet. If numEntries
1187           // is 1 or lower, we consider this to be a new writer.
1188           if (this.numEntries.get() <= 1) {
1189             return;
1190           }
1191           // Once the live datanode number and the replicas return to normal,
1192           // enable the LowReplication-Roller.
1193           this.lowReplicationRollEnabled = true;
1194           LOG.info("LowReplication-Roller was enabled.");
1195         }
1196       }
1197     } catch (Exception e) {
1198       LOG.warn("Unable to invoke DFSOutputStream.getNumCurrentReplicas" + e +
1199           " still proceeding ahead...");
1200     }
1201   }
1202 
1203   /**
1204    * This method gets the datanode replication count for the current HLog.
1205    *
1206    * If the pipeline isn't started yet or is empty, you will get the default
1207    * replication factor.  Therefore, if this function returns 0, it means you
1208    * are not properly running with the HDFS-826 patch.
1209    * @throws InvocationTargetException
1210    * @throws IllegalAccessException
1211    * @throws IllegalArgumentException
1212    *
1213    * @throws Exception
1214    */
1215   int getLogReplication()
1216   throws IllegalArgumentException, IllegalAccessException, InvocationTargetException {
1217     if (this.getNumCurrentReplicas != null && this.hdfs_out != null) {
1218       Object repl = this.getNumCurrentReplicas.invoke(getOutputStream(), NO_ARGS);
1219       if (repl instanceof Integer) {
1220         return ((Integer)repl).intValue();
1221       }
1222     }
1223     return 0;
1224   }
1225 
1226   boolean canGetCurReplicas() {
1227     return this.getNumCurrentReplicas != null;
1228   }
1229 
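  // hsync, hflush and sync below are equivalent entry points: each one funnels into
  // syncer(), which flushes all pending writes to the writer and syncs the stream.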
1230   public void hsync() throws IOException {
1231     syncer();
1232   }
1233 
1234   public void hflush() throws IOException {
1235     syncer();
1236   }
1237 
1238   public void sync() throws IOException {
1239     syncer();
1240   }
1241 
1242   public void sync(long txid) throws IOException {
1243     syncer(txid);
1244   }
1245 
1246   private void requestLogRoll() {
1247     if (!this.listeners.isEmpty()) {
1248       for (WALActionsListener i: this.listeners) {
1249         i.logRollRequested();
1250       }
1251     }
1252   }
1253 
1254   protected void doWrite(HRegionInfo info, HLogKey logKey, WALEdit logEdit,
1255                            HTableDescriptor htd)
1256   throws IOException {
1257     if (!this.enabled) {
1258       return;
1259     }
1260     if (!this.listeners.isEmpty()) {
1261       for (WALActionsListener i: this.listeners) {
1262         i.visitLogEntryBeforeWrite(htd, logKey, logEdit);
1263       }
1264     }
1265     try {
1266       long now = EnvironmentEdgeManager.currentTimeMillis();
1267       // coprocessor hook:
1268       if (!coprocessorHost.preWALWrite(info, logKey, logEdit)) {
1269         // write to our buffer for the Hlog file.
1270         logSyncer.append(new FSHLog.Entry(logKey, logEdit));
1271       }
1272       long took = EnvironmentEdgeManager.currentTimeMillis() - now;
1273       coprocessorHost.postWALWrite(info, logKey, logEdit);
1274       long len = 0;
1275       for (KeyValue kv : logEdit.getKeyValues()) {
1276         len += kv.getLength();
1277       }
1278       this.metrics.finishAppend(took, len);
1279     } catch (IOException e) {
1280       LOG.fatal("Could not append. Requesting close of hlog", e);
1281       requestLogRoll();
1282       throw e;
1283     }
1284   }
1285 
1286 
1287   /** @return How many items have been added to the log */
1288   int getNumEntries() {
1289     return numEntries.get();
1290   }
1291 
1292   @Override
1293   public long obtainSeqNum() {
1294     return this.logSeqNum.incrementAndGet();
1295   }
1296 
1297   /** @return the number of log files in use */
1298   int getNumLogFiles() {
1299     return outputfiles.size();
1300   }
1301 
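  // Flush accounting, as implemented by the three methods below: startCacheFlush moves the
  // region's oldest unflushed seqNum from oldestUnflushedSeqNums into oldestFlushingSeqNums,
  // completeCacheFlush drops that entry once the flush succeeds, and abortCacheFlush puts it
  // back so the WAL keeps guarding the still-unflushed edits.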
1302   @Override
1303   public Long startCacheFlush(final byte[] encodedRegionName) {
1304     Long oldRegionSeqNum = null;
1305     if (!closeBarrier.beginOp()) {
1306       return null;
1307     }
1308     synchronized (oldestSeqNumsLock) {
1309       oldRegionSeqNum = this.oldestUnflushedSeqNums.remove(encodedRegionName);
1310       if (oldRegionSeqNum != null) {
1311         Long oldValue = this.oldestFlushingSeqNums.put(encodedRegionName, oldRegionSeqNum);
1312         assert oldValue == null : "Flushing map not cleaned up for "
1313           + Bytes.toString(encodedRegionName);
1314       }
1315     }
1316     if (oldRegionSeqNum == null) {
1317       // TODO: if we have no oldRegionSeqNum, and WAL is not disabled, presumably either
1318       //       the region is already flushing (which would make this call invalid), or there
1319       //       were no appends after last flush, so why are we starting flush? Maybe we should
1320       //       assert not null, and switch to "long" everywhere. Less rigorous, but safer,
1321       //       alternative is telling the caller to stop. For now preserve old logic.
1322       LOG.warn("Couldn't find oldest seqNum for the region we are about to flush: ["
1323         + Bytes.toString(encodedRegionName) + "]");
1324     }
1325     return obtainSeqNum();
1326   }
1327 
1328   @Override
1329   public void completeCacheFlush(final byte [] encodedRegionName)
1330   {
1331     synchronized (oldestSeqNumsLock) {
1332       this.oldestFlushingSeqNums.remove(encodedRegionName);
1333     }
1334     closeBarrier.endOp();
1335   }
1336 
1337   @Override
1338   public void abortCacheFlush(byte[] encodedRegionName) {
1339     Long currentSeqNum = null, seqNumBeforeFlushStarts = null;
1340     synchronized (oldestSeqNumsLock) {
1341       seqNumBeforeFlushStarts = this.oldestFlushingSeqNums.remove(encodedRegionName);
1342       if (seqNumBeforeFlushStarts != null) {
1343         currentSeqNum =
1344           this.oldestUnflushedSeqNums.put(encodedRegionName, seqNumBeforeFlushStarts);
1345       }
1346     }
1347     closeBarrier.endOp();
1348     if ((currentSeqNum != null)
1349         && (currentSeqNum.longValue() <= seqNumBeforeFlushStarts.longValue())) {
1350       String errorStr = "Region " + Bytes.toString(encodedRegionName) +
1351           "acquired edits out of order current memstore seq=" + currentSeqNum
1352           + ", previous oldest unflushed id=" + seqNumBeforeFlushStarts;
1353       LOG.error(errorStr);
1354       assert false : errorStr;
1355       Runtime.getRuntime().halt(1);
1356     }
1357   }
1358 
1359   @Override
1360   public boolean isLowReplicationRollEnabled() {
1361     return lowReplicationRollEnabled;
1362   }
1363 
1364   /**
1365    * Get the directory we are making logs in.
1366    * 
1367    * @return dir
1368    */
1369   protected Path getDir() {
1370     return dir;
1371   }
1372   
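  /** @return the archive location for hlog <code>p</code>: the same file name placed under <code>oldLogDir</code>. */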
1373   static Path getHLogArchivePath(Path oldLogDir, Path p) {
1374     return new Path(oldLogDir, p.getName());
1375   }
1376 
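  /** @return <code>seqid</code> zero-padded to 19 digits, the file-name format used for recovered edits. */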
1377   static String formatRecoveredEditsFileName(final long seqid) {
1378     return String.format("%019d", seqid);
1379   }
1380 
1381   public static final long FIXED_OVERHEAD = ClassSize.align(
1382     ClassSize.OBJECT + (5 * ClassSize.REFERENCE) +
1383     ClassSize.ATOMIC_INTEGER + Bytes.SIZEOF_INT + (3 * Bytes.SIZEOF_LONG));
1384 
1385   private static void usage() {
1386     System.err.println("Usage: HLog <ARGS>");
1387     System.err.println("Arguments:");
1388     System.err.println(" --dump  Dump textual representation of passed one or more files");
1389     System.err.println("         For example: HLog --dump hdfs://example.com:9000/hbase/.logs/MACHINE/LOGFILE");
1390     System.err.println(" --split Split the passed directory of WAL logs");
1391     System.err.println("         For example: HLog --split hdfs://example.com:9000/hbase/.logs/DIR");
1392   }
1393 
1394   private static void split(final Configuration conf, final Path p)
1395   throws IOException {
1396     FileSystem fs = FileSystem.get(conf);
1397     if (!fs.exists(p)) {
1398       throw new FileNotFoundException(p.toString());
1399     }
1400     final Path baseDir = FSUtils.getRootDir(conf);
1401     final Path oldLogDir = new Path(baseDir, HConstants.HREGION_OLDLOGDIR_NAME);
1402     if (!fs.getFileStatus(p).isDir()) {
1403       throw new IOException(p + " is not a directory");
1404     }
1405 
1406     HLogSplitter logSplitter = HLogSplitter.createLogSplitter(
1407         conf, baseDir, p, oldLogDir, fs);
1408     logSplitter.splitLog();
1409   }
1410   
1411   @Override
1412   public WALCoprocessorHost getCoprocessorHost() {
1413     return coprocessorHost;
1414   }
1415 
1416   /** @return true if entries appended for deferred sync have not yet been synced; used by tests */
1417   boolean hasDeferredEntries() {
1418     return lastDeferredTxid > syncedTillHere;
1419   }
1420 
1421   @Override
1422   public long getEarliestMemstoreSeqNum(byte[] encodedRegionName) {
1423     Long result = oldestUnflushedSeqNums.get(encodedRegionName);
1424     return result == null ? HConstants.NO_SEQNUM : result.longValue();
1425   }
1426 
1427   /**
1428    * Pass one or more log file names and it will either dump out a text version
1429    * on <code>stdout</code> or split the specified log files.
1430    *
1431    * @param args
1432    * @throws IOException
1433    */
1434   public static void main(String[] args) throws IOException {
1435     if (args.length < 2) {
1436       usage();
1437       System.exit(-1);
1438     }
1439     // either dump using the HLogPrettyPrinter or split, depending on args
1440     if (args[0].compareTo("--dump") == 0) {
1441       HLogPrettyPrinter.run(Arrays.copyOfRange(args, 1, args.length));
1442     } else if (args[0].compareTo("--split") == 0) {
1443       Configuration conf = HBaseConfiguration.create();
1444       for (int i = 1; i < args.length; i++) {
1445         try {
1446           Path logPath = new Path(args[i]);
1447           FSUtils.setFsDefault(conf, logPath);
1448           split(conf, logPath);
1449         } catch (Throwable t) {
1450           t.printStackTrace(System.err);
1451           System.exit(-1);
1452         }
1453       }
1454     } else {
1455       usage();
1456       System.exit(-1);
1457     }
1458   }
1459 }