
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver.wal;
20  
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  import java.lang.reflect.InvocationTargetException;
25  import java.lang.reflect.Method;
26  import java.net.URLEncoder;
27  import java.util.ArrayList;
28  import java.util.Arrays;
29  import java.util.Collections;
30  import java.util.LinkedList;
31  import java.util.List;
32  import java.util.Map;
33  import java.util.SortedMap;
34  import java.util.TreeMap;
35  import java.util.TreeSet;
36  import java.util.UUID;
37  import java.util.concurrent.ConcurrentSkipListMap;
38  import java.util.concurrent.CopyOnWriteArrayList;
39  import java.util.concurrent.atomic.AtomicBoolean;
40  import java.util.concurrent.atomic.AtomicInteger;
41  import java.util.concurrent.atomic.AtomicLong;
42  
43  import org.apache.commons.logging.Log;
44  import org.apache.commons.logging.LogFactory;
45  import org.apache.hadoop.classification.InterfaceAudience;
46  import org.apache.hadoop.conf.Configuration;
47  import org.apache.hadoop.fs.FSDataOutputStream;
48  import org.apache.hadoop.fs.FileStatus;
49  import org.apache.hadoop.fs.FileSystem;
50  import org.apache.hadoop.fs.Path;
51  import org.apache.hadoop.fs.Syncable;
52  import org.apache.hadoop.hbase.TableName;
53  import org.apache.hadoop.hbase.HBaseConfiguration;
54  import org.apache.hadoop.hbase.HConstants;
55  import org.apache.hadoop.hbase.HRegionInfo;
56  import org.apache.hadoop.hbase.HTableDescriptor;
57  import org.apache.hadoop.hbase.KeyValue;
58  import org.apache.hadoop.hbase.util.Bytes;
59  import org.apache.hadoop.hbase.util.ClassSize;
60  import org.apache.hadoop.hbase.util.DrainBarrier;
61  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
62  import org.apache.hadoop.hbase.util.FSUtils;
63  import org.apache.hadoop.hbase.util.HasThread;
64  import org.apache.hadoop.hbase.util.Threads;
65  import org.apache.hadoop.util.StringUtils;
66  import org.cloudera.htrace.Trace;
67  import org.cloudera.htrace.TraceScope;
68  
69  /**
70   * HLog stores all the edits to the HStore.  It's the HBase write-ahead-log
71   * implementation.
72   *
73   * It performs logfile-rolling, so external callers are not aware that the
74   * underlying file is being rolled.
75   *
76   * <p>
77   * There is one HLog per RegionServer.  All edits for all Regions carried by
78   * a particular RegionServer are entered first in the HLog.
79   *
80   * <p>
81   * Each HRegion is identified by a unique <code>long</code> integer. HRegions do
82   * not need to declare themselves before using the HLog; they simply include
83   * their HRegion-id in the <code>append</code> or
84   * <code>completeCacheFlush</code> calls.
85   *
86   * <p>
87   * An HLog consists of multiple on-disk files, which have a chronological order.
88   * As data is flushed to other (better) on-disk structures, the log becomes
89   * obsolete. We can destroy all the log messages for a given HRegion-id up to
90   * the most-recent CACHEFLUSH message from that HRegion.
91   *
92   * <p>
93   * It's only practical to delete entire files. Thus, we delete an entire on-disk
94   * file F when all of the messages in F have a log-sequence-id that's older
95   * (smaller) than the most-recent CACHEFLUSH message for every HRegion that has
96   * a message in F.
97   *
98   * <p>
99   * Synchronized methods can never execute in parallel. However, between the
100  * start of a cache flush and the completion point, appends are allowed but log
101  * rolling is not. To prevent log rolling taking place during this period, a
102  * separate reentrant lock is used.
103  *
104  * <p>To read an HLog, call {@link HLogFactory#createReader(org.apache.hadoop.fs.FileSystem,
105  * org.apache.hadoop.fs.Path, org.apache.hadoop.conf.Configuration)}.
106  *
107  */
108 @InterfaceAudience.Private
109 class FSHLog implements HLog, Syncable {
110   static final Log LOG = LogFactory.getLog(FSHLog.class);
111 
112   private final FileSystem fs;
113   private final Path rootDir;
114   private final Path dir;
115   private final Configuration conf;
116   // Listeners that are called on WAL events.
117   private List<WALActionsListener> listeners =
118     new CopyOnWriteArrayList<WALActionsListener>();
119   private final long optionalFlushInterval;
120   private final long blocksize;
121   private final String prefix;
122   private final AtomicLong unflushedEntries = new AtomicLong(0);
123   private volatile long syncedTillHere = 0;
124   private long lastDeferredTxid;
125   private final Path oldLogDir;
126   private volatile boolean logRollRunning;
127 
128   private WALCoprocessorHost coprocessorHost;
129 
130   private FSDataOutputStream hdfs_out; // FSDataOutputStream associated with the current SequenceFile.writer
131   // Minimum tolerable replicas; if the actual value is lower than this,
132   // rollWriter will be triggered
133   private int minTolerableReplication;
134   private Method getNumCurrentReplicas; // refers to DFSOutputStream.getNumCurrentReplicas
135   final static Object [] NO_ARGS = new Object []{};
136 
137   /** The barrier used to ensure that close() waits for all log rolls and flushes to finish. */
138   private DrainBarrier closeBarrier = new DrainBarrier();
139 
140   /**
141    * Current log file.
142    */
143   Writer writer;
144 
145   /**
146    * Map of all log files but the current one.
147    */
148   final SortedMap<Long, Path> outputfiles =
149     Collections.synchronizedSortedMap(new TreeMap<Long, Path>());
150 
151 
152   /**
153    * This lock synchronizes all operations on oldestUnflushedSeqNums and oldestFlushingSeqNums,
154    * with the exception of append's putIfAbsent into oldestUnflushedSeqNums.
155    * We only use these to find the lower-bound seqNum, or to find regions with old seqNums to
156    * force flush, so slightly stale values are harmless. */
157   private final Object oldestSeqNumsLock = new Object();
158 
159   /**
160    * This lock makes sure only one log roll runs at the same time. Should not be taken while
161    * any other lock is held. We don't just use synchronized because that results in bogus and
162    * tedious findbugs warnings when it thinks synchronized controls writer thread safety. */
163   private final Object rollWriterLock = new Object();
164 
165   /**
166    * Map of encoded region names to their most recent sequence/edit id in their memstore.
167    */
168   private final ConcurrentSkipListMap<byte [], Long> oldestUnflushedSeqNums =
169     new ConcurrentSkipListMap<byte [], Long>(Bytes.BYTES_COMPARATOR);
170   /**
171    * Map of encoded region names to their most recent sequence/edit id in their memstore;
172    * contains the regions that are currently flushing. That way we can store two numbers for
173    * flushing and non-flushing (oldestUnflushedSeqNums) memstore for the same region.
174    */
175   private final Map<byte[], Long> oldestFlushingSeqNums =
176     new TreeMap<byte[], Long>(Bytes.BYTES_COMPARATOR);
177 
178   private volatile boolean closed = false;
179 
180   private final AtomicLong logSeqNum = new AtomicLong(0);
181 
182   private boolean forMeta = false;
183 
184   // The timestamp (in ms) when the log file was created.
185   private volatile long filenum = -1;
186 
187   // Number of transactions in the current HLog.
188   private final AtomicInteger numEntries = new AtomicInteger(0);
189 
190   // If the live datanode count is lower than the default replica count,
191   // rollWriter will be triggered on each sync (so rolls would be requested
192   // repeatedly in a short time). This counter is used as a workaround to slow
193   // down the roll frequency triggered by checkLowReplication().
194   private AtomicInteger consecutiveLogRolls = new AtomicInteger(0);
195   private final int lowReplicationRollLimit;
196 
197   // If consecutiveLogRolls is larger than lowReplicationRollLimit,
198   // then disable the rolling in checkLowReplication().
199   // Re-enable it once replication recovers.
200   private volatile boolean lowReplicationRollEnabled = true;
201 
202   // If larger than this size, roll the log. This is typically 0.95 times the
203   // size of the default HDFS block size.
204   private final long logrollsize;
205 
206   // We synchronize on updateLock to prevent updates and to prevent a log roll
207   // during an update
208   // locked during appends
209   private final Object updateLock = new Object();
210   private final Object flushLock = new Object();
211 
212   private final boolean enabled;
213 
214   /*
215    * If there are more than this many logs, force a flush of the oldest region so
216    * its oldest edit goes to disk.  If too many accumulate and we crash, replay
217    * will take forever.  Keep the number of logs tidy.
218    */
219   private final int maxLogs;
220 
221   /**
222    * Thread that handles optional sync'ing
223    */
224   private final LogSyncer logSyncer;
225 
226   /** Number of log close errors tolerated before we abort */
227   private final int closeErrorsTolerated;
228 
229   private final AtomicInteger closeErrorCount = new AtomicInteger();
230   private final MetricsWAL metrics;
231 
232   /**
233    * Constructor.
234    *
235    * @param fs filesystem handle
236    * @param root path for stored and archived hlogs
237    * @param logDir dir where hlogs are stored
238    * @param conf configuration to use
239    * @throws IOException
240    */
241   public FSHLog(final FileSystem fs, final Path root, final String logDir,
242                 final Configuration conf)
243   throws IOException {
244     this(fs, root, logDir, HConstants.HREGION_OLDLOGDIR_NAME,
245         conf, null, true, null, false);
246   }
247 
248   /**
249    * Constructor.
250    *
251    * @param fs filesystem handle
252    * @param root path for stored and archived hlogs
253    * @param logDir dir where hlogs are stored
254    * @param oldLogDir dir where hlogs are archived
255    * @param conf configuration to use
256    * @throws IOException
257    */
258   public FSHLog(final FileSystem fs, final Path root, final String logDir,
259                 final String oldLogDir, final Configuration conf)
260   throws IOException {
261     this(fs, root, logDir, oldLogDir,
262         conf, null, true, null, false);
263   }
264 
265   /**
266    * Create an edit log at the given <code>dir</code> location.
267    *
268    * You should never have to load an existing log. If there is a log at
269    * startup, it should have already been processed and deleted by the time the
270    * HLog object is started up.
271    *
272    * @param fs filesystem handle
273    * @param root path for stored and archived hlogs
274    * @param logDir dir where hlogs are stored
275    * @param conf configuration to use
276    * @param listeners Listeners on WAL events. Listeners passed here will
277    * be registered before we do anything else; e.g. the
278    * Constructor {@link #rollWriter()}.
279    * @param prefix should always be hostname and port in distributed env and
280    *        it will be URL encoded before being used.
281    *        If prefix is null, "hlog" will be used
282    * @throws IOException
283    */
284   public FSHLog(final FileSystem fs, final Path root, final String logDir,
285       final Configuration conf, final List<WALActionsListener> listeners,
286       final String prefix) throws IOException {
287     this(fs, root, logDir, HConstants.HREGION_OLDLOGDIR_NAME,
288         conf, listeners, true, prefix, false);
289   }
290 
291   /**
292    * Create an edit log at the given <code>dir</code> location.
293    *
294    * You should never have to load an existing log. If there is a log at
295    * startup, it should have already been processed and deleted by the time the
296    * HLog object is started up.
297    *
298    * @param fs filesystem handle
299    * @param root path for stored and archived hlogs
300    * @param logDir dir where hlogs are stored
301    * @param oldLogDir dir where hlogs are archived
302    * @param conf configuration to use
303    * @param listeners Listeners on WAL events. Listeners passed here will
304    * be registered before we do anything else; e.g. the
305    * Constructor {@link #rollWriter()}.
306    * @param failIfLogDirExists If true IOException will be thrown if dir already exists.
307    * @param prefix should always be hostname and port in distributed env and
308    *        it will be URL encoded before being used.
309    *        If prefix is null, "hlog" will be used
310    * @param forMeta if this hlog is meant for meta updates
311    * @throws IOException
312    */
313   public FSHLog(final FileSystem fs, final Path root, final String logDir,
314       final String oldLogDir, final Configuration conf,
315       final List<WALActionsListener> listeners,
316       final boolean failIfLogDirExists, final String prefix, boolean forMeta)
317   throws IOException {
318     super();
319     this.fs = fs;
320     this.rootDir = root;
321     this.dir = new Path(this.rootDir, logDir);
322     this.oldLogDir = new Path(this.rootDir, oldLogDir);
323     this.forMeta = forMeta;
324     this.conf = conf;
325 
326     if (listeners != null) {
327       for (WALActionsListener i: listeners) {
328         registerWALActionsListener(i);
329       }
330     }
331 
332     this.blocksize = this.conf.getLong("hbase.regionserver.hlog.blocksize",
333         FSUtils.getDefaultBlockSize(this.fs, this.dir));
334     // Roll at 95% of block size.
335     float multi = conf.getFloat("hbase.regionserver.logroll.multiplier", 0.95f);
336     this.logrollsize = (long)(this.blocksize * multi);
337     this.optionalFlushInterval =
338       conf.getLong("hbase.regionserver.optionallogflushinterval", 1 * 1000);
339 
340     this.maxLogs = conf.getInt("hbase.regionserver.maxlogs", 32);
341     this.minTolerableReplication = conf.getInt(
342         "hbase.regionserver.hlog.tolerable.lowreplication",
343         FSUtils.getDefaultReplication(fs, this.dir));
344     this.lowReplicationRollLimit = conf.getInt(
345         "hbase.regionserver.hlog.lowreplication.rolllimit", 5);
346     this.enabled = conf.getBoolean("hbase.regionserver.hlog.enabled", true);
347     this.closeErrorsTolerated = conf.getInt(
348         "hbase.regionserver.logroll.errors.tolerated", 0);
349 
350     this.logSyncer = new LogSyncer(this.optionalFlushInterval);
351 
352     LOG.info("WAL/HLog configuration: blocksize=" +
353       StringUtils.byteDesc(this.blocksize) +
354       ", rollsize=" + StringUtils.byteDesc(this.logrollsize) +
355       ", enabled=" + this.enabled +
356       ", optionallogflushinterval=" + this.optionalFlushInterval + "ms");
357     // If prefix is null||empty then just name it hlog
358     this.prefix = prefix == null || prefix.isEmpty() ?
359         "hlog" : URLEncoder.encode(prefix, "UTF8");
360 
361     boolean dirExists = false;
362     if (failIfLogDirExists && (dirExists = this.fs.exists(dir))) {
363       throw new IOException("Target HLog directory already exists: " + dir);
364     }
365     if (!dirExists && !fs.mkdirs(dir)) {
366       throw new IOException("Unable to mkdir " + dir);
367     }
368 
369     if (!fs.exists(this.oldLogDir)) {
370       if (!fs.mkdirs(this.oldLogDir)) {
371         throw new IOException("Unable to mkdir " + this.oldLogDir);
372       }
373     }
374     // rollWriter sets this.hdfs_out if it can.
375     rollWriter();
376 
377     // handle the reflection necessary to call getNumCurrentReplicas()
378     this.getNumCurrentReplicas = getGetNumCurrentReplicas(this.hdfs_out);
379 
380     // When optionalFlushInterval is set as 0, don't start a thread for deferred log sync.
381     if (this.optionalFlushInterval > 0) {
382       Threads.setDaemonThreadRunning(logSyncer.getThread(), Thread.currentThread().getName()
383           + ".logSyncer");
384     } else {
385       LOG.info("hbase.regionserver.optionallogflushinterval is set as "
386           + this.optionalFlushInterval + ". Deferred log syncing won't work. "
387           + "Any Mutation, marked to be deferred synced, will be flushed immediately.");
388     }
389     coprocessorHost = new WALCoprocessorHost(this, conf);
390 
391     this.metrics = new MetricsWAL();
392   }
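
  // Illustrative usage sketch; 'fs', 'conf', 'rootDir', 'regionInfo', 'tableDesc',
  // 'tableName' and 'kv' are hypothetical placeholders assumed to be set up elsewhere.
  //
  //   HLog hlog = new FSHLog(fs, rootDir, "mylogs", conf);
  //   WALEdit edit = new WALEdit();
  //   edit.add(kv);                          // some KeyValue to be logged
  //   hlog.append(regionInfo, tableName, edit,
  //       EnvironmentEdgeManager.currentTimeMillis(), tableDesc);  // syncs unless deferred
  //   hlog.close();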
393 
394   /**
395    * Find the 'getNumCurrentReplicas' on the passed <code>os</code> stream.
396    * @return Method or null.
397    */
398   private Method getGetNumCurrentReplicas(final FSDataOutputStream os) {
399     Method m = null;
400     if (os != null) {
401       Class<? extends OutputStream> wrappedStreamClass = os.getWrappedStream()
402           .getClass();
403       try {
404         m = wrappedStreamClass.getDeclaredMethod("getNumCurrentReplicas",
405             new Class<?>[] {});
406         m.setAccessible(true);
407       } catch (NoSuchMethodException e) {
408         LOG.info("FileSystem's output stream doesn't support"
409             + " getNumCurrentReplicas; --HDFS-826 not available; fsOut="
410             + wrappedStreamClass.getName());
411       } catch (SecurityException e) {
412         LOG.info("Doesn't have access to getNumCurrentReplicas on "
413             + "FileSystems's output stream --HDFS-826 not available; fsOut="
414             + wrappedStreamClass.getName(), e);
415         m = null; // could happen on setAccessible()
416       }
417     }
418     if (m != null) {
419       if (LOG.isTraceEnabled()) LOG.trace("Using getNumCurrentReplicas--HDFS-826");
420     }
421     return m;
422   }
423 
424   @Override
425   public void registerWALActionsListener(final WALActionsListener listener) {
426     this.listeners.add(listener);
427   }
428 
429   @Override
430   public boolean unregisterWALActionsListener(final WALActionsListener listener) {
431     return this.listeners.remove(listener);
432   }
433 
434   @Override
435   public long getFilenum() {
436     return this.filenum;
437   }
438 
439   @Override
440   public void setSequenceNumber(final long newvalue) {
441     for (long id = this.logSeqNum.get(); id < newvalue &&
442         !this.logSeqNum.compareAndSet(id, newvalue); id = this.logSeqNum.get()) {
443       // This could spin on occasion but better the occasional spin than locking
444       // every increment of sequence number.
445       LOG.debug("Changed sequenceid from " + id + " to " + newvalue);
446     }
447   }
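
  // Note: the compare-and-set loop in setSequenceNumber() only ever moves logSeqNum
  // forward. If a concurrent obtainSeqNum() bumps the counter between the read and
  // the CAS, the CAS fails and the loop re-reads; values not larger than the current
  // sequence number are ignored.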
448 
449   @Override
450   public long getSequenceNumber() {
451     return logSeqNum.get();
452   }
453 
454   /**
455    * Method used internally to this class and for tests only.
456    * @return The wrapped stream our writer is using; it's not the
457    * writer's 'out' FSDataOutputStream but the stream that this 'out' wraps
458    * (in HDFS it's an instance of DFSDataOutputStream).
459    *
460    * usage: see TestLogRolling.java
461    */
462   OutputStream getOutputStream() {
463     return this.hdfs_out.getWrappedStream();
464   }
465 
466   @Override
467   public byte [][] rollWriter() throws FailedLogCloseException, IOException {
468     return rollWriter(false);
469   }
470 
471   @Override
472   public byte [][] rollWriter(boolean force)
473       throws FailedLogCloseException, IOException {
474     synchronized (rollWriterLock) {
475       // Return if nothing to flush.
476       if (!force && this.writer != null && this.numEntries.get() <= 0) {
477         return null;
478       }
479       byte [][] regionsToFlush = null;
480       if (closed) {
481         LOG.debug("HLog closed. Skipping rolling of writer");
482         return null;
483       }
484       try {
485         this.logRollRunning = true;
486         if (!closeBarrier.beginOp()) {
487           LOG.debug("HLog closing. Skipping rolling of writer");
488           return regionsToFlush;
489         }
490         // Do all the preparation outside of the updateLock to block
491         // incoming writes as little as possible
492         long currentFilenum = this.filenum;
493         Path oldPath = null;
494         if (currentFilenum > 0) {
495           // computeFilename will take care of the meta hlog filename
496           oldPath = computeFilename(currentFilenum);
497         }
498         this.filenum = System.currentTimeMillis();
499         Path newPath = computeFilename();
500 
501         // Tell our listeners that a new log is about to be created
502         if (!this.listeners.isEmpty()) {
503           for (WALActionsListener i : this.listeners) {
504             i.preLogRoll(oldPath, newPath);
505           }
506         }
507         FSHLog.Writer nextWriter = this.createWriterInstance(fs, newPath, conf);
508         // Can we get at the dfsclient outputstream?
509         FSDataOutputStream nextHdfsOut = null;
510         if (nextWriter instanceof ProtobufLogWriter) {
511           nextHdfsOut = ((ProtobufLogWriter)nextWriter).getStream();
512         }
513 
514         Path oldFile = null;
515         int oldNumEntries = 0;
516         synchronized (updateLock) {
517           // Clean up current writer.
518           oldNumEntries = this.numEntries.get();
519           oldFile = cleanupCurrentWriter(currentFilenum);
520           this.writer = nextWriter;
521           this.hdfs_out = nextHdfsOut;
522           this.numEntries.set(0);
523         }
524         if (oldFile == null) LOG.info("New WAL " + FSUtils.getPath(newPath));
525         else LOG.info("Rolled WAL " + FSUtils.getPath(oldFile) + " with entries=" + oldNumEntries +
526           ", filesize=" + StringUtils.humanReadableInt(this.fs.getFileStatus(oldFile).getLen()) +
527           "; new WAL " + FSUtils.getPath(newPath));
528 
529         // Tell our listeners that a new log was created
530         if (!this.listeners.isEmpty()) {
531           for (WALActionsListener i : this.listeners) {
532             i.postLogRoll(oldPath, newPath);
533           }
534         }
535 
536         // Can we delete any of the old log files?
537         if (getNumLogFiles() > 0) {
538           cleanOldLogs();
539           regionsToFlush = getRegionsToForceFlush();
540         }
541       } finally {
542         this.logRollRunning = false;
543         closeBarrier.endOp();
544       }
545       return regionsToFlush;
546     }
547   }
548 
549   /**
550    * This method allows subclasses to inject different writers without having to
551    * extend other methods like rollWriter().
552    *
553    * @param fs
554    * @param path
555    * @param conf
556    * @return Writer instance
557    * @throws IOException
558    */
559   protected Writer createWriterInstance(final FileSystem fs, final Path path,
560       final Configuration conf) throws IOException {
561     if (forMeta) {
562       //TODO: set a higher replication for the hlog files (HBASE-6773)
563     }
564     return HLogFactory.createWriter(fs, path, conf);
565   }
566 
567   /*
568    * Clean up old commit logs.
569    * Archives log files whose edits are all older than the oldest outstanding
570    * (flushing or unflushed) sequence number, since they are no longer needed
571    * for recovery.
572    * @throws IOException
573    */
574   private void cleanOldLogs() throws IOException {
575     long oldestOutstandingSeqNum = Long.MAX_VALUE;
576     synchronized (oldestSeqNumsLock) {
577       Long oldestFlushing = (oldestFlushingSeqNums.size() > 0)
578         ? Collections.min(oldestFlushingSeqNums.values()) : Long.MAX_VALUE;
579       Long oldestUnflushed = (oldestUnflushedSeqNums.size() > 0)
580         ? Collections.min(oldestUnflushedSeqNums.values()) : Long.MAX_VALUE;
581       oldestOutstandingSeqNum = Math.min(oldestFlushing, oldestUnflushed);
582     }
583 
584     // Get the set of all log files whose last sequence number is smaller than
585     // the oldest edit's sequence number.
586     TreeSet<Long> sequenceNumbers = new TreeSet<Long>(this.outputfiles.headMap(
587         oldestOutstandingSeqNum).keySet());
588     // Now remove old log files (if any)
589     if (LOG.isDebugEnabled()) {
590       if (sequenceNumbers.size() > 0) {
591         LOG.debug("Found " + sequenceNumbers.size() + " hlogs to remove" +
592           " out of total " + this.outputfiles.size() + ";" +
593           " oldest outstanding sequenceid is " + oldestOutstandingSeqNum);
594       }
595     }
596     for (Long seq : sequenceNumbers) {
597       archiveLogFile(this.outputfiles.remove(seq), seq);
598     }
599   }
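
  // Worked example for cleanOldLogs (hypothetical values): suppose
  // outputfiles = {100 -> hlog.A, 200 -> hlog.B}, no region is flushing, and the
  // oldest unflushed seqNum across all regions is 150. Then
  // oldestOutstandingSeqNum = 150 and headMap(150) selects only key 100, so hlog.A
  // is archived while hlog.B is kept, since it may still hold edits needed to
  // recover unflushed memstores.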
600 
601   /**
602    * Return regions that have edits that are equal or less than a certain sequence number.
603    * Static due to some old unit test.
604    * @param walSeqNum The sequence number to compare with.
605    * @param regionsToSeqNums Encoded region names to sequence ids
606    * @return All regions whose seqNum <= walSeqNum. Null if no regions found.
607    */
608   static byte[][] findMemstoresWithEditsEqualOrOlderThan(
609       final long walSeqNum, final Map<byte[], Long> regionsToSeqNums) {
610     List<byte[]> regions = null;
611     for (Map.Entry<byte[], Long> e : regionsToSeqNums.entrySet()) {
612       if (e.getValue().longValue() <= walSeqNum) {
613         if (regions == null) regions = new ArrayList<byte[]>();
614         regions.add(e.getKey());
615       }
616     }
617     return regions == null ? null : regions
618         .toArray(new byte[][] { HConstants.EMPTY_BYTE_ARRAY });
619   }
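
  // Worked example (hypothetical region names): with
  // regionsToSeqNums = {regionA -> 3, regionB -> 7} and walSeqNum = 5, only
  // regionA qualifies (3 <= 5 < 7), so the method returns an array holding
  // regionA's encoded name; had no region qualified, it would return null.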
620 
621   private byte[][] getRegionsToForceFlush() throws IOException {
622     // If too many log files, figure which regions we need to flush.
623     // Array is an array of encoded region names.
624     byte [][] regions = null;
625     int logCount = getNumLogFiles();
626     if (logCount > this.maxLogs && logCount > 0) {
627       // This is an array of encoded region names.
628       synchronized (oldestSeqNumsLock) {
629         regions = findMemstoresWithEditsEqualOrOlderThan(this.outputfiles.firstKey(),
630           this.oldestUnflushedSeqNums);
631       }
632       if (regions != null) {
633         StringBuilder sb = new StringBuilder();
634         for (int i = 0; i < regions.length; i++) {
635           if (i > 0) sb.append(", ");
636           sb.append(Bytes.toStringBinary(regions[i]));
637         }
638         LOG.info("Too many hlogs: logs=" + logCount + ", maxlogs=" +
639            this.maxLogs + "; forcing flush of " + regions.length + " region(s): " +
640            sb.toString());
641       }
642     }
643     return regions;
644   }
645 
646   /*
647    * Cleans up the current writer: closes it and adds it to outputfiles.
648    * Presumes we're operating inside an updateLock scope.
649    * @return Path to current writer or null if none.
650    * @throws IOException
651    */
652   Path cleanupCurrentWriter(final long currentfilenum) throws IOException {
653     Path oldFile = null;
654     if (this.writer != null) {
655       // Close the current writer, get a new one.
656       try {
657         // Wait till all current transactions are written to the hlog.
658         // No new transactions can occur because we have the updatelock.
659         if (this.unflushedEntries.get() != this.syncedTillHere) {
660           LOG.debug("cleanupCurrentWriter " +
661                    " waiting for transactions to get synced " +
662                    " total " + this.unflushedEntries.get() +
663                    " synced till here " + syncedTillHere);
664           sync();
665         }
666         this.writer.close();
667         this.writer = null;
668         closeErrorCount.set(0);
669       } catch (IOException e) {
670         LOG.error("Failed close of HLog writer", e);
671         int errors = closeErrorCount.incrementAndGet();
672         if (errors <= closeErrorsTolerated && !hasDeferredEntries()) {
673           LOG.warn("Riding over HLog close failure! error count="+errors);
674         } else {
675           if (hasDeferredEntries()) {
676             LOG.error("Aborting due to unflushed edits in HLog");
677           }
678           // Failed close of log file.  Means we're losing edits.  For now,
679           // shut ourselves down to minimize loss.  Alternative is to try and
680           // keep going.  See HBASE-930.
681           FailedLogCloseException flce =
682             new FailedLogCloseException("#" + currentfilenum);
683           flce.initCause(e);
684           throw flce;
685         }
686       }
687       if (currentfilenum >= 0) {
688         oldFile = computeFilename(currentfilenum);
689         this.outputfiles.put(Long.valueOf(this.logSeqNum.get()), oldFile);
690       }
691     }
692     return oldFile;
693   }
694 
695   private void archiveLogFile(final Path p, final Long seqno) throws IOException {
696     Path newPath = getHLogArchivePath(this.oldLogDir, p);
697     LOG.info("moving old hlog file " + FSUtils.getPath(p) +
698       " whose highest sequenceid is " + seqno + " to " +
699       FSUtils.getPath(newPath));
700 
701     // Tell our listeners that a log is going to be archived.
702     if (!this.listeners.isEmpty()) {
703       for (WALActionsListener i : this.listeners) {
704         i.preLogArchive(p, newPath);
705       }
706     }
707     if (!FSUtils.renameAndSetModifyTime(this.fs, p, newPath)) {
708       throw new IOException("Unable to rename " + p + " to " + newPath);
709     }
710     // Tell our listeners that a log has been archived.
711     if (!this.listeners.isEmpty()) {
712       for (WALActionsListener i : this.listeners) {
713         i.postLogArchive(p, newPath);
714       }
715     }
716   }
717 
718   /**
719    * This is a convenience method that computes a new filename
720    * using the current HLog file-number.
721    * @return Path
722    */
723   protected Path computeFilename() {
724     return computeFilename(this.filenum);
725   }
726 
727   /**
728    * This is a convenience method that computes a new filename with a given
729    * file-number.
730    * @param filenum to use
731    * @return Path
732    */
733   protected Path computeFilename(long filenum) {
734     if (filenum < 0) {
735       throw new RuntimeException("hlog file number can't be < 0");
736     }
737     String child = prefix + "." + filenum;
738     if (forMeta) {
739       child += HLog.META_HLOG_FILE_EXTN;
740     }
741     return new Path(dir, child);
742   }
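
  // Example of the resulting name (hypothetical prefix and timestamp): with
  // prefix "host%2C60020" and filenum 1380000000000, computeFilename() yields
  // <logdir>/host%2C60020.1380000000000; meta WALs additionally carry the
  // HLog.META_HLOG_FILE_EXTN suffix.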
743 
744   @Override
745   public void closeAndDelete() throws IOException {
746     close();
747     if (!fs.exists(this.dir)) return;
748     FileStatus[] files = fs.listStatus(this.dir);
749     if (files != null) {
750       for(FileStatus file : files) {
751 
752         Path p = getHLogArchivePath(this.oldLogDir, file.getPath());
753         // Tell our listeners that a log is going to be archived.
754         if (!this.listeners.isEmpty()) {
755           for (WALActionsListener i : this.listeners) {
756             i.preLogArchive(file.getPath(), p);
757           }
758         }
759 
760         if (!FSUtils.renameAndSetModifyTime(fs, file.getPath(), p)) {
761           throw new IOException("Unable to rename " + file.getPath() + " to " + p);
762         }
763         // Tell our listeners that a log was archived.
764         if (!this.listeners.isEmpty()) {
765           for (WALActionsListener i : this.listeners) {
766             i.postLogArchive(file.getPath(), p);
767           }
768         }
769       }
770       LOG.debug("Moved " + files.length + " WAL file(s) to " + FSUtils.getPath(this.oldLogDir));
771     }
772     if (!fs.delete(dir, true)) {
773       LOG.info("Unable to delete " + dir);
774     }
775   }
776 
777   @Override
778   public void close() throws IOException {
779     if (this.closed) {
780       return;
781     }
782     // When optionalFlushInterval is 0, the logSyncer is not started as a Thread.
783     if (this.optionalFlushInterval > 0) {
784       try {
785         logSyncer.close();
786         // Make sure we synced everything
787         logSyncer.join(this.optionalFlushInterval * 2);
788       } catch (InterruptedException e) {
789         LOG.error("Exception while waiting for syncer thread to die", e);
790         Thread.currentThread().interrupt();
791       }
792     }
793     try {
794       // Prevent all further flushing and rolling.
795       closeBarrier.stopAndDrainOps();
796     } catch (InterruptedException e) {
797       LOG.error("Exception while waiting for cache flushes and log rolls", e);
798       Thread.currentThread().interrupt();
799     }
800 
801     // Tell our listeners that the log is closing
802     if (!this.listeners.isEmpty()) {
803       for (WALActionsListener i : this.listeners) {
804         i.logCloseRequested();
805       }
806     }
807     synchronized (updateLock) {
808       this.closed = true;
809       if (LOG.isDebugEnabled()) {
810         LOG.debug("Closing WAL writer in " + this.dir.toString());
811       }
812       if (this.writer != null) {
813         this.writer.close();
814         this.writer = null;
815       }
816     }
817   }
818 
819   /**
820    * @param now
821    * @param encodedRegionName Encoded name of the region as returned by
822    * <code>HRegionInfo#getEncodedNameAsBytes()</code>.
823    * @param tableName
824    * @param clusterId
825    * @return New log key.
826    */
827   protected HLogKey makeKey(byte[] encodedRegionName, TableName tableName, long seqnum,
828       long now, UUID clusterId) {
829     return new HLogKey(encodedRegionName, tableName, seqnum, now, clusterId);
830   }
831 
832   @Override
833   public void append(HRegionInfo info, TableName tableName, WALEdit edits,
834     final long now, HTableDescriptor htd)
835   throws IOException {
836     append(info, tableName, edits, now, htd, true);
837   }
838 
839   @Override
840   public void append(HRegionInfo info, TableName tableName, WALEdit edits,
841     final long now, HTableDescriptor htd, boolean isInMemstore) throws IOException {
842     append(info, tableName, edits, HConstants.DEFAULT_CLUSTER_ID, now, htd, true, isInMemstore);
843   }
844 
845   /**
846    * Append a set of edits to the log. Log edits are keyed by (encoded)
847    * regionName, rowname, and log-sequence-id.
848    *
849    * Later, if we sort by these keys, we obtain all the relevant edits for a
850    * given key-range of the HRegion (TODO). Any edits that do not have a
851    * matching COMPLETE_CACHEFLUSH message can be discarded.
852    *
853    * <p>
854    * Logs cannot be restarted once closed, or once the HLog process dies. Each
855    * time the HLog starts, it must create a new log. This means that other
856    * systems should process the log appropriately upon each startup (and prior
857    * to initializing HLog).
858    *
859    * synchronized prevents appends during the completion of a cache flush or for
860    * the duration of a log roll.
861    *
862    * @param info
863    * @param tableName
864    * @param edits
865    * @param clusterId The originating clusterId for this edit (for replication)
866    * @param now
867    * @param doSync shall we sync?
868    * @return txid of this transaction
869    * @throws IOException
870    */
871   @SuppressWarnings("deprecation")
872   private long append(HRegionInfo info, TableName tableName, WALEdit edits, UUID clusterId,
873       final long now, HTableDescriptor htd, boolean doSync, boolean isInMemstore)
874     throws IOException {
875       if (edits.isEmpty()) return this.unflushedEntries.get();
876       if (this.closed) {
877         throw new IOException("Cannot append; log is closed");
878       }
879       TraceScope traceScope = Trace.startSpan("FSHlog.append");
880       try {
881         long txid = 0;
882         synchronized (this.updateLock) {
883           long seqNum = obtainSeqNum();
884           // The 'oldestUnflushedSeqNums' map holds the sequence number of the oldest
885           // write for each region (i.e. the first edit added to the particular
886           // memstore). When the cache is flushed, the entry for the
887           // region being flushed is moved to the 'oldestFlushingSeqNums' map
888           // (see startCacheFlush) and dropped once the flush completes.
889           // Use encoded name.  It's shorter, guaranteed unique and a subset of the
890           // actual name.
891           byte [] encodedRegionName = info.getEncodedNameAsBytes();
892           if (isInMemstore) this.oldestUnflushedSeqNums.putIfAbsent(encodedRegionName, seqNum);
893           HLogKey logKey = makeKey(encodedRegionName, tableName, seqNum, now, clusterId);
894           doWrite(info, logKey, edits, htd);
895           this.numEntries.incrementAndGet();
896           txid = this.unflushedEntries.incrementAndGet();
897           if (htd.isDeferredLogFlush()) {
898             lastDeferredTxid = txid;
899           }
900         }
901         // Sync if catalog region, and if not then check if that table supports
902         // deferred log flushing
903         if (doSync &&
904             (info.isMetaRegion() ||
905             !htd.isDeferredLogFlush())) {
906           // sync txn to file system
907           this.sync(txid);
908         }
909         return txid;
910       } finally {
911         traceScope.close();
912       }
913     }
914 
915   @Override
916   public long appendNoSync(HRegionInfo info, TableName tableName, WALEdit edits,
917     UUID clusterId, final long now, HTableDescriptor htd)
918     throws IOException {
919     return append(info, tableName, edits, clusterId, now, htd, false, true);
920   }
921 
922   /**
923    * This class is responsible for holding the HLog's appended Entry list
924    * and for syncing them according to a configurable interval.
925    *
926    * Deferred log flushing works by piggybacking on this process: the appended
927    * Entry is simply not sync'd right away. It can also be sync'd by other,
928    * non-deferred entries flushed outside of this thread.
929    */
930   class LogSyncer extends HasThread {
931 
932     private final long optionalFlushInterval;
933 
934     private final AtomicBoolean closeLogSyncer = new AtomicBoolean(false);
935 
936     // List of pending writes to the HLog. These correspond to transactions
937     // that have not yet returned to the client. We keep them cached here
938     // instead of writing them to HDFS piecemeal, because the HDFS write
939     // method is pretty heavyweight as far as locking is concerned. The
940     // goal is to increase the batchsize for writing-to-hdfs as well as
941     // sync-to-hdfs, so that we can get better system throughput.
942     private List<Entry> pendingWrites = new LinkedList<Entry>();
943 
944     LogSyncer(long optionalFlushInterval) {
945       this.optionalFlushInterval = optionalFlushInterval;
946     }
947 
948     @Override
949     public void run() {
950       try {
951         // awaiting with a timeout doesn't always
952         // throw exceptions on interrupt
953         while(!this.isInterrupted() && !closeLogSyncer.get()) {
954 
955           try {
956             if (unflushedEntries.get() <= syncedTillHere) {
957               synchronized (closeLogSyncer) {
958                 closeLogSyncer.wait(this.optionalFlushInterval);
959               }
960             }
961             // Calling sync since we waited or had unflushed entries.
962             // Entries appended but not sync'd are taken care of here AKA
963             // deferred log flush
964             sync();
965           } catch (IOException e) {
966             LOG.error("Error while syncing, requesting close of hlog ", e);
967             requestLogRoll();
968             Threads.sleep(this.optionalFlushInterval);
969           }
970         }
971       } catch (InterruptedException e) {
972         LOG.debug(getName() + " interrupted while waiting for sync requests");
973       } finally {
974         LOG.info(getName() + " exiting");
975       }
976     }
977 
978     // appends new writes to the pendingWrites. It is better to keep it in
979     // our own queue rather than writing it to the HDFS output stream because
980     // HDFSOutputStream.writeChunk is not lightweight at all.
981     synchronized void append(Entry e) throws IOException {
982       pendingWrites.add(e);
983     }
984 
985     // Returns all currently pending writes. New writes
986     // will accumulate in a new list.
987     synchronized List<Entry> getPendingWrites() {
988       List<Entry> save = this.pendingWrites;
989       this.pendingWrites = new LinkedList<Entry>();
990       return save;
991     }
992 
993     // writes out pending entries to the HLog
994     void hlogFlush(Writer writer, List<Entry> pending) throws IOException {
995       if (pending == null) return;
996 
997       // write out all accumulated Entries to hdfs.
998       for (Entry e : pending) {
999         writer.append(e);
1000       }
1001     }
1002 
1003     void close() {
1004       synchronized (closeLogSyncer) {
1005         closeLogSyncer.set(true);
1006         closeLogSyncer.notifyAll();
1007       }
1008     }
1009   }
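
  // Note on the batching scheme: appends accumulate in LogSyncer.pendingWrites
  // under the LogSyncer monitor, and syncer() later drains them via
  // getPendingWrites(), writes the whole batch with hlogFlush(), and then issues a
  // single sync() on the writer. This amortizes the relatively expensive HDFS
  // write and sync path over many transactions (group commit).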
1010 
1011   // sync all known transactions
1012   private void syncer() throws IOException {
1013     syncer(this.unflushedEntries.get()); // sync all pending items
1014   }
1015 
1016   // sync all transactions up to the specified txid
1017   private void syncer(long txid) throws IOException {
1018     // if the transaction that we are interested in is already
1019     // synced, then return immediately.
1020     if (txid <= this.syncedTillHere) {
1021       return;
1022     }
1023     Writer tempWriter;
1024     synchronized (this.updateLock) {
1025       if (this.closed) return;
1026       // Guaranteed non-null.
1027       // Note that parallel sync can close tempWriter.
1028       // The current method of dealing with this is to catch exceptions.
1029       // See HBASE-4387, HBASE-5623, HBASE-7329.
1030       tempWriter = this.writer;
1031     }
1032     try {
1033       long doneUpto;
1034       long now = EnvironmentEdgeManager.currentTimeMillis();
1035       // First flush all the pending writes to HDFS. Then
1036       // issue the sync to HDFS. If sync is successful, then update
1037       // syncedTillHere to indicate that transactions up to this
1038       // number have been successfully synced.
1039       IOException ioe = null;
1040       List<Entry> pending = null;
1041       synchronized (flushLock) {
1042         if (txid <= this.syncedTillHere) {
1043           return;
1044         }
1045         doneUpto = this.unflushedEntries.get();
1046         pending = logSyncer.getPendingWrites();
1047         try {
1048           logSyncer.hlogFlush(tempWriter, pending);
1049         } catch(IOException io) {
1050           ioe = io;
1051           LOG.error("syncer encountered error, will retry. txid=" + txid, ioe);
1052         }
1053       }
1054       if (ioe != null && pending != null) {
1055         synchronized (this.updateLock) {
1056           synchronized (flushLock) {
1057             // HBASE-4387, HBASE-5623, retry with updateLock held
1058             tempWriter = this.writer;
1059             logSyncer.hlogFlush(tempWriter, pending);
1060           }
1061         }
1062       }
1063       // Another thread might have sync'ed already; avoid double-sync'ing
1064       if (txid <= this.syncedTillHere) {
1065         return;
1066       }
1067       try {
1068         if (tempWriter != null) tempWriter.sync();
1069       } catch(IOException ex) {
1070         synchronized (this.updateLock) {
1071           // HBASE-4387, HBASE-5623, retry with updateLock held
1072           // TODO: we don't actually need to do it for concurrent close - what is the point
1073           //       of syncing new unrelated writer? Keep behavior for now.
1074           tempWriter = this.writer;
1075           if (tempWriter != null) tempWriter.sync();
1076         }
1077       }
1078       this.syncedTillHere = Math.max(this.syncedTillHere, doneUpto);
1079 
1080       this.metrics.finishSync(EnvironmentEdgeManager.currentTimeMillis() - now);
1081       // TODO: preserving the old behavior for now, but this check is strange. It's not
1082       //       protected by any locks here, so for all we know rolling locks might start
1083       //       as soon as we enter the "if". Is this best-effort optimization check?
1084       if (!this.logRollRunning) {
1085         checkLowReplication();
1086         try {
1087           if (tempWriter.getLength() > this.logrollsize) {
1088             requestLogRoll();
1089           }
1090         } catch (IOException x) {
1091           LOG.debug("Log roll failed and will be retried. (This is not an error)");
1092         }
1093       }
1094     } catch (IOException e) {
1095       LOG.fatal("Could not sync. Requesting roll of hlog", e);
1096       requestLogRoll();
1097       throw e;
1098     }
1099   }
1100 
1101   private void checkLowReplication() {
1102     // if the number of replicas in HDFS has fallen below the configured
1103     // value, then roll logs.
1104     try {
1105       int numCurrentReplicas = getLogReplication();
1106       if (numCurrentReplicas != 0
1107           && numCurrentReplicas < this.minTolerableReplication) {
1108         if (this.lowReplicationRollEnabled) {
1109           if (this.consecutiveLogRolls.get() < this.lowReplicationRollLimit) {
1110             LOG.warn("HDFS pipeline error detected. " + "Found "
1111                 + numCurrentReplicas + " replicas but expecting no less than "
1112                 + this.minTolerableReplication + " replicas. "
1113                 + " Requesting close of hlog.");
1114             requestLogRoll();
1115             // If rollWriter is requested, increase consecutiveLogRolls. Once it
1116             // is larger than lowReplicationRollLimit, disable the
1117             // LowReplication-Roller
1118             this.consecutiveLogRolls.getAndIncrement();
1119           } else {
1120             LOG.warn("Too many consecutive RollWriter requests, it's a sign of "
1121                 + "the total number of live datanodes is lower than the tolerable replicas.");
1122             this.consecutiveLogRolls.set(0);
1123             this.lowReplicationRollEnabled = false;
1124           }
1125         }
1126       } else if (numCurrentReplicas >= this.minTolerableReplication) {
1127 
1128         if (!this.lowReplicationRollEnabled) {
1129           // The new writer's replica count is always the default value,
1130           // so we should not enable the LowReplication-Roller yet. If numEntries
1131           // is lower than or equal to 1, we consider it a new writer.
1132           if (this.numEntries.get() <= 1) {
1133             return;
1134           }
1135           // Once the live datanode number and the replicas return to normal,
1136           // enable the LowReplication-Roller.
1137           this.lowReplicationRollEnabled = true;
1138           LOG.info("LowReplication-Roller was enabled.");
1139         }
1140       }
1141     } catch (Exception e) {
1142       LOG.warn("Unable to invoke DFSOutputStream.getNumCurrentReplicas" + e +
1143           " still proceeding ahead...");
1144     }
1145   }
1146 
1147   /**
1148    * This method gets the datanode replication count for the current HLog.
1149    *
1150    * If the pipeline isn't started yet or is empty, you will get the default
1151    * replication factor.  Therefore, if this function returns 0, it means you
1152    * are not properly running with the HDFS-826 patch.
1153    * @throws InvocationTargetException
1154    * @throws IllegalAccessException
1155    * @throws IllegalArgumentException
1156    *
1157    * @throws Exception
1158    */
1159   int getLogReplication()
1160   throws IllegalArgumentException, IllegalAccessException, InvocationTargetException {
1161     if (this.getNumCurrentReplicas != null && this.hdfs_out != null) {
1162       Object repl = this.getNumCurrentReplicas.invoke(getOutputStream(), NO_ARGS);
1163       if (repl instanceof Integer) {
1164         return ((Integer)repl).intValue();
1165       }
1166     }
1167     return 0;
1168   }
1169 
1170   boolean canGetCurReplicas() {
1171     return this.getNumCurrentReplicas != null;
1172   }
1173 
1174   @Override
1175   public void hsync() throws IOException {
1176     syncer();
1177   }
1178 
1179   @Override
1180   public void hflush() throws IOException {
1181     syncer();
1182   }
1183 
1184   @Override
1185   public void sync() throws IOException {
1186     syncer();
1187   }
1188 
1189   @Override
1190   public void sync(long txid) throws IOException {
1191     syncer(txid);
1192   }
1193 
1194   private void requestLogRoll() {
1195     if (!this.listeners.isEmpty()) {
1196       for (WALActionsListener i: this.listeners) {
1197         i.logRollRequested();
1198       }
1199     }
1200   }
1201 
1202   // TODO: Remove info.  Unused.
1203   protected void doWrite(HRegionInfo info, HLogKey logKey, WALEdit logEdit,
1204                            HTableDescriptor htd)
1205   throws IOException {
1206     if (!this.enabled) {
1207       return;
1208     }
1209     if (!this.listeners.isEmpty()) {
1210       for (WALActionsListener i: this.listeners) {
1211         i.visitLogEntryBeforeWrite(htd, logKey, logEdit);
1212       }
1213     }
1214     try {
1215       long now = EnvironmentEdgeManager.currentTimeMillis();
1216       // coprocessor hook:
1217       if (!coprocessorHost.preWALWrite(info, logKey, logEdit)) {
1218         if (logEdit.isReplay()) {
1219           // set replication scope null so that this won't be replicated
1220           logKey.setScopes(null);
1221         }
1222         // write to our buffer for the Hlog file.
1223         logSyncer.append(new FSHLog.Entry(logKey, logEdit));
1224       }
1225       long took = EnvironmentEdgeManager.currentTimeMillis() - now;
1226       coprocessorHost.postWALWrite(info, logKey, logEdit);
1227       long len = 0;
1228       for (KeyValue kv : logEdit.getKeyValues()) {
1229         len += kv.getLength();
1230       }
1231       this.metrics.finishAppend(took, len);
1232     } catch (IOException e) {
1233       LOG.fatal("Could not append. Requesting close of hlog", e);
1234       requestLogRoll();
1235       throw e;
1236     }
1237   }
1238 
1239 
1240   /** @return How many items have been added to the log */
1241   int getNumEntries() {
1242     return numEntries.get();
1243   }
1244 
1245   @Override
1246   public long obtainSeqNum() {
1247     return this.logSeqNum.incrementAndGet();
1248   }
1249 
1250   /** @return the number of log files in use */
1251   int getNumLogFiles() {
1252     return outputfiles.size();
1253   }
1254 
1255   @Override
1256   public Long startCacheFlush(final byte[] encodedRegionName) {
1257     Long oldRegionSeqNum = null;
1258     if (!closeBarrier.beginOp()) {
1259       return null;
1260     }
1261     synchronized (oldestSeqNumsLock) {
1262       oldRegionSeqNum = this.oldestUnflushedSeqNums.remove(encodedRegionName);
1263       if (oldRegionSeqNum != null) {
1264         Long oldValue = this.oldestFlushingSeqNums.put(encodedRegionName, oldRegionSeqNum);
1265         assert oldValue == null : "Flushing map not cleaned up for "
1266           + Bytes.toString(encodedRegionName);
1267       }
1268     }
1269     if (oldRegionSeqNum == null) {
1270       // TODO: if we have no oldRegionSeqNum, and WAL is not disabled, presumably either
1271       //       the region is already flushing (which would make this call invalid), or there
1272       //       were no appends after last flush, so why are we starting flush? Maybe we should
1273       //       assert not null, and switch to "long" everywhere. Less rigorous, but safer,
1274       //       alternative is telling the caller to stop. For now preserve old logic.
1275       LOG.warn("Couldn't find oldest seqNum for the region we are about to flush: ["
1276         + Bytes.toString(encodedRegionName) + "]");
1277     }
1278     return obtainSeqNum();
1279   }
1280 
1281   @Override
1282   public void completeCacheFlush(final byte [] encodedRegionName)
1283   {
1284     synchronized (oldestSeqNumsLock) {
1285       this.oldestFlushingSeqNums.remove(encodedRegionName);
1286     }
1287     closeBarrier.endOp();
1288   }
1289 
1290   @Override
1291   public void abortCacheFlush(byte[] encodedRegionName) {
1292     Long currentSeqNum = null, seqNumBeforeFlushStarts = null;
1293     synchronized (oldestSeqNumsLock) {
1294       seqNumBeforeFlushStarts = this.oldestFlushingSeqNums.remove(encodedRegionName);
1295       if (seqNumBeforeFlushStarts != null) {
1296         currentSeqNum =
1297           this.oldestUnflushedSeqNums.put(encodedRegionName, seqNumBeforeFlushStarts);
1298       }
1299     }
1300     closeBarrier.endOp();
1301     if ((currentSeqNum != null)
1302         && (currentSeqNum.longValue() <= seqNumBeforeFlushStarts.longValue())) {
1303       String errorStr = "Region " + Bytes.toString(encodedRegionName) +
1304           "acquired edits out of order current memstore seq=" + currentSeqNum
1305           + ", previous oldest unflushed id=" + seqNumBeforeFlushStarts;
1306       LOG.error(errorStr);
1307       assert false : errorStr;
1308       Runtime.getRuntime().halt(1);
1309     }
1310   }
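
  // Flush lifecycle summary: startCacheFlush moves a region's oldest unflushed
  // seqNum from oldestUnflushedSeqNums into oldestFlushingSeqNums,
  // completeCacheFlush drops it from oldestFlushingSeqNums once the flush
  // succeeds, and abortCacheFlush moves it back so the edits are again counted
  // as outstanding by cleanOldLogs and getRegionsToForceFlush.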
1311 
1312   @Override
1313   public boolean isLowReplicationRollEnabled() {
1314       return lowReplicationRollEnabled;
1315   }
1316 
1317   /**
1318    * Get the directory we are making logs in.
1319    *
1320    * @return dir
1321    */
1322   protected Path getDir() {
1323     return dir;
1324   }
1325 
1326   static Path getHLogArchivePath(Path oldLogDir, Path p) {
1327     return new Path(oldLogDir, p.getName());
1328   }
1329 
1330   static String formatRecoveredEditsFileName(final long seqid) {
1331     return String.format("%019d", seqid);
1332   }
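
  // Example: formatRecoveredEditsFileName(124) returns "0000000000000000124" --
  // the seqid zero-padded to 19 digits so that recovered edits files sort
  // lexicographically in sequence order.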
1333 
1334   public static final long FIXED_OVERHEAD = ClassSize.align(
1335     ClassSize.OBJECT + (5 * ClassSize.REFERENCE) +
1336     ClassSize.ATOMIC_INTEGER + Bytes.SIZEOF_INT + (3 * Bytes.SIZEOF_LONG));
1337 
1338   private static void usage() {
1339     System.err.println("Usage: HLog <ARGS>");
1340     System.err.println("Arguments:");
1341     System.err.println(" --dump  Dump textual representation of passed one or more files");
1342     System.err.println("         For example: HLog --dump hdfs://example.com:9000/hbase/.logs/MACHINE/LOGFILE");
1343     System.err.println(" --split Split the passed directory of WAL logs");
1344     System.err.println("         For example: HLog --split hdfs://example.com:9000/hbase/.logs/DIR");
1345   }
1346 
1347   private static void split(final Configuration conf, final Path p)
1348   throws IOException {
1349     FileSystem fs = FileSystem.get(conf);
1350     if (!fs.exists(p)) {
1351       throw new FileNotFoundException(p.toString());
1352     }
1353     if (!fs.getFileStatus(p).isDir()) {
1354       throw new IOException(p + " is not a directory");
1355     }
1356 
1357     final Path baseDir = FSUtils.getRootDir(conf);
1358     final Path oldLogDir = new Path(baseDir, HConstants.HREGION_OLDLOGDIR_NAME);
1359     HLogSplitter.split(baseDir, p, oldLogDir, fs, conf);
1360   }
1361 
1362   @Override
1363   public WALCoprocessorHost getCoprocessorHost() {
1364     return coprocessorHost;
1365   }
1366 
1367   /** Provide access to currently deferred sequence num for tests */
1368   boolean hasDeferredEntries() {
1369     return lastDeferredTxid > syncedTillHere;
1370   }
1371 
1372   @Override
1373   public long getEarliestMemstoreSeqNum(byte[] encodedRegionName) {
1374     Long result = oldestUnflushedSeqNums.get(encodedRegionName);
1375     return result == null ? HConstants.NO_SEQNUM : result.longValue();
1376   }
1377 
1378   /**
1379    * Pass one or more log file names and it will either dump out a text version
1380    * on <code>stdout</code> or split the specified log files.
1381    *
1382    * @param args
1383    * @throws IOException
1384    */
1385   public static void main(String[] args) throws IOException {
1386     if (args.length < 2) {
1387       usage();
1388       System.exit(-1);
1389     }
1390     // either dump using the HLogPrettyPrinter or split, depending on args
1391     if (args[0].compareTo("--dump") == 0) {
1392       HLogPrettyPrinter.run(Arrays.copyOfRange(args, 1, args.length));
1393     } else if (args[0].compareTo("--split") == 0) {
1394       Configuration conf = HBaseConfiguration.create();
1395       for (int i = 1; i < args.length; i++) {
1396         try {
1397           Path logPath = new Path(args[i]);
1398           FSUtils.setFsDefault(conf, logPath);
1399           split(conf, logPath);
1400         } catch (Throwable t) {
1401           t.printStackTrace(System.err);
1402           System.exit(-1);
1403         }
1404       }
1405     } else {
1406       usage();
1407       System.exit(-1);
1408     }
1409   }
1410 }