/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.Syncable;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseFileSystem;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HasThread;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.StringUtils;

/**
 * HLog stores all the edits to the HStore.  It is the HBase write-ahead-log
 * implementation.
 *
 * It performs logfile-rolling, so external callers are not aware that the
 * underlying file is being rolled.
 *
 * <p>
 * There is one HLog per RegionServer.  All edits for all Regions carried by
 * a particular RegionServer are entered first in the HLog.
 *
 * <p>
 * Each HRegion is identified by a unique <code>long</code> id. HRegions do
 * not need to declare themselves before using the HLog; they simply include
 * their HRegion-id in the <code>append</code> or
 * <code>completeCacheFlush</code> calls.
 *
 * <p>
 * An HLog consists of multiple on-disk files, which have a chronological order.
 * As data is flushed to other (better) on-disk structures, the log becomes
 * obsolete. We can destroy all the log messages for a given HRegion-id up to
 * the most-recent CACHEFLUSH message from that HRegion.
 *
 * <p>
 * It's only practical to delete entire files. Thus, we delete an entire on-disk
 * file F when all of the messages in F have a log-sequence-id that's older
 * (smaller) than the most-recent CACHEFLUSH message for every HRegion that has
 * a message in F.
 *
 * <p>
 * Synchronized methods can never execute in parallel. However, between the
 * start of a cache flush and the completion point, appends are allowed but log
 * rolling is not. To prevent log rolling taking place during this period, a
 * separate reentrant lock is used.
 *
 * <p>To read an HLog, call {@link #getReader(org.apache.hadoop.fs.FileSystem,
 * org.apache.hadoop.fs.Path, org.apache.hadoop.conf.Configuration)}.
 *
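 * <p>An illustrative read sketch (assumes an existing <code>FileSystem fs</code>,
 * a log file <code>Path p</code>, and a <code>Configuration conf</code>):
 * <pre>
 * HLog.Reader reader = HLog.getReader(fs, p, conf);
 * try {
 *   HLog.Entry entry;
 *   while ((entry = reader.next()) != null) {
 *     // process the entry's key and edits
 *   }
 * } finally {
 *   reader.close();
 * }
 * </pre>
 *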
 */
public class HLog implements Syncable {
  static final Log LOG = LogFactory.getLog(HLog.class);
  public static final byte [] METAFAMILY = Bytes.toBytes("METAFAMILY");
  static final byte [] METAROW = Bytes.toBytes("METAROW");

  /** File extension used while splitting an HLog into regions (HBASE-2312) */
  public static final String SPLITTING_EXT = "-splitting";
  public static final boolean SPLIT_SKIP_ERRORS_DEFAULT = false;
  /** The META region's HLog filename extension */
  public static final String META_HLOG_FILE_EXTN = ".meta";
  public static final String SEPARATE_HLOG_FOR_META = "hbase.regionserver.separate.hlog.for.meta";

  /*
   * Name of the directory that holds recovered edits written by the WAL
   * splitting code, one per region.
   */
  public static final String RECOVERED_EDITS_DIR = "recovered.edits";
  private static final Pattern EDITFILES_NAME_PATTERN =
    Pattern.compile("-?[0-9]+");
  public static final String RECOVERED_LOG_TMPFILE_SUFFIX = ".temp";

  private final FileSystem fs;
  private final Path dir;
  private final Configuration conf;
  private final HLogFileSystem hlogFs;
  // Listeners that are called on WAL events.
  private List<WALActionsListener> listeners =
    new CopyOnWriteArrayList<WALActionsListener>();
  private final long optionalFlushInterval;
  private final long blocksize;
  private final String prefix;
  private final AtomicLong unflushedEntries = new AtomicLong(0);
  private volatile long syncedTillHere = 0;
  private long lastDeferredTxid;
  private final Path oldLogDir;
  private volatile boolean logRollRunning;

  private static Class<? extends Writer> logWriterClass;
  private static Class<? extends Reader> logReaderClass;

  private WALCoprocessorHost coprocessorHost;

  static void resetLogReaderClass() {
    HLog.logReaderClass = null;
  }

  private FSDataOutputStream hdfs_out; // FSDataOutputStream associated with the current SequenceFile.writer
  // Minimum tolerable replicas; if the actual replica count falls below this
  // value, rollWriter will be triggered.
  private int minTolerableReplication;
  private Method getNumCurrentReplicas; // refers to DFSOutputStream.getNumCurrentReplicas
  final static Object [] NO_ARGS = new Object []{};

  public interface Reader {
    void init(FileSystem fs, Path path, Configuration c) throws IOException;
    void close() throws IOException;
    Entry next() throws IOException;
    Entry next(Entry reuse) throws IOException;
    void seek(long pos) throws IOException;
    long getPosition() throws IOException;
    void reset() throws IOException;
  }

  public interface Writer {
    void init(FileSystem fs, Path path, Configuration c) throws IOException;
    void close() throws IOException;
    void sync() throws IOException;
    void append(Entry entry) throws IOException;
    long getLength() throws IOException;
  }

  /*
   * Current log file.
   */
  Writer writer;

  /*
   * Map of all log files but the current one.
   */
  final SortedMap<Long, Path> outputfiles =
    Collections.synchronizedSortedMap(new TreeMap<Long, Path>());

  /*
   * Map of encoded region names to their most recent sequence/edit id in their
   * memstore.
   */
  private final ConcurrentSkipListMap<byte [], Long> lastSeqWritten =
    new ConcurrentSkipListMap<byte [], Long>(Bytes.BYTES_COMPARATOR);

  private volatile boolean closed = false;

  private final AtomicLong logSeqNum = new AtomicLong(0);

  private boolean forMeta = false;

  // The timestamp (in ms) when the log file was created.
  private volatile long filenum = -1;

  // Number of transactions in the current HLog.
  private final AtomicInteger numEntries = new AtomicInteger(0);

  // If the live datanode count is lower than the default replication value,
  // checkLowReplication() would otherwise trigger RollWriter on every sync,
  // causing a burst of rolls in a short time. This counter is used as a
  // workaround to slow down the roll frequency it triggers.
  private volatile int consecutiveLogRolls = 0;
  private final int lowReplicationRollLimit;

  // If consecutiveLogRolls is larger than lowReplicationRollLimit,
  // then disable the rolling in checkLowReplication().
  // Enable it if the replications recover.
  private volatile boolean lowReplicationRollEnabled = true;

  // If bigger than this size, roll the log. This is typically 0.95 times the
  // default HDFS block size.
  private final long logrollsize;

  // This lock prevents starting a log roll during a cache flush.
  // synchronized is insufficient because a cache flush spans two method calls.
  private final Lock cacheFlushLock = new ReentrantLock();

  // We synchronize on updateLock to prevent updates and to prevent a log roll
  // during an update
  // locked during appends
  private final Object updateLock = new Object();
  private final Object flushLock = new Object();

  private final boolean enabled;

  /*
   * If we exceed this many logs, force a flush of the oldest region so that
   * its oldest edit goes to disk. If we let too many logs accumulate and then
   * crash, replaying them will take forever. Keep the number of logs tidy.
   */
  private final int maxLogs;

  /**
   * Thread that handles optional sync'ing
   */
  private final LogSyncer logSyncer;

  /** Number of log close errors tolerated before we abort */
  private final int closeErrorsTolerated;

  private final AtomicInteger closeErrorCount = new AtomicInteger();

  /**
   * Pattern used to validate an HLog file name
   */
  private static final Pattern pattern =
      Pattern.compile(".*\\.\\d*("+HLog.META_HLOG_FILE_EXTN+")*");
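  // For illustration: a name such as "host%2C60020%2C1340930483712.1340930483712"
  // matches the pattern above, as does the same name with a ".meta" suffix
  // appended (the encoded hostname prefix here is an invented example).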

  static byte [] COMPLETE_CACHE_FLUSH;
  static {
    try {
      COMPLETE_CACHE_FLUSH =
        "HBASE::CACHEFLUSH".getBytes(HConstants.UTF8_ENCODING);
    } catch (UnsupportedEncodingException e) {
      assert(false);
    }
  }

  public static class Metric {
    public long min = Long.MAX_VALUE;
    public long max = 0;
    public long total = 0;
    public int count = 0;

    synchronized void inc(final long val) {
      min = Math.min(min, val);
      max = Math.max(max, val);
      total += val;
      ++count;
    }

    synchronized Metric get() {
      Metric copy = new Metric();
      copy.min = min;
      copy.max = max;
      copy.total = total;
      copy.count = count;
      this.min = Long.MAX_VALUE;
      this.max = 0;
      this.total = 0;
      this.count = 0;
      return copy;
    }
  }
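
  // Note that Metric.get() returns a snapshot and resets the accumulators, so
  // each call reports only the interval since the previous call. Illustrative
  // use: a metrics reporter periodically calls e.g. getWriteTime() below and
  // publishes the returned min/max/total/count for that interval.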

  // For measuring latency of writes
  private static Metric writeTime = new Metric();
  private static Metric writeSize = new Metric();
  // For measuring latency of syncs
  private static Metric syncTime = new Metric();
  // For measuring slow HLog appends
  private static AtomicLong slowHLogAppendCount = new AtomicLong();
  private static Metric slowHLogAppendTime = new Metric();

  public static Metric getWriteTime() {
    return writeTime.get();
  }

  public static Metric getWriteSize() {
    return writeSize.get();
  }

  public static Metric getSyncTime() {
    return syncTime.get();
  }

  public static long getSlowAppendCount() {
    return slowHLogAppendCount.get();
  }

  public static Metric getSlowAppendTime() {
    return slowHLogAppendTime.get();
  }

  /**
   * Constructor.
   *
   * @param fs filesystem handle
   * @param dir path to where hlogs are stored
   * @param oldLogDir path to where hlogs are archived
   * @param conf configuration to use
   * @throws IOException
   */
  public HLog(final FileSystem fs, final Path dir, final Path oldLogDir,
              final Configuration conf)
  throws IOException {
    this(fs, dir, oldLogDir, conf, null, true, null, false);
  }

  /**
   * Create an edit log at the given <code>dir</code> location.
   *
   * You should never have to load an existing log. If there is a log at
   * startup, it should have already been processed and deleted by the time the
   * HLog object is started up.
   *
   * @param fs filesystem handle
   * @param dir path to where hlogs are stored
   * @param oldLogDir path to where hlogs are archived
   * @param conf configuration to use
   * @param listeners Listeners on WAL events. Listeners passed here will
   * be registered before we do anything else; e.g. before the constructor
   * calls {@link #rollWriter()}.
   * @param prefix should always be hostname and port in a distributed
   *        environment, and it will be URL encoded before being used.
   *        If prefix is null, "hlog" will be used.
   * @throws IOException
   */
  public HLog(final FileSystem fs, final Path dir, final Path oldLogDir,
      final Configuration conf, final List<WALActionsListener> listeners,
      final String prefix) throws IOException {
    this(fs, dir, oldLogDir, conf, listeners, true, prefix, false);
  }

  /**
   * Create an edit log at the given <code>dir</code> location.
   *
   * You should never have to load an existing log. If there is a log at
   * startup, it should have already been processed and deleted by the time the
   * HLog object is started up.
   *
   * @param fs filesystem handle
   * @param dir path to where hlogs are stored
   * @param oldLogDir path to where hlogs are archived
   * @param conf configuration to use
   * @param listeners Listeners on WAL events. Listeners passed here will
   * be registered before we do anything else; e.g. before the constructor
   * calls {@link #rollWriter()}.
   * @param failIfLogDirExists If true, an IOException will be thrown if dir already exists.
   * @param prefix should always be hostname and port in a distributed
   *        environment, and it will be URL encoded before being used.
   *        If prefix is null, "hlog" will be used.
   * @param forMeta if this hlog is meant for meta updates
   * @throws IOException
   */
  public HLog(final FileSystem fs, final Path dir, final Path oldLogDir,
      final Configuration conf, final List<WALActionsListener> listeners,
      final boolean failIfLogDirExists, final String prefix, boolean forMeta)
  throws IOException {
    super();
    this.fs = fs;
    this.dir = dir;
    this.conf = conf;
    this.hlogFs = new HLogFileSystem(conf);
    if (listeners != null) {
      for (WALActionsListener i: listeners) {
        registerWALActionsListener(i);
      }
    }
    this.blocksize = conf.getLong("hbase.regionserver.hlog.blocksize",
        getDefaultBlockSize());
    // Roll at 95% of block size.
    float multi = conf.getFloat("hbase.regionserver.logroll.multiplier", 0.95f);
    this.logrollsize = (long)(this.blocksize * multi);
    this.optionalFlushInterval =
      conf.getLong("hbase.regionserver.optionallogflushinterval", 1 * 1000);
    boolean dirExists = false;
    if (failIfLogDirExists && (dirExists = this.fs.exists(dir))) {
      throw new IOException("Target HLog directory already exists: " + dir);
    }
    if (!dirExists && !HBaseFileSystem.makeDirOnFileSystem(fs, dir)) {
      throw new IOException("Unable to mkdir " + dir);
    }
    this.oldLogDir = oldLogDir;
    if (!fs.exists(oldLogDir) && !HBaseFileSystem.makeDirOnFileSystem(fs, oldLogDir)) {
      throw new IOException("Unable to mkdir " + this.oldLogDir);
    }
    this.forMeta = forMeta;
    this.maxLogs = conf.getInt("hbase.regionserver.maxlogs", 32);
    this.minTolerableReplication = conf.getInt(
        "hbase.regionserver.hlog.tolerable.lowreplication",
        this.fs.getDefaultReplication());
    this.lowReplicationRollLimit = conf.getInt(
        "hbase.regionserver.hlog.lowreplication.rolllimit", 5);
    this.enabled = conf.getBoolean("hbase.regionserver.hlog.enabled", true);
    this.closeErrorsTolerated = conf.getInt(
        "hbase.regionserver.logroll.errors.tolerated", 0);

    LOG.info("HLog configuration: blocksize=" +
      StringUtils.byteDesc(this.blocksize) +
      ", rollsize=" + StringUtils.byteDesc(this.logrollsize) +
      ", enabled=" + this.enabled +
      ", optionallogflushinterval=" + this.optionalFlushInterval + "ms");
    // If prefix is null or empty then just name it hlog.
    this.prefix = prefix == null || prefix.isEmpty() ?
        "hlog" : URLEncoder.encode(prefix, "UTF8");
    // rollWriter sets this.hdfs_out if it can.
    rollWriter();

    // handle the reflection necessary to call getNumCurrentReplicas()
    this.getNumCurrentReplicas = getGetNumCurrentReplicas(this.hdfs_out);

    logSyncer = new LogSyncer(this.optionalFlushInterval);
    // When optionalFlushInterval is set to 0, don't start a thread for deferred log sync.
    if (this.optionalFlushInterval > 0) {
      Threads.setDaemonThreadRunning(logSyncer.getThread(), Thread.currentThread().getName()
          + ".logSyncer");
    } else {
      LOG.info("hbase.regionserver.optionallogflushinterval is set to "
          + this.optionalFlushInterval + ". Deferred log syncing won't work. "
          + "Any Mutation marked for deferred sync will be flushed immediately.");
    }
    coprocessorHost = new WALCoprocessorHost(this, conf);
  }
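
  // An illustrative way to tune the knobs the constructor reads above (these
  // are the configuration keys it consults; the values here are examples, not
  // recommendations):
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setLong("hbase.regionserver.hlog.blocksize", 64 * 1024 * 1024);
  //   conf.setFloat("hbase.regionserver.logroll.multiplier", 0.95f);
  //   conf.setInt("hbase.regionserver.maxlogs", 32);
  //   conf.setLong("hbase.regionserver.optionallogflushinterval", 1000);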

  // Use reflection to search for getDefaultBlockSize(Path f).
  // If the method doesn't exist, fall back to using getDefaultBlockSize().
  private long getDefaultBlockSize() throws IOException {
    Method m = null;
    Class<? extends FileSystem> cls = this.fs.getClass();
    try {
      m = cls.getMethod("getDefaultBlockSize",
          new Class<?>[] { Path.class });
    } catch (NoSuchMethodException e) {
      LOG.info("FileSystem doesn't support getDefaultBlockSize");
    } catch (SecurityException e) {
      LOG.info("Doesn't have access to getDefaultBlockSize on "
          + "FileSystems", e);
      m = null; // could happen on setAccessible()
    }
    if (null == m) {
      return this.fs.getDefaultBlockSize();
    } else {
      try {
        Object ret = m.invoke(this.fs, this.dir);
        return ((Long)ret).longValue();
      } catch (Exception e) {
        throw new IOException(e);
      }
    }
  }

  /**
   * Find the 'getNumCurrentReplicas' on the passed <code>os</code> stream.
   * @return Method or null.
   */
  private Method getGetNumCurrentReplicas(final FSDataOutputStream os) {
    Method m = null;
    if (os != null) {
      Class<? extends OutputStream> wrappedStreamClass = os.getWrappedStream()
          .getClass();
      try {
        m = wrappedStreamClass.getDeclaredMethod("getNumCurrentReplicas",
            new Class<?>[] {});
        m.setAccessible(true);
      } catch (NoSuchMethodException e) {
        LOG.info("FileSystem's output stream doesn't support"
            + " getNumCurrentReplicas; --HDFS-826 not available; fsOut="
            + wrappedStreamClass.getName());
      } catch (SecurityException e) {
        LOG.info("Doesn't have access to getNumCurrentReplicas on "
            + "FileSystems's output stream --HDFS-826 not available; fsOut="
            + wrappedStreamClass.getName(), e);
        m = null; // could happen on setAccessible()
      }
    }
    if (m != null) {
      LOG.info("Using getNumCurrentReplicas--HDFS-826");
    }
    return m;
  }

  public void registerWALActionsListener(final WALActionsListener listener) {
    this.listeners.add(listener);
  }

  public boolean unregisterWALActionsListener(final WALActionsListener listener) {
    return this.listeners.remove(listener);
  }

  /**
   * @return Current state of the monotonically increasing file id.
   */
  public long getFilenum() {
    return this.filenum;
  }

  /**
   * Called by HRegionServer when it opens a new region to ensure that log
   * sequence numbers are always greater than the latest sequence number of the
   * region being brought on-line.
   *
   * @param newvalue We'll set log edit/sequence number to this value if it
   * is greater than the current value.
   */
  public void setSequenceNumber(final long newvalue) {
    for (long id = this.logSeqNum.get(); id < newvalue &&
        !this.logSeqNum.compareAndSet(id, newvalue); id = this.logSeqNum.get()) {
      // This could spin on occasion but better the occasional spin than locking
      // every increment of sequence number.
      LOG.debug("Changed sequenceid from " + logSeqNum + " to " + newvalue);
    }
  }

  /**
   * @return log sequence number
   */
  public long getSequenceNumber() {
    return logSeqNum.get();
  }

  /**
   * Method used internally by this class and for tests only.
   * @return The wrapped stream our writer is using; it's not the
   * writer's 'out' FSDataOutputStream but the stream that this 'out' wraps
   * (in HDFS it's an instance of DFSDataOutputStream).
   */
  // usage: see TestLogRolling.java
  OutputStream getOutputStream() {
    return this.hdfs_out.getWrappedStream();
  }

  /**
   * Roll the log writer. That is, start writing log messages to a new file.
   *
   * Because a log cannot be rolled during a cache flush, and a cache flush
   * spans two method calls, a special lock needs to be obtained so that a cache
   * flush cannot start when the log is being rolled and the log cannot be
   * rolled during a cache flush.
   *
   * <p>Note that this method cannot be synchronized because it is possible that
   * startCacheFlush runs, obtaining the cacheFlushLock, and then this method
   * starts, obtaining the lock on this object but blocking on the
   * cacheFlushLock; completeCacheFlush could then be called, waiting for the
   * lock on this object and consequently never releasing the cacheFlushLock.
   *
   * @return If lots of logs, flush the returned regions so next time through
   * we can clean logs. Returns null if nothing to flush.  Names are actual
   * region names as returned by {@link HRegionInfo#getEncodedName()}
   * @throws org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException
   * @throws IOException
   */
  public byte [][] rollWriter() throws FailedLogCloseException, IOException {
    return rollWriter(false);
  }
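
  // Illustrative caller pattern (a log-roller thread in a region server would
  // do something along these lines; names here are invented):
  //
  //   byte [][] regionsToFlush = wal.rollWriter();
  //   if (regionsToFlush != null) {
  //     // ask the server to flush these regions so old logs can be archived
  //   }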

  /**
   * Roll the log writer. That is, start writing log messages to a new file.
   *
   * Because a log cannot be rolled during a cache flush, and a cache flush
   * spans two method calls, a special lock needs to be obtained so that a cache
   * flush cannot start when the log is being rolled and the log cannot be
   * rolled during a cache flush.
   *
   * <p>Note that this method cannot be synchronized because it is possible that
   * startCacheFlush runs, obtaining the cacheFlushLock, and then this method
   * starts, obtaining the lock on this object but blocking on the
   * cacheFlushLock; completeCacheFlush could then be called, waiting for the
   * lock on this object and consequently never releasing the cacheFlushLock.
   *
   * @param force If true, force creation of a new writer even if no entries
   * have been written to the current writer
   * @return If lots of logs, flush the returned regions so next time through
   * we can clean logs. Returns null if nothing to flush.  Names are actual
   * region names as returned by {@link HRegionInfo#getEncodedName()}
   * @throws org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException
   * @throws IOException
   */
  public byte [][] rollWriter(boolean force)
      throws FailedLogCloseException, IOException {
    // Return if nothing to flush.
    if (!force && this.writer != null && this.numEntries.get() <= 0) {
      return null;
    }
    byte [][] regionsToFlush = null;
    this.cacheFlushLock.lock();
    this.logRollRunning = true;
    try {
      if (closed) {
        LOG.debug("HLog closed.  Skipping rolling of writer");
        return regionsToFlush;
      }
      // Do all the preparation outside of the updateLock to block
      // the incoming writes as little as possible.
      long currentFilenum = this.filenum;
      Path oldPath = null;
      if (currentFilenum > 0) {
        // computeFilename will take care of the meta hlog filename.
        oldPath = computeFilename(currentFilenum);
      }
      this.filenum = System.currentTimeMillis();
      Path newPath = computeFilename();

      // Tell our listeners that a new log is about to be created
      if (!this.listeners.isEmpty()) {
        for (WALActionsListener i : this.listeners) {
          i.preLogRoll(oldPath, newPath);
        }
      }
      HLog.Writer nextWriter = this.createWriterInstance(fs, newPath, conf);
      // Can we get at the dfsclient outputstream?  If an instance of
      // SFLW, it'll have done the necessary reflection to get at the
      // protected field name.
      FSDataOutputStream nextHdfsOut = null;
      if (nextWriter instanceof SequenceFileLogWriter) {
        nextHdfsOut = ((SequenceFileLogWriter)nextWriter).getWriterFSDataOutputStream();
      }

      synchronized (updateLock) {
        // Clean up current writer.
        Path oldFile = cleanupCurrentWriter(currentFilenum);
        this.writer = nextWriter;
        this.hdfs_out = nextHdfsOut;

        LOG.info((oldFile != null?
            "Roll " + FSUtils.getPath(oldFile) + ", entries=" +
            this.numEntries.get() +
            ", filesize=" +
            this.fs.getFileStatus(oldFile).getLen() + ". ": "") +
          " for " + FSUtils.getPath(newPath));
        this.numEntries.set(0);
      }
      // Tell our listeners that a new log was created
      if (!this.listeners.isEmpty()) {
        for (WALActionsListener i : this.listeners) {
          i.postLogRoll(oldPath, newPath);
        }
      }

      // Can we delete any of the old log files?
      if (this.outputfiles.size() > 0) {
        if (this.lastSeqWritten.isEmpty()) {
          LOG.debug("Last sequenceid written is empty. Deleting all old hlogs");
          // If so, then no new writes have come in since all regions were
          // flushed (and removed from the lastSeqWritten map). Means we can
          // remove all but the currently open log file.
          for (Map.Entry<Long, Path> e : this.outputfiles.entrySet()) {
            archiveLogFile(e.getValue(), e.getKey());
          }
          this.outputfiles.clear();
        } else {
          regionsToFlush = cleanOldLogs();
        }
      }
    } finally {
      this.logRollRunning = false;
      this.cacheFlushLock.unlock();
    }
    return regionsToFlush;
  }

  /**
   * This method allows subclasses to inject different writers without having to
   * extend other methods like rollWriter().
   *
   * @param fs
   * @param path
   * @param conf
   * @return Writer instance
   * @throws IOException
   */
  protected Writer createWriterInstance(final FileSystem fs, final Path path,
      final Configuration conf) throws IOException {
    if (forMeta) {
      //TODO: set a higher replication for the hlog files (HBASE-6773)
    }
    return this.hlogFs.createWriter(fs, conf, path);
  }

  /**
   * Get a reader for the WAL.
   * The proper way to tail a log that can be under construction is to first use this method
   * to get a reader then call {@link HLog.Reader#reset()} to see the new data. It will also
   * take care of keeping implementation-specific context (like compression).
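   * <p>A hedged tailing sketch (the loop condition and poll interval are
   * illustrative placeholders, not part of this API):
   * <pre>
   * HLog.Reader reader = HLog.getReader(fs, path, conf);
   * HLog.Entry entry;
   * while (keepTailing) {
   *   while ((entry = reader.next()) != null) {
   *     // consume the entry
   *   }
   *   Thread.sleep(pollIntervalMs); // wait for the writer to sync more edits
   *   reader.reset();               // pick up newly-appended data
   * }
   * </pre>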
   * @param fs
   * @param path
   * @param conf
   * @return A WAL reader.  Close when done with it.
   * @throws IOException
   */
  public static Reader getReader(final FileSystem fs, final Path path,
                                 Configuration conf)
      throws IOException {
    try {
      if (logReaderClass == null) {
        logReaderClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
            SequenceFileLogReader.class, Reader.class);
      }

      HLog.Reader reader = logReaderClass.newInstance();
      reader.init(fs, path, conf);
      return reader;
    } catch (IOException e) {
      throw e;
    } catch (Exception e) {
      throw new IOException("Cannot get log reader", e);
    }
  }

  /**
   * Get a writer for the WAL.
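   * <p>The implementation class is read from the
   * <code>hbase.regionserver.hlog.writer.impl</code> configuration key used
   * below, so a custom writer can be plugged in, illustratively, with
   * <code>conf.setClass("hbase.regionserver.hlog.writer.impl", MyWriter.class,
   * HLog.Writer.class)</code>, where <code>MyWriter</code> is a hypothetical
   * implementation of {@link HLog.Writer}.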
   * @param path
   * @param conf
   * @return A WAL writer.  Close when done with it.
   * @throws IOException
   */
  public static Writer createWriter(final FileSystem fs,
      final Path path, Configuration conf)
  throws IOException {
    try {
      if (logWriterClass == null) {
        logWriterClass = conf.getClass("hbase.regionserver.hlog.writer.impl",
            SequenceFileLogWriter.class, Writer.class);
      }
      HLog.Writer writer = (HLog.Writer) logWriterClass.newInstance();
      writer.init(fs, path, conf);
      return writer;
    } catch (Exception e) {
      throw new IOException("cannot get log writer", e);
    }
  }

  /*
   * Clean up old commit logs.
   * @return If lots of logs, flush the returned region so next time through
   * we can clean logs. Returns null if nothing to flush.  Returns array of
   * encoded region names to flush.
   * @throws IOException
   */
  private byte [][] cleanOldLogs() throws IOException {
    Long oldestOutstandingSeqNum = getOldestOutstandingSeqNum();
    // Get the set of all log files whose last sequence number is smaller than
    // the oldest edit's sequence number.
    TreeSet<Long> sequenceNumbers =
      new TreeSet<Long>(this.outputfiles.headMap(
        (Long.valueOf(oldestOutstandingSeqNum.longValue()))).keySet());
    // Now remove old log files (if any)
    int logsToRemove = sequenceNumbers.size();
    if (logsToRemove > 0) {
      if (LOG.isDebugEnabled()) {
        // Find associated region; helps debugging.
        byte [] oldestRegion = getOldestRegion(oldestOutstandingSeqNum);
        LOG.debug("Found " + logsToRemove + " hlogs to remove" +
          " out of total " + this.outputfiles.size() + ";" +
          " oldest outstanding sequenceid is " + oldestOutstandingSeqNum +
          " from region " + Bytes.toStringBinary(oldestRegion));
      }
      for (Long seq : sequenceNumbers) {
        archiveLogFile(this.outputfiles.remove(seq), seq);
      }
    }

    // If too many log files, figure which regions we need to flush.
    // Array is an array of encoded region names.
    byte [][] regions = null;
    int logCount = this.outputfiles == null? 0: this.outputfiles.size();
    if (logCount > this.maxLogs && logCount > 0) {
      // This is an array of encoded region names.
      regions = findMemstoresWithEditsEqualOrOlderThan(this.outputfiles.firstKey(),
        this.lastSeqWritten);
      if (regions != null) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < regions.length; i++) {
          if (i > 0) sb.append(", ");
          sb.append(Bytes.toStringBinary(regions[i]));
        }
        LOG.info("Too many hlogs: logs=" + logCount + ", maxlogs=" +
           this.maxLogs + "; forcing flush of " + regions.length + " region(s): " +
           sb.toString());
      }
    }
    return regions;
  }

  /**
   * Return regions (memstores) that have edits that are equal or less than
   * the passed <code>oldestWALseqid</code>.
   * @param oldestWALseqid
   * @param regionsToSeqids Encoded region names to sequence ids
   * @return All regions whose seqid is &lt;= <code>oldestWALseqid</code> (not
   * necessarily in order).  Null if no regions found.
   */
  static byte [][] findMemstoresWithEditsEqualOrOlderThan(final long oldestWALseqid,
      final Map<byte [], Long> regionsToSeqids) {
    // This method is static so it can be unit tested more easily.
    List<byte []> regions = null;
    for (Map.Entry<byte [], Long> e: regionsToSeqids.entrySet()) {
      if (e.getValue().longValue() <= oldestWALseqid) {
        if (regions == null) regions = new ArrayList<byte []>();
        // Key is encoded region name.
        regions.add(e.getKey());
      }
    }
    return regions == null?
      null: regions.toArray(new byte [][] {HConstants.EMPTY_BYTE_ARRAY});
  }
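
  // Worked example (values invented for illustration): if region A's oldest
  // unflushed edit has seqid 5 and region B's has seqid 12, then
  // findMemstoresWithEditsEqualOrOlderThan(10, map) returns only A. Flushing
  // A is what allows logs whose highest seqid is <= 10 to be archived.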

  /*
   * @return Logs older than this id are safe to remove.
   */
  private Long getOldestOutstandingSeqNum() {
    return Collections.min(this.lastSeqWritten.values());
  }

  /**
   * @param oldestOutstandingSeqNum
   * @return (Encoded) name of oldest outstanding region.
   */
  private byte [] getOldestRegion(final Long oldestOutstandingSeqNum) {
    byte [] oldestRegion = null;
    for (Map.Entry<byte [], Long> e: this.lastSeqWritten.entrySet()) {
      if (e.getValue().longValue() == oldestOutstandingSeqNum.longValue()) {
        // Key is encoded region name.
        oldestRegion = e.getKey();
        break;
      }
    }
    return oldestRegion;
  }

  /*
   * Cleans up the current writer, closing it and adding it to outputfiles.
   * Presumes we're operating inside an updateLock scope.
   * @return Path to current writer or null if none.
   * @throws IOException
   */
  Path cleanupCurrentWriter(final long currentfilenum) throws IOException {
    Path oldFile = null;
    if (this.writer != null) {
      // Close the current writer, get a new one.
      try {
        // Wait till all current transactions are written to the hlog.
        // No new transactions can occur because we have the updatelock.
        if (this.unflushedEntries.get() != this.syncedTillHere) {
          LOG.debug("cleanupCurrentWriter " +
                   " waiting for transactions to get synced " +
                   " total " + this.unflushedEntries.get() +
                   " synced till here " + syncedTillHere);
          sync();
        }
        this.writer.close();
        this.writer = null;
        closeErrorCount.set(0);
      } catch (IOException e) {
        LOG.error("Failed close of HLog writer", e);
        int errors = closeErrorCount.incrementAndGet();
        if (errors <= closeErrorsTolerated && !hasDeferredEntries()) {
          LOG.warn("Riding over HLog close failure! error count=" + errors);
        } else {
          if (hasDeferredEntries()) {
            LOG.error("Aborting due to unflushed edits in HLog");
          }
          // Failed close of log file.  Means we're losing edits.  For now,
          // shut ourselves down to minimize loss.  Alternative is to try and
          // keep going.  See HBASE-930.
          FailedLogCloseException flce =
            new FailedLogCloseException("#" + currentfilenum);
          flce.initCause(e);
          throw flce;
        }
      }
      if (currentfilenum >= 0) {
        oldFile = computeFilename(currentfilenum);
        this.outputfiles.put(Long.valueOf(this.logSeqNum.get()), oldFile);
      }
    }
    return oldFile;
  }

  private void archiveLogFile(final Path p, final Long seqno) throws IOException {
    Path newPath = getHLogArchivePath(this.oldLogDir, p);
    LOG.info("moving old hlog file " + FSUtils.getPath(p) +
      " whose highest sequenceid is " + seqno + " to " +
      FSUtils.getPath(newPath));

    // Tell our listeners that a log is going to be archived.
    if (!this.listeners.isEmpty()) {
      for (WALActionsListener i : this.listeners) {
        i.preLogArchive(p, newPath);
      }
    }
    if (!HBaseFileSystem.renameDirForFileSystem(fs, p, newPath)) {
      throw new IOException("Unable to rename " + p + " to " + newPath);
    }
    // Tell our listeners that a log has been archived.
    if (!this.listeners.isEmpty()) {
      for (WALActionsListener i : this.listeners) {
        i.postLogArchive(p, newPath);
      }
    }
  }

  /**
   * This is a convenience method that computes a new filename using the
   * current HLog file-number.
   * @return Path
   */
  protected Path computeFilename() {
    return computeFilename(this.filenum);
  }

  /**
   * This is a convenience method that computes a new filename with a given
   * file-number.
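   * <p>For example (the prefix is invented for illustration), with prefix
   * <code>host%2C60020%2C1340930483712</code> and filenum
   * <code>1340930600000</code>, the child name is
   * <code>host%2C60020%2C1340930483712.1340930600000</code>, with a
   * <code>.meta</code> suffix appended for a META HLog.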
   * @param filenum to use
   * @return Path
   */
  protected Path computeFilename(long filenum) {
    if (filenum < 0) {
      throw new RuntimeException("hlog file number can't be < 0");
    }
    String child = prefix + "." + filenum;
    if (forMeta) {
      child += HLog.META_HLOG_FILE_EXTN;
    }
    return new Path(dir, child);
  }

  public static boolean isMetaFile(Path p) {
    if (p.getName().endsWith(HLog.META_HLOG_FILE_EXTN)) {
      return true;
    }
    return false;
  }

  /**
   * Shut down the log and delete the log directory
   *
   * @throws IOException
   */
  public void closeAndDelete() throws IOException {
    close();
    if (!fs.exists(this.dir)) return;
    FileStatus[] files = fs.listStatus(this.dir);
    for (FileStatus file : files) {
      Path p = getHLogArchivePath(this.oldLogDir, file.getPath());
      // Tell our listeners that a log is going to be archived.
      if (!this.listeners.isEmpty()) {
        for (WALActionsListener i : this.listeners) {
          i.preLogArchive(file.getPath(), p);
        }
      }

      if (!HBaseFileSystem.renameDirForFileSystem(fs, file.getPath(), p)) {
        throw new IOException("Unable to rename " + file.getPath() + " to " + p);
      }
      // Tell our listeners that a log was archived.
      if (!this.listeners.isEmpty()) {
        for (WALActionsListener i : this.listeners) {
          i.postLogArchive(file.getPath(), p);
        }
      }
    }
    LOG.debug("Moved " + files.length + " log files to " +
      FSUtils.getPath(this.oldLogDir));
    if (!HBaseFileSystem.deleteDirFromFileSystem(fs, dir)) {
      LOG.info("Unable to delete " + dir);
    }
  }

  /**
   * Shut down the log.
   *
   * @throws IOException
   */
  public void close() throws IOException {
    // When optionalFlushInterval is 0, the logSyncer is not started as a Thread.
    if (this.optionalFlushInterval > 0) {
      try {
        logSyncer.close();
        // Make sure we synced everything
        logSyncer.join(this.optionalFlushInterval * 2);
      } catch (InterruptedException e) {
        LOG.error("Exception while waiting for syncer thread to die", e);
      }
    }

    cacheFlushLock.lock();
    try {
      // Tell our listeners that the log is closing
      if (!this.listeners.isEmpty()) {
        for (WALActionsListener i : this.listeners) {
          i.logCloseRequested();
        }
      }
      synchronized (updateLock) {
        this.closed = true;
        if (LOG.isDebugEnabled()) {
          LOG.debug("closing hlog writer in " + this.dir.toString());
        }
        if (this.writer != null) {
          this.writer.close();
        }
      }
    } finally {
      cacheFlushLock.unlock();
    }
  }

  /**
   * @param regionName
   * @param tableName
   * @param seqnum
   * @param now
   * @param clusterId
   * @return New log key.
   */
  protected HLogKey makeKey(byte[] regionName, byte[] tableName, long seqnum,
      long now, UUID clusterId) {
    return new HLogKey(regionName, tableName, seqnum, now, clusterId);
  }

  /** Append an entry to the log.
   *
   * @param regionInfo
   * @param logKey
   * @param logEdit
   * @param htd
   * @param doSync shall we sync after writing the transaction
   * @return The txid of this transaction
   * @throws IOException
   */
  public long append(HRegionInfo regionInfo, HLogKey logKey, WALEdit logEdit,
                     HTableDescriptor htd, boolean doSync)
  throws IOException {
    if (this.closed) {
      throw new IOException("Cannot append; log is closed");
    }
    long txid = 0;
    synchronized (updateLock) {
      long seqNum = obtainSeqNum();
      logKey.setLogSeqNum(seqNum);
      // The 'lastSeqWritten' map holds the sequence number of the oldest
      // write for each region (i.e. the first edit added to the particular
      // memstore). When the cache is flushed, the entry for the
      // region being flushed is removed if the sequence number of the flush
      // is greater than or equal to the value in lastSeqWritten.
      this.lastSeqWritten.putIfAbsent(regionInfo.getEncodedNameAsBytes(),
        Long.valueOf(seqNum));
      doWrite(regionInfo, logKey, logEdit, htd);
      txid = this.unflushedEntries.incrementAndGet();
      this.numEntries.incrementAndGet();
      if (htd.isDeferredLogFlush()) {
        lastDeferredTxid = txid;
      }
    }

    // Sync if catalog region, and if not then check if that table supports
    // deferred log flushing
    if (doSync &&
        (regionInfo.isMetaRegion() ||
        !htd.isDeferredLogFlush())) {
      // sync txn to file system
      this.sync(txid);
    }
    return txid;
  }

  /**
   * Only used in tests.
   *
   * @param info
   * @param tableName
   * @param edits
   * @param now
   * @param htd
   * @throws IOException
   */
  public void append(HRegionInfo info, byte [] tableName, WALEdit edits,
    final long now, HTableDescriptor htd)
  throws IOException {
    append(info, tableName, edits, HConstants.DEFAULT_CLUSTER_ID, now, htd);
  }

  /**
   * Append a set of edits to the log. Log edits are keyed by (encoded)
   * regionName, rowname, and log-sequence-id.
   *
   * Later, if we sort by these keys, we obtain all the relevant edits for a
   * given key-range of the HRegion (TODO). Any edits that do not have a
   * matching COMPLETE_CACHEFLUSH message can be discarded.
   *
   * <p>
   * Logs cannot be restarted once closed, or once the HLog process dies. Each
   * time the HLog starts, it must create a new log. This means that other
   * systems should process the log appropriately upon each startup (and prior
   * to initializing HLog).
   *
   * synchronized prevents appends during the completion of a cache flush or for
   * the duration of a log roll.
   *
   * @param info
   * @param tableName
   * @param edits
   * @param clusterId The originating clusterId for this edit (for replication)
   * @param now
   * @param htd
   * @param doSync shall we sync?
   * @return txid of this transaction
   * @throws IOException
   */
  private long append(HRegionInfo info, byte [] tableName, WALEdit edits, UUID clusterId,
      final long now, HTableDescriptor htd, boolean doSync)
    throws IOException {
    if (edits.isEmpty()) return this.unflushedEntries.get();
    if (this.closed) {
      throw new IOException("Cannot append; log is closed");
    }
    long txid = 0;
    synchronized (this.updateLock) {
      long seqNum = obtainSeqNum();
      // The 'lastSeqWritten' map holds the sequence number of the oldest
      // write for each region (i.e. the first edit added to the particular
      // memstore). When the cache is flushed, the entry for the
      // region being flushed is removed if the sequence number of the flush
      // is greater than or equal to the value in lastSeqWritten.
      // Use encoded name.  It's shorter, guaranteed unique and a subset of
      // the actual name.
      byte [] encodedRegionName = info.getEncodedNameAsBytes();
      this.lastSeqWritten.putIfAbsent(encodedRegionName, seqNum);
      HLogKey logKey = makeKey(encodedRegionName, tableName, seqNum, now, clusterId);
      doWrite(info, logKey, edits, htd);
      this.numEntries.incrementAndGet();
      txid = this.unflushedEntries.incrementAndGet();
      if (htd.isDeferredLogFlush()) {
        lastDeferredTxid = txid;
      }
    }
    // Sync if catalog region, and if not then check if that table supports
    // deferred log flushing
    if (doSync &&
        (info.isMetaRegion() ||
        !htd.isDeferredLogFlush())) {
      // sync txn to file system
      this.sync(txid);
    }
    return txid;
  }

  /**
   * Append a set of edits to the log. Log edits are keyed by (encoded)
   * regionName, rowname, and log-sequence-id. The HLog is not flushed
   * after this transaction is written to the log.
   *
   * @param info
   * @param tableName
   * @param edits
   * @param clusterId The originating clusterId for this edit (for replication)
   * @param now
   * @param htd
   * @return txid of this transaction
   * @throws IOException
   */
  public long appendNoSync(HRegionInfo info, byte [] tableName, WALEdit edits,
    UUID clusterId, final long now, HTableDescriptor htd)
    throws IOException {
    return append(info, tableName, edits, clusterId, now, htd, false);
  }

  /**
   * Append a set of edits to the log. Log edits are keyed by (encoded)
   * regionName, rowname, and log-sequence-id. The HLog is flushed
   * after this transaction is written to the log.
   *
   * @param info
   * @param tableName
   * @param edits
   * @param clusterId The originating clusterId for this edit (for replication)
   * @param now
   * @param htd
   * @return txid of this transaction
   * @throws IOException
   */
  public long append(HRegionInfo info, byte [] tableName, WALEdit edits,
    UUID clusterId, final long now, HTableDescriptor htd)
    throws IOException {
    return append(info, tableName, edits, clusterId, now, htd, true);
  }
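
  // Illustrative calling pattern (names invented): a table with deferred log
  // flush can trade durability for latency by appending without an immediate
  // sync and letting the LogSyncer thread, or a later explicit sync, catch up:
  //
  //   long txid = wal.appendNoSync(info, tableName, edits, clusterId, now, htd);
  //   ...                // further appends may batch up here
  //   wal.sync(txid);    // block until this transaction is durable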

  /**
   * This class is responsible for holding the HLog's appended Entry list
   * and for syncing the entries according to a configurable interval.
   *
   * Deferred log flushing works by piggybacking on this process: the appended
   * Entry is simply not sync'd right away. Entries can also be sync'd by
   * other, non-deferred log-flushed entries outside of this thread.
   */
  class LogSyncer extends HasThread {

    private final long optionalFlushInterval;

    private AtomicBoolean closeLogSyncer = new AtomicBoolean(false);

    // List of pending writes to the HLog. These correspond to transactions
    // that have not yet returned to the client. We keep them cached here
    // instead of writing them to HDFS piecemeal, because the HDFS write
    // method is pretty heavyweight as far as locking is concerned. The
    // goal is to increase the batchsize for writing-to-hdfs as well as
    // sync-to-hdfs, so that we can get better system throughput.
    private List<Entry> pendingWrites = new LinkedList<Entry>();

    LogSyncer(long optionalFlushInterval) {
      this.optionalFlushInterval = optionalFlushInterval;
    }

    @Override
    public void run() {
      try {
        // awaiting with a timeout doesn't always
        // throw exceptions on interrupt
        while (!this.isInterrupted() && !closeLogSyncer.get()) {
          try {
            if (unflushedEntries.get() <= syncedTillHere) {
              synchronized (closeLogSyncer) {
                closeLogSyncer.wait(this.optionalFlushInterval);
              }
            }
            // Calling sync since we waited or had unflushed entries.
            // Entries appended but not sync'd are taken care of here AKA
            // deferred log flush
            sync();
          } catch (IOException e) {
            LOG.error("Error while syncing, requesting close of hlog ", e);
            requestLogRoll();
          }
        }
      } catch (InterruptedException e) {
        LOG.debug(getName() + " interrupted while waiting for sync requests");
      } finally {
        LOG.info(getName() + " exiting");
      }
    }

    // Appends new writes to the pendingWrites. It is better to keep them in
    // our own queue rather than writing them to the HDFS output stream because
    // HDFSOutputStream.writeChunk is not lightweight at all.
    synchronized void append(Entry e) throws IOException {
      pendingWrites.add(e);
    }

    // Returns all currently pending writes. New writes
    // will accumulate in a new list.
    synchronized List<Entry> getPendingWrites() {
      List<Entry> save = this.pendingWrites;
      this.pendingWrites = new LinkedList<Entry>();
      return save;
    }

    // Writes out pending entries to the HLog.
    void hlogFlush(Writer writer, List<Entry> pending) throws IOException {
      if (pending == null) return;

      // Write out all accumulated Entries to hdfs.
      for (Entry e : pending) {
        writer.append(e);
      }
    }

    void close() {
      synchronized (closeLogSyncer) {
        closeLogSyncer.set(true);
        closeLogSyncer.notifyAll();
      }
    }
  }

  // Sync all known transactions.
  private void syncer() throws IOException {
    syncer(this.unflushedEntries.get()); // sync all pending items
  }

  // Sync all transactions up to the specified txid.
  private void syncer(long txid) throws IOException {
    // If the transaction that we are interested in is already
    // synced, then return immediately.
    if (txid <= this.syncedTillHere) {
      return;
    }
    Writer tempWriter;
    synchronized (this.updateLock) {
      if (this.closed) return;
      tempWriter = this.writer; // guaranteed non-null
    }
    try {
      long doneUpto;
      long now = System.currentTimeMillis();
      // First flush all the pending writes to HDFS. Then
      // issue the sync to HDFS. If the sync is successful, then update
      // syncedTillHere to indicate that transactions up to this
      // number have been successfully synced.
      IOException ioe = null;
      List<Entry> pending = null;
      synchronized (flushLock) {
        if (txid <= this.syncedTillHere) {
          return;
        }
        doneUpto = this.unflushedEntries.get();
        pending = logSyncer.getPendingWrites();
        try {
          logSyncer.hlogFlush(tempWriter, pending);
        } catch (IOException io) {
          ioe = io;
          LOG.error("syncer encountered error, will retry. txid=" + txid, ioe);
        }
      }
      if (ioe != null && pending != null) {
        synchronized (this.updateLock) {
          synchronized (flushLock) {
            // HBASE-4387, HBASE-5623, retry with updateLock held
            tempWriter = this.writer;
            logSyncer.hlogFlush(tempWriter, pending);
          }
        }
      }
      // Another thread might have sync'ed; avoid double-sync'ing.
      if (txid <= this.syncedTillHere) {
        return;
      }
      try {
        tempWriter.sync();
      } catch (IOException io) {
        synchronized (this.updateLock) {
          // HBASE-4387, HBASE-5623, retry with updateLock held
          tempWriter = this.writer;
          tempWriter.sync();
        }
      }
      this.syncedTillHere = Math.max(this.syncedTillHere, doneUpto);

      syncTime.inc(System.currentTimeMillis() - now);
      if (!this.logRollRunning) {
        checkLowReplication();
        try {
          if (tempWriter.getLength() > this.logrollsize) {
            requestLogRoll();
          }
        } catch (IOException x) {
          LOG.debug("Log roll failed and will be retried. (This is not an error)");
        }
      }
    } catch (IOException e) {
      LOG.fatal("Could not sync. Requesting close of hlog", e);
      requestLogRoll();
      throw e;
    }
  }
1397 
1398   private void checkLowReplication() {
1399     // if the number of replicas in HDFS has fallen below the configured
1400     // value, then roll logs.
1401     try {
1402       int numCurrentReplicas = getLogReplication();
1403       if (numCurrentReplicas != 0
1404           && numCurrentReplicas < this.minTolerableReplication) {
1405         if (this.lowReplicationRollEnabled) {
1406           if (this.consecutiveLogRolls < this.lowReplicationRollLimit) {
1407             LOG.warn("HDFS pipeline error detected. Found "
1408                 + numCurrentReplicas + " replicas but expecting no fewer than "
1409                 + this.minTolerableReplication + " replicas. "
1410                 + "Requesting close of hlog.");
1411             requestLogRoll();
1412             // If rollWriter is requested, increase consecutiveLogRolls. Once it
1413             // is larger than lowReplicationRollLimit, disable the
1414             // LowReplication-Roller
1415             this.consecutiveLogRolls++;
1416           } else {
1417             LOG.warn("Too many consecutive RollWriter requests; this is a sign that "
1418                 + "the number of live datanodes is lower than the tolerable number of replicas.");
1419             this.consecutiveLogRolls = 0;
1420             this.lowReplicationRollEnabled = false;
1421           }
1422         }
1423       } else if (numCurrentReplicas >= this.minTolerableReplication) {
1424 
1425         if (!this.lowReplicationRollEnabled) {
1426           // A fresh writer always reports the default replication, so we
1427           // should not re-enable the LowReplication-Roller for it. If
1428           // numEntries is less than or equal to 1, we treat the writer as new.
1429           if (this.numEntries.get() <= 1) {
1430             return;
1431           }
1432           // Once the number of live datanodes and the replica count return
1433           // to normal, re-enable the LowReplication-Roller.
1434           this.lowReplicationRollEnabled = true;
1435           LOG.info("LowReplication-Roller was enabled.");
1436         }
1437       }
1438     } catch (Exception e) {
1439       LOG.warn("Unable to invoke DFSOutputStream.getNumCurrentReplicas: " + e +
1440           "; still proceeding ahead...");
1441     }
1442   }
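       /*
        * Tuning sketch (illustrative): the roller above is driven by the
        * minTolerableReplication and lowReplicationRollLimit fields. The
        * config key names below are assumptions inferred from those field
        * names, not verified against hbase-default.xml.
        *
        *   Configuration conf = HBaseConfiguration.create();
        *   // roll when live replicas drop below this value (assumed key)
        *   conf.setInt("hbase.regionserver.hlog.tolerable.lowreplication", 3);
        *   // give up after this many consecutive roll requests (assumed key)
        *   conf.setInt("hbase.regionserver.hlog.lowreplication.rolllimit", 5);
        */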
1443 
1444   /**
1445    * This method gets the datanode replication count for the current HLog.
1446    *
1447    * If the pipeline isn't started yet or is empty, you will get the default
1448    * replication factor.  Therefore, if this function returns 0, it means you
1449    * are not properly running with the HDFS-826 patch.
1450    *
1451    * @return the current number of datanode replicas, or 0 if undeterminable
1452    * @throws IllegalArgumentException
1453    * @throws IllegalAccessException
1454    * @throws InvocationTargetException
1455    */
1456   int getLogReplication()
1457   throws IllegalArgumentException, IllegalAccessException, InvocationTargetException {
1458     if (this.getNumCurrentReplicas != null && this.hdfs_out != null) {
1459       Object repl = this.getNumCurrentReplicas.invoke(getOutputStream(), NO_ARGS);
1460       if (repl instanceof Integer) {
1461         return ((Integer)repl).intValue();
1462       }
1463     }
1464     return 0;
1465   }
1466 
1467   boolean canGetCurReplicas() {
1468     return this.getNumCurrentReplicas != null;
1469   }
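       /*
        * Caller sketch (illustrative): getNumCurrentReplicas is resolved by
        * reflection because older Hadoop releases lack it, so callers guard
        * with canGetCurReplicas(); "log" is an assumed HLog instance.
        *
        *   if (log.canGetCurReplicas()) {
        *     try {
        *       int live = log.getLogReplication(); // 0 => HDFS-826 missing
        *     } catch (Exception e) {
        *       // reflection failed; assume the default replication factor
        *     }
        *   }
        */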
1470 
1471   public void hsync() throws IOException {
1472     syncer();
1473   }
1474 
1475   public void hflush() throws IOException {
1476     syncer();
1477   }
1478 
1479   public void sync() throws IOException {
1480     syncer();
1481   }
1482 
1483   public void sync(long txid) throws IOException {
1484     syncer(txid);
1485   }
1486 
1487   private void requestLogRoll() {
1488     if (!this.listeners.isEmpty()) {
1489       for (WALActionsListener i: this.listeners) {
1490         i.logRollRequested();
1491       }
1492     }
1493   }
1494 
1495   protected void doWrite(HRegionInfo info, HLogKey logKey, WALEdit logEdit,
1496                            HTableDescriptor htd)
1497   throws IOException {
1498     if (!this.enabled) {
1499       return;
1500     }
1501     if (!this.listeners.isEmpty()) {
1502       for (WALActionsListener i: this.listeners) {
1503         i.visitLogEntryBeforeWrite(htd, logKey, logEdit);
1504       }
1505     }
1506     try {
1507       long now = System.currentTimeMillis();
1508       // coprocessor hook:
1509       if (!coprocessorHost.preWALWrite(info, logKey, logEdit)) {
1510         // write to our buffer for the HLog file.
1511         logSyncer.append(new HLog.Entry(logKey, logEdit));
1512       }
1513       long took = System.currentTimeMillis() - now;
1514       coprocessorHost.postWALWrite(info, logKey, logEdit);
1515       writeTime.inc(took);
1516       long len = 0;
1517       for (KeyValue kv : logEdit.getKeyValues()) {
1518         len += kv.getLength();
1519       }
1520       writeSize.inc(len);
1521       if (took > 1000) {
1522         LOG.warn(String.format(
1523           "%s took %d ms appending an edit to hlog; editcount=%d, len~=%s",
1524           Thread.currentThread().getName(), took, this.numEntries.get(),
1525           StringUtils.humanReadableInt(len)));
1526         slowHLogAppendCount.incrementAndGet();
1527         slowHLogAppendTime.inc(took);
1528       }
1529     } catch (IOException e) {
1530       LOG.fatal("Could not append. Requesting close of hlog", e);
1531       requestLogRoll();
1532       throw e;
1533     }
1534   }
1535 
1536 
1537   /** @return How many items have been added to the log */
1538   int getNumEntries() {
1539     return numEntries.get();
1540   }
1541 
1542   /**
1543    * Obtain a log sequence number.
1544    */
1545   private long obtainSeqNum() {
1546     return this.logSeqNum.incrementAndGet();
1547   }
1548 
1549   /** @return the number of log files in use */
1550   int getNumLogFiles() {
1551     return outputfiles.size();
1552   }
1553 
1554   private byte[] getSnapshotName(byte[] encodedRegionName) {
1555     byte[] snp = new byte[encodedRegionName.length + 3];
1556     // an encoded region name contains only hex digits; 's', 'n' and 'p'
1557     // are not hex, so snapshot names can never collide with
1558     // encoded region names.
1559     snp[0] = 's'; snp[1] = 'n'; snp[2] = 'p';
1560     for (int i = 0; i < encodedRegionName.length; i++) {
1561       snp[i+3] = encodedRegionName[i];
1562     }
1563     return snp;
1564   }
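       /*
        * Worked example (illustrative): for the hex-only encoded region name
        * "f1a2b3", the snapshot name is just the same bytes behind an "snp"
        * prefix.
        *
        *   byte[] region = Bytes.toBytes("f1a2b3");
        *   byte[] snap = getSnapshotName(region); // bytes of "snpf1a2b3"
        */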
1565 
1566   /**
1567    * By acquiring a log sequence ID, we can allow log messages to continue while
1568    * we flush the cache.
1569    *
1570    * Acquire a lock so that we do not roll the log between the start and
1571    * completion of a cache-flush. Otherwise the log-seq-id for the flush will
1572    * not appear in the correct logfile.
1573    *
1574    * Ensuring that flushes and log-rolls don't happen concurrently also allows
1575    * us to temporarily put a log-seq-number in lastSeqWritten against the region
1576    * being flushed that might not be the earliest in-memory log-seq-number for
1577    * that region. By the time the flush is completed or aborted and before the
1578    * cacheFlushLock is released it is ensured that lastSeqWritten again has the
1579    * oldest in-memory edit's lsn for the region that was being flushed.
1580    *
1581    * In this method, by removing the entry in lastSeqWritten for the region
1582    * being flushed we ensure that the next edit inserted in this region will be
1583    * correctly recorded in
1584    * {@link #append(HRegionInfo, byte[], WALEdit, long, HTableDescriptor)}. The
1585    * lsn of the earliest in-memory edit - which is now in the memstore snapshot -
1586    * is saved temporarily in the lastSeqWritten map while the flush is active.
1587    *
1588    * @return sequence ID to pass to
1589    *         {@link #completeCacheFlush(byte[], byte[], long, boolean)}
1590    * @see #completeCacheFlush(byte[], byte[], long, boolean)
1591    * @see #abortCacheFlush(byte[])
1592    */
1593   public long startCacheFlush(final byte[] encodedRegionName) {
1594     this.cacheFlushLock.lock();
1595     Long seq = this.lastSeqWritten.remove(encodedRegionName);
1596     // seq is the lsn of the oldest edit associated with this region. If a
1597     // snapshot already exists - because the last flush failed - then seq will
1598     // be the lsn of the oldest edit in the snapshot
1599     if (seq != null) {
1600       // keeping the earliest sequence number of the snapshot in
1601       // lastSeqWritten maintains the correctness of
1602       // getOldestOutstandingSeqNum(). But it doesn't really matter because
1603       // everything is done while holding the cacheFlushLock.
1604       Long oldseq =
1605         lastSeqWritten.put(getSnapshotName(encodedRegionName), seq);
1606       if (oldseq != null) {
1607         LOG.error("Logic Error: snapshot seq id from an earlier flush is still" +
1608             " present for region " + Bytes.toString(encodedRegionName) +
1609             "; overwrote oldseq=" + oldseq + " with new seq=" + seq);
1610         Runtime.getRuntime().halt(1);
1611       }
1612     }
1613     return obtainSeqNum();
1614   }
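       /*
        * Flush protocol sketch (illustrative): how a flushing region would
        * pair the three cache-flush calls; "log", "encodedName" and
        * "tableName" are assumed caller-side names. Exactly one of
        * completeCacheFlush/abortCacheFlush must run, as each releases the
        * cacheFlushLock taken by startCacheFlush.
        *
        *   long seqId = log.startCacheFlush(encodedName);
        *   boolean flushed = false;
        *   try {
        *     // ... persist the memstore snapshot to an HFile ...
        *     flushed = true;
        *   } finally {
        *     if (flushed) {
        *       log.completeCacheFlush(encodedName, tableName, seqId, false);
        *     } else {
        *       log.abortCacheFlush(encodedName); // restore lastSeqWritten
        *     }
        *   }
        */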
1615 
1616 
1617   /**
1618    * Complete the cache flush.
1619    * Protected by cacheFlushLock.
1620    *
1621    * @param encodedRegionName encoded name of the flushed region
1622    * @param tableName table the region belongs to
1623    * @param logSeqId sequence id returned by {@link #startCacheFlush(byte[])}
1624    * @param isMetaRegion whether the flushed region is a meta region
1625    * @throws IOException
1626    */
1627   public void completeCacheFlush(final byte [] encodedRegionName,
1628       final byte [] tableName, final long logSeqId, final boolean isMetaRegion)
1629   throws IOException {
1630     try {
1631       if (this.closed) {
1632         return;
1633       }
1634       long txid = 0;
1635       synchronized (updateLock) {
1636         long now = System.currentTimeMillis();
1637         WALEdit edit = completeCacheFlushLogEdit();
1638         HLogKey key = makeKey(encodedRegionName, tableName, logSeqId,
1639             System.currentTimeMillis(), HConstants.DEFAULT_CLUSTER_ID);
1640         logSyncer.append(new Entry(key, edit));
1641         txid = this.unflushedEntries.incrementAndGet();
1642         writeTime.inc(System.currentTimeMillis() - now);
1643         long len = 0;
1644         for (KeyValue kv : edit.getKeyValues()) {
1645           len += kv.getLength();
1646         }
1647         writeSize.inc(len);
1648         this.numEntries.incrementAndGet();
1649       }
1650       // sync txn to file system
1651       this.sync(txid);
1652 
1653     } finally {
1654       // updateLock not needed for removing snapshot's entry
1655       // Cleaning up of lastSeqWritten is in the finally clause because we
1656       // don't want to confuse getOldestOutstandingSeqNum()
1657       this.lastSeqWritten.remove(getSnapshotName(encodedRegionName));
1658       this.cacheFlushLock.unlock();
1659     }
1660   }
1661 
1662   private WALEdit completeCacheFlushLogEdit() {
1663     KeyValue kv = new KeyValue(METAROW, METAFAMILY, null,
1664       System.currentTimeMillis(), COMPLETE_CACHE_FLUSH);
1665     WALEdit e = new WALEdit();
1666     e.add(kv);
1667     return e;
1668   }
1669 
1670   /**
1671    * Abort a cache flush.
1672    * Call if the flush fails. Note that the only recovery for an aborted flush
1673    * currently is a restart of the regionserver so the snapshot content dropped
1674    * by the failure gets restored to the memstore.
1675    */
1676   public void abortCacheFlush(byte[] encodedRegionName) {
1677     Long snapshot_seq =
1678       this.lastSeqWritten.remove(getSnapshotName(encodedRegionName));
1679     if (snapshot_seq != null) {
1680       // updateLock is not necessary because we are racing against
1681       // lastSeqWritten.putIfAbsent() in append() and we will always win.
1682       // Before releasing cacheFlushLock, make sure that the region's entry
1683       // in lastSeqWritten points to the earliest edit in the region.
1684       Long current_memstore_earliest_seq =
1685         this.lastSeqWritten.put(encodedRegionName, snapshot_seq);
1686       if (current_memstore_earliest_seq != null &&
1687           (current_memstore_earliest_seq.longValue() <=
1688             snapshot_seq.longValue())) {
1689         LOG.error("Logic Error: region " + Bytes.toString(encodedRegionName) +
1690             " acquired edits out of order; current memstore seq=" +
1691             current_memstore_earliest_seq + " snapshot seq=" + snapshot_seq);
1692         Runtime.getRuntime().halt(1);
1693       }
1694     }
1695     this.cacheFlushLock.unlock();
1696   }
1697 
1698   /**
1699    * @param family column family name to check
1700    * @return true if the column family is the meta family
1701    */
1702   public static boolean isMetaFamily(byte [] family) {
1703     return Bytes.equals(METAFAMILY, family);
1704   }
1705 
1706   /**
1707    * Get LowReplication-Roller status
1708    * 
1709    * @return lowReplicationRollEnabled
1710    */
1711   public boolean isLowReplicationRollEnabled() {
1712     return lowReplicationRollEnabled;
1713   }
1714 
1715   @SuppressWarnings("unchecked")
1716   public static Class<? extends HLogKey> getKeyClass(Configuration conf) {
1717      return (Class<? extends HLogKey>)
1718        conf.getClass("hbase.regionserver.hlog.keyclass", HLogKey.class);
1719   }
1720 
1721   public static HLogKey newKey(Configuration conf) throws IOException {
1722     Class<? extends HLogKey> keyClass = getKeyClass(conf);
1723     try {
1724       return keyClass.newInstance();
1725     } catch (InstantiationException e) {
1726       throw new IOException("cannot create hlog key", e);
1727     } catch (IllegalAccessException e) {
1728       throw new IOException("cannot create hlog key", e);
1729     }
1730   }
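       /*
        * Sketch (illustrative): installing a custom key class. The subclass
        * needs a public no-arg constructor because newKey() instantiates it
        * reflectively; "MyHLogKey" is hypothetical.
        *
        *   conf.setClass("hbase.regionserver.hlog.keyclass",
        *       MyHLogKey.class, HLogKey.class);
        *   HLogKey key = HLog.newKey(conf); // returns a MyHLogKey instance
        */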
1731 
1732   /**
1733    * Utility class that lets us keep track of an edit with its key.
1734    * Only used when splitting logs.
1735    */
1736   public static class Entry implements Writable {
1737     private WALEdit edit;
1738     private HLogKey key;
1739 
1740     public Entry() {
1741       edit = new WALEdit();
1742       key = new HLogKey();
1743     }
1744 
1745     /**
1746      * Constructor for both params.
1747      * @param key log's key
1748      * @param edit log's edit
1749      */
1750     public Entry(HLogKey key, WALEdit edit) {
1751       super();
1752       this.key = key;
1753       this.edit = edit;
1754     }
1755     /**
1756      * Gets the edit
1757      * @return edit
1758      */
1759     public WALEdit getEdit() {
1760       return edit;
1761     }
1762     /**
1763      * Gets the key
1764      * @return key
1765      */
1766     public HLogKey getKey() {
1767       return key;
1768     }
1769 
1770     /**
1771      * Set compression context for this entry.
1772      * @param compressionContext Compression context
1773      */
1774     public void setCompressionContext(CompressionContext compressionContext) {
1775       edit.setCompressionContext(compressionContext);
1776       key.setCompressionContext(compressionContext);
1777     }
1778 
1779     @Override
1780     public String toString() {
1781       return this.key + "=" + this.edit;
1782     }
1783 
1784     @Override
1785     public void write(DataOutput dataOutput) throws IOException {
1786       this.key.write(dataOutput);
1787       this.edit.write(dataOutput);
1788     }
1789 
1790     @Override
1791     public void readFields(DataInput dataInput) throws IOException {
1792       this.key.readFields(dataInput);
1793       this.edit.readFields(dataInput);
1794     }
1795   }
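       /*
        * Round-trip sketch (illustrative): Entry is a Writable, so it can be
        * serialized with plain data streams; "out" and "in" are assumed to
        * wrap the same bytes.
        *
        *   Entry written = new Entry(key, edit);
        *   written.write(out);      // key is written first, then the edit
        *   Entry read = new Entry();
        *   read.readFields(in);     // mirrors the write order exactly
        */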
1796 
1797   /**
1798    * Construct the HLog directory name
1799    *
1800    * @param serverName Server name formatted as described in {@link ServerName}
1801    * @return the HLog directory name
1802    */
1803   public static String getHLogDirectoryName(final String serverName) {
1804     StringBuilder dirName = new StringBuilder(HConstants.HREGION_LOGDIR_NAME);
1805     dirName.append("/");
1806     dirName.append(serverName);
1807     return dirName.toString();
1808   }
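       /*
        * Example (illustrative): for a server named
        * "host1,60020,1285580123456" this returns
        * ".logs/host1,60020,1285580123456" (assuming HREGION_LOGDIR_NAME is
        * ".logs"), which is then resolved against the HBase root directory.
        */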
1809 
1810   /**
1811    * Get the directory we are making logs in.
1812    * 
1813    * @return dir
1814    */
1815   protected Path getDir() {
1816     return dir;
1817   }
1818   
1819   /**
1820    * @param filename name of the file to validate
1821    * @return <tt>true</tt> if the filename matches an HLog, <tt>false</tt>
1822    *         otherwise
1823    */
1824   public static boolean validateHLogFilename(String filename) {
1825     return pattern.matcher(filename).matches();
1826   }
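       /*
        * Example (illustrative): HLog file names are the URL-encoded server
        * name plus a dot and a timestamp, so a name like
        * "host1%2C60020%2C1285580123456.1285580123999" should validate, while
        * a recovered-edits name such as "0000000000000000124" (no dot-number
        * suffix) should not.
        */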
1827 
1828   static Path getHLogArchivePath(Path oldLogDir, Path p) {
1829     return new Path(oldLogDir, p.getName());
1830   }
1831 
1832   static String formatRecoveredEditsFileName(final long seqid) {
1833     return String.format("%019d", seqid);
1834   }
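       /*
        * Example (illustrative): sequence ids are zero-padded to 19 digits so
        * that lexicographic order equals numeric order.
        *
        *   formatRecoveredEditsFileName(124); // "0000000000000000124"
        */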
1835 
1836   /**
1837    * Returns sorted set of edit files made by wal-log splitter, excluding files
1838    * with '.temp' suffix.
1839    * @param fs filesystem containing the region directory
1840    * @param regiondir region directory to scan for edit files
1841    * @return Files in passed <code>regiondir</code> as a sorted set.
1842    * @throws IOException
1843    */
1844   public static NavigableSet<Path> getSplitEditFilesSorted(final FileSystem fs,
1845       final Path regiondir)
1846   throws IOException {
1847     NavigableSet<Path> filesSorted = new TreeSet<Path>();
1848     Path editsdir = getRegionDirRecoveredEditsDir(regiondir);
1849     if (!fs.exists(editsdir)) return filesSorted;
1850     FileStatus[] files = FSUtils.listStatus(fs, editsdir, new PathFilter() {
1851       @Override
1852       public boolean accept(Path p) {
1853         boolean result = false;
1854         try {
1855           // Return files and only files that match the editfile names pattern.
1856           // There can be other files in this directory other than edit files.
1857           // In particular, on error, we'll move aside the bad edit file giving
1858           // it a timestamp suffix.  See moveAsideBadEditsFile.
1859           Matcher m = EDITFILES_NAME_PATTERN.matcher(p.getName());
1860           result = fs.isFile(p) && m.matches();
1861           // Skip files whose names end with RECOVERED_LOG_TMPFILE_SUFFIX,
1862           // because it means the split-hlog thread is still writing them.
1863           if (p.getName().endsWith(RECOVERED_LOG_TMPFILE_SUFFIX)) {
1864             result = false;
1865           }
1866         } catch (IOException e) {
1867           LOG.warn("Failed isFile check on " + p);
1868         }
1869         return result;
1870       }
1871     });
1872     if (files == null) return filesSorted;
1873     for (FileStatus status: files) {
1874       filesSorted.add(status.getPath());
1875     }
1876     return filesSorted;
1877   }
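       /*
        * Caller sketch (illustrative): because recovered-edits file names are
        * zero-padded sequence ids, the sorted set yields files in replay
        * order; "fs" and "regiondir" are assumed caller-side names.
        *
        *   for (Path edits : HLog.getSplitEditFilesSorted(fs, regiondir)) {
        *     // replay edits from the lowest sequence id upward ...
        *   }
        */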
1878 
1879   /**
1880    * Move aside a bad edits file.
1881    * @param fs filesystem containing the edits file
1882    * @param edits Edits file to move aside.
1883    * @return The name of the moved aside file.
1884    * @throws IOException
1885    */
1886   public static Path moveAsideBadEditsFile(final FileSystem fs,
1887       final Path edits)
1888   throws IOException {
1889     Path moveAsideName = new Path(edits.getParent(), edits.getName() + "." +
1890       System.currentTimeMillis());
1891     if (!HBaseFileSystem.renameDirForFileSystem(fs, edits, moveAsideName)) {
1892       LOG.warn("Rename failed from " + edits + " to " + moveAsideName);
1893     }
1894     return moveAsideName;
1895   }
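       /*
        * Example (illustrative): a corrupt edits file
        * "<regiondir>/recovered.edits/0000000000000000124" would be renamed
        * alongside itself to "0000000000000000124.1285580123999", the suffix
        * being the current time in millis.
        */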
1896 
1897   /**
1898    * @param regiondir This regions directory in the filesystem.
1899    * @return The directory that holds recovered edits files for the region
1900    * <code>regiondir</code>
1901    */
1902   public static Path getRegionDirRecoveredEditsDir(final Path regiondir) {
1903     return new Path(regiondir, RECOVERED_EDITS_DIR);
1904   }
1905 
1906   public static final long FIXED_OVERHEAD = ClassSize.align(
1907     ClassSize.OBJECT + (5 * ClassSize.REFERENCE) +
1908     ClassSize.ATOMIC_INTEGER + Bytes.SIZEOF_INT + (3 * Bytes.SIZEOF_LONG));
1909 
1910   private static void usage() {
1911     System.err.println("Usage: HLog <ARGS>");
1912     System.err.println("Arguments:");
1913     System.err.println(" --dump  Dump a textual representation of one or more passed files");
1914     System.err.println("         For example: HLog --dump hdfs://example.com:9000/hbase/.logs/MACHINE/LOGFILE");
1915     System.err.println(" --split Split the passed directory of WAL logs");
1916     System.err.println("         For example: HLog --split hdfs://example.com:9000/hbase/.logs/DIR");
1917   }
1918 
1919   private static void split(final Configuration conf, final Path p)
1920   throws IOException {
1921     FileSystem fs = FileSystem.get(conf);
1922     if (!fs.exists(p)) {
1923       throw new FileNotFoundException(p.toString());
1924     }
1925     final Path baseDir = new Path(conf.get(HConstants.HBASE_DIR));
1926     final Path oldLogDir = new Path(baseDir, HConstants.HREGION_OLDLOGDIR_NAME);
1927     if (!fs.getFileStatus(p).isDir()) {
1928       throw new IOException(p + " is not a directory");
1929     }
1930 
1931     HLogSplitter logSplitter = HLogSplitter.createLogSplitter(
1932         conf, baseDir, p, oldLogDir, fs);
1933     logSplitter.splitLog();
1934   }
1935 
1936   /**
1937    * @return Coprocessor host.
1938    */
1939   public WALCoprocessorHost getCoprocessorHost() {
1940     return coprocessorHost;
1941   }
1942 
1943   /** @return whether there are deferred (appended but unsynced) entries; for tests */
1944   boolean hasDeferredEntries() {
1945     return lastDeferredTxid > syncedTillHere;
1946   }
1947 
1948   /**
1949    * Pass one or more log file names and it will either dump out a text version
1950    * on <code>stdout</code> or split the specified log files.
1951    *
1952    * @param args
1953    * @throws IOException
1954    */
1955   public static void main(String[] args) throws IOException {
1956     if (args.length < 2) {
1957       usage();
1958       System.exit(-1);
1959     }
1960     // either dump using the HLogPrettyPrinter or split, depending on args
1961     if (args[0].compareTo("--dump") == 0) {
1962       HLogPrettyPrinter.run(Arrays.copyOfRange(args, 1, args.length));
1963     } else if (args[0].compareTo("--split") == 0) {
1964       Configuration conf = HBaseConfiguration.create();
1965       for (int i = 1; i < args.length; i++) {
1966         try {
1967           conf.set("fs.default.name", args[i]);
1968           conf.set("fs.defaultFS", args[i]);
1969           Path logPath = new Path(args[i]);
1970           split(conf, logPath);
1971         } catch (Throwable t) {
1972           t.printStackTrace(System.err);
1973           System.exit(-1);
1974         }
1975       }
1976     } else {
1977       usage();
1978       System.exit(-1);
1979     }
1980   }
1981 }