/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Syncable;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.DrainBarrier;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HasThread;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.util.StringUtils;
import org.cloudera.htrace.Trace;
import org.cloudera.htrace.TraceScope;
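
/**
 * HLog implementation that stores all the edits for a region server in one
 * rolling file on a {@link FileSystem}, typically HDFS. This is the
 * write-ahead log ("WAL"): every mutation is appended (and usually synced)
 * here before it is applied to a memstore, so edits can be replayed after a
 * crash.
 *
 * <p>The log rolls to a new file via {@link #rollWriter()} once the current
 * file grows past <code>hbase.regionserver.logroll.multiplier</code> times
 * the block size. Files whose edits have all been flushed to HFiles are
 * moved to the old-logs directory by {@link #cleanOldLogs()}. Appends are
 * buffered by an internal {@link LogSyncer} and written out in batches
 * (group commit) by {@link #sync(long)}.
 */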
@InterfaceAudience.Private
class FSHLog implements HLog, Syncable {
  static final Log LOG = LogFactory.getLog(FSHLog.class);

  private final FileSystem fs;
  private final Path rootDir;
  private final Path dir;
  private final Configuration conf;

  /** Listeners that are called on WAL events. */
  private List<WALActionsListener> listeners =
    new CopyOnWriteArrayList<WALActionsListener>();
  private final long optionalFlushInterval;
  private final long blocksize;
  private final String prefix;
  private final AtomicLong unflushedEntries = new AtomicLong(0);
  private volatile long syncedTillHere = 0;
  private long lastDeferredTxid;
  private final Path oldLogDir;
  private volatile boolean logRollRunning;

  private WALCoprocessorHost coprocessorHost;

  /** The FSDataOutputStream of the current writer; kept so we can reflect on
   * its wrapped stream for replication info. */
  private FSDataOutputStream hdfs_out;

  private int minTolerableReplication;
  private Method getNumCurrentReplicas; // refers to DFSOutputStream.getNumCurrentReplicas
  final static Object [] NO_ARGS = new Object []{};

  /** Tracks in-progress appends/flushes so close() can wait for them to drain. */
  private DrainBarrier closeBarrier = new DrainBarrier();

  /** The current WAL writer; a new writer is swapped in under updateLock on roll. */
  Writer writer;

  /**
   * Map of all log files, keyed by the highest log-sequence-id that could be
   * in the file; used by {@link #cleanOldLogs()} to decide what can be archived.
   */
  final SortedMap<Long, Path> outputfiles =
    Collections.synchronizedSortedMap(new TreeMap<Long, Path>());

  /** Guards updates to {@link #oldestUnflushedSeqNums} and
   * {@link #oldestFlushingSeqNums}. */
  private final Object oldestSeqNumsLock = new Object();

  /** Serializes log rolls. */
  private final Object rollWriterLock = new Object();

  /**
   * Map of encoded region names to the sequence id of the oldest edit that
   * has not yet been flushed from that region's memstore.
   */
  private final ConcurrentSkipListMap<byte [], Long> oldestUnflushedSeqNums =
    new ConcurrentSkipListMap<byte [], Long>(Bytes.BYTES_COMPARATOR);

  /**
   * Map of encoded region names to the oldest edit sequence id of the regions
   * whose memstore flush is currently in progress.
   */
  private final Map<byte[], Long> oldestFlushingSeqNums =
    new TreeMap<byte[], Long>(Bytes.BYTES_COMPARATOR);

  private volatile boolean closed = false;

  private final AtomicLong logSeqNum = new AtomicLong(0);

  private boolean forMeta = false;

  /** Creation timestamp (ms) of the current log file, used as the file number. */
  private volatile long filenum = -1;

  /** Number of entries written to the current log file. */
  private final AtomicInteger numEntries = new AtomicInteger(0);

  /**
   * Number of consecutive log rolls triggered because the HDFS pipeline had
   * fewer replicas than hbase.regionserver.hlog.tolerable.lowreplication.
   */
  private AtomicInteger consecutiveLogRolls = new AtomicInteger(0);
  private final int lowReplicationRollLimit;

  /** Disabled once the low-replication roll limit is hit; re-enabled once the
   * pipeline recovers. */
  private volatile boolean lowReplicationRollEnabled = true;

  /** Size threshold (bytes) at which we request a log roll. */
  private final long logrollsize;

  // updateLock serializes appends and writer swaps; flushLock serializes
  // group-commit flushes of pending writes in syncer().
  private final Object updateLock = new Object();
  private final Object flushLock = new Object();

  private final boolean enabled;

  /**
   * If more than this many logs, force a flush of the oldest regions so that
   * the oldest logs can be archived.
   */
  private final int maxLogs;

  /** Thread that periodically syncs deferred (unsynced) appends. */
  private final LogSyncer logSyncer;

  /** Number of log close errors tolerated before we abort. */
  private final int closeErrorsTolerated;

  private final AtomicInteger closeErrorCount = new AtomicInteger();
  private final MetricsWAL metrics;
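
  /**
   * Constructor.
   *
   * @param fs filesystem handle
   * @param root path for stored and archived hlogs
   * @param logDir dir where hlogs are stored
   * @param conf configuration to use
   * @throws IOException
   */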
  public FSHLog(final FileSystem fs, final Path root, final String logDir,
      final Configuration conf)
  throws IOException {
    this(fs, root, logDir, HConstants.HREGION_OLDLOGDIR_NAME,
        conf, null, true, null, false);
  }
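
  /**
   * Constructor.
   *
   * @param fs filesystem handle
   * @param root path for stored and archived hlogs
   * @param logDir dir where hlogs are stored
   * @param oldLogDir dir where hlogs are archived
   * @param conf configuration to use
   * @throws IOException
   */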
  public FSHLog(final FileSystem fs, final Path root, final String logDir,
      final String oldLogDir, final Configuration conf)
  throws IOException {
    this(fs, root, logDir, oldLogDir,
        conf, null, true, null, false);
  }
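
  /**
   * Constructor.
   *
   * @param fs filesystem handle
   * @param root path for stored and archived hlogs
   * @param logDir dir where hlogs are stored
   * @param conf configuration to use
   * @param listeners listeners on WAL events; registered before anything
   *   else is done, including the initial {@link #rollWriter()}
   * @param prefix filename prefix for log files; URL-encoded before use,
   *   and "hlog" is used if null or empty
   * @throws IOException
   */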
  public FSHLog(final FileSystem fs, final Path root, final String logDir,
      final Configuration conf, final List<WALActionsListener> listeners,
      final String prefix) throws IOException {
    this(fs, root, logDir, HConstants.HREGION_OLDLOGDIR_NAME,
        conf, listeners, true, prefix, false);
  }
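
  /**
   * Create an edit log at the given <code>dir</code> location. You should
   * never have to load an existing log: if there is a log at startup, it
   * should have already been processed and deleted by the time the HLog
   * object is started up.
   *
   * @param fs filesystem handle
   * @param root path to where logs and oldlogs live
   * @param logDir dir where hlogs are stored
   * @param oldLogDir dir where hlogs are archived
   * @param conf configuration to use
   * @param listeners listeners on WAL events; registered before anything
   *   else is done, including the initial {@link #rollWriter()}
   * @param failIfLogDirExists if true, throw an IOException if dir already exists
   * @param prefix filename prefix for log files; URL-encoded before use,
   *   and "hlog" is used if null or empty
   * @param forMeta if this hlog is meant for meta updates
   * @throws IOException
   */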
  public FSHLog(final FileSystem fs, final Path root, final String logDir,
      final String oldLogDir, final Configuration conf,
      final List<WALActionsListener> listeners,
      final boolean failIfLogDirExists, final String prefix, boolean forMeta)
  throws IOException {
    super();
    this.fs = fs;
    this.rootDir = root;
    this.dir = new Path(this.rootDir, logDir);
    this.oldLogDir = new Path(this.rootDir, oldLogDir);
    this.forMeta = forMeta;
    this.conf = conf;

    if (listeners != null) {
      for (WALActionsListener i: listeners) {
        registerWALActionsListener(i);
      }
    }

    this.blocksize = this.conf.getLong("hbase.regionserver.hlog.blocksize",
        FSUtils.getDefaultBlockSize(this.fs, this.dir));
    // Roll at 95% of block size by default.
    float multi = conf.getFloat("hbase.regionserver.logroll.multiplier", 0.95f);
    this.logrollsize = (long)(this.blocksize * multi);
    this.optionalFlushInterval =
      conf.getLong("hbase.regionserver.optionallogflushinterval", 1 * 1000);

    this.maxLogs = conf.getInt("hbase.regionserver.maxlogs", 32);
    this.minTolerableReplication = conf.getInt(
        "hbase.regionserver.hlog.tolerable.lowreplication",
        FSUtils.getDefaultReplication(fs, this.dir));
    this.lowReplicationRollLimit = conf.getInt(
        "hbase.regionserver.hlog.lowreplication.rolllimit", 5);
    this.enabled = conf.getBoolean("hbase.regionserver.hlog.enabled", true);
    this.closeErrorsTolerated = conf.getInt(
        "hbase.regionserver.logroll.errors.tolerated", 0);

    this.logSyncer = new LogSyncer(this.optionalFlushInterval);

    LOG.info("WAL/HLog configuration: blocksize=" +
        StringUtils.byteDesc(this.blocksize) +
        ", rollsize=" + StringUtils.byteDesc(this.logrollsize) +
        ", enabled=" + this.enabled +
        ", optionallogflushinterval=" + this.optionalFlushInterval + "ms");

    // If prefix is null or empty, just name the logs "hlog".
    this.prefix = prefix == null || prefix.isEmpty() ?
        "hlog" : URLEncoder.encode(prefix, "UTF8");

    boolean dirExists = false;
    if (failIfLogDirExists && (dirExists = this.fs.exists(dir))) {
      throw new IOException("Target HLog directory already exists: " + dir);
    }
    if (!dirExists && !fs.mkdirs(dir)) {
      throw new IOException("Unable to mkdir " + dir);
    }

    if (!fs.exists(this.oldLogDir)) {
      if (!fs.mkdirs(this.oldLogDir)) {
        throw new IOException("Unable to mkdir " + this.oldLogDir);
      }
    }
    // rollWriter sets this.hdfs_out if it can.
    rollWriter();

    // handle the reflection necessary to call getNumCurrentReplicas()
    this.getNumCurrentReplicas = getGetNumCurrentReplicas(this.hdfs_out);

    // If the flush interval is zero or less, every append is synced by the
    // caller; don't start the syncer thread.
    if (this.optionalFlushInterval > 0) {
      Threads.setDaemonThreadRunning(logSyncer.getThread(), Thread.currentThread().getName()
          + ".logSyncer");
    } else {
      LOG.info("hbase.regionserver.optionallogflushinterval is set to "
          + this.optionalFlushInterval + ". Deferred log syncing won't work. "
          + "Any Mutation marked for deferred sync will be flushed immediately.");
    }
    coprocessorHost = new WALCoprocessorHost(this, conf);

    this.metrics = new MetricsWAL();
  }
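
  /**
   * Look up the 'getNumCurrentReplicas' method on the stream wrapped by the
   * passed <code>os</code>. The method is not part of the public
   * OutputStream API (it comes from the HDFS-826 patch), so it is found
   * reflectively and made accessible.
   * @return the Method, or null if unavailable
   */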
  private Method getGetNumCurrentReplicas(final FSDataOutputStream os) {
    Method m = null;
    if (os != null) {
      Class<? extends OutputStream> wrappedStreamClass = os.getWrappedStream()
          .getClass();
      try {
        m = wrappedStreamClass.getDeclaredMethod("getNumCurrentReplicas",
            new Class<?>[] {});
        m.setAccessible(true);
      } catch (NoSuchMethodException e) {
        LOG.info("FileSystem's output stream doesn't support"
            + " getNumCurrentReplicas; --HDFS-826 not available; fsOut="
            + wrappedStreamClass.getName());
      } catch (SecurityException e) {
        LOG.info("No access to getNumCurrentReplicas on"
            + " FileSystem's output stream; --HDFS-826 not available; fsOut="
            + wrappedStreamClass.getName(), e);
        m = null;
      }
    }
    if (m != null) {
      if (LOG.isTraceEnabled()) LOG.trace("Using getNumCurrentReplicas--HDFS-826");
    }
    return m;
  }

  @Override
  public void registerWALActionsListener(final WALActionsListener listener) {
    this.listeners.add(listener);
  }

  @Override
  public boolean unregisterWALActionsListener(final WALActionsListener listener) {
    return this.listeners.remove(listener);
  }

  @Override
  public long getFilenum() {
    return this.filenum;
  }

  @Override
  public void setSequenceNumber(final long newvalue) {
    for (long id = this.logSeqNum.get(); id < newvalue &&
        !this.logSeqNum.compareAndSet(id, newvalue); id = this.logSeqNum.get()) {
      // This could spin on occasion, but better the occasional spin than
      // locking every increment of the sequence number.
      LOG.debug("Changed sequenceid from " + id + " to " + newvalue);
    }
  }

  @Override
  public long getSequenceNumber() {
    return logSeqNum.get();
  }
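
  /**
   * Used internally and by tests.
   * @return the stream wrapped by the current writer's FSDataOutputStream;
   * this is the object {@link #getLogReplication()} reflects against to read
   * the current pipeline replica count
   */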
  OutputStream getOutputStream() {
    return this.hdfs_out.getWrappedStream();
  }

  @Override
  public byte [][] rollWriter() throws FailedLogCloseException, IOException {
    return rollWriter(false);
  }

  @Override
  public byte [][] rollWriter(boolean force)
      throws FailedLogCloseException, IOException {
    synchronized (rollWriterLock) {
      // Return if nothing to flush.
      if (!force && this.writer != null && this.numEntries.get() <= 0) {
        return null;
      }
      byte [][] regionsToFlush = null;
      if (closed) {
        LOG.debug("HLog closed. Skipping rolling of writer");
        return null;
      }
      try {
        this.logRollRunning = true;
        if (!closeBarrier.beginOp()) {
          LOG.debug("HLog closing. Skipping rolling of writer");
          return regionsToFlush;
        }
        // Do all the preparation outside of the updateLock to block incoming
        // writes as little as possible.
        long currentFilenum = this.filenum;
        Path oldPath = null;
        if (currentFilenum > 0) {
          // computeFilename will take care of meta hlog filenames.
          oldPath = computeFilename(currentFilenum);
        }
        this.filenum = System.currentTimeMillis();
        Path newPath = computeFilename();

        // Tell our listeners that a new log is about to be created.
        if (!this.listeners.isEmpty()) {
          for (WALActionsListener i : this.listeners) {
            i.preLogRoll(oldPath, newPath);
          }
        }
        FSHLog.Writer nextWriter = this.createWriterInstance(fs, newPath, conf);
        // Can we get at the dfsclient output stream?
        FSDataOutputStream nextHdfsOut = null;
        if (nextWriter instanceof ProtobufLogWriter) {
          nextHdfsOut = ((ProtobufLogWriter)nextWriter).getStream();
        }

        Path oldFile = null;
        int oldNumEntries = 0;
        synchronized (updateLock) {
          // Clean up current writer and swap in the new one.
          oldNumEntries = this.numEntries.get();
          oldFile = cleanupCurrentWriter(currentFilenum);
          this.writer = nextWriter;
          this.hdfs_out = nextHdfsOut;
          this.numEntries.set(0);
        }
        if (oldFile == null) LOG.info("New WAL " + FSUtils.getPath(newPath));
        else LOG.info("Rolled WAL " + FSUtils.getPath(oldFile) + " with entries=" + oldNumEntries +
            ", filesize=" + StringUtils.humanReadableInt(this.fs.getFileStatus(oldFile).getLen()) +
            "; new WAL " + FSUtils.getPath(newPath));

        // Tell our listeners that a new log was created.
        if (!this.listeners.isEmpty()) {
          for (WALActionsListener i : this.listeners) {
            i.postLogRoll(oldPath, newPath);
          }
        }

        // Can we delete any of the old log files?
        if (getNumLogFiles() > 0) {
          cleanOldLogs();
          regionsToFlush = getRegionsToForceFlush();
        }
      } finally {
        this.logRollRunning = false;
        closeBarrier.endOp();
      }
      return regionsToFlush;
    }
  }
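
  /**
   * This method allows subclasses to inject different writers without having
   * to extend other methods like rollWriter().
   *
   * @param fs filesystem handle
   * @param path path of the new WAL file
   * @param conf configuration to use
   * @return the newly created writer instance
   * @throws IOException
   */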
  protected Writer createWriterInstance(final FileSystem fs, final Path path,
      final Configuration conf) throws IOException {
    if (forMeta) {
      // Placeholder: meta WALs currently get no special treatment here
      // (e.g. no higher replication is set for them).
    }
    return HLogFactory.createWriter(fs, path, conf);
  }
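
  /**
   * Archive old logs: a log can be archived once all its edits have been
   * flushed to HFiles. A file is eligible if the highest sequence id it can
   * contain is smaller than the oldest outstanding (unflushed or currently
   * flushing) sequence id across all regions.
   * @throws IOException
   */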
  private void cleanOldLogs() throws IOException {
    long oldestOutstandingSeqNum = Long.MAX_VALUE;
    synchronized (oldestSeqNumsLock) {
      Long oldestFlushing = (oldestFlushingSeqNums.size() > 0)
          ? Collections.min(oldestFlushingSeqNums.values()) : Long.MAX_VALUE;
      Long oldestUnflushed = (oldestUnflushedSeqNums.size() > 0)
          ? Collections.min(oldestUnflushedSeqNums.values()) : Long.MAX_VALUE;
      oldestOutstandingSeqNum = Math.min(oldestFlushing, oldestUnflushed);
    }

    // Get the set of all log files whose last sequence number is smaller than
    // the oldest outstanding edit's sequence number.
    TreeSet<Long> sequenceNumbers = new TreeSet<Long>(this.outputfiles.headMap(
        oldestOutstandingSeqNum).keySet());
    // Now remove old log files (if any).
    if (LOG.isDebugEnabled()) {
      if (sequenceNumbers.size() > 0) {
        LOG.debug("Found " + sequenceNumbers.size() + " hlogs to remove" +
            " out of total " + this.outputfiles.size() + ";" +
            " oldest outstanding sequenceid is " + oldestOutstandingSeqNum);
      }
    }
    for (Long seq : sequenceNumbers) {
      archiveLogFile(this.outputfiles.remove(seq), seq);
    }
  }
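
  /**
   * Return the memstores (encoded region names) whose oldest outstanding
   * edit has a sequence id equal to or older than the passed
   * <code>walSeqNum</code>.
   * @param walSeqNum sequence id to compare against
   * @param regionsToSeqNums map of encoded region names to the sequence id
   *   of each region's oldest outstanding edit
   * @return regions found, or null if none
   */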
  static byte[][] findMemstoresWithEditsEqualOrOlderThan(
      final long walSeqNum, final Map<byte[], Long> regionsToSeqNums) {
    List<byte[]> regions = null;
    for (Map.Entry<byte[], Long> e : regionsToSeqNums.entrySet()) {
      if (e.getValue().longValue() <= walSeqNum) {
        if (regions == null) regions = new ArrayList<byte[]>();
        regions.add(e.getKey());
      }
    }
    return regions == null ? null : regions
        .toArray(new byte[][] { HConstants.EMPTY_BYTE_ARRAY });
  }

  private byte[][] getRegionsToForceFlush() throws IOException {
    // If too many log files, figure out which regions we need to flush.
    // Result is an array of encoded region names.
    byte [][] regions = null;
    int logCount = getNumLogFiles();
    if (logCount > this.maxLogs && logCount > 0) {
      synchronized (oldestSeqNumsLock) {
        regions = findMemstoresWithEditsEqualOrOlderThan(this.outputfiles.firstKey(),
            this.oldestUnflushedSeqNums);
      }
      if (regions != null) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < regions.length; i++) {
          if (i > 0) sb.append(", ");
          sb.append(Bytes.toStringBinary(regions[i]));
        }
        LOG.info("Too many hlogs: logs=" + logCount + ", maxlogs=" +
            this.maxLogs + "; forcing flush of " + regions.length + " region(s): " +
            sb.toString());
      }
    }
    return regions;
  }
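
  /**
   * Cleans up the current writer, closing it and queueing the just-closed
   * file for archiving. Presumes we are operating inside an updateLock scope.
   * @return path of the closed file, or null if there was no current writer
   * @throws FailedLogCloseException if the close fails more times than
   *   <code>hbase.regionserver.logroll.errors.tolerated</code>, or there are
   *   unsynced deferred edits that would be lost
   */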
  Path cleanupCurrentWriter(final long currentfilenum) throws IOException {
    Path oldFile = null;
    if (this.writer != null) {
      // Close the current writer (a new one is swapped in by the caller).
      try {
        // Wait till all current transactions are written to the hlog.
        // No new transactions can occur because we have the updateLock.
        if (this.unflushedEntries.get() != this.syncedTillHere) {
          LOG.debug("cleanupCurrentWriter " +
              " waiting for transactions to get synced " +
              " total " + this.unflushedEntries.get() +
              " synced till here " + syncedTillHere);
          sync();
        }
        this.writer.close();
        this.writer = null;
        closeErrorCount.set(0);
      } catch (IOException e) {
        LOG.error("Failed close of HLog writer", e);
        int errors = closeErrorCount.incrementAndGet();
        if (errors <= closeErrorsTolerated && !hasDeferredEntries()) {
          LOG.warn("Riding over HLog close failure! error count=" + errors);
        } else {
          if (hasDeferredEntries()) {
            LOG.error("Aborting due to unflushed edits in HLog");
          }
          // Failed close of the log file means we could be losing edits.
          // Shut ourselves down to minimize loss rather than trying to
          // keep going.
          FailedLogCloseException flce =
              new FailedLogCloseException("#" + currentfilenum);
          flce.initCause(e);
          throw flce;
        }
      }
      if (currentfilenum >= 0) {
        oldFile = computeFilename(currentfilenum);
        this.outputfiles.put(Long.valueOf(this.logSeqNum.get()), oldFile);
      }
    }
    return oldFile;
  }

  private void archiveLogFile(final Path p, final Long seqno) throws IOException {
    Path newPath = getHLogArchivePath(this.oldLogDir, p);
    LOG.info("moving old hlog file " + FSUtils.getPath(p) +
        " whose highest sequenceid is " + seqno + " to " +
        FSUtils.getPath(newPath));

    // Tell our listeners that a log is going to be archived.
    if (!this.listeners.isEmpty()) {
      for (WALActionsListener i : this.listeners) {
        i.preLogArchive(p, newPath);
      }
    }
    if (!FSUtils.renameAndSetModifyTime(this.fs, p, newPath)) {
      throw new IOException("Unable to rename " + p + " to " + newPath);
    }
    // Tell our listeners that a log has been archived.
    if (!this.listeners.isEmpty()) {
      for (WALActionsListener i : this.listeners) {
        i.postLogArchive(p, newPath);
      }
    }
  }
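
  /**
   * Convenience method that computes the filename for the current HLog
   * file number.
   * @return Path
   */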
  protected Path computeFilename() {
    return computeFilename(this.filenum);
  }
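
  /**
   * Compute the filename for the given HLog file number.
   * @param filenum file number to use
   * @return Path
   */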
  protected Path computeFilename(long filenum) {
    if (filenum < 0) {
      throw new RuntimeException("hlog file number can't be < 0");
    }
    String child = prefix + "." + filenum;
    if (forMeta) {
      child += HLog.META_HLOG_FILE_EXTN;
    }
    return new Path(dir, child);
  }

  @Override
  public void closeAndDelete() throws IOException {
    close();
    if (!fs.exists(this.dir)) return;
    FileStatus[] files = fs.listStatus(this.dir);
    if (files != null) {
      for(FileStatus file : files) {
        // A hlog file is never deleted outright; move it to the archive dir.
        Path p = getHLogArchivePath(this.oldLogDir, file.getPath());
        // Tell our listeners that a log is going to be archived.
        if (!this.listeners.isEmpty()) {
          for (WALActionsListener i : this.listeners) {
            i.preLogArchive(file.getPath(), p);
          }
        }
        if (!FSUtils.renameAndSetModifyTime(fs, file.getPath(), p)) {
          throw new IOException("Unable to rename " + file.getPath() + " to " + p);
        }
        // Tell our listeners that a log was archived.
        if (!this.listeners.isEmpty()) {
          for (WALActionsListener i : this.listeners) {
            i.postLogArchive(file.getPath(), p);
          }
        }
      }
      LOG.debug("Moved " + files.length + " WAL file(s) to " + FSUtils.getPath(this.oldLogDir));
    }
    if (!fs.delete(dir, true)) {
      LOG.info("Unable to delete " + dir);
    }
  }

  @Override
  public void close() throws IOException {
    if (this.closed) {
      return;
    }
    // If optionalFlushInterval is zero or less, the syncer thread was never started.
    if (this.optionalFlushInterval > 0) {
      try {
        logSyncer.close();
        // Wait for the syncer thread to exit.
        logSyncer.join(this.optionalFlushInterval * 2);
      } catch (InterruptedException e) {
        LOG.error("Exception while waiting for syncer thread to die", e);
        Thread.currentThread().interrupt();
      }
    }
    try {
      // Prevent all further flushing and rolling.
      closeBarrier.stopAndDrainOps();
    } catch (InterruptedException e) {
      LOG.error("Exception while waiting for cache flushes and log rolls", e);
      Thread.currentThread().interrupt();
    }

    // Tell our listeners that the log is closing.
    if (!this.listeners.isEmpty()) {
      for (WALActionsListener i : this.listeners) {
        i.logCloseRequested();
      }
    }
    synchronized (updateLock) {
      this.closed = true;
      if (LOG.isDebugEnabled()) {
        LOG.debug("Closing WAL writer in " + this.dir.toString());
      }
      if (this.writer != null) {
        this.writer.close();
        this.writer = null;
      }
    }
  }
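
  /**
   * Create the log key for a new edit. Exposed so subclasses can supply
   * their own HLogKey implementations.
   *
   * @param encodedRegionName encoded name of the region, as returned by
   *   <code>HRegionInfo.getEncodedNameAsBytes()</code>
   * @param tableName name of the table the edit belongs to
   * @param seqnum log sequence number of the edit
   * @param now timestamp to use
   * @param clusterId originating cluster id (for replication)
   * @return a new log key
   */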
  protected HLogKey makeKey(byte[] encodedRegionName, TableName tableName, long seqnum,
      long now, UUID clusterId) {
    return new HLogKey(encodedRegionName, tableName, seqnum, now, clusterId);
  }

  @Override
  public void append(HRegionInfo info, TableName tableName, WALEdit edits,
      final long now, HTableDescriptor htd)
  throws IOException {
    append(info, tableName, edits, now, htd, true);
  }

  @Override
  public void append(HRegionInfo info, TableName tableName, WALEdit edits,
      final long now, HTableDescriptor htd, boolean isInMemstore) throws IOException {
    append(info, tableName, edits, HConstants.DEFAULT_CLUSTER_ID, now, htd, true, isInMemstore);
  }
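
  /**
   * Append a set of edits to the log. Log edits are keyed by (encoded)
   * regionName, rowname, and log-sequence-id. Appends are synchronized on
   * the updateLock, which prevents them from racing with the completion of a
   * cache flush or with a log roll.
   *
   * @param info the region the edits belong to
   * @param tableName name of the table the edits belong to
   * @param edits the edits to append
   * @param clusterId the originating cluster id for this edit (for replication)
   * @param now timestamp to use
   * @param doSync whether to sync before returning (the sync is skipped for
   *   deferred-log-flush tables on non-meta regions)
   * @param isInMemstore whether the edits land in a memstore (and hence must
   *   be tracked for flush accounting)
   * @return txid of this transaction
   * @throws IOException
   */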
  @SuppressWarnings("deprecation")
  private long append(HRegionInfo info, TableName tableName, WALEdit edits, UUID clusterId,
      final long now, HTableDescriptor htd, boolean doSync, boolean isInMemstore)
    throws IOException {
    if (edits.isEmpty()) return this.unflushedEntries.get();
    if (this.closed) {
      throw new IOException("Cannot append; log is closed");
    }
    TraceScope traceScope = Trace.startSpan("FSHlog.append");
    try {
      long txid = 0;
      synchronized (this.updateLock) {
        long seqNum = obtainSeqNum();
        // The 'oldestUnflushedSeqNums' map holds, for each region, the
        // sequence number of the oldest write since that region's last
        // flush: putIfAbsent only records the first edit after a flush.
        // When a cache flush completes, the region's entry is removed, which
        // is what lets cleanOldLogs() archive its older log files.
        // Use the encoded region name; it is shorter and consistent with
        // the rest of the WAL bookkeeping.
        byte [] encodedRegionName = info.getEncodedNameAsBytes();
        if (isInMemstore) this.oldestUnflushedSeqNums.putIfAbsent(encodedRegionName, seqNum);
        HLogKey logKey = makeKey(encodedRegionName, tableName, seqNum, now, clusterId);
        doWrite(info, logKey, edits, htd);
        this.numEntries.incrementAndGet();
        txid = this.unflushedEntries.incrementAndGet();
        if (htd.isDeferredLogFlush()) {
          lastDeferredTxid = txid;
        }
      }
      // Sync if it is the catalog region; otherwise, sync only if the table
      // does not use deferred log flushing.
      if (doSync &&
          (info.isMetaRegion() ||
          !htd.isDeferredLogFlush())) {
        // sync txn to file system
        this.sync(txid);
      }
      return txid;
    } finally {
      traceScope.close();
    }
  }

  @Override
  public long appendNoSync(HRegionInfo info, TableName tableName, WALEdit edits,
      UUID clusterId, final long now, HTableDescriptor htd)
    throws IOException {
    return append(info, tableName, edits, clusterId, now, htd, false, true);
  }
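
  /**
   * Thread that holds the queue of appended but not-yet-synced entries
   * (pendingWrites) and periodically syncs them if nothing else does within
   * <code>optionalFlushInterval</code> milliseconds. Buffering appends here
   * and flushing them in batches gives group commit, which is what makes
   * deferred log flush cheap.
   */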
  class LogSyncer extends HasThread {

    private final long optionalFlushInterval;

    private final AtomicBoolean closeLogSyncer = new AtomicBoolean(false);

    // List of pending writes to the HLog. These correspond to transactions
    // that have not yet returned to the client. We keep them cached here
    // instead of writing them to HDFS piecemeal: batching both the
    // write-to-hdfs and the sync-to-hdfs improves overall throughput.
    private List<Entry> pendingWrites = new LinkedList<Entry>();

    LogSyncer(long optionalFlushInterval) {
      this.optionalFlushInterval = optionalFlushInterval;
    }

    @Override
    public void run() {
      try {
        // Check the interrupt flag explicitly; waiting with a timeout does
        // not always throw on interrupt.
        while (!this.isInterrupted() && !closeLogSyncer.get()) {

          try {
            if (unflushedEntries.get() <= syncedTillHere) {
              synchronized (closeLogSyncer) {
                closeLogSyncer.wait(this.optionalFlushInterval);
              }
            }
            // Sync since we either waited out the interval or there are
            // unflushed entries. Entries appended but not yet sync'd
            // (deferred log flush) are taken care of here.
            sync();
          } catch (IOException e) {
            LOG.error("Error while syncing, requesting close of hlog", e);
            requestLogRoll();
            Threads.sleep(this.optionalFlushInterval);
          }
        }
      } catch (InterruptedException e) {
        LOG.debug(getName() + " interrupted while waiting for sync requests");
      } finally {
        LOG.info(getName() + " exiting");
      }
    }

    // Appends new writes to pendingWrites. It is better to keep them in our
    // own queue than to write them to the HDFS output stream one at a time.
    synchronized void append(Entry e) throws IOException {
      pendingWrites.add(e);
    }

    // Returns all currently pending writes. New writes will
    // accumulate in a fresh list.
    synchronized List<Entry> getPendingWrites() {
      List<Entry> save = this.pendingWrites;
      this.pendingWrites = new LinkedList<Entry>();
      return save;
    }

    // Writes out the pending entries to the hlog writer.
    void hlogFlush(Writer writer, List<Entry> pending) throws IOException {
      if (pending == null) return;

      // write out all accumulated entries to log.
      for (Entry e : pending) {
        writer.append(e);
      }
    }

    void close() {
      synchronized (closeLogSyncer) {
        closeLogSyncer.set(true);
        closeLogSyncer.notifyAll();
      }
    }
  }

  // Sync all known transactions.
  private void syncer() throws IOException {
    syncer(this.unflushedEntries.get()); // sync all pending items
  }
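
  // Sync the log up to the given transaction id: group-commit any pending
  // appends to the writer, then sync the underlying stream. Returns
  // immediately if txid has already been synced.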
  private void syncer(long txid) throws IOException {
    // If the transaction we are interested in is already synced,
    // return immediately.
    if (txid <= this.syncedTillHere) {
      return;
    }
    Writer tempWriter;
    synchronized (this.updateLock) {
      if (this.closed) return;
      // Note that a parallel sync or roll can close tempWriter out from
      // under us; the current method of dealing with this is to catch the
      // resulting exception and retry with the updated writer.
      tempWriter = this.writer;
    }
    try {
      long doneUpto;
      long now = EnvironmentEdgeManager.currentTimeMillis();
      // First flush all pending writes to HDFS, then issue the sync. If the
      // sync is successful, update syncedTillHere to indicate that all
      // transactions up to that number have been durably written.
      IOException ioe = null;
      List<Entry> pending = null;
      synchronized (flushLock) {
        if (txid <= this.syncedTillHere) {
          return;
        }
        doneUpto = this.unflushedEntries.get();
        pending = logSyncer.getPendingWrites();
        try {
          logSyncer.hlogFlush(tempWriter, pending);
        } catch(IOException io) {
          ioe = io;
          LOG.error("syncer encountered error, will retry. txid=" + txid, ioe);
        }
      }
      if (ioe != null && pending != null) {
        // The writer may have been replaced by a roll; retry the flush with
        // the current writer.
        synchronized (this.updateLock) {
          synchronized (flushLock) {
            tempWriter = this.writer;
            logSyncer.hlogFlush(tempWriter, pending);
          }
        }
      }
      // Another thread might have synced in the meantime; avoid double-syncing.
      if (txid <= this.syncedTillHere) {
        return;
      }
      try {
        if (tempWriter != null) tempWriter.sync();
      } catch(IOException ex) {
        synchronized (this.updateLock) {
          // The writer may have been rolled; retry the sync with the
          // current writer.
          tempWriter = this.writer;
          if (tempWriter != null) tempWriter.sync();
        }
      }
      this.syncedTillHere = Math.max(this.syncedTillHere, doneUpto);

      this.metrics.finishSync(EnvironmentEdgeManager.currentTimeMillis() - now);
      // Best-effort checks; logRollRunning is not protected by any lock here,
      // so a roll might start as soon as we enter the "if".
      if (!this.logRollRunning) {
        checkLowReplication();
        try {
          if (tempWriter.getLength() > this.logrollsize) {
            requestLogRoll();
          }
        } catch (IOException x) {
          LOG.debug("Log roll failed and will be retried. (This is not an error)");
        }
      }
    } catch (IOException e) {
      LOG.fatal("Could not sync. Requesting roll of hlog", e);
      requestLogRoll();
      throw e;
    }
  }

  private void checkLowReplication() {
    // If the number of replicas in the HDFS pipeline has fallen below the
    // configured minimum, roll the log.
    try {
      int numCurrentReplicas = getLogReplication();
      if (numCurrentReplicas != 0
          && numCurrentReplicas < this.minTolerableReplication) {
        if (this.lowReplicationRollEnabled) {
          if (this.consecutiveLogRolls.get() < this.lowReplicationRollLimit) {
            LOG.warn("HDFS pipeline error detected. Found "
                + numCurrentReplicas + " replicas but expecting no less than "
                + this.minTolerableReplication + " replicas. "
                + "Requesting close of hlog.");
            requestLogRoll();
            // If rollWriter is requested, increase consecutiveLogRolls.
            // Once it is larger than lowReplicationRollLimit, disable the
            // LowReplication-Roller.
            this.consecutiveLogRolls.getAndIncrement();
          } else {
            LOG.warn("Too many consecutive RollWriter requests; this likely means "
                + "the total number of live datanodes is lower than the tolerable replicas.");
            this.consecutiveLogRolls.set(0);
            this.lowReplicationRollEnabled = false;
          }
        }
      } else if (numCurrentReplicas >= this.minTolerableReplication) {
        if (!this.lowReplicationRollEnabled) {
          // A new writer's log replication is always the default value, so
          // don't re-enable the LowReplication-Roller for a fresh writer;
          // numEntries <= 1 is treated as a new writer.
          if (this.numEntries.get() <= 1) {
            return;
          }
          // Once the live datanode count and the replicas return to normal,
          // re-enable the LowReplication-Roller.
          this.lowReplicationRollEnabled = true;
          LOG.info("LowReplication-Roller was enabled.");
        }
      }
    } catch (Exception e) {
      LOG.warn("Unable to invoke DFSOutputStream.getNumCurrentReplicas: " + e +
          "; still proceeding ahead...");
    }
  }
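
  /**
   * This method gets the datanode replication count for the current HLog
   * file by reflecting on the wrapped output stream (see HDFS-826).
   *
   * If the pipeline isn't started yet or is empty, you will get the default
   * replication factor; a return value of 0 means the reflective method is
   * not available (i.e. no HDFS-826 support).
   * @throws IllegalArgumentException
   * @throws IllegalAccessException
   * @throws InvocationTargetException
   */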
  int getLogReplication()
      throws IllegalArgumentException, IllegalAccessException, InvocationTargetException {
    if (this.getNumCurrentReplicas != null && this.hdfs_out != null) {
      Object repl = this.getNumCurrentReplicas.invoke(getOutputStream(), NO_ARGS);
      if (repl instanceof Integer) {
        return ((Integer)repl).intValue();
      }
    }
    return 0;
  }

  boolean canGetCurReplicas() {
    return this.getNumCurrentReplicas != null;
  }

  @Override
  public void hsync() throws IOException {
    syncer();
  }

  @Override
  public void hflush() throws IOException {
    syncer();
  }

  @Override
  public void sync() throws IOException {
    syncer();
  }

  @Override
  public void sync(long txid) throws IOException {
    syncer(txid);
  }

  private void requestLogRoll() {
    if (!this.listeners.isEmpty()) {
      for (WALActionsListener i: this.listeners) {
        i.logRollRequested();
      }
    }
  }

  protected void doWrite(HRegionInfo info, HLogKey logKey, WALEdit logEdit,
      HTableDescriptor htd)
  throws IOException {
    if (!this.enabled) {
      return;
    }
    if (!this.listeners.isEmpty()) {
      for (WALActionsListener i: this.listeners) {
        i.visitLogEntryBeforeWrite(htd, logKey, logEdit);
      }
    }
    try {
      long now = EnvironmentEdgeManager.currentTimeMillis();
      // Coprocessor hook: a preWALWrite returning true bypasses the write.
      if (!coprocessorHost.preWALWrite(info, logKey, logEdit)) {
        if (logEdit.isReplay()) {
          // Set replication scopes null so replayed edits are not re-replicated.
          logKey.setScopes(null);
        }
        // Write to our buffer for the hlog file.
        logSyncer.append(new FSHLog.Entry(logKey, logEdit));
      }
      long took = EnvironmentEdgeManager.currentTimeMillis() - now;
      coprocessorHost.postWALWrite(info, logKey, logEdit);
      long len = 0;
      for (KeyValue kv : logEdit.getKeyValues()) {
        len += kv.getLength();
      }
      this.metrics.finishAppend(took, len);
    } catch (IOException e) {
      LOG.fatal("Could not append. Requesting close of hlog", e);
      requestLogRoll();
      throw e;
    }
  }

  /** @return how many items have been added to the current log */
  int getNumEntries() {
    return numEntries.get();
  }

  @Override
  public long obtainSeqNum() {
    return this.logSeqNum.incrementAndGet();
  }

  /** @return the number of log files in use */
  int getNumLogFiles() {
    return outputfiles.size();
  }

  @Override
  public Long startCacheFlush(final byte[] encodedRegionName) {
    Long oldRegionSeqNum = null;
    if (!closeBarrier.beginOp()) {
      return null;
    }
    synchronized (oldestSeqNumsLock) {
      oldRegionSeqNum = this.oldestUnflushedSeqNums.remove(encodedRegionName);
      if (oldRegionSeqNum != null) {
        Long oldValue = this.oldestFlushingSeqNums.put(encodedRegionName, oldRegionSeqNum);
        assert oldValue == null : "Flushing map not cleaned up for "
            + Bytes.toString(encodedRegionName);
      }
    }
    if (oldRegionSeqNum == null) {
      // If the region has no oldest unflushed sequence id, either it is
      // already flushing (which would make this call invalid) or there were
      // no appends since the last flush; preserve the old behavior of
      // warning and continuing.
      LOG.warn("Couldn't find oldest seqNum for the region we are about to flush: ["
          + Bytes.toString(encodedRegionName) + "]");
    }
    return obtainSeqNum();
  }

  @Override
  public void completeCacheFlush(final byte [] encodedRegionName) {
    synchronized (oldestSeqNumsLock) {
      this.oldestFlushingSeqNums.remove(encodedRegionName);
    }
    closeBarrier.endOp();
  }

  @Override
  public void abortCacheFlush(byte[] encodedRegionName) {
    Long currentSeqNum = null, seqNumBeforeFlushStarts = null;
    synchronized (oldestSeqNumsLock) {
      seqNumBeforeFlushStarts = this.oldestFlushingSeqNums.remove(encodedRegionName);
      if (seqNumBeforeFlushStarts != null) {
        currentSeqNum =
            this.oldestUnflushedSeqNums.put(encodedRegionName, seqNumBeforeFlushStarts);
      }
    }
    closeBarrier.endOp();
    if ((currentSeqNum != null)
        && (currentSeqNum.longValue() <= seqNumBeforeFlushStarts.longValue())) {
      String errorStr = "Region " + Bytes.toString(encodedRegionName) +
          " acquired edits out of order; current memstore seq=" + currentSeqNum
          + ", previous oldest unflushed id=" + seqNumBeforeFlushStarts;
      LOG.error(errorStr);
      assert false : errorStr;
      Runtime.getRuntime().halt(1);
    }
  }

  @Override
  public boolean isLowReplicationRollEnabled() {
    return lowReplicationRollEnabled;
  }

  /**
   * Get the directory we are making logs in.
   * @return dir
   */
  protected Path getDir() {
    return dir;
  }

  static Path getHLogArchivePath(Path oldLogDir, Path p) {
    return new Path(oldLogDir, p.getName());
  }

  static String formatRecoveredEditsFileName(final long seqid) {
    return String.format("%019d", seqid);
  }

  public static final long FIXED_OVERHEAD = ClassSize.align(
      ClassSize.OBJECT + (5 * ClassSize.REFERENCE) +
      ClassSize.ATOMIC_INTEGER + Bytes.SIZEOF_INT + (3 * Bytes.SIZEOF_LONG));

  private static void usage() {
    System.err.println("Usage: HLog <ARGS>");
    System.err.println("Arguments:");
    System.err.println(" --dump  Dump a textual representation of one or more passed files");
    System.err.println("         For example: HLog --dump hdfs://example.com:9000/hbase/.logs/MACHINE/LOGFILE");
    System.err.println(" --split Split the passed directory of WAL logs");
    System.err.println("         For example: HLog --split hdfs://example.com:9000/hbase/.logs/DIR");
  }

  private static void split(final Configuration conf, final Path p)
      throws IOException {
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(p)) {
      throw new FileNotFoundException(p.toString());
    }
    if (!fs.getFileStatus(p).isDir()) {
      throw new IOException(p + " is not a directory");
    }

    final Path baseDir = FSUtils.getRootDir(conf);
    final Path oldLogDir = new Path(baseDir, HConstants.HREGION_OLDLOGDIR_NAME);
    HLogSplitter.split(baseDir, p, oldLogDir, fs, conf);
  }

  @Override
  public WALCoprocessorHost getCoprocessorHost() {
    return coprocessorHost;
  }

  /** @return whether there are entries appended to the log but not yet synced */
  boolean hasDeferredEntries() {
    return lastDeferredTxid > syncedTillHere;
  }

  @Override
  public long getEarliestMemstoreSeqNum(byte[] encodedRegionName) {
    Long result = oldestUnflushedSeqNums.get(encodedRegionName);
    return result == null ? HConstants.NO_SEQNUM : result.longValue();
  }
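
  /**
   * Command-line entry point: pass <code>--dump</code> and one or more log
   * file names to print a textual representation of their content on stdout,
   * or <code>--split</code> and one or more log directories to split them.
   * @param args command line arguments
   * @throws IOException
   */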
  public static void main(String[] args) throws IOException {
    if (args.length < 2) {
      usage();
      System.exit(-1);
    }

    if (args[0].compareTo("--dump") == 0) {
      HLogPrettyPrinter.run(Arrays.copyOfRange(args, 1, args.length));
    } else if (args[0].compareTo("--split") == 0) {
      Configuration conf = HBaseConfiguration.create();
      for (int i = 1; i < args.length; i++) {
        try {
          Path logPath = new Path(args[i]);
          FSUtils.setFsDefault(conf, logPath);
          split(conf, logPath);
        } catch (Throwable t) {
          t.printStackTrace(System.err);
          System.exit(-1);
        }
      }
    } else {
      usage();
      System.exit(-1);
    }
  }
}