1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.regionserver.wal;
21
22 import java.io.EOFException;
23 import java.io.IOException;
24 import java.io.InterruptedIOException;
25 import java.lang.reflect.Constructor;
26 import java.lang.reflect.InvocationTargetException;
27 import java.text.ParseException;
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.LinkedList;
31 import java.util.List;
32 import java.util.Map;
33 import java.util.Set;
34 import java.util.TreeMap;
35 import java.util.TreeSet;
36 import java.util.concurrent.atomic.AtomicReference;
37 import java.util.concurrent.CountDownLatch;
38
39 import org.apache.commons.logging.Log;
40 import org.apache.commons.logging.LogFactory;
41 import org.apache.hadoop.conf.Configuration;
42 import org.apache.hadoop.fs.FileStatus;
43 import org.apache.hadoop.fs.FileSystem;
44 import org.apache.hadoop.fs.Path;
45 import org.apache.hadoop.hbase.HBaseFileSystem;
46 import org.apache.hadoop.hbase.HConstants;
47 import org.apache.hadoop.hbase.HTableDescriptor;
48 import org.apache.hadoop.hbase.RemoteExceptionHandler;
49 import org.apache.hadoop.hbase.io.HeapSize;
50 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
51 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
52 import org.apache.hadoop.hbase.regionserver.HRegion;
53 import org.apache.hadoop.hbase.regionserver.wal.HLog.Entry;
54 import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader;
55 import org.apache.hadoop.hbase.regionserver.wal.HLog.Writer;
56 import org.apache.hadoop.hbase.util.Bytes;
57 import org.apache.hadoop.hbase.util.CancelableProgressable;
58 import org.apache.hadoop.hbase.util.ClassSize;
59 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
60 import org.apache.hadoop.hbase.util.FSUtils;
61 import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
62 import org.apache.hadoop.io.MultipleIOException;
63
64 import com.google.common.base.Preconditions;
65 import com.google.common.collect.Lists;
66
67
68
69
70
71
72 public class HLogSplitter {
/** Configuration key read by createLogSplitter to pick the splitter implementation class. */
private static final String LOG_SPLITTER_IMPL = "hbase.hlog.splitter.impl";

/** Public constant; not referenced inside this class. Presumably the recovered-edits name used by callers -- verify. */
public static final String RECOVERED_EDITS = "recovered.edits";

/** Logger shared by the splitter and its inner classes. */
static final Log LOG = LogFactory.getLog(HLogSplitter.class);

/** Set by splitLog(CountDownLatch); that method refuses to run a second time on the same instance. */
private boolean hasSplit = false;
/** Wall-clock milliseconds the last directory split took; exposed through getTime(). */
private long splitTime = 0;
/** Sum of the lengths of the log files handed to the directory split; exposed through getSize(). */
private long splitSize = 0;

// Locations and handles supplied to the constructor.
protected final Path rootDir;
protected final Path srcDir;
protected final Path oldLogDir;
protected final FileSystem fs;
protected final Configuration conf;
/** Used by createWriter to open recovered-edits writers. */
private final HLogFileSystem hlogFs;

/** Owns the writer threads and the per-region output writers. */
OutputSink outputSink;
/** Per-region buffers of entries waiting to be written by the writer threads. */
EntryBuffers entryBuffers;

/** First error reported by a writer thread (set with compareAndSet, so later errors are dropped). */
protected AtomicReference<Throwable> thrown = new AtomicReference<Throwable>();

/** Monitor the reading thread and the writer threads wait/notify on; also guards EntryBuffers.totalBuffered. */
Object dataAvailable = new Object();

/** Task-monitor status created at the start of a split. */
private MonitoredTask status;
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125 public static HLogSplitter createLogSplitter(Configuration conf,
126 final Path rootDir, final Path srcDir,
127 Path oldLogDir, final FileSystem fs) {
128
129 @SuppressWarnings("unchecked")
130 Class<? extends HLogSplitter> splitterClass = (Class<? extends HLogSplitter>) conf
131 .getClass(LOG_SPLITTER_IMPL, HLogSplitter.class);
132 try {
133 Constructor<? extends HLogSplitter> constructor =
134 splitterClass.getConstructor(
135 Configuration.class,
136 Path.class,
137 Path.class,
138 Path.class,
139 FileSystem.class);
140 return constructor.newInstance(conf, rootDir, srcDir, oldLogDir, fs);
141 } catch (IllegalArgumentException e) {
142 throw new RuntimeException(e);
143 } catch (InstantiationException e) {
144 throw new RuntimeException(e);
145 } catch (IllegalAccessException e) {
146 throw new RuntimeException(e);
147 } catch (InvocationTargetException e) {
148 throw new RuntimeException(e);
149 } catch (SecurityException e) {
150 throw new RuntimeException(e);
151 } catch (NoSuchMethodException e) {
152 throw new RuntimeException(e);
153 }
154 }
155
156 public HLogSplitter(Configuration conf, Path rootDir, Path srcDir,
157 Path oldLogDir, FileSystem fs) {
158 this.conf = conf;
159 this.rootDir = rootDir;
160 this.srcDir = srcDir;
161 this.oldLogDir = oldLogDir;
162 this.fs = fs;
163
164 entryBuffers = new EntryBuffers(
165 conf.getInt("hbase.regionserver.hlog.splitlog.buffersize",
166 128*1024*1024));
167 outputSink = new OutputSink();
168 this.hlogFs = new HLogFileSystem(conf);
169 }
170
171
172
173
174
175
176
177
178
179 public List<Path> splitLog()
180 throws IOException {
181 return splitLog((CountDownLatch) null);
182 }
183
184
185
186
187
188
189
190
191
192
193 public List<Path> splitLog(CountDownLatch latch)
194 throws IOException {
195 Preconditions.checkState(!hasSplit,
196 "An HLogSplitter instance may only be used once");
197 hasSplit = true;
198
199 status = TaskMonitor.get().createStatus(
200 "Splitting logs in " + srcDir);
201
202 long startTime = EnvironmentEdgeManager.currentTimeMillis();
203
204 status.setStatus("Determining files to split...");
205 List<Path> splits = null;
206 if (!fs.exists(srcDir)) {
207
208 status.markComplete("No log directory existed to split.");
209 return splits;
210 }
211 FileStatus[] logfiles = fs.listStatus(srcDir);
212 if (logfiles == null || logfiles.length == 0) {
213
214 return splits;
215 }
216 logAndReport("Splitting " + logfiles.length + " hlog(s) in "
217 + srcDir.toString());
218 splits = splitLog(logfiles, latch);
219
220 splitTime = EnvironmentEdgeManager.currentTimeMillis() - startTime;
221 String msg = "hlog file splitting completed in " + splitTime +
222 " ms for " + srcDir.toString();
223 status.markComplete(msg);
224 LOG.info(msg);
225 return splits;
226 }
227
228 private void logAndReport(String msg) {
229 status.setStatus(msg);
230 LOG.info(msg);
231 }
232
233
234
235
236 public long getTime() {
237 return this.splitTime;
238 }
239
240
241
242
243 public long getSize() {
244 return this.splitSize;
245 }
246
247
248
249
250
251 Map<byte[], Long> getOutputCounts() {
252 Preconditions.checkState(hasSplit);
253 return outputSink.getOutputCounts();
254 }
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278 private List<Path> splitLog(final FileStatus[] logfiles, CountDownLatch latch)
279 throws IOException {
280 List<Path> processedLogs = new ArrayList<Path>();
281 List<Path> corruptedLogs = new ArrayList<Path>();
282 List<Path> splits = null;
283
284 boolean skipErrors = conf.getBoolean("hbase.hlog.split.skip.errors", true);
285
286 countTotalBytes(logfiles);
287 splitSize = 0;
288
289 outputSink.startWriterThreads(entryBuffers);
290
291 try {
292 int i = 0;
293 for (FileStatus log : logfiles) {
294 Path logPath = log.getPath();
295 long logLength = log.getLen();
296 splitSize += logLength;
297 logAndReport("Splitting hlog " + (i++ + 1) + " of " + logfiles.length
298 + ": " + logPath + ", length=" + logLength);
299 Reader in;
300 try {
301
302
303
304
305
306 in = getReader(fs, log, conf, skipErrors);
307 if (in != null) {
308 parseHLog(in, logPath, entryBuffers, fs, conf, skipErrors);
309 try {
310 in.close();
311 } catch (IOException e) {
312 LOG.warn("Close log reader threw exception -- continuing",
313 e);
314 }
315 }
316 processedLogs.add(logPath);
317 } catch (CorruptedLogFileException e) {
318 LOG.info("Got while parsing hlog " + logPath +
319 ". Marking as corrupted", e);
320 corruptedLogs.add(logPath);
321 continue;
322 }
323 }
324 status.setStatus("Log splits complete. Checking for orphaned logs.");
325
326 if (latch != null) {
327 try {
328 latch.await();
329 } catch (InterruptedException ie) {
330 LOG.warn("wait for latch interrupted");
331 Thread.currentThread().interrupt();
332 }
333 }
334 FileStatus[] currFiles = fs.listStatus(srcDir);
335 if (currFiles.length > processedLogs.size()
336 + corruptedLogs.size()) {
337 throw new OrphanHLogAfterSplitException(
338 "Discovered orphan hlog after split. Maybe the "
339 + "HRegionServer was not dead when we started");
340 }
341 } finally {
342 status.setStatus("Finishing writing output logs and closing down.");
343 splits = outputSink.finishWritingAndClose();
344 }
345 status.setStatus("Archiving logs after completed split");
346 archiveLogs(srcDir, corruptedLogs, processedLogs, oldLogDir, fs, conf);
347 return splits;
348 }
349
350
351
352
353 private static long countTotalBytes(FileStatus[] logfiles) {
354 long ret = 0;
355 for (FileStatus stat : logfiles) {
356 ret += stat.getLen();
357 }
358 return ret;
359 }
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376 static public boolean splitLogFile(Path rootDir, FileStatus logfile,
377 FileSystem fs, Configuration conf, CancelableProgressable reporter)
378 throws IOException {
379 HLogSplitter s = new HLogSplitter(conf, rootDir, null, null
380 fs);
381 return s.splitLogFile(logfile, reporter);
382 }
383
384 public boolean splitLogFile(FileStatus logfile,
385 CancelableProgressable reporter) throws IOException {
386 final Map<byte[], Object> logWriters = Collections.
387 synchronizedMap(new TreeMap<byte[], Object>(Bytes.BYTES_COMPARATOR));
388 boolean isCorrupted = false;
389
390 Preconditions.checkState(status == null);
391 status = TaskMonitor.get().createStatus(
392 "Splitting log file " + logfile.getPath() +
393 "into a temporary staging area.");
394
395 Object BAD_WRITER = new Object();
396
397 boolean progress_failed = false;
398
399 boolean skipErrors = conf.getBoolean("hbase.hlog.split.skip.errors",
400 HLog.SPLIT_SKIP_ERRORS_DEFAULT);
401 int interval = conf.getInt("hbase.splitlog.report.interval.loglines", 1024);
402
403
404 int period = conf.getInt("hbase.splitlog.report.period",
405 conf.getInt("hbase.splitlog.manager.timeout", ZKSplitLog.DEFAULT_TIMEOUT) / 2);
406 int numOpenedFilesBeforeReporting =
407 conf.getInt("hbase.splitlog.report.openedfiles", 3);
408 Path logPath = logfile.getPath();
409 long logLength = logfile.getLen();
410 LOG.info("Splitting hlog: " + logPath + ", length=" + logLength);
411 status.setStatus("Opening log file");
412 Reader in = null;
413 try {
414 in = getReader(fs, logfile, conf, skipErrors);
415 } catch (CorruptedLogFileException e) {
416 LOG.warn("Could not get reader, corrupted log file " + logPath, e);
417 ZKSplitLog.markCorrupted(rootDir, logfile.getPath().getName(), fs);
418 isCorrupted = true;
419 }
420 if (in == null) {
421 status.markComplete("Was nothing to split in log file");
422 LOG.warn("Nothing to split in log file " + logPath);
423 return true;
424 }
425 long t = EnvironmentEdgeManager.currentTimeMillis();
426 long last_report_at = t;
427 if (reporter != null && reporter.progress() == false) {
428 status.markComplete("Failed: reporter.progress asked us to terminate");
429 return false;
430 }
431
432
433 int editsCount = 0;
434 int numNewlyOpenedFiles = 0;
435 Entry entry;
436 try {
437 while ((entry = getNextLogLine(in,logPath, skipErrors)) != null) {
438 byte[] region = entry.getKey().getEncodedRegionName();
439 Object o = logWriters.get(region);
440 if (o == BAD_WRITER) {
441 continue;
442 }
443 WriterAndPath wap = (WriterAndPath)o;
444 if (wap == null) {
445 wap = createWAP(region, entry, rootDir, fs, conf);
446 numNewlyOpenedFiles++;
447 if (wap == null) {
448
449
450 logWriters.put(region, BAD_WRITER);
451 continue;
452 } else {
453 logWriters.put(region, wap);
454 }
455 }
456 wap.w.append(entry);
457 outputSink.updateRegionMaximumEditLogSeqNum(entry);
458 editsCount++;
459
460
461 if (editsCount % interval == 0 ||
462 (numNewlyOpenedFiles > numOpenedFilesBeforeReporting)) {
463
464 numNewlyOpenedFiles = 0;
465 String countsStr = "edits=" + editsCount + ", files=" + logWriters.size();
466 status.setStatus("Split " + countsStr);
467 long t1 = EnvironmentEdgeManager.currentTimeMillis();
468 if ((t1 - last_report_at) > period) {
469 last_report_at = t;
470 if (reporter != null && reporter.progress() == false) {
471 status.markComplete("Failed: reporter.progress asked us to terminate; " + countsStr);
472 progress_failed = true;
473 return false;
474 }
475 }
476 }
477 }
478 } catch (CorruptedLogFileException e) {
479 LOG.warn("Could not parse, corrupted log file " + logPath, e);
480 ZKSplitLog.markCorrupted(rootDir, logfile.getPath().getName(), fs);
481 isCorrupted = true;
482 } catch (IOException e) {
483 e = RemoteExceptionHandler.checkIOException(e);
484 throw e;
485 } finally {
486 boolean allWritersClosed = false;
487 try {
488 int n = 0;
489 for (Map.Entry<byte[], Object> logWritersEntry : logWriters.entrySet()) {
490 Object o = logWritersEntry.getValue();
491 long t1 = EnvironmentEdgeManager.currentTimeMillis();
492 if ((t1 - last_report_at) > period) {
493 last_report_at = t;
494 if ((progress_failed == false) && (reporter != null) && (reporter.progress() == false)) {
495 progress_failed = true;
496 }
497 }
498 if (o == BAD_WRITER) {
499 continue;
500 }
501 n++;
502 WriterAndPath wap = (WriterAndPath) o;
503 wap.writerClosed = true;
504 wap.w.close();
505 LOG.debug("Closed " + wap.p);
506 Path dst = getCompletedRecoveredEditsFilePath(wap.p,
507 outputSink.getRegionMaximumEditLogSeqNum(logWritersEntry.getKey()));
508 if (!dst.equals(wap.p) && fs.exists(dst)) {
509 LOG.warn("Found existing old edits file. It could be the "
510 + "result of a previous failed split attempt. Deleting " + dst + ", length="
511 + fs.getFileStatus(dst).getLen());
512 if (!HBaseFileSystem.deleteFileFromFileSystem(fs, dst)) {
513 LOG.warn("Failed deleting of old " + dst);
514 throw new IOException("Failed deleting of old " + dst);
515 }
516 }
517
518
519
520 if (fs.exists(wap.p)) {
521 if (!HBaseFileSystem.renameDirForFileSystem(fs, wap.p, dst)) {
522 throw new IOException("Failed renaming " + wap.p + " to " + dst);
523 }
524 LOG.debug("Rename " + wap.p + " to " + dst);
525 }
526 }
527 allWritersClosed = true;
528 String msg = "Processed " + editsCount + " edits across " + n + " regions"
529 + " threw away edits for " + (logWriters.size() - n) + " regions" + "; log file="
530 + logPath + " is corrupted = " + isCorrupted + " progress failed = " + progress_failed;
531 LOG.info(msg);
532 status.markComplete(msg);
533 } finally {
534 if (!allWritersClosed) {
535 for (Map.Entry<byte[], Object> logWritersEntry : logWriters.entrySet()) {
536 Object o = logWritersEntry.getValue();
537 if (o != BAD_WRITER) {
538 WriterAndPath wap = (WriterAndPath) o;
539 try {
540 if (!wap.writerClosed) {
541 wap.writerClosed = true;
542 wap.w.close();
543 }
544 } catch (IOException e) {
545 LOG.debug("Exception while closing the writer :", e);
546 }
547 }
548 }
549 }
550 in.close();
551 }
552 }
553 return !progress_failed;
554 }
555
556
557
558
559
560
561
562
563
564
565
566
567 public static void finishSplitLogFile(String logfile, Configuration conf)
568 throws IOException {
569 Path rootdir = FSUtils.getRootDir(conf);
570 Path oldLogDir = new Path(rootdir, HConstants.HREGION_OLDLOGDIR_NAME);
571 finishSplitLogFile(rootdir, oldLogDir, logfile, conf);
572 }
573
574 public static void finishSplitLogFile(Path rootdir, Path oldLogDir,
575 String logfile, Configuration conf) throws IOException {
576 List<Path> processedLogs = new ArrayList<Path>();
577 List<Path> corruptedLogs = new ArrayList<Path>();
578 FileSystem fs;
579 fs = rootdir.getFileSystem(conf);
580 Path logPath = new Path(logfile);
581 if (ZKSplitLog.isCorrupted(rootdir, logPath.getName(), fs)) {
582 corruptedLogs.add(logPath);
583 } else {
584 processedLogs.add(logPath);
585 }
586 archiveLogs(null, corruptedLogs, processedLogs, oldLogDir, fs, conf);
587 Path stagingDir = ZKSplitLog.getSplitLogDir(rootdir, logPath.getName());
588 HBaseFileSystem.deleteDirFromFileSystem(fs, stagingDir);
589 }
590
591
592
593
594
595
596
597
598
599
600
601
602
603 private static void archiveLogs(
604 final Path srcDir,
605 final List<Path> corruptedLogs,
606 final List<Path> processedLogs, final Path oldLogDir,
607 final FileSystem fs, final Configuration conf) throws IOException {
608 final Path corruptDir = new Path(conf.get(HConstants.HBASE_DIR), conf.get(
609 "hbase.regionserver.hlog.splitlog.corrupt.dir", HConstants.CORRUPT_DIR_NAME));
610
611 if (!HBaseFileSystem.makeDirOnFileSystem(fs, corruptDir)) {
612 LOG.info("Unable to mkdir " + corruptDir);
613 }
614 HBaseFileSystem.makeDirOnFileSystem(fs, oldLogDir);
615
616
617
618 for (Path corrupted : corruptedLogs) {
619 Path p = new Path(corruptDir, corrupted.getName());
620 if (fs.exists(corrupted)) {
621 if (!HBaseFileSystem.renameDirForFileSystem(fs, corrupted, p)) {
622 LOG.warn("Unable to move corrupted log " + corrupted + " to " + p);
623 } else {
624 LOG.warn("Moving corrupted log " + corrupted + " to " + p);
625 }
626 }
627 }
628
629 for (Path p : processedLogs) {
630 Path newPath = HLog.getHLogArchivePath(oldLogDir, p);
631 if (fs.exists(p)) {
632 if (!HBaseFileSystem.renameAndSetModifyTime(fs, p, newPath)) {
633 LOG.warn("Unable to move " + p + " to " + newPath);
634 } else {
635 LOG.debug("Archived processed log " + p + " to " + newPath);
636 }
637 }
638 }
639
640
641
642 if (srcDir != null && !HBaseFileSystem.deleteDirFromFileSystem(fs, srcDir)) {
643 throw new IOException("Unable to delete src dir: " + srcDir);
644 }
645 }
646
647
648
649
650
651
652
653
654
655
656
657
658
659 static Path getRegionSplitEditsPath(final FileSystem fs,
660 final Entry logEntry, final Path rootDir, boolean isCreate)
661 throws IOException {
662 Path tableDir = HTableDescriptor.getTableDir(rootDir, logEntry.getKey()
663 .getTablename());
664 Path regiondir = HRegion.getRegionDir(tableDir,
665 Bytes.toString(logEntry.getKey().getEncodedRegionName()));
666 Path dir = HLog.getRegionDirRecoveredEditsDir(regiondir);
667
668 if (!fs.exists(regiondir)) {
669 LOG.info("This region's directory doesn't exist: "
670 + regiondir.toString() + ". It is very likely that it was" +
671 " already split so it's safe to discard those edits.");
672 return null;
673 }
674 if (isCreate && !fs.exists(dir) &&
675 !HBaseFileSystem.makeDirOnFileSystem(fs, dir)) {
676 LOG.warn("mkdir failed on " + dir);
677 }
678
679
680 String fileName = formatRecoveredEditsFileName(logEntry.getKey()
681 .getLogSeqNum());
682 fileName = getTmpRecoveredEditsFileName(fileName);
683 return new Path(dir, fileName);
684 }
685
686 static String getTmpRecoveredEditsFileName(String fileName) {
687 return fileName + HLog.RECOVERED_LOG_TMPFILE_SUFFIX;
688 }
689
690
691
692
693
694
695
696
697
698 static Path getCompletedRecoveredEditsFilePath(Path srcPath,
699 Long maximumEditLogSeqNum) {
700 String fileName = formatRecoveredEditsFileName(maximumEditLogSeqNum);
701 return new Path(srcPath.getParent(), fileName);
702 }
703
704 static String formatRecoveredEditsFileName(final long seqid) {
705 return String.format("%019d", seqid);
706 }
707
708
709
710
711
712
713
714
715
716
717
718
719
720 private void parseHLog(final Reader in, Path path,
721 EntryBuffers entryBuffers, final FileSystem fs,
722 final Configuration conf, boolean skipErrors)
723 throws IOException, CorruptedLogFileException {
724 int editsCount = 0;
725 try {
726 Entry entry;
727 while ((entry = getNextLogLine(in, path, skipErrors)) != null) {
728 entryBuffers.appendEntry(entry);
729 editsCount++;
730 }
731 } catch (InterruptedException ie) {
732 IOException t = new InterruptedIOException();
733 t.initCause(ie);
734 throw t;
735 } finally {
736 LOG.debug("Pushed=" + editsCount + " entries from " + path);
737 }
738 }
739
740
741
742
743
744
745
746
747
748
749
750 protected Reader getReader(FileSystem fs, FileStatus file, Configuration conf,
751 boolean skipErrors)
752 throws IOException, CorruptedLogFileException {
753 Path path = file.getPath();
754 long length = file.getLen();
755 Reader in;
756
757
758
759
760
761 if (length <= 0) {
762 LOG.warn("File " + path + " might be still open, length is 0");
763 }
764
765 try {
766 FSUtils.getInstance(fs, conf).recoverFileLease(fs, path, conf);
767 try {
768 in = getReader(fs, path, conf);
769 } catch (EOFException e) {
770 if (length <= 0) {
771
772
773
774
775
776 LOG.warn("Could not open " + path + " for reading. File is empty", e);
777 return null;
778 } else {
779
780 return null;
781 }
782 }
783 } catch (IOException e) {
784 if (!skipErrors) {
785 throw e;
786 }
787 CorruptedLogFileException t =
788 new CorruptedLogFileException("skipErrors=true Could not open hlog " +
789 path + " ignoring");
790 t.initCause(e);
791 throw t;
792 }
793 return in;
794 }
795
796 static private Entry getNextLogLine(Reader in, Path path, boolean skipErrors)
797 throws CorruptedLogFileException, IOException {
798 try {
799 return in.next();
800 } catch (EOFException eof) {
801
802 LOG.info("EOF from hlog " + path + ". continuing");
803 return null;
804 } catch (IOException e) {
805
806
807 if (e.getCause() != null &&
808 (e.getCause() instanceof ParseException ||
809 e.getCause() instanceof org.apache.hadoop.fs.ChecksumException)) {
810 LOG.warn("Parse exception " + e.getCause().toString() + " from hlog "
811 + path + ". continuing");
812 return null;
813 }
814 if (!skipErrors) {
815 throw e;
816 }
817 CorruptedLogFileException t =
818 new CorruptedLogFileException("skipErrors=true Ignoring exception" +
819 " while parsing hlog " + path + ". Marking as corrupted");
820 t.initCause(e);
821 throw t;
822 }
823 }
824
825
826 private void writerThreadError(Throwable t) {
827 thrown.compareAndSet(null, t);
828 }
829
830
831
832
833 private void checkForErrors() throws IOException {
834 Throwable thrown = this.thrown.get();
835 if (thrown == null) return;
836 if (thrown instanceof IOException) {
837 throw (IOException)thrown;
838 } else {
839 throw new RuntimeException(thrown);
840 }
841 }
842
843
844
845 protected Writer createWriter(FileSystem fs, Path logfile, Configuration conf)
846 throws IOException {
847 return hlogFs.createWriter(fs, conf, logfile);
848 }
849
850
851
852
853 protected Reader getReader(FileSystem fs, Path curLogFile, Configuration conf)
854 throws IOException {
855 return HLog.getReader(fs, curLogFile, conf);
856 }
857
858
859
860
861
862
863
864
865 class EntryBuffers {
866 Map<byte[], RegionEntryBuffer> buffers =
867 new TreeMap<byte[], RegionEntryBuffer>(Bytes.BYTES_COMPARATOR);
868
869
870
871
872 Set<byte[]> currentlyWriting = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
873
874 long totalBuffered = 0;
875 long maxHeapUsage;
876
877 EntryBuffers(long maxHeapUsage) {
878 this.maxHeapUsage = maxHeapUsage;
879 }
880
881
882
883
884
885
886
887
888 void appendEntry(Entry entry) throws InterruptedException, IOException {
889 HLogKey key = entry.getKey();
890
891 RegionEntryBuffer buffer;
892 long incrHeap;
893 synchronized (this) {
894 buffer = buffers.get(key.getEncodedRegionName());
895 if (buffer == null) {
896 buffer = new RegionEntryBuffer(key.getTablename(), key.getEncodedRegionName());
897 buffers.put(key.getEncodedRegionName(), buffer);
898 }
899 incrHeap= buffer.appendEntry(entry);
900 }
901
902
903 synchronized (dataAvailable) {
904 totalBuffered += incrHeap;
905 while (totalBuffered > maxHeapUsage && thrown.get() == null) {
906 LOG.debug("Used " + totalBuffered + " bytes of buffered edits, waiting for IO threads...");
907 dataAvailable.wait(3000);
908 }
909 dataAvailable.notifyAll();
910 }
911 checkForErrors();
912 }
913
914 synchronized RegionEntryBuffer getChunkToWrite() {
915 long biggestSize=0;
916 byte[] biggestBufferKey=null;
917
918 for (Map.Entry<byte[], RegionEntryBuffer> entry : buffers.entrySet()) {
919 long size = entry.getValue().heapSize();
920 if (size > biggestSize && !currentlyWriting.contains(entry.getKey())) {
921 biggestSize = size;
922 biggestBufferKey = entry.getKey();
923 }
924 }
925 if (biggestBufferKey == null) {
926 return null;
927 }
928
929 RegionEntryBuffer buffer = buffers.remove(biggestBufferKey);
930 currentlyWriting.add(biggestBufferKey);
931 return buffer;
932 }
933
934 void doneWriting(RegionEntryBuffer buffer) {
935 synchronized (this) {
936 boolean removed = currentlyWriting.remove(buffer.encodedRegionName);
937 assert removed;
938 }
939 long size = buffer.heapSize();
940
941 synchronized (dataAvailable) {
942 totalBuffered -= size;
943
944 dataAvailable.notifyAll();
945 }
946 }
947
948 synchronized boolean isRegionCurrentlyWriting(byte[] region) {
949 return currentlyWriting.contains(region);
950 }
951 }
952
953
954
955
956
957
958
959 static class RegionEntryBuffer implements HeapSize {
960 long heapInBuffer = 0;
961 List<Entry> entryBuffer;
962 byte[] tableName;
963 byte[] encodedRegionName;
964
965 RegionEntryBuffer(byte[] table, byte[] region) {
966 this.tableName = table;
967 this.encodedRegionName = region;
968 this.entryBuffer = new LinkedList<Entry>();
969 }
970
971 long appendEntry(Entry entry) {
972 internify(entry);
973 entryBuffer.add(entry);
974 long incrHeap = entry.getEdit().heapSize() +
975 ClassSize.align(2 * ClassSize.REFERENCE) +
976 0;
977 heapInBuffer += incrHeap;
978 return incrHeap;
979 }
980
981 private void internify(Entry entry) {
982 HLogKey k = entry.getKey();
983 k.internTableName(this.tableName);
984 k.internEncodedRegionName(this.encodedRegionName);
985 }
986
987 public long heapSize() {
988 return heapInBuffer;
989 }
990 }
991
992
993 class WriterThread extends Thread {
994 private volatile boolean shouldStop = false;
995
996 WriterThread(int i) {
997 super("WriterThread-" + i);
998 }
999
1000 public void run() {
1001 try {
1002 doRun();
1003 } catch (Throwable t) {
1004 LOG.error("Error in log splitting write thread", t);
1005 writerThreadError(t);
1006 }
1007 }
1008
1009 private void doRun() throws IOException {
1010 LOG.debug("Writer thread " + this + ": starting");
1011 while (true) {
1012 RegionEntryBuffer buffer = entryBuffers.getChunkToWrite();
1013 if (buffer == null) {
1014
1015 synchronized (dataAvailable) {
1016 if (shouldStop) return;
1017 try {
1018 dataAvailable.wait(1000);
1019 } catch (InterruptedException ie) {
1020 if (!shouldStop) {
1021 throw new RuntimeException(ie);
1022 }
1023 }
1024 }
1025 continue;
1026 }
1027
1028 assert buffer != null;
1029 try {
1030 writeBuffer(buffer);
1031 } finally {
1032 entryBuffers.doneWriting(buffer);
1033 }
1034 }
1035 }
1036
1037
1038 private void writeBuffer(RegionEntryBuffer buffer) throws IOException {
1039 List<Entry> entries = buffer.entryBuffer;
1040 if (entries.isEmpty()) {
1041 LOG.warn(this.getName() + " got an empty buffer, skipping");
1042 return;
1043 }
1044
1045 WriterAndPath wap = null;
1046
1047 long startTime = System.nanoTime();
1048 try {
1049 int editsCount = 0;
1050
1051 for (Entry logEntry : entries) {
1052 if (wap == null) {
1053 wap = outputSink.getWriterAndPath(logEntry);
1054 if (wap == null) {
1055
1056
1057 return;
1058 }
1059 }
1060 wap.w.append(logEntry);
1061 outputSink.updateRegionMaximumEditLogSeqNum(logEntry);
1062 editsCount++;
1063 }
1064
1065 wap.incrementEdits(editsCount);
1066 wap.incrementNanoTime(System.nanoTime() - startTime);
1067 } catch (IOException e) {
1068 e = RemoteExceptionHandler.checkIOException(e);
1069 LOG.fatal(this.getName() + " Got while writing log entry to log", e);
1070 throw e;
1071 }
1072 }
1073
1074 void finish() {
1075 synchronized (dataAvailable) {
1076 shouldStop = true;
1077 dataAvailable.notifyAll();
1078 }
1079 }
1080 }
1081
1082 private WriterAndPath createWAP(byte[] region, Entry entry, Path rootdir,
1083 FileSystem fs, Configuration conf)
1084 throws IOException {
1085 Path regionedits = getRegionSplitEditsPath(fs, entry, rootdir, true);
1086 if (regionedits == null) {
1087 return null;
1088 }
1089 if (fs.exists(regionedits)) {
1090 LOG.warn("Found existing old edits file. It could be the "
1091 + "result of a previous failed split attempt. Deleting "
1092 + regionedits + ", length="
1093 + fs.getFileStatus(regionedits).getLen());
1094 if (!HBaseFileSystem.deleteFileFromFileSystem(fs, regionedits)) {
1095 LOG.warn("Failed delete of old " + regionedits);
1096 }
1097 }
1098 Writer w = createWriter(fs, regionedits, conf);
1099 LOG.debug("Creating writer path=" + regionedits + " region="
1100 + Bytes.toStringBinary(region));
1101 return (new WriterAndPath(regionedits, w));
1102 }
1103
1104 Path convertRegionEditsToTemp(Path rootdir, Path edits, String tmpname) {
1105 List<String> components = new ArrayList<String>(10);
1106 do {
1107 components.add(edits.getName());
1108 edits = edits.getParent();
1109 } while (edits.depth() > rootdir.depth());
1110 Path ret = ZKSplitLog.getSplitLogDir(rootdir, tmpname);
1111 for (int i = components.size() - 1; i >= 0; i--) {
1112 ret = new Path(ret, components.get(i));
1113 }
1114 try {
1115 if (fs.exists(ret)) {
1116 LOG.warn("Found existing old temporary edits file. It could be the "
1117 + "result of a previous failed split attempt. Deleting "
1118 + ret + ", length="
1119 + fs.getFileStatus(ret).getLen());
1120 if (!HBaseFileSystem.deleteFileFromFileSystem(fs, ret)) {
1121 LOG.warn("Failed delete of old " + ret);
1122 }
1123 }
1124 Path dir = ret.getParent();
1125 if (!fs.exists(dir) && !HBaseFileSystem.makeDirOnFileSystem(fs, dir)) {
1126 LOG.warn("mkdir failed on " + dir);
1127 }
1128 } catch (IOException e) {
1129 LOG.warn("Could not prepare temp staging area ", e);
1130
1131 }
1132 return ret;
1133 }
1134
1135
1136
1137
1138 class OutputSink {
1139 private final Map<byte[], WriterAndPath> logWriters = Collections.synchronizedMap(
1140 new TreeMap<byte[], WriterAndPath>(Bytes.BYTES_COMPARATOR));
1141 private final Map<byte[], Long> regionMaximumEditLogSeqNum = Collections
1142 .synchronizedMap(new TreeMap<byte[], Long>(Bytes.BYTES_COMPARATOR));
1143 private final List<WriterThread> writerThreads = Lists.newArrayList();
1144
1145
1146 private final Set<byte[]> blacklistedRegions = Collections.synchronizedSet(
1147 new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR));
1148
1149 private boolean closeAndCleanCompleted = false;
1150
1151 private boolean logWritersClosed = false;
1152
1153
1154
1155
1156
1157
1158 synchronized void startWriterThreads(EntryBuffers entryBuffers) {
1159
1160
1161
1162
1163
1164 int numThreads = conf.getInt(
1165 "hbase.regionserver.hlog.splitlog.writer.threads", 3);
1166
1167 for (int i = 0; i < numThreads; i++) {
1168 WriterThread t = new WriterThread(i);
1169 t.start();
1170 writerThreads.add(t);
1171 }
1172 }
1173
1174 List<Path> finishWritingAndClose() throws IOException {
1175 LOG.info("Waiting for split writer threads to finish");
1176 try {
1177 for (WriterThread t : writerThreads) {
1178 t.finish();
1179 }
1180 for (WriterThread t : writerThreads) {
1181 try {
1182 t.join();
1183 } catch (InterruptedException ie) {
1184 throw new IOException(ie);
1185 }
1186 checkForErrors();
1187 }
1188 LOG.info("Split writers finished");
1189
1190 return closeStreams();
1191 } finally {
1192 List<IOException> thrown = closeLogWriters(null);
1193 if (thrown != null && !thrown.isEmpty()) {
1194 throw MultipleIOException.createIOException(thrown);
1195 }
1196 }
1197 }
1198
1199
1200
1201
1202
1203 private List<Path> closeStreams() throws IOException {
1204 Preconditions.checkState(!closeAndCleanCompleted);
1205
1206 List<Path> paths = new ArrayList<Path>();
1207 List<IOException> thrown = Lists.newArrayList();
1208 closeLogWriters(thrown);
1209 for (Map.Entry<byte[], WriterAndPath> logWritersEntry : logWriters
1210 .entrySet()) {
1211 WriterAndPath wap = logWritersEntry.getValue();
1212 Path dst = getCompletedRecoveredEditsFilePath(wap.p,
1213 regionMaximumEditLogSeqNum.get(logWritersEntry.getKey()));
1214 try {
1215 if (!dst.equals(wap.p) && fs.exists(dst)) {
1216 LOG.warn("Found existing old edits file. It could be the "
1217 + "result of a previous failed split attempt. Deleting " + dst
1218 + ", length=" + fs.getFileStatus(dst).getLen());
1219 if (!HBaseFileSystem.deleteFileFromFileSystem(fs, dst)) {
1220 LOG.warn("Failed deleting of old " + dst);
1221 throw new IOException("Failed deleting of old " + dst);
1222 }
1223 }
1224
1225
1226
1227 if (fs.exists(wap.p)) {
1228 if (!HBaseFileSystem.renameDirForFileSystem(fs, wap.p, dst)) {
1229 throw new IOException("Failed renaming " + wap.p + " to " + dst);
1230 }
1231 LOG.debug("Rename " + wap.p + " to " + dst);
1232 }
1233 } catch (IOException ioe) {
1234 LOG.error("Couldn't rename " + wap.p + " to " + dst, ioe);
1235 thrown.add(ioe);
1236 continue;
1237 }
1238 paths.add(dst);
1239 }
1240 if (!thrown.isEmpty()) {
1241 throw MultipleIOException.createIOException(thrown);
1242 }
1243
1244 closeAndCleanCompleted = true;
1245 return paths;
1246 }
1247
1248 private List<IOException> closeLogWriters(List<IOException> thrown)
1249 throws IOException {
1250 if (!logWritersClosed) {
1251 if (thrown == null) {
1252 thrown = Lists.newArrayList();
1253 }
1254 try {
1255 for (WriterThread t : writerThreads) {
1256 while (t.isAlive()) {
1257 t.shouldStop = true;
1258 t.interrupt();
1259 try {
1260 t.join(10);
1261 } catch (InterruptedException e) {
1262 IOException iie = new InterruptedIOException();
1263 iie.initCause(e);
1264 throw iie;
1265 }
1266 }
1267 }
1268 } finally {
1269 synchronized (logWriters) {
1270 for (WriterAndPath wap : logWriters.values()) {
1271 try {
1272 wap.w.close();
1273 } catch (IOException ioe) {
1274 LOG.error("Couldn't close log at " + wap.p, ioe);
1275 thrown.add(ioe);
1276 continue;
1277 }
1278 LOG.info("Closed path " + wap.p + " (wrote " + wap.editsWritten
1279 + " edits in " + (wap.nanosSpent / 1000 / 1000) + "ms)");
1280 }
1281 }
1282 logWritersClosed = true;
1283 }
1284 }
1285 return thrown;
1286 }
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296 WriterAndPath getWriterAndPath(Entry entry) throws IOException {
1297 byte region[] = entry.getKey().getEncodedRegionName();
1298 WriterAndPath ret = logWriters.get(region);
1299 if (ret != null) {
1300 return ret;
1301 }
1302
1303
1304 if (blacklistedRegions.contains(region)) {
1305 return null;
1306 }
1307 ret = createWAP(region, entry, rootDir, fs, conf);
1308 if (ret == null) {
1309 blacklistedRegions.add(region);
1310 return null;
1311 }
1312 logWriters.put(region, ret);
1313 return ret;
1314 }
1315
1316
1317
1318
1319 void updateRegionMaximumEditLogSeqNum(Entry entry) {
1320 synchronized (regionMaximumEditLogSeqNum) {
1321 Long currentMaxSeqNum=regionMaximumEditLogSeqNum.get(entry.getKey().getEncodedRegionName());
1322 if (currentMaxSeqNum == null
1323 || entry.getKey().getLogSeqNum() > currentMaxSeqNum) {
1324 regionMaximumEditLogSeqNum.put(entry.getKey().getEncodedRegionName(),
1325 entry.getKey().getLogSeqNum());
1326 }
1327 }
1328
1329 }
1330
1331 Long getRegionMaximumEditLogSeqNum(byte[] region) {
1332 return regionMaximumEditLogSeqNum.get(region);
1333 }
1334
1335
1336
1337
1338
1339 private Map<byte[], Long> getOutputCounts() {
1340 TreeMap<byte[], Long> ret = new TreeMap<byte[], Long>(
1341 Bytes.BYTES_COMPARATOR);
1342 synchronized (logWriters) {
1343 for (Map.Entry<byte[], WriterAndPath> entry : logWriters.entrySet()) {
1344 ret.put(entry.getKey(), entry.getValue().editsWritten);
1345 }
1346 }
1347 return ret;
1348 }
1349 }
1350
1351
1352
1353
1354
1355
1356
1357
1358 private final static class WriterAndPath {
1359 final Path p;
1360 final Writer w;
1361
1362
1363 long editsWritten = 0;
1364
1365 long nanosSpent = 0;
1366
1367
1368
1369
1370 boolean writerClosed = false;
1371
1372 WriterAndPath(final Path p, final Writer w) {
1373 this.p = p;
1374 this.w = w;
1375 }
1376
1377 void incrementEdits(int edits) {
1378 editsWritten += edits;
1379 }
1380
1381 void incrementNanoTime(long nanos) {
1382 nanosSpent += nanos;
1383 }
1384 }
1385
1386 static class CorruptedLogFileException extends Exception {
1387 private static final long serialVersionUID = 1L;
1388 CorruptedLogFileException(String s) {
1389 super(s);
1390 }
1391 }
1392 }