1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.wal;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.FileNotFoundException;
27  import java.io.IOException;
28  import java.lang.reflect.Method;
29  import java.security.PrivilegedExceptionAction;
30  import java.util.ArrayList;
31  import java.util.Collections;
32  import java.util.HashMap;
33  import java.util.HashSet;
34  import java.util.List;
35  import java.util.Map;
36  import java.util.NavigableSet;
37  import java.util.Set;
38  import java.util.concurrent.atomic.AtomicBoolean;
39  import java.util.concurrent.atomic.AtomicInteger;
40  import java.util.concurrent.atomic.AtomicLong;
41  
42  import org.apache.commons.logging.Log;
43  import org.apache.commons.logging.LogFactory;
44  import org.apache.hadoop.conf.Configuration;
45  import org.apache.hadoop.fs.FSDataInputStream;
46  import org.apache.hadoop.fs.FSDataOutputStream;
47  import org.apache.hadoop.fs.FileStatus;
48  import org.apache.hadoop.fs.FileSystem;
49  import org.apache.hadoop.fs.FileUtil;
50  import org.apache.hadoop.fs.Path;
51  import org.apache.hadoop.fs.PathFilter;
52  import org.apache.hadoop.hbase.Cell;
53  import org.apache.hadoop.hbase.HBaseConfiguration;
54  import org.apache.hadoop.hbase.HBaseTestingUtility;
55  import org.apache.hadoop.hbase.HConstants;
56  import org.apache.hadoop.hbase.HRegionInfo;
57  import org.apache.hadoop.hbase.KeyValue;
58  import org.apache.hadoop.hbase.testclassification.LargeTests;
59  import org.apache.hadoop.hbase.TableName;
60  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
61  import org.apache.hadoop.hbase.regionserver.HRegion;
62  import org.apache.hadoop.hbase.wal.WAL.Entry;
63  import org.apache.hadoop.hbase.wal.WAL.Reader;
64  import org.apache.hadoop.hbase.wal.WALProvider.Writer;
65  import org.apache.hadoop.hbase.wal.WALSplitter.CorruptedLogFileException;
66  import org.apache.hadoop.hbase.security.User;
67  import org.apache.hadoop.hbase.util.Bytes;
68  import org.apache.hadoop.hbase.util.CancelableProgressable;
69  import org.apache.hadoop.hbase.util.FSUtils;
70  import org.apache.hadoop.hbase.util.Threads;
71  import org.apache.hadoop.hdfs.DFSTestUtil;
72  import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
73  import org.apache.hadoop.ipc.RemoteException;
74  import org.junit.After;
75  import org.junit.AfterClass;
76  import org.junit.Before;
77  import org.junit.BeforeClass;
78  import org.junit.Rule;
79  import org.junit.rules.TestName;
80  import org.junit.Test;
81  import org.junit.experimental.categories.Category;
82  import org.mockito.Mockito;
83  import org.mockito.invocation.InvocationOnMock;
84  import org.mockito.stubbing.Answer;
85  
86  import com.google.common.base.Joiner;
87  import com.google.common.collect.ImmutableList;
88  
89  // imports for things that haven't moved from regionserver.wal yet.
90  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
91  import org.apache.hadoop.hbase.regionserver.wal.InstrumentedLogWriter;
92  import org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader;
93  import org.apache.hadoop.hbase.regionserver.wal.FaultySequenceFileLogReader;
94  
95  /**
96   * Testing {@link WAL} splitting code.
97   */
98  @Category(LargeTests.class)
99  public class TestWALSplit {
100   {
101     // Uncomment the following lines if more verbosity is needed for
102     // debugging (see HBASE-12285 for details).
103     //((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
104     //((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
105     //((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
106   }
107   private final static Log LOG = LogFactory.getLog(TestWALSplit.class);
108 
109   private static Configuration conf;
110   private FileSystem fs;
111 
112   protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
113 
114   private Path HBASEDIR;
115   private Path WALDIR;
116   private Path OLDLOGDIR;
117   private Path CORRUPTDIR;
118   private Path TABLEDIR;
119 
120   private static final int NUM_WRITERS = 10;
121   private static final int ENTRIES = 10; // entries per writer per region
122 
123   private static final TableName TABLE_NAME =
124       TableName.valueOf("t1");
125   private static final byte[] FAMILY = "f1".getBytes();
126   private static final byte[] QUALIFIER = "q1".getBytes();
127   private static final byte[] VALUE = "v1".getBytes();
128   private static final String WAL_FILE_PREFIX = "wal.dat.";
129   private static List<String> REGIONS = new ArrayList<String>();
130   private static final String HBASE_SKIP_ERRORS = "hbase.hlog.split.skip.errors";
131   private static String ROBBER;
132   private static String ZOMBIE;
133   private static String [] GROUP = new String [] {"supergroup"};
134   private RecoveryMode mode;
135 
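  /**
   * Ways corruptWAL(Path, Corruptions, boolean) can damage a WAL file: garbage appended,
   * garbage inserted at the start or in the middle, or truncation of either the file body
   * or just the trailer.
   */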
136   static enum Corruptions {
137     INSERT_GARBAGE_ON_FIRST_LINE,
138     INSERT_GARBAGE_IN_THE_MIDDLE,
139     APPEND_GARBAGE,
140     TRUNCATE,
141     TRUNCATE_TRAILER
142   }
143 
144   @BeforeClass
145   public static void setUpBeforeClass() throws Exception {
146     conf = TEST_UTIL.getConfiguration();
147     conf.setClass("hbase.regionserver.hlog.writer.impl",
148       InstrumentedLogWriter.class, Writer.class);
149     conf.setBoolean("dfs.support.broken.append", true);
150     conf.setBoolean("dfs.support.append", true);
151     // This is how you turn off shortcircuit read currently.  TODO: Fix.  Should read config.
152     System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
153     // Create a fake user-to-group mapping and set it on the conf.
154     Map<String, String []> u2g_map = new HashMap<String, String []>(2);
155     ROBBER = User.getCurrent().getName() + "-robber";
156     ZOMBIE = User.getCurrent().getName() + "-zombie";
157     u2g_map.put(ROBBER, GROUP);
158     u2g_map.put(ZOMBIE, GROUP);
159     DFSTestUtil.updateConfWithFakeGroupMapping(conf, u2g_map);
160     conf.setInt("dfs.heartbeat.interval", 1);
161     TEST_UTIL.startMiniDFSCluster(2);
162   }
163 
164   @AfterClass
165   public static void tearDownAfterClass() throws Exception {
166     TEST_UTIL.shutdownMiniDFSCluster();
167   }
168 
169   @Rule
170   public TestName name = new TestName();
171   private WALFactory wals = null;
172 
173   @Before
174   public void setUp() throws Exception {
175     LOG.info("Cleaning up cluster for new test.");
176     fs = TEST_UTIL.getDFSCluster().getFileSystem();
177     HBASEDIR = TEST_UTIL.createRootDir();
178     OLDLOGDIR = new Path(HBASEDIR, HConstants.HREGION_OLDLOGDIR_NAME);
179     CORRUPTDIR = new Path(HBASEDIR, HConstants.CORRUPT_DIR_NAME);
180     TABLEDIR = FSUtils.getTableDir(HBASEDIR, TABLE_NAME);
181     REGIONS.clear();
182     Collections.addAll(REGIONS, "bbb", "ccc");
183     InstrumentedLogWriter.activateFailure = false;
184     this.mode = (conf.getBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false) ? 
185         RecoveryMode.LOG_REPLAY : RecoveryMode.LOG_SPLITTING);
186     wals = new WALFactory(conf, null, name.getMethodName());
187     WALDIR = new Path(HBASEDIR, DefaultWALProvider.getWALDirectoryName(name.getMethodName()));
188     //fs.mkdirs(WALDIR);
189   }
190 
191   @After
192   public void tearDown() throws Exception {
193     try {
194       wals.close();
195     } catch(IOException exception) {
196       // Some tests will move WALs out from under us. In those cases, we'll get an error on close.
197       LOG.info("Ignoring an error while closing down our WALFactory. Fine for some tests, but if" +
198          " you see a failure look here.");
199       LOG.debug("exception details", exception);
200     } finally {
201       wals = null;
202       fs.delete(HBASEDIR, true);
203     }
204   }
205 
206   /**
207    * Simulates splitting a WAL out from under a regionserver that is still trying to write it.
208    * Ensures we do not lose edits.
209    * @throws IOException
210    * @throws InterruptedException
211    */
212   @Test (timeout=300000)
213   public void testLogCannotBeWrittenOnceParsed() throws IOException, InterruptedException {
214     final AtomicLong counter = new AtomicLong(0);
215     AtomicBoolean stop = new AtomicBoolean(false);
216     // Region we'll write edits to and then later examine to make sure they all made it in.
217     final String region = REGIONS.get(0);
218     final int numWriters = 3;
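    // The zombie writes numWriters WAL files (leaving the last one open) and then keeps
    // appending to that last WAL while the robber splits the directory out from under it.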
219     Thread zombie = new ZombieLastLogWriterRegionServer(counter, stop, region, numWriters);
220     try {
221       long startCount = counter.get();
222       zombie.start();
223       // Wait till writer starts going.
224       while (startCount == counter.get()) Threads.sleep(1);
225       // Give it a second to write a few appends.
226       Threads.sleep(1000);
227       final Configuration conf2 = HBaseConfiguration.create(this.conf);
228       final User robber = User.createUserForTesting(conf2, ROBBER, GROUP);
229       int count = robber.runAs(new PrivilegedExceptionAction<Integer>() {
230         @Override
231         public Integer run() throws Exception {
232           StringBuilder ls = new StringBuilder("Contents of WALDIR (").append(WALDIR)
233               .append("):\n");
234           for (FileStatus status : fs.listStatus(WALDIR)) {
235             ls.append("\t").append(status.toString()).append("\n");
236           }
237           LOG.debug(ls);
238           LOG.info("Splitting WALs out from under zombie. Expecting " + numWriters + " files.");
239           WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, fs, conf2, wals);
240           LOG.info("Finished splitting out from under zombie.");
241           Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
242           assertEquals("wrong number of split files for region", numWriters, logfiles.length);
243           int count = 0;
244           for (Path logfile: logfiles) {
245             count += countWAL(logfile);
246           }
247           return count;
248         }
249       });
250       LOG.info("zombie=" + counter.get() + ", robber=" + count);
251       assertTrue("The split files may contain at most one extra edit, but never fewer. " +
252         "Zombie wrote " + counter.get() + " and the split files only had " + count,
253         counter.get() == count || counter.get() + 1 == count);
254     } finally {
255       stop.set(true);
256       zombie.interrupt();
257       Threads.threadDumpingIsAlive(zombie);
258     }
259   }
260 
261   /**
262    * This thread will keep writing to a 'wal' file even after the split process has started.
263    * It simulates a region server that was considered dead but woke up and wrote some more to the
264    * last WAL file. Does its writing as an alternate user in another filesystem instance to
265    * better simulate a regionserver.
266    */
267   class ZombieLastLogWriterRegionServer extends Thread {
268     final AtomicLong editsCount;
269     final AtomicBoolean stop;
270     final int numOfWriters;
271     /**
272      * Region to write edits for.
273      */
274     final String region;
275     final User user;
276 
277     public ZombieLastLogWriterRegionServer(AtomicLong counter, AtomicBoolean stop,
278         final String region, final int writers)
279         throws IOException, InterruptedException {
280       super("ZombieLastLogWriterRegionServer");
281       setDaemon(true);
282       this.stop = stop;
283       this.editsCount = counter;
284       this.region = region;
285       this.user = User.createUserForTesting(conf, ZOMBIE, GROUP);
286       numOfWriters = writers;
287     }
288 
289     @Override
290     public void run() {
291       try {
292         doWriting();
293       } catch (IOException e) {
294         LOG.warn(getName() + " Writer exiting " + e);
295       } catch (InterruptedException e) {
296         LOG.warn(getName() + " Writer exiting " + e);
297       }
298     }
299 
300     private void doWriting() throws IOException, InterruptedException {
301       this.user.runAs(new PrivilegedExceptionAction<Object>() {
302         @Override
303         public Object run() throws Exception {
304           // Index of the WAL we want to keep open.  generateWALs will leave open the WAL whose
305           // index we supply here.
306           int walToKeepOpen = numOfWriters - 1;
307           // The below method writes numOfWriters files each with ENTRIES entries for a total of
308           // numOfWriters * ENTRIES added per column family in the region.
309           Writer writer = null;
310           try {
311             writer = generateWALs(numOfWriters, ENTRIES, walToKeepOpen);
312           } catch (IOException e1) {
313             throw new RuntimeException("Failed", e1);
314           }
315           // Update the counter so it reflects all edits written so far.
316           editsCount.addAndGet(numOfWriters * ENTRIES);
317           loop(writer);
318           // If we've been interrupted, the WAL should have been moved out from under us,
319           // so closing should error out.
320           try {
321             writer.close();
322             fail("Closing the writer after the WAL has been split should give an error.");
323           } catch (IOException exception) {
324             LOG.debug("ignoring error when closing final writer.", exception);
325           }
326           return null;
327         }
328       });
329     }
330 
331     private void loop(final Writer writer) {
332       byte [] regionBytes = Bytes.toBytes(this.region);
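      // Keep appending single edits (and syncing) until told to stop or until an append
      // fails, which is expected once the split moves the WAL out from under us.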
333       while (!stop.get()) {
334         try {
335           long seq = appendEntry(writer, TABLE_NAME, regionBytes,
336               ("r" + editsCount.get()).getBytes(), regionBytes, QUALIFIER, VALUE, 0);
337           long count = editsCount.incrementAndGet();
338           LOG.info(getName() + " sync count=" + count + ", seq=" + seq);
339           try {
340             Thread.sleep(1);
341           } catch (InterruptedException e) {
342             //
343           }
344         } catch (IOException ex) {
345           LOG.error(getName() + " ex " + ex.toString());
346           if (ex instanceof RemoteException) {
347             LOG.error("Juliet: got RemoteException " + ex.getMessage() +
348               " while writing " + (editsCount.get() + 1));
349           } else {
350             LOG.error(getName() + " failed to write....at " + editsCount.get());
351             fail("Failed to write " + editsCount.get());
352           }
353           break;
354         } catch (Throwable t) {
355           LOG.error(getName() + " HOW? " + t);
356           LOG.debug("exception details", t);
357           break;
358         }
359       }
360       LOG.info(getName() + " Writer exiting");
361     }
362   }
363 
364   /**
365    * @throws IOException
366    * @see https://issues.apache.org/jira/browse/HBASE-3020
367    */
368   @Test (timeout=300000)
369   public void testRecoveredEditsPathForMeta() throws IOException {
370     byte [] encoded = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
371     Path tdir = FSUtils.getTableDir(HBASEDIR, TableName.META_TABLE_NAME);
372     Path regiondir = new Path(tdir,
373         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
374     fs.mkdirs(regiondir);
375     long now = System.currentTimeMillis();
376     Entry entry =
377         new Entry(new WALKey(encoded,
378             TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID),
379       new WALEdit());
380     Path p = WALSplitter.getRegionSplitEditsPath(fs, entry, HBASEDIR, true);
381     String parentOfParent = p.getParent().getParent().getName();
382     assertEquals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), parentOfParent);
383   }
384 
385   /**
386    * Test old recovered edits file doesn't break WALSplitter.
387    * This is useful in upgrading old instances.
388    */
389   @Test (timeout=300000)
390   public void testOldRecoveredEditsFileSidelined() throws IOException {
391     byte [] encoded = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
392     Path tdir = FSUtils.getTableDir(HBASEDIR, TableName.META_TABLE_NAME);
393     Path regiondir = new Path(tdir,
394         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
395     fs.mkdirs(regiondir);
396     long now = System.currentTimeMillis();
397     Entry entry =
398         new Entry(new WALKey(encoded,
399             TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID),
400       new WALEdit());
401     Path parent = WALSplitter.getRegionDirRecoveredEditsDir(regiondir);
402     assertEquals(HConstants.RECOVERED_EDITS_DIR, parent.getName());
403     fs.createNewFile(parent); // create a recovered.edits file
404 
405     Path p = WALSplitter.getRegionSplitEditsPath(fs, entry, HBASEDIR, true);
406     String parentOfParent = p.getParent().getParent().getName();
407     assertEquals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), parentOfParent);
408     WALFactory.createRecoveredEditsWriter(fs, p, conf).close();
409   }
410 
411   private void useDifferentDFSClient() throws IOException {
412     // make fs act as a different client now
413     // initialize will create a new DFSClient with a new client ID
414     fs.initialize(fs.getUri(), conf);
415   }
416 
417   @Test (timeout=300000)
418   public void testSplitPreservesEdits() throws IOException{
419     final String REGION = "region__1";
420     REGIONS.clear();
421     REGIONS.add(REGION);
422 
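    // One writer and one region, so the single split file should contain exactly the
    // entries of the original WAL, in order.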
423     generateWALs(1, 10, -1);
424     useDifferentDFSClient();
425     WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
426     Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath();
427     Path[] splitLog = getLogForRegion(HBASEDIR, TABLE_NAME, REGION);
428     assertEquals(1, splitLog.length);
429 
430     assertTrue("edits differ after split", logsAreEqual(originalLog, splitLog[0]));
431   }
432 
433   /**
434    * @param expectedEntries expected entry count per region; -1 to skip the assertion
435    * @return the entry count across all regions
436    */
437   private int splitAndCount(final int expectedFiles, final int expectedEntries)
438       throws IOException {
439     useDifferentDFSClient();
440     WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
441     int result = 0;
442     for (String region : REGIONS) {
443       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
444       assertEquals(expectedFiles, logfiles.length);
445       int count = 0;
446       for (Path logfile: logfiles) {
447         count += countWAL(logfile);
448       }
449       if (-1 != expectedEntries) {
450         assertEquals(expectedEntries, count);
451       }
452       result += count;
453     }
454     return result;
455   }
456 
457   @Test (timeout=300000)
458   public void testEmptyLogFiles() throws IOException {
459     testEmptyLogFiles(true);
460   }
461 
462   @Test (timeout=300000)
463   public void testEmptyOpenLogFiles() throws IOException {
464     testEmptyLogFiles(false);
465   }
466 
467   private void testEmptyLogFiles(final boolean close) throws IOException {
468     injectEmptyFile(".empty", close);
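    // Integer.MAX_VALUE matches no writer index, so generateWALs closes every WAL it writes.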
469     generateWALs(Integer.MAX_VALUE);
470     injectEmptyFile("empty", close);
471     splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES); // skip 2 empty
472   }
473 
474   @Test (timeout=300000)
475   public void testOpenZeroLengthReportedFileButWithDataGetsSplit() throws IOException {
476     // generate logs but leave wal.dat.5 open.
477     generateWALs(5);
478     splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES);
479   }
480 
481   @Test (timeout=300000)
482   public void testTrailingGarbageCorruptionFileSkipErrorsPasses() throws IOException {
483     conf.setBoolean(HBASE_SKIP_ERRORS, true);
484     generateWALs(Integer.MAX_VALUE);
485     corruptWAL(new Path(WALDIR, WAL_FILE_PREFIX + "5"),
486             Corruptions.APPEND_GARBAGE, true);
487     splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES);
488   }
489 
490   @Test (timeout=300000)
491   public void testFirstLineCorruptionLogFileSkipErrorsPasses() throws IOException {
492     conf.setBoolean(HBASE_SKIP_ERRORS, true);
493     generateWALs(Integer.MAX_VALUE);
494     corruptWAL(new Path(WALDIR, WAL_FILE_PREFIX + "5"),
495             Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true);
496     splitAndCount(NUM_WRITERS - 1, (NUM_WRITERS - 1) * ENTRIES); //1 corrupt
497   }
498 
499   @Test (timeout=300000)
500   public void testMiddleGarbageCorruptionSkipErrorsReadsHalfOfFile() throws IOException {
501     conf.setBoolean(HBASE_SKIP_ERRORS, true);
502     generateWALs(Integer.MAX_VALUE);
503     corruptWAL(new Path(WALDIR, WAL_FILE_PREFIX + "5"),
504             Corruptions.INSERT_GARBAGE_IN_THE_MIDDLE, false);
505     // The entries in the original logs alternate between regions, so garbage inserted
506     // in the middle of one file (after its header) should still leave at least the
507     // first half of that file's entries parseable.
508     int goodEntries = (NUM_WRITERS - 1) * ENTRIES;
509     int firstHalfEntries = (int) Math.ceil(ENTRIES / 2.0) - 1;
510     int allRegionsCount = splitAndCount(NUM_WRITERS, -1);
511     assertTrue("The file up to the corrupted area hasn't been parsed",
512         REGIONS.size() * (goodEntries + firstHalfEntries) <= allRegionsCount);
513   }
514 
515   @Test (timeout=300000)
516   public void testCorruptedFileGetsArchivedIfSkipErrors() throws IOException {
517     conf.setBoolean(HBASE_SKIP_ERRORS, true);
518     for (FaultySequenceFileLogReader.FailureType  failureType :
519         FaultySequenceFileLogReader.FailureType.values()) {
520       final Set<String> walDirContents = splitCorruptWALs(failureType);
521       final Set<String> archivedLogs = new HashSet<String>();
522       final StringBuilder archived = new StringBuilder("Archived logs in CORRUPTDIR:");
523       for (FileStatus log : fs.listStatus(CORRUPTDIR)) {
524         archived.append("\n\t").append(log.toString());
525         archivedLogs.add(log.getPath().getName());
526       }
527       LOG.debug(archived.toString());
528       assertEquals(failureType.name() + ": expected to find all of our wals corrupt.",
529           walDirContents, archivedLogs);
530     }
531   }
532 
533   /**
534    * @return set of wal names present prior to split attempt.
535    * @throws IOException if the split process fails
536    */
537   private Set<String> splitCorruptWALs(final FaultySequenceFileLogReader.FailureType failureType)
538       throws IOException {
539     Class<?> backupClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
540         Reader.class);
541     InstrumentedLogWriter.activateFailure = false;
542 
543     try {
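      // Swap in a reader that fails at the configured point so the splitter treats every
      // WAL in the directory as corrupt.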
544       conf.setClass("hbase.regionserver.hlog.reader.impl",
545           FaultySequenceFileLogReader.class, Reader.class);
546       conf.set("faultysequencefilelogreader.failuretype", failureType.name());
547       // Clean up from previous tests or previous loop
548       try {
549         wals.shutdown();
550       } catch (IOException exception) {
551         // since we're splitting out from under the factory, we should expect some closing failures.
552         LOG.debug("Ignoring problem closing WALFactory.", exception);
553       }
554       wals.close();
555       try {
556         for (FileStatus log : fs.listStatus(CORRUPTDIR)) {
557           fs.delete(log.getPath(), true);
558         }
559       } catch (FileNotFoundException exception) {
560         LOG.debug("no previous CORRUPTDIR to clean.");
561       }
562       // change to the faulty reader
563       wals = new WALFactory(conf, null, name.getMethodName());
564       generateWALs(-1);
565       // Our reader will render all of these files corrupt.
566       final Set<String> walDirContents = new HashSet<String>();
567       for (FileStatus status : fs.listStatus(WALDIR)) {
568         walDirContents.add(status.getPath().getName());
569       }
570       useDifferentDFSClient();
571       WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
572       return walDirContents;
573     } finally {
574       conf.setClass("hbase.regionserver.hlog.reader.impl", backupClass,
575           Reader.class);
576     }
577   }
578 
579   @Test (timeout=300000, expected = IOException.class)
580   public void testTrailingGarbageCorruptionLogFileSkipErrorsFalseThrows()
581       throws IOException {
582     conf.setBoolean(HBASE_SKIP_ERRORS, false);
583     splitCorruptWALs(FaultySequenceFileLogReader.FailureType.BEGINNING);
584   }
585 
586   @Test (timeout=300000)
587   public void testCorruptedLogFilesSkipErrorsFalseDoesNotTouchLogs()
588       throws IOException {
589     conf.setBoolean(HBASE_SKIP_ERRORS, false);
590     try {
591       splitCorruptWALs(FaultySequenceFileLogReader.FailureType.BEGINNING);
592     } catch (IOException e) {
593       LOG.debug("split with 'skip errors' set to 'false' correctly threw");
594     }
595     assertEquals("if skip.errors is false all files should remain in place",
596         NUM_WRITERS + 1 /* Factory WAL */, fs.listStatus(WALDIR).length);
597   }
598 
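  /**
   * Writes a single WAL with {@code entryCount} edits for one region, applies the given
   * corruption, splits the log, and verifies that {@code expectedCount} edits survive and
   * that nothing was archived as corrupt (EOF-style damage should be tolerated).
   */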
599   private void ignoreCorruption(final Corruptions corruption, final int entryCount,
600       final int expectedCount) throws IOException {
601     conf.setBoolean(HBASE_SKIP_ERRORS, false);
602 
603     final String REGION = "region__1";
604     REGIONS.clear();
605     REGIONS.add(REGION);
606 
607     Path c1 = new Path(WALDIR, WAL_FILE_PREFIX + "0");
608     generateWALs(1, entryCount, -1);
609     corruptWAL(c1, corruption, true);
610 
611     useDifferentDFSClient();
612     WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
613 
614     Path[] splitLog = getLogForRegion(HBASEDIR, TABLE_NAME, REGION);
615     assertEquals(1, splitLog.length);
616 
617     int actualCount = 0;
618     Reader in = wals.createReader(fs, splitLog[0]);
619     @SuppressWarnings("unused")
620     Entry entry;
621     while ((entry = in.next()) != null) ++actualCount;
622     assertEquals(expectedCount, actualCount);
623     in.close();
624 
625     // should not have stored the EOF files as corrupt
626     FileStatus[] archivedLogs = fs.listStatus(CORRUPTDIR);
627     assertEquals(0, archivedLogs.length);
628 
629   }
630 
631   @Test (timeout=300000)
632   public void testEOFisIgnored() throws IOException {
633     int entryCount = 10;
634     ignoreCorruption(Corruptions.TRUNCATE, entryCount, entryCount-1);
635   }
636 
637   @Test (timeout=300000)
638   public void testCorruptWALTrailer() throws IOException {
639     int entryCount = 10;
640     ignoreCorruption(Corruptions.TRUNCATE_TRAILER, entryCount, entryCount);
641   }
642 
643   @Test (timeout=300000)
644   public void testLogsGetArchivedAfterSplit() throws IOException {
645     conf.setBoolean(HBASE_SKIP_ERRORS, false);
646     generateWALs(-1);
647     useDifferentDFSClient();
648     WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
649     FileStatus[] archivedLogs = fs.listStatus(OLDLOGDIR);
650     assertEquals("wrong number of files in the archive log", NUM_WRITERS + 1 /* wal from factory */,
651         archivedLogs.length);
652   }
653 
654   @Test (timeout=300000)
655   public void testSplit() throws IOException {
656     generateWALs(-1);
657     splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES);
658   }
659 
660   @Test (timeout=300000)
661   public void testLogDirectoryShouldBeDeletedAfterSuccessfulSplit()
662   throws IOException {
663     generateWALs(-1);
664     useDifferentDFSClient();
665     WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
666     FileStatus [] statuses = null;
667     try {
668       statuses = fs.listStatus(WALDIR);
669       if (statuses != null) {
670         fail("Files left in log dir: " +
671             Joiner.on(",").join(FileUtil.stat2Paths(statuses)));
672       }
673     } catch (FileNotFoundException e) {
674       // hadoop 0.21 throws FNFE whereas hadoop 0.20 returns null
675     }
676   }
677 
678   @Test(timeout=300000, expected = IOException.class)
679   public void testSplitWillFailIfWritingToRegionFails() throws Exception {
680     // Leave the 5th log open so we can append the "trap".
681     Writer writer = generateWALs(4);
682     useDifferentDFSClient();
683 
684     String region = "break";
685     Path regiondir = new Path(TABLEDIR, region);
686     fs.mkdirs(regiondir);
687 
688     InstrumentedLogWriter.activateFailure = false;
689     appendEntry(writer, TABLE_NAME, Bytes.toBytes(region),
690         ("r" + 999).getBytes(), FAMILY, QUALIFIER, VALUE, 0);
691     writer.close();
692 
693     try {
694       InstrumentedLogWriter.activateFailure = true;
695       WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
696     } catch (IOException e) {
697       assertTrue(e.getMessage().
698         contains("This exception is instrumented and should only be thrown for testing"));
699       throw e;
700     } finally {
701       InstrumentedLogWriter.activateFailure = false;
702     }
703   }
704 
705   @Test (timeout=300000)
706   public void testSplitDeletedRegion() throws IOException {
707     REGIONS.clear();
708     String region = "region_that_splits";
709     REGIONS.add(region);
710 
711     generateWALs(1);
712     useDifferentDFSClient();
713 
714     Path regiondir = new Path(TABLEDIR, region);
715     fs.delete(regiondir, true);
716     WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
717     assertFalse(fs.exists(regiondir));
718   }
719 
720   @Test (timeout=300000)
721   public void testIOEOnOutputThread() throws Exception {
722     conf.setBoolean(HBASE_SKIP_ERRORS, false);
723 
724     generateWALs(-1);
725     useDifferentDFSClient();
726     FileStatus[] logfiles = fs.listStatus(WALDIR);
727     assertTrue("There should be some log file",
728       logfiles != null && logfiles.length > 0);
729     // wals with no entries (like the one we don't use in the factory)
730     // won't cause a failure since nothing will ever be written.
731     // pick the largest one since it's most likely to have entries.
732     int largestLogFile = 0;
733     long largestSize = 0;
734     for (int i = 0; i < logfiles.length; i++) {
735       if (logfiles[i].getLen() > largestSize) {
736         largestLogFile = i;
737         largestSize = logfiles[i].getLen();
738       }
739     }
740     assertTrue("There should be some log greater than size 0.", 0 < largestSize);
741     // Set up a splitter that will throw an IOE on the output side
742     WALSplitter logSplitter = new WALSplitter(wals,
743         conf, HBASEDIR, fs, null, null, this.mode) {
744       @Override
745       protected Writer createWriter(Path logfile) throws IOException {
746         Writer mockWriter = Mockito.mock(Writer.class);
747         Mockito.doThrow(new IOException("Injected")).when(
748           mockWriter).append(Mockito.<Entry>any());
749         return mockWriter;
750       }
751     };
752     // Set up a background thread dumper.  Needs a thread to depend on and then we need to run
753     // the thread dumping in a background thread so it does not hold up the test.
754     final AtomicBoolean stop = new AtomicBoolean(false);
755     final Thread someOldThread = new Thread("Some-old-thread") {
756       @Override
757       public void run() {
758         while(!stop.get()) Threads.sleep(10);
759       }
760     };
761     someOldThread.setDaemon(true);
762     someOldThread.start();
763     final Thread t = new Thread("Background-thread-dumper") {
764       public void run() {
765         try {
766           Threads.threadDumpingIsAlive(someOldThread);
767         } catch (InterruptedException e) {
768           e.printStackTrace();
769         }
770       }
771     };
772     t.setDaemon(true);
773     t.start();
774     try {
775       logSplitter.splitLogFile(logfiles[largestLogFile], null);
776       fail("Didn't throw!");
777     } catch (IOException ioe) {
778       assertTrue(ioe.toString().contains("Injected"));
779     } finally {
780       // Setting this to true will turn off the background thread dumper.
781       stop.set(true);
782     }
783   }
784 
785   /**
786    * @param spiedFs should be instrumented for failure.
787    */
788   private void retryOverHdfsProblem(final FileSystem spiedFs) throws Exception {
789     generateWALs(-1);
790     useDifferentDFSClient();
791 
792     try {
793       WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, spiedFs, conf, wals);
794       assertEquals(NUM_WRITERS + 1 /* wal created by factory */, fs.listStatus(OLDLOGDIR).length);
795       assertFalse(fs.exists(WALDIR));
796     } catch (IOException e) {
797       fail("There shouldn't be any exception but: " + e.toString());
798     }
799   }
800 
801   // Test for HBASE-3412
802   @Test (timeout=300000)
803   public void testMovedWALDuringRecovery() throws Exception {
804     // This partial mock will throw LEE for every file simulating
805     // files that were moved
806     FileSystem spiedFs = Mockito.spy(fs);
807     // The "File does not exist" part is very important,
808     // that's how it comes out of HDFS
809     Mockito.doThrow(new LeaseExpiredException("Injected: File does not exist")).
810         when(spiedFs).append(Mockito.<Path>any());
811     retryOverHdfsProblem(spiedFs);
812   }
813 
814   @Test (timeout=300000)
815   public void testRetryOpenDuringRecovery() throws Exception {
816     FileSystem spiedFs = Mockito.spy(fs);
817     // The "Cannot obtain block length", "Could not obtain the last block",
818     // and "Blocklist for [^ ]* has changed.*" part is very important,
819     // that's how it comes out of HDFS. If HDFS changes the exception
820     // message, this test needs to be adjusted accordingly.
821     //
822     // When DFSClient tries to open a file, HDFS needs to locate
823     // the last block of the file and get its length. However, if the
824     // last block is under recovery, HDFS may have problem to obtain
825     // the block length, in which case, retry may help.
826     Mockito.doAnswer(new Answer<FSDataInputStream>() {
827       private final String[] errors = new String[] {
828         "Cannot obtain block length", "Could not obtain the last block",
829         "Blocklist for " + OLDLOGDIR + " has changed"};
830       private int count = 0;
831 
832       public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable {
833         if (count < 3) {
834           throw new IOException(errors[count++]);
835         }
836         return (FSDataInputStream) invocation.callRealMethod();
837       }
838     }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt());
839     retryOverHdfsProblem(spiedFs);
840   }
841 
842   @Test (timeout=300000)
843   public void testTerminationAskedByReporter() throws IOException, CorruptedLogFileException {
844     generateWALs(1, 10, -1);
845     FileStatus logfile = fs.listStatus(WALDIR)[0];
846     useDifferentDFSClient();
847 
848     final AtomicInteger count = new AtomicInteger();
849 
850     CancelableProgressable localReporter
851       = new CancelableProgressable() {
852         @Override
853         public boolean progress() {
854           count.getAndIncrement();
855           return false;
856         }
857       };
858 
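    // Delay every open long enough for the report period to elapse so the canceling
    // reporter above gets invoked, which should abort the split.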
859     FileSystem spiedFs = Mockito.spy(fs);
860     Mockito.doAnswer(new Answer<FSDataInputStream>() {
861       public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable {
862         Thread.sleep(1500); // Sleep a while so the periodic report status gets invoked
863         return (FSDataInputStream)invocation.callRealMethod();
864       }
865     }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt());
866 
867     try {
868       conf.setInt("hbase.splitlog.report.period", 1000);
869       boolean ret = WALSplitter.splitLogFile(
870         HBASEDIR, logfile, spiedFs, conf, localReporter, null, null, this.mode, wals);
871       assertFalse("Log splitting should fail", ret);
872       assertTrue(count.get() > 0);
873     } catch (IOException e) {
874       fail("There shouldn't be any exception but: " + e.toString());
875     } finally {
876       // reset it back to its default value
877       conf.setInt("hbase.splitlog.report.period", 59000);
878     }
879   }
880 
881   /**
882    * Test log split process with fake data and lots of edits to trigger threading
883    * issues.
884    */
885   @Test (timeout=300000)
886   public void testThreading() throws Exception {
887     doTestThreading(20000, 128*1024*1024, 0);
888   }
889 
890   /**
891    * Test blocking behavior of the log split process if writers are writing slower
892    * than the reader is reading.
893    */
894   @Test (timeout=300000)
895   public void testThreadingSlowWriterSmallBuffer() throws Exception {
896     doTestThreading(200, 1024, 50);
897   }
898 
899   /**
900    * Sets up a log splitter with a mock reader and writer. The mock reader generates
901    * a specified number of edits spread across 5 regions. The mock writer optionally
902    * sleeps for each edit it is fed.
903    *
904    * After the split is complete, verifies that the statistics show the correct number
905    * of edits output into each region.
906    *
907    * @param numFakeEdits number of fake edits to push through pipeline
908    * @param bufferSize size of in-memory buffer
909    * @param writerSlowness writer threads will sleep this many ms per edit
910    */
911   private void doTestThreading(final int numFakeEdits,
912       final int bufferSize,
913       final int writerSlowness) throws Exception {
914 
915     Configuration localConf = new Configuration(conf);
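    // The buffer size bounds how far the reader can get ahead of the writer threads; a
    // small value (see testThreadingSlowWriterSmallBuffer) forces the reader to block.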
916     localConf.setInt("hbase.regionserver.hlog.splitlog.buffersize", bufferSize);
917 
918     // Create a fake log file (we'll override the reader to produce a stream of edits)
919     Path logPath = new Path(WALDIR, WAL_FILE_PREFIX + ".fake");
920     FSDataOutputStream out = fs.create(logPath);
921     out.close();
922 
923     // Make region dirs for our destination regions so the output doesn't get skipped
924     final List<String> regions = ImmutableList.of("r0", "r1", "r2", "r3", "r4");
925     makeRegionDirs(regions);
926 
927     // Create a splitter that reads and writes the data without touching disk
928     WALSplitter logSplitter = new WALSplitter(wals,
929         localConf, HBASEDIR, fs, null, null, this.mode) {
930 
931       /* Produce a mock writer that doesn't write anywhere */
932       @Override
933       protected Writer createWriter(Path logfile) throws IOException {
934         Writer mockWriter = Mockito.mock(Writer.class);
935         Mockito.doAnswer(new Answer<Void>() {
936           int expectedIndex = 0;
937 
938           @Override
939           public Void answer(InvocationOnMock invocation) {
940             if (writerSlowness > 0) {
941               try {
942                 Thread.sleep(writerSlowness);
943               } catch (InterruptedException ie) {
944                 Thread.currentThread().interrupt();
945               }
946             }
947             Entry entry = (Entry) invocation.getArguments()[0];
948             WALEdit edit = entry.getEdit();
949             List<Cell> cells = edit.getCells();
950             assertEquals(1, cells.size());
951             Cell cell = cells.get(0);
952 
953             // Check that the edits come in the right order.
954             assertEquals(expectedIndex, Bytes.toInt(cell.getRow()));
955             expectedIndex++;
956             return null;
957           }
958         }).when(mockWriter).append(Mockito.<Entry>any());
959         return mockWriter;
960       }
961 
962       /* Produce a mock reader that generates fake entries */
963       @Override
964       protected Reader getReader(Path curLogFile, CancelableProgressable reporter)
965           throws IOException {
966         Reader mockReader = Mockito.mock(Reader.class);
967         Mockito.doAnswer(new Answer<Entry>() {
968           int index = 0;
969 
970           @Override
971           public Entry answer(InvocationOnMock invocation) throws Throwable {
972             if (index >= numFakeEdits) return null;
973 
974             // Generate r0 through r4 in round robin fashion
975             int regionIdx = index % regions.size();
976             byte region[] = new byte[] {(byte)'r', (byte) (0x30 + regionIdx)};
977 
978             Entry ret = createTestEntry(TABLE_NAME, region,
979                 Bytes.toBytes((int)(index / regions.size())),
980                 FAMILY, QUALIFIER, VALUE, index);
981             index++;
982             return ret;
983           }
984         }).when(mockReader).next();
985         return mockReader;
986       }
987     };
988 
989     logSplitter.splitLogFile(fs.getFileStatus(logPath), null);
990 
991     // Verify number of written edits per region
992     Map<byte[], Long> outputCounts = logSplitter.outputSink.getOutputCounts();
993     for (Map.Entry<byte[], Long> entry : outputCounts.entrySet()) {
994       LOG.info("Got " + entry.getValue() + " output edits for region " +
995           Bytes.toString(entry.getKey()));
996       assertEquals(numFakeEdits / regions.size(), (long) entry.getValue());
997     }
998     assertEquals("Should have as many outputs as regions", regions.size(), outputCounts.size());
999   }
1000 
1001   // Does leaving the writer open in testSplitDeletedRegion matter enough for two tests?
1002   @Test (timeout=300000)
1003   public void testSplitLogFileDeletedRegionDir() throws IOException {
1004     LOG.info("testSplitLogFileDeletedRegionDir");
1005     final String REGION = "region__1";
1006     REGIONS.clear();
1007     REGIONS.add(REGION);
1008 
1009     generateWALs(1, 10, -1);
1010     useDifferentDFSClient();
1011 
1012     Path regiondir = new Path(TABLEDIR, REGION);
1013     LOG.info("Region directory is " + regiondir);
1014     fs.delete(regiondir, true);
1015     WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
1016     assertFalse(fs.exists(regiondir));
1017   }
1018 
1019   @Test (timeout=300000)
1020   public void testSplitLogFileEmpty() throws IOException {
1021     LOG.info("testSplitLogFileEmpty");
1022     injectEmptyFile(".empty", true);
1023     useDifferentDFSClient();
1024 
1025     WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
1026     Path tdir = FSUtils.getTableDir(HBASEDIR, TABLE_NAME);
1027     assertFalse(fs.exists(tdir));
1028 
1029     assertEquals(0, countWAL(fs.listStatus(OLDLOGDIR)[0].getPath()));
1030   }
1031 
1032   @Test (timeout=300000)
1033   public void testSplitLogFileMultipleRegions() throws IOException {
1034     LOG.info("testSplitLogFileMultipleRegions");
1035     generateWALs(1, 10, -1);
1036     splitAndCount(1, 10);
1037   }
1038 
1039   @Test (timeout=300000)
1040   public void testSplitLogFileFirstLineCorruptionLog()
1041   throws IOException {
1042     conf.setBoolean(HBASE_SKIP_ERRORS, true);
1043     generateWALs(1, 10, -1);
1044     FileStatus logfile = fs.listStatus(WALDIR)[0];
1045 
1046     corruptWAL(logfile.getPath(),
1047         Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true);
1048 
1049     useDifferentDFSClient();
1050     WALSplitter.split(HBASEDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
1051 
1052     final Path corruptDir = new Path(FSUtils.getRootDir(conf), conf.get(
1053         "hbase.regionserver.hlog.splitlog.corrupt.dir", HConstants.CORRUPT_DIR_NAME));
1054     assertEquals(1, fs.listStatus(corruptDir).length);
1055   }
1056 
1057   /**
1058    * @throws IOException
1059    * @see https://issues.apache.org/jira/browse/HBASE-4862
1060    */
1061   @Test (timeout=300000)
1062   public void testConcurrentSplitLogAndReplayRecoverEdit() throws IOException {
1063     LOG.info("testConcurrentSplitLogAndReplayRecoverEdit");
1064     // Generate wals for our destination region
1065     String regionName = "r0";
1066     final Path regiondir = new Path(TABLEDIR, regionName);
1067     REGIONS.clear();
1068     REGIONS.add(regionName);
1069     generateWALs(-1);
1070 
1071     wals.getWAL(Bytes.toBytes(regionName));
1072     FileStatus[] logfiles = fs.listStatus(WALDIR);
1073     assertTrue("There should be some log file",
1074       logfiles != null && logfiles.length > 0);
1075 
1076     WALSplitter logSplitter = new WALSplitter(wals,
1077         conf, HBASEDIR, fs, null, null, this.mode) {
1078       @Override
1079       protected Writer createWriter(Path logfile)
1080       throws IOException {
1081         Writer writer = wals.createRecoveredEditsWriter(this.fs, logfile);
1082         // After creating writer, simulate region's
1083         // replayRecoveredEditsIfAny() which gets SplitEditFiles of this
1084         // region and delete them, excluding files with '.temp' suffix.
1085         NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(fs, regiondir);
1086         if (files != null && !files.isEmpty()) {
1087           for (Path file : files) {
1088             if (!this.fs.delete(file, false)) {
1089               LOG.error("Failed delete of " + file);
1090             } else {
1091               LOG.debug("Deleted recovered.edits file=" + file);
1092             }
1093           }
1094         }
1095         return writer;
1096       }
1097     };
1098     try{
1099       logSplitter.splitLogFile(logfiles[0], null);
1100     } catch (IOException e) {
1101       LOG.info(e);
1102       fail("Got an IOException while splitting the log, most likely because the file "
1103           + "being written no longer exists due to a concurrent "
1104           + "replayRecoveredEditsIfAny()");
1105     }
1106     if (fs.exists(CORRUPTDIR)) {
1107       if (fs.listStatus(CORRUPTDIR).length > 0) {
1108         fail("There are some corrupt logs, "
1109                 + "it is most likely caused by concurrent replayRecoveredEditsIfAny()");
1110       }
1111     }
1112   }
1113 
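  /**
   * Writes NUM_WRITERS WAL files with ENTRIES edits per region in each.
   * @param leaveOpen index of the writer to leave un-closed; pass a negative value or
   *          anything >= NUM_WRITERS (e.g. Integer.MAX_VALUE) to close them all.
   */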
1114   private Writer generateWALs(int leaveOpen) throws IOException {
1115     return generateWALs(NUM_WRITERS, ENTRIES, leaveOpen);
1116   }
1117 
1118   private void makeRegionDirs(List<String> regions) throws IOException {
1119     for (String region : regions) {
1120       LOG.debug("Creating dir for region " + region);
1121       fs.mkdirs(new Path(TABLEDIR, region));
1122     }
1123   }
1124 
1125   /**
1126    * @param leaveOpen index of the writer to leave un-closed; -1 (or any out-of-range value) to close all.
1127    * @return the writer that's still open, or null if all were closed.
1128    */
1129   private Writer generateWALs(int writers, int entries, int leaveOpen) throws IOException {
1130     makeRegionDirs(REGIONS);
1131     fs.mkdirs(WALDIR);
1132     Writer [] ws = new Writer[writers];
1133     int seq = 0;
1134     for (int i = 0; i < writers; i++) {
1135       ws[i] = wals.createWALWriter(fs, new Path(WALDIR, WAL_FILE_PREFIX + i));
1136       for (int j = 0; j < entries; j++) {
1137         int prefix = 0;
1138         for (String region : REGIONS) {
1139           String row_key = region + prefix++ + i + j;
1140           appendEntry(ws[i], TABLE_NAME, region.getBytes(), row_key.getBytes(), FAMILY, QUALIFIER,
1141               VALUE, seq++);
1142         }
1143       }
1144       if (i != leaveOpen) {
1145         ws[i].close();
1146         LOG.info("Closing writer " + i);
1147       }
1148     }
1149     if (leaveOpen < 0 || leaveOpen >= writers) {
1150       return null;
1151     }
1152     return ws[leaveOpen];
1153   }
1154 
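  /**
   * @return paths of the recovered.edits files written for the given region by the split,
   *         excluding any sequence-id marker files.
   */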
1155   private Path[] getLogForRegion(Path rootdir, TableName table, String region)
1156   throws IOException {
1157     Path tdir = FSUtils.getTableDir(rootdir, table);
1158     @SuppressWarnings("deprecation")
1159     Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir,
1160       Bytes.toString(region.getBytes())));
1161     FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
1162       @Override
1163       public boolean accept(Path p) {
1164         if (WALSplitter.isSequenceIdFile(p)) {
1165           return false;
1166         }
1167         return true;
1168       }
1169     });
1170     Path[] paths = new Path[files.length];
1171     for (int i = 0; i < files.length; i++) {
1172       paths[i] = files[i].getPath();
1173     }
1174     return paths;
1175   }
1176 
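  /**
   * Reads the WAL at {@code path} fully, rewrites it with the requested corruption, and then
   * either closes the rewritten file or merely flushes it (see closeOrFlush) to mimic a file
   * that is still being written.
   */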
1177   private void corruptWAL(Path path, Corruptions corruption, boolean close) throws IOException {
1178     FSDataOutputStream out;
1179     int fileSize = (int) fs.listStatus(path)[0].getLen();
1180 
1181     FSDataInputStream in = fs.open(path);
1182     byte[] corrupted_bytes = new byte[fileSize];
1183     in.readFully(0, corrupted_bytes, 0, fileSize);
1184     in.close();
1185 
1186     switch (corruption) {
1187       case APPEND_GARBAGE:
1188         fs.delete(path, false);
1189         out = fs.create(path);
1190         out.write(corrupted_bytes);
1191         out.write("-----".getBytes());
1192         closeOrFlush(close, out);
1193         break;
1194 
1195       case INSERT_GARBAGE_ON_FIRST_LINE:
1196         fs.delete(path, false);
1197         out = fs.create(path);
1198         out.write(0);
1199         out.write(corrupted_bytes);
1200         closeOrFlush(close, out);
1201         break;
1202 
1203       case INSERT_GARBAGE_IN_THE_MIDDLE:
1204         fs.delete(path, false);
1205         out = fs.create(path);
1206         int middle = corrupted_bytes.length / 2;
1207         out.write(corrupted_bytes, 0, middle);
1208         out.write(0);
1209         out.write(corrupted_bytes, middle, corrupted_bytes.length - middle);
1210         closeOrFlush(close, out);
1211         break;
1212 
1213       case TRUNCATE:
1214         fs.delete(path, false);
1215         out = fs.create(path);
1216         out.write(corrupted_bytes, 0, fileSize
1217           - (32 + ProtobufLogReader.PB_WAL_COMPLETE_MAGIC.length + Bytes.SIZEOF_INT));
1218         closeOrFlush(close, out);
1219         break;
1220 
1221       case TRUNCATE_TRAILER:
1222         fs.delete(path, false);
1223         out = fs.create(path);
1224         out.write(corrupted_bytes, 0, fileSize - Bytes.SIZEOF_INT);// trailer is truncated.
1225         closeOrFlush(close, out);
1226         break;
1227     }
1228   }
1229 
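  /**
   * Closes the stream, or just flushes it by reflectively invoking hflush()/sync(),
   * whichever this version of Hadoop provides.
   */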
1230   private void closeOrFlush(boolean close, FSDataOutputStream out)
1231   throws IOException {
1232     if (close) {
1233       out.close();
1234     } else {
1235       Method syncMethod = null;
1236       try {
1237         syncMethod = out.getClass().getMethod("hflush", new Class<?> []{});
1238       } catch (NoSuchMethodException e) {
1239         try {
1240           syncMethod = out.getClass().getMethod("sync", new Class<?> []{});
1241         } catch (NoSuchMethodException ex) {
1242           throw new IOException("This version of Hadoop supports " +
1243               "neither Syncable.sync() nor Syncable.hflush().");
1244         }
1245       }
1246       try {
1247         syncMethod.invoke(out, new Object[]{});
1248       } catch (Exception e) {
1249         throw new IOException(e);
1250       }
1251       // out.hflush() is not called directly since it may not exist in older Hadoop versions.
1252     }
1253   }
1254 
1255   private int countWAL(Path log) throws IOException {
1256     int count = 0;
1257     Reader in = wals.createReader(fs, log);
1258     while (in.next() != null) {
1259       count++;
1260     }
1261     in.close();
1262     return count;
1263   }
1264 
1265   public static long appendEntry(Writer writer, TableName table, byte[] region,
1266                           byte[] row, byte[] family, byte[] qualifier,
1267                           byte[] value, long seq)
1268           throws IOException {
1269     LOG.info(Thread.currentThread().getName() + " append");
1270     writer.append(createTestEntry(table, region, row, family, qualifier, value, seq));
1271     LOG.info(Thread.currentThread().getName() + " sync");
1272     writer.sync();
1273     return seq;
1274   }
1275 
1276   private static Entry createTestEntry(
1277       TableName table, byte[] region,
1278       byte[] row, byte[] family, byte[] qualifier,
1279       byte[] value, long seq) {
1280     long time = System.nanoTime();
1281     WALEdit edit = new WALEdit();
1282     seq++;
1283     edit.add(new KeyValue(row, family, qualifier, time, KeyValue.Type.Put, value));
1284     return new Entry(new WALKey(region, table, seq, time,
1285         HConstants.DEFAULT_CLUSTER_ID), edit);
1286   }
1287 
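  /**
   * Creates a WAL file with no entries; when {@code closeFile} is false the writer is left
   * open to mimic a WAL that is still being written.
   */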
1288   private void injectEmptyFile(String suffix, boolean closeFile)
1289           throws IOException {
1290     Writer writer = wals.createWALWriter(fs, new Path(WALDIR, WAL_FILE_PREFIX + suffix),
1291         conf);
1292     if (closeFile) writer.close();
1293   }
1294 
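  /**
   * @return true if the second WAL matches the first entry-for-entry (keys and edits),
   *         compared for as many entries as the first WAL contains.
   */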
1295   private boolean logsAreEqual(Path p1, Path p2) throws IOException {
1296     Reader in1, in2;
1297     in1 = wals.createReader(fs, p1);
1298     in2 = wals.createReader(fs, p2);
1299     Entry entry1;
1300     Entry entry2;
1301     while ((entry1 = in1.next()) != null) {
1302       entry2 = in2.next();
1303       if ((entry1.getKey().compareTo(entry2.getKey()) != 0) ||
1304               (!entry1.getEdit().toString().equals(entry2.getEdit().toString()))) {
1305         return false;
1306       }
1307     }
1308     in1.close();
1309     in2.close();
1310     return true;
1311   }
1312 }