View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver.wal;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.FileNotFoundException;
28  import java.io.IOException;
29  import java.lang.reflect.Method;
30  import java.security.PrivilegedExceptionAction;
31  import java.util.ArrayList;
32  import java.util.Collections;
33  import java.util.HashMap;
34  import java.util.List;
35  import java.util.Map;
36  import java.util.NavigableSet;
37  import java.util.concurrent.CountDownLatch;
38  import java.util.concurrent.atomic.AtomicBoolean;
39  import java.util.concurrent.atomic.AtomicInteger;
40  import java.util.concurrent.atomic.AtomicLong;
41  
42  import org.apache.commons.logging.Log;
43  import org.apache.commons.logging.LogFactory;
44  import org.apache.commons.logging.impl.Log4JLogger;
45  import org.apache.hadoop.hbase.TableName;
46  import org.apache.log4j.Level;
47  import org.apache.hadoop.hdfs.server.datanode.DataNode;
48  import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
49  import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
50  import org.apache.hadoop.conf.Configuration;
51  import org.apache.hadoop.fs.FSDataInputStream;
52  import org.apache.hadoop.fs.FSDataOutputStream;
53  import org.apache.hadoop.fs.FileStatus;
54  import org.apache.hadoop.fs.FileSystem;
55  import org.apache.hadoop.fs.FileUtil;
56  import org.apache.hadoop.fs.Path;
57  import org.apache.hadoop.hbase.HBaseConfiguration;
58  import org.apache.hadoop.hbase.HBaseTestingUtility;
59  import org.apache.hadoop.hbase.HColumnDescriptor;
60  import org.apache.hadoop.hbase.HConstants;
61  import org.apache.hadoop.hbase.HRegionInfo;
62  import org.apache.hadoop.hbase.HTableDescriptor;
63  import org.apache.hadoop.hbase.KeyValue;
64  import org.apache.hadoop.hbase.LargeTests;
65  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
66  import org.apache.hadoop.hbase.regionserver.HRegion;
67  import org.apache.hadoop.hbase.regionserver.wal.HLog.Entry;
68  import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader;
69  import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter.CorruptedLogFileException;
70  import org.apache.hadoop.hbase.security.User;
71  import org.apache.hadoop.hbase.util.Bytes;
72  import org.apache.hadoop.hbase.util.CancelableProgressable;
73  import org.apache.hadoop.hbase.util.FSUtils;
74  import org.apache.hadoop.hbase.util.Threads;
75  import org.apache.hadoop.hdfs.DFSTestUtil;
76  import org.apache.hadoop.hdfs.DistributedFileSystem;
77  import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
78  import org.apache.hadoop.ipc.RemoteException;
79  import org.junit.After;
80  import org.junit.AfterClass;
81  import org.junit.Assert;
82  import org.junit.Before;
83  import org.junit.BeforeClass;
84  import org.junit.Ignore;
85  import org.junit.Test;
86  import org.junit.experimental.categories.Category;
87  import org.mockito.Mockito;
88  import org.mockito.invocation.InvocationOnMock;
89  import org.mockito.stubbing.Answer;
90  
91  import com.google.common.base.Joiner;
92  import com.google.common.collect.ImmutableList;
93  
94  /**
95   * Testing {@link HLog} splitting code.
96   */
97  @Category(LargeTests.class)
98  public class TestHLogSplit {
99    {
100     // Uncomment the following lines if more verbosity is needed for
101     // debugging (see HBASE-12285 for details).
102     //((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
103     //((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
104     //((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
105   }
106   private final static Log LOG = LogFactory.getLog(TestHLogSplit.class);
107 
108   private Configuration conf;
109   private FileSystem fs;
110 
111   protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
112 
113   private static final Path HBASEDIR = new Path("/hbase");
114   private static final Path HLOGDIR = new Path(HBASEDIR, "hlog");
115   private static final Path OLDLOGDIR = new Path(HBASEDIR, "hlog.old");
116   private static final Path CORRUPTDIR = new Path(HBASEDIR, HConstants.CORRUPT_DIR_NAME);
117 
118   private static final int NUM_WRITERS = 10;
119   private static final int ENTRIES = 10; // entries per writer per region
120 
121   private static final TableName TABLE_NAME =
122       TableName.valueOf("t1");
123   private static final byte[] FAMILY = "f1".getBytes();
124   private static final byte[] QUALIFIER = "q1".getBytes();
125   private static final byte[] VALUE = "v1".getBytes();
126   private static final String HLOG_FILE_PREFIX = "hlog.dat.";
127   private static List<String> REGIONS = new ArrayList<String>();
128   private static final String HBASE_SKIP_ERRORS = "hbase.hlog.split.skip.errors";
129   private static final Path TABLEDIR = FSUtils.getTableDir(HBASEDIR, TABLE_NAME);
130   private static String ROBBER;
131   private static String ZOMBIE;
132   private static String [] GROUP = new String [] {"supergroup"};
133   private RecoveryMode mode;
134 
  /**
   * Kinds of damage a test can ask {@code corruptHLog} to inflict on a log file.
   * NOTE(review): the exact byte-level effect of each constant is defined by
   * {@code corruptHLog}, which lies outside this excerpt -- confirm there.
   */
  static enum Corruptions {
    INSERT_GARBAGE_ON_FIRST_LINE,
    INSERT_GARBAGE_IN_THE_MIDDLE,
    APPEND_GARBAGE,
    TRUNCATE,
    TRUNCATE_TRAILER
  }
142 
143   @BeforeClass
144   public static void setUpBeforeClass() throws Exception {
145     FSUtils.setRootDir(TEST_UTIL.getConfiguration(), HBASEDIR);
146     TEST_UTIL.getConfiguration().setClass("hbase.regionserver.hlog.writer.impl",
147       InstrumentedSequenceFileLogWriter.class, HLog.Writer.class);
148     TEST_UTIL.getConfiguration().setBoolean("dfs.support.broken.append", true);
149     TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
150     // This is how you turn off shortcircuit read currently.  TODO: Fix.  Should read config.
151     System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
152     // Create fake maping user to group and set it to the conf.
153     Map<String, String []> u2g_map = new HashMap<String, String []>(2);
154     ROBBER = User.getCurrent().getName() + "-robber";
155     ZOMBIE = User.getCurrent().getName() + "-zombie";
156     u2g_map.put(ROBBER, GROUP);
157     u2g_map.put(ZOMBIE, GROUP);
158     DFSTestUtil.updateConfWithFakeGroupMapping(TEST_UTIL.getConfiguration(), u2g_map);
159     TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1);
160     TEST_UTIL.startMiniDFSCluster(2);
161   }
162 
163   @AfterClass
164   public static void tearDownAfterClass() throws Exception {
165     TEST_UTIL.shutdownMiniDFSCluster();
166   }
167 
168   @Before
169   public void setUp() throws Exception {
170     flushToConsole("Cleaning up cluster for new test\n"
171         + "--------------------------");
172     conf = TEST_UTIL.getConfiguration();
173     fs = TEST_UTIL.getDFSCluster().getFileSystem();
174     FileStatus[] entries = fs.listStatus(new Path("/"));
175     flushToConsole("Num entries in /:" + entries.length);
176     for (FileStatus dir : entries){
177       assertTrue("Deleting " + dir.getPath(), fs.delete(dir.getPath(), true));
178     }
179     // create the HLog directory because recursive log creates are not allowed
180     fs.mkdirs(HLOGDIR);
181     REGIONS.clear();
182     Collections.addAll(REGIONS, "bbb", "ccc");
183     InstrumentedSequenceFileLogWriter.activateFailure = false;
184     this.mode = (conf.getBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false) ? 
185         RecoveryMode.LOG_REPLAY : RecoveryMode.LOG_SPLITTING);
186   }
187 
188   @After
189   public void tearDown() throws Exception {
190   }
191 
192   /**
193    * Simulates splitting a WAL out from under a regionserver that is still trying to write it.  Ensures we do not
194    * lose edits.
195    * @throws IOException
196    * @throws InterruptedException
197    */
198   @Test (timeout=300000)
199   public void testLogCannotBeWrittenOnceParsed() throws IOException, InterruptedException {
200     final AtomicLong counter = new AtomicLong(0);
201     AtomicBoolean stop = new AtomicBoolean(false);
202     // Region we'll write edits too and then later examine to make sure they all made it in.
203     final String region = REGIONS.get(0);
204     Thread zombie = new ZombieLastLogWriterRegionServer(this.conf, counter, stop, region);
205     try {
206       long startCount = counter.get();
207       zombie.start();
208       // Wait till writer starts going.
209       while (startCount == counter.get()) Threads.sleep(1);
210       // Give it a second to write a few appends.
211       Threads.sleep(1000);
212       final Configuration conf2 = HBaseConfiguration.create(this.conf);
213       final User robber = User.createUserForTesting(conf2, ROBBER, GROUP);
214       int count = robber.runAs(new PrivilegedExceptionAction<Integer>() {
215         @Override
216         public Integer run() throws Exception {
217           FileSystem fs = FileSystem.get(conf2);
218           int expectedFiles = fs.listStatus(HLOGDIR).length;
219           HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf2);
220           Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
221           assertEquals(expectedFiles, logfiles.length);
222           int count = 0;
223           for (Path logfile: logfiles) {
224             count += countHLog(logfile, fs, conf2);
225           }
226           return count;
227         }
228       });
229       LOG.info("zombie=" + counter.get() + ", robber=" + count);
230       assertTrue("The log file could have at most 1 extra log entry, but can't have less. Zombie could write " +
231         counter.get() + " and logfile had only " + count,
232         counter.get() == count || counter.get() + 1 == count);
233     } finally {
234       stop.set(true);
235       zombie.interrupt();
236       Threads.threadDumpingIsAlive(zombie);
237     }
238   }
239 
240   /**
241    * This thread will keep writing to a 'wal' file even after the split process has started.
242    * It simulates a region server that was considered dead but woke up and wrote some more to he last log entry.
243    * Does its writing as an alternate user in another filesystem instance to simulate better it being a regionserver.
244    */
245   static class ZombieLastLogWriterRegionServer extends Thread {
246     final AtomicLong editsCount;
247     final AtomicBoolean stop;
248     // final User user;
249     /**
250      * Region to write edits for.
251      */
252     final String region;
253     final Configuration conf;
254     final User user;
255 
256     public ZombieLastLogWriterRegionServer(final Configuration conf, AtomicLong counter, AtomicBoolean stop,
257         final String region)
258     throws IOException, InterruptedException {
259       super("ZombieLastLogWriterRegionServer");
260       setDaemon(true);
261       this.stop = stop;
262       this.editsCount = counter;
263       this.region = region;
264       this.conf = HBaseConfiguration.create(conf);
265       this.user = User.createUserForTesting(this.conf, ZOMBIE, GROUP);
266     }
267 
268     @Override
269     public void run() {
270       try {
271         doWriting();
272       } catch (IOException e) {
273         LOG.warn(getName() + " Writer exiting " + e);
274       } catch (InterruptedException e) {
275         LOG.warn(getName() + " Writer exiting " + e);
276       }
277     }
278 
279     private void doWriting() throws IOException, InterruptedException {
280       this.user.runAs(new PrivilegedExceptionAction<Object>() {
281         @Override
282         public Object run() throws Exception {
283           // Index of the WAL we want to keep open.  generateHLogs will leave open the WAL whose index we supply here.
284           int walToKeepOpen = 2;
285           // How many files to write.
286           final int numOfWriters = walToKeepOpen + 1;
287           // The below method writes numOfWriters files each with ENTRIES entries for a total of numOfWriters * ENTRIES
288           // added per column family in the region.
289           HLog.Writer[] writers = null;
290           try {
291             DistributedFileSystem dfs = (DistributedFileSystem)FileSystem.get(conf);
292             writers = generateHLogs(dfs, numOfWriters, ENTRIES, walToKeepOpen);
293           } catch (IOException e1) {
294             throw new RuntimeException("Failed", e1);
295           }
296           // Update counter so has all edits written so far.
297           editsCount.addAndGet(numOfWriters * NUM_WRITERS);
298           // This WAL should be open still after our call to generateHLogs -- we asked it leave it open.
299           HLog.Writer writer = writers[walToKeepOpen];
300           loop(writer);
301           return null;
302         }
303       });
304     }
305 
306     private void loop(final HLog.Writer writer) {
307       byte [] regionBytes = Bytes.toBytes(this.region);
308       while (true) {
309         try {
310           long seq = appendEntry(writer, TABLE_NAME, regionBytes, ("r" + editsCount.get()).getBytes(),
311             regionBytes, QUALIFIER, VALUE, 0);
312           long count = editsCount.incrementAndGet();
313           flushToConsole(getName() + " sync count=" + count + ", seq=" + seq);
314           try {
315             Thread.sleep(1);
316           } catch (InterruptedException e) {
317             //
318           }
319         } catch (IOException ex) {
320           flushToConsole(getName() + " ex " + ex.toString());
321           if (ex instanceof RemoteException) {
322             flushToConsole("Juliet: got RemoteException " + ex.getMessage() +
323               " while writing " + (editsCount.get() + 1));
324           } else {
325             flushToConsole(getName() + " failed to write....at " + editsCount.get());
326             assertTrue("Failed to write " + editsCount.get(), false);
327           }
328           break;
329         } catch (Throwable t) {
330           flushToConsole(getName() + " HOW? " + t);
331           t.printStackTrace();
332           break;
333         }
334       }
335       flushToConsole(getName() + " Writer exiting");
336     }
337   }
338 
339   /**
340    * @throws IOException
341    * @see https://issues.apache.org/jira/browse/HBASE-3020
342    */
343   @Test (timeout=300000)
344   public void testRecoveredEditsPathForMeta() throws IOException {
345     FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
346     byte [] encoded = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
347     Path tdir = FSUtils.getTableDir(HBASEDIR, TableName.META_TABLE_NAME);
348     Path regiondir = new Path(tdir,
349         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
350     fs.mkdirs(regiondir);
351     long now = System.currentTimeMillis();
352     HLog.Entry entry =
353         new HLog.Entry(new HLogKey(encoded,
354             TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID),
355       new WALEdit());
356     Path p = HLogSplitter.getRegionSplitEditsPath(fs, entry, HBASEDIR, true);
357     String parentOfParent = p.getParent().getParent().getName();
358     assertEquals(parentOfParent, HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
359   }
360 
361   /**
362    * Test old recovered edits file doesn't break HLogSplitter.
363    * This is useful in upgrading old instances.
364    */
365   @Test (timeout=300000)
366   public void testOldRecoveredEditsFileSidelined() throws IOException {
367     FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
368     byte [] encoded = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
369     Path tdir = FSUtils.getTableDir(HBASEDIR, TableName.META_TABLE_NAME);
370     Path regiondir = new Path(tdir,
371         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
372     fs.mkdirs(regiondir);
373     long now = System.currentTimeMillis();
374     HLog.Entry entry =
375         new HLog.Entry(new HLogKey(encoded,
376             TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID),
377       new WALEdit());
378     Path parent = HLogUtil.getRegionDirRecoveredEditsDir(regiondir);
379     assertEquals(parent.getName(), HConstants.RECOVERED_EDITS_DIR);
380     fs.createNewFile(parent); // create a recovered.edits file
381 
382     Path p = HLogSplitter.getRegionSplitEditsPath(fs, entry, HBASEDIR, true);
383     String parentOfParent = p.getParent().getParent().getName();
384     assertEquals(parentOfParent, HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
385     HLogFactory.createRecoveredEditsWriter(fs, p, conf).close();
386   }
387 
388   @Test (timeout=300000)
389   public void testSplitPreservesEdits() throws IOException{
390     final String REGION = "region__1";
391     REGIONS.removeAll(REGIONS);
392     REGIONS.add(REGION);
393 
394     generateHLogs(1, 10, -1);
395     fs.initialize(fs.getUri(), conf);
396     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
397     Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath();
398     Path[] splitLog = getLogForRegion(HBASEDIR, TABLE_NAME, REGION);
399     assertEquals(1, splitLog.length);
400 
401     assertEquals("edits differ after split", true, logsAreEqual(originalLog, splitLog[0]));
402   }
403 
404 
405   @Test (timeout=300000)
406   public void testEmptyLogFiles() throws IOException {
407 
408     injectEmptyFile(".empty", true);
409     generateHLogs(Integer.MAX_VALUE);
410     injectEmptyFile("empty", true);
411 
412     // make fs act as a different client now
413     // initialize will create a new DFSClient with a new client ID
414     fs.initialize(fs.getUri(), conf);
415 
416     int expectedFiles = fs.listStatus(HLOGDIR).length - 2; // less 2 empty files
417     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
418     for (String region : REGIONS) {
419       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
420       assertEquals(expectedFiles, logfiles.length);
421       int count = 0;
422       for (Path logfile: logfiles) {
423         count += countHLog(logfile, fs, conf);
424       }
425       assertEquals(NUM_WRITERS * ENTRIES, count);
426     }
427   }
428 
429 
430   @Test (timeout=300000)
431   public void testEmptyOpenLogFiles() throws IOException {
432     injectEmptyFile(".empty", false);
433     generateHLogs(Integer.MAX_VALUE);
434     injectEmptyFile("empty", false);
435 
436     // make fs act as a different client now
437     // initialize will create a new DFSClient with a new client ID
438     fs.initialize(fs.getUri(), conf);
439 
440     int expectedFiles = fs.listStatus(HLOGDIR).length - 2 ; // less 2 empty files
441     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
442     for (String region : REGIONS) {
443       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
444       assertEquals(expectedFiles, logfiles.length);
445       int count = 0;
446       for (Path logfile: logfiles) {
447         count += countHLog(logfile, fs, conf);
448       }
449       assertEquals(NUM_WRITERS * ENTRIES, count);
450     }
451   }
452 
453   @Test (timeout=300000)
454   public void testOpenZeroLengthReportedFileButWithDataGetsSplit() throws IOException {
455     // generate logs but leave hlog.dat.5 open.
456     generateHLogs(5);
457 
458     fs.initialize(fs.getUri(), conf);
459 
460     int expectedFiles = fs.listStatus(HLOGDIR).length;
461     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
462     for (String region : REGIONS) {
463       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
464       assertEquals(expectedFiles, logfiles.length);
465       int count = 0;
466       for (Path logfile: logfiles) {
467         count += countHLog(logfile, fs, conf);
468       }
469       assertEquals(NUM_WRITERS * ENTRIES, count);
470     }
471   }
472 
473 
474   @Test (timeout=300000)
475   public void testTralingGarbageCorruptionFileSkipErrorsPasses() throws IOException {
476     conf.setBoolean(HBASE_SKIP_ERRORS, true);
477     generateHLogs(Integer.MAX_VALUE);
478     corruptHLog(new Path(HLOGDIR, HLOG_FILE_PREFIX + "5"),
479             Corruptions.APPEND_GARBAGE, true, fs);
480     fs.initialize(fs.getUri(), conf);
481 
482     int expectedFiles = fs.listStatus(HLOGDIR).length;
483     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
484     for (String region : REGIONS) {
485       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
486       assertEquals(expectedFiles, logfiles.length);
487       int count = 0;
488       for (Path logfile: logfiles) {
489         count += countHLog(logfile, fs, conf);
490       }
491       assertEquals(NUM_WRITERS * ENTRIES, count);
492     }
493   }
494 
495   @Test (timeout=300000)
496   public void testFirstLineCorruptionLogFileSkipErrorsPasses() throws IOException {
497     conf.setBoolean(HBASE_SKIP_ERRORS, true);
498     generateHLogs(Integer.MAX_VALUE);
499     corruptHLog(new Path(HLOGDIR, HLOG_FILE_PREFIX + "5"),
500             Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true, fs);
501     fs.initialize(fs.getUri(), conf);
502 
503     int expectedFiles = fs.listStatus(HLOGDIR).length - 1; // less 1 corrupted file
504     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
505     for (String region : REGIONS) {
506       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
507       assertEquals(expectedFiles, logfiles.length);
508       int count = 0;
509       for (Path logfile: logfiles) {
510         count += countHLog(logfile, fs, conf);
511       }
512       assertEquals((NUM_WRITERS - 1) * ENTRIES, count);
513     }
514   }
515 
516   @Test (timeout=300000)
517   public void testMiddleGarbageCorruptionSkipErrorsReadsHalfOfFile() throws IOException {
518     conf.setBoolean(HBASE_SKIP_ERRORS, true);
519     generateHLogs(Integer.MAX_VALUE);
520     corruptHLog(new Path(HLOGDIR, HLOG_FILE_PREFIX + "5"),
521             Corruptions.INSERT_GARBAGE_IN_THE_MIDDLE, false, fs);
522     fs.initialize(fs.getUri(), conf);
523 
524     int expectedFiles = fs.listStatus(HLOGDIR).length;
525     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
526     for (String region : REGIONS) {
527       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
528       assertEquals(expectedFiles, logfiles.length);
529       int count = 0;
530       for (Path logfile: logfiles) {
531         count += countHLog(logfile, fs, conf);
532       }
533       // the entries in the original logs are alternating regions
534       // considering the sequence file header, the middle corruption should
535       // affect at least half of the entries
536       int goodEntries = (NUM_WRITERS - 1) * ENTRIES;
537       int firstHalfEntries = (int) Math.ceil(ENTRIES / 2) - 1;
538       assertTrue("The file up to the corrupted area hasn't been parsed",
539               goodEntries + firstHalfEntries <= count);
540     }
541   }
542 
543   @Test (timeout=300000)
544   public void testCorruptedFileGetsArchivedIfSkipErrors() throws IOException {
545     conf.setBoolean(HBASE_SKIP_ERRORS, true);
546     Class<?> backupClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
547         Reader.class);
548     InstrumentedSequenceFileLogWriter.activateFailure = false;
549     HLogFactory.resetLogReaderClass();
550 
551     try {
552     Path c1 = new Path(HLOGDIR, HLOG_FILE_PREFIX + "0");
553       conf.setClass("hbase.regionserver.hlog.reader.impl",
554           FaultySequenceFileLogReader.class, HLog.Reader.class);
555       for (FaultySequenceFileLogReader.FailureType  failureType : FaultySequenceFileLogReader.FailureType.values()) {
556         conf.set("faultysequencefilelogreader.failuretype", failureType.name());
557         generateHLogs(1, ENTRIES, -1);
558         fs.initialize(fs.getUri(), conf);
559         HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
560         FileStatus[] archivedLogs = fs.listStatus(CORRUPTDIR);
561         assertEquals("expected a different file", c1.getName(), archivedLogs[0]
562             .getPath().getName());
563         assertEquals(archivedLogs.length, 1);
564         fs.delete(new Path(OLDLOGDIR, HLOG_FILE_PREFIX + "0"), false);
565       }
566     } finally {
567       conf.setClass("hbase.regionserver.hlog.reader.impl", backupClass,
568           Reader.class);
569       HLogFactory.resetLogReaderClass();
570     }
571   }
572 
573   @Test (timeout=300000, expected = IOException.class)
574   public void testTrailingGarbageCorruptionLogFileSkipErrorsFalseThrows()
575       throws IOException {
576     conf.setBoolean(HBASE_SKIP_ERRORS, false);
577     Class<?> backupClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
578         Reader.class);
579     InstrumentedSequenceFileLogWriter.activateFailure = false;
580     HLogFactory.resetLogReaderClass();
581 
582     try {
583       conf.setClass("hbase.regionserver.hlog.reader.impl",
584           FaultySequenceFileLogReader.class, HLog.Reader.class);
585       conf.set("faultysequencefilelogreader.failuretype", FaultySequenceFileLogReader.FailureType.BEGINNING.name());
586       generateHLogs(Integer.MAX_VALUE);
587       fs.initialize(fs.getUri(), conf);
588       HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
589     } finally {
590       conf.setClass("hbase.regionserver.hlog.reader.impl", backupClass,
591           Reader.class);
592       HLogFactory.resetLogReaderClass();
593     }
594   }
595 
596   @Test (timeout=300000)
597   public void testCorruptedLogFilesSkipErrorsFalseDoesNotTouchLogs()
598       throws IOException {
599     conf.setBoolean(HBASE_SKIP_ERRORS, false);
600     Class<?> backupClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
601         Reader.class);
602     InstrumentedSequenceFileLogWriter.activateFailure = false;
603     HLogFactory.resetLogReaderClass();
604 
605     try {
606       conf.setClass("hbase.regionserver.hlog.reader.impl",
607           FaultySequenceFileLogReader.class, HLog.Reader.class);
608       conf.set("faultysequencefilelogreader.failuretype", FaultySequenceFileLogReader.FailureType.BEGINNING.name());
609       generateHLogs(-1);
610       fs.initialize(fs.getUri(), conf);
611       try {
612         HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
613       } catch (IOException e) {
614         assertEquals(
615             "if skip.errors is false all files should remain in place",
616             NUM_WRITERS, fs.listStatus(HLOGDIR).length);
617       }
618     } finally {
619       conf.setClass("hbase.regionserver.hlog.reader.impl", backupClass,
620           Reader.class);
621       HLogFactory.resetLogReaderClass();
622     }
623   }
624 
625   @Test (timeout=300000)
626   public void testEOFisIgnored() throws IOException {
627     conf.setBoolean(HBASE_SKIP_ERRORS, false);
628 
629     final String REGION = "region__1";
630     REGIONS.removeAll(REGIONS);
631     REGIONS.add(REGION);
632 
633     int entryCount = 10;
634     Path c1 = new Path(HLOGDIR, HLOG_FILE_PREFIX + "0");
635     generateHLogs(1, entryCount, -1);
636     corruptHLog(c1, Corruptions.TRUNCATE, true, fs);
637 
638     fs.initialize(fs.getUri(), conf);
639     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
640 
641     Path[] splitLog = getLogForRegion(HBASEDIR, TABLE_NAME, REGION);
642     assertEquals(1, splitLog.length);
643 
644     int actualCount = 0;
645     HLog.Reader in = HLogFactory.createReader(fs, splitLog[0], conf);
646     @SuppressWarnings("unused")
647     HLog.Entry entry;
648     while ((entry = in.next()) != null) ++actualCount;
649     assertEquals(entryCount-1, actualCount);
650 
651     // should not have stored the EOF files as corrupt
652     FileStatus[] archivedLogs = fs.listStatus(CORRUPTDIR);
653     assertEquals(archivedLogs.length, 0);
654   }
655 
656   @Test (timeout=300000)
657   public void testCorruptWALTrailer() throws IOException {
658     conf.setBoolean(HBASE_SKIP_ERRORS, false);
659 
660     final String REGION = "region__1";
661     REGIONS.removeAll(REGIONS);
662     REGIONS.add(REGION);
663 
664     int entryCount = 10;
665     Path c1 = new Path(HLOGDIR, HLOG_FILE_PREFIX + "0");
666     generateHLogs(1, entryCount, -1);
667     corruptHLog(c1, Corruptions.TRUNCATE_TRAILER, true, fs);
668 
669     fs.initialize(fs.getUri(), conf);
670     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
671 
672     Path[] splitLog = getLogForRegion(HBASEDIR, TABLE_NAME, REGION);
673     assertEquals(1, splitLog.length);
674 
675     int actualCount = 0;
676     HLog.Reader in = HLogFactory.createReader(fs, splitLog[0], conf);
677     @SuppressWarnings("unused")
678     HLog.Entry entry;
679     while ((entry = in.next()) != null) ++actualCount;
680     assertEquals(entryCount, actualCount);
681 
682     // should not have stored the EOF files as corrupt
683     FileStatus[] archivedLogs = fs.listStatus(CORRUPTDIR);
684     assertEquals(archivedLogs.length, 0);
685   }
686 
687   @Test (timeout=300000)
688   public void testLogsGetArchivedAfterSplit() throws IOException {
689     conf.setBoolean(HBASE_SKIP_ERRORS, false);
690     generateHLogs(-1);
691     fs.initialize(fs.getUri(), conf);
692     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
693     FileStatus[] archivedLogs = fs.listStatus(OLDLOGDIR);
694     assertEquals("wrong number of files in the archive log", NUM_WRITERS, archivedLogs.length);
695   }
696 
697   @Test (timeout=300000)
698   public void testSplit() throws IOException {
699     generateHLogs(-1);
700     fs.initialize(fs.getUri(), conf);
701 
702     int expectedFiles = fs.listStatus(HLOGDIR).length;
703     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
704     for (String region : REGIONS) {
705       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
706       assertEquals(expectedFiles, logfiles.length);
707       int count = 0;
708       for (Path logfile: logfiles) {
709         count += countHLog(logfile, fs, conf);
710       }
711       assertEquals(NUM_WRITERS * ENTRIES, count);
712     }
713   }
714 
715   @Test (timeout=300000)
716   public void testLogDirectoryShouldBeDeletedAfterSuccessfulSplit()
717   throws IOException {
718     generateHLogs(-1);
719     fs.initialize(fs.getUri(), conf);
720     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
721     FileStatus [] statuses = null;
722     try {
723       statuses = fs.listStatus(HLOGDIR);
724       if (statuses != null) {
725         Assert.fail("Files left in log dir: " +
726             Joiner.on(",").join(FileUtil.stat2Paths(statuses)));
727       }
728     } catch (FileNotFoundException e) {
729       // hadoop 0.21 throws FNFE whereas hadoop 0.20 returns null
730     }
731   }
732 
733   @Test(timeout=300000, expected = IOException.class)
734   public void testSplitWillFailIfWritingToRegionFails() throws Exception {
735     //leave 5th log open so we could append the "trap"
736     HLog.Writer [] writer = generateHLogs(4);
737 
738     fs.initialize(fs.getUri(), conf);
739 
740     String region = "break";
741     Path regiondir = new Path(TABLEDIR, region);
742     fs.mkdirs(regiondir);
743 
744     InstrumentedSequenceFileLogWriter.activateFailure = false;
745     appendEntry(writer[4], TABLE_NAME, Bytes.toBytes(region),
746         ("r" + 999).getBytes(), FAMILY, QUALIFIER, VALUE, 0);
747     writer[4].close();
748 
749     try {
750       InstrumentedSequenceFileLogWriter.activateFailure = true;
751       HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
752     } catch (IOException e) {
753       assertTrue(e.getMessage().
754         contains("This exception is instrumented and should only be thrown for testing"));
755       throw e;
756     } finally {
757       InstrumentedSequenceFileLogWriter.activateFailure = false;
758     }
759   }
760 
761 
762   // @Test TODO this test has been disabled since it was created!
763   // It currently fails because the second split doesn't output anything
764   // -- because there are no region dirs after we move aside the first
765   // split result
766   public void testSplittingLargeNumberOfRegionsConsistency() throws IOException {
767 
768     REGIONS.removeAll(REGIONS);
769     for (int i=0; i<100; i++) {
770       REGIONS.add("region__"+i);
771     }
772 
773     generateHLogs(1, 100, -1);
774     fs.initialize(fs.getUri(), conf);
775 
776     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
777     fs.rename(OLDLOGDIR, HLOGDIR);
778     Path firstSplitPath = new Path(HBASEDIR, TABLE_NAME+ ".first");
779     Path splitPath = new Path(HBASEDIR, TABLE_NAME.getNameAsString());
780     fs.rename(splitPath,
781             firstSplitPath);
782 
783     fs.initialize(fs.getUri(), conf);
784     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
785     assertEquals(0, compareHLogSplitDirs(firstSplitPath, splitPath));
786   }
787 
788   @Test (timeout=300000)
789   public void testSplitDeletedRegion() throws IOException {
790     REGIONS.removeAll(REGIONS);
791     String region = "region_that_splits";
792     REGIONS.add(region);
793 
794     generateHLogs(1);
795     fs.initialize(fs.getUri(), conf);
796 
797     Path regiondir = new Path(TABLEDIR, region);
798     fs.delete(regiondir, true);
799     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
800     assertFalse(fs.exists(regiondir));
801   }
802 
803   @Test (timeout=300000)
804   public void testIOEOnOutputThread() throws Exception {
805     conf.setBoolean(HBASE_SKIP_ERRORS, false);
806 
807     generateHLogs(-1);
808     fs.initialize(fs.getUri(), conf);
809     FileStatus[] logfiles = fs.listStatus(HLOGDIR);
810     assertTrue("There should be some log file",
811       logfiles != null && logfiles.length > 0);
812     // Set up a splitter that will throw an IOE on the output side
813     HLogSplitter logSplitter = new HLogSplitter(
814         conf, HBASEDIR, fs, null, null, this.mode) {
815       protected HLog.Writer createWriter(FileSystem fs,
816           Path logfile, Configuration conf) throws IOException {
817         HLog.Writer mockWriter = Mockito.mock(HLog.Writer.class);
818         Mockito.doThrow(new IOException("Injected")).when(
819           mockWriter).append(Mockito.<HLog.Entry>any());
820         return mockWriter;
821       }
822     };
823     // Set up a background thread dumper.  Needs a thread to depend on and then we need to run
824     // the thread dumping in a background thread so it does not hold up the test.
825     final AtomicBoolean stop = new AtomicBoolean(false);
826     final Thread someOldThread = new Thread("Some-old-thread") {
827       @Override
828       public void run() {
829         while(!stop.get()) Threads.sleep(10);
830       }
831     };
832     someOldThread.setDaemon(true);
833     someOldThread.start();
834     final Thread t = new Thread("Background-thread-dumper") {
835       public void run() {
836         try {
837           Threads.threadDumpingIsAlive(someOldThread);
838         } catch (InterruptedException e) {
839           e.printStackTrace();
840         }
841       }
842     };
843     t.setDaemon(true);
844     t.start();
845     try {
846       logSplitter.splitLogFile(logfiles[0], null);
847       fail("Didn't throw!");
848     } catch (IOException ioe) {
849       assertTrue(ioe.toString().contains("Injected"));
850     } finally {
851       // Setting this to true will turn off the background thread dumper.
852       stop.set(true);
853     }
854   }
855 
856   // Test for HBASE-3412
857   @Test (timeout=300000)
858   public void testMovedHLogDuringRecovery() throws Exception {
859     generateHLogs(-1);
860 
861     fs.initialize(fs.getUri(), conf);
862 
863     // This partial mock will throw LEE for every file simulating
864     // files that were moved
865     FileSystem spiedFs = Mockito.spy(fs);
866     // The "File does not exist" part is very important,
867     // that's how it comes out of HDFS
868     Mockito.doThrow(new LeaseExpiredException("Injected: File does not exist")).
869         when(spiedFs).append(Mockito.<Path>any());
870 
871     try {
872       HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, spiedFs, conf);
873       assertEquals(NUM_WRITERS, fs.listStatus(OLDLOGDIR).length);
874       assertFalse(fs.exists(HLOGDIR));
875     } catch (IOException e) {
876       fail("There shouldn't be any exception but: " + e.toString());
877     }
878   }
879 
880   @Test (timeout=300000)
881   public void testRetryOpenDuringRecovery() throws Exception {
882     generateHLogs(-1);
883 
884     fs.initialize(fs.getUri(), conf);
885 
886     FileSystem spiedFs = Mockito.spy(fs);
887     // The "Cannot obtain block length", "Could not obtain the last block",
888     // and "Blocklist for [^ ]* has changed.*" part is very important,
889     // that's how it comes out of HDFS. If HDFS changes the exception
890     // message, this test needs to be adjusted accordingly.
891     //
892     // When DFSClient tries to open a file, HDFS needs to locate
893     // the last block of the file and get its length. However, if the
894     // last block is under recovery, HDFS may have problem to obtain
895     // the block length, in which case, retry may help.
896     Mockito.doAnswer(new Answer<FSDataInputStream>() {
897       private final String[] errors = new String[] {
898         "Cannot obtain block length", "Could not obtain the last block",
899         "Blocklist for " + OLDLOGDIR + " has changed"};
900       private int count = 0;
901 
902       public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable {
903             if (count < 3) {
904                 throw new IOException(errors[count++]);
905             }
906             return (FSDataInputStream)invocation.callRealMethod();
907         }
908     }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt());
909 
910     try {
911       HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, spiedFs, conf);
912       assertEquals(NUM_WRITERS, fs.listStatus(OLDLOGDIR).length);
913       assertFalse(fs.exists(HLOGDIR));
914     } catch (IOException e) {
915       fail("There shouldn't be any exception but: " + e.toString());
916     }
917   }
918 
919   @Test (timeout=300000)
920   public void testTerminationAskedByReporter() throws IOException, CorruptedLogFileException {
921     generateHLogs(1, 10, -1);
922     FileStatus logfile = fs.listStatus(HLOGDIR)[0];
923     fs.initialize(fs.getUri(), conf);
924 
925     final AtomicInteger count = new AtomicInteger();
926 
927     CancelableProgressable localReporter
928       = new CancelableProgressable() {
929         @Override
930         public boolean progress() {
931           count.getAndIncrement();
932           return false;
933         }
934       };
935 
936     FileSystem spiedFs = Mockito.spy(fs);
937     Mockito.doAnswer(new Answer<FSDataInputStream>() {
938       public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable {
939         Thread.sleep(1500); // Sleep a while and wait report status invoked
940         return (FSDataInputStream)invocation.callRealMethod();
941       }
942     }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt());
943 
944     try {
945       conf.setInt("hbase.splitlog.report.period", 1000);
946       boolean ret = HLogSplitter.splitLogFile(
947         HBASEDIR, logfile, spiedFs, conf, localReporter, null, null, this.mode);
948       assertFalse("Log splitting should failed", ret);
949       assertTrue(count.get() > 0);
950     } catch (IOException e) {
951       fail("There shouldn't be any exception but: " + e.toString());
952     } finally {
953       // reset it back to its default value
954       conf.setInt("hbase.splitlog.report.period", 59000);
955     }
956   }
957 
958   /**
959    * Test log split process with fake data and lots of edits to trigger threading
960    * issues.
961    */
962   @Test (timeout=300000)
963   public void testThreading() throws Exception {
964     doTestThreading(20000, 128*1024*1024, 0);
965   }
966 
967   /**
968    * Test blocking behavior of the log split process if writers are writing slower
969    * than the reader is reading.
970    */
971   @Test (timeout=300000)
972   public void testThreadingSlowWriterSmallBuffer() throws Exception {
973     doTestThreading(200, 1024, 50);
974   }
975 
976   /**
977    * Sets up a log splitter with a mock reader and writer. The mock reader generates
978    * a specified number of edits spread across 5 regions. The mock writer optionally
979    * sleeps for each edit it is fed.
980    * *
981    * After the split is complete, verifies that the statistics show the correct number
982    * of edits output into each region.
983    *
984    * @param numFakeEdits number of fake edits to push through pipeline
985    * @param bufferSize size of in-memory buffer
986    * @param writerSlowness writer threads will sleep this many ms per edit
987    */
988   private void doTestThreading(final int numFakeEdits,
989       final int bufferSize,
990       final int writerSlowness) throws Exception {
991 
992     Configuration localConf = new Configuration(conf);
993     localConf.setInt("hbase.regionserver.hlog.splitlog.buffersize", bufferSize);
994 
995     // Create a fake log file (we'll override the reader to produce a stream of edits)
996     Path logPath = new Path(HLOGDIR, HLOG_FILE_PREFIX + ".fake");
997     FSDataOutputStream out = fs.create(logPath);
998     out.close();
999 
1000     // Make region dirs for our destination regions so the output doesn't get skipped
1001     final List<String> regions = ImmutableList.of("r0", "r1", "r2", "r3", "r4");
1002     makeRegionDirs(fs, regions);
1003 
1004     // Create a splitter that reads and writes the data without touching disk
1005     HLogSplitter logSplitter = new HLogSplitter(
1006         localConf, HBASEDIR, fs, null, null, this.mode) {
1007 
1008       /* Produce a mock writer that doesn't write anywhere */
1009       protected HLog.Writer createWriter(FileSystem fs, Path logfile, Configuration conf)
1010       throws IOException {
1011         HLog.Writer mockWriter = Mockito.mock(HLog.Writer.class);
1012         Mockito.doAnswer(new Answer<Void>() {
1013           int expectedIndex = 0;
1014 
1015           @Override
1016           public Void answer(InvocationOnMock invocation) {
1017             if (writerSlowness > 0) {
1018               try {
1019                 Thread.sleep(writerSlowness);
1020               } catch (InterruptedException ie) {
1021                 Thread.currentThread().interrupt();
1022               }
1023             }
1024             HLog.Entry entry = (Entry) invocation.getArguments()[0];
1025             WALEdit edit = entry.getEdit();
1026             List<KeyValue> keyValues = edit.getKeyValues();
1027             assertEquals(1, keyValues.size());
1028             KeyValue kv = keyValues.get(0);
1029 
1030             // Check that the edits come in the right order.
1031             assertEquals(expectedIndex, Bytes.toInt(kv.getRow()));
1032             expectedIndex++;
1033             return null;
1034           }
1035         }).when(mockWriter).append(Mockito.<HLog.Entry>any());
1036         return mockWriter;
1037       }
1038 
1039       /* Produce a mock reader that generates fake entries */
1040       protected Reader getReader(FileSystem fs, Path curLogFile,
1041           Configuration conf, CancelableProgressable reporter) throws IOException {
1042         Reader mockReader = Mockito.mock(Reader.class);
1043         Mockito.doAnswer(new Answer<HLog.Entry>() {
1044           int index = 0;
1045 
1046           @Override
1047           public HLog.Entry answer(InvocationOnMock invocation) throws Throwable {
1048             if (index >= numFakeEdits) return null;
1049 
1050             // Generate r0 through r4 in round robin fashion
1051             int regionIdx = index % regions.size();
1052             byte region[] = new byte[] {(byte)'r', (byte) (0x30 + regionIdx)};
1053 
1054             HLog.Entry ret = createTestEntry(TABLE_NAME, region,
1055                 Bytes.toBytes((int)(index / regions.size())),
1056                 FAMILY, QUALIFIER, VALUE, index);
1057             index++;
1058             return ret;
1059           }
1060         }).when(mockReader).next();
1061         return mockReader;
1062       }
1063     };
1064 
1065     logSplitter.splitLogFile(fs.getFileStatus(logPath), null);
1066 
1067     // Verify number of written edits per region
1068     Map<byte[], Long> outputCounts = logSplitter.outputSink.getOutputCounts();
1069     for (Map.Entry<byte[], Long> entry : outputCounts.entrySet()) {
1070       LOG.info("Got " + entry.getValue() + " output edits for region " +
1071           Bytes.toString(entry.getKey()));
1072       assertEquals((long)entry.getValue(), numFakeEdits / regions.size());
1073     }
1074     assertEquals(regions.size(), outputCounts.size());
1075   }
1076 
1077   // HBASE-2312: tests the case where a RegionServer enters a GC pause,
1078   // comes back online after the master declared it dead and started to split.
1079   // Want log rolling after a master split to fail
1080   @Test (timeout=300000)
1081   @Ignore("Need HADOOP-6886, HADOOP-6840, & HDFS-617 for this. HDFS 0.20.205.1+ should have this")
1082   public void testLogRollAfterSplitStart() throws IOException {
1083     HLog log = null;
1084     String logName = "testLogRollAfterSplitStart";
1085     Path thisTestsDir = new Path(HBASEDIR, logName);
1086 
1087     try {
1088       // put some entries in an HLog
1089       TableName tableName =
1090           TableName.valueOf(this.getClass().getName());
1091       HRegionInfo regioninfo = new HRegionInfo(tableName,
1092           HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
1093       log = HLogFactory.createHLog(fs, HBASEDIR, logName, conf);
1094       final AtomicLong sequenceId = new AtomicLong(1);
1095 
1096       final int total = 20;
1097       for (int i = 0; i < total; i++) {
1098         WALEdit kvs = new WALEdit();
1099         kvs.add(new KeyValue(Bytes.toBytes(i), tableName.getName(), tableName.getName()));
1100         HTableDescriptor htd = new HTableDescriptor(tableName);
1101         htd.addFamily(new HColumnDescriptor("column"));
1102         log.append(regioninfo, tableName, kvs, System.currentTimeMillis(), htd, sequenceId);
1103       }
1104       // Send the data to HDFS datanodes and close the HDFS writer
1105       log.sync();
1106       ((FSHLog) log).cleanupCurrentWriter(log.getFilenum());
1107 
1108       /* code taken from ProcessServerShutdown.process()
1109        * handles RS shutdowns (as observed by the Master)
1110        */
1111       // rename the directory so a rogue RS doesn't create more HLogs
1112       Path rsSplitDir = new Path(thisTestsDir.getParent(),
1113                                  thisTestsDir.getName() + "-splitting");
1114       fs.rename(thisTestsDir, rsSplitDir);
1115       LOG.debug("Renamed region directory: " + rsSplitDir);
1116 
1117       // Process the old log files
1118       HLogSplitter.split(HBASEDIR, rsSplitDir, OLDLOGDIR, fs, conf);
1119 
1120       // Now, try to roll the HLog and verify failure
1121       try {
1122         log.rollWriter();
1123         Assert.fail("rollWriter() did not throw any exception.");
1124       } catch (IOException ioe) {
1125         if (ioe.getCause().getMessage().contains("FileNotFound")) {
1126           LOG.info("Got the expected exception: ", ioe.getCause());
1127         } else {
1128           Assert.fail("Unexpected exception: " + ioe);
1129         }
1130       }
1131     } finally {
1132       if (log != null) {
1133         log.close();
1134       }
1135       if (fs.exists(thisTestsDir)) {
1136         fs.delete(thisTestsDir, true);
1137       }
1138     }
1139   }
1140 
1141   /**
1142    * This thread will keep adding new log files
1143    * It simulates a region server that was considered dead but woke up and wrote
1144    * some more to a new hlog
1145    */
1146   class ZombieNewLogWriterRegionServer extends Thread {
1147     AtomicBoolean stop;
1148     CountDownLatch latch;
1149     public ZombieNewLogWriterRegionServer(CountDownLatch latch, AtomicBoolean stop) {
1150       super("ZombieNewLogWriterRegionServer");
1151       this.latch = latch;
1152       this.stop = stop;
1153     }
1154 
1155     @Override
1156     public void run() {
1157       if (stop.get()) {
1158         return;
1159       }
1160       Path tableDir = FSUtils.getTableDir(HBASEDIR, TABLE_NAME);
1161       Path regionDir = new Path(tableDir, REGIONS.get(0));
1162       Path recoveredEdits = new Path(regionDir, HConstants.RECOVERED_EDITS_DIR);
1163       String region = "juliet";
1164       Path julietLog = new Path(HLOGDIR, HLOG_FILE_PREFIX + ".juliet");
1165       try {
1166 
1167         while (!fs.exists(recoveredEdits) && !stop.get()) {
1168           LOG.info("Juliet: split not started, sleeping a bit...");
1169           Threads.sleep(10);
1170         }
1171 
1172         fs.mkdirs(new Path(tableDir, region));
1173         HLog.Writer writer = HLogFactory.createWALWriter(fs,
1174           julietLog, conf);
1175         appendEntry(writer, TableName.valueOf("juliet"), ("juliet").getBytes(),
1176             ("r").getBytes(), FAMILY, QUALIFIER, VALUE, 0);
1177         writer.close();
1178         LOG.info("Juliet file creator: created file " + julietLog);
1179         latch.countDown();
1180       } catch (IOException e1) {
1181         LOG.error("Failed to create file " + julietLog, e1);
1182         assertTrue("Failed to create file " + julietLog, false);
1183       }
1184     }
1185   }
1186 
1187   @Test (timeout=300000)
1188   public void testSplitLogFileWithOneRegion() throws IOException {
1189     LOG.info("testSplitLogFileWithOneRegion");
1190     final String REGION = "region__1";
1191     REGIONS.removeAll(REGIONS);
1192     REGIONS.add(REGION);
1193 
1194     generateHLogs(1, 10, -1);
1195     fs.initialize(fs.getUri(), conf);
1196     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
1197 
1198     Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath();
1199     Path[] splitLog = getLogForRegion(HBASEDIR, TABLE_NAME, REGION);
1200     assertEquals(1, splitLog.length);
1201 
1202     assertEquals(true, logsAreEqual(originalLog, splitLog[0]));
1203   }
1204 
1205   @Test (timeout=300000)
1206   public void testSplitLogFileDeletedRegionDir() throws IOException {
1207     LOG.info("testSplitLogFileDeletedRegionDir");
1208     final String REGION = "region__1";
1209     REGIONS.removeAll(REGIONS);
1210     REGIONS.add(REGION);
1211 
1212     generateHLogs(1, 10, -1);
1213     fs.initialize(fs.getUri(), conf);
1214 
1215     Path regiondir = new Path(TABLEDIR, REGION);
1216     LOG.info("Region directory is" + regiondir);
1217     fs.delete(regiondir, true);
1218 
1219     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
1220 
1221     assertTrue(!fs.exists(regiondir));
1222     assertTrue(true);
1223   }
1224 
1225   @Test (timeout=300000)
1226   public void testSplitLogFileEmpty() throws IOException {
1227     LOG.info("testSplitLogFileEmpty");
1228     injectEmptyFile(".empty", true);
1229 
1230     fs.initialize(fs.getUri(), conf);
1231 
1232     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
1233     Path tdir = FSUtils.getTableDir(HBASEDIR, TABLE_NAME);
1234     assertFalse(fs.exists(tdir));
1235 
1236     assertEquals(0, countHLog(fs.listStatus(OLDLOGDIR)[0].getPath(), fs, conf));
1237   }
1238 
1239   @Test (timeout=300000)
1240   public void testSplitLogFileMultipleRegions() throws IOException {
1241     LOG.info("testSplitLogFileMultipleRegions");
1242     generateHLogs(1, 10, -1);
1243     fs.initialize(fs.getUri(), conf);
1244 
1245     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
1246     for (String region : REGIONS) {
1247       Path[] recovered = getLogForRegion(HBASEDIR, TABLE_NAME, region);
1248       assertEquals(1, recovered.length);
1249       assertEquals(10, countHLog(recovered[0], fs, conf));
1250     }
1251   }
1252 
1253   @Test (timeout=300000)
1254   public void testSplitLogFileFirstLineCorruptionLog()
1255   throws IOException {
1256     conf.setBoolean(HBASE_SKIP_ERRORS, true);
1257     generateHLogs(1, 10, -1);
1258     FileStatus logfile = fs.listStatus(HLOGDIR)[0];
1259 
1260     corruptHLog(logfile.getPath(),
1261         Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true, fs);
1262 
1263     fs.initialize(fs.getUri(), conf);
1264     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
1265 
1266     final Path corruptDir = new Path(FSUtils.getRootDir(conf), conf.get(
1267         "hbase.regionserver.hlog.splitlog.corrupt.dir", HConstants.CORRUPT_DIR_NAME));
1268     assertEquals(1, fs.listStatus(corruptDir).length);
1269   }
1270 
1271   /**
1272    * @throws IOException
1273    * @see https://issues.apache.org/jira/browse/HBASE-4862
1274    */
1275   @Test (timeout=300000)
1276   public void testConcurrentSplitLogAndReplayRecoverEdit() throws IOException {
1277     LOG.info("testConcurrentSplitLogAndReplayRecoverEdit");
1278     // Generate hlogs for our destination region
1279     String regionName = "r0";
1280     final Path regiondir = new Path(TABLEDIR, regionName);
1281     REGIONS = new ArrayList<String>();
1282     REGIONS.add(regionName);
1283     generateHLogs(-1);
1284 
1285     HLogFactory.createHLog(fs, regiondir, regionName, conf);
1286     FileStatus[] logfiles = fs.listStatus(HLOGDIR);
1287     assertTrue("There should be some log file",
1288       logfiles != null && logfiles.length > 0);
1289 
1290     HLogSplitter logSplitter = new HLogSplitter(
1291         conf, HBASEDIR, fs, null, null, this.mode) {
1292       protected HLog.Writer createWriter(FileSystem fs, Path logfile, Configuration conf)
1293       throws IOException {
1294         HLog.Writer writer = HLogFactory.createRecoveredEditsWriter(fs, logfile, conf);
1295         // After creating writer, simulate region's
1296         // replayRecoveredEditsIfAny() which gets SplitEditFiles of this
1297         // region and delete them, excluding files with '.temp' suffix.
1298         NavigableSet<Path> files = HLogUtil.getSplitEditFilesSorted(fs, regiondir);
1299         if (files != null && !files.isEmpty()) {
1300           for (Path file : files) {
1301             if (!this.fs.delete(file, false)) {
1302               LOG.error("Failed delete of " + file);
1303             } else {
1304               LOG.debug("Deleted recovered.edits file=" + file);
1305             }
1306           }
1307         }
1308         return writer;
1309       }
1310     };
1311     try{
1312       logSplitter.splitLogFile(logfiles[0], null);
1313     } catch (IOException e) {
1314       LOG.info(e);
1315       Assert.fail("Throws IOException when spliting "
1316           + "log, it is most likely because writing file does not "
1317           + "exist which is caused by concurrent replayRecoveredEditsIfAny()");
1318     }
1319     if (fs.exists(CORRUPTDIR)) {
1320       if (fs.listStatus(CORRUPTDIR).length > 0) {
1321         Assert.fail("There are some corrupt logs, "
1322                 + "it is most likely caused by concurrent replayRecoveredEditsIfAny()");
1323       }
1324     }
1325   }
1326 
1327   private static void flushToConsole(String s) {
1328     System.out.println(s);
1329     System.out.flush();
1330   }
1331 
1332 
1333   private HLog.Writer [] generateHLogs(int leaveOpen) throws IOException {
1334     return generateHLogs(NUM_WRITERS, ENTRIES, leaveOpen);
1335   }
1336 
1337   private HLog.Writer [] generateHLogs(final int writers, final int entries, final int leaveOpen) throws IOException {
1338     return generateHLogs((DistributedFileSystem)this.fs, writers, entries, leaveOpen);
1339   }
1340 
1341   private static void makeRegionDirs(FileSystem fs, List<String> regions) throws IOException {
1342     for (String region : regions) {
1343       flushToConsole("Creating dir for region " + region);
1344       fs.mkdirs(new Path(TABLEDIR, region));
1345     }
1346   }
1347 
1348   private static HLog.Writer [] generateHLogs(final DistributedFileSystem dfs, int writers, int entries, int leaveOpen)
1349   throws IOException {
1350     makeRegionDirs(dfs, REGIONS);
1351     dfs.mkdirs(HLOGDIR);
1352     HLog.Writer [] ws = new HLog.Writer[writers];
1353     int seq = 0;
1354     for (int i = 0; i < writers; i++) {
1355       ws[i] = HLogFactory.createWALWriter(dfs, new Path(HLOGDIR, HLOG_FILE_PREFIX + i), dfs.getConf());
1356       for (int j = 0; j < entries; j++) {
1357         int prefix = 0;
1358         for (String region : REGIONS) {
1359           String row_key = region + prefix++ + i + j;
1360           appendEntry(ws[i], TABLE_NAME, region.getBytes(), row_key.getBytes(), FAMILY, QUALIFIER, VALUE, seq++);
1361         }
1362       }
1363       if (i != leaveOpen) {
1364         ws[i].close();
1365         LOG.info("Closing writer " + i);
1366       }
1367     }
1368     return ws;
1369   }
1370 
1371   private Path[] getLogForRegion(Path rootdir, TableName table, String region)
1372   throws IOException {
1373     Path tdir = FSUtils.getTableDir(rootdir, table);
1374     @SuppressWarnings("deprecation")
1375     Path editsdir = HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir,
1376       Bytes.toString(region.getBytes())));
1377     FileStatus [] files = this.fs.listStatus(editsdir);
1378     Path[] paths = new Path[files.length];
1379     for (int i = 0; i < files.length; i++) {
1380       paths[i] = files[i].getPath();
1381     }
1382     return paths;
1383   }
1384 
1385   private void corruptHLog(Path path, Corruptions corruption, boolean close,
1386                            FileSystem fs) throws IOException {
1387 
1388     FSDataOutputStream out;
1389     int fileSize = (int) fs.listStatus(path)[0].getLen();
1390 
1391     FSDataInputStream in = fs.open(path);
1392     byte[] corrupted_bytes = new byte[fileSize];
1393     in.readFully(0, corrupted_bytes, 0, fileSize);
1394     in.close();
1395 
1396     switch (corruption) {
1397       case APPEND_GARBAGE:
1398         fs.delete(path, false);
1399         out = fs.create(path);
1400         out.write(corrupted_bytes);
1401         out.write("-----".getBytes());
1402         closeOrFlush(close, out);
1403         break;
1404 
1405       case INSERT_GARBAGE_ON_FIRST_LINE:
1406         fs.delete(path, false);
1407         out = fs.create(path);
1408         out.write(0);
1409         out.write(corrupted_bytes);
1410         closeOrFlush(close, out);
1411         break;
1412 
1413       case INSERT_GARBAGE_IN_THE_MIDDLE:
1414         fs.delete(path, false);
1415         out = fs.create(path);
1416         int middle = (int) Math.floor(corrupted_bytes.length / 2);
1417         out.write(corrupted_bytes, 0, middle);
1418         out.write(0);
1419         out.write(corrupted_bytes, middle, corrupted_bytes.length - middle);
1420         closeOrFlush(close, out);
1421         break;
1422 
1423       case TRUNCATE:
1424         fs.delete(path, false);
1425         out = fs.create(path);
1426         out.write(corrupted_bytes, 0, fileSize
1427           - (32 + ProtobufLogReader.PB_WAL_COMPLETE_MAGIC.length + Bytes.SIZEOF_INT));
1428         closeOrFlush(close, out);
1429         break;
1430 
1431       case TRUNCATE_TRAILER:
1432         fs.delete(path, false);
1433         out = fs.create(path);
1434         out.write(corrupted_bytes, 0, fileSize - Bytes.SIZEOF_INT);// trailer is truncated.
1435         closeOrFlush(close, out);
1436         break;
1437     }
1438   }
1439 
1440   private void closeOrFlush(boolean close, FSDataOutputStream out)
1441   throws IOException {
1442     if (close) {
1443       out.close();
1444     } else {
1445       Method syncMethod = null;
1446       try {
1447         syncMethod = out.getClass().getMethod("hflush", new Class<?> []{});
1448       } catch (NoSuchMethodException e) {
1449         try {
1450           syncMethod = out.getClass().getMethod("sync", new Class<?> []{});
1451         } catch (NoSuchMethodException ex) {
1452           throw new IOException("This version of Hadoop supports " +
1453               "neither Syncable.sync() nor Syncable.hflush().");
1454         }
1455       }
1456       try {
1457         syncMethod.invoke(out, new Object[]{});
1458       } catch (Exception e) {
1459         throw new IOException(e);
1460       }
1461       // Not in 0out.hflush();
1462     }
1463   }
1464 
1465   @SuppressWarnings("unused")
1466   private void dumpHLog(Path log, FileSystem fs, Configuration conf) throws IOException {
1467     HLog.Entry entry;
1468     HLog.Reader in = HLogFactory.createReader(fs, log, conf);
1469     while ((entry = in.next()) != null) {
1470       System.out.println(entry);
1471     }
1472   }
1473 
1474   private int countHLog(Path log, FileSystem fs, Configuration conf) throws IOException {
1475     int count = 0;
1476     HLog.Reader in = HLogFactory.createReader(fs, log, conf);
1477     while (in.next() != null) {
1478       count++;
1479     }
1480     return count;
1481   }
1482 
1483 
1484   public static long appendEntry(HLog.Writer writer, TableName table, byte[] region,
1485                           byte[] row, byte[] family, byte[] qualifier,
1486                           byte[] value, long seq)
1487           throws IOException {
1488     LOG.info(Thread.currentThread().getName() + " append");
1489     writer.append(createTestEntry(table, region, row, family, qualifier, value, seq));
1490     LOG.info(Thread.currentThread().getName() + " sync");
1491     writer.sync();
1492     return seq;
1493   }
1494 
1495   private static HLog.Entry createTestEntry(
1496       TableName table, byte[] region,
1497       byte[] row, byte[] family, byte[] qualifier,
1498       byte[] value, long seq) {
1499     long time = System.nanoTime();
1500     WALEdit edit = new WALEdit();
1501     seq++;
1502     edit.add(new KeyValue(row, family, qualifier, time, KeyValue.Type.Put, value));
1503     return new HLog.Entry(new HLogKey(region, table, seq, time,
1504         HConstants.DEFAULT_CLUSTER_ID), edit);
1505   }
1506 
1507 
1508   private void injectEmptyFile(String suffix, boolean closeFile)
1509           throws IOException {
1510     HLog.Writer writer = HLogFactory.createWALWriter(
1511         fs, new Path(HLOGDIR, HLOG_FILE_PREFIX + suffix), conf);
1512     if (closeFile) writer.close();
1513   }
1514 
1515   @SuppressWarnings("unused")
1516   private void listLogs(FileSystem fs, Path dir) throws IOException {
1517     for (FileStatus file : fs.listStatus(dir)) {
1518       System.out.println(file.getPath());
1519     }
1520 
1521   }
1522 
1523   private int compareHLogSplitDirs(Path p1, Path p2) throws IOException {
1524     FileStatus[] f1 = fs.listStatus(p1);
1525     FileStatus[] f2 = fs.listStatus(p2);
1526     assertNotNull("Path " + p1 + " doesn't exist", f1);
1527     assertNotNull("Path " + p2 + " doesn't exist", f2);
1528 
1529     System.out.println("Files in " + p1 + ": " +
1530         Joiner.on(",").join(FileUtil.stat2Paths(f1)));
1531     System.out.println("Files in " + p2 + ": " +
1532         Joiner.on(",").join(FileUtil.stat2Paths(f2)));
1533     assertEquals(f1.length, f2.length);
1534 
1535     for (int i = 0; i < f1.length; i++) {
1536       // Regions now have a directory named RECOVERED_EDITS_DIR and in here
1537       // are split edit files. In below presume only 1.
1538       Path rd1 = HLogUtil.getRegionDirRecoveredEditsDir(f1[i].getPath());
1539       FileStatus[] rd1fs = fs.listStatus(rd1);
1540       assertEquals(1, rd1fs.length);
1541       Path rd2 = HLogUtil.getRegionDirRecoveredEditsDir(f2[i].getPath());
1542       FileStatus[] rd2fs = fs.listStatus(rd2);
1543       assertEquals(1, rd2fs.length);
1544       if (!logsAreEqual(rd1fs[0].getPath(), rd2fs[0].getPath())) {
1545         return -1;
1546       }
1547     }
1548     return 0;
1549   }
1550 
1551   private boolean logsAreEqual(Path p1, Path p2) throws IOException {
1552     HLog.Reader in1, in2;
1553     in1 = HLogFactory.createReader(fs, p1, conf);
1554     in2 = HLogFactory.createReader(fs, p2, conf);
1555     HLog.Entry entry1;
1556     HLog.Entry entry2;
1557     while ((entry1 = in1.next()) != null) {
1558       entry2 = in2.next();
1559       if ((entry1.getKey().compareTo(entry2.getKey()) != 0) ||
1560               (!entry1.getEdit().toString().equals(entry2.getEdit().toString()))) {
1561         return false;
1562       }
1563     }
1564     return true;
1565   }
1566 }