View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver.wal;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.FileNotFoundException;
28  import java.io.IOException;
29  import java.lang.reflect.Method;
30  import java.security.PrivilegedExceptionAction;
31  import java.util.ArrayList;
32  import java.util.Collections;
33  import java.util.HashMap;
34  import java.util.List;
35  import java.util.Map;
36  import java.util.NavigableSet;
37  import java.util.concurrent.CountDownLatch;
38  import java.util.concurrent.atomic.AtomicBoolean;
39  import java.util.concurrent.atomic.AtomicInteger;
40  import java.util.concurrent.atomic.AtomicLong;
41  
42  import org.apache.commons.logging.Log;
43  import org.apache.commons.logging.LogFactory;
44  import org.apache.commons.logging.impl.Log4JLogger;
45  import org.apache.hadoop.hbase.TableName;
46  import org.apache.log4j.Level;
47  import org.apache.hadoop.hdfs.server.datanode.DataNode;
48  import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
49  import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
50  import org.apache.hadoop.conf.Configuration;
51  import org.apache.hadoop.fs.FSDataInputStream;
52  import org.apache.hadoop.fs.FSDataOutputStream;
53  import org.apache.hadoop.fs.FileStatus;
54  import org.apache.hadoop.fs.FileSystem;
55  import org.apache.hadoop.fs.FileUtil;
56  import org.apache.hadoop.fs.Path;
57  import org.apache.hadoop.hbase.HBaseConfiguration;
58  import org.apache.hadoop.hbase.HBaseTestingUtility;
59  import org.apache.hadoop.hbase.HColumnDescriptor;
60  import org.apache.hadoop.hbase.HConstants;
61  import org.apache.hadoop.hbase.HRegionInfo;
62  import org.apache.hadoop.hbase.HTableDescriptor;
63  import org.apache.hadoop.hbase.KeyValue;
64  import org.apache.hadoop.hbase.LargeTests;
65  import org.apache.hadoop.hbase.regionserver.HRegion;
66  import org.apache.hadoop.hbase.regionserver.wal.HLog.Entry;
67  import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader;
68  import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter.CorruptedLogFileException;
69  import org.apache.hadoop.hbase.security.User;
70  import org.apache.hadoop.hbase.util.Bytes;
71  import org.apache.hadoop.hbase.util.CancelableProgressable;
72  import org.apache.hadoop.hbase.util.FSUtils;
73  import org.apache.hadoop.hbase.util.Threads;
74  import org.apache.hadoop.hdfs.DFSTestUtil;
75  import org.apache.hadoop.hdfs.DistributedFileSystem;
76  import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
77  import org.apache.hadoop.ipc.RemoteException;
78  import org.junit.After;
79  import org.junit.AfterClass;
80  import org.junit.Assert;
81  import org.junit.Before;
82  import org.junit.BeforeClass;
83  import org.junit.Ignore;
84  import org.junit.Test;
85  import org.junit.experimental.categories.Category;
86  import org.mockito.Mockito;
87  import org.mockito.invocation.InvocationOnMock;
88  import org.mockito.stubbing.Answer;
89  
90  import com.google.common.base.Joiner;
91  import com.google.common.collect.ImmutableList;
92  
93  /**
94   * Testing {@link HLog} splitting code.
95   */
96  @Category(LargeTests.class)
97  public class TestHLogSplit {
98    {
99      ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
100     ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
101     ((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
102   }
103   private final static Log LOG = LogFactory.getLog(TestHLogSplit.class);
104 
105   private Configuration conf;
106   private FileSystem fs;
107 
108   protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
109 
110   private static final Path HBASEDIR = new Path("/hbase");
111   private static final Path HLOGDIR = new Path(HBASEDIR, "hlog");
112   private static final Path OLDLOGDIR = new Path(HBASEDIR, "hlog.old");
113   private static final Path CORRUPTDIR = new Path(HBASEDIR, HConstants.CORRUPT_DIR_NAME);
114 
115   private static final int NUM_WRITERS = 10;
116   private static final int ENTRIES = 10; // entries per writer per region
117 
118   private static final TableName TABLE_NAME =
119       TableName.valueOf("t1");
120   private static final byte[] FAMILY = "f1".getBytes();
121   private static final byte[] QUALIFIER = "q1".getBytes();
122   private static final byte[] VALUE = "v1".getBytes();
123   private static final String HLOG_FILE_PREFIX = "hlog.dat.";
124   private static List<String> REGIONS = new ArrayList<String>();
125   private static final String HBASE_SKIP_ERRORS = "hbase.hlog.split.skip.errors";
126   private static final Path TABLEDIR = FSUtils.getTableDir(HBASEDIR, TABLE_NAME);
127   private static String ROBBER;
128   private static String ZOMBIE;
129   private static String [] GROUP = new String [] {"supergroup"};
130 
131   static enum Corruptions {
132     INSERT_GARBAGE_ON_FIRST_LINE,
133     INSERT_GARBAGE_IN_THE_MIDDLE,
134     APPEND_GARBAGE,
135     TRUNCATE,
136     TRUNCATE_TRAILER
137   }
138 
139   @BeforeClass
140   public static void setUpBeforeClass() throws Exception {
141     FSUtils.setRootDir(TEST_UTIL.getConfiguration(), HBASEDIR);
142     TEST_UTIL.getConfiguration().setClass("hbase.regionserver.hlog.writer.impl",
143       InstrumentedSequenceFileLogWriter.class, HLog.Writer.class);
144     TEST_UTIL.getConfiguration().setBoolean("dfs.support.broken.append", true);
145     TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
146     // This is how you turn off shortcircuit read currently.  TODO: Fix.  Should read config.
147     System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
148     // Create fake maping user to group and set it to the conf.
149     Map<String, String []> u2g_map = new HashMap<String, String []>(2);
150     ROBBER = User.getCurrent().getName() + "-robber";
151     ZOMBIE = User.getCurrent().getName() + "-zombie";
152     u2g_map.put(ROBBER, GROUP);
153     u2g_map.put(ZOMBIE, GROUP);
154     DFSTestUtil.updateConfWithFakeGroupMapping(TEST_UTIL.getConfiguration(), u2g_map);
155     TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1);
156     TEST_UTIL.startMiniDFSCluster(2);
157   }
158 
159   @AfterClass
160   public static void tearDownAfterClass() throws Exception {
161     TEST_UTIL.shutdownMiniDFSCluster();
162   }
163 
164   @Before
165   public void setUp() throws Exception {
166     flushToConsole("Cleaning up cluster for new test\n"
167         + "--------------------------");
168     conf = TEST_UTIL.getConfiguration();
169     fs = TEST_UTIL.getDFSCluster().getFileSystem();
170     FileStatus[] entries = fs.listStatus(new Path("/"));
171     flushToConsole("Num entries in /:" + entries.length);
172     for (FileStatus dir : entries){
173       assertTrue("Deleting " + dir.getPath(), fs.delete(dir.getPath(), true));
174     }
175     // create the HLog directory because recursive log creates are not allowed
176     fs.mkdirs(HLOGDIR);
177     REGIONS.clear();
178     Collections.addAll(REGIONS, "bbb", "ccc");
179     InstrumentedSequenceFileLogWriter.activateFailure = false;
180   }
181 
182   @After
183   public void tearDown() throws Exception {
184   }
185 
186   /**
187    * Simulates splitting a WAL out from under a regionserver that is still trying to write it.  Ensures we do not
188    * lose edits.
189    * @throws IOException
190    * @throws InterruptedException
191    */
192   @Test (timeout=300000)
193   public void testLogCannotBeWrittenOnceParsed() throws IOException, InterruptedException {
194     final AtomicLong counter = new AtomicLong(0);
195     AtomicBoolean stop = new AtomicBoolean(false);
196     // Region we'll write edits too and then later examine to make sure they all made it in.
197     final String region = REGIONS.get(0);
198     Thread zombie = new ZombieLastLogWriterRegionServer(this.conf, counter, stop, region);
199     try {
200       long startCount = counter.get();
201       zombie.start();
202       // Wait till writer starts going.
203       while (startCount == counter.get()) Threads.sleep(1);
204       // Give it a second to write a few appends.
205       Threads.sleep(1000);
206       final Configuration conf2 = HBaseConfiguration.create(this.conf);
207       final User robber = User.createUserForTesting(conf2, ROBBER, GROUP);
208       int count = robber.runAs(new PrivilegedExceptionAction<Integer>() {
209         @Override
210         public Integer run() throws Exception {
211           FileSystem fs = FileSystem.get(conf2);
212           int expectedFiles = fs.listStatus(HLOGDIR).length;
213           HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf2);
214           Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
215           assertEquals(expectedFiles, logfiles.length);
216           int count = 0;
217           for (Path logfile: logfiles) {
218             count += countHLog(logfile, fs, conf2);
219           }
220           return count;
221         }
222       });
223       LOG.info("zombie=" + counter.get() + ", robber=" + count);
224       assertTrue("The log file could have at most 1 extra log entry, but can't have less. Zombie could write " +
225         counter.get() + " and logfile had only " + count,
226         counter.get() == count || counter.get() + 1 == count);
227     } finally {
228       stop.set(true);
229       zombie.interrupt();
230       Threads.threadDumpingIsAlive(zombie);
231     }
232   }
233 
234   /**
235    * This thread will keep writing to a 'wal' file even after the split process has started.
236    * It simulates a region server that was considered dead but woke up and wrote some more to he last log entry.
237    * Does its writing as an alternate user in another filesystem instance to simulate better it being a regionserver.
238    */
239   static class ZombieLastLogWriterRegionServer extends Thread {
240     final AtomicLong editsCount;
241     final AtomicBoolean stop;
242     // final User user;
243     /**
244      * Region to write edits for.
245      */
246     final String region;
247     final Configuration conf;
248     final User user;
249 
250     public ZombieLastLogWriterRegionServer(final Configuration conf, AtomicLong counter, AtomicBoolean stop,
251         final String region)
252     throws IOException, InterruptedException {
253       super("ZombieLastLogWriterRegionServer");
254       setDaemon(true);
255       this.stop = stop;
256       this.editsCount = counter;
257       this.region = region;
258       this.conf = HBaseConfiguration.create(conf);
259       this.user = User.createUserForTesting(this.conf, ZOMBIE, GROUP);
260     }
261 
262     @Override
263     public void run() {
264       try {
265         doWriting();
266       } catch (IOException e) {
267         LOG.warn(getName() + " Writer exiting " + e);
268       } catch (InterruptedException e) {
269         LOG.warn(getName() + " Writer exiting " + e);
270       }
271     }
272 
273     private void doWriting() throws IOException, InterruptedException {
274       this.user.runAs(new PrivilegedExceptionAction<Object>() {
275         @Override
276         public Object run() throws Exception {
277           // Index of the WAL we want to keep open.  generateHLogs will leave open the WAL whose index we supply here.
278           int walToKeepOpen = 2;
279           // How many files to write.
280           final int numOfWriters = walToKeepOpen + 1;
281           // The below method writes numOfWriters files each with ENTRIES entries for a total of numOfWriters * ENTRIES
282           // added per column family in the region.
283           HLog.Writer[] writers = null;
284           try {
285             DistributedFileSystem dfs = (DistributedFileSystem)FileSystem.get(conf);
286             writers = generateHLogs(dfs, numOfWriters, ENTRIES, walToKeepOpen);
287           } catch (IOException e1) {
288             throw new RuntimeException("Failed", e1);
289           }
290           // Update counter so has all edits written so far.
291           editsCount.addAndGet(numOfWriters * NUM_WRITERS);
292           // This WAL should be open still after our call to generateHLogs -- we asked it leave it open.
293           HLog.Writer writer = writers[walToKeepOpen];
294           loop(writer);
295           return null;
296         }
297       });
298     }
299 
300     private void loop(final HLog.Writer writer) {
301       byte [] regionBytes = Bytes.toBytes(this.region);
302       while (true) {
303         try {
304           long seq = appendEntry(writer, TABLE_NAME, regionBytes, ("r" + editsCount.get()).getBytes(),
305             regionBytes, QUALIFIER, VALUE, 0);
306           long count = editsCount.incrementAndGet();
307           flushToConsole(getName() + " sync count=" + count + ", seq=" + seq);
308           try {
309             Thread.sleep(1);
310           } catch (InterruptedException e) {
311             //
312           }
313         } catch (IOException ex) {
314           flushToConsole(getName() + " ex " + ex.toString());
315           if (ex instanceof RemoteException) {
316             flushToConsole("Juliet: got RemoteException " + ex.getMessage() +
317               " while writing " + (editsCount.get() + 1));
318           } else {
319             flushToConsole(getName() + " failed to write....at " + editsCount.get());
320             assertTrue("Failed to write " + editsCount.get(), false);
321           }
322           break;
323         } catch (Throwable t) {
324           flushToConsole(getName() + " HOW? " + t);
325           t.printStackTrace();
326           break;
327         }
328       }
329       flushToConsole(getName() + " Writer exiting");
330     }
331   }
332 
333   /**
334    * @throws IOException
335    * @see https://issues.apache.org/jira/browse/HBASE-3020
336    */
337   @Test (timeout=300000)
338   public void testRecoveredEditsPathForMeta() throws IOException {
339     FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
340     byte [] encoded = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
341     Path tdir = FSUtils.getTableDir(HBASEDIR, TableName.META_TABLE_NAME);
342     Path regiondir = new Path(tdir,
343         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
344     fs.mkdirs(regiondir);
345     long now = System.currentTimeMillis();
346     HLog.Entry entry =
347         new HLog.Entry(new HLogKey(encoded,
348             TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID),
349       new WALEdit());
350     Path p = HLogSplitter.getRegionSplitEditsPath(fs, entry, HBASEDIR, true);
351     String parentOfParent = p.getParent().getParent().getName();
352     assertEquals(parentOfParent, HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
353   }
354 
355   /**
356    * Test old recovered edits file doesn't break HLogSplitter.
357    * This is useful in upgrading old instances.
358    */
359   @Test (timeout=300000)
360   public void testOldRecoveredEditsFileSidelined() throws IOException {
361     FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
362     byte [] encoded = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
363     Path tdir = FSUtils.getTableDir(HBASEDIR, TableName.META_TABLE_NAME);
364     Path regiondir = new Path(tdir,
365         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
366     fs.mkdirs(regiondir);
367     long now = System.currentTimeMillis();
368     HLog.Entry entry =
369         new HLog.Entry(new HLogKey(encoded,
370             TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID),
371       new WALEdit());
372     Path parent = HLogUtil.getRegionDirRecoveredEditsDir(regiondir);
373     assertEquals(parent.getName(), HConstants.RECOVERED_EDITS_DIR);
374     fs.createNewFile(parent); // create a recovered.edits file
375 
376     Path p = HLogSplitter.getRegionSplitEditsPath(fs, entry, HBASEDIR, true);
377     String parentOfParent = p.getParent().getParent().getName();
378     assertEquals(parentOfParent, HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
379     HLogFactory.createRecoveredEditsWriter(fs, p, conf).close();
380   }
381 
382   @Test (timeout=300000)
383   public void testSplitPreservesEdits() throws IOException{
384     final String REGION = "region__1";
385     REGIONS.removeAll(REGIONS);
386     REGIONS.add(REGION);
387 
388     generateHLogs(1, 10, -1);
389     fs.initialize(fs.getUri(), conf);
390     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
391     Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath();
392     Path[] splitLog = getLogForRegion(HBASEDIR, TABLE_NAME, REGION);
393     assertEquals(1, splitLog.length);
394 
395     assertEquals("edits differ after split", true, logsAreEqual(originalLog, splitLog[0]));
396   }
397 
398 
399   @Test (timeout=300000)
400   public void testEmptyLogFiles() throws IOException {
401 
402     injectEmptyFile(".empty", true);
403     generateHLogs(Integer.MAX_VALUE);
404     injectEmptyFile("empty", true);
405 
406     // make fs act as a different client now
407     // initialize will create a new DFSClient with a new client ID
408     fs.initialize(fs.getUri(), conf);
409 
410     int expectedFiles = fs.listStatus(HLOGDIR).length - 2; // less 2 empty files
411     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
412     for (String region : REGIONS) {
413       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
414       assertEquals(expectedFiles, logfiles.length);
415       int count = 0;
416       for (Path logfile: logfiles) {
417         count += countHLog(logfile, fs, conf);
418       }
419       assertEquals(NUM_WRITERS * ENTRIES, count);
420     }
421   }
422 
423 
424   @Test (timeout=300000)
425   public void testEmptyOpenLogFiles() throws IOException {
426     injectEmptyFile(".empty", false);
427     generateHLogs(Integer.MAX_VALUE);
428     injectEmptyFile("empty", false);
429 
430     // make fs act as a different client now
431     // initialize will create a new DFSClient with a new client ID
432     fs.initialize(fs.getUri(), conf);
433 
434     int expectedFiles = fs.listStatus(HLOGDIR).length - 2 ; // less 2 empty files
435     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
436     for (String region : REGIONS) {
437       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
438       assertEquals(expectedFiles, logfiles.length);
439       int count = 0;
440       for (Path logfile: logfiles) {
441         count += countHLog(logfile, fs, conf);
442       }
443       assertEquals(NUM_WRITERS * ENTRIES, count);
444     }
445   }
446 
447   @Test (timeout=300000)
448   public void testOpenZeroLengthReportedFileButWithDataGetsSplit() throws IOException {
449     // generate logs but leave hlog.dat.5 open.
450     generateHLogs(5);
451 
452     fs.initialize(fs.getUri(), conf);
453 
454     int expectedFiles = fs.listStatus(HLOGDIR).length;
455     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
456     for (String region : REGIONS) {
457       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
458       assertEquals(expectedFiles, logfiles.length);
459       int count = 0;
460       for (Path logfile: logfiles) {
461         count += countHLog(logfile, fs, conf);
462       }
463       assertEquals(NUM_WRITERS * ENTRIES, count);
464     }
465   }
466 
467 
468   @Test (timeout=300000)
469   public void testTralingGarbageCorruptionFileSkipErrorsPasses() throws IOException {
470     conf.setBoolean(HBASE_SKIP_ERRORS, true);
471     generateHLogs(Integer.MAX_VALUE);
472     corruptHLog(new Path(HLOGDIR, HLOG_FILE_PREFIX + "5"),
473             Corruptions.APPEND_GARBAGE, true, fs);
474     fs.initialize(fs.getUri(), conf);
475 
476     int expectedFiles = fs.listStatus(HLOGDIR).length;
477     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
478     for (String region : REGIONS) {
479       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
480       assertEquals(expectedFiles, logfiles.length);
481       int count = 0;
482       for (Path logfile: logfiles) {
483         count += countHLog(logfile, fs, conf);
484       }
485       assertEquals(NUM_WRITERS * ENTRIES, count);
486     }
487   }
488 
489   @Test (timeout=300000)
490   public void testFirstLineCorruptionLogFileSkipErrorsPasses() throws IOException {
491     conf.setBoolean(HBASE_SKIP_ERRORS, true);
492     generateHLogs(Integer.MAX_VALUE);
493     corruptHLog(new Path(HLOGDIR, HLOG_FILE_PREFIX + "5"),
494             Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true, fs);
495     fs.initialize(fs.getUri(), conf);
496 
497     int expectedFiles = fs.listStatus(HLOGDIR).length - 1; // less 1 corrupted file
498     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
499     for (String region : REGIONS) {
500       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
501       assertEquals(expectedFiles, logfiles.length);
502       int count = 0;
503       for (Path logfile: logfiles) {
504         count += countHLog(logfile, fs, conf);
505       }
506       assertEquals((NUM_WRITERS - 1) * ENTRIES, count);
507     }
508   }
509 
510   @Test (timeout=300000)
511   public void testMiddleGarbageCorruptionSkipErrorsReadsHalfOfFile() throws IOException {
512     conf.setBoolean(HBASE_SKIP_ERRORS, true);
513     generateHLogs(Integer.MAX_VALUE);
514     corruptHLog(new Path(HLOGDIR, HLOG_FILE_PREFIX + "5"),
515             Corruptions.INSERT_GARBAGE_IN_THE_MIDDLE, false, fs);
516     fs.initialize(fs.getUri(), conf);
517 
518     int expectedFiles = fs.listStatus(HLOGDIR).length;
519     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
520     for (String region : REGIONS) {
521       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
522       assertEquals(expectedFiles, logfiles.length);
523       int count = 0;
524       for (Path logfile: logfiles) {
525         count += countHLog(logfile, fs, conf);
526       }
527       // the entries in the original logs are alternating regions
528       // considering the sequence file header, the middle corruption should
529       // affect at least half of the entries
530       int goodEntries = (NUM_WRITERS - 1) * ENTRIES;
531       int firstHalfEntries = (int) Math.ceil(ENTRIES / 2) - 1;
532       assertTrue("The file up to the corrupted area hasn't been parsed",
533               goodEntries + firstHalfEntries <= count);
534     }
535   }
536 
537   @Test (timeout=300000)
538   public void testCorruptedFileGetsArchivedIfSkipErrors() throws IOException {
539     conf.setBoolean(HBASE_SKIP_ERRORS, true);
540     Class<?> backupClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
541         Reader.class);
542     InstrumentedSequenceFileLogWriter.activateFailure = false;
543     HLogFactory.resetLogReaderClass();
544 
545     try {
546     Path c1 = new Path(HLOGDIR, HLOG_FILE_PREFIX + "0");
547       conf.setClass("hbase.regionserver.hlog.reader.impl",
548           FaultySequenceFileLogReader.class, HLog.Reader.class);
549       for (FaultySequenceFileLogReader.FailureType  failureType : FaultySequenceFileLogReader.FailureType.values()) {
550         conf.set("faultysequencefilelogreader.failuretype", failureType.name());
551         generateHLogs(1, ENTRIES, -1);
552         fs.initialize(fs.getUri(), conf);
553         HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
554         FileStatus[] archivedLogs = fs.listStatus(CORRUPTDIR);
555         assertEquals("expected a different file", c1.getName(), archivedLogs[0]
556             .getPath().getName());
557         assertEquals(archivedLogs.length, 1);
558         fs.delete(new Path(OLDLOGDIR, HLOG_FILE_PREFIX + "0"), false);
559       }
560     } finally {
561       conf.setClass("hbase.regionserver.hlog.reader.impl", backupClass,
562           Reader.class);
563       HLogFactory.resetLogReaderClass();
564     }
565   }
566 
567   @Test (timeout=300000, expected = IOException.class)
568   public void testTrailingGarbageCorruptionLogFileSkipErrorsFalseThrows()
569       throws IOException {
570     conf.setBoolean(HBASE_SKIP_ERRORS, false);
571     Class<?> backupClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
572         Reader.class);
573     InstrumentedSequenceFileLogWriter.activateFailure = false;
574     HLogFactory.resetLogReaderClass();
575 
576     try {
577       conf.setClass("hbase.regionserver.hlog.reader.impl",
578           FaultySequenceFileLogReader.class, HLog.Reader.class);
579       conf.set("faultysequencefilelogreader.failuretype", FaultySequenceFileLogReader.FailureType.BEGINNING.name());
580       generateHLogs(Integer.MAX_VALUE);
581       fs.initialize(fs.getUri(), conf);
582       HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
583     } finally {
584       conf.setClass("hbase.regionserver.hlog.reader.impl", backupClass,
585           Reader.class);
586       HLogFactory.resetLogReaderClass();
587     }
588   }
589 
590   @Test (timeout=300000)
591   public void testCorruptedLogFilesSkipErrorsFalseDoesNotTouchLogs()
592       throws IOException {
593     conf.setBoolean(HBASE_SKIP_ERRORS, false);
594     Class<?> backupClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
595         Reader.class);
596     InstrumentedSequenceFileLogWriter.activateFailure = false;
597     HLogFactory.resetLogReaderClass();
598 
599     try {
600       conf.setClass("hbase.regionserver.hlog.reader.impl",
601           FaultySequenceFileLogReader.class, HLog.Reader.class);
602       conf.set("faultysequencefilelogreader.failuretype", FaultySequenceFileLogReader.FailureType.BEGINNING.name());
603       generateHLogs(-1);
604       fs.initialize(fs.getUri(), conf);
605       try {
606         HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
607       } catch (IOException e) {
608         assertEquals(
609             "if skip.errors is false all files should remain in place",
610             NUM_WRITERS, fs.listStatus(HLOGDIR).length);
611       }
612     } finally {
613       conf.setClass("hbase.regionserver.hlog.reader.impl", backupClass,
614           Reader.class);
615       HLogFactory.resetLogReaderClass();
616     }
617   }
618 
619   @Test (timeout=300000)
620   public void testEOFisIgnored() throws IOException {
621     conf.setBoolean(HBASE_SKIP_ERRORS, false);
622 
623     final String REGION = "region__1";
624     REGIONS.removeAll(REGIONS);
625     REGIONS.add(REGION);
626 
627     int entryCount = 10;
628     Path c1 = new Path(HLOGDIR, HLOG_FILE_PREFIX + "0");
629     generateHLogs(1, entryCount, -1);
630     corruptHLog(c1, Corruptions.TRUNCATE, true, fs);
631 
632     fs.initialize(fs.getUri(), conf);
633     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
634 
635     Path[] splitLog = getLogForRegion(HBASEDIR, TABLE_NAME, REGION);
636     assertEquals(1, splitLog.length);
637 
638     int actualCount = 0;
639     HLog.Reader in = HLogFactory.createReader(fs, splitLog[0], conf);
640     @SuppressWarnings("unused")
641     HLog.Entry entry;
642     while ((entry = in.next()) != null) ++actualCount;
643     assertEquals(entryCount-1, actualCount);
644 
645     // should not have stored the EOF files as corrupt
646     FileStatus[] archivedLogs = fs.listStatus(CORRUPTDIR);
647     assertEquals(archivedLogs.length, 0);
648   }
649 
650   @Test (timeout=300000)
651   public void testCorruptWALTrailer() throws IOException {
652     conf.setBoolean(HBASE_SKIP_ERRORS, false);
653 
654     final String REGION = "region__1";
655     REGIONS.removeAll(REGIONS);
656     REGIONS.add(REGION);
657 
658     int entryCount = 10;
659     Path c1 = new Path(HLOGDIR, HLOG_FILE_PREFIX + "0");
660     generateHLogs(1, entryCount, -1);
661     corruptHLog(c1, Corruptions.TRUNCATE_TRAILER, true, fs);
662 
663     fs.initialize(fs.getUri(), conf);
664     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
665 
666     Path[] splitLog = getLogForRegion(HBASEDIR, TABLE_NAME, REGION);
667     assertEquals(1, splitLog.length);
668 
669     int actualCount = 0;
670     HLog.Reader in = HLogFactory.createReader(fs, splitLog[0], conf);
671     @SuppressWarnings("unused")
672     HLog.Entry entry;
673     while ((entry = in.next()) != null) ++actualCount;
674     assertEquals(entryCount, actualCount);
675 
676     // should not have stored the EOF files as corrupt
677     FileStatus[] archivedLogs = fs.listStatus(CORRUPTDIR);
678     assertEquals(archivedLogs.length, 0);
679   }
680 
681   @Test (timeout=300000)
682   public void testLogsGetArchivedAfterSplit() throws IOException {
683     conf.setBoolean(HBASE_SKIP_ERRORS, false);
684     generateHLogs(-1);
685     fs.initialize(fs.getUri(), conf);
686     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
687     FileStatus[] archivedLogs = fs.listStatus(OLDLOGDIR);
688     assertEquals("wrong number of files in the archive log", NUM_WRITERS, archivedLogs.length);
689   }
690 
691   @Test (timeout=300000)
692   public void testSplit() throws IOException {
693     generateHLogs(-1);
694     fs.initialize(fs.getUri(), conf);
695 
696     int expectedFiles = fs.listStatus(HLOGDIR).length;
697     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
698     for (String region : REGIONS) {
699       Path[] logfiles = getLogForRegion(HBASEDIR, TABLE_NAME, region);
700       assertEquals(expectedFiles, logfiles.length);
701       int count = 0;
702       for (Path logfile: logfiles) {
703         count += countHLog(logfile, fs, conf);
704       }
705       assertEquals(NUM_WRITERS * ENTRIES, count);
706     }
707   }
708 
709   @Test (timeout=300000)
710   public void testLogDirectoryShouldBeDeletedAfterSuccessfulSplit()
711   throws IOException {
712     generateHLogs(-1);
713     fs.initialize(fs.getUri(), conf);
714     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
715     FileStatus [] statuses = null;
716     try {
717       statuses = fs.listStatus(HLOGDIR);
718       if (statuses != null) {
719         Assert.fail("Files left in log dir: " +
720             Joiner.on(",").join(FileUtil.stat2Paths(statuses)));
721       }
722     } catch (FileNotFoundException e) {
723       // hadoop 0.21 throws FNFE whereas hadoop 0.20 returns null
724     }
725   }
726 
727   @Test(timeout=300000, expected = IOException.class)
728   public void testSplitWillFailIfWritingToRegionFails() throws Exception {
729     //leave 5th log open so we could append the "trap"
730     HLog.Writer [] writer = generateHLogs(4);
731 
732     fs.initialize(fs.getUri(), conf);
733 
734     String region = "break";
735     Path regiondir = new Path(TABLEDIR, region);
736     fs.mkdirs(regiondir);
737 
738     InstrumentedSequenceFileLogWriter.activateFailure = false;
739     appendEntry(writer[4], TABLE_NAME, Bytes.toBytes(region),
740         ("r" + 999).getBytes(), FAMILY, QUALIFIER, VALUE, 0);
741     writer[4].close();
742 
743     try {
744       InstrumentedSequenceFileLogWriter.activateFailure = true;
745       HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
746     } catch (IOException e) {
747       assertTrue(e.getMessage().
748         contains("This exception is instrumented and should only be thrown for testing"));
749       throw e;
750     } finally {
751       InstrumentedSequenceFileLogWriter.activateFailure = false;
752     }
753   }
754 
755 
756   // @Test TODO this test has been disabled since it was created!
757   // It currently fails because the second split doesn't output anything
758   // -- because there are no region dirs after we move aside the first
759   // split result
760   public void testSplittingLargeNumberOfRegionsConsistency() throws IOException {
761 
762     REGIONS.removeAll(REGIONS);
763     for (int i=0; i<100; i++) {
764       REGIONS.add("region__"+i);
765     }
766 
767     generateHLogs(1, 100, -1);
768     fs.initialize(fs.getUri(), conf);
769 
770     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
771     fs.rename(OLDLOGDIR, HLOGDIR);
772     Path firstSplitPath = new Path(HBASEDIR, TABLE_NAME+ ".first");
773     Path splitPath = new Path(HBASEDIR, TABLE_NAME.getNameAsString());
774     fs.rename(splitPath,
775             firstSplitPath);
776 
777     fs.initialize(fs.getUri(), conf);
778     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
779     assertEquals(0, compareHLogSplitDirs(firstSplitPath, splitPath));
780   }
781 
782   @Test (timeout=300000)
783   public void testSplitDeletedRegion() throws IOException {
784     REGIONS.removeAll(REGIONS);
785     String region = "region_that_splits";
786     REGIONS.add(region);
787 
788     generateHLogs(1);
789     fs.initialize(fs.getUri(), conf);
790 
791     Path regiondir = new Path(TABLEDIR, region);
792     fs.delete(regiondir, true);
793     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
794     assertFalse(fs.exists(regiondir));
795   }
796 
797   @Test (timeout=300000)
798   public void testIOEOnOutputThread() throws Exception {
799     conf.setBoolean(HBASE_SKIP_ERRORS, false);
800 
801     generateHLogs(-1);
802     fs.initialize(fs.getUri(), conf);
803     FileStatus[] logfiles = fs.listStatus(HLOGDIR);
804     assertTrue("There should be some log file",
805       logfiles != null && logfiles.length > 0);
806     // Set up a splitter that will throw an IOE on the output side
807     HLogSplitter logSplitter = new HLogSplitter(
808         conf, HBASEDIR, fs, null, null) {
809       protected HLog.Writer createWriter(FileSystem fs,
810           Path logfile, Configuration conf) throws IOException {
811         HLog.Writer mockWriter = Mockito.mock(HLog.Writer.class);
812         Mockito.doThrow(new IOException("Injected")).when(
813           mockWriter).append(Mockito.<HLog.Entry>any());
814         return mockWriter;
815       }
816     };
817     // Set up a background thread dumper.  Needs a thread to depend on and then we need to run
818     // the thread dumping in a background thread so it does not hold up the test.
819     final AtomicBoolean stop = new AtomicBoolean(false);
820     final Thread someOldThread = new Thread("Some-old-thread") {
821       @Override
822       public void run() {
823         while(!stop.get()) Threads.sleep(10);
824       }
825     };
826     someOldThread.setDaemon(true);
827     someOldThread.start();
828     final Thread t = new Thread("Background-thread-dumper") {
829       public void run() {
830         try {
831           Threads.threadDumpingIsAlive(someOldThread);
832         } catch (InterruptedException e) {
833           e.printStackTrace();
834         }
835       }
836     };
837     t.setDaemon(true);
838     t.start();
839     try {
840       logSplitter.splitLogFile(logfiles[0], null);
841       fail("Didn't throw!");
842     } catch (IOException ioe) {
843       assertTrue(ioe.toString().contains("Injected"));
844     } finally {
845       // Setting this to true will turn off the background thread dumper.
846       stop.set(true);
847     }
848   }
849 
850   // Test for HBASE-3412
851   @Test (timeout=300000)
852   public void testMovedHLogDuringRecovery() throws Exception {
853     generateHLogs(-1);
854 
855     fs.initialize(fs.getUri(), conf);
856 
857     // This partial mock will throw LEE for every file simulating
858     // files that were moved
859     FileSystem spiedFs = Mockito.spy(fs);
860     // The "File does not exist" part is very important,
861     // that's how it comes out of HDFS
862     Mockito.doThrow(new LeaseExpiredException("Injected: File does not exist")).
863         when(spiedFs).append(Mockito.<Path>any());
864 
865     try {
866       HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, spiedFs, conf);
867       assertEquals(NUM_WRITERS, fs.listStatus(OLDLOGDIR).length);
868       assertFalse(fs.exists(HLOGDIR));
869     } catch (IOException e) {
870       fail("There shouldn't be any exception but: " + e.toString());
871     }
872   }
873 
874   @Test (timeout=300000)
875   public void testRetryOpenDuringRecovery() throws Exception {
876     generateHLogs(-1);
877 
878     fs.initialize(fs.getUri(), conf);
879 
880     FileSystem spiedFs = Mockito.spy(fs);
881     // The "Cannot obtain block length", "Could not obtain the last block",
882     // and "Blocklist for [^ ]* has changed.*" part is very important,
883     // that's how it comes out of HDFS. If HDFS changes the exception
884     // message, this test needs to be adjusted accordingly.
885     //
886     // When DFSClient tries to open a file, HDFS needs to locate
887     // the last block of the file and get its length. However, if the
888     // last block is under recovery, HDFS may have problem to obtain
889     // the block length, in which case, retry may help.
890     Mockito.doAnswer(new Answer<FSDataInputStream>() {
891       private final String[] errors = new String[] {
892         "Cannot obtain block length", "Could not obtain the last block",
893         "Blocklist for " + OLDLOGDIR + " has changed"};
894       private int count = 0;
895 
896       public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable {
897             if (count < 3) {
898                 throw new IOException(errors[count++]);
899             }
900             return (FSDataInputStream)invocation.callRealMethod();
901         }
902     }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt());
903 
904     try {
905       HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, spiedFs, conf);
906       assertEquals(NUM_WRITERS, fs.listStatus(OLDLOGDIR).length);
907       assertFalse(fs.exists(HLOGDIR));
908     } catch (IOException e) {
909       fail("There shouldn't be any exception but: " + e.toString());
910     }
911   }
912 
913   @Test (timeout=300000)
914   public void testTerminationAskedByReporter() throws IOException, CorruptedLogFileException {
915     generateHLogs(1, 10, -1);
916     FileStatus logfile = fs.listStatus(HLOGDIR)[0];
917     fs.initialize(fs.getUri(), conf);
918 
919     final AtomicInteger count = new AtomicInteger();
920 
921     CancelableProgressable localReporter
922       = new CancelableProgressable() {
923         @Override
924         public boolean progress() {
925           count.getAndIncrement();
926           return false;
927         }
928       };
929 
930     FileSystem spiedFs = Mockito.spy(fs);
931     Mockito.doAnswer(new Answer<FSDataInputStream>() {
932       public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable {
933         Thread.sleep(1500); // Sleep a while and wait report status invoked
934         return (FSDataInputStream)invocation.callRealMethod();
935       }
936     }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt());
937 
938     try {
939       conf.setInt("hbase.splitlog.report.period", 1000);
940       boolean ret = HLogSplitter.splitLogFile(
941         HBASEDIR, logfile, spiedFs, conf, localReporter, null, null);
942       assertFalse("Log splitting should failed", ret);
943       assertTrue(count.get() > 0);
944     } catch (IOException e) {
945       fail("There shouldn't be any exception but: " + e.toString());
946     } finally {
947       // reset it back to its default value
948       conf.setInt("hbase.splitlog.report.period", 59000);
949     }
950   }
951 
952   /**
953    * Test log split process with fake data and lots of edits to trigger threading
954    * issues.
955    */
956   @Test (timeout=300000)
957   public void testThreading() throws Exception {
958     doTestThreading(20000, 128*1024*1024, 0);
959   }
960 
961   /**
962    * Test blocking behavior of the log split process if writers are writing slower
963    * than the reader is reading.
964    */
965   @Test (timeout=300000)
966   public void testThreadingSlowWriterSmallBuffer() throws Exception {
967     doTestThreading(200, 1024, 50);
968   }
969 
970   /**
971    * Sets up a log splitter with a mock reader and writer. The mock reader generates
972    * a specified number of edits spread across 5 regions. The mock writer optionally
973    * sleeps for each edit it is fed.
974    * *
975    * After the split is complete, verifies that the statistics show the correct number
976    * of edits output into each region.
977    *
978    * @param numFakeEdits number of fake edits to push through pipeline
979    * @param bufferSize size of in-memory buffer
980    * @param writerSlowness writer threads will sleep this many ms per edit
981    */
982   private void doTestThreading(final int numFakeEdits,
983       final int bufferSize,
984       final int writerSlowness) throws Exception {
985 
986     Configuration localConf = new Configuration(conf);
987     localConf.setInt("hbase.regionserver.hlog.splitlog.buffersize", bufferSize);
988 
989     // Create a fake log file (we'll override the reader to produce a stream of edits)
990     Path logPath = new Path(HLOGDIR, HLOG_FILE_PREFIX + ".fake");
991     FSDataOutputStream out = fs.create(logPath);
992     out.close();
993 
994     // Make region dirs for our destination regions so the output doesn't get skipped
995     final List<String> regions = ImmutableList.of("r0", "r1", "r2", "r3", "r4");
996     makeRegionDirs(fs, regions);
997 
998     // Create a splitter that reads and writes the data without touching disk
999     HLogSplitter logSplitter = new HLogSplitter(
1000         localConf, HBASEDIR, fs, null, null) {
1001 
1002       /* Produce a mock writer that doesn't write anywhere */
1003       protected HLog.Writer createWriter(FileSystem fs, Path logfile, Configuration conf)
1004       throws IOException {
1005         HLog.Writer mockWriter = Mockito.mock(HLog.Writer.class);
1006         Mockito.doAnswer(new Answer<Void>() {
1007           int expectedIndex = 0;
1008 
1009           @Override
1010           public Void answer(InvocationOnMock invocation) {
1011             if (writerSlowness > 0) {
1012               try {
1013                 Thread.sleep(writerSlowness);
1014               } catch (InterruptedException ie) {
1015                 Thread.currentThread().interrupt();
1016               }
1017             }
1018             HLog.Entry entry = (Entry) invocation.getArguments()[0];
1019             WALEdit edit = entry.getEdit();
1020             List<KeyValue> keyValues = edit.getKeyValues();
1021             assertEquals(1, keyValues.size());
1022             KeyValue kv = keyValues.get(0);
1023 
1024             // Check that the edits come in the right order.
1025             assertEquals(expectedIndex, Bytes.toInt(kv.getRow()));
1026             expectedIndex++;
1027             return null;
1028           }
1029         }).when(mockWriter).append(Mockito.<HLog.Entry>any());
1030         return mockWriter;
1031       }
1032 
1033       /* Produce a mock reader that generates fake entries */
1034       protected Reader getReader(FileSystem fs, Path curLogFile,
1035           Configuration conf, CancelableProgressable reporter) throws IOException {
1036         Reader mockReader = Mockito.mock(Reader.class);
1037         Mockito.doAnswer(new Answer<HLog.Entry>() {
1038           int index = 0;
1039 
1040           @Override
1041           public HLog.Entry answer(InvocationOnMock invocation) throws Throwable {
1042             if (index >= numFakeEdits) return null;
1043 
1044             // Generate r0 through r4 in round robin fashion
1045             int regionIdx = index % regions.size();
1046             byte region[] = new byte[] {(byte)'r', (byte) (0x30 + regionIdx)};
1047 
1048             HLog.Entry ret = createTestEntry(TABLE_NAME, region,
1049                 Bytes.toBytes((int)(index / regions.size())),
1050                 FAMILY, QUALIFIER, VALUE, index);
1051             index++;
1052             return ret;
1053           }
1054         }).when(mockReader).next();
1055         return mockReader;
1056       }
1057     };
1058 
1059     logSplitter.splitLogFile(fs.getFileStatus(logPath), null);
1060 
1061     // Verify number of written edits per region
1062     Map<byte[], Long> outputCounts = logSplitter.outputSink.getOutputCounts();
1063     for (Map.Entry<byte[], Long> entry : outputCounts.entrySet()) {
1064       LOG.info("Got " + entry.getValue() + " output edits for region " +
1065           Bytes.toString(entry.getKey()));
1066       assertEquals((long)entry.getValue(), numFakeEdits / regions.size());
1067     }
1068     assertEquals(regions.size(), outputCounts.size());
1069   }
1070 
1071   // HBASE-2312: tests the case where a RegionServer enters a GC pause,
1072   // comes back online after the master declared it dead and started to split.
1073   // Want log rolling after a master split to fail
1074   @Test (timeout=300000)
1075   @Ignore("Need HADOOP-6886, HADOOP-6840, & HDFS-617 for this. HDFS 0.20.205.1+ should have this")
1076   public void testLogRollAfterSplitStart() throws IOException {
1077     // set flush interval to a large number so it doesn't interrupt us
1078     final String F_INTERVAL = "hbase.regionserver.optionallogflushinterval";
1079     long oldFlushInterval = conf.getLong(F_INTERVAL, 1000);
1080     conf.setLong(F_INTERVAL, 1000*1000*100);
1081     HLog log = null;
1082     String logName = "testLogRollAfterSplitStart";
1083     Path thisTestsDir = new Path(HBASEDIR, logName);
1084 
1085     try {
1086       // put some entries in an HLog
1087       TableName tableName =
1088           TableName.valueOf(this.getClass().getName());
1089       HRegionInfo regioninfo = new HRegionInfo(tableName,
1090           HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
1091       log = HLogFactory.createHLog(fs, HBASEDIR, logName, conf);
1092 
1093       final int total = 20;
1094       for (int i = 0; i < total; i++) {
1095         WALEdit kvs = new WALEdit();
1096         kvs.add(new KeyValue(Bytes.toBytes(i), tableName.getName(), tableName.getName()));
1097         HTableDescriptor htd = new HTableDescriptor(tableName);
1098         htd.addFamily(new HColumnDescriptor("column"));
1099         log.append(regioninfo, tableName, kvs, System.currentTimeMillis(), htd);
1100       }
1101       // Send the data to HDFS datanodes and close the HDFS writer
1102       log.sync();
1103       ((FSHLog) log).cleanupCurrentWriter(log.getFilenum());
1104 
1105       /* code taken from ProcessServerShutdown.process()
1106        * handles RS shutdowns (as observed by the Master)
1107        */
1108       // rename the directory so a rogue RS doesn't create more HLogs
1109       Path rsSplitDir = new Path(thisTestsDir.getParent(),
1110                                  thisTestsDir.getName() + "-splitting");
1111       fs.rename(thisTestsDir, rsSplitDir);
1112       LOG.debug("Renamed region directory: " + rsSplitDir);
1113 
1114       // Process the old log files
1115       HLogSplitter.split(HBASEDIR, rsSplitDir, OLDLOGDIR, fs, conf);
1116 
1117       // Now, try to roll the HLog and verify failure
1118       try {
1119         log.rollWriter();
1120         Assert.fail("rollWriter() did not throw any exception.");
1121       } catch (IOException ioe) {
1122         if (ioe.getCause().getMessage().contains("FileNotFound")) {
1123           LOG.info("Got the expected exception: ", ioe.getCause());
1124         } else {
1125           Assert.fail("Unexpected exception: " + ioe);
1126         }
1127       }
1128     } finally {
1129       conf.setLong(F_INTERVAL, oldFlushInterval);
1130       if (log != null) {
1131         log.close();
1132       }
1133       if (fs.exists(thisTestsDir)) {
1134         fs.delete(thisTestsDir, true);
1135       }
1136     }
1137   }
1138 
1139   /**
1140    * This thread will keep adding new log files
1141    * It simulates a region server that was considered dead but woke up and wrote
1142    * some more to a new hlog
1143    */
1144   class ZombieNewLogWriterRegionServer extends Thread {
1145     AtomicBoolean stop;
1146     CountDownLatch latch;
1147     public ZombieNewLogWriterRegionServer(CountDownLatch latch, AtomicBoolean stop) {
1148       super("ZombieNewLogWriterRegionServer");
1149       this.latch = latch;
1150       this.stop = stop;
1151     }
1152 
1153     @Override
1154     public void run() {
1155       if (stop.get()) {
1156         return;
1157       }
1158       Path tableDir = FSUtils.getTableDir(HBASEDIR, TABLE_NAME);
1159       Path regionDir = new Path(tableDir, REGIONS.get(0));
1160       Path recoveredEdits = new Path(regionDir, HConstants.RECOVERED_EDITS_DIR);
1161       String region = "juliet";
1162       Path julietLog = new Path(HLOGDIR, HLOG_FILE_PREFIX + ".juliet");
1163       try {
1164 
1165         while (!fs.exists(recoveredEdits) && !stop.get()) {
1166           LOG.info("Juliet: split not started, sleeping a bit...");
1167           Threads.sleep(10);
1168         }
1169 
1170         fs.mkdirs(new Path(tableDir, region));
1171         HLog.Writer writer = HLogFactory.createWALWriter(fs,
1172           julietLog, conf);
1173         appendEntry(writer, TableName.valueOf("juliet"), ("juliet").getBytes(),
1174             ("r").getBytes(), FAMILY, QUALIFIER, VALUE, 0);
1175         writer.close();
1176         LOG.info("Juliet file creator: created file " + julietLog);
1177         latch.countDown();
1178       } catch (IOException e1) {
1179         LOG.error("Failed to create file " + julietLog, e1);
1180         assertTrue("Failed to create file " + julietLog, false);
1181       }
1182     }
1183   }
1184 
1185   @Test (timeout=300000)
1186   public void testSplitLogFileWithOneRegion() throws IOException {
1187     LOG.info("testSplitLogFileWithOneRegion");
1188     final String REGION = "region__1";
1189     REGIONS.removeAll(REGIONS);
1190     REGIONS.add(REGION);
1191 
1192     generateHLogs(1, 10, -1);
1193     fs.initialize(fs.getUri(), conf);
1194     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
1195 
1196     Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath();
1197     Path[] splitLog = getLogForRegion(HBASEDIR, TABLE_NAME, REGION);
1198     assertEquals(1, splitLog.length);
1199 
1200     assertEquals(true, logsAreEqual(originalLog, splitLog[0]));
1201   }
1202 
1203   @Test (timeout=300000)
1204   public void testSplitLogFileDeletedRegionDir() throws IOException {
1205     LOG.info("testSplitLogFileDeletedRegionDir");
1206     final String REGION = "region__1";
1207     REGIONS.removeAll(REGIONS);
1208     REGIONS.add(REGION);
1209 
1210     generateHLogs(1, 10, -1);
1211     fs.initialize(fs.getUri(), conf);
1212 
1213     Path regiondir = new Path(TABLEDIR, REGION);
1214     LOG.info("Region directory is" + regiondir);
1215     fs.delete(regiondir, true);
1216 
1217     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
1218 
1219     assertTrue(!fs.exists(regiondir));
1220     assertTrue(true);
1221   }
1222 
1223   @Test (timeout=300000)
1224   public void testSplitLogFileEmpty() throws IOException {
1225     LOG.info("testSplitLogFileEmpty");
1226     injectEmptyFile(".empty", true);
1227 
1228     fs.initialize(fs.getUri(), conf);
1229 
1230     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
1231     Path tdir = FSUtils.getTableDir(HBASEDIR, TABLE_NAME);
1232     assertFalse(fs.exists(tdir));
1233 
1234     assertEquals(0, countHLog(fs.listStatus(OLDLOGDIR)[0].getPath(), fs, conf));
1235   }
1236 
1237   @Test (timeout=300000)
1238   public void testSplitLogFileMultipleRegions() throws IOException {
1239     LOG.info("testSplitLogFileMultipleRegions");
1240     generateHLogs(1, 10, -1);
1241     fs.initialize(fs.getUri(), conf);
1242 
1243     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
1244     for (String region : REGIONS) {
1245       Path[] recovered = getLogForRegion(HBASEDIR, TABLE_NAME, region);
1246       assertEquals(1, recovered.length);
1247       assertEquals(10, countHLog(recovered[0], fs, conf));
1248     }
1249   }
1250 
1251   @Test (timeout=300000)
1252   public void testSplitLogFileFirstLineCorruptionLog()
1253   throws IOException {
1254     conf.setBoolean(HBASE_SKIP_ERRORS, true);
1255     generateHLogs(1, 10, -1);
1256     FileStatus logfile = fs.listStatus(HLOGDIR)[0];
1257 
1258     corruptHLog(logfile.getPath(),
1259         Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true, fs);
1260 
1261     fs.initialize(fs.getUri(), conf);
1262     HLogSplitter.split(HBASEDIR, HLOGDIR, OLDLOGDIR, fs, conf);
1263 
1264     final Path corruptDir = new Path(FSUtils.getRootDir(conf), conf.get(
1265         "hbase.regionserver.hlog.splitlog.corrupt.dir", HConstants.CORRUPT_DIR_NAME));
1266     assertEquals(1, fs.listStatus(corruptDir).length);
1267   }
1268 
1269   /**
1270    * @throws IOException
1271    * @see https://issues.apache.org/jira/browse/HBASE-4862
1272    */
1273   @Test (timeout=300000)
1274   public void testConcurrentSplitLogAndReplayRecoverEdit() throws IOException {
1275     LOG.info("testConcurrentSplitLogAndReplayRecoverEdit");
1276     // Generate hlogs for our destination region
1277     String regionName = "r0";
1278     final Path regiondir = new Path(TABLEDIR, regionName);
1279     REGIONS = new ArrayList<String>();
1280     REGIONS.add(regionName);
1281     generateHLogs(-1);
1282 
1283     HLogFactory.createHLog(fs, regiondir, regionName, conf);
1284     FileStatus[] logfiles = fs.listStatus(HLOGDIR);
1285     assertTrue("There should be some log file",
1286       logfiles != null && logfiles.length > 0);
1287 
1288     HLogSplitter logSplitter = new HLogSplitter(
1289         conf, HBASEDIR, fs, null, null) {
1290       protected HLog.Writer createWriter(FileSystem fs, Path logfile, Configuration conf)
1291       throws IOException {
1292         HLog.Writer writer = HLogFactory.createRecoveredEditsWriter(fs, logfile, conf);
1293         // After creating writer, simulate region's
1294         // replayRecoveredEditsIfAny() which gets SplitEditFiles of this
1295         // region and delete them, excluding files with '.temp' suffix.
1296         NavigableSet<Path> files = HLogUtil.getSplitEditFilesSorted(fs, regiondir);
1297         if (files != null && !files.isEmpty()) {
1298           for (Path file : files) {
1299             if (!this.fs.delete(file, false)) {
1300               LOG.error("Failed delete of " + file);
1301             } else {
1302               LOG.debug("Deleted recovered.edits file=" + file);
1303             }
1304           }
1305         }
1306         return writer;
1307       }
1308     };
1309     try{
1310       logSplitter.splitLogFile(logfiles[0], null);
1311     } catch (IOException e) {
1312       LOG.info(e);
1313       Assert.fail("Throws IOException when spliting "
1314           + "log, it is most likely because writing file does not "
1315           + "exist which is caused by concurrent replayRecoveredEditsIfAny()");
1316     }
1317     if (fs.exists(CORRUPTDIR)) {
1318       if (fs.listStatus(CORRUPTDIR).length > 0) {
1319         Assert.fail("There are some corrupt logs, "
1320                 + "it is most likely caused by concurrent replayRecoveredEditsIfAny()");
1321       }
1322     }
1323   }
1324 
1325   private static void flushToConsole(String s) {
1326     System.out.println(s);
1327     System.out.flush();
1328   }
1329 
1330 
1331   private HLog.Writer [] generateHLogs(int leaveOpen) throws IOException {
1332     return generateHLogs(NUM_WRITERS, ENTRIES, leaveOpen);
1333   }
1334 
1335   private HLog.Writer [] generateHLogs(final int writers, final int entries, final int leaveOpen) throws IOException {
1336     return generateHLogs((DistributedFileSystem)this.fs, writers, entries, leaveOpen);
1337   }
1338 
1339   private static void makeRegionDirs(FileSystem fs, List<String> regions) throws IOException {
1340     for (String region : regions) {
1341       flushToConsole("Creating dir for region " + region);
1342       fs.mkdirs(new Path(TABLEDIR, region));
1343     }
1344   }
1345 
1346   private static HLog.Writer [] generateHLogs(final DistributedFileSystem dfs, int writers, int entries, int leaveOpen)
1347   throws IOException {
1348     makeRegionDirs(dfs, REGIONS);
1349     dfs.mkdirs(HLOGDIR);
1350     HLog.Writer [] ws = new HLog.Writer[writers];
1351     int seq = 0;
1352     for (int i = 0; i < writers; i++) {
1353       ws[i] = HLogFactory.createWALWriter(dfs, new Path(HLOGDIR, HLOG_FILE_PREFIX + i), dfs.getConf());
1354       for (int j = 0; j < entries; j++) {
1355         int prefix = 0;
1356         for (String region : REGIONS) {
1357           String row_key = region + prefix++ + i + j;
1358           appendEntry(ws[i], TABLE_NAME, region.getBytes(), row_key.getBytes(), FAMILY, QUALIFIER, VALUE, seq++);
1359         }
1360       }
1361       if (i != leaveOpen) {
1362         ws[i].close();
1363         LOG.info("Closing writer " + i);
1364       }
1365     }
1366     return ws;
1367   }
1368 
1369   private Path[] getLogForRegion(Path rootdir, TableName table, String region)
1370   throws IOException {
1371     Path tdir = FSUtils.getTableDir(rootdir, table);
1372     @SuppressWarnings("deprecation")
1373     Path editsdir = HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir,
1374       Bytes.toString(region.getBytes())));
1375     FileStatus [] files = this.fs.listStatus(editsdir);
1376     Path[] paths = new Path[files.length];
1377     for (int i = 0; i < files.length; i++) {
1378       paths[i] = files[i].getPath();
1379     }
1380     return paths;
1381   }
1382 
1383   private void corruptHLog(Path path, Corruptions corruption, boolean close,
1384                            FileSystem fs) throws IOException {
1385 
1386     FSDataOutputStream out;
1387     int fileSize = (int) fs.listStatus(path)[0].getLen();
1388 
1389     FSDataInputStream in = fs.open(path);
1390     byte[] corrupted_bytes = new byte[fileSize];
1391     in.readFully(0, corrupted_bytes, 0, fileSize);
1392     in.close();
1393 
1394     switch (corruption) {
1395       case APPEND_GARBAGE:
1396         fs.delete(path, false);
1397         out = fs.create(path);
1398         out.write(corrupted_bytes);
1399         out.write("-----".getBytes());
1400         closeOrFlush(close, out);
1401         break;
1402 
1403       case INSERT_GARBAGE_ON_FIRST_LINE:
1404         fs.delete(path, false);
1405         out = fs.create(path);
1406         out.write(0);
1407         out.write(corrupted_bytes);
1408         closeOrFlush(close, out);
1409         break;
1410 
1411       case INSERT_GARBAGE_IN_THE_MIDDLE:
1412         fs.delete(path, false);
1413         out = fs.create(path);
1414         int middle = (int) Math.floor(corrupted_bytes.length / 2);
1415         out.write(corrupted_bytes, 0, middle);
1416         out.write(0);
1417         out.write(corrupted_bytes, middle, corrupted_bytes.length - middle);
1418         closeOrFlush(close, out);
1419         break;
1420 
1421       case TRUNCATE:
1422         fs.delete(path, false);
1423         out = fs.create(path);
1424         out.write(corrupted_bytes, 0, fileSize
1425           - (32 + ProtobufLogReader.PB_WAL_COMPLETE_MAGIC.length + Bytes.SIZEOF_INT));
1426         closeOrFlush(close, out);
1427         break;
1428 
1429       case TRUNCATE_TRAILER:
1430         fs.delete(path, false);
1431         out = fs.create(path);
1432         out.write(corrupted_bytes, 0, fileSize - Bytes.SIZEOF_INT);// trailer is truncated.
1433         closeOrFlush(close, out);
1434         break;
1435     }
1436   }
1437 
1438   private void closeOrFlush(boolean close, FSDataOutputStream out)
1439   throws IOException {
1440     if (close) {
1441       out.close();
1442     } else {
1443       Method syncMethod = null;
1444       try {
1445         syncMethod = out.getClass().getMethod("hflush", new Class<?> []{});
1446       } catch (NoSuchMethodException e) {
1447         try {
1448           syncMethod = out.getClass().getMethod("sync", new Class<?> []{});
1449         } catch (NoSuchMethodException ex) {
1450           throw new IOException("This version of Hadoop supports " +
1451               "neither Syncable.sync() nor Syncable.hflush().");
1452         }
1453       }
1454       try {
1455         syncMethod.invoke(out, new Object[]{});
1456       } catch (Exception e) {
1457         throw new IOException(e);
1458       }
1459       // Not in 0out.hflush();
1460     }
1461   }
1462 
1463   @SuppressWarnings("unused")
1464   private void dumpHLog(Path log, FileSystem fs, Configuration conf) throws IOException {
1465     HLog.Entry entry;
1466     HLog.Reader in = HLogFactory.createReader(fs, log, conf);
1467     while ((entry = in.next()) != null) {
1468       System.out.println(entry);
1469     }
1470   }
1471 
1472   private int countHLog(Path log, FileSystem fs, Configuration conf) throws IOException {
1473     int count = 0;
1474     HLog.Reader in = HLogFactory.createReader(fs, log, conf);
1475     while (in.next() != null) {
1476       count++;
1477     }
1478     return count;
1479   }
1480 
1481 
1482   public static long appendEntry(HLog.Writer writer, TableName table, byte[] region,
1483                           byte[] row, byte[] family, byte[] qualifier,
1484                           byte[] value, long seq)
1485           throws IOException {
1486     LOG.info(Thread.currentThread().getName() + " append");
1487     writer.append(createTestEntry(table, region, row, family, qualifier, value, seq));
1488     LOG.info(Thread.currentThread().getName() + " sync");
1489     writer.sync();
1490     return seq;
1491   }
1492 
1493   private static HLog.Entry createTestEntry(
1494       TableName table, byte[] region,
1495       byte[] row, byte[] family, byte[] qualifier,
1496       byte[] value, long seq) {
1497     long time = System.nanoTime();
1498     WALEdit edit = new WALEdit();
1499     seq++;
1500     edit.add(new KeyValue(row, family, qualifier, time, KeyValue.Type.Put, value));
1501     return new HLog.Entry(new HLogKey(region, table, seq, time,
1502         HConstants.DEFAULT_CLUSTER_ID), edit);
1503   }
1504 
1505 
1506   private void injectEmptyFile(String suffix, boolean closeFile)
1507           throws IOException {
1508     HLog.Writer writer = HLogFactory.createWALWriter(
1509         fs, new Path(HLOGDIR, HLOG_FILE_PREFIX + suffix), conf);
1510     if (closeFile) writer.close();
1511   }
1512 
1513   @SuppressWarnings("unused")
1514   private void listLogs(FileSystem fs, Path dir) throws IOException {
1515     for (FileStatus file : fs.listStatus(dir)) {
1516       System.out.println(file.getPath());
1517     }
1518 
1519   }
1520 
1521   private int compareHLogSplitDirs(Path p1, Path p2) throws IOException {
1522     FileStatus[] f1 = fs.listStatus(p1);
1523     FileStatus[] f2 = fs.listStatus(p2);
1524     assertNotNull("Path " + p1 + " doesn't exist", f1);
1525     assertNotNull("Path " + p2 + " doesn't exist", f2);
1526 
1527     System.out.println("Files in " + p1 + ": " +
1528         Joiner.on(",").join(FileUtil.stat2Paths(f1)));
1529     System.out.println("Files in " + p2 + ": " +
1530         Joiner.on(",").join(FileUtil.stat2Paths(f2)));
1531     assertEquals(f1.length, f2.length);
1532 
1533     for (int i = 0; i < f1.length; i++) {
1534       // Regions now have a directory named RECOVERED_EDITS_DIR and in here
1535       // are split edit files. In below presume only 1.
1536       Path rd1 = HLogUtil.getRegionDirRecoveredEditsDir(f1[i].getPath());
1537       FileStatus[] rd1fs = fs.listStatus(rd1);
1538       assertEquals(1, rd1fs.length);
1539       Path rd2 = HLogUtil.getRegionDirRecoveredEditsDir(f2[i].getPath());
1540       FileStatus[] rd2fs = fs.listStatus(rd2);
1541       assertEquals(1, rd2fs.length);
1542       if (!logsAreEqual(rd1fs[0].getPath(), rd2fs[0].getPath())) {
1543         return -1;
1544       }
1545     }
1546     return 0;
1547   }
1548 
1549   private boolean logsAreEqual(Path p1, Path p2) throws IOException {
1550     HLog.Reader in1, in2;
1551     in1 = HLogFactory.createReader(fs, p1, conf);
1552     in2 = HLogFactory.createReader(fs, p2, conf);
1553     HLog.Entry entry1;
1554     HLog.Entry entry2;
1555     while ((entry1 = in1.next()) != null) {
1556       entry2 = in2.next();
1557       if ((entry1.getKey().compareTo(entry2.getKey()) != 0) ||
1558               (!entry1.getEdit().toString().equals(entry2.getEdit().toString()))) {
1559         return false;
1560       }
1561     }
1562     return true;
1563   }
1564 }