/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.EOFException;
import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.ipc.RpcClient;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.Store;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
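/**
 * Test log rolling: verifies that WAL files roll and get cleaned up under
 * normal writes, on datanode death, and on pipeline restart, and that a
 * compaction record in the WAL does not block rolling.
 */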
@Category(LargeTests.class)
public class TestLogRolling {
  private static final Log LOG = LogFactory.getLog(TestLogRolling.class);
  private HRegionServer server;
  private HLog log;
  private String tableName;
  private byte[] value;
  private FileSystem fs;
  private MiniDFSCluster dfsCluster;
  private HBaseAdmin admin;
  private MiniHBaseCluster cluster;
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
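  // Turn logging all the way up on the HDFS and WAL subsystems these tests touch.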
  {
    ((Log4JLogger) DataNode.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) LeaseManager.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) LogFactory.getLog("org.apache.hadoop.hdfs.server.namenode.FSNamesystem"))
        .getLogger().setLevel(Level.ALL);
    ((Log4JLogger) DFSClient.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) HRegionServer.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) HRegion.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) HLog.LOG).getLogger().setLevel(Level.ALL);
  }
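  /**
   * Builds the test value: the class name repeated until the value is at
   * least 1000 bytes long, so relatively few puts fill the WAL.
   */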
  public TestLogRolling() {
    this.server = null;
    this.log = null;
    this.tableName = null;

    String className = this.getClass().getName();
    StringBuilder v = new StringBuilder(className);
    while (v.length() < 1000) {
      v.append(className);
    }
    this.value = Bytes.toBytes(v.toString());
  }
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    // Short-circuit reads interfere with the datanode-death tests below.
    System.setProperty("hbase.tests.use.shortcircuit.reads", "false");

    // Keep regions small so splits and flushes happen quickly.
    TEST_UTIL.getConfiguration().setLong(HConstants.HREGION_MAX_FILESIZE, 768L * 1024L);

    // Roll the WAL after a small number of entries.
    TEST_UTIL.getConfiguration().setInt("hbase.regionserver.maxlogentries", 32);

    // Tolerate a couple of log-roll errors, and keep RPC timeouts short.
    TEST_UTIL.getConfiguration().setInt(
        "hbase.regionserver.logroll.errors.tolerated", 2);
    TEST_UTIL.getConfiguration().setInt(RpcClient.PING_INTERVAL_NAME, 10 * 1000);
    TEST_UTIL.getConfiguration().setInt(RpcClient.SOCKET_TIMEOUT, 10 * 1000);
    TEST_UTIL.getConfiguration().setInt("hbase.rpc.timeout", 10 * 1000);

    // Flush memstores frequently.
    TEST_UTIL.getConfiguration().setInt("hbase.hregion.memstore.optionalflushcount", 2);

    // A small flush size so that puts trigger flushes quickly.
    TEST_UTIL.getConfiguration().setInt(
        HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 8192);

    // Back off longer between client retries while datanodes come and go.
    TEST_UTIL.getConfiguration().setLong("hbase.client.pause", 10 * 1000);

    // Wake background threads (compactions, log rollers, etc.) more often.
    TEST_UTIL.getConfiguration().setInt(HConstants.THREAD_WAKE_FREQUENCY, 2 * 1000);

    // The datanode-death and pipeline-restart tests require append support.
    TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);

    // Make the namenode notice dead datanodes quickly.
    TEST_UTIL.getConfiguration().setInt("heartbeat.recheck.interval", 5000);
    TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1);

    // Retry block writes aggressively, and bound how long the WAL may run
    // under-replicated before rolling is forced or disabled.
    TEST_UTIL.getConfiguration().setInt("dfs.client.block.write.retries", 30);
    TEST_UTIL.getConfiguration().setInt(
        "hbase.regionserver.hlog.tolerable.lowreplication", 2);
    TEST_UTIL.getConfiguration().setInt(
        "hbase.regionserver.hlog.lowreplication.rolllimit", 3);
  }
  @Before
  public void setUp() throws Exception {
    TEST_UTIL.startMiniCluster(1, 1, 2);

    cluster = TEST_UTIL.getHBaseCluster();
    dfsCluster = TEST_UTIL.getDFSCluster();
    fs = TEST_UTIL.getTestFileSystem();
    admin = TEST_UTIL.getHBaseAdmin();

    // Disable region balancing; these tests pin state to a particular region server.
    cluster.getMaster().balanceSwitch(false);
  }

  @After
  public void tearDown() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }
  private void startAndWriteData() throws IOException, InterruptedException {
    // When the meta table can be opened, the region servers are running.
    new HTable(TEST_UTIL.getConfiguration(), TableName.META_TABLE_NAME);
    this.server = cluster.getRegionServerThreads().get(0).getRegionServer();
    this.log = server.getWAL();

    HTable table = createTestTable(this.tableName);

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();
    for (int i = 1; i <= 256; i++) { // 256 writes should cause several log rolls
      doPut(table, i);
      if (i % 32 == 0) {
        // After every 32 writes, pause to let the log roller catch up.
        try {
          Thread.sleep(2000);
        } catch (InterruptedException e) {
          // continue
        }
      }
    }
  }
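  /**
   * Tests that rolled logs are deleted: writes enough data to force several
   * rolls, flushes all regions, rolls once more, and verifies that only a
   * small number of rolled log files remain.
   */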
  @Test
  public void testLogRolling() throws Exception {
    this.tableName = getName();
    startAndWriteData();
    LOG.info("after writing there are " + ((FSHLog) log).getNumRolledLogFiles() + " log files");

    // Flush all regions so outstanding WAL entries become archivable.
    List<HRegion> regions =
        new ArrayList<HRegion>(server.getOnlineRegionsLocalContext());
    for (HRegion r : regions) {
      r.flushcache();
    }

    // Now roll the log; old files should be cleaned up.
    log.rollWriter();

    int count = ((FSHLog) log).getNumRolledLogFiles();
    LOG.info("after flushing all regions and rolling logs there are " +
        count + " log files");
    assertTrue(("actual count: " + count), count <= 2);
  }

  private static String getName() {
    return "TestLogRolling";
  }
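  /** Writes one row, then sleeps to give syncs and any resulting log roll time to run. */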
  void writeData(HTable table, int rownum) throws IOException {
    doPut(table, rownum);

    // Give the log roller a chance to run.
    try {
      Thread.sleep(2000);
    } catch (InterruptedException e) {
      // continue
    }
  }
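  /** Verifies that the given row exists and holds the expected test value. */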
  void validateData(HTable table, int rownum) throws IOException {
    String row = "row" + String.format("%1$04d", rownum);
    Get get = new Get(Bytes.toBytes(row));
    get.addFamily(HConstants.CATALOG_FAMILY);
    Result result = table.get(get);
    assertTrue(result.size() == 1);
    assertTrue(Bytes.equals(value,
        result.getValue(HConstants.CATALOG_FAMILY, null)));
    LOG.info("Validated row " + row);
  }
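  /**
   * Writes ten rows, then keeps writing a throw-away row until the WAL's
   * low-replication roll flag reaches the expected state or the timeout expires.
   */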
  void batchWriteAndWait(HTable table, int start, boolean expect, int timeout)
      throws IOException {
    for (int i = 0; i < 10; i++) {
      Put put = new Put(Bytes.toBytes("row"
          + String.format("%1$04d", (start + i))));
      put.add(HConstants.CATALOG_FAMILY, null, value);
      table.put(put);
    }
    Put tmpPut = new Put(Bytes.toBytes("tmprow"));
    tmpPut.add(HConstants.CATALOG_FAMILY, null, value);
    long startTime = System.currentTimeMillis();
    long remaining = timeout;
    while (remaining > 0) {
      if (log.isLowReplicationRollEnabled() == expect) {
        break;
      } else {
        // Keep writing so the WAL keeps syncing and the roller can observe
        // the replication change.
        table.put(tmpPut);
        try {
          Thread.sleep(200);
        } catch (InterruptedException e) {
          // continue
        }
        remaining = timeout - (System.currentTimeMillis() - startTime);
      }
    }
  }
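  /**
   * Uses reflection to fish the datanode pipeline out of the WAL's underlying
   * output stream, since DFSOutputStream.getPipeline() is not public.
   */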
  DatanodeInfo[] getPipeline(HLog log) throws IllegalArgumentException,
      IllegalAccessException, InvocationTargetException {
    OutputStream stm = ((FSHLog) log).getOutputStream();
    Method getPipeline = null;
    for (Method m : stm.getClass().getDeclaredMethods()) {
      if (m.getName().endsWith("getPipeline")) {
        getPipeline = m;
        getPipeline.setAccessible(true);
        break;
      }
    }

    assertTrue("Need DFSOutputStream.getPipeline() for this test",
        null != getPipeline);
    Object repl = getPipeline.invoke(stm, new Object[] {});
    return (DatanodeInfo[]) repl;
  }
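  /**
   * Tests that the WAL rolls when a datanode in its pipeline dies, and that
   * low-replication rolling is disabled, rather than looping, once replication
   * cannot be restored.
   */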
  @Test
  public void testLogRollOnDatanodeDeath() throws Exception {
    assertTrue("This test requires HLog file replication set to 2.",
        fs.getDefaultReplication() == 2);
    LOG.info("Replication=" + fs.getDefaultReplication());

    this.server = cluster.getRegionServer(0);
    this.log = server.getWAL();

    // Create the test table and open it.
    String tableName = getName();
    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));

    admin.createTable(desc);
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);
    assertTrue(table.isAutoFlush());

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();

    assertTrue("Need HDFS-826 for this test", ((FSHLog) log).canGetCurReplicas());

    assertTrue("Need append support for this test", FSUtils
        .isAppendSupported(TEST_UTIL.getConfiguration()));

    // Add new datanodes, then kill the original ones, so the WAL pipeline
    // ends up entirely on nodes this test controls.
    List<DataNode> existingNodes = dfsCluster.getDataNodes();
    int numDataNodes = 3;
    dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), numDataNodes, true,
        null, null);
    List<DataNode> allNodes = dfsCluster.getDataNodes();
    for (int i = allNodes.size() - 1; i >= 0; i--) {
      if (existingNodes.contains(allNodes.get(i))) {
        dfsCluster.stopDataNode(i);
      }
    }

    assertTrue("DataNodes " + dfsCluster.getDataNodes().size() +
        " default replication " + fs.getDefaultReplication(),
        dfsCluster.getDataNodes().size() >= fs.getDefaultReplication() + 1);

    writeData(table, 2);

    long curTime = System.currentTimeMillis();
    long oldFilenum = ((FSHLog) log).getFilenum();
    assertTrue("Log should have a timestamp older than now",
        curTime > oldFilenum && oldFilenum != -1);

    assertTrue("The log shouldn't have rolled yet",
        oldFilenum == ((FSHLog) log).getFilenum());
    final DatanodeInfo[] pipeline = getPipeline(log);
    assertTrue(pipeline.length == fs.getDefaultReplication());

    // Kill a datanode in the pipeline to force a log roll on the next sync.
    assertTrue(dfsCluster.stopDataNode(pipeline[0].getName()) != null);

    // This write should succeed, but trigger a log roll.
    writeData(table, 2);
    long newFilenum = ((FSHLog) log).getFilenum();

    assertTrue("Missing datanode should've triggered a log roll",
        newFilenum > oldFilenum && newFilenum > curTime);

    // A subsequent write should not cause another roll.
    writeData(table, 3);
    assertTrue("The log should not roll again.",
        ((FSHLog) log).getFilenum() == newFilenum);

    // Kill another datanode; replication can no longer be restored, so the
    // low-replication roller should give up and disable itself.
    assertTrue(dfsCluster.stopDataNode(pipeline[1].getName()) != null);

    batchWriteAndWait(table, 3, false, 14000);
    assertTrue("LowReplication Roller should've been disabled, current replication="
        + ((FSHLog) log).getLogReplication(),
        !log.isLowReplicationRollEnabled());

    dfsCluster
        .startDataNodes(TEST_UTIL.getConfiguration(), 1, true, null, null);

    // Force a roll; with enough datanodes back, replication should recover
    // and the low-replication roller should be re-enabled.
    log.rollWriter(true);
    batchWriteAndWait(table, 13, true, 10000);
    assertTrue("New log file should have the default replication instead of " +
        ((FSHLog) log).getLogReplication(),
        ((FSHLog) log).getLogReplication() == fs.getDefaultReplication());
    assertTrue("LowReplication Roller should've been enabled",
        log.isLowReplicationRollEnabled());
  }
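  /**
   * Tests that the WAL rolls and no edits are lost when the entire datanode
   * pipeline is restarted while writes are in flight.
   */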
  @Test
  public void testLogRollOnPipelineRestart() throws Exception {
    LOG.info("Starting testLogRollOnPipelineRestart");
    assertTrue("This test requires HLog file replication.",
        fs.getDefaultReplication() > 1);
    LOG.info("Replication=" + fs.getDefaultReplication());
    // When the meta table can be opened, the region servers are running.
    new HTable(TEST_UTIL.getConfiguration(), TableName.META_TABLE_NAME);

    this.server = cluster.getRegionServer(0);
    this.log = server.getWAL();

    // Create the test table and open it.
    String tableName = getName();
    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));

    admin.createTable(desc);
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();
    final List<Path> paths = new ArrayList<Path>();
    final List<Integer> preLogRolledCalled = new ArrayList<Integer>();
    paths.add(((FSHLog) log).computeFilename());
    log.registerWALActionsListener(new WALActionsListener() {
      @Override
      public void preLogRoll(Path oldFile, Path newFile) {
        LOG.debug("preLogRoll: oldFile=" + oldFile + " newFile=" + newFile);
        preLogRolledCalled.add(new Integer(1));
      }
      @Override
      public void postLogRoll(Path oldFile, Path newFile) {
        paths.add(newFile);
      }
      @Override
      public void preLogArchive(Path oldFile, Path newFile) {}
      @Override
      public void postLogArchive(Path oldFile, Path newFile) {}
      @Override
      public void logRollRequested() {}
      @Override
      public void logCloseRequested() {}
      @Override
      public void visitLogEntryBeforeWrite(HRegionInfo info, HLogKey logKey,
          WALEdit logEdit) {}
      @Override
      public void visitLogEntryBeforeWrite(HTableDescriptor htd, HLogKey logKey,
          WALEdit logEdit) {}
    });

    assertTrue("Need HDFS-826 for this test", ((FSHLog) log).canGetCurReplicas());

    assertTrue("Need append support for this test", FSUtils
        .isAppendSupported(TEST_UTIL.getConfiguration()));

    writeData(table, 1002);

    table.setAutoFlush(true, true);

    long curTime = System.currentTimeMillis();
    long oldFilenum = log.getFilenum();
    assertTrue("Log should have a timestamp older than now",
        curTime > oldFilenum && oldFilenum != -1);

    assertTrue("The log shouldn't have rolled yet", oldFilenum == log.getFilenum());

    // Restart the datanodes the log is currently writing to.
    dfsCluster.restartDataNodes();
    Thread.sleep(1000);
    dfsCluster.waitActive();
    LOG.info("Data Nodes restarted");
    validateData(table, 1002);

    // This write should succeed, but trigger a log roll.
    writeData(table, 1003);
    long newFilenum = log.getFilenum();

    assertTrue("Missing datanode should've triggered a log roll",
        newFilenum > oldFilenum && newFilenum > curTime);
    validateData(table, 1003);

    writeData(table, 1004);

    // Restart the pipeline a second time to force another roll.
    dfsCluster.restartDataNodes();
    Thread.sleep(1000);
    dfsCluster.waitActive();
    LOG.info("Data Nodes restarted");
    validateData(table, 1004);

    // This write should succeed, but trigger a log roll.
    writeData(table, 1005);

    // Force a roll so every WAL path written so far is in the paths list.
    log.rollWriter(true);
    assertTrue("preLogRolledCalled has size of " + preLogRolledCalled.size(),
        preLogRolledCalled.size() >= 1);

    // Read back the WAL files and make sure no edit was lost along the way.
    Set<String> loggedRows = new HashSet<String>();
    FSUtils fsUtils = FSUtils.getInstance(fs, TEST_UTIL.getConfiguration());
    for (Path p : paths) {
      LOG.debug("recovering lease for " + p);
      fsUtils.recoverFileLease(((HFileSystem) fs).getBackingFs(), p,
          TEST_UTIL.getConfiguration(), null);

      LOG.debug("Reading HLog " + FSUtils.getPath(p));
      HLog.Reader reader = null;
      try {
        reader = HLogFactory.createReader(fs, p,
            TEST_UTIL.getConfiguration());
        HLog.Entry entry;
        while ((entry = reader.next()) != null) {
          LOG.debug("#" + entry.getKey().getLogSeqNum() + ": " + entry.getEdit().getKeyValues());
          for (KeyValue kv : entry.getEdit().getKeyValues()) {
            loggedRows.add(Bytes.toStringBinary(kv.getRow()));
          }
        }
      } catch (EOFException e) {
        LOG.debug("EOF reading file " + FSUtils.getPath(p));
      } finally {
        if (reader != null) reader.close();
      }
    }

    // Verify that every row written above made it into some WAL file.
    assertTrue(loggedRows.contains("row1002"));
    assertTrue(loggedRows.contains("row1003"));
    assertTrue(loggedRows.contains("row1004"));
    assertTrue(loggedRows.contains("row1005"));

    // Flush all regions so the data reaches HFiles and is readable by a scan.
    List<HRegion> regions =
        new ArrayList<HRegion>(server.getOnlineRegionsLocalContext());
    for (HRegion r : regions) {
      r.flushcache();
    }

    ResultScanner scanner = table.getScanner(new Scan());
    try {
      for (int i = 2; i <= 5; i++) {
        Result r = scanner.next();
        assertNotNull(r);
        assertFalse(r.isEmpty());
        assertEquals("row100" + i, Bytes.toString(r.getRow()));
      }
    } finally {
      scanner.close();
    }

    // Verify that no region server aborted during the test.
    for (JVMClusterUtil.RegionServerThread rsThread :
        TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
      assertFalse(rsThread.getRegionServer().isAborted());
    }
  }
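  /**
   * Tests that a WAL file holding only a compaction record for an
   * already-flushed region does not block the log from being archived.
   */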
  @Test
  public void testCompactionRecordDoesntBlockRolling() throws Exception {
    // When the meta table can be opened, the region servers are running.
    new HTable(TEST_UTIL.getConfiguration(), TableName.META_TABLE_NAME);

    String tableName = getName();
    HTable table = createTestTable(tableName);
    String tableName2 = tableName + "1";
    HTable table2 = createTestTable(tableName2);

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();
    FSHLog fshLog = (FSHLog) log;
    HRegion region = server.getOnlineRegions(table2.getName()).get(0);
    Store s = region.getStore(HConstants.CATALOG_FAMILY);

    // Flush the namespace table so its edits don't pin the WAL.
    admin.flush(TableName.NAMESPACE_TABLE_NAME.getName());

    // Put some rows into table2 so there are store files to compact.
    for (int i = 1; i <= 2; ++i) {
      doPut(table2, i);
      admin.flush(table2.getTableName());
    }
    doPut(table2, 3); // don't flush yet; keep this edit in the current WAL
    assertEquals("Should have no WAL after initial writes", 0, fshLog.getNumRolledLogFiles());
    assertEquals(2, s.getStorefilesCount());

    // Roll the log; the WAL cannot be archived while table2 has an unflushed edit.
    fshLog.rollWriter();
    assertEquals("Should have WAL; one table is not flushed", 1, fshLog.getNumRolledLogFiles());
    admin.flush(table2.getTableName());
    region.compactStores();
    // Wait for the compaction to complete.
    Assert.assertNotNull(s);
    for (int waitTime = 3000; s.getStorefilesCount() > 1 && waitTime > 0; waitTime -= 200) {
      Threads.sleepWithoutInterrupt(200);
    }
    assertEquals("Compaction didn't happen", 1, s.getStorefilesCount());

    // Write to the other table so the new WAL cannot be deleted yet.
    doPut(table, 0);
    fshLog.rollWriter();
    assertEquals("Should have WAL; one table is not flushed", 1, fshLog.getNumRolledLogFiles());

    // Flush that table, write again, and roll: the compaction record alone
    // must not keep any extra WAL around.
    admin.flush(table.getTableName());
    doPut(table, 1);
    fshLog.rollWriter();
    assertEquals("Should have 1 WALs at the end", 1, fshLog.getNumRolledLogFiles());

    table.close();
    table2.close();
  }
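  /** Puts a single formatted row ("rowNNNN") holding the test value into the given table. */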
  private void doPut(HTable table, int i) throws IOException {
    Put put = new Put(Bytes.toBytes("row" + String.format("%1$04d", i)));
    put.add(HConstants.CATALOG_FAMILY, null, value);
    table.put(put);
  }
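  /** Creates a table with a single CATALOG_FAMILY column family and returns a handle to it. */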
  private HTable createTestTable(String tableName) throws IOException {
    // Create the test table and open it.
    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
    admin.createTable(desc);
    return new HTable(TEST_UTIL.getConfiguration(), tableName);
  }
}