1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver.wal;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertTrue;
25
26 import java.io.EOFException;
27 import java.io.IOException;
28 import java.util.ArrayList;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.Set;
32 import java.util.concurrent.atomic.AtomicBoolean;
33
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.fs.FileSystem;
38 import org.apache.hadoop.fs.Path;
39 import org.apache.hadoop.hbase.Cell;
40 import org.apache.hadoop.hbase.HBaseTestingUtility;
41 import org.apache.hadoop.hbase.HColumnDescriptor;
42 import org.apache.hadoop.hbase.HConstants;
43 import org.apache.hadoop.hbase.HTableDescriptor;
44 import org.apache.hadoop.hbase.testclassification.LargeTests;
45 import org.apache.hadoop.hbase.MiniHBaseCluster;
46 import org.apache.hadoop.hbase.ServerName;
47 import org.apache.hadoop.hbase.TableName;
48 import org.apache.hadoop.hbase.client.Admin;
49 import org.apache.hadoop.hbase.client.Get;
50 import org.apache.hadoop.hbase.client.HTable;
51 import org.apache.hadoop.hbase.client.Put;
52 import org.apache.hadoop.hbase.client.Result;
53 import org.apache.hadoop.hbase.client.ResultScanner;
54 import org.apache.hadoop.hbase.client.Scan;
55 import org.apache.hadoop.hbase.client.Table;
56 import org.apache.hadoop.hbase.fs.HFileSystem;
57 import org.apache.hadoop.hbase.regionserver.HRegion;
58 import org.apache.hadoop.hbase.regionserver.HRegionServer;
59 import org.apache.hadoop.hbase.regionserver.Store;
60 import org.apache.hadoop.hbase.util.Bytes;
61 import org.apache.hadoop.hbase.util.FSUtils;
62 import org.apache.hadoop.hbase.util.JVMClusterUtil;
63 import org.apache.hadoop.hbase.util.Threads;
64 import org.apache.hadoop.hbase.wal.DefaultWALProvider;
65 import org.apache.hadoop.hbase.wal.WAL;
66 import org.apache.hadoop.hbase.wal.WALFactory;
67 import org.apache.hadoop.hdfs.MiniDFSCluster;
68 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
69 import org.apache.hadoop.hdfs.server.datanode.DataNode;
70 import org.junit.After;
71 import org.junit.Assert;
72 import org.junit.Before;
73 import org.junit.BeforeClass;
74 import org.junit.Test;
75 import org.junit.experimental.categories.Category;
76
77
78
79
80 @Category(LargeTests.class)
81 public class TestLogRolling {
82 private static final Log LOG = LogFactory.getLog(TestLogRolling.class);
83 private HRegionServer server;
84 private String tableName;
85 private byte[] value;
86 private FileSystem fs;
87 private MiniDFSCluster dfsCluster;
88 private Admin admin;
89 private MiniHBaseCluster cluster;
90 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
91
92
93
94
95
96 public TestLogRolling() {
97 this.server = null;
98 this.tableName = null;
99
100 String className = this.getClass().getName();
101 StringBuilder v = new StringBuilder(className);
102 while (v.length() < 1000) {
103 v.append(className);
104 }
105 this.value = Bytes.toBytes(v.toString());
106 }
107
108
109
110 @BeforeClass
111 public static void setUpBeforeClass() throws Exception {
112
113
114 System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
115
116
117
118 TEST_UTIL.getConfiguration().setLong(HConstants.HREGION_MAX_FILESIZE, 768L * 1024L);
119
120
121 TEST_UTIL.getConfiguration().setInt("hbase.regionserver.maxlogentries", 32);
122
123 TEST_UTIL.getConfiguration().setInt("hbase.regionserver.logroll.errors.tolerated", 2);
124 TEST_UTIL.getConfiguration().setInt("hbase.rpc.timeout", 10 * 1000);
125
126
127 TEST_UTIL.getConfiguration().setInt("hbase.hregion.memstore.optionalflushcount", 2);
128
129
130 TEST_UTIL.getConfiguration().setInt(
131 HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 8192);
132
133
134 TEST_UTIL.getConfiguration().setLong("hbase.client.pause", 10 * 1000);
135
136
137
138 TEST_UTIL.getConfiguration().setInt(HConstants.THREAD_WAKE_FREQUENCY, 2 * 1000);
139
140
141
142 TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
143
144
145 TEST_UTIL.getConfiguration().setInt("dfs.namenode.heartbeat.recheck-interval", 5000);
146 TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1);
147
148
149 TEST_UTIL.getConfiguration().setInt("dfs.client.block.write.retries", 30);
150 TEST_UTIL.getConfiguration().setInt("hbase.regionserver.hlog.tolerable.lowreplication", 2);
151 TEST_UTIL.getConfiguration().setInt("hbase.regionserver.hlog.lowreplication.rolllimit", 3);
152 }
153
154 @Before
155 public void setUp() throws Exception {
156 TEST_UTIL.startMiniCluster(1, 1, 2);
157
158 cluster = TEST_UTIL.getHBaseCluster();
159 dfsCluster = TEST_UTIL.getDFSCluster();
160 fs = TEST_UTIL.getTestFileSystem();
161 admin = TEST_UTIL.getHBaseAdmin();
162
163
164 cluster.getMaster().balanceSwitch(false);
165 }
166
167 @After
168 public void tearDown() throws Exception {
169 TEST_UTIL.shutdownMiniCluster();
170 }
171
172 private void startAndWriteData() throws IOException, InterruptedException {
173
174 new HTable(TEST_UTIL.getConfiguration(), TableName.META_TABLE_NAME);
175 this.server = cluster.getRegionServerThreads().get(0).getRegionServer();
176
177 Table table = createTestTable(this.tableName);
178
179 server = TEST_UTIL.getRSForFirstRegionInTable(table.getName());
180 for (int i = 1; i <= 256; i++) {
181 doPut(table, i);
182 if (i % 32 == 0) {
183
184 try {
185 Thread.sleep(2000);
186 } catch (InterruptedException e) {
187
188 }
189 }
190 }
191 }
192
193
194
195
196 @Test(timeout=120000)
197 public void testLogRollOnNothingWritten() throws Exception {
198 final Configuration conf = TEST_UTIL.getConfiguration();
199 final WALFactory wals = new WALFactory(conf, null,
200 ServerName.valueOf("test.com",8080, 1).toString());
201 final WAL newLog = wals.getWAL(new byte[]{});
202 try {
203
204 newLog.rollWriter(true);
205 } finally {
206 wals.close();
207 }
208 }
209
210
211
212
213
214
215 @Test
216 public void testLogRolling() throws Exception {
217 this.tableName = getName();
218
219 startAndWriteData();
220 final WAL log = server.getWAL(null);
221 LOG.info("after writing there are " + DefaultWALProvider.getNumRolledLogFiles(log) +
222 " log files");
223
224
225
226 List<HRegion> regions =
227 new ArrayList<HRegion>(server.getOnlineRegionsLocalContext());
228 for (HRegion r: regions) {
229 r.flushcache();
230 }
231
232
233 log.rollWriter();
234
235 int count = DefaultWALProvider.getNumRolledLogFiles(log);
236 LOG.info("after flushing all regions and rolling logs there are " + count + " log files");
237 assertTrue(("actual count: " + count), count <= 2);
238 }
239
240 private static String getName() {
241 return "TestLogRolling";
242 }
243
244 void writeData(Table table, int rownum) throws IOException {
245 doPut(table, rownum);
246
247
248 try {
249 Thread.sleep(2000);
250 } catch (InterruptedException e) {
251
252 }
253 }
254
255 void validateData(Table table, int rownum) throws IOException {
256 String row = "row" + String.format("%1$04d", rownum);
257 Get get = new Get(Bytes.toBytes(row));
258 get.addFamily(HConstants.CATALOG_FAMILY);
259 Result result = table.get(get);
260 assertTrue(result.size() == 1);
261 assertTrue(Bytes.equals(value,
262 result.getValue(HConstants.CATALOG_FAMILY, null)));
263 LOG.info("Validated row " + row);
264 }
265
266 void batchWriteAndWait(Table table, final FSHLog log, int start, boolean expect, int timeout)
267 throws IOException {
268 for (int i = 0; i < 10; i++) {
269 Put put = new Put(Bytes.toBytes("row"
270 + String.format("%1$04d", (start + i))));
271 put.add(HConstants.CATALOG_FAMILY, null, value);
272 table.put(put);
273 }
274 Put tmpPut = new Put(Bytes.toBytes("tmprow"));
275 tmpPut.add(HConstants.CATALOG_FAMILY, null, value);
276 long startTime = System.currentTimeMillis();
277 long remaining = timeout;
278 while (remaining > 0) {
279 if (log.isLowReplicationRollEnabled() == expect) {
280 break;
281 } else {
282
283 table.put(tmpPut);
284 try {
285 Thread.sleep(200);
286 } catch (InterruptedException e) {
287
288 }
289 remaining = timeout - (System.currentTimeMillis() - startTime);
290 }
291 }
292 }
293
294
295
296
297
298 @Test
299 public void testLogRollOnDatanodeDeath() throws Exception {
300 TEST_UTIL.ensureSomeRegionServersAvailable(2);
301 assertTrue("This test requires WAL file replication set to 2.",
302 fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()) == 2);
303 LOG.info("Replication=" +
304 fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()));
305
306 this.server = cluster.getRegionServer(0);
307
308
309 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(getName()));
310 desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
311
312 admin.createTable(desc);
313 Table table = TEST_UTIL.getConnection().getTable(desc.getTableName());
314 assertTrue(((HTable) table).isAutoFlush());
315
316 server = TEST_UTIL.getRSForFirstRegionInTable(desc.getTableName());
317 final FSHLog log = (FSHLog) server.getWAL(null);
318 final AtomicBoolean lowReplicationHookCalled = new AtomicBoolean(false);
319
320 log.registerWALActionsListener(new WALActionsListener.Base() {
321 @Override
322 public void logRollRequested(boolean lowReplication) {
323 if (lowReplication) {
324 lowReplicationHookCalled.lazySet(true);
325 }
326 }
327 });
328
329
330 assertTrue("Need append support for this test", FSUtils
331 .isAppendSupported(TEST_UTIL.getConfiguration()));
332
333
334
335
336
337 List<DataNode> existingNodes = dfsCluster.getDataNodes();
338 int numDataNodes = 3;
339 dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), numDataNodes, true,
340 null, null);
341 List<DataNode> allNodes = dfsCluster.getDataNodes();
342 for (int i = allNodes.size()-1; i >= 0; i--) {
343 if (existingNodes.contains(allNodes.get(i))) {
344 dfsCluster.stopDataNode( i );
345 }
346 }
347
348 assertTrue("DataNodes " + dfsCluster.getDataNodes().size() +
349 " default replication " +
350 fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()),
351 dfsCluster.getDataNodes().size() >=
352 fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()) + 1);
353
354 writeData(table, 2);
355
356 long curTime = System.currentTimeMillis();
357 LOG.info("log.getCurrentFileName(): " + log.getCurrentFileName());
358 long oldFilenum = DefaultWALProvider.extractFileNumFromWAL(log);
359 assertTrue("Log should have a timestamp older than now",
360 curTime > oldFilenum && oldFilenum != -1);
361
362 assertTrue("The log shouldn't have rolled yet",
363 oldFilenum == DefaultWALProvider.extractFileNumFromWAL(log));
364 final DatanodeInfo[] pipeline = log.getPipeLine();
365 assertTrue(pipeline.length ==
366 fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()));
367
368
369
370 assertTrue(dfsCluster.stopDataNode(pipeline[0].getName()) != null);
371
372
373 writeData(table, 2);
374 long newFilenum = DefaultWALProvider.extractFileNumFromWAL(log);
375
376 assertTrue("Missing datanode should've triggered a log roll",
377 newFilenum > oldFilenum && newFilenum > curTime);
378
379 assertTrue("The log rolling hook should have been called with the low replication flag",
380 lowReplicationHookCalled.get());
381
382
383 writeData(table, 3);
384 assertTrue("The log should not roll again.",
385 DefaultWALProvider.extractFileNumFromWAL(log) == newFilenum);
386
387
388 assertTrue(dfsCluster.stopDataNode(pipeline[1].getName()) != null);
389
390 batchWriteAndWait(table, log, 3, false, 14000);
391 int replication = log.getLogReplication();
392 assertTrue("LowReplication Roller should've been disabled, current replication="
393 + replication, !log.isLowReplicationRollEnabled());
394
395 dfsCluster
396 .startDataNodes(TEST_UTIL.getConfiguration(), 1, true, null, null);
397
398
399
400 log.rollWriter(true);
401 batchWriteAndWait(table, log, 13, true, 10000);
402 replication = log.getLogReplication();
403 assertTrue("New log file should have the default replication instead of " +
404 replication,
405 replication == fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()));
406 assertTrue("LowReplication Roller should've been enabled", log.isLowReplicationRollEnabled());
407 }
408
409
410
411
412
413
414 @Test
415 public void testLogRollOnPipelineRestart() throws Exception {
416 LOG.info("Starting testLogRollOnPipelineRestart");
417 assertTrue("This test requires WAL file replication.",
418 fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()) > 1);
419 LOG.info("Replication=" +
420 fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()));
421
422 Table t = new HTable(TEST_UTIL.getConfiguration(), TableName.META_TABLE_NAME);
423 try {
424 this.server = cluster.getRegionServer(0);
425
426
427 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(getName()));
428 desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
429
430 admin.createTable(desc);
431 Table table = new HTable(TEST_UTIL.getConfiguration(), desc.getTableName());
432
433 server = TEST_UTIL.getRSForFirstRegionInTable(desc.getTableName());
434 final WAL log = server.getWAL(null);
435 final List<Path> paths = new ArrayList<Path>();
436 final List<Integer> preLogRolledCalled = new ArrayList<Integer>();
437
438 paths.add(DefaultWALProvider.getCurrentFileName(log));
439 log.registerWALActionsListener(new WALActionsListener.Base() {
440
441 @Override
442 public void preLogRoll(Path oldFile, Path newFile) {
443 LOG.debug("preLogRoll: oldFile="+oldFile+" newFile="+newFile);
444 preLogRolledCalled.add(new Integer(1));
445 }
446 @Override
447 public void postLogRoll(Path oldFile, Path newFile) {
448 paths.add(newFile);
449 }
450 });
451
452
453 assertTrue("Need append support for this test", FSUtils
454 .isAppendSupported(TEST_UTIL.getConfiguration()));
455
456 writeData(table, 1002);
457
458 long curTime = System.currentTimeMillis();
459 LOG.info("log.getCurrentFileName()): " + DefaultWALProvider.getCurrentFileName(log));
460 long oldFilenum = DefaultWALProvider.extractFileNumFromWAL(log);
461 assertTrue("Log should have a timestamp older than now",
462 curTime > oldFilenum && oldFilenum != -1);
463
464 assertTrue("The log shouldn't have rolled yet", oldFilenum ==
465 DefaultWALProvider.extractFileNumFromWAL(log));
466
467
468 dfsCluster.restartDataNodes();
469 Thread.sleep(1000);
470 dfsCluster.waitActive();
471 LOG.info("Data Nodes restarted");
472 validateData(table, 1002);
473
474
475 writeData(table, 1003);
476 long newFilenum = DefaultWALProvider.extractFileNumFromWAL(log);
477
478 assertTrue("Missing datanode should've triggered a log roll",
479 newFilenum > oldFilenum && newFilenum > curTime);
480 validateData(table, 1003);
481
482 writeData(table, 1004);
483
484
485 dfsCluster.restartDataNodes();
486 Thread.sleep(1000);
487 dfsCluster.waitActive();
488 LOG.info("Data Nodes restarted");
489 validateData(table, 1004);
490
491
492 writeData(table, 1005);
493
494
495 log.rollWriter(true);
496 assertTrue("preLogRolledCalled has size of " + preLogRolledCalled.size(),
497 preLogRolledCalled.size() >= 1);
498
499
500 Set<String> loggedRows = new HashSet<String>();
501 FSUtils fsUtils = FSUtils.getInstance(fs, TEST_UTIL.getConfiguration());
502 for (Path p : paths) {
503 LOG.debug("recovering lease for " + p);
504 fsUtils.recoverFileLease(((HFileSystem)fs).getBackingFs(), p,
505 TEST_UTIL.getConfiguration(), null);
506
507 LOG.debug("Reading WAL "+FSUtils.getPath(p));
508 WAL.Reader reader = null;
509 try {
510 reader = WALFactory.createReader(fs, p, TEST_UTIL.getConfiguration());
511 WAL.Entry entry;
512 while ((entry = reader.next()) != null) {
513 LOG.debug("#"+entry.getKey().getLogSeqNum()+": "+entry.getEdit().getCells());
514 for (Cell cell : entry.getEdit().getCells()) {
515 loggedRows.add(Bytes.toStringBinary(cell.getRow()));
516 }
517 }
518 } catch (EOFException e) {
519 LOG.debug("EOF reading file "+FSUtils.getPath(p));
520 } finally {
521 if (reader != null) reader.close();
522 }
523 }
524
525
526 assertTrue(loggedRows.contains("row1002"));
527 assertTrue(loggedRows.contains("row1003"));
528 assertTrue(loggedRows.contains("row1004"));
529 assertTrue(loggedRows.contains("row1005"));
530
531
532 List<HRegion> regions = new ArrayList<HRegion>(server.getOnlineRegionsLocalContext());
533 for (HRegion r: regions) {
534 r.flushcache();
535 }
536
537 ResultScanner scanner = table.getScanner(new Scan());
538 try {
539 for (int i=2; i<=5; i++) {
540 Result r = scanner.next();
541 assertNotNull(r);
542 assertFalse(r.isEmpty());
543 assertEquals("row100"+i, Bytes.toString(r.getRow()));
544 }
545 } finally {
546 scanner.close();
547 }
548
549
550 for (JVMClusterUtil.RegionServerThread rsThread:
551 TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
552 assertFalse(rsThread.getRegionServer().isAborted());
553 }
554 } finally {
555 if (t != null) t.close();
556 }
557 }
558
559
560
561
562
563 @Test
564 public void testCompactionRecordDoesntBlockRolling() throws Exception {
565 Table table = null;
566 Table table2 = null;
567
568
569 Table t = new HTable(TEST_UTIL.getConfiguration(), TableName.META_TABLE_NAME);
570 try {
571 table = createTestTable(getName());
572 table2 = createTestTable(getName() + "1");
573
574 server = TEST_UTIL.getRSForFirstRegionInTable(table.getName());
575 final WAL log = server.getWAL(null);
576 HRegion region = server.getOnlineRegions(table2.getName()).get(0);
577 Store s = region.getStore(HConstants.CATALOG_FAMILY);
578
579
580 admin.flush(TableName.NAMESPACE_TABLE_NAME);
581
582
583 for (int i = 1; i <= 2; ++i) {
584 doPut(table2, i);
585 admin.flush(table2.getName());
586 }
587 doPut(table2, 3);
588 assertEquals("Should have no WAL after initial writes", 0,
589 DefaultWALProvider.getNumRolledLogFiles(log));
590 assertEquals(2, s.getStorefilesCount());
591
592
593 log.rollWriter();
594 assertEquals("Should have WAL; one table is not flushed", 1,
595 DefaultWALProvider.getNumRolledLogFiles(log));
596 admin.flush(table2.getName());
597 region.compactStores();
598
599 Assert.assertNotNull(s);
600 for (int waitTime = 3000; s.getStorefilesCount() > 1 && waitTime > 0; waitTime -= 200) {
601 Threads.sleepWithoutInterrupt(200);
602 }
603 assertEquals("Compaction didn't happen", 1, s.getStorefilesCount());
604
605
606 doPut(table, 0);
607 log.rollWriter();
608 assertEquals("Should have WAL; one table is not flushed", 1,
609 DefaultWALProvider.getNumRolledLogFiles(log));
610
611
612 admin.flush(table.getName());
613 doPut(table, 1);
614 log.rollWriter();
615 assertEquals("Should have 1 WALs at the end", 1,
616 DefaultWALProvider.getNumRolledLogFiles(log));
617 } finally {
618 if (t != null) t.close();
619 if (table != null) table.close();
620 if (table2 != null) table2.close();
621 }
622 }
623
624 private void doPut(Table table, int i) throws IOException {
625 Put put = new Put(Bytes.toBytes("row" + String.format("%1$04d", i)));
626 put.add(HConstants.CATALOG_FAMILY, null, value);
627 table.put(put);
628 }
629
630 private Table createTestTable(String tableName) throws IOException {
631
632 HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
633 desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
634 admin.createTable(desc);
635 return new HTable(TEST_UTIL.getConfiguration(), desc.getTableName());
636 }
637 }
638