package org.apache.hadoop.hbase.regionserver.wal;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.*;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
import org.apache.log4j.Level;
import org.junit.*;
import org.junit.experimental.categories.Category;

import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.*;

import static org.junit.Assert.*;

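/**
 * Tests WAL (HLog) rolling: plain rolling with cleanup of flushed logs,
 * rolling triggered by datanode death, and rolling across a full HDFS
 * pipeline restart.
 */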
@Category(LargeTests.class)
public class TestLogRolling {
  private static final Log LOG = LogFactory.getLog(TestLogRolling.class);
  private HRegionServer server;
  private HLog log;
  private String tableName;
  private byte[] value;
  private FileSystem fs;
  private MiniDFSCluster dfsCluster;
  private HBaseAdmin admin;
  private MiniHBaseCluster cluster;
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  // Crank up logging on the components these tests exercise.
  {
    ((Log4JLogger) DataNode.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) LeaseManager.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) LogFactory.getLog("org.apache.hadoop.hdfs.server.namenode.FSNamesystem"))
        .getLogger().setLevel(Level.ALL);
    ((Log4JLogger) DFSClient.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) HRegionServer.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) HRegion.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) HLog.LOG).getLogger().setLevel(Level.ALL);
  }
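  /**
   * Builds a payload of at least 1000 bytes by repeating the class name;
   * it is written as the cell value in every test put.
   */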
  public TestLogRolling() {
    this.server = null;
    this.log = null;
    this.tableName = null;

    String className = this.getClass().getName();
    StringBuilder v = new StringBuilder(className);
    while (v.length() < 1000) {
      v.append(className);
    }
    this.value = Bytes.toBytes(v.toString());
  }
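  /**
   * Tunes the shared cluster configuration before it starts: small regions
   * and memstores, frequent log rolls, fast datanode-death detection, and
   * tolerance for a couple of low-replication log-roll errors.
   */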
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    // Use a small region size so splits and flushes happen quickly.
    TEST_UTIL.getConfiguration().setLong(HConstants.HREGION_MAX_FILESIZE, 768L * 1024L);

    // Roll the WAL after a small number of entries.
    TEST_UTIL.getConfiguration().setInt("hbase.regionserver.maxlogentries", 32);

    TEST_UTIL.getConfiguration().setInt(
        "hbase.regionserver.logroll.errors.tolerated", 2);
    TEST_UTIL.getConfiguration().setInt("ipc.ping.interval", 10 * 1000);
    TEST_UTIL.getConfiguration().setInt("ipc.socket.timeout", 10 * 1000);
    TEST_UTIL.getConfiguration().setInt("hbase.rpc.timeout", 10 * 1000);

    // Flush memstores frequently and at a small size.
    TEST_UTIL.getConfiguration().setInt("hbase.hregion.memstore.optionalflushcount", 2);
    TEST_UTIL.getConfiguration().setInt(
        HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 8192);

    // Slow down client retries so they do not hammer the mini cluster.
    TEST_UTIL.getConfiguration().setLong("hbase.client.pause", 10 * 1000);

    TEST_UTIL.getConfiguration().setInt(HConstants.THREAD_WAKE_FREQUENCY, 2 * 1000);

    // These tests need HDFS append/sync support.
    TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);

    // Detect dead datanodes quickly.
    TEST_UTIL.getConfiguration().setInt("heartbeat.recheck.interval", 5000);
    TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1);

    // Tolerate pipeline failures while the tests kill datanodes.
    TEST_UTIL.getConfiguration().setInt("dfs.client.block.write.retries", 30);
    TEST_UTIL.getConfiguration().setInt(
        "hbase.regionserver.hlog.tolerable.lowreplication", 2);
    TEST_UTIL.getConfiguration().setInt(
        "hbase.regionserver.hlog.lowreplication.rolllimit", 3);
  }

  @Before
  public void setUp() throws Exception {
    TEST_UTIL.startMiniCluster(2);

    cluster = TEST_UTIL.getHBaseCluster();
    dfsCluster = TEST_UTIL.getDFSCluster();
    fs = TEST_UTIL.getTestFileSystem();
    admin = TEST_UTIL.getHBaseAdmin();

    // Disable the balancer so regions stay where the tests expect them.
    cluster.getMaster().balanceSwitch(false);
  }

  @After
  public void tearDown() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

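  /**
   * Creates the test table and writes 256 rows, pausing periodically so
   * flushes and log rolls can happen while data is going in.
   */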
  private void startAndWriteData() throws IOException {
    // When the meta table can be opened, the region servers are running.
    new HTable(TEST_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);
    this.server = cluster.getRegionServerThreads().get(0).getRegionServer();
    this.log = server.getWAL();

    // Create the test table and open it.
    HTableDescriptor desc = new HTableDescriptor(tableName);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
    admin.createTable(desc);
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();
    for (int i = 1; i <= 256; i++) {
      Put put = new Put(Bytes.toBytes("row" + String.format("%1$04d", i)));
      put.add(HConstants.CATALOG_FAMILY, null, value);
      table.put(put);
      if (i % 32 == 0) {
        // Pause every 32 writes to give flushes and log rolls a chance to run.
        try {
          Thread.sleep(2000);
        } catch (InterruptedException e) {
          // continue
        }
      }
    }
  }
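  /**
   * Tests that old WAL files are cleaned up once all their edits have been
   * flushed: after flushing every region and rolling the writer, at most
   * two log files should remain.
   */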
  @Test
  public void testLogRolling() throws FailedLogCloseException, IOException {
    this.tableName = getName();
    startAndWriteData();
    LOG.info("after writing there are " + log.getNumLogFiles() + " log files");

    // Flush all regions so every log file's edits are persisted and the
    // files become eligible for cleanup.
    List<HRegion> regions =
        new ArrayList<HRegion>(server.getOnlineRegionsLocalContext());
    for (HRegion r : regions) {
      r.flushcache();
    }

    // Now roll the log.
    log.rollWriter();

    int count = log.getNumLogFiles();
    LOG.info("after flushing all regions and rolling logs there are " +
        count + " log files");
    assertTrue(("actual count: " + count), count <= 2);
  }

  private static String getName() {
    return "TestLogRolling";
  }

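  /** Writes a single row and sleeps briefly so the server can react. */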
  void writeData(HTable table, int rownum) throws IOException {
    Put put = new Put(Bytes.toBytes("row" + String.format("%1$04d", rownum)));
    put.add(HConstants.CATALOG_FAMILY, null, value);
    table.put(put);

    // Give the region server time to react to the write (e.g. roll the log).
    try {
      Thread.sleep(2000);
    } catch (InterruptedException e) {
      // continue
    }
  }

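  /** Reads back a single row and asserts it carries the expected value. */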
  void validateData(HTable table, int rownum) throws IOException {
    String row = "row" + String.format("%1$04d", rownum);
    Get get = new Get(Bytes.toBytes(row));
    get.addFamily(HConstants.CATALOG_FAMILY);
    Result result = table.get(get);
    assertTrue(result.size() == 1);
    assertTrue(Bytes.equals(value,
        result.getValue(HConstants.CATALOG_FAMILY, null)));
    LOG.info("Validated row " + row);
  }

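  /**
   * Writes ten rows starting at {@code start}, then waits up to
   * {@code timeout} ms for the low-replication roller to reach the
   * expected enabled/disabled state.
   */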
  void batchWriteAndWait(HTable table, int start, boolean expect, int timeout)
      throws IOException {
    for (int i = 0; i < 10; i++) {
      Put put = new Put(Bytes.toBytes("row"
          + String.format("%1$04d", (start + i))));
      put.add(HConstants.CATALOG_FAMILY, null, value);
      table.put(put);
    }
    // Poll until the roller reaches the expected state or the timeout expires.
    long startTime = System.currentTimeMillis();
    long remaining = timeout;
    while (remaining > 0) {
      if (log.isLowReplicationRollEnabled() == expect) {
        break;
      }
      try {
        Thread.sleep(200);
      } catch (InterruptedException e) {
        // continue
      }
      remaining = timeout - (System.currentTimeMillis() - startTime);
    }
  }
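  /**
   * Uses reflection to pull the current datanode pipeline out of the WAL's
   * underlying DFSOutputStream.
   */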
  DatanodeInfo[] getPipeline(HLog log) throws IllegalArgumentException,
      IllegalAccessException, InvocationTargetException {
    OutputStream stm = log.getOutputStream();
    Method getPipeline = null;
    for (Method m : stm.getClass().getDeclaredMethods()) {
      if (m.getName().endsWith("getPipeline")) {
        getPipeline = m;
        getPipeline.setAccessible(true);
        break;
      }
    }

    assertTrue("Need DFSOutputStream.getPipeline() for this test",
        null != getPipeline);
    Object repl = getPipeline.invoke(stm, new Object[] {});
    return (DatanodeInfo[]) repl;
  }
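  /**
   * Tests that the WAL is rolled when a datanode in its pipeline dies,
   * that low-replication rolling is disabled after the roll limit is hit,
   * and that it is re-enabled once replication recovers. Requires an HDFS
   * build with HDFS-826 and append support.
   */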
  @Test
  public void testLogRollOnDatanodeDeath() throws Exception {
    assertTrue("This test requires HLog file replication set to 2.",
        fs.getDefaultReplication() == 2);
    LOG.info("Replication=" + fs.getDefaultReplication());

    this.server = cluster.getRegionServer(0);
    this.log = server.getWAL();

    // Create the test table and open it.
    String tableName = getName();
    HTableDescriptor desc = new HTableDescriptor(tableName);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));

    admin.createTable(desc);
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);
    assertTrue(table.isAutoFlush());

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();

    assertTrue("Need HDFS-826 for this test", log.canGetCurReplicas());

    assertTrue("Need append support for this test", FSUtils
        .isAppendSupported(TEST_UTIL.getConfiguration()));

    // Add new datanodes, then stop the original ones, so the WAL pipeline
    // is built entirely on datanodes the test can kill safely.
    List<DataNode> existingNodes = dfsCluster.getDataNodes();
    int numDataNodes = 3;
    dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), numDataNodes, true,
        null, null);
    List<DataNode> allNodes = dfsCluster.getDataNodes();
    for (int i = allNodes.size() - 1; i >= 0; i--) {
      if (existingNodes.contains(allNodes.get(i))) {
        dfsCluster.stopDataNode(i);
      }
    }

    assertTrue("DataNodes " + dfsCluster.getDataNodes().size() +
        " default replication " + fs.getDefaultReplication(),
        dfsCluster.getDataNodes().size() >= fs.getDefaultReplication() + 1);

    writeData(table, 2);

    long curTime = System.currentTimeMillis();
    long oldFilenum = log.getFilenum();
    assertTrue("Log should have a timestamp older than now",
        curTime > oldFilenum && oldFilenum != -1);

    assertTrue("The log shouldn't have rolled yet",
        oldFilenum == log.getFilenum());
    final DatanodeInfo[] pipeline = getPipeline(log);
    assertTrue(pipeline.length == fs.getDefaultReplication());

    // Kill one datanode in the pipeline; the next write should notice the
    // reduced replication and trigger a log roll.
    assertTrue(dfsCluster.stopDataNode(pipeline[0].getName()) != null);

    writeData(table, 2);
    long newFilenum = log.getFilenum();

    assertTrue("Missing datanode should've triggered a log roll",
        newFilenum > oldFilenum && newFilenum > curTime);

    // A further write at the same replication level should not roll again.
    writeData(table, 3);
    assertTrue("The log should not roll again.",
        log.getFilenum() == newFilenum);

    // Kill a second datanode; replication drops below the tolerable level
    // and, once the roll limit is hit, low-replication rolling is disabled.
    assertTrue(dfsCluster.stopDataNode(pipeline[1].getName()) != null);

    batchWriteAndWait(table, 3, false, 10000);
    assertTrue("LowReplication Roller should've been disabled",
        !log.isLowReplicationRollEnabled());

    dfsCluster
        .startDataNodes(TEST_UTIL.getConfiguration(), 1, true, null, null);

    // Force a roll; with a fresh datanode available the new log file should
    // come up at full replication and re-enable the roller.
    log.rollWriter(true);
    batchWriteAndWait(table, 13, true, 10000);
    assertTrue("New log file should have the default replication instead of " +
        log.getLogReplication(),
        log.getLogReplication() == fs.getDefaultReplication());
    assertTrue("LowReplication Roller should've been enabled",
        log.isLowReplicationRollEnabled());
  }
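  /**
   * Tests that the WAL rolls and loses no edits when the entire HDFS
   * datanode pipeline is restarted underneath it, twice.
   */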
  @Test
  public void testLogRollOnPipelineRestart() throws Exception {
    LOG.info("Starting testLogRollOnPipelineRestart");
    assertTrue("This test requires HLog file replication.",
        fs.getDefaultReplication() > 1);
    LOG.info("Replication=" + fs.getDefaultReplication());

    // When the meta table can be opened, the region servers are running.
    new HTable(TEST_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);

    this.server = cluster.getRegionServer(0);
    this.log = server.getWAL();

    // Create the test table and open it.
    String tableName = getName();
    HTableDescriptor desc = new HTableDescriptor(tableName);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));

    admin.createTable(desc);
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();
    // Track every log file the WAL rolls to, so they can all be read back.
    final List<Path> paths = new ArrayList<Path>();
    final List<Integer> preLogRolledCalled = new ArrayList<Integer>();
    paths.add(log.computeFilename());
    log.registerWALActionsListener(new WALActionsListener() {
      @Override
      public void preLogRoll(Path oldFile, Path newFile) {
        LOG.debug("preLogRoll: oldFile=" + oldFile + " newFile=" + newFile);
        preLogRolledCalled.add(new Integer(1));
      }
      @Override
      public void postLogRoll(Path oldFile, Path newFile) {
        paths.add(newFile);
      }
      @Override
      public void preLogArchive(Path oldFile, Path newFile) {}
      @Override
      public void postLogArchive(Path oldFile, Path newFile) {}
      @Override
      public void logRollRequested() {}
      @Override
      public void logCloseRequested() {}
      @Override
      public void visitLogEntryBeforeWrite(HRegionInfo info, HLogKey logKey,
          WALEdit logEdit) {}
      @Override
      public void visitLogEntryBeforeWrite(HTableDescriptor htd, HLogKey logKey,
          WALEdit logEdit) {}
    });

    assertTrue("Need HDFS-826 for this test", log.canGetCurReplicas());

    assertTrue("Need append support for this test", FSUtils
        .isAppendSupported(TEST_UTIL.getConfiguration()));

    writeData(table, 1002);

    table.setAutoFlush(true);

    long curTime = System.currentTimeMillis();
    long oldFilenum = log.getFilenum();
    assertTrue("Log should have a timestamp older than now",
        curTime > oldFilenum && oldFilenum != -1);

    assertTrue("The log shouldn't have rolled yet",
        oldFilenum == log.getFilenum());

    // Roll the entire HDFS pipeline out from under the WAL.
    dfsCluster.restartDataNodes();
    Thread.sleep(1000);
    dfsCluster.waitActive();
    LOG.info("Data Nodes restarted");
    validateData(table, 1002);

    // The restart should trigger a log roll on the next write.
    writeData(table, 1003);
    long newFilenum = log.getFilenum();

    assertTrue("Missing datanode should've triggered a log roll",
        newFilenum > oldFilenum && newFilenum > curTime);
    validateData(table, 1003);

    writeData(table, 1004);

    // Restart the datanodes a second time.
    dfsCluster.restartDataNodes();
    Thread.sleep(1000);
    dfsCluster.waitActive();
    LOG.info("Data Nodes restarted");
    validateData(table, 1004);

    writeData(table, 1005);

    // Force a log roll and make sure the listener saw at least one roll.
    log.rollWriter(true);
    assertTrue("preLogRolledCalled has size of " + preLogRolledCalled.size(),
        preLogRolledCalled.size() >= 1);

    // Read back every log file written and collect the rows that made it
    // into the WAL.
    Set<String> loggedRows = new HashSet<String>();
    for (Path p : paths) {
      LOG.debug("Reading HLog " + FSUtils.getPath(p));
      HLog.Reader reader = null;
      try {
        reader = HLog.getReader(fs, p, TEST_UTIL.getConfiguration());
        HLog.Entry entry;
        while ((entry = reader.next()) != null) {
          LOG.debug("#" + entry.getKey().getLogSeqNum() + ": " +
              entry.getEdit().getKeyValues());
          for (KeyValue kv : entry.getEdit().getKeyValues()) {
            loggedRows.add(Bytes.toStringBinary(kv.getRow()));
          }
        }
      } catch (EOFException e) {
        LOG.debug("EOF reading file " + FSUtils.getPath(p));
      } finally {
        if (reader != null) reader.close();
      }
    }

    // Every row written, before and after the restarts, must be in the WAL.
    assertTrue(loggedRows.contains("row1002"));
    assertTrue(loggedRows.contains("row1003"));
    assertTrue(loggedRows.contains("row1004"));
    assertTrue(loggedRows.contains("row1005"));

    // Flush all regions, then verify the rows are readable from the table.
    List<HRegion> regions =
        new ArrayList<HRegion>(server.getOnlineRegionsLocalContext());
    for (HRegion r : regions) {
      r.flushcache();
    }

    ResultScanner scanner = table.getScanner(new Scan());
    try {
      for (int i = 2; i <= 5; i++) {
        Result r = scanner.next();
        assertNotNull(r);
        assertFalse(r.isEmpty());
        assertEquals("row100" + i, Bytes.toString(r.getRow()));
      }
    } finally {
      scanner.close();
    }

    // No region server should have aborted during the pipeline restarts.
    for (JVMClusterUtil.RegionServerThread rsThread :
        TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
      assertFalse(rsThread.getRegionServer().isAborted());
    }
  }

  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
      new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}