1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertTrue;
23
24 import java.util.List;
25 import java.util.concurrent.atomic.AtomicLong;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.hbase.HBaseTestingUtility;
30 import org.apache.hadoop.hbase.MediumTests;
31 import org.apache.hadoop.hbase.ServerName;
32 import org.apache.hadoop.hbase.SplitLogCounters;
33 import org.apache.hadoop.hbase.SplitLogTask;
34 import org.apache.hadoop.hbase.Waiter;
35 import org.apache.hadoop.hbase.util.CancelableProgressable;
36 import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
37 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
38 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
39 import org.apache.log4j.Level;
40 import org.apache.log4j.Logger;
41 import org.apache.zookeeper.CreateMode;
42 import org.apache.zookeeper.ZooDefs.Ids;
43 import org.junit.After;
44 import org.junit.Before;
45 import org.junit.Test;
46 import org.junit.experimental.categories.Category;
47
48 @Category(MediumTests.class)
49 public class TestSplitLogWorker {
50 private static final Log LOG = LogFactory.getLog(TestSplitLogWorker.class);
51 private final ServerName MANAGER = new ServerName("manager,1,1");
52 static {
53 Logger.getLogger("org.apache.hadoop.hbase").setLevel(Level.DEBUG);
54 }
55 private final static HBaseTestingUtility TEST_UTIL =
56 new HBaseTestingUtility();
57 private ZooKeeperWatcher zkw;
58 private SplitLogWorker slw;
59
60 private void waitForCounter(AtomicLong ctr, long oldval, long newval, long timems)
61 throws Exception {
62 assertTrue("ctr=" + ctr.get() + ", oldval=" + oldval + ", newval=" + newval,
63 waitForCounterBoolean(ctr, oldval, newval, timems));
64 }
65
66 private boolean waitForCounterBoolean(final AtomicLong ctr, final long oldval, long newval,
67 long timems) throws Exception {
68
69 return waitForCounterBoolean(ctr, oldval, newval, timems, true);
70 }
71
72 private boolean waitForCounterBoolean(final AtomicLong ctr, final long oldval, long newval,
73 long timems, boolean failIfTimeout) throws Exception {
74
75 long timeWaited = TEST_UTIL.waitFor(timems, 10, failIfTimeout,
76 new Waiter.Predicate<Exception>() {
77 @Override
78 public boolean evaluate() throws Exception {
79 return (ctr.get() != oldval);
80 }
81 });
82
83 if( timeWaited > 0) {
84
85 assertEquals(newval, ctr.get());
86 }
87 return true;
88 }
89
90 @Before
91 public void setup() throws Exception {
92 TEST_UTIL.startMiniZKCluster();
93 zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
94 "split-log-worker-tests", null);
95 ZKUtil.deleteChildrenRecursively(zkw, zkw.baseZNode);
96 ZKUtil.createAndFailSilent(zkw, zkw.baseZNode);
97 assertTrue(ZKUtil.checkExists(zkw, zkw.baseZNode) != -1);
98 LOG.debug(zkw.baseZNode + " created");
99 ZKUtil.createAndFailSilent(zkw, zkw.splitLogZNode);
100 assertTrue(ZKUtil.checkExists(zkw, zkw.splitLogZNode) != -1);
101 LOG.debug(zkw.splitLogZNode + " created");
102 SplitLogCounters.resetCounters();
103 }
104
105 @After
106 public void teardown() throws Exception {
107 TEST_UTIL.shutdownMiniZKCluster();
108 }
109
110 SplitLogWorker.TaskExecutor neverEndingTask =
111 new SplitLogWorker.TaskExecutor() {
112
113 @Override
114 public Status exec(String name, CancelableProgressable p) {
115 while (true) {
116 try {
117 Thread.sleep(1000);
118 } catch (InterruptedException e) {
119 return Status.PREEMPTED;
120 }
121 if (!p.progress()) {
122 return Status.PREEMPTED;
123 }
124 }
125 }
126
127 };
128
129 @Test
130 public void testAcquireTaskAtStartup() throws Exception {
131 LOG.info("testAcquireTaskAtStartup");
132 SplitLogCounters.resetCounters();
133 final String TATAS = "tatas";
134 final ServerName RS = new ServerName("rs,1,1");
135 zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TATAS),
136 new SplitLogTask.Unassigned(new ServerName("mgr,1,1")).toByteArray(), Ids.OPEN_ACL_UNSAFE,
137 CreateMode.PERSISTENT);
138
139 SplitLogWorker slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), RS, neverEndingTask);
140 slw.start();
141 try {
142 waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
143 byte [] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS));
144 SplitLogTask slt = SplitLogTask.parseFrom(bytes);
145 assertTrue(slt.isOwned(RS));
146 } finally {
147 stopSplitLogWorker(slw);
148 }
149 }
150
151 private void stopSplitLogWorker(final SplitLogWorker slw)
152 throws InterruptedException {
153 if (slw != null) {
154 slw.stop();
155 slw.worker.join(3000);
156 if (slw.worker.isAlive()) {
157 assertTrue(("Could not stop the worker thread slw=" + slw) == null);
158 }
159 }
160 }
161
162 @Test
163 public void testRaceForTask() throws Exception {
164 LOG.info("testRaceForTask");
165 SplitLogCounters.resetCounters();
166 final String TRFT = "trft";
167 final ServerName SVR1 = new ServerName("svr1,1,1");
168 final ServerName SVR2 = new ServerName("svr2,1,1");
169 zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TRFT),
170 new SplitLogTask.Unassigned(MANAGER).toByteArray(), Ids.OPEN_ACL_UNSAFE,
171 CreateMode.PERSISTENT);
172
173 SplitLogWorker slw1 = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), SVR1, neverEndingTask);
174 SplitLogWorker slw2 = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), SVR2, neverEndingTask);
175 slw1.start();
176 slw2.start();
177 try {
178 waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
179
180
181 assertTrue(waitForCounterBoolean(SplitLogCounters.tot_wkr_failed_to_grab_task_owned, 0, 1, 1500, false) ||
182 SplitLogCounters.tot_wkr_failed_to_grab_task_lost_race.get() == 1);
183 byte [] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TRFT));
184 SplitLogTask slt = SplitLogTask.parseFrom(bytes);
185 assertTrue(slt.isOwned(SVR1) || slt.isOwned(SVR2));
186 } finally {
187 stopSplitLogWorker(slw1);
188 stopSplitLogWorker(slw2);
189 }
190 }
191
192 @Test
193 public void testPreemptTask() throws Exception {
194 LOG.info("testPreemptTask");
195 SplitLogCounters.resetCounters();
196 final ServerName SRV = new ServerName("tpt_svr,1,1");
197 final String PATH = ZKSplitLog.getEncodedNodeName(zkw, "tpt_task");
198 SplitLogWorker slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), SRV, neverEndingTask);
199 slw.start();
200 try {
201 Thread.yield();
202 Thread.sleep(1000);
203 waitForCounter(SplitLogCounters.tot_wkr_task_grabing, 0, 1, 5000);
204
205
206 zkw.getRecoverableZooKeeper().create(PATH,
207 new SplitLogTask.Unassigned(MANAGER).toByteArray(), Ids.OPEN_ACL_UNSAFE,
208 CreateMode.PERSISTENT);
209
210 waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 8000);
211 assertEquals(1, slw.taskReadySeq);
212 byte [] bytes = ZKUtil.getData(zkw, PATH);
213 SplitLogTask slt = SplitLogTask.parseFrom(bytes);
214 assertTrue(slt.isOwned(SRV));
215 slt = new SplitLogTask.Unassigned(MANAGER);
216 ZKUtil.setData(zkw, PATH, slt.toByteArray());
217 waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, 1500);
218 } finally {
219 stopSplitLogWorker(slw);
220 }
221 }
222
223 @Test
224 public void testMultipleTasks() throws Exception {
225 LOG.info("testMultipleTasks");
226 SplitLogCounters.resetCounters();
227 final ServerName SRV = new ServerName("tmt_svr,1,1");
228 final String PATH1 = ZKSplitLog.getEncodedNodeName(zkw, "tmt_task");
229 SplitLogWorker slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), SRV, neverEndingTask);
230 slw.start();
231 try {
232 Thread.yield();
233 Thread.sleep(100);
234 waitForCounter(SplitLogCounters.tot_wkr_task_grabing, 0, 1, 1500);
235
236 SplitLogTask unassignedManager = new SplitLogTask.Unassigned(MANAGER);
237 zkw.getRecoverableZooKeeper().create(PATH1, unassignedManager.toByteArray(),
238 Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
239
240 waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
241
242
243
244 final String PATH2 = ZKSplitLog.getEncodedNodeName(zkw, "tmt_task_2");
245 zkw.getRecoverableZooKeeper().create(PATH2, unassignedManager.toByteArray(),
246 Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
247
248
249 final ServerName anotherWorker = new ServerName("another-worker,1,1");
250 SplitLogTask slt = new SplitLogTask.Owned(anotherWorker);
251 ZKUtil.setData(zkw, PATH1, slt.toByteArray());
252 waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, 1500);
253
254 waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 1, 2, 1500);
255 assertEquals(2, slw.taskReadySeq);
256 byte [] bytes = ZKUtil.getData(zkw, PATH2);
257 slt = SplitLogTask.parseFrom(bytes);
258 assertTrue(slt.isOwned(SRV));
259 } finally {
260 stopSplitLogWorker(slw);
261 }
262 }
263
264 @Test
265 public void testRescan() throws Exception {
266 LOG.info("testRescan");
267 SplitLogCounters.resetCounters();
268 final ServerName SRV = new ServerName("svr,1,1");
269 slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), SRV, neverEndingTask);
270 slw.start();
271 Thread.yield();
272 Thread.sleep(100);
273
274 String task = ZKSplitLog.getEncodedNodeName(zkw, "task");
275 SplitLogTask slt = new SplitLogTask.Unassigned(MANAGER);
276 zkw.getRecoverableZooKeeper().create(task,slt.toByteArray(), Ids.OPEN_ACL_UNSAFE,
277 CreateMode.PERSISTENT);
278
279 waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
280
281
282
283 ZKUtil.setData(zkw, task, slt.toByteArray());
284 waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, 1500);
285
286
287 String rescan = ZKSplitLog.getEncodedNodeName(zkw, "RESCAN");
288 rescan = zkw.getRecoverableZooKeeper().create(rescan, slt.toByteArray(), Ids.OPEN_ACL_UNSAFE,
289 CreateMode.PERSISTENT_SEQUENTIAL);
290
291 waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 1, 2, 1500);
292
293
294
295 ZKUtil.setData(zkw, task, slt.toByteArray());
296 waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 1, 2, 1500);
297 waitForCounter(SplitLogCounters.tot_wkr_task_acquired_rescan, 0, 1, 1500);
298
299 List<String> nodes = ZKUtil.listChildrenNoWatch(zkw, zkw.splitLogZNode);
300 LOG.debug(nodes);
301 int num = 0;
302 for (String node : nodes) {
303 num++;
304 if (node.startsWith("RESCAN")) {
305 String name = ZKSplitLog.getEncodedNodeName(zkw, node);
306 String fn = ZKSplitLog.getFileName(name);
307 byte [] data = ZKUtil.getData(zkw, ZKUtil.joinZNode(zkw.splitLogZNode, fn));
308 slt = SplitLogTask.parseFrom(data);
309 assertTrue(slt.toString(), slt.isDone(SRV));
310 }
311 }
312 assertEquals(2, num);
313 }
314
315 }