View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertTrue;
23  import static org.mockito.Mockito.mock;
24  import static org.mockito.Mockito.when;
25  
26  import java.util.List;
27  import java.util.concurrent.atomic.AtomicLong;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.conf.Configuration;
32  import org.apache.hadoop.hbase.HBaseConfiguration;
33  import org.apache.hadoop.hbase.HBaseTestingUtility;
34  import org.apache.hadoop.hbase.MediumTests;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.SplitLogCounters;
37  import org.apache.hadoop.hbase.SplitLogTask;
38  import org.apache.hadoop.hbase.Waiter;
39  import org.apache.hadoop.hbase.executor.ExecutorService;
40  import org.apache.hadoop.hbase.executor.ExecutorType;
41  import org.apache.hadoop.hbase.util.CancelableProgressable;
42  import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
43  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
44  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
45  import org.apache.log4j.Level;
46  import org.apache.log4j.Logger;
47  import org.apache.zookeeper.CreateMode;
48  import org.apache.zookeeper.ZooDefs.Ids;
49  import org.junit.After;
50  import org.junit.Before;
51  import org.junit.Test;
52  import org.junit.experimental.categories.Category;
53  
54  @Category(MediumTests.class)
55  public class TestSplitLogWorker {
56    private static final Log LOG = LogFactory.getLog(TestSplitLogWorker.class);
57    private final ServerName MANAGER = ServerName.valueOf("manager,1,1");
58    static {
59      Logger.getLogger("org.apache.hadoop.hbase").setLevel(Level.DEBUG);
60    }
61    private final static HBaseTestingUtility TEST_UTIL =
62      new HBaseTestingUtility();
63    private ZooKeeperWatcher zkw;
64    private SplitLogWorker slw;
65    private ExecutorService executorService;
66  
67    private void waitForCounter(AtomicLong ctr, long oldval, long newval, long timems)
68        throws Exception {
69      assertTrue("ctr=" + ctr.get() + ", oldval=" + oldval + ", newval=" + newval,
70        waitForCounterBoolean(ctr, oldval, newval, timems));
71    }
72  
73    private boolean waitForCounterBoolean(final AtomicLong ctr, final long oldval, long newval,
74        long timems) throws Exception {
75  
76      return waitForCounterBoolean(ctr, oldval, newval, timems, true);
77    }
78  
79    private boolean waitForCounterBoolean(final AtomicLong ctr, final long oldval, final long newval,
80        long timems, boolean failIfTimeout) throws Exception {
81  
82      long timeWaited = TEST_UTIL.waitFor(timems, 10, failIfTimeout,
83        new Waiter.Predicate<Exception>() {
84        @Override
85        public boolean evaluate() throws Exception {
86              return (ctr.get() >= newval);
87        }
88      });
89  
90      if( timeWaited > 0) {
91        // when not timed out
92        assertEquals(newval, ctr.get());
93      }
94      return true;
95    }
96  
97    @Before
98    public void setup() throws Exception {
99      TEST_UTIL.startMiniZKCluster();
100     zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
101         "split-log-worker-tests", null);
102     ZKUtil.deleteChildrenRecursively(zkw, zkw.baseZNode);
103     ZKUtil.createAndFailSilent(zkw, zkw.baseZNode);
104     assertTrue(ZKUtil.checkExists(zkw, zkw.baseZNode) != -1);
105     LOG.debug(zkw.baseZNode + " created");
106     ZKUtil.createAndFailSilent(zkw, zkw.splitLogZNode);
107     assertTrue(ZKUtil.checkExists(zkw, zkw.splitLogZNode) != -1);
108     LOG.debug(zkw.splitLogZNode + " created");
109     ZKUtil.createAndFailSilent(zkw, zkw.rsZNode);
110     assertTrue(ZKUtil.checkExists(zkw, zkw.rsZNode) != -1);
111     SplitLogCounters.resetCounters();
112     executorService = new ExecutorService("TestSplitLogWorker");
113     executorService.startExecutorService(ExecutorType.RS_LOG_REPLAY_OPS, 10);
114   }
115 
116   @After
117   public void teardown() throws Exception {
118     if (executorService != null) {
119       executorService.shutdown();
120     }
121     TEST_UTIL.shutdownMiniZKCluster();
122   }
123 
124   SplitLogWorker.TaskExecutor neverEndingTask =
125     new SplitLogWorker.TaskExecutor() {
126 
127       @Override
128       public Status exec(String name, CancelableProgressable p) {
129         while (true) {
130           try {
131             Thread.sleep(1000);
132           } catch (InterruptedException e) {
133             return Status.PREEMPTED;
134           }
135           if (!p.progress()) {
136             return Status.PREEMPTED;
137           }
138         }
139       }
140 
141   };
142 
143   @Test
144   public void testAcquireTaskAtStartup() throws Exception {
145     LOG.info("testAcquireTaskAtStartup");
146     SplitLogCounters.resetCounters();
147     final String TATAS = "tatas";
148     final ServerName RS = ServerName.valueOf("rs,1,1");
149     RegionServerServices mockedRS = getRegionServer(RS);
150     zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TATAS),
151       new SplitLogTask.Unassigned(ServerName.valueOf("mgr,1,1")).toByteArray(), Ids.OPEN_ACL_UNSAFE,
152         CreateMode.PERSISTENT);
153 
154     SplitLogWorker slw =
155         new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), mockedRS, neverEndingTask);
156     slw.start();
157     try {
158       waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
159       byte [] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS));
160       SplitLogTask slt = SplitLogTask.parseFrom(bytes);
161       assertTrue(slt.isOwned(RS));
162     } finally {
163       stopSplitLogWorker(slw);
164     }
165   }
166 
167   private void stopSplitLogWorker(final SplitLogWorker slw)
168   throws InterruptedException {
169     if (slw != null) {
170       slw.stop();
171       slw.worker.join(3000);
172       if (slw.worker.isAlive()) {
173         assertTrue(("Could not stop the worker thread slw=" + slw) == null);
174       }
175     }
176   }
177 
178   @Test
179   public void testRaceForTask() throws Exception {
180     LOG.info("testRaceForTask");
181     SplitLogCounters.resetCounters();
182     final String TRFT = "trft";
183     final ServerName SVR1 = ServerName.valueOf("svr1,1,1");
184     final ServerName SVR2 = ServerName.valueOf("svr2,1,1");
185     zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TRFT),
186       new SplitLogTask.Unassigned(MANAGER).toByteArray(), Ids.OPEN_ACL_UNSAFE,
187         CreateMode.PERSISTENT);
188     RegionServerServices mockedRS1 = getRegionServer(SVR1);
189     RegionServerServices mockedRS2 = getRegionServer(SVR2);
190     SplitLogWorker slw1 =
191         new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), mockedRS1, neverEndingTask);
192     SplitLogWorker slw2 =
193         new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), mockedRS2, neverEndingTask);
194     slw1.start();
195     slw2.start();
196     try {
197       waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
198       // Assert that either the tot_wkr_failed_to_grab_task_owned count was set of if
199       // not it, that we fell through to the next counter in line and it was set.
200       assertTrue(waitForCounterBoolean(SplitLogCounters.tot_wkr_failed_to_grab_task_owned, 0, 1, 1500, false) ||
201           SplitLogCounters.tot_wkr_failed_to_grab_task_lost_race.get() == 1);
202       byte [] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TRFT));
203       SplitLogTask slt = SplitLogTask.parseFrom(bytes);
204       assertTrue(slt.isOwned(SVR1) || slt.isOwned(SVR2));
205     } finally {
206       stopSplitLogWorker(slw1);
207       stopSplitLogWorker(slw2);
208     }
209   }
210 
211   @Test
212   public void testPreemptTask() throws Exception {
213     LOG.info("testPreemptTask");
214     SplitLogCounters.resetCounters();
215     final ServerName SRV = ServerName.valueOf("tpt_svr,1,1");
216     final String PATH = ZKSplitLog.getEncodedNodeName(zkw, "tpt_task");
217     RegionServerServices mockedRS = getRegionServer(SRV);
218     SplitLogWorker slw =
219         new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), mockedRS, neverEndingTask);
220     slw.start();
221     try {
222       Thread.yield(); // let the worker start
223       Thread.sleep(1000);
224       waitForCounter(SplitLogCounters.tot_wkr_task_grabing, 0, 1, 5000);
225 
226       // this time create a task node after starting the splitLogWorker
227       zkw.getRecoverableZooKeeper().create(PATH,
228         new SplitLogTask.Unassigned(MANAGER).toByteArray(), Ids.OPEN_ACL_UNSAFE,
229         CreateMode.PERSISTENT);
230 
231       waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 8000);
232       assertEquals(1, slw.taskReadySeq);
233       byte [] bytes = ZKUtil.getData(zkw, PATH);
234       SplitLogTask slt = SplitLogTask.parseFrom(bytes);
235       assertTrue(slt.isOwned(SRV));
236       slt = new SplitLogTask.Unassigned(MANAGER);
237       ZKUtil.setData(zkw, PATH, slt.toByteArray());
238       waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, 1500);
239     } finally {
240       stopSplitLogWorker(slw);
241     }
242   }
243 
244   @Test
245   public void testMultipleTasks() throws Exception {
246     LOG.info("testMultipleTasks");
247     SplitLogCounters.resetCounters();
248     final ServerName SRV = ServerName.valueOf("tmt_svr,1,1");
249     final String PATH1 = ZKSplitLog.getEncodedNodeName(zkw, "tmt_task");
250     RegionServerServices mockedRS = getRegionServer(SRV);
251     SplitLogWorker slw =
252         new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), mockedRS, neverEndingTask);
253     slw.start();
254     try {
255       Thread.yield(); // let the worker start
256       Thread.sleep(100);
257       waitForCounter(SplitLogCounters.tot_wkr_task_grabing, 0, 1, 1500);
258 
259       SplitLogTask unassignedManager = new SplitLogTask.Unassigned(MANAGER);
260       zkw.getRecoverableZooKeeper().create(PATH1, unassignedManager.toByteArray(),
261         Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
262 
263       waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
264       // now the worker is busy doing the above task
265 
266       // create another task
267       final String PATH2 = ZKSplitLog.getEncodedNodeName(zkw, "tmt_task_2");
268       zkw.getRecoverableZooKeeper().create(PATH2, unassignedManager.toByteArray(),
269         Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
270 
271       // preempt the first task, have it owned by another worker
272       final ServerName anotherWorker = ServerName.valueOf("another-worker,1,1");
273       SplitLogTask slt = new SplitLogTask.Owned(anotherWorker);
274       ZKUtil.setData(zkw, PATH1, slt.toByteArray());
275       waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, 1500);
276 
277       waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 1, 2, 1500);
278       assertEquals(2, slw.taskReadySeq);
279       byte [] bytes = ZKUtil.getData(zkw, PATH2);
280       slt = SplitLogTask.parseFrom(bytes);
281       assertTrue(slt.isOwned(SRV));
282     } finally {
283       stopSplitLogWorker(slw);
284     }
285   }
286 
287   @Test
288   public void testRescan() throws Exception {
289     LOG.info("testRescan");
290     SplitLogCounters.resetCounters();
291     final ServerName SRV = ServerName.valueOf("svr,1,1");
292     RegionServerServices mockedRS = getRegionServer(SRV);
293     slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), mockedRS, neverEndingTask);
294     slw.start();
295     Thread.yield(); // let the worker start
296     Thread.sleep(100);
297 
298     String task = ZKSplitLog.getEncodedNodeName(zkw, "task");
299     SplitLogTask slt = new SplitLogTask.Unassigned(MANAGER);
300     zkw.getRecoverableZooKeeper().create(task,slt.toByteArray(), Ids.OPEN_ACL_UNSAFE,
301       CreateMode.PERSISTENT);
302 
303     waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
304     // now the worker is busy doing the above task
305 
306     // preempt the task, have it owned by another worker
307     ZKUtil.setData(zkw, task, slt.toByteArray());
308     waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, 1500);
309 
310     // create a RESCAN node
311     String rescan = ZKSplitLog.getEncodedNodeName(zkw, "RESCAN");
312     rescan = zkw.getRecoverableZooKeeper().create(rescan, slt.toByteArray(), Ids.OPEN_ACL_UNSAFE,
313       CreateMode.PERSISTENT_SEQUENTIAL);
314 
315     waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 1, 2, 1500);
316     // RESCAN node might not have been processed if the worker became busy
317     // with the above task. preempt the task again so that now the RESCAN
318     // node is processed
319     ZKUtil.setData(zkw, task, slt.toByteArray());
320     waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 1, 2, 1500);
321     waitForCounter(SplitLogCounters.tot_wkr_task_acquired_rescan, 0, 1, 1500);
322 
323     List<String> nodes = ZKUtil.listChildrenNoWatch(zkw, zkw.splitLogZNode);
324     LOG.debug(nodes);
325     int num = 0;
326     for (String node : nodes) {
327       num++;
328       if (node.startsWith("RESCAN")) {
329         String name = ZKSplitLog.getEncodedNodeName(zkw, node);
330         String fn = ZKSplitLog.getFileName(name);
331         byte [] data = ZKUtil.getData(zkw, ZKUtil.joinZNode(zkw.splitLogZNode, fn));
332         slt = SplitLogTask.parseFrom(data);
333         assertTrue(slt.toString(), slt.isDone(SRV));
334       }
335     }
336     assertEquals(2, num);
337   }
338 
339   @Test
340   public void testAcquireMultiTasks() throws Exception {
341     LOG.info("testAcquireMultiTasks");
342     SplitLogCounters.resetCounters();
343     final String TATAS = "tatas";
344     final ServerName RS = ServerName.valueOf("rs,1,1");
345     final int maxTasks = 3;
346     Configuration testConf = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
347     testConf.setInt("hbase.regionserver.wal.max.splitters", maxTasks);
348     RegionServerServices mockedRS = getRegionServer(RS);
349 
350     for (int i = 0; i < maxTasks; i++) {
351       zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TATAS + i),
352         new SplitLogTask.Unassigned(ServerName.valueOf("mgr,1,1")).toByteArray(),
353         Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
354     }
355 
356     SplitLogWorker slw = new SplitLogWorker(zkw, testConf, mockedRS, neverEndingTask);
357     slw.start();
358     try {
359       waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, maxTasks, 6000);
360       for (int i = 0; i < maxTasks; i++) {
361         byte[] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS + i));
362         SplitLogTask slt = SplitLogTask.parseFrom(bytes);
363         assertTrue(slt.isOwned(RS));
364       }
365     } finally {
366       stopSplitLogWorker(slw);
367     }
368   }
369 
370   /**
371    * The test checks SplitLogWorker should not spawn more splitters than expected num of tasks per
372    * RS
373    * @throws Exception
374    */
375   @Test
376   public void testAcquireMultiTasksByAvgTasksPerRS() throws Exception {
377     LOG.info("testAcquireMultiTasks");
378     SplitLogCounters.resetCounters();
379     final String TATAS = "tatas";
380     final ServerName RS = ServerName.valueOf("rs,1,1");
381     final ServerName RS2 = ServerName.valueOf("rs,1,2");
382     final int maxTasks = 3;
383     Configuration testConf = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
384     testConf.setInt("hbase.regionserver.wal.max.splitters", maxTasks);
385     RegionServerServices mockedRS = getRegionServer(RS);
386 
387     // create two RS nodes
388     String rsPath = ZKUtil.joinZNode(zkw.rsZNode, RS.getServerName());
389     zkw.getRecoverableZooKeeper().create(rsPath, null, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
390     rsPath = ZKUtil.joinZNode(zkw.rsZNode, RS2.getServerName());
391     zkw.getRecoverableZooKeeper().create(rsPath, null, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
392 
393     for (int i = 0; i < maxTasks; i++) {
394       zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TATAS + i),
395         new SplitLogTask.Unassigned(ServerName.valueOf("mgr,1,1")).toByteArray(),
396         Ids.OPEN_ACL_UNSAFE,
397         CreateMode.PERSISTENT);
398     }
399 
400     SplitLogWorker slw = new SplitLogWorker(zkw, testConf, mockedRS, neverEndingTask);
401     slw.start();
402     try {
403       int acquiredTasks = 0;
404       waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 2, 6000);
405       for (int i = 0; i < maxTasks; i++) {
406         byte[] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS + i));
407         SplitLogTask slt = SplitLogTask.parseFrom(bytes);
408         if (slt.isOwned(RS)) {
409           acquiredTasks++;
410         }
411       }
412       assertEquals(2, acquiredTasks);
413     } finally {
414       stopSplitLogWorker(slw);
415     }
416   }
417 
418   /**
419    * Create a mocked region server service instance
420    * @param server
421    * @return
422    */
423   private RegionServerServices getRegionServer(ServerName name) {
424 
425     RegionServerServices mockedServer = mock(RegionServerServices.class);
426     when(mockedServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
427     when(mockedServer.getServerName()).thenReturn(name);
428     when(mockedServer.getZooKeeper()).thenReturn(zkw);
429     when(mockedServer.isStopped()).thenReturn(false);
430     when(mockedServer.getExecutorService()).thenReturn(executorService);
431 
432     return mockedServer;
433   }
434 
435 }