View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertTrue;
23  
24  import java.util.List;
25  import java.util.concurrent.atomic.AtomicLong;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.HBaseTestingUtility;
30  import org.apache.hadoop.hbase.MediumTests;
31  import org.apache.hadoop.hbase.ServerName;
32  import org.apache.hadoop.hbase.SplitLogCounters;
33  import org.apache.hadoop.hbase.SplitLogTask;
34  import org.apache.hadoop.hbase.Waiter;
35  import org.apache.hadoop.hbase.util.CancelableProgressable;
36  import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
37  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
38  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
39  import org.apache.log4j.Level;
40  import org.apache.log4j.Logger;
41  import org.apache.zookeeper.CreateMode;
42  import org.apache.zookeeper.ZooDefs.Ids;
43  import org.junit.After;
44  import org.junit.Before;
45  import org.junit.Test;
46  import org.junit.experimental.categories.Category;
47  
48  @Category(MediumTests.class)
49  public class TestSplitLogWorker {
50    private static final Log LOG = LogFactory.getLog(TestSplitLogWorker.class);
51    private final ServerName MANAGER = new ServerName("manager,1,1");
52    static {
53      Logger.getLogger("org.apache.hadoop.hbase").setLevel(Level.DEBUG);
54    }
55    private final static HBaseTestingUtility TEST_UTIL =
56      new HBaseTestingUtility();
57    private ZooKeeperWatcher zkw;
58    private SplitLogWorker slw;
59  
60    private void waitForCounter(AtomicLong ctr, long oldval, long newval, long timems)
61        throws Exception {
62      assertTrue("ctr=" + ctr.get() + ", oldval=" + oldval + ", newval=" + newval,
63        waitForCounterBoolean(ctr, oldval, newval, timems));
64    }
65  
66    private boolean waitForCounterBoolean(final AtomicLong ctr, final long oldval, long newval,
67        long timems) throws Exception {
68  
69      return waitForCounterBoolean(ctr, oldval, newval, timems, true);
70    }
71  
72    private boolean waitForCounterBoolean(final AtomicLong ctr, final long oldval, long newval,
73        long timems, boolean failIfTimeout) throws Exception {
74  
75      long timeWaited = TEST_UTIL.waitFor(timems, 10, failIfTimeout,
76        new Waiter.Predicate<Exception>() {
77        @Override
78        public boolean evaluate() throws Exception {
79          return (ctr.get() != oldval);
80        }
81      });
82  
83      if( timeWaited > 0) {
84        // when not timed out
85        assertEquals(newval, ctr.get());
86      }
87      return true;
88    }
89  
90    @Before
91    public void setup() throws Exception {
92      TEST_UTIL.startMiniZKCluster();
93      zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
94          "split-log-worker-tests", null);
95      ZKUtil.deleteChildrenRecursively(zkw, zkw.baseZNode);
96      ZKUtil.createAndFailSilent(zkw, zkw.baseZNode);
97      assertTrue(ZKUtil.checkExists(zkw, zkw.baseZNode) != -1);
98      LOG.debug(zkw.baseZNode + " created");
99      ZKUtil.createAndFailSilent(zkw, zkw.splitLogZNode);
100     assertTrue(ZKUtil.checkExists(zkw, zkw.splitLogZNode) != -1);
101     LOG.debug(zkw.splitLogZNode + " created");
102     SplitLogCounters.resetCounters();
103   }
104 
105   @After
106   public void teardown() throws Exception {
107     TEST_UTIL.shutdownMiniZKCluster();
108   }
109 
110   SplitLogWorker.TaskExecutor neverEndingTask =
111     new SplitLogWorker.TaskExecutor() {
112 
113       @Override
114       public Status exec(String name, CancelableProgressable p) {
115         while (true) {
116           try {
117             Thread.sleep(1000);
118           } catch (InterruptedException e) {
119             return Status.PREEMPTED;
120           }
121           if (!p.progress()) {
122             return Status.PREEMPTED;
123           }
124         }
125       }
126 
127   };
128 
129   @Test
130   public void testAcquireTaskAtStartup() throws Exception {
131     LOG.info("testAcquireTaskAtStartup");
132     SplitLogCounters.resetCounters();
133     final String TATAS = "tatas";
134     final ServerName RS = new ServerName("rs,1,1");
135     zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TATAS),
136       new SplitLogTask.Unassigned(new ServerName("mgr,1,1")).toByteArray(), Ids.OPEN_ACL_UNSAFE,
137         CreateMode.PERSISTENT);
138 
139     SplitLogWorker slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), RS, neverEndingTask);
140     slw.start();
141     try {
142       waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
143       byte [] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS));
144       SplitLogTask slt = SplitLogTask.parseFrom(bytes);
145       assertTrue(slt.isOwned(RS));
146     } finally {
147       stopSplitLogWorker(slw);
148     }
149   }
150 
151   private void stopSplitLogWorker(final SplitLogWorker slw)
152   throws InterruptedException {
153     if (slw != null) {
154       slw.stop();
155       slw.worker.join(3000);
156       if (slw.worker.isAlive()) {
157         assertTrue(("Could not stop the worker thread slw=" + slw) == null);
158       }
159     }
160   }
161 
162   @Test
163   public void testRaceForTask() throws Exception {
164     LOG.info("testRaceForTask");
165     SplitLogCounters.resetCounters();
166     final String TRFT = "trft";
167     final ServerName SVR1 = new ServerName("svr1,1,1");
168     final ServerName SVR2 = new ServerName("svr2,1,1");
169     zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TRFT),
170       new SplitLogTask.Unassigned(MANAGER).toByteArray(), Ids.OPEN_ACL_UNSAFE,
171         CreateMode.PERSISTENT);
172 
173     SplitLogWorker slw1 = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), SVR1, neverEndingTask);
174     SplitLogWorker slw2 = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), SVR2, neverEndingTask);
175     slw1.start();
176     slw2.start();
177     try {
178       waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
179       // Assert that either the tot_wkr_failed_to_grab_task_owned count was set of if
180       // not it, that we fell through to the next counter in line and it was set.
181       assertTrue(waitForCounterBoolean(SplitLogCounters.tot_wkr_failed_to_grab_task_owned, 0, 1, 1500, false) ||
182           SplitLogCounters.tot_wkr_failed_to_grab_task_lost_race.get() == 1);
183       byte [] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TRFT));
184       SplitLogTask slt = SplitLogTask.parseFrom(bytes);
185       assertTrue(slt.isOwned(SVR1) || slt.isOwned(SVR2));
186     } finally {
187       stopSplitLogWorker(slw1);
188       stopSplitLogWorker(slw2);
189     }
190   }
191 
192   @Test
193   public void testPreemptTask() throws Exception {
194     LOG.info("testPreemptTask");
195     SplitLogCounters.resetCounters();
196     final ServerName SRV = new ServerName("tpt_svr,1,1");
197     final String PATH = ZKSplitLog.getEncodedNodeName(zkw, "tpt_task");
198     SplitLogWorker slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), SRV, neverEndingTask);
199     slw.start();
200     try {
201       Thread.yield(); // let the worker start
202       Thread.sleep(1000);
203       waitForCounter(SplitLogCounters.tot_wkr_task_grabing, 0, 1, 5000);
204 
205       // this time create a task node after starting the splitLogWorker
206       zkw.getRecoverableZooKeeper().create(PATH,
207         new SplitLogTask.Unassigned(MANAGER).toByteArray(), Ids.OPEN_ACL_UNSAFE,
208         CreateMode.PERSISTENT);
209 
210       waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 8000);
211       assertEquals(1, slw.taskReadySeq);
212       byte [] bytes = ZKUtil.getData(zkw, PATH);
213       SplitLogTask slt = SplitLogTask.parseFrom(bytes);
214       assertTrue(slt.isOwned(SRV));
215       slt = new SplitLogTask.Unassigned(MANAGER);
216       ZKUtil.setData(zkw, PATH, slt.toByteArray());
217       waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, 1500);
218     } finally {
219       stopSplitLogWorker(slw);
220     }
221   }
222 
223   @Test
224   public void testMultipleTasks() throws Exception {
225     LOG.info("testMultipleTasks");
226     SplitLogCounters.resetCounters();
227     final ServerName SRV = new ServerName("tmt_svr,1,1");
228     final String PATH1 = ZKSplitLog.getEncodedNodeName(zkw, "tmt_task");
229     SplitLogWorker slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), SRV, neverEndingTask);
230     slw.start();
231     try {
232       Thread.yield(); // let the worker start
233       Thread.sleep(100);
234       waitForCounter(SplitLogCounters.tot_wkr_task_grabing, 0, 1, 1500);
235 
236       SplitLogTask unassignedManager = new SplitLogTask.Unassigned(MANAGER);
237       zkw.getRecoverableZooKeeper().create(PATH1, unassignedManager.toByteArray(),
238         Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
239 
240       waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
241       // now the worker is busy doing the above task
242 
243       // create another task
244       final String PATH2 = ZKSplitLog.getEncodedNodeName(zkw, "tmt_task_2");
245       zkw.getRecoverableZooKeeper().create(PATH2, unassignedManager.toByteArray(),
246         Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
247 
248       // preempt the first task, have it owned by another worker
249       final ServerName anotherWorker = new ServerName("another-worker,1,1");
250       SplitLogTask slt = new SplitLogTask.Owned(anotherWorker);
251       ZKUtil.setData(zkw, PATH1, slt.toByteArray());
252       waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, 1500);
253 
254       waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 1, 2, 1500);
255       assertEquals(2, slw.taskReadySeq);
256       byte [] bytes = ZKUtil.getData(zkw, PATH2);
257       slt = SplitLogTask.parseFrom(bytes);
258       assertTrue(slt.isOwned(SRV));
259     } finally {
260       stopSplitLogWorker(slw);
261     }
262   }
263 
264   @Test
265   public void testRescan() throws Exception {
266     LOG.info("testRescan");
267     SplitLogCounters.resetCounters();
268     final ServerName SRV = new ServerName("svr,1,1");
269     slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), SRV, neverEndingTask);
270     slw.start();
271     Thread.yield(); // let the worker start
272     Thread.sleep(100);
273 
274     String task = ZKSplitLog.getEncodedNodeName(zkw, "task");
275     SplitLogTask slt = new SplitLogTask.Unassigned(MANAGER);
276     zkw.getRecoverableZooKeeper().create(task,slt.toByteArray(), Ids.OPEN_ACL_UNSAFE,
277       CreateMode.PERSISTENT);
278 
279     waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
280     // now the worker is busy doing the above task
281 
282     // preempt the task, have it owned by another worker
283     ZKUtil.setData(zkw, task, slt.toByteArray());
284     waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, 1500);
285 
286     // create a RESCAN node
287     String rescan = ZKSplitLog.getEncodedNodeName(zkw, "RESCAN");
288     rescan = zkw.getRecoverableZooKeeper().create(rescan, slt.toByteArray(), Ids.OPEN_ACL_UNSAFE,
289       CreateMode.PERSISTENT_SEQUENTIAL);
290 
291     waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 1, 2, 1500);
292     // RESCAN node might not have been processed if the worker became busy
293     // with the above task. preempt the task again so that now the RESCAN
294     // node is processed
295     ZKUtil.setData(zkw, task, slt.toByteArray());
296     waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 1, 2, 1500);
297     waitForCounter(SplitLogCounters.tot_wkr_task_acquired_rescan, 0, 1, 1500);
298 
299     List<String> nodes = ZKUtil.listChildrenNoWatch(zkw, zkw.splitLogZNode);
300     LOG.debug(nodes);
301     int num = 0;
302     for (String node : nodes) {
303       num++;
304       if (node.startsWith("RESCAN")) {
305         String name = ZKSplitLog.getEncodedNodeName(zkw, node);
306         String fn = ZKSplitLog.getFileName(name);
307         byte [] data = ZKUtil.getData(zkw, ZKUtil.joinZNode(zkw.splitLogZNode, fn));
308         slt = SplitLogTask.parseFrom(data);
309         assertTrue(slt.toString(), slt.isDone(SRV));
310       }
311     }
312     assertEquals(2, num);
313   }
314 
315 }