1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase;
19
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.List;
25
26 import junit.framework.Assert;
27
28 import org.apache.commons.logging.Log;
29 import org.apache.commons.logging.LogFactory;
30 import org.apache.hadoop.hbase.master.HMaster;
31 import org.apache.hadoop.hbase.protobuf.RequestConverter;
32 import org.apache.hadoop.hbase.master.ServerManager;
33 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
34 import org.apache.hadoop.hbase.regionserver.HRegion;
35 import org.apache.hadoop.hbase.regionserver.HRegionServer;
36 import org.apache.hadoop.hbase.util.Bytes;
37 import org.apache.hadoop.hbase.util.JVMClusterUtil;
38 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
39 import org.apache.hadoop.hbase.util.Threads;
40 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
41 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
42 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
43 import org.apache.zookeeper.KeeperException;
44 import org.junit.AfterClass;
45 import org.junit.BeforeClass;
46 import org.junit.Test;
47 import org.junit.experimental.categories.Category;
48
49
50
51
52
53
54
55
56
57
58 @Category(MediumTests.class)
59 public class TestDrainingServer {
60 private static final Log LOG = LogFactory.getLog(TestDrainingServer.class);
61 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
62 private static final int NB_SLAVES = 5;
63 private static final int COUNT_OF_REGIONS = NB_SLAVES * 2;
64
65
66
67
68 @BeforeClass
69 public static void setUpBeforeClass() throws Exception {
70 TEST_UTIL.startMiniCluster(NB_SLAVES);
71 TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster();
72 TEST_UTIL.getConfiguration().setBoolean("hbase.master.enabletable.roundrobin", true);
73
74 final List<String> families = new ArrayList<String>(1);
75 families.add("family");
76 TEST_UTIL.createRandomTable("table", families, 1, 0, 0, COUNT_OF_REGIONS, 0);
77
78 TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, false);
79
80 boolean ready = false;
81 while (!ready){
82 waitForAllRegionsOnline();
83
84
85 int i = 0;
86 ready = true;
87 while (i < NB_SLAVES && ready){
88 HRegionServer hrs = TEST_UTIL.getMiniHBaseCluster().getRegionServer(i);
89 if (ProtobufUtil.getOnlineRegions(hrs).isEmpty()){
90 ready = false;
91 }
92 i++;
93 }
94
95 if (!ready){
96 TEST_UTIL.getHBaseAdmin().setBalancerRunning(true, true);
97 Assert.assertTrue("Can't start a balance!", TEST_UTIL.getHBaseAdmin().balancer());
98 TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, false);
99 Thread.sleep(100);
100 }
101 }
102 }
103
104 private static HRegionServer setDrainingServer(final HRegionServer hrs)
105 throws KeeperException {
106 LOG.info("Making " + hrs.getServerName() + " the draining server; " +
107 "it has " + hrs.getNumberOfOnlineRegions() + " online regions");
108 ZooKeeperWatcher zkw = hrs.getZooKeeper();
109 String hrsDrainingZnode =
110 ZKUtil.joinZNode(zkw.drainingZNode, hrs.getServerName().toString());
111 ZKUtil.createWithParents(zkw, hrsDrainingZnode);
112 return hrs;
113 }
114
115 private static HRegionServer unsetDrainingServer(final HRegionServer hrs)
116 throws KeeperException {
117 ZooKeeperWatcher zkw = hrs.getZooKeeper();
118 String hrsDrainingZnode =
119 ZKUtil.joinZNode(zkw.drainingZNode, hrs.getServerName().toString());
120 ZKUtil.deleteNode(zkw, hrsDrainingZnode);
121 return hrs;
122 }
123
124 @AfterClass
125 public static void tearDownAfterClass() throws Exception {
126 TEST_UTIL.shutdownMiniCluster();
127 }
128
129
130
131
132
133
134
135 @Test
136 public void testDrainingServerOffloading()
137 throws Exception {
138
139 HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
140 HRegionInfo hriToMoveBack = null;
141
142 HRegionServer drainingServer =
143 setDrainingServer(TEST_UTIL.getMiniHBaseCluster().getRegionServer(0));
144 try {
145 final int regionsOnDrainingServer =
146 drainingServer.getNumberOfOnlineRegions();
147 Assert.assertTrue(regionsOnDrainingServer > 0);
148 List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(drainingServer);
149 for (HRegionInfo hri : hris) {
150
151
152 master.moveRegion(null,
153 RequestConverter.buildMoveRegionRequest(hri.getEncodedNameAsBytes(), null));
154
155 hriToMoveBack = hri;
156 }
157
158 waitForAllRegionsOnline();
159 Assert.assertEquals(0, drainingServer.getNumberOfOnlineRegions());
160 } finally {
161 unsetDrainingServer(drainingServer);
162 }
163
164
165 master.moveRegion(null,
166 RequestConverter.buildMoveRegionRequest(hriToMoveBack.getEncodedNameAsBytes(),
167 Bytes.toBytes(drainingServer.getServerName().toString())));
168
169 waitForAllRegionsOnline();
170 Assert.assertEquals(1, drainingServer.getNumberOfOnlineRegions());
171 }
172
173
174
175
176
177
178
179 @Test (timeout=30000)
180 public void testDrainingServerWithAbort() throws KeeperException, Exception {
181 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
182
183 waitForAllRegionsOnline();
184
185 final long regionCount = TEST_UTIL.getMiniHBaseCluster().countServedRegions();
186
187
188 Collection<HRegion> regions = getRegions();
189 LOG.info("All regions: " + regions);
190
191
192 HRegionServer drainingServer = TEST_UTIL.getMiniHBaseCluster().getRegionServer(0);
193 final int regionsOnDrainingServer = drainingServer.getNumberOfOnlineRegions();
194 Assert.assertTrue(regionsOnDrainingServer > 0);
195
196 ServerManager sm = master.getServerManager();
197
198 Collection<HRegion> regionsBefore = drainingServer.getOnlineRegionsLocalContext();
199 LOG.info("Regions of drained server are: "+ regionsBefore );
200
201 try {
202
203 setDrainingServer(drainingServer);
204
205
206 while (sm.createDestinationServersList().contains(drainingServer.getServerName())) {
207 Thread.sleep(1);
208 }
209
210 LOG.info("The available servers are: "+ sm.createDestinationServersList());
211
212 Assert.assertEquals("Nothing should have happened here.", regionsOnDrainingServer,
213 drainingServer.getNumberOfOnlineRegions());
214 Assert.assertFalse("We should not have regions in transition here. List is: " +
215 master.getAssignmentManager().getRegionStates().getRegionsInTransition(),
216 master.getAssignmentManager().getRegionStates().isRegionsInTransition());
217
218
219 for (int aborted = 0; aborted <= 2; aborted++) {
220 HRegionServer hrs = TEST_UTIL.getMiniHBaseCluster().getRegionServer(aborted + 1);
221 hrs.abort("Aborting");
222 }
223
224
225
226 Collection<HRegion> regionsAfter = null;
227 for (int i = 0; i < 1000; i++) {
228 waitForAllRegionsOnline();
229 regionsAfter = getRegions();
230 if (regionsAfter.size() >= regionCount) break;
231 LOG.info("Expecting " + regionCount + " but only " + regionsAfter);
232 Threads.sleep(10);
233 }
234 LOG.info("Regions of drained server: " + regionsAfter + ", all regions: " + getRegions());
235 Assert.assertEquals("Test conditions are not met: regions were" +
236 " created/deleted during the test. ",
237 regionCount, TEST_UTIL.getMiniHBaseCluster().countServedRegions());
238
239
240 regionsAfter = drainingServer.getOnlineRegionsLocalContext();
241 StringBuilder result = new StringBuilder();
242 for (HRegion r: regionsAfter){
243 if (!regionsBefore.contains(r)){
244 result.append(r).append(" was added after the drain");
245 if (regions.contains(r)){
246 result.append("(existing region");
247 } else {
248 result.append("(new region)");
249 }
250 result.append("; ");
251 }
252 }
253 for (HRegion r: regionsBefore){
254 if (!regionsAfter.contains(r)){
255 result.append(r).append(" was removed after the drain; ");
256 }
257 }
258 Assert.assertTrue("Errors are: "+ result.toString(), result.length()==0);
259
260 } finally {
261 unsetDrainingServer(drainingServer);
262 }
263 }
264
265 private Collection<HRegion> getRegions() {
266 Collection<HRegion> regions = new ArrayList<HRegion>();
267 List<RegionServerThread> rsthreads =
268 TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads();
269 for (RegionServerThread t: rsthreads) {
270 HRegionServer rs = t.getRegionServer();
271 Collection<HRegion> lr = rs.getOnlineRegionsLocalContext();
272 LOG.info("Found " + lr + " on " + rs);
273 regions.addAll(lr);
274 }
275 return regions;
276 }
277
278 private static void waitForAllRegionsOnline() throws Exception {
279
280 boolean done = false;
281 while (!done) {
282 Thread.sleep(1);
283
284
285 ZKAssign.blockUntilNoRIT(TEST_UTIL.getZooKeeperWatcher());
286
287
288 if (!isAllRegionsOnline()) continue;
289
290
291 if (TEST_UTIL.getMiniHBaseCluster().getMaster().
292 getAssignmentManager().getRegionStates().isRegionsInTransition()) continue;
293
294
295 done = true;
296 for (JVMClusterUtil.RegionServerThread rs :
297 TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads()) {
298 if (!rs.getRegionServer().getRegionsInTransitionInRS().isEmpty()) {
299 done = false;
300 }
301
302 Threads.sleep(10);
303 }
304 }
305 }
306
307 private static boolean isAllRegionsOnline() {
308 return TEST_UTIL.getMiniHBaseCluster().countServedRegions() >=
309 (COUNT_OF_REGIONS + 2
310 }
311 }