View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotNull;
23  import static org.junit.Assert.assertNull;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.IOException;
28  import java.util.ArrayList;
29  import java.util.List;
30  import java.util.Set;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FileSystem;
35  import org.apache.hadoop.fs.Path;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.HColumnDescriptor;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.MediumTests;
42  import org.apache.hadoop.hbase.MiniHBaseCluster;
43  import org.apache.hadoop.hbase.ServerLoad;
44  import org.apache.hadoop.hbase.ServerName;
45  import org.apache.hadoop.hbase.TableName;
46  import org.apache.hadoop.hbase.UnknownRegionException;
47  import org.apache.hadoop.hbase.catalog.MetaEditor;
48  import org.apache.hadoop.hbase.client.HBaseAdmin;
49  import org.apache.hadoop.hbase.client.HTable;
50  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
51  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
52  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
53  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
54  import org.apache.hadoop.hbase.coprocessor.RegionObserver;
55  import org.apache.hadoop.hbase.executor.EventType;
56  import org.apache.hadoop.hbase.master.RegionState.State;
57  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
58  import org.apache.hadoop.hbase.regionserver.HRegionServer;
59  import org.apache.hadoop.hbase.util.Bytes;
60  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
61  import org.apache.hadoop.hbase.util.FSUtils;
62  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
63  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
64  import org.apache.zookeeper.KeeperException;
65  import org.junit.AfterClass;
66  import org.junit.BeforeClass;
67  import org.junit.Test;
68  import org.junit.experimental.categories.Category;
69  
70  /**
71   * This tests AssignmentManager with a testing cluster.
72   */
73  @Category(MediumTests.class)
74  public class TestAssignmentManagerOnCluster {
75    private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
76    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
77    private final static Configuration conf = TEST_UTIL.getConfiguration();
78    private static HBaseAdmin admin;
79  
80    @BeforeClass
81    public static void setUpBeforeClass() throws Exception {
82      // Using the our load balancer to control region plans
83      conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
84        MyLoadBalancer.class, LoadBalancer.class);
85      conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
86        MyRegionObserver.class, RegionObserver.class);
87      // Reduce the maximum attempts to speed up the test
88      conf.setInt("hbase.assignment.maximum.attempts", 3);
89  
90      TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, null);
91      admin = TEST_UTIL.getHBaseAdmin();
92    }
93  
94    @AfterClass
95    public static void tearDownAfterClass() throws Exception {
96      TEST_UTIL.shutdownMiniCluster();
97    }
98  
99    /**
100    * This tests region assignment
101    */
102   @Test (timeout=60000)
103   public void testAssignRegion() throws Exception {
104     String table = "testAssignRegion";
105     try {
106       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
107       desc.addFamily(new HColumnDescriptor(FAMILY));
108       admin.createTable(desc);
109 
110       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
111       HRegionInfo hri = new HRegionInfo(
112         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
113       MetaEditor.addRegionToMeta(meta, hri);
114 
115       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
116       master.assignRegion(hri);
117       AssignmentManager am = master.getAssignmentManager();
118       am.waitForAssignment(hri);
119 
120       RegionStates regionStates = am.getRegionStates();
121       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
122       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
123 
124       // Region is assigned now. Let's assign it again.
125       // Master should not abort, and region should be assigned.
126       RegionState oldState = regionStates.getRegionState(hri);
127       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
128       master.getAssignmentManager().waitForAssignment(hri);
129       RegionState newState = regionStates.getRegionState(hri);
130       assertTrue(newState.isOpened()
131         && newState.getStamp() != oldState.getStamp());
132     } finally {
133       TEST_UTIL.deleteTable(Bytes.toBytes(table));
134     }
135   }
136 
137   /**
138    * This tests region assignment on a simulated restarted server
139    */
140   @Test (timeout=60000)
141   public void testAssignRegionOnRestartedServer() throws Exception {
142     String table = "testAssignRegionOnRestartedServer";
143     ServerName deadServer = null;
144     HMaster master = null;
145     try {
146       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
147       desc.addFamily(new HColumnDescriptor(FAMILY));
148       admin.createTable(desc);
149 
150       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
151       HRegionInfo hri = new HRegionInfo(
152         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
153       MetaEditor.addRegionToMeta(meta, hri);
154 
155       master = TEST_UTIL.getHBaseCluster().getMaster();
156       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
157       assertFalse("There should be some servers online", onlineServers.isEmpty());
158 
159       // Use the first server as the destination server
160       ServerName destServer = onlineServers.iterator().next();
161 
162       // Created faked dead server
163       deadServer = ServerName.valueOf(destServer.getHostname(),
164           destServer.getPort(), destServer.getStartcode() - 100L);
165       master.serverManager.recordNewServerWithLock(deadServer, ServerLoad.EMPTY_SERVERLOAD);
166 
167       AssignmentManager am = master.getAssignmentManager();
168       RegionPlan plan = new RegionPlan(hri, null, deadServer);
169       am.addPlan(hri.getEncodedName(), plan);
170       master.assignRegion(hri);
171 
172       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
173         destServer, EventType.M_ZK_REGION_OFFLINE,
174         EventType.RS_ZK_REGION_OPENING, 0);
175       assertEquals("TansitionNode should fail", -1, version);
176 
177       // Give region 2 seconds to assign, which may not be enough.
178       // However, if HBASE-8545 is broken, this test will be flaky.
179       // Otherwise, this test should never be flaky.
180       Thread.sleep(2000);
181 
182       assertTrue("Region should still be in transition",
183         am.getRegionStates().isRegionInTransition(hri));
184       assertEquals("Assign node should still be in version 0", 0,
185         ZKAssign.getVersion(master.getZooKeeper(), hri));
186     } finally {
187       if (deadServer != null) {
188         master.serverManager.expireServer(deadServer);
189       }
190 
191       TEST_UTIL.deleteTable(Bytes.toBytes(table));
192     }
193   }
194 
195   /**
196    * This tests offlining a region
197    */
198   @Test (timeout=60000)
199   public void testOfflineRegion() throws Exception {
200     TableName table =
201         TableName.valueOf("testOfflineRegion");
202     try {
203       HRegionInfo hri = createTableAndGetOneRegion(table);
204 
205       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
206         getMaster().getAssignmentManager().getRegionStates();
207       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
208       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
209       admin.offline(hri.getRegionName());
210 
211       long timeoutTime = System.currentTimeMillis() + 800;
212       while (true) {
213         List<HRegionInfo> regions =
214           regionStates.getRegionsOfTable(table);
215         if (!regions.contains(hri)) break;
216         long now = System.currentTimeMillis();
217         if (now > timeoutTime) {
218           fail("Failed to offline the region in time");
219           break;
220         }
221         Thread.sleep(10);
222       }
223       RegionState regionState = regionStates.getRegionState(hri);
224       assertTrue(regionState.isOffline());
225     } finally {
226       TEST_UTIL.deleteTable(table);
227     }
228   }
229 
230   /**
231    * This tests moving a region
232    */
233   @Test (timeout=50000)
234   public void testMoveRegion() throws Exception {
235     TableName table =
236         TableName.valueOf("testMoveRegion");
237     try {
238       HRegionInfo hri = createTableAndGetOneRegion(table);
239 
240       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
241         getMaster().getAssignmentManager().getRegionStates();
242       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
243       ServerName destServerName = null;
244       for (int i = 0; i < 3; i++) {
245         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
246         if (!destServer.getServerName().equals(serverName)) {
247           destServerName = destServer.getServerName();
248           break;
249         }
250       }
251       assertTrue(destServerName != null
252         && !destServerName.equals(serverName));
253       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
254         Bytes.toBytes(destServerName.getServerName()));
255 
256       long timeoutTime = System.currentTimeMillis() + 30000;
257       while (true) {
258         ServerName sn = regionStates.getRegionServerOfRegion(hri);
259         if (sn != null && sn.equals(destServerName)) {
260           TEST_UTIL.assertRegionOnServer(hri, sn, 200);
261           break;
262         }
263         long now = System.currentTimeMillis();
264         if (now > timeoutTime) {
265           fail("Failed to move the region in time: "
266             + regionStates.getRegionState(hri));
267         }
268         regionStates.waitForUpdate(50);
269       }
270 
271     } finally {
272       TEST_UTIL.deleteTable(table);
273     }
274   }
275 
276   /**
277    * If a table is deleted, we should not be able to move it anymore.
278    * Otherwise, the region will be brought back.
279    * @throws Exception
280    */
281   @Test (timeout=50000)
282   public void testMoveRegionOfDeletedTable() throws Exception {
283     TableName table =
284         TableName.valueOf("testMoveRegionOfDeletedTable");
285     HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
286     try {
287       HRegionInfo hri = createTableAndGetOneRegion(table);
288 
289       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
290       AssignmentManager am = master.getAssignmentManager();
291       RegionStates regionStates = am.getRegionStates();
292       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
293       ServerName destServerName = null;
294       for (int i = 0; i < 3; i++) {
295         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
296         if (!destServer.getServerName().equals(serverName)) {
297           destServerName = destServer.getServerName();
298           break;
299         }
300       }
301       assertTrue(destServerName != null
302         && !destServerName.equals(serverName));
303 
304       TEST_UTIL.deleteTable(table);
305 
306       try {
307         admin.move(hri.getEncodedNameAsBytes(),
308           Bytes.toBytes(destServerName.getServerName()));
309         fail("We should not find the region");
310       } catch (IOException ioe) {
311         assertTrue(ioe instanceof UnknownRegionException);
312       }
313 
314       am.balance(new RegionPlan(hri, serverName, destServerName));
315       assertFalse("The region should not be in transition",
316         regionStates.isRegionInTransition(hri));
317     } finally {
318       if (admin.tableExists(table)) {
319         TEST_UTIL.deleteTable(table);
320       }
321     }
322   }
323 
324   HRegionInfo createTableAndGetOneRegion(
325       final TableName tableName) throws IOException, InterruptedException {
326     HTableDescriptor desc = new HTableDescriptor(tableName);
327     desc.addFamily(new HColumnDescriptor(FAMILY));
328     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
329 
330     // wait till the table is assigned
331     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
332     long timeoutTime = System.currentTimeMillis() + 1000;
333     while (true) {
334       List<HRegionInfo> regions = master.getAssignmentManager().
335         getRegionStates().getRegionsOfTable(tableName);
336       if (regions.size() > 3) {
337         return regions.get(2);
338       }
339       long now = System.currentTimeMillis();
340       if (now > timeoutTime) {
341         fail("Could not find an online region");
342       }
343       Thread.sleep(10);
344     }
345   }
346 
347   /**
348    * This test should not be flaky. If it is flaky, it means something
349    * wrong with AssignmentManager which should be reported and fixed
350    *
351    * This tests forcefully assign a region while it's closing and re-assigned.
352    */
353   @Test (timeout=60000)
354   public void testForceAssignWhileClosing() throws Exception {
355     String table = "testForceAssignWhileClosing";
356     try {
357       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
358       desc.addFamily(new HColumnDescriptor(FAMILY));
359       admin.createTable(desc);
360 
361       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
362       HRegionInfo hri = new HRegionInfo(
363         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
364       MetaEditor.addRegionToMeta(meta, hri);
365 
366       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
367       master.assignRegion(hri);
368       AssignmentManager am = master.getAssignmentManager();
369       assertTrue(am.waitForAssignment(hri));
370 
371       MyRegionObserver.preCloseEnabled.set(true);
372       am.unassign(hri);
373       RegionState state = am.getRegionStates().getRegionState(hri);
374       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
375 
376       MyRegionObserver.preCloseEnabled.set(false);
377       am.unassign(hri, true);
378 
379       // region is closing now, will be re-assigned automatically.
380       // now, let's forcefully assign it again. it should be
381       // assigned properly and no double-assignment
382       am.assign(hri, true, true);
383 
384       // let's check if it's assigned after it's out of transition
385       am.waitOnRegionToClearRegionsInTransition(hri);
386       assertTrue(am.waitForAssignment(hri));
387 
388       ServerName serverName = master.getAssignmentManager().
389         getRegionStates().getRegionServerOfRegion(hri);
390       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
391     } finally {
392       MyRegionObserver.preCloseEnabled.set(false);
393       TEST_UTIL.deleteTable(Bytes.toBytes(table));
394     }
395   }
396 
397   /**
398    * This tests region close failed
399    */
400   @Test (timeout=60000)
401   public void testCloseFailed() throws Exception {
402     String table = "testCloseFailed";
403     try {
404       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
405       desc.addFamily(new HColumnDescriptor(FAMILY));
406       admin.createTable(desc);
407 
408       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
409       HRegionInfo hri = new HRegionInfo(
410         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
411       MetaEditor.addRegionToMeta(meta, hri);
412 
413       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
414       master.assignRegion(hri);
415       AssignmentManager am = master.getAssignmentManager();
416       assertTrue(am.waitForAssignment(hri));
417 
418       MyRegionObserver.preCloseEnabled.set(true);
419       am.unassign(hri);
420       RegionState state = am.getRegionStates().getRegionState(hri);
421       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
422 
423       MyRegionObserver.preCloseEnabled.set(false);
424       am.unassign(hri, true);
425 
426       // region may still be assigned now since it's closing,
427       // let's check if it's assigned after it's out of transition
428       am.waitOnRegionToClearRegionsInTransition(hri);
429 
430       // region should be closed and re-assigned
431       assertTrue(am.waitForAssignment(hri));
432       ServerName serverName = master.getAssignmentManager().
433         getRegionStates().getRegionServerOfRegion(hri);
434       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
435     } finally {
436       MyRegionObserver.preCloseEnabled.set(false);
437       TEST_UTIL.deleteTable(Bytes.toBytes(table));
438     }
439   }
440 
441   /**
442    * This tests region open failed
443    */
444   @Test (timeout=60000)
445   public void testOpenFailed() throws Exception {
446     String table = "testOpenFailed";
447     try {
448       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
449       desc.addFamily(new HColumnDescriptor(FAMILY));
450       admin.createTable(desc);
451 
452       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
453       HRegionInfo hri = new HRegionInfo(
454         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
455       MetaEditor.addRegionToMeta(meta, hri);
456 
457       MyLoadBalancer.controledRegion = hri.getEncodedName();
458 
459       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
460       master.assignRegion(hri);
461       AssignmentManager am = master.getAssignmentManager();
462       assertFalse(am.waitForAssignment(hri));
463 
464       RegionState state = am.getRegionStates().getRegionState(hri);
465       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
466       // Failed to open since no plan, so it's on no server
467       assertNull(state.getServerName());
468 
469       MyLoadBalancer.controledRegion = null;
470       master.assignRegion(hri);
471       assertTrue(am.waitForAssignment(hri));
472 
473       ServerName serverName = master.getAssignmentManager().
474         getRegionStates().getRegionServerOfRegion(hri);
475       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
476     } finally {
477       MyLoadBalancer.controledRegion = null;
478       TEST_UTIL.deleteTable(Bytes.toBytes(table));
479     }
480   }
481 
482   /**
483    * This tests region open failure which is not recoverable
484    */
485   @Test (timeout=60000)
486   public void testOpenFailedUnrecoverable() throws Exception {
487     TableName table =
488         TableName.valueOf("testOpenFailedUnrecoverable");
489     try {
490       HTableDescriptor desc = new HTableDescriptor(table);
491       desc.addFamily(new HColumnDescriptor(FAMILY));
492       admin.createTable(desc);
493 
494       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
495       HRegionInfo hri = new HRegionInfo(
496         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
497       MetaEditor.addRegionToMeta(meta, hri);
498 
499       FileSystem fs = FileSystem.get(conf);
500       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
501       Path regionDir = new Path(tableDir, hri.getEncodedName());
502       // create a file named the same as the region dir to
503       // mess up with region opening
504       fs.create(regionDir, true);
505 
506       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
507       master.assignRegion(hri);
508       AssignmentManager am = master.getAssignmentManager();
509       assertFalse(am.waitForAssignment(hri));
510 
511       RegionState state = am.getRegionStates().getRegionState(hri);
512       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
513       // Failed to open due to file system issue. Region state should
514       // carry the opening region server so that we can force close it
515       // later on before opening it again. See HBASE-9092.
516       assertNotNull(state.getServerName());
517 
518       // remove the blocking file, so that region can be opened
519       fs.delete(regionDir, true);
520       master.assignRegion(hri);
521       assertTrue(am.waitForAssignment(hri));
522 
523       ServerName serverName = master.getAssignmentManager().
524         getRegionStates().getRegionServerOfRegion(hri);
525       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
526     } finally {
527       TEST_UTIL.deleteTable(table);
528     }
529   }
530 
531   @Test (timeout=60000)
532   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
533     final TableName table =
534         TableName.valueOf
535             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
536     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
537     HRegionInfo hri = null;
538     ServerName serverName = null;
539     try {
540       hri = createTableAndGetOneRegion(table);
541       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
542       ServerName destServerName = null;
543       HRegionServer destServer = null;
544       for (int i = 0; i < 3; i++) {
545         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
546         if (!destServer.getServerName().equals(serverName)) {
547           destServerName = destServer.getServerName();
548           break;
549         }
550       }
551       am.regionOffline(hri);
552       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
553       am.getRegionStates().updateRegionState(hri, State.OFFLINE);
554       ZKAssign.createNodeOffline(zkw, hri, destServerName);
555       ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
556 
557       // Wait till the event is processed and the region is in transition
558       long timeoutTime = System.currentTimeMillis() + 20000;
559       while (!am.getRegionStates().isRegionInTransition(hri)) {
560         assertTrue("Failed to process ZK opening event in time",
561           System.currentTimeMillis() < timeoutTime);
562         Thread.sleep(100);
563       }
564 
565       am.getZKTable().setDisablingTable(table);
566       List<HRegionInfo> toAssignRegions = am.processServerShutdown(destServerName);
567       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
568       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
569           .getRegionState(hri).isOffline());
570     } finally {
571       if (hri != null && serverName != null) {
572         am.regionOnline(hri, serverName);
573       }
574       am.getZKTable().setDisabledTable(table);
575       TEST_UTIL.deleteTable(table);
576     }
577   }
578 
579   /**
580    * This tests region close hanging
581    */
582   @Test (timeout=60000)
583   public void testCloseHang() throws Exception {
584     String table = "testCloseHang";
585     try {
586       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
587       desc.addFamily(new HColumnDescriptor(FAMILY));
588       admin.createTable(desc);
589 
590       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
591       HRegionInfo hri = new HRegionInfo(
592         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
593       MetaEditor.addRegionToMeta(meta, hri);
594 
595       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
596       master.assignRegion(hri);
597       AssignmentManager am = master.getAssignmentManager();
598       assertTrue(am.waitForAssignment(hri));
599 
600       MyRegionObserver.postCloseEnabled.set(true);
601       am.unassign(hri);
602       // Now region should pending_close or closing
603       // Unassign it again forcefully so that we can trigger already
604       // in transition exception. This test is to make sure this scenario
605       // is handled properly.
606       am.server.getConfiguration().setLong(
607         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
608       am.unassign(hri, true);
609       RegionState state = am.getRegionStates().getRegionState(hri);
610       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
611 
612       // Let region closing move ahead. The region should be closed
613       // properly and re-assigned automatically
614       MyRegionObserver.postCloseEnabled.set(false);
615 
616       // region may still be assigned now since it's closing,
617       // let's check if it's assigned after it's out of transition
618       am.waitOnRegionToClearRegionsInTransition(hri);
619 
620       // region should be closed and re-assigned
621       assertTrue(am.waitForAssignment(hri));
622       ServerName serverName = master.getAssignmentManager().
623         getRegionStates().getRegionServerOfRegion(hri);
624       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
625     } finally {
626       MyRegionObserver.postCloseEnabled.set(false);
627       TEST_UTIL.deleteTable(Bytes.toBytes(table));
628     }
629   }
630 
631   /**
632    * This tests region close racing with open
633    */
634   @Test (timeout=60000)
635   public void testOpenCloseRacing() throws Exception {
636     String table = "testOpenCloseRacing";
637     try {
638       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
639       desc.addFamily(new HColumnDescriptor(FAMILY));
640       admin.createTable(desc);
641 
642       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
643       HRegionInfo hri = new HRegionInfo(
644         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
645       MetaEditor.addRegionToMeta(meta, hri);
646       meta.close();
647 
648       MyRegionObserver.postOpenEnabled.set(true);
649       MyRegionObserver.postOpenCalled = false;
650       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
651       // Region will be opened, but it won't complete
652       master.assignRegion(hri);
653       long end = EnvironmentEdgeManager.currentTimeMillis() + 20000;
654       // Wait till postOpen is called
655       while (!MyRegionObserver.postOpenCalled ) {
656         assertFalse("Timed out waiting for postOpen to be called",
657           EnvironmentEdgeManager.currentTimeMillis() > end);
658         Thread.sleep(300);
659       }
660 
661       AssignmentManager am = master.getAssignmentManager();
662       // Now let's unassign it, it should do nothing
663       am.unassign(hri);
664       RegionState state = am.getRegionStates().getRegionState(hri);
665       ServerName oldServerName = state.getServerName();
666       assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
667 
668       // Now the region is stuck in opening
669       // Let's forcefully re-assign it to trigger closing/opening
670       // racing. This test is to make sure this scenario
671       // is handled properly.
672       MyRegionObserver.postOpenEnabled.set(false);
673       ServerName destServerName = null;
674       int numRS = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size();
675       for (int i = 0; i < numRS; i++) {
676         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
677         if (!destServer.getServerName().equals(oldServerName)) {
678           destServerName = destServer.getServerName();
679           break;
680         }
681       }
682       assertNotNull(destServerName);
683       assertFalse("Region should be assigned on a new region server",
684         oldServerName.equals(destServerName));
685       List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
686       regions.add(hri);
687       am.assign(destServerName, regions);
688 
689       // let's check if it's assigned after it's out of transition
690       am.waitOnRegionToClearRegionsInTransition(hri);
691       assertTrue(am.waitForAssignment(hri));
692 
693       ServerName serverName = master.getAssignmentManager().
694         getRegionStates().getRegionServerOfRegion(hri);
695       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
696       assertFalse("Region should be assigned on a new region server",
697         oldServerName.equals(serverName));
698     } finally {
699       MyRegionObserver.postOpenEnabled.set(false);
700       TEST_UTIL.deleteTable(Bytes.toBytes(table));
701     }
702   }
703 
704   /**
705    * Test force unassign/assign a region hosted on a dead server
706    */
707   @Test (timeout=60000)
708   public void testAssignRacingWithSSH() throws Exception {
709     String table = "testAssignRacingWithSSH";
710     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
711     MyMaster master = null;
712     try {
713       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
714       desc.addFamily(new HColumnDescriptor(FAMILY));
715       admin.createTable(desc);
716 
717       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
718       HRegionInfo hri = new HRegionInfo(
719         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
720       MetaEditor.addRegionToMeta(meta, hri);
721 
722       // Assign the region
723       master = (MyMaster)cluster.getMaster();
724       master.assignRegion(hri);
725 
726       // Hold SSH before killing the hosting server
727       master.enableSSH(false);
728 
729       AssignmentManager am = master.getAssignmentManager();
730       RegionStates regionStates = am.getRegionStates();
731       ServerName metaServer = regionStates.getRegionServerOfRegion(
732         HRegionInfo.FIRST_META_REGIONINFO);
733       while (true) {
734         assertTrue(am.waitForAssignment(hri));
735         RegionState state = regionStates.getRegionState(hri);
736         ServerName oldServerName = state.getServerName();
737         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
738           // Kill the hosting server, which doesn't have meta on it.
739           cluster.killRegionServer(oldServerName);
740           cluster.waitForRegionServerToStop(oldServerName, -1);
741           break;
742         }
743         int i = cluster.getServerWithMeta();
744         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
745         oldServerName = rs.getServerName();
746         master.move(hri.getEncodedNameAsBytes(),
747           Bytes.toBytes(oldServerName.getServerName()));
748       }
749 
750       // You can't assign a dead region before SSH
751       am.assign(hri, true, true);
752       RegionState state = regionStates.getRegionState(hri);
753       assertTrue(state.isFailedClose());
754 
755       // You can't unassign a dead region before SSH either
756       am.unassign(hri, true);
757       state = regionStates.getRegionState(hri);
758       assertTrue(state.isFailedClose());
759 
760       synchronized (regionStates) {
761         // Enable SSH so that log can be split
762         master.enableSSH(true);
763 
764         // We hold regionStates now, so logSplit
765         // won't be known to AM yet.
766         am.unassign(hri, true);
767         state = regionStates.getRegionState(hri);
768         assertTrue(state.isOffline());
769       }
770 
771       // let's check if it's assigned after it's out of transition.
772       // no need to assign it manually, SSH should do it
773       am.waitOnRegionToClearRegionsInTransition(hri);
774       assertTrue(am.waitForAssignment(hri));
775 
776       ServerName serverName = master.getAssignmentManager().
777         getRegionStates().getRegionServerOfRegion(hri);
778       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
779     } finally {
780       if (master != null) {
781         master.enableSSH(true);
782       }
783       TEST_UTIL.deleteTable(Bytes.toBytes(table));
784     }
785   }
786 
787   /**
788    * Test force unassign/assign a region of a disabled table
789    */
790   @Test (timeout=60000)
791   public void testAssignDisabledRegion() throws Exception {
792     String table = "testAssignDisabledRegion";
793     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
794     MyMaster master = null;
795     try {
796       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
797       desc.addFamily(new HColumnDescriptor(FAMILY));
798       admin.createTable(desc);
799 
800       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
801       HRegionInfo hri = new HRegionInfo(
802         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
803       MetaEditor.addRegionToMeta(meta, hri);
804 
805       // Assign the region
806       master = (MyMaster)cluster.getMaster();
807       master.assignRegion(hri);
808       AssignmentManager am = master.getAssignmentManager();
809       RegionStates regionStates = am.getRegionStates();
810       assertTrue(am.waitForAssignment(hri));
811 
812       // Disable the table
813       admin.disableTable(table);
814       assertTrue(regionStates.isRegionOffline(hri));
815 
816       // You can't assign a disabled region
817       am.assign(hri, true, true);
818       assertTrue(regionStates.isRegionOffline(hri));
819 
820       // You can't unassign a disabled region either
821       am.unassign(hri, true);
822       assertTrue(regionStates.isRegionOffline(hri));
823     } finally {
824       TEST_UTIL.deleteTable(Bytes.toBytes(table));
825     }
826   }
827 
828   static class MyLoadBalancer extends StochasticLoadBalancer {
829     // For this region, if specified, always assign to nowhere
830     static volatile String controledRegion = null;
831 
832     @Override
833     public ServerName randomAssignment(HRegionInfo regionInfo,
834         List<ServerName> servers) {
835       if (regionInfo.getEncodedName().equals(controledRegion)) {
836         return null;
837       }
838       return super.randomAssignment(regionInfo, servers);
839     }
840   }
841 
842   public static class MyMaster extends HMaster {
843     AtomicBoolean enabled = new AtomicBoolean(true);
844 
845     public MyMaster(Configuration conf) throws IOException, KeeperException,
846         InterruptedException {
847       super(conf);
848     }
849 
850     @Override
851     public boolean isServerShutdownHandlerEnabled() {
852       return enabled.get() && super.isServerShutdownHandlerEnabled();
853     }
854 
855     public void enableSSH(boolean enabled) {
856       this.enabled.set(enabled);
857       if (enabled) {
858         serverManager.processQueuedDeadServers();
859       }
860     }
861   }
862 
863   public static class MyRegionObserver extends BaseRegionObserver {
864     // If enabled, fail all preClose calls
865     static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);
866 
867     // If enabled, stall postClose calls
868     static AtomicBoolean postCloseEnabled = new AtomicBoolean(false);
869 
870     // If enabled, stall postOpen calls
871     static AtomicBoolean postOpenEnabled = new AtomicBoolean(false);
872 
873     // A flag to track if postOpen is called
874     static volatile boolean postOpenCalled = false;
875 
876     @Override
877     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
878         boolean abortRequested) throws IOException {
879       if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor");
880     }
881 
882     @Override
883     public void postClose(ObserverContext<RegionCoprocessorEnvironment> c,
884         boolean abortRequested) {
885       stallOnFlag(postCloseEnabled);
886     }
887 
888     @Override
889     public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) {
890       postOpenCalled = true;
891       stallOnFlag(postOpenEnabled);
892     }
893 
894     private void stallOnFlag(final AtomicBoolean flag) {
895       try {
896         // If enabled, stall
897         while (flag.get()) {
898           Thread.sleep(1000);
899         }
900       } catch (InterruptedException ie) {
901         Thread.currentThread().interrupt();
902       }
903     }
904   }
905 }