View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotNull;
23  import static org.junit.Assert.assertNull;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.IOException;
28  import java.util.ArrayList;
29  import java.util.List;
30  import java.util.Set;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FileSystem;
35  import org.apache.hadoop.fs.Path;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.HColumnDescriptor;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.testclassification.MediumTests;
42  import org.apache.hadoop.hbase.MiniHBaseCluster;
43  import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
44  import org.apache.hadoop.hbase.ServerLoad;
45  import org.apache.hadoop.hbase.ServerName;
46  import org.apache.hadoop.hbase.TableName;
47  import org.apache.hadoop.hbase.UnknownRegionException;
48  import org.apache.hadoop.hbase.Waiter;
49  import org.apache.hadoop.hbase.catalog.MetaEditor;
50  import org.apache.hadoop.hbase.catalog.MetaReader;
51  import org.apache.hadoop.hbase.client.HBaseAdmin;
52  import org.apache.hadoop.hbase.client.HTable;
53  import org.apache.hadoop.hbase.client.Result;
54  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
55  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
56  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
57  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
58  import org.apache.hadoop.hbase.coprocessor.RegionObserver;
59  import org.apache.hadoop.hbase.executor.EventType;
60  import org.apache.hadoop.hbase.master.RegionState.State;
61  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
62  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
63  import org.apache.hadoop.hbase.regionserver.HRegionServer;
64  import org.apache.hadoop.hbase.util.Bytes;
65  import org.apache.hadoop.hbase.util.ConfigUtil;
66  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
67  import org.apache.hadoop.hbase.util.FSUtils;
68  import org.apache.hadoop.hbase.util.Threads;
69  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
70  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
71  import org.apache.zookeeper.KeeperException;
72  import org.junit.AfterClass;
73  import org.junit.BeforeClass;
74  import org.junit.Test;
75  import org.junit.experimental.categories.Category;
76  
77  
78  /**
79   * This tests AssignmentManager with a testing cluster.
80   */
81  @Category(MediumTests.class)
82  public class TestAssignmentManagerOnCluster {
83    private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
84    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
85    final static Configuration conf = TEST_UTIL.getConfiguration();
86    private static HBaseAdmin admin;
87  
88    static void setupOnce() throws Exception {
89      // Using the our load balancer to control region plans
90      conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
91        MyLoadBalancer.class, LoadBalancer.class);
92      conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
93        MyRegionObserver.class, RegionObserver.class);
94      // Reduce the maximum attempts to speed up the test
95      conf.setInt("hbase.assignment.maximum.attempts", 3);
96  
97      TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, MyRegionServer.class);
98      admin = TEST_UTIL.getHBaseAdmin();
99    }
100 
101   @BeforeClass
102   public static void setUpBeforeClass() throws Exception {
103     // Use ZK for region assignment
104     conf.setBoolean("hbase.assignment.usezk", true);
105     setupOnce();
106   }
107 
108   @AfterClass
109   public static void tearDownAfterClass() throws Exception {
110     TEST_UTIL.shutdownMiniCluster();
111   }
112 
113   /**
114    * This tests region assignment
115    */
116   @Test (timeout=60000)
117   public void testAssignRegion() throws Exception {
118     String table = "testAssignRegion";
119     try {
120       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
121       desc.addFamily(new HColumnDescriptor(FAMILY));
122       admin.createTable(desc);
123 
124       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
125       HRegionInfo hri = new HRegionInfo(
126         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
127       MetaEditor.addRegionToMeta(meta, hri);
128 
129       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
130       master.assignRegion(hri);
131       AssignmentManager am = master.getAssignmentManager();
132       am.waitForAssignment(hri);
133 
134       RegionStates regionStates = am.getRegionStates();
135       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
136       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
137 
138       // Region is assigned now. Let's assign it again.
139       // Master should not abort, and region should be assigned.
140       RegionState oldState = regionStates.getRegionState(hri);
141       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
142       master.getAssignmentManager().waitForAssignment(hri);
143       RegionState newState = regionStates.getRegionState(hri);
144       assertTrue(newState.isOpened()
145         && newState.getStamp() != oldState.getStamp());
146     } finally {
147       TEST_UTIL.deleteTable(Bytes.toBytes(table));
148     }
149   }
150   
151   // Simulate a scenario where the AssignCallable and SSH are trying to assign a region
152   @Test (timeout=60000)
153   public void testAssignRegionBySSH() throws Exception {
154     if (!conf.getBoolean("hbase.assignment.usezk", true)) {
155       return;
156     }
157     String table = "testAssignRegionBySSH";
158     MyMaster master = (MyMaster) TEST_UTIL.getHBaseCluster().getMaster();
159     try {
160       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
161       desc.addFamily(new HColumnDescriptor(FAMILY));
162       admin.createTable(desc);
163 
164       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
165       HRegionInfo hri = new HRegionInfo(
166         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
167       MetaEditor.addRegionToMeta(meta, hri);
168       // Add some dummy server for the region entry
169       MetaEditor.updateRegionLocation(TEST_UTIL.getHBaseCluster().getMaster().getCatalogTracker(), hri,
170         ServerName.valueOf("example.org", 1234, System.currentTimeMillis()), 0);
171       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
172       int i = TEST_UTIL.getHBaseCluster().getServerWithMeta();
173       HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(i == 0 ? 1 : 0);
174       // Choose a server other than meta to kill
175       ServerName controlledServer = rs.getServerName();
176       master.enableSSH(false);
177       TEST_UTIL.getHBaseCluster().killRegionServer(controlledServer);
178       TEST_UTIL.getHBaseCluster().waitForRegionServerToStop(controlledServer, -1);
179       AssignmentManager am = master.getAssignmentManager();
180       
181       // Simulate the AssignCallable trying to assign the region. Have the region in OFFLINE state,
182       // but not in transition and the server is the dead 'controlledServer'  
183       regionStates.createRegionState(hri, State.OFFLINE, controlledServer);
184       am.assign(hri, true, true);
185       // Region should remain in OFFLINE and go to transition
186       assertEquals(State.OFFLINE, regionStates.getRegionState(hri).getState());
187       assertTrue (regionStates.isRegionInTransition(hri));
188       
189       master.enableSSH(true);
190       am.waitForAssignment(hri);
191       assertTrue (regionStates.getRegionState(hri).isOpened());
192       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
193       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
194     } finally {
195       if (master != null) {
196         master.enableSSH(true);
197       }
198       TEST_UTIL.deleteTable(Bytes.toBytes(table));
199       TEST_UTIL.getHBaseCluster().startRegionServer();
200     }
201   }
202 
203   /**
204    * This tests region assignment on a simulated restarted server
205    */
206   @Test (timeout=120000)
207   public void testAssignRegionOnRestartedServer() throws Exception {
208     String table = "testAssignRegionOnRestartedServer";
209     TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
210     TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
211     TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect
212    
213     ServerName deadServer = null;
214     HMaster master = null;
215     try {
216       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
217       desc.addFamily(new HColumnDescriptor(FAMILY));
218       admin.createTable(desc);
219 
220       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
221       final HRegionInfo hri = new HRegionInfo(
222         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
223       MetaEditor.addRegionToMeta(meta, hri);
224 
225       master = TEST_UTIL.getHBaseCluster().getMaster();
226       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
227       assertFalse("There should be some servers online", onlineServers.isEmpty());
228 
229       // Use the first server as the destination server
230       ServerName destServer = onlineServers.iterator().next();
231 
232       // Created faked dead server
233       deadServer = ServerName.valueOf(destServer.getHostname(),
234           destServer.getPort(), destServer.getStartcode() - 100L);
235       master.serverManager.recordNewServerWithLock(deadServer, ServerLoad.EMPTY_SERVERLOAD);
236 
237       final AssignmentManager am = master.getAssignmentManager();
238       RegionPlan plan = new RegionPlan(hri, null, deadServer);
239       am.addPlan(hri.getEncodedName(), plan);
240       master.assignRegion(hri);
241 
242       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
243         destServer, EventType.M_ZK_REGION_OFFLINE,
244         EventType.RS_ZK_REGION_OPENING, 0);
245       assertEquals("TansitionNode should fail", -1, version);
246 
247       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
248         @Override
249         public boolean evaluate() throws Exception {
250           return ! am.getRegionStates().isRegionInTransition(hri);
251         }
252       });
253 
254     assertFalse("Region should be assigned", am.getRegionStates().isRegionInTransition(hri));
255     } finally {
256       if (deadServer != null) {
257         master.serverManager.expireServer(deadServer);
258       }
259 
260       TEST_UTIL.deleteTable(Bytes.toBytes(table));
261 
262       // reset the value for other tests
263       TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 3);
264       ServerName masterServerName = TEST_UTIL.getMiniHBaseCluster().getMaster().getServerName();
265       TEST_UTIL.getMiniHBaseCluster().stopMaster(masterServerName);
266       TEST_UTIL.getMiniHBaseCluster().startMaster();
267       // Wait till master is active and is initialized
268       while (TEST_UTIL.getMiniHBaseCluster().getMaster() == null
269           || !TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
270         Threads.sleep(1);
271       }
272     }
273   }
274 
275   /**
276    * This tests offlining a region
277    */
278   @Test (timeout=60000)
279   public void testOfflineRegion() throws Exception {
280     TableName table =
281         TableName.valueOf("testOfflineRegion");
282     try {
283       HRegionInfo hri = createTableAndGetOneRegion(table);
284 
285       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
286         getMaster().getAssignmentManager().getRegionStates();
287       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
288       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
289       admin.offline(hri.getRegionName());
290 
291       long timeoutTime = System.currentTimeMillis() + 800;
292       while (true) {
293         List<HRegionInfo> regions =
294           regionStates.getRegionsOfTable(table);
295         if (!regions.contains(hri)) break;
296         long now = System.currentTimeMillis();
297         if (now > timeoutTime) {
298           fail("Failed to offline the region in time");
299           break;
300         }
301         Thread.sleep(10);
302       }
303       RegionState regionState = regionStates.getRegionState(hri);
304       assertTrue(regionState.isOffline());
305     } finally {
306       TEST_UTIL.deleteTable(table);
307     }
308   }
309 
310   /**
311    * This tests moving a region
312    */
313   @Test (timeout=50000)
314   public void testMoveRegion() throws Exception {
315     TableName table =
316         TableName.valueOf("testMoveRegion");
317     try {
318       HRegionInfo hri = createTableAndGetOneRegion(table);
319 
320       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
321         getMaster().getAssignmentManager().getRegionStates();
322       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
323       ServerName destServerName = null;
324       for (int i = 0; i < 3; i++) {
325         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
326         if (!destServer.getServerName().equals(serverName)) {
327           destServerName = destServer.getServerName();
328           break;
329         }
330       }
331       assertTrue(destServerName != null
332         && !destServerName.equals(serverName));
333       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
334         Bytes.toBytes(destServerName.getServerName()));
335 
336       long timeoutTime = System.currentTimeMillis() + 30000;
337       while (true) {
338         ServerName sn = regionStates.getRegionServerOfRegion(hri);
339         if (sn != null && sn.equals(destServerName)) {
340           TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
341           break;
342         }
343         long now = System.currentTimeMillis();
344         if (now > timeoutTime) {
345           fail("Failed to move the region in time: "
346             + regionStates.getRegionState(hri));
347         }
348         regionStates.waitForUpdate(50);
349       }
350 
351     } finally {
352       TEST_UTIL.deleteTable(table);
353     }
354   }
355 
356   /**
357    * If a table is deleted, we should not be able to move it anymore.
358    * Otherwise, the region will be brought back.
359    * @throws Exception
360    */
361   @Test (timeout=50000)
362   public void testMoveRegionOfDeletedTable() throws Exception {
363     TableName table =
364         TableName.valueOf("testMoveRegionOfDeletedTable");
365     HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
366     try {
367       HRegionInfo hri = createTableAndGetOneRegion(table);
368 
369       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
370       AssignmentManager am = master.getAssignmentManager();
371       RegionStates regionStates = am.getRegionStates();
372       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
373       ServerName destServerName = null;
374       for (int i = 0; i < 3; i++) {
375         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
376         if (!destServer.getServerName().equals(serverName)) {
377           destServerName = destServer.getServerName();
378           break;
379         }
380       }
381       assertTrue(destServerName != null
382         && !destServerName.equals(serverName));
383 
384       TEST_UTIL.deleteTable(table);
385 
386       try {
387         admin.move(hri.getEncodedNameAsBytes(),
388           Bytes.toBytes(destServerName.getServerName()));
389         fail("We should not find the region");
390       } catch (IOException ioe) {
391         assertTrue(ioe instanceof UnknownRegionException);
392       }
393 
394       am.balance(new RegionPlan(hri, serverName, destServerName));
395       assertFalse("The region should not be in transition",
396         regionStates.isRegionInTransition(hri));
397     } finally {
398       if (admin.tableExists(table)) {
399         TEST_UTIL.deleteTable(table);
400       }
401     }
402   }
403 
404   HRegionInfo createTableAndGetOneRegion(
405       final TableName tableName) throws IOException, InterruptedException {
406     HTableDescriptor desc = new HTableDescriptor(tableName);
407     desc.addFamily(new HColumnDescriptor(FAMILY));
408     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
409 
410     // wait till the table is assigned
411     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
412     long timeoutTime = System.currentTimeMillis() + 1000;
413     while (true) {
414       List<HRegionInfo> regions = master.getAssignmentManager().
415         getRegionStates().getRegionsOfTable(tableName);
416       if (regions.size() > 3) {
417         return regions.get(2);
418       }
419       long now = System.currentTimeMillis();
420       if (now > timeoutTime) {
421         fail("Could not find an online region");
422       }
423       Thread.sleep(10);
424     }
425   }
426 
427   /**
428    * This test should not be flaky. If it is flaky, it means something
429    * wrong with AssignmentManager which should be reported and fixed
430    *
431    * This tests forcefully assign a region while it's closing and re-assigned.
432    */
433   @Test (timeout=60000)
434   public void testForceAssignWhileClosing() throws Exception {
435     String table = "testForceAssignWhileClosing";
436     try {
437       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
438       desc.addFamily(new HColumnDescriptor(FAMILY));
439       admin.createTable(desc);
440 
441       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
442       HRegionInfo hri = new HRegionInfo(
443         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
444       MetaEditor.addRegionToMeta(meta, hri);
445 
446       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
447       master.assignRegion(hri);
448       AssignmentManager am = master.getAssignmentManager();
449       assertTrue(am.waitForAssignment(hri));
450       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
451       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
452       
453       MyRegionObserver.preCloseEnabled.set(true);
454       am.unassign(hri);
455       RegionState state = am.getRegionStates().getRegionState(hri);
456       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
457 
458       MyRegionObserver.preCloseEnabled.set(false);
459       am.unassign(hri, true);
460 
461       // region is closing now, will be re-assigned automatically.
462       // now, let's forcefully assign it again. it should be
463       // assigned properly and no double-assignment
464       am.assign(hri, true, true);
465 
466       // let's check if it's assigned after it's out of transition
467       am.waitOnRegionToClearRegionsInTransition(hri);
468       assertTrue(am.waitForAssignment(hri));
469 
470       ServerName serverName = master.getAssignmentManager().
471         getRegionStates().getRegionServerOfRegion(hri);
472       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
473     } finally {
474       MyRegionObserver.preCloseEnabled.set(false);
475       TEST_UTIL.deleteTable(Bytes.toBytes(table));
476     }
477   }
478 
479   /**
480    * This tests region close failed
481    */
482   @Test (timeout=60000)
483   public void testCloseFailed() throws Exception {
484     String table = "testCloseFailed";
485     try {
486       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
487       desc.addFamily(new HColumnDescriptor(FAMILY));
488       admin.createTable(desc);
489 
490       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
491       HRegionInfo hri = new HRegionInfo(
492         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
493       MetaEditor.addRegionToMeta(meta, hri);
494 
495       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
496       master.assignRegion(hri);
497       AssignmentManager am = master.getAssignmentManager();
498       assertTrue(am.waitForAssignment(hri));
499       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
500       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
501 
502       MyRegionObserver.preCloseEnabled.set(true);
503       am.unassign(hri);
504       RegionState state = am.getRegionStates().getRegionState(hri);
505       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
506 
507       MyRegionObserver.preCloseEnabled.set(false);
508       am.unassign(hri, true);
509 
510       // region may still be assigned now since it's closing,
511       // let's check if it's assigned after it's out of transition
512       am.waitOnRegionToClearRegionsInTransition(hri);
513 
514       // region should be closed and re-assigned
515       assertTrue(am.waitForAssignment(hri));
516       ServerName serverName = master.getAssignmentManager().
517         getRegionStates().getRegionServerOfRegion(hri);
518       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
519     } finally {
520       MyRegionObserver.preCloseEnabled.set(false);
521       TEST_UTIL.deleteTable(Bytes.toBytes(table));
522     }
523   }
524 
525   /**
526    * This tests region open failed
527    */
528   @Test (timeout=60000)
529   public void testOpenFailed() throws Exception {
530     String table = "testOpenFailed";
531     try {
532       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
533       desc.addFamily(new HColumnDescriptor(FAMILY));
534       admin.createTable(desc);
535 
536       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
537       HRegionInfo hri = new HRegionInfo(
538         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
539       MetaEditor.addRegionToMeta(meta, hri);
540 
541       MyLoadBalancer.controledRegion = hri.getEncodedName();
542 
543       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
544       master.assignRegion(hri);
545       AssignmentManager am = master.getAssignmentManager();
546       assertFalse(am.waitForAssignment(hri));
547 
548       RegionState state = am.getRegionStates().getRegionState(hri);
549       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
550       // Failed to open since no plan, so it's on no server
551       assertNull(state.getServerName());
552 
553       MyLoadBalancer.controledRegion = null;
554       master.assignRegion(hri);
555       assertTrue(am.waitForAssignment(hri));
556 
557       ServerName serverName = master.getAssignmentManager().
558         getRegionStates().getRegionServerOfRegion(hri);
559       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
560     } finally {
561       MyLoadBalancer.controledRegion = null;
562       TEST_UTIL.deleteTable(Bytes.toBytes(table));
563     }
564   }
565 
566   /**
567    * This tests region open failure which is not recoverable
568    */
569   @Test (timeout=60000)
570   public void testOpenFailedUnrecoverable() throws Exception {
571     TableName table =
572         TableName.valueOf("testOpenFailedUnrecoverable");
573     try {
574       HTableDescriptor desc = new HTableDescriptor(table);
575       desc.addFamily(new HColumnDescriptor(FAMILY));
576       admin.createTable(desc);
577 
578       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
579       HRegionInfo hri = new HRegionInfo(
580         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
581       MetaEditor.addRegionToMeta(meta, hri);
582 
583       FileSystem fs = FileSystem.get(conf);
584       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
585       Path regionDir = new Path(tableDir, hri.getEncodedName());
586       // create a file named the same as the region dir to
587       // mess up with region opening
588       fs.create(regionDir, true);
589 
590       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
591       master.assignRegion(hri);
592       AssignmentManager am = master.getAssignmentManager();
593       assertFalse(am.waitForAssignment(hri));
594 
595       RegionState state = am.getRegionStates().getRegionState(hri);
596       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
597       // Failed to open due to file system issue. Region state should
598       // carry the opening region server so that we can force close it
599       // later on before opening it again. See HBASE-9092.
600       assertNotNull(state.getServerName());
601 
602       // remove the blocking file, so that region can be opened
603       fs.delete(regionDir, true);
604       master.assignRegion(hri);
605       assertTrue(am.waitForAssignment(hri));
606 
607       ServerName serverName = master.getAssignmentManager().
608         getRegionStates().getRegionServerOfRegion(hri);
609       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
610     } finally {
611       TEST_UTIL.deleteTable(table);
612     }
613   }
614 
615   @Test (timeout=60000)
616   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
617     final TableName table =
618         TableName.valueOf
619             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
620     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
621     HRegionInfo hri = null;
622     ServerName serverName = null;
623     try {
624       hri = createTableAndGetOneRegion(table);
625       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
626       ServerName destServerName = null;
627       HRegionServer destServer = null;
628       for (int i = 0; i < 3; i++) {
629         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
630         if (!destServer.getServerName().equals(serverName)) {
631           destServerName = destServer.getServerName();
632           break;
633         }
634       }
635       ServerName metaServerName = am.getRegionStates().getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
636       // We don't want to process shutdown of meta, so move meta if required
637       if (ServerName.isSameHostnameAndPort(destServerName, metaServerName)) {
638         int i = TEST_UTIL.getHBaseCluster().getServerWithMeta();
639         HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(i == 0 ? 1 : 0);
640         TEST_UTIL
641             .getHBaseCluster()
642             .getMaster()
643             .move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
644               Bytes.toBytes(rs.getServerName().getServerName()));
645         am.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
646       }
647       
648       am.regionOffline(hri);
649       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
650       am.getRegionStates().updateRegionState(hri, State.PENDING_OPEN, destServerName);
651       if (ConfigUtil.useZKForAssignment(conf)) {
652         ZKAssign.createNodeOffline(zkw, hri, destServerName);
653         ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
654 
655         // Wait till the event is processed and the region is in transition
656         long timeoutTime = System.currentTimeMillis() + 20000;
657         while (!am.getRegionStates().isRegionInTransition(hri)) {
658           assertTrue("Failed to process ZK opening event in time",
659             System.currentTimeMillis() < timeoutTime);
660           Thread.sleep(100);
661         }
662       }
663 
664       am.getZKTable().setDisablingTable(table);
665       List<HRegionInfo> toAssignRegions = am.processServerShutdown(destServerName);
666       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
667       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
668           .getRegionState(hri).isOffline());
669     } finally {
670       if (hri != null && serverName != null) {
671         am.regionOnline(hri, serverName);
672       }
673       am.getZKTable().setDisabledTable(table);
674       TEST_UTIL.deleteTable(table);
675     }
676   }
677 
678   /**
679    * This tests region close hanging
680    */
681   @Test (timeout=60000)
682   public void testCloseHang() throws Exception {
683     String table = "testCloseHang";
684     try {
685       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
686       desc.addFamily(new HColumnDescriptor(FAMILY));
687       admin.createTable(desc);
688 
689       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
690       HRegionInfo hri = new HRegionInfo(
691         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
692       MetaEditor.addRegionToMeta(meta, hri);
693 
694       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
695       master.assignRegion(hri);
696       AssignmentManager am = master.getAssignmentManager();
697       assertTrue(am.waitForAssignment(hri));
698       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
699       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
700 
701       MyRegionObserver.postCloseEnabled.set(true);
702       am.unassign(hri);
703       // Now region should pending_close or closing
704       // Unassign it again forcefully so that we can trigger already
705       // in transition exception. This test is to make sure this scenario
706       // is handled properly.
707       am.server.getConfiguration().setLong(
708         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
709       am.unassign(hri, true);
710       RegionState state = am.getRegionStates().getRegionState(hri);
711       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
712 
713       // Let region closing move ahead. The region should be closed
714       // properly and re-assigned automatically
715       MyRegionObserver.postCloseEnabled.set(false);
716 
717       // region may still be assigned now since it's closing,
718       // let's check if it's assigned after it's out of transition
719       am.waitOnRegionToClearRegionsInTransition(hri);
720 
721       // region should be closed and re-assigned
722       assertTrue(am.waitForAssignment(hri));
723       ServerName serverName = master.getAssignmentManager().
724         getRegionStates().getRegionServerOfRegion(hri);
725       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
726     } finally {
727       MyRegionObserver.postCloseEnabled.set(false);
728       TEST_UTIL.deleteTable(Bytes.toBytes(table));
729     }
730   }
731 
732   /**
733    * This tests region close racing with open
734    */
735   @Test (timeout=60000)
736   public void testOpenCloseRacing() throws Exception {
737     String table = "testOpenCloseRacing";
738     try {
739       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
740       desc.addFamily(new HColumnDescriptor(FAMILY));
741       admin.createTable(desc);
742 
743       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
744       HRegionInfo hri = new HRegionInfo(
745         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
746       MetaEditor.addRegionToMeta(meta, hri);
747       meta.close();
748 
749       MyRegionObserver.postOpenEnabled.set(true);
750       MyRegionObserver.postOpenCalled = false;
751       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
752       // Region will be opened, but it won't complete
753       master.assignRegion(hri);
754       long end = EnvironmentEdgeManager.currentTimeMillis() + 20000;
755       // Wait till postOpen is called
756       while (!MyRegionObserver.postOpenCalled ) {
757         assertFalse("Timed out waiting for postOpen to be called",
758           EnvironmentEdgeManager.currentTimeMillis() > end);
759         Thread.sleep(300);
760       }
761 
762       AssignmentManager am = master.getAssignmentManager();
763       // Now let's unassign it, it should do nothing
764       am.unassign(hri);
765       RegionState state = am.getRegionStates().getRegionState(hri);
766       ServerName oldServerName = state.getServerName();
767       assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
768 
769       // Now the region is stuck in opening
770       // Let's forcefully re-assign it to trigger closing/opening
771       // racing. This test is to make sure this scenario
772       // is handled properly.
773       ServerName destServerName = null;
774       int numRS = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size();
775       for (int i = 0; i < numRS; i++) {
776         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
777         if (!destServer.getServerName().equals(oldServerName)) {
778           destServerName = destServer.getServerName();
779           break;
780         }
781       }
782       assertNotNull(destServerName);
783       assertFalse("Region should be assigned on a new region server",
784         oldServerName.equals(destServerName));
785       List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
786       regions.add(hri);
787       am.assign(destServerName, regions);
788       
789       // let region open continue
790       MyRegionObserver.postOpenEnabled.set(false);
791 
792       // let's check if it's assigned after it's out of transition
793       am.waitOnRegionToClearRegionsInTransition(hri);
794       assertTrue(am.waitForAssignment(hri));
795 
796       ServerName serverName = master.getAssignmentManager().
797         getRegionStates().getRegionServerOfRegion(hri);
798       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
799     } finally {
800       MyRegionObserver.postOpenEnabled.set(false);
801       TEST_UTIL.deleteTable(Bytes.toBytes(table));
802     }
803   }
804 
805   /**
806    * Test force unassign/assign a region hosted on a dead server
807    */
808   @Test (timeout=60000)
809   public void testAssignRacingWithSSH() throws Exception {
810     String table = "testAssignRacingWithSSH";
811     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
812     MyMaster master = null;
813     try {
814       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
815       desc.addFamily(new HColumnDescriptor(FAMILY));
816       admin.createTable(desc);
817 
818       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
819       HRegionInfo hri = new HRegionInfo(
820         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
821       MetaEditor.addRegionToMeta(meta, hri);
822 
823       // Assign the region
824       master = (MyMaster)cluster.getMaster();
825       master.assignRegion(hri);
826 
827       // Hold SSH before killing the hosting server
828       master.enableSSH(false);
829 
830       AssignmentManager am = master.getAssignmentManager();
831       RegionStates regionStates = am.getRegionStates();
832       ServerName metaServer = regionStates.getRegionServerOfRegion(
833         HRegionInfo.FIRST_META_REGIONINFO);
834       while (true) {
835         assertTrue(am.waitForAssignment(hri));
836         RegionState state = regionStates.getRegionState(hri);
837         ServerName oldServerName = state.getServerName();
838         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
839           // Kill the hosting server, which doesn't have meta on it.
840           cluster.killRegionServer(oldServerName);
841           cluster.waitForRegionServerToStop(oldServerName, -1);
842           break;
843         }
844         int i = cluster.getServerWithMeta();
845         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
846         oldServerName = rs.getServerName();
847         master.move(hri.getEncodedNameAsBytes(),
848           Bytes.toBytes(oldServerName.getServerName()));
849       }
850 
851       // You can't assign a dead region before SSH
852       am.assign(hri, true, true);
853       RegionState state = regionStates.getRegionState(hri);
854       assertTrue(state.isFailedClose());
855 
856       // You can't unassign a dead region before SSH either
857       am.unassign(hri, true);
858       assertTrue(state.isFailedClose());
859 
860       // Enable SSH so that log can be split
861       master.enableSSH(true);
862 
863       // let's check if it's assigned after it's out of transition.
864       // no need to assign it manually, SSH should do it
865       am.waitOnRegionToClearRegionsInTransition(hri);
866       assertTrue(am.waitForAssignment(hri));
867 
868       ServerName serverName = master.getAssignmentManager().
869         getRegionStates().getRegionServerOfRegion(hri);
870       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
871     } finally {
872       if (master != null) {
873         master.enableSSH(true);
874       }
875       TEST_UTIL.deleteTable(Bytes.toBytes(table));
876     }
877   }
878 
879   /**
880    * Test force unassign/assign a region of a disabled table
881    */
882   @Test (timeout=60000)
883   public void testAssignDisabledRegion() throws Exception {
884     String table = "testAssignDisabledRegion";
885     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
886     MyMaster master = null;
887     try {
888       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
889       desc.addFamily(new HColumnDescriptor(FAMILY));
890       admin.createTable(desc);
891 
892       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
893       HRegionInfo hri = new HRegionInfo(
894         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
895       MetaEditor.addRegionToMeta(meta, hri);
896 
897       // Assign the region
898       master = (MyMaster)cluster.getMaster();
899       master.assignRegion(hri);
900       AssignmentManager am = master.getAssignmentManager();
901       RegionStates regionStates = am.getRegionStates();
902       assertTrue(am.waitForAssignment(hri));
903 
904       // Disable the table
905       admin.disableTable(table);
906       assertTrue(regionStates.isRegionOffline(hri));
907 
908       // You can't assign a disabled region
909       am.assign(hri, true, true);
910       assertTrue(regionStates.isRegionOffline(hri));
911 
912       // You can't unassign a disabled region either
913       am.unassign(hri, true);
914       assertTrue(regionStates.isRegionOffline(hri));
915     } finally {
916       TEST_UTIL.deleteTable(Bytes.toBytes(table));
917     }
918   }
919 
920   /**
921    * Test that region state transition call is idempotent
922    */
923   @Test(timeout = 60000)
924   public void testReportRegionStateTransition() throws Exception {
925     String table = "testReportRegionStateTransition";
926     try {
927       MyRegionServer.simulateRetry = true;
928       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
929       desc.addFamily(new HColumnDescriptor(FAMILY));
930       admin.createTable(desc);
931       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
932       HRegionInfo hri =
933           new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
934       MetaEditor.addRegionToMeta(meta, hri);
935       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
936       master.assignRegion(hri);
937       AssignmentManager am = master.getAssignmentManager();
938       am.waitForAssignment(hri);
939       RegionStates regionStates = am.getRegionStates();
940       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
941       // Assert the the region is actually open on the server
942       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
943       // Closing region should just work fine
944       admin.disableTable(TableName.valueOf(table));
945       assertTrue(regionStates.isRegionOffline(hri));
946       List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getOnlineRegions(serverName);
947       assertTrue(!regions.contains(hri));
948     } finally {
949       MyRegionServer.simulateRetry = false;
950       TEST_UTIL.deleteTable(Bytes.toBytes(table));
951     }
952   }
953 
954   /**
955    * Test concurrent updates to meta when meta is not on master. Only for zk-less assignment
956    * @throws Exception
957    */
958   @Test(timeout = 30000)
959   public void testUpdatesRemoteMeta() throws Exception {
960     // Not for zk less assignment
961     if (conf.getBoolean("hbase.assignment.usezk", true)) {
962       return;
963     }
964     conf.setInt("hbase.regionstatestore.meta.connection", 3);
965     final RegionStateStore rss = new RegionStateStore(new MyRegionServer(conf));
966     rss.start();
967     // Create 10 threads and make each do 10 puts related to region state update
968     Thread[] th = new Thread[10];
969     List<String> nameList = new ArrayList<String>();
970     List<TableName> tableNameList = new ArrayList<TableName>();
971     for (int i = 0; i < th.length; i++) {
972       th[i] = new Thread() {
973         @Override
974         public void run() {
975           HRegionInfo[] hri = new HRegionInfo[10];
976           ServerName serverName = ServerName.valueOf("dummyhost", 1000, 1234);
977           for (int i = 0; i < 10; i++) {
978             hri[i] = new HRegionInfo(TableName.valueOf(Thread.currentThread().getName() + "_" + i));
979             RegionState newState = new RegionState(hri[i], RegionState.State.OPEN, serverName);
980             RegionState oldState =
981                 new RegionState(hri[i], RegionState.State.PENDING_OPEN, serverName);
982             rss.updateRegionState(1, newState, oldState);
983           }
984         }
985       };
986       th[i].start();
987       nameList.add(th[i].getName());
988     }
989     for (int i = 0; i < th.length; i++) {
990       th[i].join();
991     }
992     // Add all the expected table names in meta to tableNameList
993     for (String name : nameList) {
994       for (int i = 0; i < 10; i++) {
995         tableNameList.add(TableName.valueOf(name + "_" + i));
996       }
997     }
998     List<Result> metaRows =
999         MetaReader.fullScan(TEST_UTIL.getMiniHBaseCluster().getMaster().getCatalogTracker());
1000     int count = 0;
1001     // Check all 100 rows are in meta
1002     for (Result result : metaRows) {
1003       if (tableNameList.contains(HRegionInfo.getTable(result.getRow()))) {
1004         count++;
1005         if (count == 100) {
1006           break;
1007         }
1008       }
1009     }
1010     assertTrue(count == 100);
1011     rss.stop();
1012   }
1013 
1014   static class MyLoadBalancer extends StochasticLoadBalancer {
1015     // For this region, if specified, always assign to nowhere
1016     static volatile String controledRegion = null;
1017 
1018     @Override
1019     public ServerName randomAssignment(HRegionInfo regionInfo,
1020         List<ServerName> servers) {
1021       if (regionInfo.getEncodedName().equals(controledRegion)) {
1022         return null;
1023       }
1024       return super.randomAssignment(regionInfo, servers);
1025     }
1026   }
1027 
1028   public static class MyMaster extends HMaster {
1029     AtomicBoolean enabled = new AtomicBoolean(true);
1030 
1031     public MyMaster(Configuration conf) throws IOException, KeeperException,
1032         InterruptedException {
1033       super(conf);
1034     }
1035 
1036     @Override
1037     public boolean isServerShutdownHandlerEnabled() {
1038       return enabled.get() && super.isServerShutdownHandlerEnabled();
1039     }
1040 
1041     public void enableSSH(boolean enabled) {
1042       this.enabled.set(enabled);
1043       if (enabled) {
1044         serverManager.processQueuedDeadServers();
1045       }
1046     }
1047   }
1048   
1049   public static class MyRegionServer extends MiniHBaseClusterRegionServer {
1050     static volatile ServerName abortedServer = null;
1051     static volatile boolean simulateRetry;
1052 
1053     public MyRegionServer(Configuration conf)
1054       throws IOException, KeeperException,
1055         InterruptedException {
1056       super(conf);
1057     }
1058 
1059     @Override
1060     public boolean
1061         reportRegionStateTransition(TransitionCode code, long openSeqNum, HRegionInfo... hris) {
1062       if (simulateRetry == true) {
1063         // Simulate retry by calling the method twice
1064         super.reportRegionStateTransition(code, openSeqNum, hris);
1065         return super.reportRegionStateTransition(code, openSeqNum, hris);
1066       }
1067       return super.reportRegionStateTransition(code, openSeqNum, hris);
1068     }
1069 
1070     @Override
1071     public boolean isAborted() {
1072       return getServerName().equals(abortedServer) || super.isAborted();
1073     }
1074   }
1075 
1076 
1077   public static class MyRegionObserver extends BaseRegionObserver {
1078     // If enabled, fail all preClose calls
1079     static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);
1080 
1081     // If enabled, stall postClose calls
1082     static AtomicBoolean postCloseEnabled = new AtomicBoolean(false);
1083 
1084     // If enabled, stall postOpen calls
1085     static AtomicBoolean postOpenEnabled = new AtomicBoolean(false);
1086 
1087     // A flag to track if postOpen is called
1088     static volatile boolean postOpenCalled = false;
1089 
1090     @Override
1091     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
1092         boolean abortRequested) throws IOException {
1093       if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor");
1094     }
1095 
1096     @Override
1097     public void postClose(ObserverContext<RegionCoprocessorEnvironment> c,
1098         boolean abortRequested) {
1099       stallOnFlag(postCloseEnabled);
1100     }
1101 
1102     @Override
1103     public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) {
1104       postOpenCalled = true;
1105       stallOnFlag(postOpenEnabled);
1106     }
1107 
1108     private void stallOnFlag(final AtomicBoolean flag) {
1109       try {
1110         // If enabled, stall
1111         while (flag.get()) {
1112           Thread.sleep(1000);
1113         }
1114       } catch (InterruptedException ie) {
1115         Thread.currentThread().interrupt();
1116       }
1117     }
1118   }
1119 }