View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotNull;
23  import static org.junit.Assert.assertNull;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.IOException;
28  import java.util.ArrayList;
29  import java.util.List;
30  import java.util.Set;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FileSystem;
35  import org.apache.hadoop.fs.Path;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.HColumnDescriptor;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.MediumTests;
42  import org.apache.hadoop.hbase.MiniHBaseCluster;
43  import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
44  import org.apache.hadoop.hbase.ServerLoad;
45  import org.apache.hadoop.hbase.ServerName;
46  import org.apache.hadoop.hbase.TableName;
47  import org.apache.hadoop.hbase.UnknownRegionException;
48  import org.apache.hadoop.hbase.Waiter;
49  import org.apache.hadoop.hbase.catalog.MetaEditor;
50  import org.apache.hadoop.hbase.catalog.MetaReader;
51  import org.apache.hadoop.hbase.client.HBaseAdmin;
52  import org.apache.hadoop.hbase.client.HTable;
53  import org.apache.hadoop.hbase.client.Result;
54  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
55  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
56  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
57  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
58  import org.apache.hadoop.hbase.coprocessor.RegionObserver;
59  import org.apache.hadoop.hbase.executor.EventType;
60  import org.apache.hadoop.hbase.master.RegionState.State;
61  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
62  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionTransition.TransitionCode;
63  import org.apache.hadoop.hbase.regionserver.HRegionServer;
64  import org.apache.hadoop.hbase.util.Bytes;
65  import org.apache.hadoop.hbase.util.ConfigUtil;
66  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
67  import org.apache.hadoop.hbase.util.FSUtils;
68  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
69  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
70  import org.apache.zookeeper.KeeperException;
71  import org.junit.AfterClass;
72  import org.junit.BeforeClass;
73  import org.junit.Test;
74  import org.junit.experimental.categories.Category;
75  
76  /**
77   * This tests AssignmentManager with a testing cluster.
78   */
79  @Category(MediumTests.class)
80  public class TestAssignmentManagerOnCluster {
  // Column family added to every table descriptor built in this class.
  private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
  // Testing utility that starts and stops the mini cluster used by the tests.
  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  // Configuration taken from TEST_UTIL; no access modifier, so package-visible.
  final static Configuration conf = TEST_UTIL.getConfiguration();
  // Admin client; assigned in setupOnce() after the mini cluster has started.
  private static HBaseAdmin admin;
85  
86    static void setupOnce() throws Exception {
87      // Using the our load balancer to control region plans
88      conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
89        MyLoadBalancer.class, LoadBalancer.class);
90      conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
91        MyRegionObserver.class, RegionObserver.class);
92      // Reduce the maximum attempts to speed up the test
93      conf.setInt("hbase.assignment.maximum.attempts", 3);
94  
95      TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, MyRegionServer.class);
96      admin = TEST_UTIL.getHBaseAdmin();
97    }
98  
99    @BeforeClass
100   public static void setUpBeforeClass() throws Exception {
101     // Use ZK for region assignment
102     conf.setBoolean("hbase.assignment.usezk", true);
103     setupOnce();
104   }
105 
  /**
   * Shuts down the mini cluster after all tests in this class have run.
   */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }
110 
111   /**
112    * This tests region assignment
113    */
114   @Test (timeout=60000)
115   public void testAssignRegion() throws Exception {
116     String table = "testAssignRegion";
117     try {
118       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
119       desc.addFamily(new HColumnDescriptor(FAMILY));
120       admin.createTable(desc);
121 
122       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
123       HRegionInfo hri = new HRegionInfo(
124         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
125       MetaEditor.addRegionToMeta(meta, hri);
126 
127       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
128       master.assignRegion(hri);
129       AssignmentManager am = master.getAssignmentManager();
130       am.waitForAssignment(hri);
131 
132       RegionStates regionStates = am.getRegionStates();
133       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
134       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
135 
136       // Region is assigned now. Let's assign it again.
137       // Master should not abort, and region should be assigned.
138       RegionState oldState = regionStates.getRegionState(hri);
139       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
140       master.getAssignmentManager().waitForAssignment(hri);
141       RegionState newState = regionStates.getRegionState(hri);
142       assertTrue(newState.isOpened()
143         && newState.getStamp() != oldState.getStamp());
144     } finally {
145       TEST_UTIL.deleteTable(Bytes.toBytes(table));
146     }
147   }
148   
149   /**
150    * This tests region assignment on a simulated restarted server
151    */
152   @Test (timeout=120000)
153   public void testAssignRegionOnRestartedServer() throws Exception {
154     String table = "testAssignRegionOnRestartedServer";
155     TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
156     TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
157     TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect
158 
159     ServerName deadServer = null;
160     HMaster master = null;
161     try {
162       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
163       desc.addFamily(new HColumnDescriptor(FAMILY));
164       admin.createTable(desc);
165 
166       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
167       final HRegionInfo hri = new HRegionInfo(
168         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
169       MetaEditor.addRegionToMeta(meta, hri);
170 
171       master = TEST_UTIL.getHBaseCluster().getMaster();
172       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
173       assertFalse("There should be some servers online", onlineServers.isEmpty());
174 
175       // Use the first server as the destination server
176       ServerName destServer = onlineServers.iterator().next();
177 
178       // Created faked dead server
179       deadServer = ServerName.valueOf(destServer.getHostname(),
180           destServer.getPort(), destServer.getStartcode() - 100L);
181       master.serverManager.recordNewServerWithLock(deadServer, ServerLoad.EMPTY_SERVERLOAD);
182 
183       final AssignmentManager am = master.getAssignmentManager();
184       RegionPlan plan = new RegionPlan(hri, null, deadServer);
185       am.addPlan(hri.getEncodedName(), plan);
186       master.assignRegion(hri);
187 
188       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
189         destServer, EventType.M_ZK_REGION_OFFLINE,
190         EventType.RS_ZK_REGION_OPENING, 0);
191       assertEquals("TansitionNode should fail", -1, version);
192 
193       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
194         @Override
195         public boolean evaluate() throws Exception {
196           return ! am.getRegionStates().isRegionInTransition(hri);
197         }
198       });
199 
200     assertFalse("Region should be assigned", am.getRegionStates().isRegionInTransition(hri));
201     } finally {
202       if (deadServer != null) {
203         master.serverManager.expireServer(deadServer);
204       }
205 
206       TEST_UTIL.deleteTable(Bytes.toBytes(table));
207     }
208   }
209 
210   /**
211    * This tests offlining a region
212    */
213   @Test (timeout=60000)
214   public void testOfflineRegion() throws Exception {
215     TableName table =
216         TableName.valueOf("testOfflineRegion");
217     try {
218       HRegionInfo hri = createTableAndGetOneRegion(table);
219 
220       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
221         getMaster().getAssignmentManager().getRegionStates();
222       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
223       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
224       admin.offline(hri.getRegionName());
225 
226       long timeoutTime = System.currentTimeMillis() + 800;
227       while (true) {
228         List<HRegionInfo> regions =
229           regionStates.getRegionsOfTable(table);
230         if (!regions.contains(hri)) break;
231         long now = System.currentTimeMillis();
232         if (now > timeoutTime) {
233           fail("Failed to offline the region in time");
234           break;
235         }
236         Thread.sleep(10);
237       }
238       RegionState regionState = regionStates.getRegionState(hri);
239       assertTrue(regionState.isOffline());
240     } finally {
241       TEST_UTIL.deleteTable(table);
242     }
243   }
244 
245   /**
246    * This tests moving a region
247    */
248   @Test (timeout=50000)
249   public void testMoveRegion() throws Exception {
250     TableName table =
251         TableName.valueOf("testMoveRegion");
252     try {
253       HRegionInfo hri = createTableAndGetOneRegion(table);
254 
255       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
256         getMaster().getAssignmentManager().getRegionStates();
257       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
258       ServerName destServerName = null;
259       for (int i = 0; i < 3; i++) {
260         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
261         if (!destServer.getServerName().equals(serverName)) {
262           destServerName = destServer.getServerName();
263           break;
264         }
265       }
266       assertTrue(destServerName != null
267         && !destServerName.equals(serverName));
268       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
269         Bytes.toBytes(destServerName.getServerName()));
270 
271       long timeoutTime = System.currentTimeMillis() + 30000;
272       while (true) {
273         ServerName sn = regionStates.getRegionServerOfRegion(hri);
274         if (sn != null && sn.equals(destServerName)) {
275           TEST_UTIL.assertRegionOnServer(hri, sn, 200);
276           break;
277         }
278         long now = System.currentTimeMillis();
279         if (now > timeoutTime) {
280           fail("Failed to move the region in time: "
281             + regionStates.getRegionState(hri));
282         }
283         regionStates.waitForUpdate(50);
284       }
285 
286     } finally {
287       TEST_UTIL.deleteTable(table);
288     }
289   }
290 
291   /**
292    * If a table is deleted, we should not be able to move it anymore.
293    * Otherwise, the region will be brought back.
294    * @throws Exception
295    */
296   @Test (timeout=50000)
297   public void testMoveRegionOfDeletedTable() throws Exception {
298     TableName table =
299         TableName.valueOf("testMoveRegionOfDeletedTable");
300     HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
301     try {
302       HRegionInfo hri = createTableAndGetOneRegion(table);
303 
304       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
305       AssignmentManager am = master.getAssignmentManager();
306       RegionStates regionStates = am.getRegionStates();
307       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
308       ServerName destServerName = null;
309       for (int i = 0; i < 3; i++) {
310         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
311         if (!destServer.getServerName().equals(serverName)) {
312           destServerName = destServer.getServerName();
313           break;
314         }
315       }
316       assertTrue(destServerName != null
317         && !destServerName.equals(serverName));
318 
319       TEST_UTIL.deleteTable(table);
320 
321       try {
322         admin.move(hri.getEncodedNameAsBytes(),
323           Bytes.toBytes(destServerName.getServerName()));
324         fail("We should not find the region");
325       } catch (IOException ioe) {
326         assertTrue(ioe instanceof UnknownRegionException);
327       }
328 
329       am.balance(new RegionPlan(hri, serverName, destServerName));
330       assertFalse("The region should not be in transition",
331         regionStates.isRegionInTransition(hri));
332     } finally {
333       if (admin.tableExists(table)) {
334         TEST_UTIL.deleteTable(table);
335       }
336     }
337   }
338 
339   HRegionInfo createTableAndGetOneRegion(
340       final TableName tableName) throws IOException, InterruptedException {
341     HTableDescriptor desc = new HTableDescriptor(tableName);
342     desc.addFamily(new HColumnDescriptor(FAMILY));
343     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
344 
345     // wait till the table is assigned
346     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
347     long timeoutTime = System.currentTimeMillis() + 1000;
348     while (true) {
349       List<HRegionInfo> regions = master.getAssignmentManager().
350         getRegionStates().getRegionsOfTable(tableName);
351       if (regions.size() > 3) {
352         return regions.get(2);
353       }
354       long now = System.currentTimeMillis();
355       if (now > timeoutTime) {
356         fail("Could not find an online region");
357       }
358       Thread.sleep(10);
359     }
360   }
361 
362   /**
363    * This test should not be flaky. If it is flaky, it means something
364    * wrong with AssignmentManager which should be reported and fixed
365    *
366    * This tests forcefully assign a region while it's closing and re-assigned.
367    */
368   @Test (timeout=60000)
369   public void testForceAssignWhileClosing() throws Exception {
370     String table = "testForceAssignWhileClosing";
371     try {
372       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
373       desc.addFamily(new HColumnDescriptor(FAMILY));
374       admin.createTable(desc);
375 
376       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
377       HRegionInfo hri = new HRegionInfo(
378         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
379       MetaEditor.addRegionToMeta(meta, hri);
380 
381       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
382       master.assignRegion(hri);
383       AssignmentManager am = master.getAssignmentManager();
384       assertTrue(am.waitForAssignment(hri));
385       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
386       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
387       
388       MyRegionObserver.preCloseEnabled.set(true);
389       am.unassign(hri);
390       RegionState state = am.getRegionStates().getRegionState(hri);
391       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
392 
393       MyRegionObserver.preCloseEnabled.set(false);
394       am.unassign(hri, true);
395 
396       // region is closing now, will be re-assigned automatically.
397       // now, let's forcefully assign it again. it should be
398       // assigned properly and no double-assignment
399       am.assign(hri, true, true);
400 
401       // let's check if it's assigned after it's out of transition
402       am.waitOnRegionToClearRegionsInTransition(hri);
403       assertTrue(am.waitForAssignment(hri));
404 
405       ServerName serverName = master.getAssignmentManager().
406         getRegionStates().getRegionServerOfRegion(hri);
407       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
408     } finally {
409       MyRegionObserver.preCloseEnabled.set(false);
410       TEST_UTIL.deleteTable(Bytes.toBytes(table));
411     }
412   }
413 
414   /**
415    * This tests region close failed
416    */
417   @Test (timeout=60000)
418   public void testCloseFailed() throws Exception {
419     String table = "testCloseFailed";
420     try {
421       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
422       desc.addFamily(new HColumnDescriptor(FAMILY));
423       admin.createTable(desc);
424 
425       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
426       HRegionInfo hri = new HRegionInfo(
427         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
428       MetaEditor.addRegionToMeta(meta, hri);
429 
430       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
431       master.assignRegion(hri);
432       AssignmentManager am = master.getAssignmentManager();
433       assertTrue(am.waitForAssignment(hri));
434       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
435       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
436 
437       MyRegionObserver.preCloseEnabled.set(true);
438       am.unassign(hri);
439       RegionState state = am.getRegionStates().getRegionState(hri);
440       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
441 
442       MyRegionObserver.preCloseEnabled.set(false);
443       am.unassign(hri, true);
444 
445       // region may still be assigned now since it's closing,
446       // let's check if it's assigned after it's out of transition
447       am.waitOnRegionToClearRegionsInTransition(hri);
448 
449       // region should be closed and re-assigned
450       assertTrue(am.waitForAssignment(hri));
451       ServerName serverName = master.getAssignmentManager().
452         getRegionStates().getRegionServerOfRegion(hri);
453       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
454     } finally {
455       MyRegionObserver.preCloseEnabled.set(false);
456       TEST_UTIL.deleteTable(Bytes.toBytes(table));
457     }
458   }
459 
460   /**
461    * This tests region open failed
462    */
463   @Test (timeout=60000)
464   public void testOpenFailed() throws Exception {
465     String table = "testOpenFailed";
466     try {
467       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
468       desc.addFamily(new HColumnDescriptor(FAMILY));
469       admin.createTable(desc);
470 
471       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
472       HRegionInfo hri = new HRegionInfo(
473         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
474       MetaEditor.addRegionToMeta(meta, hri);
475 
476       MyLoadBalancer.controledRegion = hri.getEncodedName();
477 
478       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
479       master.assignRegion(hri);
480       AssignmentManager am = master.getAssignmentManager();
481       assertFalse(am.waitForAssignment(hri));
482 
483       RegionState state = am.getRegionStates().getRegionState(hri);
484       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
485       // Failed to open since no plan, so it's on no server
486       assertNull(state.getServerName());
487 
488       MyLoadBalancer.controledRegion = null;
489       master.assignRegion(hri);
490       assertTrue(am.waitForAssignment(hri));
491 
492       ServerName serverName = master.getAssignmentManager().
493         getRegionStates().getRegionServerOfRegion(hri);
494       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
495     } finally {
496       MyLoadBalancer.controledRegion = null;
497       TEST_UTIL.deleteTable(Bytes.toBytes(table));
498     }
499   }
500 
501   /**
502    * This tests region open failure which is not recoverable
503    */
504   @Test (timeout=60000)
505   public void testOpenFailedUnrecoverable() throws Exception {
506     TableName table =
507         TableName.valueOf("testOpenFailedUnrecoverable");
508     try {
509       HTableDescriptor desc = new HTableDescriptor(table);
510       desc.addFamily(new HColumnDescriptor(FAMILY));
511       admin.createTable(desc);
512 
513       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
514       HRegionInfo hri = new HRegionInfo(
515         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
516       MetaEditor.addRegionToMeta(meta, hri);
517 
518       FileSystem fs = FileSystem.get(conf);
519       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
520       Path regionDir = new Path(tableDir, hri.getEncodedName());
521       // create a file named the same as the region dir to
522       // mess up with region opening
523       fs.create(regionDir, true);
524 
525       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
526       master.assignRegion(hri);
527       AssignmentManager am = master.getAssignmentManager();
528       assertFalse(am.waitForAssignment(hri));
529 
530       RegionState state = am.getRegionStates().getRegionState(hri);
531       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
532       // Failed to open due to file system issue. Region state should
533       // carry the opening region server so that we can force close it
534       // later on before opening it again. See HBASE-9092.
535       assertNotNull(state.getServerName());
536 
537       // remove the blocking file, so that region can be opened
538       fs.delete(regionDir, true);
539       master.assignRegion(hri);
540       assertTrue(am.waitForAssignment(hri));
541 
542       ServerName serverName = master.getAssignmentManager().
543         getRegionStates().getRegionServerOfRegion(hri);
544       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
545     } finally {
546       TEST_UTIL.deleteTable(table);
547     }
548   }
549 
550   @Test (timeout=60000)
551   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
552     final TableName table =
553         TableName.valueOf
554             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
555     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
556     HRegionInfo hri = null;
557     ServerName serverName = null;
558     try {
559       hri = createTableAndGetOneRegion(table);
560       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
561       ServerName destServerName = null;
562       HRegionServer destServer = null;
563       for (int i = 0; i < 3; i++) {
564         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
565         if (!destServer.getServerName().equals(serverName)) {
566           destServerName = destServer.getServerName();
567           break;
568         }
569       }
570       am.regionOffline(hri);
571       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
572       am.getRegionStates().updateRegionState(hri, State.PENDING_OPEN, destServerName);
573       if (ConfigUtil.useZKForAssignment(conf)) {
574         ZKAssign.createNodeOffline(zkw, hri, destServerName);
575         ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
576   
577         // Wait till the event is processed and the region is in transition
578         long timeoutTime = System.currentTimeMillis() + 20000;
579         while (!am.getRegionStates().isRegionInTransition(hri)) {
580           assertTrue("Failed to process ZK opening event in time",
581             System.currentTimeMillis() < timeoutTime);
582           Thread.sleep(100);
583         }
584       }
585 
586       am.getZKTable().setDisablingTable(table);
587       List<HRegionInfo> toAssignRegions = am.processServerShutdown(destServerName);
588       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
589       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
590           .getRegionState(hri).isOffline());
591     } finally {
592       if (hri != null && serverName != null) {
593         am.regionOnline(hri, serverName);
594       }
595       am.getZKTable().setDisabledTable(table);
596       TEST_UTIL.deleteTable(table);
597     }
598   }
599 
600   /**
601    * This tests region close hanging
602    */
603   @Test (timeout=60000)
604   public void testCloseHang() throws Exception {
605     String table = "testCloseHang";
606     try {
607       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
608       desc.addFamily(new HColumnDescriptor(FAMILY));
609       admin.createTable(desc);
610 
611       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
612       HRegionInfo hri = new HRegionInfo(
613         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
614       MetaEditor.addRegionToMeta(meta, hri);
615 
616       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
617       master.assignRegion(hri);
618       AssignmentManager am = master.getAssignmentManager();
619       assertTrue(am.waitForAssignment(hri));
620       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
621       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
622 
623       MyRegionObserver.postCloseEnabled.set(true);
624       am.unassign(hri);
625       // Now region should pending_close or closing
626       // Unassign it again forcefully so that we can trigger already
627       // in transition exception. This test is to make sure this scenario
628       // is handled properly.
629       am.server.getConfiguration().setLong(
630         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
631       am.unassign(hri, true);
632       RegionState state = am.getRegionStates().getRegionState(hri);
633       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
634 
635       // Let region closing move ahead. The region should be closed
636       // properly and re-assigned automatically
637       MyRegionObserver.postCloseEnabled.set(false);
638 
639       // region may still be assigned now since it's closing,
640       // let's check if it's assigned after it's out of transition
641       am.waitOnRegionToClearRegionsInTransition(hri);
642 
643       // region should be closed and re-assigned
644       assertTrue(am.waitForAssignment(hri));
645       ServerName serverName = master.getAssignmentManager().
646         getRegionStates().getRegionServerOfRegion(hri);
647       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
648     } finally {
649       MyRegionObserver.postCloseEnabled.set(false);
650       TEST_UTIL.deleteTable(Bytes.toBytes(table));
651     }
652   }
653 
654   /**
655    * This tests region close racing with open
656    */
657   @Test (timeout=60000)
658   public void testOpenCloseRacing() throws Exception {
659     String table = "testOpenCloseRacing";
660     try {
661       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
662       desc.addFamily(new HColumnDescriptor(FAMILY));
663       admin.createTable(desc);
664 
665       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
666       HRegionInfo hri = new HRegionInfo(
667         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
668       MetaEditor.addRegionToMeta(meta, hri);
669       meta.close();
670 
671       MyRegionObserver.postOpenEnabled.set(true);
672       MyRegionObserver.postOpenCalled = false;
673       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
674       // Region will be opened, but it won't complete
675       master.assignRegion(hri);
676       long end = EnvironmentEdgeManager.currentTimeMillis() + 20000;
677       // Wait till postOpen is called
678       while (!MyRegionObserver.postOpenCalled ) {
679         assertFalse("Timed out waiting for postOpen to be called",
680           EnvironmentEdgeManager.currentTimeMillis() > end);
681         Thread.sleep(300);
682       }
683 
684       AssignmentManager am = master.getAssignmentManager();
685       // Now let's unassign it, it should do nothing
686       am.unassign(hri);
687       RegionState state = am.getRegionStates().getRegionState(hri);
688       ServerName oldServerName = state.getServerName();
689       assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
690 
691       // Now the region is stuck in opening
692       // Let's forcefully re-assign it to trigger closing/opening
693       // racing. This test is to make sure this scenario
694       // is handled properly.
695       MyRegionObserver.postOpenEnabled.set(false);
696       ServerName destServerName = null;
697       int numRS = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size();
698       for (int i = 0; i < numRS; i++) {
699         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
700         if (!destServer.getServerName().equals(oldServerName)) {
701           destServerName = destServer.getServerName();
702           break;
703         }
704       }
705       assertNotNull(destServerName);
706       assertFalse("Region should be assigned on a new region server",
707         oldServerName.equals(destServerName));
708       List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
709       regions.add(hri);
710       am.assign(destServerName, regions);
711 
712       // let's check if it's assigned after it's out of transition
713       am.waitOnRegionToClearRegionsInTransition(hri);
714       assertTrue(am.waitForAssignment(hri));
715 
716       ServerName serverName = master.getAssignmentManager().
717         getRegionStates().getRegionServerOfRegion(hri);
718       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
719     } finally {
720       MyRegionObserver.postOpenEnabled.set(false);
721       TEST_UTIL.deleteTable(Bytes.toBytes(table));
722     }
723   }
724 
725   /**
726    * Test force unassign/assign a region hosted on a dead server
727    */
728   @Test (timeout=60000)
729   public void testAssignRacingWithSSH() throws Exception {
730     String table = "testAssignRacingWithSSH";
731     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
732     MyMaster master = null;
733     try {
734       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
735       desc.addFamily(new HColumnDescriptor(FAMILY));
736       admin.createTable(desc);
737 
738       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
739       HRegionInfo hri = new HRegionInfo(
740         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
741       MetaEditor.addRegionToMeta(meta, hri);
742 
743       // Assign the region
744       master = (MyMaster)cluster.getMaster();
745       master.assignRegion(hri);
746 
747       // Hold SSH before killing the hosting server
748       master.enableSSH(false);
749 
750       AssignmentManager am = master.getAssignmentManager();
751       RegionStates regionStates = am.getRegionStates();
752       ServerName metaServer = regionStates.getRegionServerOfRegion(
753         HRegionInfo.FIRST_META_REGIONINFO);
754       while (true) {
755         assertTrue(am.waitForAssignment(hri));
756         RegionState state = regionStates.getRegionState(hri);
757         ServerName oldServerName = state.getServerName();
758         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
759           // Kill the hosting server, which doesn't have meta on it.
760           cluster.killRegionServer(oldServerName);
761           cluster.waitForRegionServerToStop(oldServerName, -1);
762           break;
763         }
764         int i = cluster.getServerWithMeta();
765         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
766         oldServerName = rs.getServerName();
767         master.move(hri.getEncodedNameAsBytes(),
768           Bytes.toBytes(oldServerName.getServerName()));
769       }
770 
771       // You can't assign a dead region before SSH
772       am.assign(hri, true, true);
773       RegionState state = regionStates.getRegionState(hri);
774       assertTrue(state.isFailedClose());
775 
776       // You can't unassign a dead region before SSH either
777       am.unassign(hri, true);
778       assertTrue(state.isFailedClose());
779 
780       // Enable SSH so that log can be split
781       master.enableSSH(true);
782 
783       // let's check if it's assigned after it's out of transition.
784       // no need to assign it manually, SSH should do it
785       am.waitOnRegionToClearRegionsInTransition(hri);
786       assertTrue(am.waitForAssignment(hri));
787 
788       ServerName serverName = master.getAssignmentManager().
789         getRegionStates().getRegionServerOfRegion(hri);
790       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
791     } finally {
792       if (master != null) {
793         master.enableSSH(true);
794       }
795       TEST_UTIL.deleteTable(Bytes.toBytes(table));
796     }
797   }
798 
799   /**
800    * Test force unassign/assign a region of a disabled table
801    */
802   @Test (timeout=60000)
803   public void testAssignDisabledRegion() throws Exception {
804     String table = "testAssignDisabledRegion";
805     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
806     MyMaster master = null;
807     try {
808       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
809       desc.addFamily(new HColumnDescriptor(FAMILY));
810       admin.createTable(desc);
811 
812       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
813       HRegionInfo hri = new HRegionInfo(
814         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
815       MetaEditor.addRegionToMeta(meta, hri);
816 
817       // Assign the region
818       master = (MyMaster)cluster.getMaster();
819       master.assignRegion(hri);
820       AssignmentManager am = master.getAssignmentManager();
821       RegionStates regionStates = am.getRegionStates();
822       assertTrue(am.waitForAssignment(hri));
823 
824       // Disable the table
825       admin.disableTable(table);
826       assertTrue(regionStates.isRegionOffline(hri));
827 
828       // You can't assign a disabled region
829       am.assign(hri, true, true);
830       assertTrue(regionStates.isRegionOffline(hri));
831 
832       // You can't unassign a disabled region either
833       am.unassign(hri, true);
834       assertTrue(regionStates.isRegionOffline(hri));
835     } finally {
836       TEST_UTIL.deleteTable(Bytes.toBytes(table));
837     }
838   }
839   
840   /**
841    * Test that region state transition call is idempotent
842    */
843   @Test(timeout = 60000)
844   public void testReportRegionStateTransition() throws Exception {
845     String table = "testReportRegionStateTransition";
846     try {
847       MyRegionServer.simulateRetry = true;
848       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
849       desc.addFamily(new HColumnDescriptor(FAMILY));
850       admin.createTable(desc);
851       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
852       HRegionInfo hri =
853           new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
854       MetaEditor.addRegionToMeta(meta, hri);
855       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
856       master.assignRegion(hri);
857       AssignmentManager am = master.getAssignmentManager();
858       am.waitForAssignment(hri);
859       RegionStates regionStates = am.getRegionStates();
860       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
861       // Assert the the region is actually open on the server
862       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
863       // Closing region should just work fine
864       admin.disableTable(TableName.valueOf(table));
865       assertTrue(regionStates.isRegionOffline(hri));
866       List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getOnlineRegions(serverName);
867       assertTrue(!regions.contains(hri));
868     } finally {
869       MyRegionServer.simulateRetry = false;
870       TEST_UTIL.deleteTable(Bytes.toBytes(table));
871     }
872   }
873 
874   /**
875    * Test concurrent updates to meta when meta is not on master. Only for zk-less assignment
876    * @throws Exception
877    */
878   @Test(timeout = 30000)
879   public void testUpdatesRemoteMeta() throws Exception {
880     // Not for zk less assignment
881     if (conf.getBoolean("hbase.assignment.usezk", true)) {
882       return;
883     }
884     conf.setInt("hbase.regionstatestore.meta.connection", 3);
885     final RegionStateStore rss = new RegionStateStore(new MyRegionServer(conf));
886     rss.start();
887     // Create 10 threads and make each do 10 puts related to region state update
888     Thread[] th = new Thread[10];
889     List<String> nameList = new ArrayList<String>();
890     List<TableName> tableNameList = new ArrayList<TableName>();
891     for (int i = 0; i < th.length; i++) {
892       th[i] = new Thread() {
893         @Override
894         public void run() {
895           HRegionInfo[] hri = new HRegionInfo[10];
896           ServerName serverName = ServerName.valueOf("dummyhost", 1000, 1234);
897           for (int i = 0; i < 10; i++) {
898             hri[i] = new HRegionInfo(TableName.valueOf(Thread.currentThread().getName() + "_" + i));
899             RegionState newState = new RegionState(hri[i], RegionState.State.OPEN, serverName);
900             RegionState oldState =
901                 new RegionState(hri[i], RegionState.State.PENDING_OPEN, serverName);
902             rss.updateRegionState(1, newState, oldState);
903           }
904         }
905       };
906       th[i].start();
907       nameList.add(th[i].getName());
908     }
909     for (int i = 0; i < th.length; i++) {
910       th[i].join();
911     }
912     // Add all the expected table names in meta to tableNameList
913     for (String name : nameList) {
914       for (int i = 0; i < 10; i++) {
915         tableNameList.add(TableName.valueOf(name + "_" + i));
916       }
917     }
918     List<Result> metaRows =
919         MetaReader.fullScan(TEST_UTIL.getMiniHBaseCluster().getMaster().getCatalogTracker());
920     int count = 0;
921     // Check all 100 rows are in meta
922     for (Result result : metaRows) {
923       if (tableNameList.contains(HRegionInfo.getTable(result.getRow()))) {
924         count++;
925         if (count == 100) {
926           break;
927         }
928       }
929     }
930     assertTrue(count == 100);
931     rss.stop();
932   }
933 
934   static class MyLoadBalancer extends StochasticLoadBalancer {
935     // For this region, if specified, always assign to nowhere
936     static volatile String controledRegion = null;
937 
938     @Override
939     public ServerName randomAssignment(HRegionInfo regionInfo,
940         List<ServerName> servers) {
941       if (regionInfo.getEncodedName().equals(controledRegion)) {
942         return null;
943       }
944       return super.randomAssignment(regionInfo, servers);
945     }
946   }
947 
948   public static class MyMaster extends HMaster {
949     AtomicBoolean enabled = new AtomicBoolean(true);
950 
951     public MyMaster(Configuration conf) throws IOException, KeeperException,
952         InterruptedException {
953       super(conf);
954     }
955 
956     @Override
957     public boolean isServerShutdownHandlerEnabled() {
958       return enabled.get() && super.isServerShutdownHandlerEnabled();
959     }
960 
961     public void enableSSH(boolean enabled) {
962       this.enabled.set(enabled);
963       if (enabled) {
964         serverManager.processQueuedDeadServers();
965       }
966     }
967   }
968   
969   public static class MyRegionServer extends MiniHBaseClusterRegionServer {
970     static volatile ServerName abortedServer = null;
971     static volatile boolean simulateRetry;
972 
973     public MyRegionServer(Configuration conf)
974       throws IOException, KeeperException,
975         InterruptedException {
976       super(conf);
977     }
978 
979     @Override
980     public boolean
981         reportRegionTransition(TransitionCode code, long openSeqNum, HRegionInfo... hris) {
982       if (simulateRetry == true) {
983         // Simulate retry by calling the method twice
984         super.reportRegionTransition(code, openSeqNum, hris);
985         return super.reportRegionTransition(code, openSeqNum, hris);
986       }
987       return super.reportRegionTransition(code, openSeqNum, hris);
988     }
989 
990     @Override
991     public boolean isAborted() {
992       return getServerName().equals(abortedServer) || super.isAborted();
993     }
994   }
995 
996 
997   public static class MyRegionObserver extends BaseRegionObserver {
998     // If enabled, fail all preClose calls
999     static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);
1000 
1001     // If enabled, stall postClose calls
1002     static AtomicBoolean postCloseEnabled = new AtomicBoolean(false);
1003 
1004     // If enabled, stall postOpen calls
1005     static AtomicBoolean postOpenEnabled = new AtomicBoolean(false);
1006 
1007     // A flag to track if postOpen is called
1008     static volatile boolean postOpenCalled = false;
1009 
1010     @Override
1011     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
1012         boolean abortRequested) throws IOException {
1013       if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor");
1014     }
1015 
1016     @Override
1017     public void postClose(ObserverContext<RegionCoprocessorEnvironment> c,
1018         boolean abortRequested) {
1019       stallOnFlag(postCloseEnabled);
1020     }
1021 
1022     @Override
1023     public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) {
1024       postOpenCalled = true;
1025       stallOnFlag(postOpenEnabled);
1026     }
1027 
1028     private void stallOnFlag(final AtomicBoolean flag) {
1029       try {
1030         // If enabled, stall
1031         while (flag.get()) {
1032           Thread.sleep(1000);
1033         }
1034       } catch (InterruptedException ie) {
1035         Thread.currentThread().interrupt();
1036       }
1037     }
1038   }
1039 }