View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.util;
21  
22  import java.io.IOException;
23  import java.util.List;
24  
25  import org.apache.hadoop.conf.Configuration;
26  import org.apache.hadoop.hbase.HConstants;
27  import org.apache.hadoop.hbase.HRegionInfo;
28  import org.apache.hadoop.hbase.HServerAddress;
29  import org.apache.hadoop.hbase.NotServingRegionException;
30  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
31  import org.apache.hadoop.hbase.client.HConnectionManager;
32  import org.apache.hadoop.hbase.ipc.HRegionInterface;
33  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
34  import org.apache.zookeeper.KeeperException;
35  
36  public class HBaseFsckRepair {
37  
38    /**
39     * Fix dupe assignment by doing silent closes on each RS hosting the region
40     * and then force ZK unassigned node to OFFLINE to trigger assignment by
41     * master.
42     * @param conf
43     * @param region
44     * @param servers
45     * @throws IOException
46     * @throws KeeperException
47     * @throws InterruptedException
48     */
49    public static void fixDupeAssignment(Configuration conf, HRegionInfo region,
50        List<HServerAddress> servers)
51    throws IOException, KeeperException, InterruptedException {
52  
53      HRegionInfo actualRegion = new HRegionInfo(region);
54  
55      // Close region on the servers silently
56      for(HServerAddress server : servers) {
57        closeRegionSilentlyAndWait(conf, server, actualRegion);
58      }
59  
60      // Force ZK node to OFFLINE so master assigns
61      forceOfflineInZK(conf, actualRegion);
62    }
63  
64    /**
65     * Fix unassigned by creating/transition the unassigned ZK node for this
66     * region to OFFLINE state with a special flag to tell the master that this
67     * is a forced operation by HBCK.
68     * @param conf
69     * @param region
70     * @throws IOException
71     * @throws KeeperException
72     */
73    public static void fixUnassigned(Configuration conf, HRegionInfo region)
74    throws IOException, KeeperException {
75      HRegionInfo actualRegion = new HRegionInfo(region);
76  
77      // Force ZK node to OFFLINE so master assigns
78      forceOfflineInZK(conf, actualRegion);
79    }
80  
81    private static void forceOfflineInZK(Configuration conf, HRegionInfo region)
82    throws ZooKeeperConnectionException, KeeperException, IOException {
83      ZKAssign.createOrForceNodeOffline(
84          HConnectionManager.getConnection(conf).getZooKeeperWatcher(),
85          region, HConstants.HBCK_CODE_NAME);
86    }
87  
88    protected static void closeRegionSilentlyAndWait(Configuration conf,
89        HServerAddress server, HRegionInfo region)
90    throws IOException, InterruptedException {
91      HRegionInterface rs =
92        HConnectionManager.getConnection(conf).getHRegionConnection(server);
93      rs.closeRegion(region, false);
94      long timeout = conf.getLong("hbase.hbck.close.timeout", 120000);
95      long expiration = timeout + System.currentTimeMillis();
96      while (System.currentTimeMillis() < expiration) {
97        try {
98          HRegionInfo rsRegion = rs.getRegionInfo(region.getRegionName());
99          if (rsRegion == null) throw new NotServingRegionException();
100       } catch (Exception e) {
101         return;
102       }
103       Thread.sleep(1000);
104     }
105     throw new IOException("Region " + region + " failed to close within" +
106         " timeout " + timeout);
107   }
108 }