View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase;
19  
20  import java.io.IOException;
21  import java.util.HashMap;
22  
23  import com.google.common.collect.Sets;
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.conf.Configuration;
26  import org.apache.hadoop.hbase.ClusterManager.ServiceType;
27  import org.apache.hadoop.hbase.client.HBaseAdmin;
28  import org.apache.hadoop.hbase.client.HConnection;
29  import org.apache.hadoop.hbase.client.HConnectionManager;
30  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
31  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
32  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ServerInfo;
33  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
34  import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos;
35  import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos;
36  import org.apache.hadoop.hbase.util.Bytes;
37  import org.apache.hadoop.hbase.util.Threads;
38  
39  /**
40   * Manages the interactions with an already deployed distributed cluster (as opposed to
41   * a pseudo-distributed, or mini/local cluster). This is used by integration and system tests.
42   */
43  @InterfaceAudience.Private
44  public class DistributedHBaseCluster extends HBaseCluster {
45  
46    private HBaseAdmin admin;
47  
48    private ClusterManager clusterManager;
49  
50    public DistributedHBaseCluster(Configuration conf, ClusterManager clusterManager)
51        throws IOException {
52      super(conf);
53      this.clusterManager = clusterManager;
54      this.admin = new HBaseAdmin(conf);
55      this.initialClusterStatus = getClusterStatus();
56    }
57  
58    public void setClusterManager(ClusterManager clusterManager) {
59      this.clusterManager = clusterManager;
60    }
61  
62    public ClusterManager getClusterManager() {
63      return clusterManager;
64    }
65  
66    /**
67     * Returns a ClusterStatus for this HBase cluster
68     * @throws IOException
69     */
70    @Override
71    public ClusterStatus getClusterStatus() throws IOException {
72      return admin.getClusterStatus();
73    }
74  
75    @Override
76    public ClusterStatus getInitialClusterStatus() throws IOException {
77      return initialClusterStatus;
78    }
79  
80    @Override
81    public void close() throws IOException {
82      if (this.admin != null) {
83        admin.close();
84      }
85    }
86  
87    @Override
88    public AdminProtos.AdminService.BlockingInterface getAdminProtocol(ServerName serverName)
89    throws IOException {
90      return admin.getConnection().getAdmin(serverName);
91    }
92  
93    @Override
94    public ClientProtos.ClientService.BlockingInterface getClientProtocol(ServerName serverName)
95    throws IOException {
96      return admin.getConnection().getClient(serverName);
97    }
98  
99    @Override
100   public void startRegionServer(String hostname) throws IOException {
101     LOG.info("Starting RS on: " + hostname);
102     clusterManager.start(ServiceType.HBASE_REGIONSERVER, hostname);
103   }
104 
105   @Override
106   public void killRegionServer(ServerName serverName) throws IOException {
107     LOG.info("Aborting RS: " + serverName.getServerName());
108     clusterManager.kill(ServiceType.HBASE_REGIONSERVER, serverName.getHostname());
109   }
110 
111   @Override
112   public void stopRegionServer(ServerName serverName) throws IOException {
113     LOG.info("Stopping RS: " + serverName.getServerName());
114     clusterManager.stop(ServiceType.HBASE_REGIONSERVER, serverName.getHostname());
115   }
116 
117   @Override
118   public void waitForRegionServerToStop(ServerName serverName, long timeout) throws IOException {
119     waitForServiceToStop(ServiceType.HBASE_REGIONSERVER, serverName, timeout);
120   }
121 
122   private void waitForServiceToStop(ServiceType service, ServerName serverName, long timeout)
123     throws IOException {
124     LOG.info("Waiting service:" + service + " to stop: " + serverName.getServerName());
125     long start = System.currentTimeMillis();
126 
127     while ((System.currentTimeMillis() - start) < timeout) {
128       if (!clusterManager.isRunning(service, serverName.getHostname())) {
129         return;
130       }
131       Threads.sleep(1000);
132     }
133     throw new IOException("did timeout waiting for service to stop:" + serverName);
134   }
135 
136   @Override
137   public MasterAdminProtos.MasterAdminService.BlockingInterface getMasterAdmin()
138   throws IOException {
139     HConnection conn = HConnectionManager.getConnection(conf);
140     return conn.getMasterAdmin();
141   }
142 
143   @Override
144   public MasterMonitorProtos.MasterMonitorService.BlockingInterface getMasterMonitor()
145   throws IOException {
146     HConnection conn = HConnectionManager.getConnection(conf);
147     return conn.getMasterMonitor();
148   }
149 
150   @Override
151   public void startMaster(String hostname) throws IOException {
152     LOG.info("Starting Master on: " + hostname);
153     clusterManager.start(ServiceType.HBASE_MASTER, hostname);
154   }
155 
156   @Override
157   public void killMaster(ServerName serverName) throws IOException {
158     LOG.info("Aborting Master: " + serverName.getServerName());
159     clusterManager.kill(ServiceType.HBASE_MASTER, serverName.getHostname());
160   }
161 
162   @Override
163   public void stopMaster(ServerName serverName) throws IOException {
164     LOG.info("Stopping Master: " + serverName.getServerName());
165     clusterManager.stop(ServiceType.HBASE_MASTER, serverName.getHostname());
166   }
167 
168   @Override
169   public void waitForMasterToStop(ServerName serverName, long timeout) throws IOException {
170     waitForServiceToStop(ServiceType.HBASE_MASTER, serverName, timeout);
171   }
172 
173   @Override
174   public boolean waitForActiveAndReadyMaster(long timeout) throws IOException {
175     long start = System.currentTimeMillis();
176     while (System.currentTimeMillis() - start < timeout) {
177       try {
178         getMasterAdmin();
179         return true;
180       } catch (MasterNotRunningException m) {
181         LOG.warn("Master not started yet " + m);
182       } catch (ZooKeeperConnectionException e) {
183         LOG.warn("Failed to connect to ZK " + e);
184       }
185       Threads.sleep(1000);
186     }
187     return false;
188   }
189 
190   @Override
191   public ServerName getServerHoldingRegion(byte[] regionName) throws IOException {
192     HConnection connection = admin.getConnection();
193     HRegionLocation regionLoc = connection.locateRegion(regionName);
194     if (regionLoc == null) {
195       LOG.warn("Cannot find region server holding region " + Bytes.toString(regionName)
196           + " for table " + HRegionInfo.getTableName(regionName) + ", start key [" +
197           Bytes.toString(HRegionInfo.getStartKey(regionName)) + "]");
198       return null;
199     }
200 
201     AdminProtos.AdminService.BlockingInterface client =
202       connection.getAdmin(regionLoc.getServerName());
203     ServerInfo info = ProtobufUtil.getServerInfo(client);
204     return ProtobufUtil.toServerName(info.getServerName());
205   }
206 
207   @Override
208   public void waitUntilShutDown() {
209     //Simply wait for a few seconds for now (after issuing serverManager.kill
210     throw new RuntimeException("Not implemented yet");
211   }
212 
213   @Override
214   public void shutdown() throws IOException {
215     //not sure we want this
216     throw new RuntimeException("Not implemented yet");
217   }
218 
219   @Override
220   public boolean isDistributedCluster() {
221     return true;
222   }
223 
224   @Override
225   public void restoreClusterStatus(ClusterStatus initial) throws IOException {
226     //TODO: caution: not tested throughly
227     ClusterStatus current = getClusterStatus();
228 
229     //restore masters
230 
231     //check whether current master has changed
232     if (!ServerName.isSameHostnameAndPort(initial.getMaster(), current.getMaster())) {
233       LOG.info("Initial active master : " + initial.getMaster().getHostname()
234           + " has changed to : " + current.getMaster().getHostname());
235       // If initial master is stopped, start it, before restoring the state.
236       // It will come up as a backup master, if there is already an active master.
237       if (!clusterManager.isRunning(ServiceType.HBASE_MASTER, initial.getMaster().getHostname())) {
238         startMaster(initial.getMaster().getHostname());
239       }
240 
241       //master has changed, we would like to undo this.
242       //1. Kill the current backups
243       //2. Stop current master
244       //3. Start backup masters
245       for (ServerName currentBackup : current.getBackupMasters()) {
246         if (!ServerName.isSameHostnameAndPort(currentBackup, initial.getMaster())) {
247           stopMaster(currentBackup);
248         }
249       }
250       stopMaster(current.getMaster());
251       waitForActiveAndReadyMaster(); //wait so that active master takes over
252       //start backup masters
253       for (ServerName backup : initial.getBackupMasters()) {
254         //these are not started in backup mode, but we should already have an active master
255         if(!clusterManager.isRunning(ServiceType.HBASE_MASTER, backup.getHostname())) {
256           startMaster(backup.getHostname());
257         }
258       }
259     } else {
260       //current master has not changed, match up backup masters
261       HashMap<String, ServerName> initialBackups = new HashMap<String, ServerName>();
262       HashMap<String, ServerName> currentBackups = new HashMap<String, ServerName>();
263 
264       for (ServerName server : initial.getBackupMasters()) {
265         initialBackups.put(server.getHostname(), server);
266       }
267       for (ServerName server : current.getBackupMasters()) {
268         currentBackups.put(server.getHostname(), server);
269       }
270 
271       for (String hostname : Sets.difference(initialBackups.keySet(), currentBackups.keySet())) {
272         if(!clusterManager.isRunning(ServiceType.HBASE_MASTER, hostname)) {
273           startMaster(hostname);
274         }
275       }
276 
277       for (String hostname : Sets.difference(currentBackups.keySet(), initialBackups.keySet())) {
278         if(clusterManager.isRunning(ServiceType.HBASE_MASTER, hostname)) {
279           stopMaster(currentBackups.get(hostname));
280         }
281       }
282     }
283 
284     //restore region servers
285     HashMap<String, ServerName> initialServers = new HashMap<String, ServerName>();
286     HashMap<String, ServerName> currentServers = new HashMap<String, ServerName>();
287 
288     for (ServerName server : initial.getServers()) {
289       initialServers.put(server.getHostname(), server);
290     }
291     for (ServerName server : current.getServers()) {
292       currentServers.put(server.getHostname(), server);
293     }
294 
295     for (String hostname : Sets.difference(initialServers.keySet(), currentServers.keySet())) {
296       if(!clusterManager.isRunning(ServiceType.HBASE_REGIONSERVER, hostname)) {
297         startRegionServer(hostname);
298       }
299     }
300 
301     for (String hostname : Sets.difference(currentServers.keySet(), initialServers.keySet())) {
302       if(clusterManager.isRunning(ServiceType.HBASE_REGIONSERVER, hostname)) {
303         stopRegionServer(currentServers.get(hostname));
304       }
305     }
306     // While restoring above, if the HBase Master which was initially the Active one, was down
307     // and the restore put the cluster back to Initial configuration, HAdmin instance will need
308     // to refresh its connections (otherwise it will return incorrect information) or we can
309     // point it to new instance.
310     try {
311       admin.close();
312     } catch (IOException ioe) {
313       LOG.info("While closing the old connection", ioe);
314     }
315     this.admin = new HBaseAdmin(conf);
316     LOG.info("Added new HBaseAdmin");
317   }
318 }