1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase;
19  
20  import java.io.IOException;
21  import java.util.HashMap;
22  
23  import org.apache.hadoop.classification.InterfaceAudience;
24  import org.apache.hadoop.conf.Configuration;
25  import org.apache.hadoop.hbase.ClusterManager.ServiceType;
26  import org.apache.hadoop.hbase.ipc.HRegionInterface;
27  import org.apache.hadoop.hbase.ipc.HMasterInterface;
28  import org.apache.hadoop.hbase.client.HBaseAdmin;
29  import org.apache.hadoop.hbase.client.HConnection;
30  import org.apache.hadoop.hbase.client.HConnectionManager;
31  import org.apache.hadoop.hbase.util.Threads;
32  
33  import com.google.common.collect.Sets;
34  
35  /**
36   * Manages the interactions with an already deployed distributed cluster (as opposed to
37   * a pseudo-distributed, or mini/local cluster). This is used by integration and system tests.
38   */
39  @InterfaceAudience.Private
40  public class DistributedHBaseCluster extends HBaseCluster {
41  
42    private HBaseAdmin admin;
43  
44    private ClusterManager clusterManager;
45  
46    public DistributedHBaseCluster(Configuration conf, ClusterManager clusterManager)
47        throws IOException {
48      super(conf);
49      this.clusterManager = clusterManager;
50      this.admin = new HBaseAdmin(conf);
51      this.initialClusterStatus = getClusterStatus();
52    }
53  
54    public void setClusterManager(ClusterManager clusterManager) {
55      this.clusterManager = clusterManager;
56    }
57  
58    public ClusterManager getClusterManager() {
59      return clusterManager;
60    }
61  
62    /**
63     * Returns a ClusterStatus for this HBase cluster
64     * @throws IOException
65     */
66    @Override
67    public ClusterStatus getClusterStatus() throws IOException {
68      return admin.getClusterStatus();
69    }
70  
71    @Override
72    public ClusterStatus getInitialClusterStatus() throws IOException {
73      return initialClusterStatus;
74    }
75  
76    @Override
77    public void close() throws IOException {
78      if (this.admin != null) {
79        admin.close();
80      }
81    }
82  
83    @Override
84    public void startRegionServer(String hostname) throws IOException {
85      LOG.info("Starting RS on: " + hostname);
86      clusterManager.start(ServiceType.HBASE_REGIONSERVER, hostname);
87    }
88  
89    @Override
90    public void killRegionServer(ServerName serverName) throws IOException {
91      LOG.info("Aborting RS: " + serverName.getServerName());
92      clusterManager.kill(ServiceType.HBASE_REGIONSERVER, serverName.getHostname());
93    }
94  
95    @Override
96    public void stopRegionServer(ServerName serverName) throws IOException {
97      LOG.info("Stopping RS: " + serverName.getServerName());
98      clusterManager.stop(ServiceType.HBASE_REGIONSERVER, serverName.getHostname());
99    }
100 
101   @Override
102   public void waitForRegionServerToStop(ServerName serverName, long timeout) throws IOException {
103     waitForServiceToStop(ServiceType.HBASE_REGIONSERVER, serverName, timeout);
104   }
105 
106   private void waitForServiceToStop(ServiceType service, ServerName serverName, long timeout)
107     throws IOException {
108     LOG.info("Waiting service:" + service + " to stop: " + serverName.getServerName());
109     long start = System.currentTimeMillis();
110 
111     while ((System.currentTimeMillis() - start) < timeout) {
112       if (!clusterManager.isRunning(service, serverName.getHostname())) {
113         return;
114       }
115       Threads.sleep(1000);
116     }
117     throw new IOException("did timeout waiting for service to stop:" + serverName);
118   }
119 
120   @Override
121   public HMasterInterface getMasterAdmin() throws IOException {
122     HConnection conn = HConnectionManager.getConnection(conf);
123     return conn.getMaster();
124   }
125 
126   @Override
127   public void startMaster(String hostname) throws IOException {
128     LOG.info("Starting Master on: " + hostname);
129     clusterManager.start(ServiceType.HBASE_MASTER, hostname);
130   }
131 
132   @Override
133   public void killMaster(ServerName serverName) throws IOException {
134     LOG.info("Aborting Master: " + serverName.getServerName());
135     clusterManager.kill(ServiceType.HBASE_MASTER, serverName.getHostname());
136   }
137 
138   @Override
139   public void stopMaster(ServerName serverName) throws IOException {
140     LOG.info("Stopping Master: " + serverName.getServerName());
141     clusterManager.stop(ServiceType.HBASE_MASTER, serverName.getHostname());
142   }
143 
144   @Override
145   public void waitForMasterToStop(ServerName serverName, long timeout) throws IOException {
146     waitForServiceToStop(ServiceType.HBASE_MASTER, serverName, timeout);
147   }
148 
149   @Override
150   public boolean waitForActiveAndReadyMaster(long timeout) throws IOException {
151     long start = System.currentTimeMillis();
152     while (System.currentTimeMillis() - start < timeout) {
153       try {
154         getMasterAdmin();
155         return true;
156       } catch (MasterNotRunningException m) {
157         LOG.warn("Master not started yet " + m);
158       } catch (ZooKeeperConnectionException e) {
159         LOG.warn("Failed to connect to ZK " + e);
160       }
161       Threads.sleep(1000);
162     }
163     return false;
164   }
165 
166   @Override
167   public ServerName getServerHoldingRegion(byte[] regionName) throws IOException {
168     HConnection connection = admin.getConnection();
169     HRegionLocation regionLoc = connection.locateRegion(regionName);
170     if (regionLoc == null) {
171       return null;
172     }
173 
174     org.apache.hadoop.hbase.HServerInfo sn
175 		= connection.getHRegionConnection(regionLoc.getHostname(), regionLoc.getPort()).getHServerInfo();
176 
177     return new ServerName(sn.getServerAddress().getHostname(), sn.getServerAddress().getPort(), sn.getStartCode());
178   }
179 
180   @Override
181   public void waitUntilShutDown() {
182     //Simply wait for a few seconds for now (after issuing serverManager.kill
183     throw new RuntimeException("Not implemented yet");
184   }
185 
186   @Override
187   public void shutdown() throws IOException {
188     //not sure we want this
189     throw new RuntimeException("Not implemented yet");
190   }
191 
192   @Override
193   public boolean isDistributedCluster() {
194     return true;
195   }
196 
197   @Override
198   public void restoreClusterStatus(ClusterStatus initial) throws IOException {
199     //TODO: caution: not tested throughly
200     ClusterStatus current = getClusterStatus();
201 
202     //restore masters
203 
204     //check whether current master has changed
205     if (!ServerName.isSameHostnameAndPort(initial.getMaster(), current.getMaster())) {
206       LOG.info("Initial active master : " + initial.getMaster().getHostname()
207           + " has changed to : " + current.getMaster().getHostname());
208       // If initial master is stopped, start it, before restoring the state.
209       // It will come up as a backup master, if there is already an active master.
210       if (!clusterManager.isRunning(ServiceType.HBASE_MASTER, initial.getMaster().getHostname())) {
211         startMaster(initial.getMaster().getHostname());
212       }
213 
214       //master has changed, we would like to undo this.
215       //1. Kill the current backups
216       //2. Stop current master
217       //3. Start backup masters
218       for (ServerName currentBackup : current.getBackupMasters()) {
219         if (!ServerName.isSameHostnameAndPort(currentBackup, initial.getMaster())) {
220           stopMaster(currentBackup);
221         }
222       }
223       stopMaster(current.getMaster());
224       waitForActiveAndReadyMaster(); //wait so that active master takes over
225       //start backup masters
226       for (ServerName backup : initial.getBackupMasters()) {
227         //these are not started in backup mode, but we should already have an active master
228         if(!clusterManager.isRunning(ServiceType.HBASE_MASTER, backup.getHostname())) {
229           startMaster(backup.getHostname());
230         }
231       }
232     } else {
233       //current master has not changed, match up backup masters
234       HashMap<String, ServerName> initialBackups = new HashMap<String, ServerName>();
235       HashMap<String, ServerName> currentBackups = new HashMap<String, ServerName>();
236 
237       for (ServerName server : initial.getBackupMasters()) {
238         initialBackups.put(server.getHostname(), server);
239       }
240       for (ServerName server : current.getBackupMasters()) {
241         currentBackups.put(server.getHostname(), server);
242       }
243 
244       for (String hostname : Sets.difference(initialBackups.keySet(), currentBackups.keySet())) {
245         if(!clusterManager.isRunning(ServiceType.HBASE_MASTER, hostname)) {
246           startMaster(hostname);
247         }
248       }
249 
250       for (String hostname : Sets.difference(currentBackups.keySet(), initialBackups.keySet())) {
251         if(clusterManager.isRunning(ServiceType.HBASE_MASTER, hostname)) {
252           stopMaster(currentBackups.get(hostname));
253         }
254       }
255     }
256 
257     //restore region servers
258     HashMap<String, ServerName> initialServers = new HashMap<String, ServerName>();
259     HashMap<String, ServerName> currentServers = new HashMap<String, ServerName>();
260 
261     for (ServerName server : initial.getServers()) {
262       initialServers.put(server.getHostname(), server);
263     }
264     for (ServerName server : current.getServers()) {
265       currentServers.put(server.getHostname(), server);
266     }
267 
268     for (String hostname : Sets.difference(initialServers.keySet(), currentServers.keySet())) {
269       if(!clusterManager.isRunning(ServiceType.HBASE_REGIONSERVER, hostname)) {
270         startRegionServer(hostname);
271       }
272     }
273 
274     for (String hostname : Sets.difference(currentServers.keySet(), initialServers.keySet())) {
275       if(clusterManager.isRunning(ServiceType.HBASE_REGIONSERVER, hostname)) {
276         stopRegionServer(currentServers.get(hostname));
277       }
278     }
279     // While restoring above, if the HBase Master which was initially the Active one, was down
280     // and the restore put the cluster back to Initial configuration, HAdmin instance will need
281     // to refresh its connections (otherwise it will return incorrect information) or we can
282     // point it to new instance.
283     try {
284       admin.close();
285     } catch (IOException ioe) {
286       LOG.info("While closing the old connection", ioe);
287     }
288     this.admin = new HBaseAdmin(conf);
289     LOG.info("Added new HBaseAdmin");
290   }
291 }