1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase;
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.Comparator;
24 import java.util.List;
25
26 import org.apache.hadoop.conf.Configuration;
27 import org.apache.hadoop.hbase.ClusterManager.ServiceType;
28 import org.apache.hadoop.hbase.classification.InterfaceAudience;
29 import org.apache.hadoop.hbase.client.Admin;
30 import org.apache.hadoop.hbase.client.ClusterConnection;
31 import org.apache.hadoop.hbase.client.Connection;
32 import org.apache.hadoop.hbase.client.ConnectionFactory;
33 import org.apache.hadoop.hbase.client.RegionLocator;
34 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
35 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
36 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ServerInfo;
37 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
38 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.MasterService;
39 import org.apache.hadoop.hbase.util.Bytes;
40 import org.apache.hadoop.hbase.util.Threads;
41 import com.google.common.collect.Sets;
42
43
44
45
46
47 @InterfaceAudience.Private
48 public class DistributedHBaseCluster extends HBaseCluster {
49 private Admin admin;
50 private final Connection connection;
51
52 private ClusterManager clusterManager;
53
54 public DistributedHBaseCluster(Configuration conf, ClusterManager clusterManager)
55 throws IOException {
56 super(conf);
57 this.clusterManager = clusterManager;
58 this.connection = ConnectionFactory.createConnection(conf);
59 this.admin = this.connection.getAdmin();
60 this.initialClusterStatus = getClusterStatus();
61 }
62
63 public void setClusterManager(ClusterManager clusterManager) {
64 this.clusterManager = clusterManager;
65 }
66
67 public ClusterManager getClusterManager() {
68 return clusterManager;
69 }
70
71
72
73
74
75 @Override
76 public ClusterStatus getClusterStatus() throws IOException {
77 return admin.getClusterStatus();
78 }
79
80 @Override
81 public ClusterStatus getInitialClusterStatus() throws IOException {
82 return initialClusterStatus;
83 }
84
85 @Override
86 public void close() throws IOException {
87 if (this.admin != null) {
88 admin.close();
89 }
90 if (this.connection != null && !this.connection.isClosed()) {
91 this.connection.close();
92 }
93 }
94
95 @Override
96 public AdminProtos.AdminService.BlockingInterface getAdminProtocol(ServerName serverName)
97 throws IOException {
98 return ((ClusterConnection)this.connection).getAdmin(serverName);
99 }
100
101 @Override
102 public ClientProtos.ClientService.BlockingInterface getClientProtocol(ServerName serverName)
103 throws IOException {
104 return ((ClusterConnection)this.connection).getClient(serverName);
105 }
106
107 @Override
108 public void startRegionServer(String hostname) throws IOException {
109 LOG.info("Starting RS on: " + hostname);
110 clusterManager.start(ServiceType.HBASE_REGIONSERVER, hostname);
111 }
112
113 @Override
114 public void killRegionServer(ServerName serverName) throws IOException {
115 LOG.info("Aborting RS: " + serverName.getServerName());
116 clusterManager.kill(ServiceType.HBASE_REGIONSERVER, serverName.getHostname());
117 }
118
119 @Override
120 public void stopRegionServer(ServerName serverName) throws IOException {
121 LOG.info("Stopping RS: " + serverName.getServerName());
122 clusterManager.stop(ServiceType.HBASE_REGIONSERVER, serverName.getHostname());
123 }
124
125 @Override
126 public void waitForRegionServerToStop(ServerName serverName, long timeout) throws IOException {
127 waitForServiceToStop(ServiceType.HBASE_REGIONSERVER, serverName, timeout);
128 }
129
130 private void waitForServiceToStop(ServiceType service, ServerName serverName, long timeout)
131 throws IOException {
132 LOG.info("Waiting service:" + service + " to stop: " + serverName.getServerName());
133 long start = System.currentTimeMillis();
134
135 while ((System.currentTimeMillis() - start) < timeout) {
136 if (!clusterManager.isRunning(service, serverName.getHostname())) {
137 return;
138 }
139 Threads.sleep(1000);
140 }
141 throw new IOException("did timeout waiting for service to stop:" + serverName);
142 }
143
144 @Override
145 public MasterService.BlockingInterface getMasterAdminService()
146 throws IOException {
147 return ((ClusterConnection)this.connection).getMaster();
148 }
149
150 @Override
151 public void startMaster(String hostname) throws IOException {
152 LOG.info("Starting Master on: " + hostname);
153 clusterManager.start(ServiceType.HBASE_MASTER, hostname);
154 }
155
156 @Override
157 public void killMaster(ServerName serverName) throws IOException {
158 LOG.info("Aborting Master: " + serverName.getServerName());
159 clusterManager.kill(ServiceType.HBASE_MASTER, serverName.getHostname());
160 }
161
162 @Override
163 public void stopMaster(ServerName serverName) throws IOException {
164 LOG.info("Stopping Master: " + serverName.getServerName());
165 clusterManager.stop(ServiceType.HBASE_MASTER, serverName.getHostname());
166 }
167
168 @Override
169 public void waitForMasterToStop(ServerName serverName, long timeout) throws IOException {
170 waitForServiceToStop(ServiceType.HBASE_MASTER, serverName, timeout);
171 }
172
173 @Override
174 public boolean waitForActiveAndReadyMaster(long timeout) throws IOException {
175 long start = System.currentTimeMillis();
176 while (System.currentTimeMillis() - start < timeout) {
177 try {
178 getMasterAdminService();
179 return true;
180 } catch (MasterNotRunningException m) {
181 LOG.warn("Master not started yet " + m);
182 } catch (ZooKeeperConnectionException e) {
183 LOG.warn("Failed to connect to ZK " + e);
184 }
185 Threads.sleep(1000);
186 }
187 return false;
188 }
189
190 @Override
191 public ServerName getServerHoldingRegion(TableName tn, byte[] regionName) throws IOException {
192 HRegionLocation regionLoc = null;
193 try (RegionLocator locator = connection.getRegionLocator(tn)) {
194 regionLoc = locator.getRegionLocation(regionName);
195 }
196 if (regionLoc == null) {
197 LOG.warn("Cannot find region server holding region " + Bytes.toString(regionName) +
198 ", start key [" + Bytes.toString(HRegionInfo.getStartKey(regionName)) + "]");
199 return null;
200 }
201
202 AdminProtos.AdminService.BlockingInterface client =
203 ((ClusterConnection)this.connection).getAdmin(regionLoc.getServerName());
204 ServerInfo info = ProtobufUtil.getServerInfo(client);
205 return ProtobufUtil.toServerName(info.getServerName());
206 }
207
208 @Override
209 public void waitUntilShutDown() {
210
211 throw new RuntimeException("Not implemented yet");
212 }
213
214 @Override
215 public void shutdown() throws IOException {
216
217 throw new RuntimeException("Not implemented yet");
218 }
219
220 @Override
221 public boolean isDistributedCluster() {
222 return true;
223 }
224
225 @Override
226 public boolean restoreClusterStatus(ClusterStatus initial) throws IOException {
227 ClusterStatus current = getClusterStatus();
228
229 LOG.info("Restoring cluster - started");
230
231
232 boolean success = true;
233 success = restoreMasters(initial, current) & success;
234 success = restoreRegionServers(initial, current) & success;
235 success = restoreAdmin() & success;
236
237 LOG.info("Restoring cluster - done");
238 return success;
239 }
240
241 protected boolean restoreMasters(ClusterStatus initial, ClusterStatus current) {
242 List<IOException> deferred = new ArrayList<IOException>();
243
244 if (!ServerName.isSameHostnameAndPort(initial.getMaster(), current.getMaster())) {
245 LOG.info("Restoring cluster - Initial active master : " + initial.getMaster().getHostname()
246 + " has changed to : " + current.getMaster().getHostname());
247
248
249 try {
250 if (!clusterManager.isRunning(ServiceType.HBASE_MASTER, initial.getMaster().getHostname())) {
251 LOG.info("Restoring cluster - starting initial active master at:" + initial.getMaster().getHostname());
252 startMaster(initial.getMaster().getHostname());
253 }
254
255
256
257
258
259 for (ServerName currentBackup : current.getBackupMasters()) {
260 if (!ServerName.isSameHostnameAndPort(currentBackup, initial.getMaster())) {
261 LOG.info("Restoring cluster - stopping backup master: " + currentBackup);
262 stopMaster(currentBackup);
263 }
264 }
265 LOG.info("Restoring cluster - stopping active master: " + current.getMaster());
266 stopMaster(current.getMaster());
267 waitForActiveAndReadyMaster();
268 } catch (IOException ex) {
269
270
271 deferred.add(ex);
272 }
273
274
275 for (ServerName backup : initial.getBackupMasters()) {
276 try {
277
278 if(!clusterManager.isRunning(ServiceType.HBASE_MASTER, backup.getHostname())) {
279 LOG.info("Restoring cluster - starting initial backup master: " + backup.getHostname());
280 startMaster(backup.getHostname());
281 }
282 } catch (IOException ex) {
283 deferred.add(ex);
284 }
285 }
286 } else {
287
288 HashMap<String, ServerName> initialBackups = new HashMap<String, ServerName>();
289 HashMap<String, ServerName> currentBackups = new HashMap<String, ServerName>();
290
291 for (ServerName server : initial.getBackupMasters()) {
292 initialBackups.put(server.getHostname(), server);
293 }
294 for (ServerName server : current.getBackupMasters()) {
295 currentBackups.put(server.getHostname(), server);
296 }
297
298 for (String hostname : Sets.difference(initialBackups.keySet(), currentBackups.keySet())) {
299 try {
300 if(!clusterManager.isRunning(ServiceType.HBASE_MASTER, hostname)) {
301 LOG.info("Restoring cluster - starting initial backup master: " + hostname);
302 startMaster(hostname);
303 }
304 } catch (IOException ex) {
305 deferred.add(ex);
306 }
307 }
308
309 for (String hostname : Sets.difference(currentBackups.keySet(), initialBackups.keySet())) {
310 try {
311 if(clusterManager.isRunning(ServiceType.HBASE_MASTER, hostname)) {
312 LOG.info("Restoring cluster - stopping backup master: " + hostname);
313 stopMaster(currentBackups.get(hostname));
314 }
315 } catch (IOException ex) {
316 deferred.add(ex);
317 }
318 }
319 }
320 if (!deferred.isEmpty()) {
321 LOG.warn("Restoring cluster - restoring region servers reported " + deferred.size() + " errors:");
322 for (int i=0; i<deferred.size() && i < 3; i++) {
323 LOG.warn(deferred.get(i));
324 }
325 }
326
327 return deferred.isEmpty();
328 }
329
330 protected boolean restoreRegionServers(ClusterStatus initial, ClusterStatus current) {
331 HashMap<String, ServerName> initialServers = new HashMap<String, ServerName>();
332 HashMap<String, ServerName> currentServers = new HashMap<String, ServerName>();
333
334 for (ServerName server : initial.getServers()) {
335 initialServers.put(server.getHostname(), server);
336 }
337 for (ServerName server : current.getServers()) {
338 currentServers.put(server.getHostname(), server);
339 }
340
341 List<IOException> deferred = new ArrayList<IOException>();
342 for (String hostname : Sets.difference(initialServers.keySet(), currentServers.keySet())) {
343 try {
344 if(!clusterManager.isRunning(ServiceType.HBASE_REGIONSERVER, hostname)) {
345 LOG.info("Restoring cluster - starting initial region server: " + hostname);
346 startRegionServer(hostname);
347 }
348 } catch (IOException ex) {
349 deferred.add(ex);
350 }
351 }
352
353 for (String hostname : Sets.difference(currentServers.keySet(), initialServers.keySet())) {
354 try {
355 if(clusterManager.isRunning(ServiceType.HBASE_REGIONSERVER, hostname)) {
356 LOG.info("Restoring cluster - stopping initial region server: " + hostname);
357 stopRegionServer(currentServers.get(hostname));
358 }
359 } catch (IOException ex) {
360 deferred.add(ex);
361 }
362 }
363 if (!deferred.isEmpty()) {
364 LOG.warn("Restoring cluster - restoring region servers reported " + deferred.size() + " errors:");
365 for (int i=0; i<deferred.size() && i < 3; i++) {
366 LOG.warn(deferred.get(i));
367 }
368 }
369
370 return deferred.isEmpty();
371 }
372
373 protected boolean restoreAdmin() throws IOException {
374
375
376
377
378 try {
379 admin.close();
380 } catch (IOException ioe) {
381 LOG.warn("While closing the old connection", ioe);
382 }
383 this.admin = this.connection.getAdmin();
384 LOG.info("Added new HBaseAdmin");
385 return true;
386 }
387 }