/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.CopyOnWriteArrayList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClockOutOfSyncException;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.RegionLoad;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.YouAreDeadException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler;
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.RequestConverter;
import org.apache.hadoop.hbase.protobuf.ResponseConverter;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ServerInfo;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.RetryCounterFactory;
import org.apache.hadoop.hbase.util.Triple;

import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.ServiceException;
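/**
 * The ServerManager class manages info about region servers.
 * <p>
 * Maintains maps of all currently online region servers (with their load)
 * and of dead servers that are being processed. A server is identified by its
 * ServerName, which includes hostname, port, and startcode.
 * <p>
 * Handles region server startup and report RPCs, clock skew checks,
 * expiration of dead servers, and dispatching of open/close/merge region
 * RPCs to the region servers.
 */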
@InterfaceAudience.Private
public class ServerManager {
  public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART =
      "hbase.master.wait.on.regionservers.maxtostart";

  public static final String WAIT_ON_REGIONSERVERS_MINTOSTART =
      "hbase.master.wait.on.regionservers.mintostart";

  public static final String WAIT_ON_REGIONSERVERS_TIMEOUT =
      "hbase.master.wait.on.regionservers.timeout";

  public static final String WAIT_ON_REGIONSERVERS_INTERVAL =
      "hbase.master.wait.on.regionservers.interval";

  private static final Log LOG = LogFactory.getLog(ServerManager.class);

  /** Set when the cluster is being shut down. */
  private volatile boolean clusterShutdown = false;

  /** The last flushed sequence id of a region, keyed by its encoded name. */
  private final SortedMap<byte[], Long> flushedSequenceIdByRegion =
    new ConcurrentSkipListMap<byte[], Long>(Bytes.BYTES_COMPARATOR);

  /** Map of registered servers to their current load. */
  private final ConcurrentHashMap<ServerName, ServerLoad> onlineServers =
    new ConcurrentHashMap<ServerName, ServerLoad>();

  /** Cache of admin interfaces, one per registered region server. */
  private final Map<ServerName, AdminService.BlockingInterface> rsAdmins =
    new HashMap<ServerName, AdminService.BlockingInterface>();

  /** List of region servers that should not be assigned any more new regions. */
  private final ArrayList<ServerName> drainingServers =
    new ArrayList<ServerName>();

  private final Server master;
  private final MasterServices services;
  private final HConnection connection;

  private final DeadServer deadservers = new DeadServer();

  /** Maximum and warning thresholds for region server clock skew, in ms. */
  private final long maxSkew;
  private final long warningSkew;

  private final RetryCounterFactory pingRetryCounterFactory;
  /**
   * Servers found dead before the ServerShutdownHandler is enabled. Their
   * expiration is delayed until {@link #processQueuedDeadServers()} runs.
   */
  private Set<ServerName> queuedDeadServers = new HashSet<ServerName>();

  /**
   * Dead servers submitted for processing before the AssignmentManager has
   * finished its failover cleanup. Each server is mapped to whether its HLog
   * should be split; they are re-submitted by
   * {@link #processQueuedDeadServers()}.
   */
  private Map<ServerName, Boolean> requeuedDeadServers
    = new ConcurrentHashMap<ServerName, Boolean>();

  /** Listeners that are called on server events. */
  private List<ServerListener> listeners = new CopyOnWriteArrayList<ServerListener>();

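  /**
   * Constructor.
   * @param master the Server this manager runs inside of
   * @param services services provided by the master
   * @throws IOException if the connection to the cluster cannot be created
   */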
  public ServerManager(final Server master, final MasterServices services)
      throws IOException {
    this(master, services, true);
  }

  @SuppressWarnings("deprecation")
  ServerManager(final Server master, final MasterServices services,
      final boolean connect) throws IOException {
    this.master = master;
    this.services = services;
    Configuration c = master.getConfiguration();
    maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
    warningSkew = c.getLong("hbase.master.warningclockskew", 10000);
    this.connection = connect ? HConnectionManager.getConnection(c) : null;
    int pingMaxAttempts = Math.max(1, master.getConfiguration().getInt(
      "hbase.master.maximum.ping.server.attempts", 10));
    int pingSleepInterval = Math.max(1, master.getConfiguration().getInt(
      "hbase.master.ping.server.retry.sleep.interval", 100));
    this.pingRetryCounterFactory = new RetryCounterFactory(pingMaxAttempts, pingSleepInterval);
  }

  /**
   * Add the listener to the notification list.
   * @param listener the ServerListener to register
   */
  public void registerListener(final ServerListener listener) {
    this.listeners.add(listener);
  }

  /**
   * Remove the listener from the notification list.
   * @return true if the listener was registered and has been removed
   */
  public boolean unregisterListener(final ServerListener listener) {
    return this.listeners.remove(listener);
  }

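  /**
   * Let the server manager know a new regionserver has come online.
   * @param ia the InetAddress from which the startup request was received
   * @param port the region server port
   * @param serverStartcode the startcode reported by the region server
   * @param serverCurrentTime the current time of the region server, used for
   *   clock skew checks
   * @return the ServerName the region server registered under
   * @throws IOException if the server is rejected, e.g. because its clock is
   *   out of sync or it is still being processed as a dead server
   */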
  ServerName regionServerStartup(final InetAddress ia, final int port,
      final long serverStartcode, long serverCurrentTime)
      throws IOException {
    // Reject the server if its clock is too far out of sync, or if it is
    // still being processed as a dead server, before recording it as online.
    ServerName sn = ServerName.valueOf(ia.getHostName(), port, serverStartcode);
    checkClockSkew(sn, serverCurrentTime);
    checkIsDead(sn, "STARTUP");
    if (!checkAndRecordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD)) {
      LOG.warn("THIS SHOULD NOT HAPPEN, RegionServerStartup"
        + " could not record the server: " + sn);
    }
    return sn;
  }

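  /**
   * Updates the last flushed sequence ids of the regions hosted on the given
   * server from its reported region loads. Sequence ids that go backwards
   * relative to the recorded value are ignored.
   */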
  private void updateLastFlushedSequenceIds(ServerName sn, ServerLoad hsl) {
    Map<byte[], RegionLoad> regionsLoad = hsl.getRegionsLoad();
    for (Entry<byte[], RegionLoad> entry : regionsLoad.entrySet()) {
      byte[] encodedRegionName = Bytes.toBytes(HRegionInfo.encodeRegionName(entry.getKey()));
      Long existingValue = flushedSequenceIdByRegion.get(encodedRegionName);
      long l = entry.getValue().getCompleteSequenceId();
      if (existingValue != null) {
        if (l != -1 && l < existingValue) {
          LOG.warn("RegionServer " + sn +
              " indicates a last flushed sequence id (" + l +
              ") that is less than the previous last flushed sequence id (" +
              existingValue + ") for region " +
              Bytes.toString(entry.getKey()) + ". Ignoring.");
          continue;
        }
      }
      flushedSequenceIdByRegion.put(encodedRegionName, l);
    }
  }

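  /**
   * Processes a periodic report from a region server: refreshes the recorded
   * load, registering the server first if it is not yet online, and updates
   * the last flushed sequence ids of its regions.
   * @throws YouAreDeadException if the server is currently listed as dead
   */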
  void regionServerReport(ServerName sn,
      ServerLoad sl) throws YouAreDeadException {
    checkIsDead(sn, "REPORT");
    if (null == this.onlineServers.replace(sn, sl)) {
      // The server is not on the online servers map yet; try to record it as
      // a new server.
      if (!checkAndRecordNewServer(sn, sl)) {
        LOG.info("RegionServerReport ignored, could not record the server: " + sn);
        return;
      }
    }
    updateLastFlushedSequenceIds(sn, sl);
  }

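  /**
   * Checks whether a server with the same hostname and port is already
   * registered. If the existing registration has a newer startcode the new
   * server is rejected; otherwise the new server is recorded, listeners are
   * notified, and a stale older registration is expired.
   * @return true if the server was recorded, false if it was rejected
   */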
  boolean checkAndRecordNewServer(
      final ServerName serverName, final ServerLoad sl) {
    ServerName existingServer = null;
    synchronized (this.onlineServers) {
      existingServer = findServerWithSameHostnamePortWithLock(serverName);
      if (existingServer != null && (existingServer.getStartcode() > serverName.getStartcode())) {
        LOG.info("Server serverName=" + serverName + " rejected; we already have "
            + existingServer.toString() + " registered with same hostname and port");
        return false;
      }
      recordNewServerWithLock(serverName, sl);
    }

    // Tell our listeners that a server was added
    if (!this.listeners.isEmpty()) {
      for (ServerListener listener : this.listeners) {
        listener.serverAdded(serverName);
      }
    }

    // Only expire the existing registration if it has an older startcode;
    // an identical startcode means it is the same server instance.
    if (existingServer != null && (existingServer.getStartcode() < serverName.getStartcode())) {
      LOG.info("Triggering server recovery; existingServer " +
          existingServer + " looks stale, new server:" + serverName);
      expireServer(existingServer);
    }
    return true;
  }

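  /**
   * Checks if the clock skew between the server and the master is within the
   * configured limits: above the warning threshold a warning is logged, above
   * the maximum the server is rejected.
   * @param serverName the server reporting its time
   * @param serverCurrentTime the current time on the region server, in ms
   * @throws ClockOutOfSyncException if the skew exceeds the allowed maximum
   */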
  private void checkClockSkew(final ServerName serverName, final long serverCurrentTime)
      throws ClockOutOfSyncException {
    long skew = Math.abs(System.currentTimeMillis() - serverCurrentTime);
    if (skew > maxSkew) {
      String message = "Server " + serverName + " has been " +
        "rejected; Reported time is too far out of sync with master. " +
        "Time difference of " + skew + "ms > max allowed of " + maxSkew + "ms";
      LOG.warn(message);
      throw new ClockOutOfSyncException(message);
    } else if (skew > warningSkew) {
      String message = "Reported time for server " + serverName + " is out of sync with master " +
        "by " + skew + "ms. (Warning threshold is " + warningSkew + "ms; " +
        "error threshold is " + maxSkew + "ms)";
      LOG.warn(message);
    }
  }

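  /**
   * Called when a region server starts up or reports in. Rejects the server
   * if it is currently being processed as a dead server; if an earlier
   * instance of the server is on the dead list and the master is initialized,
   * that stale entry is cleaned up instead.
   * @param what STARTUP or REPORT, used in log and exception messages
   * @throws YouAreDeadException if the server is listed as a dead server
   */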
  private void checkIsDead(final ServerName serverName, final String what)
      throws YouAreDeadException {
    if (this.deadservers.isDeadServer(serverName)) {
      // Hostname, port, and startcode all match an entry on the dead servers
      // list, so this server instance is considered dead.
      String message = "Server " + what + " rejected; currently processing " +
          serverName + " as dead server";
      LOG.debug(message);
      throw new YouAreDeadException(message);
    }
    // Once the master is initialized, a server checking in with the same
    // hostname and port as a dead server is a new instance; clean up the
    // stale dead-server entry.
    if ((this.services == null || ((HMaster) this.services).isInitialized())
        && this.deadservers.cleanPreviousInstance(serverName)) {
      LOG.debug(what + ":" + " Server " + serverName + " came back up," +
          " removed it from the dead servers list");
    }
  }

  /**
   * Looks for an online server with the same hostname and port as the given
   * one. Assumes the caller holds the onlineServers lock.
   * @return the matching online ServerName, or null if none matches
   */
  private ServerName findServerWithSameHostnamePortWithLock(
      final ServerName serverName) {
    for (ServerName sn: this.onlineServers.keySet()) {
      if (ServerName.isSameHostnameAndPort(serverName, sn)) return sn;
    }
    return null;
  }

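  /**
   * Adds the server to the online server map and drops any cached admin
   * connection to it. The caller is expected to hold the onlineServers lock.
   */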
  @VisibleForTesting
  void recordNewServerWithLock(final ServerName serverName, final ServerLoad sl) {
    LOG.info("Registering server=" + serverName);
    this.onlineServers.put(serverName, sl);
    this.rsAdmins.remove(serverName);
  }

  public long getLastFlushedSequenceId(byte[] encodedRegionName) {
    long seqId = -1L;
    if (flushedSequenceIdByRegion.containsKey(encodedRegionName)) {
      seqId = flushedSequenceIdByRegion.get(encodedRegionName);
    }
    return seqId;
  }

  /**
   * @param serverName the server to look up
   * @return the ServerLoad if the server is online, else null
   */
  public ServerLoad getLoad(final ServerName serverName) {
    return this.onlineServers.get(serverName);
  }

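  /**
   * Computes the average number of regions per online region server.
   * @return the average load, or 0 if no region servers are online
   */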
  public double getAverageLoad() {
    int totalLoad = 0;
    int numServers = 0;
    for (ServerLoad sl: this.onlineServers.values()) {
      numServers++;
      totalLoad += sl.getNumberOfRegions();
    }
    // Guard against division by zero (NaN) when no region servers are online.
    return numServers == 0 ? 0.0 : (double)totalLoad / (double)numServers;
  }

  /** @return the count of online region servers */
  int countOfRegionServers() {
    // Presumes onlineServers is a concurrent map
    return this.onlineServers.size();
  }

  /** @return a read-only map of online servers to their load */
  public Map<ServerName, ServerLoad> getOnlineServers() {
    // Presumption is that iterating the returned Map is OK.
    synchronized (this.onlineServers) {
      return Collections.unmodifiableMap(this.onlineServers);
    }
  }

  public DeadServer getDeadServers() {
    return this.deadservers;
  }

  /**
   * @return true if any dead servers are still being processed
   */
  public boolean areDeadServersInProgress() {
    return this.deadservers.areDeadServersInProgress();
  }

  void letRegionServersShutdown() {
    long previousLogTime = 0;
    int onlineServersCt;
    while ((onlineServersCt = onlineServers.size()) > 0) {
      if (System.currentTimeMillis() > (previousLogTime + 1000)) {
        StringBuilder sb = new StringBuilder();
        // It's ok here to not sync on onlineServers - merely logging
        for (ServerName key : this.onlineServers.keySet()) {
          if (sb.length() > 0) {
            sb.append(", ");
          }
          sb.append(key);
        }
        LOG.info("Waiting on regionserver(s) to go down " + sb.toString());
        previousLogTime = System.currentTimeMillis();
      }

      synchronized (onlineServers) {
        try {
          if (onlineServersCt == onlineServers.size()) onlineServers.wait(100);
        } catch (InterruptedException ignored) {
          // continue
        }
      }
    }
  }

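  /**
   * Expires the given server: moves it from the online map to the dead server
   * list and submits a shutdown handler for it. If the ServerShutdownHandler
   * is not yet enabled the server is queued for later expiration; if the
   * cluster is shutting down, no handler is submitted.
   */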
  public synchronized void expireServer(final ServerName serverName) {
    if (!services.isServerShutdownHandlerEnabled()) {
      LOG.info("Master doesn't enable ServerShutdownHandler during initialization, "
          + "delay expiring server " + serverName);
      this.queuedDeadServers.add(serverName);
      return;
    }
    if (this.deadservers.isDeadServer(serverName)) {
      // Shutdown processing is already under way for this server.
      LOG.warn("Expiration of " + serverName +
          " but server shutdown already in progress");
      return;
    }
    synchronized (onlineServers) {
      if (!this.onlineServers.containsKey(serverName)) {
        LOG.warn("Expiration of " + serverName + " but server not online");
      }
      // Add to deadservers before removing from onlineServers so the server
      // always shows up in at least one of the two lists.
      this.deadservers.add(serverName);
      this.onlineServers.remove(serverName);
      onlineServers.notifyAll();
    }
    this.rsAdmins.remove(serverName);

    // If the cluster is going down, servers are expected to expire; don't
    // submit shutdown handlers for them.
    if (this.clusterShutdown) {
      LOG.info("Cluster shutdown set; " + serverName +
        " expired; onlineServers=" + this.onlineServers.size());
      if (this.onlineServers.isEmpty()) {
        master.stop("Cluster shutdown set; onlineServer=0");
      }
      return;
    }

    boolean carryingMeta = services.getAssignmentManager().isCarryingMeta(serverName);
    if (carryingMeta) {
      this.services.getExecutorService().submit(new MetaServerShutdownHandler(this.master,
        this.services, this.deadservers, serverName));
    } else {
      this.services.getExecutorService().submit(new ServerShutdownHandler(this.master,
        this.services, this.deadservers, serverName, true));
    }
    LOG.debug("Added=" + serverName +
      " to dead servers, submitted shutdown handler to be executed meta=" + carryingMeta);

    // Tell our listeners that a server was removed
    if (!this.listeners.isEmpty()) {
      for (ServerListener listener : this.listeners) {
        listener.serverRemoved(serverName);
      }
    }
  }

  public synchronized void processDeadServer(final ServerName serverName) {
    this.processDeadServer(serverName, false);
  }

  public synchronized void processDeadServer(final ServerName serverName, boolean shouldSplitHlog) {
    // While the assignment manager is still rebuilding region states during
    // failover cleanup, it is better not to process dead servers right away.
    // Queue them here and let processQueuedDeadServers() resubmit them once
    // the cleanup is done.
    if (!services.getAssignmentManager().isFailoverCleanupDone()) {
      requeuedDeadServers.put(serverName, shouldSplitHlog);
      return;
    }

    this.deadservers.add(serverName);
    this.services.getExecutorService().submit(
      new ServerShutdownHandler(this.master, this.services, this.deadservers, serverName,
          shouldSplitHlog));
  }

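  /**
   * Processes the dead servers whose handling was delayed because the
   * ServerShutdownHandler was not yet enabled or failover cleanup had not
   * finished: expires the queued servers and re-submits the requeued ones.
   */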
  synchronized void processQueuedDeadServers() {
    if (!services.isServerShutdownHandlerEnabled()) {
      LOG.info("Master hasn't enabled ServerShutdownHandler");
    }
    Iterator<ServerName> serverIterator = queuedDeadServers.iterator();
    while (serverIterator.hasNext()) {
      ServerName tmpServerName = serverIterator.next();
      expireServer(tmpServerName);
      serverIterator.remove();
      requeuedDeadServers.remove(tmpServerName);
    }

    if (!services.getAssignmentManager().isFailoverCleanupDone()) {
      LOG.info("AssignmentManager hasn't finished failover cleanup; waiting");
    }

    for (ServerName tmpServerName : requeuedDeadServers.keySet()) {
      processDeadServer(tmpServerName, requeuedDeadServers.get(tmpServerName));
    }
    requeuedDeadServers.clear();
  }

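  /**
   * Removes the server from the draining server list.
   * @return true if the server was on the list and has been removed
   */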
  public boolean removeServerFromDrainList(final ServerName sn) {
    if (!this.isServerOnline(sn)) {
      LOG.warn("Server " + sn + " is not currently online. " +
               "Removing from draining list anyway, as requested.");
    }
    return this.drainingServers.remove(sn);
  }

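  /**
   * Adds the server to the draining server list.
   * @return false if the server is not online or is already draining
   */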
  public boolean addServerToDrainList(final ServerName sn) {
    // Only online servers can be drained.
    if (!this.isServerOnline(sn)) {
      LOG.warn("Server " + sn + " is not currently online. " +
               "Ignoring request to add it to draining list.");
      return false;
    }
    // Add the server to the draining servers list, if it's not already there.
    if (this.drainingServers.contains(sn)) {
      LOG.warn("Server " + sn + " is already in the draining server list. " +
               "Ignoring request to add it again.");
      return false;
    }
    return this.drainingServers.add(sn);
  }

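  /**
   * Sends an OPEN RPC to the specified server to open the specified region.
   * @param server the server to open the region on
   * @param region the region to open
   * @param versionOfOfflineNode the expected version of the region's offline
   *   znode
   * @param favoredNodes the favored nodes for the region, may be null
   * @return the state reported by the region server, or FAILED_OPENING if no
   *   RPC connection to the server could be obtained
   * @throws IOException if the remote call fails
   */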
  public RegionOpeningState sendRegionOpen(final ServerName server,
      HRegionInfo region, int versionOfOfflineNode, List<ServerName> favoredNodes)
      throws IOException {
    AdminService.BlockingInterface admin = getRsAdmin(server);
    if (admin == null) {
      LOG.warn("Attempting to send OPEN RPC to server " + server.toString() +
        " failed because no RPC connection found to this server");
      return RegionOpeningState.FAILED_OPENING;
    }
    OpenRegionRequest request = RequestConverter.buildOpenRegionRequest(server,
      region, versionOfOfflineNode, favoredNodes,
      (RecoveryMode.LOG_REPLAY == this.services.getMasterFileSystem().getLogRecoveryMode()));
    try {
      OpenRegionResponse response = admin.openRegion(null, request);
      return ResponseConverter.getRegionOpeningState(response);
    } catch (ServiceException se) {
      throw ProtobufUtil.getRemoteException(se);
    }
  }

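  /**
   * Sends an OPEN RPC to the specified server to open the specified regions.
   * @param server the server to open the regions on
   * @param regionOpenInfos the regions to open, each with its znode version
   *   and favored nodes
   * @return the opening states reported by the region server, or null if no
   *   RPC connection to the server could be obtained
   * @throws IOException if the remote call fails
   */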
  public List<RegionOpeningState> sendRegionOpen(ServerName server,
      List<Triple<HRegionInfo, Integer, List<ServerName>>> regionOpenInfos)
      throws IOException {
    AdminService.BlockingInterface admin = getRsAdmin(server);
    if (admin == null) {
      LOG.warn("Attempting to send OPEN RPC to server " + server.toString() +
        " failed because no RPC connection found to this server");
      return null;
    }

    OpenRegionRequest request = RequestConverter.buildOpenRegionRequest(server, regionOpenInfos,
      (RecoveryMode.LOG_REPLAY == this.services.getMasterFileSystem().getLogRecoveryMode()));
    try {
      OpenRegionResponse response = admin.openRegion(null, request);
      return ResponseConverter.getRegionOpeningStateList(response);
    } catch (ServiceException se) {
      throw ProtobufUtil.getRemoteException(se);
    }
  }

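  /**
   * Sends a CLOSE RPC to the specified server to close the specified region.
   * @param server the server to close the region on
   * @param region the region to close
   * @param versionOfClosingNode the expected version of the closing znode
   * @param dest the destination server for the region if it is being moved,
   *   may be null
   * @param transitionInZK whether the close should be tracked in ZooKeeper
   * @return true if the region server accepted the close request
   * @throws IOException if no RPC connection to the server could be obtained
   *   or the remote call fails
   */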
  public boolean sendRegionClose(ServerName server, HRegionInfo region,
      int versionOfClosingNode, ServerName dest, boolean transitionInZK) throws IOException {
    if (server == null) throw new NullPointerException("Passed server is null");
    AdminService.BlockingInterface admin = getRsAdmin(server);
    if (admin == null) {
      throw new IOException("Attempting to send CLOSE RPC to server " +
        server.toString() + " for region " +
        region.getRegionNameAsString() +
        " failed because no RPC connection found to this server");
    }
    return ProtobufUtil.closeRegion(admin, server, region.getRegionName(),
      versionOfClosingNode, dest, transitionInZK);
  }

  public boolean sendRegionClose(ServerName server,
      HRegionInfo region, int versionOfClosingNode) throws IOException {
    return sendRegionClose(server, region, versionOfClosingNode, null, true);
  }

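  /**
   * Sends a MERGE REGIONS RPC to the specified server to merge the two
   * specified regions.
   * @param server the server hosting the regions to merge
   * @param region_a the first region to merge
   * @param region_b the second region to merge
   * @param forcible whether the merge should be forced even if the regions
   *   are not adjacent
   * @throws IOException if no RPC connection to the server could be obtained
   *   or the remote call fails
   */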
  public void sendRegionsMerge(ServerName server, HRegionInfo region_a,
      HRegionInfo region_b, boolean forcible) throws IOException {
    if (server == null)
      throw new NullPointerException("Passed server is null");
    if (region_a == null || region_b == null)
      throw new NullPointerException("Passed region is null");
    AdminService.BlockingInterface admin = getRsAdmin(server);
    if (admin == null) {
      throw new IOException("Attempting to send MERGE REGIONS RPC to server "
          + server.toString() + " for region "
          + region_a.getRegionNameAsString() + ","
          + region_b.getRegionNameAsString()
          + " failed because no RPC connection found to this server");
    }
    ProtobufUtil.mergeRegions(admin, region_a, region_b, forcible);
  }

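  /**
   * Checks whether the region server is reachable and is the same instance
   * (matching startcode) this manager expects, retrying up to the configured
   * number of ping attempts.
   * @return true if the server responded and its startcode matches
   */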
  public boolean isServerReachable(ServerName server) {
    if (server == null) throw new NullPointerException("Passed server is null");

    RetryCounter retryCounter = pingRetryCounterFactory.create();
    while (retryCounter.shouldRetry()) {
      synchronized (this.onlineServers) {
        if (this.deadservers.isDeadServer(server)) {
          return false;
        }
      }
      try {
        AdminService.BlockingInterface admin = getRsAdmin(server);
        if (admin != null) {
          ServerInfo info = ProtobufUtil.getServerInfo(admin);
          return info != null && info.hasServerName()
            && server.getStartcode() == info.getServerName().getStartCode();
        }
      } catch (IOException ioe) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Couldn't reach " + server + ", try=" + retryCounter.getAttemptTimes() + " of "
            + retryCounter.getMaxAttempts(), ioe);
        }
        try {
          retryCounter.sleepUntilNextRetry();
        } catch(InterruptedException ie) {
          Thread.currentThread().interrupt();
          break;
        }
      }
    }
    return false;
  }

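  /**
   * Returns the cached admin interface for the given region server, creating
   * and caching a new connection if none exists yet.
   * @throws IOException if a connection to the server cannot be set up
   */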
  private AdminService.BlockingInterface getRsAdmin(final ServerName sn)
      throws IOException {
    AdminService.BlockingInterface admin = this.rsAdmins.get(sn);
    if (admin == null) {
      LOG.debug("New admin connection to " + sn.toString());
      admin = this.connection.getAdmin(sn);
      this.rsAdmins.put(sn, admin);
    }
    return admin;
  }

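  /**
   * Waits for region servers to check in during master startup. The wait ends
   * when the master is stopped, when the configured maximum number of servers
   * has checked in, or when the timeout has expired, at least the minimum
   * number of servers has checked in, and the count has been stable for one
   * interval.
   */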
  public void waitForRegionServers(MonitoredTask status)
      throws InterruptedException {
    final long interval = this.master.getConfiguration().
      getLong(WAIT_ON_REGIONSERVERS_INTERVAL, 1500);
    final long timeout = this.master.getConfiguration().
      getLong(WAIT_ON_REGIONSERVERS_TIMEOUT, 4500);
    int minToStart = this.master.getConfiguration().
      getInt(WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
    if (minToStart < 1) {
      LOG.warn(String.format(
        "The value of '%s' (%d) can not be less than 1, ignoring.",
        WAIT_ON_REGIONSERVERS_MINTOSTART, minToStart));
      minToStart = 1;
    }
    int maxToStart = this.master.getConfiguration().
      getInt(WAIT_ON_REGIONSERVERS_MAXTOSTART, Integer.MAX_VALUE);
    if (maxToStart < minToStart) {
      LOG.warn(String.format(
        "The value of '%s' (%d) is set less than '%s' (%d), ignoring.",
        WAIT_ON_REGIONSERVERS_MAXTOSTART, maxToStart,
        WAIT_ON_REGIONSERVERS_MINTOSTART, minToStart));
      maxToStart = Integer.MAX_VALUE;
    }

    long now = System.currentTimeMillis();
    final long startTime = now;
    long slept = 0;
    long lastLogTime = 0;
    long lastCountChange = startTime;
    int count = countOfRegionServers();
    int oldCount = 0;
    while (!this.master.isStopped() &&
        count < maxToStart &&
        (lastCountChange + interval > now || timeout > slept || count < minToStart)) {
      // Log some info at every interval time or if there is a change
      if (oldCount != count || lastLogTime + interval < now) {
        lastLogTime = now;
        String msg =
          "Waiting for region servers count to settle; currently" +
            " checked in " + count + ", slept for " + slept + " ms," +
            " expecting minimum of " + minToStart + ", maximum of " + maxToStart +
            ", timeout of " + timeout + " ms, interval of " + interval + " ms.";
        LOG.info(msg);
        status.setStatus(msg);
      }

      // We sleep for some time
      final long sleepTime = 50;
      Thread.sleep(sleepTime);
      now = System.currentTimeMillis();
      slept = now - startTime;

      oldCount = count;
      count = countOfRegionServers();
      if (count != oldCount) {
        lastCountChange = now;
      }
    }

    LOG.info("Finished waiting for region servers count to settle;" +
      " checked in " + count + ", slept for " + slept + " ms," +
      " expecting minimum of " + minToStart + ", maximum of " + maxToStart + "," +
      " master is " + (this.master.isStopped() ? "stopped." : "running.")
    );
  }

  /** @return a copy of the internal list of online servers */
  public List<ServerName> getOnlineServersList() {
    return new ArrayList<ServerName>(this.onlineServers.keySet());
  }

  /** @return a copy of the internal list of draining servers */
  public List<ServerName> getDrainingServersList() {
    return new ArrayList<ServerName>(this.drainingServers);
  }

  /** @return a copy of the internal set of dead-but-not-yet-expired servers */
  Set<ServerName> getDeadNotExpiredServers() {
    return new HashSet<ServerName>(this.queuedDeadServers);
  }

  /** Clears the list of requeued dead servers. */
  void removeRequeuedDeadServers() {
    requeuedDeadServers.clear();
  }

  /**
   * @return a read-only view of the dead servers requeued while failover
   *   cleanup was in progress, mapped to whether their HLog should be split
   */
  Map<ServerName, Boolean> getRequeuedDeadServers() {
    return Collections.unmodifiableMap(this.requeuedDeadServers);
  }

  public boolean isServerOnline(ServerName serverName) {
    return serverName != null && onlineServers.containsKey(serverName);
  }

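  /**
   * Checks if a server is known to be dead: listed as dead, queued for
   * delayed expiration, or requeued for dead server processing. A null
   * server name is treated as dead.
   */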
  public synchronized boolean isServerDead(ServerName serverName) {
    return serverName == null || deadservers.isDeadServer(serverName)
      || queuedDeadServers.contains(serverName)
      || requeuedDeadServers.containsKey(serverName);
  }

  public void shutdownCluster() {
    this.clusterShutdown = true;
    this.master.stop("Cluster shutdown requested");
  }

  public boolean isClusterShutdown() {
    return this.clusterShutdown;
  }

  /**
   * Stop the ServerManager. Closes the connection to the cluster, if any.
   */
  public void stop() {
    if (connection != null) {
      try {
        connection.close();
      } catch (IOException e) {
        LOG.error("Attempt to close connection to master failed", e);
      }
    }
  }

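  /**
   * Creates the list of servers that regions may be assigned to: all online
   * servers minus the excluded server, the draining servers, and the dead
   * servers that have not been expired yet.
   * @param serverToExclude a server to leave out of the list, may be null
   * @return a mutable list of candidate destination servers
   */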
  public List<ServerName> createDestinationServersList(final ServerName serverToExclude){
    final List<ServerName> destServers = getOnlineServersList();

    if (serverToExclude != null){
      destServers.remove(serverToExclude);
    }

    // Loop through the draining server list and remove them from the server list
    final List<ServerName> drainingServersCopy = getDrainingServersList();
    if (!drainingServersCopy.isEmpty()) {
      for (final ServerName server: drainingServersCopy) {
        destServers.remove(server);
      }
    }

    // Remove the dead-but-not-expired servers from the server list.
    removeDeadNotExpiredServers(destServers);
    return destServers;
  }

  /**
   * Calls {@link #createDestinationServersList} without a server to exclude.
   */
  public List<ServerName> createDestinationServersList(){
    return createDestinationServersList(null);
  }

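  /**
   * Removes from the given list the servers that are dead but whose
   * expiration has not been processed yet.
   */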
  void removeDeadNotExpiredServers(List<ServerName> servers) {
    Set<ServerName> deadNotExpiredServersCopy = this.getDeadNotExpiredServers();
    if (!deadNotExpiredServersCopy.isEmpty()) {
      for (ServerName server : deadNotExpiredServersCopy) {
        LOG.debug("Removing dead but not expired server: " + server
          + " from eligible server pool.");
        servers.remove(server);
      }
    }
  }

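  /**
   * Removes from the dead server list any entry that has the same hostname
   * and port as a currently online server.
   */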
  void clearDeadServersWithSameHostNameAndPortOfOnlineServer() {
    for (ServerName serverName : getOnlineServersList()) {
      deadservers.cleanAllPreviousInstances(serverName);
    }
  }
}