View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.master;
21  
22  import org.apache.commons.logging.Log;
23  import org.apache.commons.logging.LogFactory;
24  import org.apache.hadoop.conf.Configuration;
25  import org.apache.hadoop.hbase.Chore;
26  import org.apache.hadoop.hbase.HConstants;
27  import org.apache.hadoop.hbase.HMsg;
28  import org.apache.hadoop.hbase.HRegionInfo;
29  import org.apache.hadoop.hbase.HRegionLocation;
30  import org.apache.hadoop.hbase.HServerAddress;
31  import org.apache.hadoop.hbase.HServerInfo;
32  import org.apache.hadoop.hbase.HServerLoad;
33  import org.apache.hadoop.hbase.PleaseHoldException;
34  import org.apache.hadoop.hbase.YouAreDeadException;
35  import org.apache.hadoop.hbase.client.Get;
36  import org.apache.hadoop.hbase.client.Result;
37  import org.apache.hadoop.hbase.ipc.HRegionInterface;
38  import org.apache.hadoop.hbase.master.RegionManager.RegionState;
39  import org.apache.hadoop.hbase.util.Bytes;
40  import org.apache.hadoop.hbase.util.Threads;
41  import org.apache.zookeeper.WatchedEvent;
42  import org.apache.zookeeper.Watcher;
43  import org.apache.zookeeper.Watcher.Event.EventType;
44  
45  import java.io.IOException;
46  import java.util.ArrayList;
47  import java.util.Collections;
48  import java.util.HashSet;
49  import java.util.Map;
50  import java.util.Set;
51  import java.util.SortedMap;
52  import java.util.TreeMap;
53  import java.util.concurrent.ConcurrentHashMap;
54  import java.util.concurrent.atomic.AtomicBoolean;
55  import java.util.concurrent.atomic.AtomicInteger;
56  
57  /**
58   * The ServerManager class manages info about region servers - HServerInfo,
59   * load numbers, dying servers, etc.
60   */
61  public class ServerManager {
62    private static final Log LOG =
63      LogFactory.getLog(ServerManager.class.getName());
64  
      // Count of region server reports whose first message was
      // MSG_REPORT_QUIESCED; once shutdown is requested it is compared against
      // the number of known servers in regionServerReport.
65    private final AtomicInteger quiescedServers = new AtomicInteger(0);
66  
67    // The map of known server names to server info
68    private final Map<String, HServerInfo> serversToServerInfo =
69      new ConcurrentHashMap<String, HServerInfo>();
70  
71    /*
72     * Set of known dead servers.  On znode expiration, servers are added here.
73     * This is needed in case of a network partitioning where the server's lease
74     * expires, but the server is still running. After the network is healed,
75     * and its server logs are recovered, it will be told to call server startup
76     * because by then, its regions have probably been reassigned.
77     */
78    private final Set<String> deadServers =
79      Collections.synchronizedSet(new HashSet<String>());
80  
81    // SortedMap server load -> Set of server names
      // Code in this class that reads and then updates an entry synchronizes
      // on this map.
82    private final SortedMap<HServerLoad, Set<String>> loadToServers =
83      Collections.synchronizedSortedMap(new TreeMap<HServerLoad, Set<String>>());
84    // Map of server names -> server load
85    private final Map<String, HServerLoad> serversToLoad =
86      new ConcurrentHashMap<String, HServerLoad>();
87  
      // The master this manager works for; assigned in the constructor.
88    private HMaster master;
89  
90    /* The regionserver will not be assigned or asked close regions if it
91     * is currently opening >= this many regions.
       * Read from "hbase.regions.nobalancing.count" (default 4) in the
       * constructor.
92     */
93    private final int nobalancingCount;
94  
      // Chore that logs server counts and average load; created and started as
      // a daemon thread in the constructor.
95    private final ServerMonitor serverMonitorThread;
96  
      // Read from "hbase.regions.server.count.min" (default 0) in the
      // constructor.
97    private int minimumServerCount;
98  
      // LogsCleaner created and started as a daemon thread in the constructor.
99    private final LogsCleaner logCleaner;
100 
101   /*
102    * Dumps into log current stats on dead servers and number of servers
103    * TODO: Make this a metric; dump metrics into log.
104    */
105   class ServerMonitor extends Chore {
106     ServerMonitor(final int period, final AtomicBoolean stop) {
107       super("ServerMonitor", period, stop);
108     }
109 
110     @Override
111     protected void chore() {
112       int numServers = serversToServerInfo.size();
113       int numDeadServers = deadServers.size();
114       double averageLoad = getAverageLoad();
115       String deadServersList = null;
116       if (numDeadServers > 0) {
117         StringBuilder sb = new StringBuilder("Dead Server [");
118         boolean first = true;
119         synchronized (deadServers) {
120           for (String server: deadServers) {
121             if (!first) {
122               sb.append(",  ");
123               first = false;
124             }
125             sb.append(server);
126           }
127         }
128         sb.append("]");
129         deadServersList = sb.toString();
130       }
131       LOG.info(numServers + " region servers, " + numDeadServers +
132         " dead, average load " + averageLoad +
133         (deadServersList != null? deadServers: ""));
134     }
135   }
136 
137   /**
138    * Constructor.
139    * @param master
140    */
141   public ServerManager(HMaster master) {
142     this.master = master;
143     Configuration c = master.getConfiguration();
144     this.nobalancingCount = c.getInt("hbase.regions.nobalancing.count", 4);
145     int metaRescanInterval = c.getInt("hbase.master.meta.thread.rescanfrequency",
146       60 * 1000);
147     this.minimumServerCount = c.getInt("hbase.regions.server.count.min", 0);
148     this.serverMonitorThread = new ServerMonitor(metaRescanInterval,
149       this.master.getShutdownRequested());
150     String n = Thread.currentThread().getName();
151     Threads.setDaemonThreadRunning(this.serverMonitorThread,
152       n + ".serverMonitor");
153     this.logCleaner = new LogsCleaner(
154       c.getInt("hbase.master.meta.thread.rescanfrequency",60 * 1000),
155         this.master.getShutdownRequested(), c,
156         master.getFileSystem(), master.getOldLogDir());
157     Threads.setDaemonThreadRunning(logCleaner,
158       n + ".oldLogCleaner");
159 
160   }
161 
162   /**
163    * Let the server manager know a new regionserver has come online
164    * @param serverInfo
165    * @throws IOException
166    */
167   void regionServerStartup(final HServerInfo serverInfo)
168   throws IOException {
169     // Test for case where we get a region startup message from a regionserver
170     // that has been quickly restarted but whose znode expiration handler has
171     // not yet run, or from a server whose fail we are currently processing.
172     // Test its host+port combo is present in serverAddresstoServerInfo.  If it
173     // is, reject the server and trigger its expiration. The next time it comes
174     // in, it should have been removed from serverAddressToServerInfo and queued
175     // for processing by ProcessServerShutdown.
176     HServerInfo info = new HServerInfo(serverInfo);
177     String hostAndPort = info.getServerAddress().toString();
178     HServerInfo existingServer = haveServerWithSameHostAndPortAlready(info.getHostnamePort());
179     if (existingServer != null) {
180       String message = "Server start rejected; we already have " + hostAndPort +
181         " registered; existingServer=" + existingServer + ", newServer=" + info;
182       LOG.info(message);
183       if (existingServer.getStartCode() < info.getStartCode()) {
184         LOG.info("Triggering server recovery; existingServer looks stale");
185         expireServer(existingServer);
186       }
187       throw new PleaseHoldException(message);
188     }
189     checkIsDead(info.getServerName(), "STARTUP");
190     LOG.info("Received start message from: " + info.getServerName());
191     recordNewServer(info);
192   }
193 
194   private HServerInfo haveServerWithSameHostAndPortAlready(final String hostnamePort) {
195     synchronized (this.serversToServerInfo) {
196       for (Map.Entry<String, HServerInfo> e: this.serversToServerInfo.entrySet()) {
197         if (e.getValue().getHostnamePort().equals(hostnamePort)) {
198           return e.getValue();
199         }
200       }
201     }
202     return null;
203   }
204 
205   /*
206    * If this server is on the dead list, reject it with a LeaseStillHeldException
207    * @param serverName Server name formatted as host_port_startcode.
208    * @param what START or REPORT
209    * @throws LeaseStillHeldException
210    */
211   private void checkIsDead(final String serverName, final String what)
212   throws YouAreDeadException {
213     if (!isDead(serverName)) return;
214     String message = "Server " + what + " rejected; currently processing " +
215       serverName + " as dead server";
216     LOG.debug(message);
217     throw new YouAreDeadException(message);
218   }
219 
220   /**
221    * Adds the HSI to the RS list and creates an empty load
222    * @param info The region server informations
223    */
224   public void recordNewServer(HServerInfo info) {
225     recordNewServer(info, false);
226   }
227 
228   /**
229    * Adds the HSI to the RS list
230    * @param info The region server informations
231    * @param useInfoLoad True if the load from the info should be used
232    *                    like under a master failover
233    */
234   void recordNewServer(HServerInfo info, boolean useInfoLoad) {
235     HServerLoad load = useInfoLoad ? info.getLoad() : new HServerLoad();
236     String serverName = info.getServerName();
237     info.setLoad(load);
238     // We must set this watcher here because it can be set on a fresh start
239     // or on a failover
240     Watcher watcher = new ServerExpirer(new HServerInfo(info));
241     this.master.getZooKeeperWrapper().updateRSLocationGetWatch(info, watcher);
242     this.serversToServerInfo.put(serverName, info);
243     this.serversToLoad.put(serverName, load);
244     synchronized (this.loadToServers) {
245       Set<String> servers = this.loadToServers.get(load);
246       if (servers == null) {
247         servers = new HashSet<String>();
248       }
249       servers.add(serverName);
250       this.loadToServers.put(load, servers);
251     }
252   }
253 
254   /**
255    * Called to process the messages sent from the region server to the master
256    * along with the heart beat.
257    *
258    * @param serverInfo
259    * @param msgs
260    * @param mostLoadedRegions Array of regions the region server is submitting
261    * as candidates to be rebalanced, should it be overloaded
262    * @return messages from master to region server indicating what region
263    * server should do.
264    *
265    * @throws IOException
266    */
267   HMsg [] regionServerReport(final HServerInfo serverInfo,
268     final HMsg msgs[], final HRegionInfo[] mostLoadedRegions)
269   throws IOException {
270     HServerInfo info = new HServerInfo(serverInfo);
271     checkIsDead(info.getServerName(), "REPORT");
272     if (msgs.length > 0) {
273       if (msgs[0].isType(HMsg.Type.MSG_REPORT_EXITING)) {
274         processRegionServerExit(info, msgs);
275         return HMsg.EMPTY_HMSG_ARRAY;
276       } else if (msgs[0].isType(HMsg.Type.MSG_REPORT_QUIESCED)) {
277         LOG.info("Region server " + info.getServerName() + " quiesced");
278         this.quiescedServers.incrementAndGet();
279       }
280     }
281     if (this.master.getShutdownRequested().get()) {
282       if (quiescedServers.get() >= serversToServerInfo.size()) {
283         // If the only servers we know about are meta servers, then we can
284         // proceed with shutdown
285         LOG.info("All user tables quiesced. Proceeding with shutdown");
286         this.master.startShutdown();
287       }
288       if (!this.master.isClosed()) {
289         if (msgs.length > 0 &&
290             msgs[0].isType(HMsg.Type.MSG_REPORT_QUIESCED)) {
291           // Server is already quiesced, but we aren't ready to shut down
292           // return empty response
293           return HMsg.EMPTY_HMSG_ARRAY;
294         }
295         // Tell the server to stop serving any user regions
296         return new HMsg [] {HMsg.REGIONSERVER_QUIESCE};
297       }
298     }
299     if (this.master.isClosed()) {
300       // Tell server to shut down if we are shutting down.  This should
301       // happen after check of MSG_REPORT_EXITING above, since region server
302       // will send us one of these messages after it gets MSG_REGIONSERVER_STOP
303       return new HMsg [] {HMsg.REGIONSERVER_STOP};
304     }
305 
306     HServerInfo storedInfo = this.serversToServerInfo.get(info.getServerName());
307     if (storedInfo == null) {
308       LOG.warn("Received report from unknown server -- telling it " +
309         "to " + HMsg.REGIONSERVER_STOP + ": " + info.getServerName());
310       // The HBaseMaster may have been restarted.
311       // Tell the RegionServer to abort!
312       return new HMsg[] {HMsg.REGIONSERVER_STOP};
313     } else if (storedInfo.getStartCode() != info.getStartCode()) {
314       // This state is reachable if:
315       //
316       // 1) RegionServer A started
317       // 2) RegionServer B started on the same machine, then
318       //    clobbered A in regionServerStartup.
319       // 3) RegionServer A returns, expecting to work as usual.
320       //
321       // The answer is to ask A to shut down for good.
322 
323       if (LOG.isDebugEnabled()) {
324         LOG.debug("region server race condition detected: " +
325             info.getServerName());
326       }
327 
328       synchronized (this.serversToServerInfo) {
329         removeServerInfo(info.getServerName());
330         notifyServers();
331       }
332 
333       return new HMsg[] {HMsg.REGIONSERVER_STOP};
334     } else {
335       return processRegionServerAllsWell(info, mostLoadedRegions, msgs);
336     }
337   }
338 
339   /*
340    * Region server is exiting with a clean shutdown.
341    *
342    * In this case, the server sends MSG_REPORT_EXITING in msgs[0] followed by
343    * a MSG_REPORT_CLOSE for each region it was serving.
344    * @param serverInfo
345    * @param msgs
346    */
347   private void processRegionServerExit(HServerInfo serverInfo, HMsg[] msgs) {
348     synchronized (this.serversToServerInfo) {
349       // This method removes ROOT/META from the list and marks them to be
350       // reassigned in addition to other housework.
351       if (removeServerInfo(serverInfo.getServerName())) {
352         // Only process the exit message if the server still has registered info.
353         // Otherwise we could end up processing the server exit twice.
354         LOG.info("Region server " + serverInfo.getServerName() +
355           ": MSG_REPORT_EXITING");
356         // Get all the regions the server was serving reassigned
357         // (if we are not shutting down).
358         if (!master.closed.get()) {
359           for (int i = 1; i < msgs.length; i++) {
360             LOG.info("Processing " + msgs[i] + " from " +
361               serverInfo.getServerName());
362             assert msgs[i].getType() == HMsg.Type.MSG_REGION_CLOSE;
363             HRegionInfo info = msgs[i].getRegionInfo();
364             // Meta/root region offlining is handed in removeServerInfo above.
365             if (!info.isMetaRegion()) {
366               synchronized (master.getRegionManager()) {
367                 if (!master.getRegionManager().isOfflined(info.getRegionNameAsString())) {
368                   master.getRegionManager().setUnassigned(info, true);
369                 } else {
370                   master.getRegionManager().removeRegion(info);
371                 }
372               }
373             }
374           }
375         }
376         // There should not be any regions in transition for this server - the
377         // server should finish transitions itself before closing
378         Map<String, RegionState> inTransition = master.getRegionManager()
379             .getRegionsInTransitionOnServer(serverInfo.getServerName());
380         for (Map.Entry<String, RegionState> entry : inTransition.entrySet()) {
381           LOG.warn("Region server " + serverInfo.getServerName()
382               + " shut down with region " + entry.getKey() + " in transition "
383               + "state " + entry.getValue());
384           master.getRegionManager().setUnassigned(entry.getValue().getRegionInfo(),
385               true);
386         }
387       }
388     }
389   }
390 
391   /*
392    *  RegionServer is checking in, no exceptional circumstances
393    * @param serverInfo
394    * @param mostLoadedRegions
395    * @param msgs
396    * @return
397    * @throws IOException
398    */
399   private HMsg[] processRegionServerAllsWell(HServerInfo serverInfo,
400       final HRegionInfo[] mostLoadedRegions, HMsg[] msgs)
401   throws IOException {
402     // Refresh the info object and the load information
403     this.serversToServerInfo.put(serverInfo.getServerName(), serverInfo);
404     HServerLoad load = this.serversToLoad.get(serverInfo.getServerName());
405     if (load != null) {
406       this.master.getMetrics().incrementRequests(load.getNumberOfRequests());
407       if (!load.equals(serverInfo.getLoad())) {
408         updateLoadToServers(serverInfo.getServerName(), load);
409       }
410     }
411 
412     // Set the current load information
413     load = serverInfo.getLoad();
414     this.serversToLoad.put(serverInfo.getServerName(), load);
415     synchronized (loadToServers) {
416       Set<String> servers = this.loadToServers.get(load);
417       if (servers == null) {
418         servers = new HashSet<String>();
419       }
420       servers.add(serverInfo.getServerName());
421       this.loadToServers.put(load, servers);
422     }
423 
424     // Next, process messages for this server
425     return processMsgs(serverInfo, mostLoadedRegions, msgs);
426   }
427 
428   /*
429    * Process all the incoming messages from a server that's contacted us.
430    * Note that we never need to update the server's load information because
431    * that has already been done in regionServerReport.
432    * @param serverInfo
433    * @param mostLoadedRegions
434    * @param incomingMsgs
435    * @return
436    */
437   private HMsg[] processMsgs(HServerInfo serverInfo,
438       HRegionInfo[] mostLoadedRegions, HMsg incomingMsgs[]) {
439     ArrayList<HMsg> returnMsgs = new ArrayList<HMsg>();
440     if (serverInfo.getServerAddress() == null) {
441       throw new NullPointerException("Server address cannot be null; " +
442         "hbase-958 debugging");
443     }
444     // Get reports on what the RegionServer did.
445     // Be careful that in message processors we don't throw exceptions that
446     // break the switch below because then we might drop messages on the floor.
447     int openingCount = 0;
448     for (int i = 0; i < incomingMsgs.length; i++) {
449       HRegionInfo region = incomingMsgs[i].getRegionInfo();
450       LOG.info("Processing " + incomingMsgs[i] + " from " +
451         serverInfo.getServerName() + "; " + (i + 1) + " of " +
452         incomingMsgs.length);
453       if (!this.master.getRegionServerOperationQueue().
454           process(serverInfo, incomingMsgs[i])) {
455         continue;
456       }
457       switch (incomingMsgs[i].getType()) {
458         case MSG_REPORT_PROCESS_OPEN:
459           openingCount++;
460           break;
461 
462         case MSG_REPORT_OPEN:
463           processRegionOpen(serverInfo, region, returnMsgs);
464           break;
465 
466         case MSG_REPORT_CLOSE:
467           processRegionClose(region);
468           break;
469 
470         case MSG_REPORT_SPLIT:
471           processSplitRegion(region, incomingMsgs[++i].getRegionInfo(),
472             incomingMsgs[++i].getRegionInfo());
473           break;
474 
475         case MSG_REPORT_SPLIT_INCLUDES_DAUGHTERS:
476           processSplitRegion(region, incomingMsgs[i].getDaughterA(),
477             incomingMsgs[i].getDaughterB());
478           break;
479 
480         default:
481           LOG.warn("Impossible state during message processing. Instruction: " +
482             incomingMsgs[i].getType());
483       }
484     }
485 
486     synchronized (this.master.getRegionManager()) {
487       // Tell the region server to close regions that we have marked for closing.
488       for (HRegionInfo i:
489         this.master.getRegionManager().getMarkedToClose(serverInfo.getServerName())) {
490         returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE, i));
491         // Transition the region from toClose to closing state
492         this.master.getRegionManager().setPendingClose(i.getRegionNameAsString());
493       }
494 
495       // Figure out what the RegionServer ought to do, and write back.
496 
497       // Should we tell it close regions because its overloaded?  If its
498       // currently opening regions, leave it alone till all are open.
499       if (openingCount < this.nobalancingCount) {
500         this.master.getRegionManager().assignRegions(serverInfo, mostLoadedRegions,
501           returnMsgs);
502       }
503 
504       // Send any pending table actions.
505       this.master.getRegionManager().applyActions(serverInfo, returnMsgs);
506     }
507     return returnMsgs.toArray(new HMsg[returnMsgs.size()]);
508   }
509 
510   /*
511    * A region has split.
512    *
513    * @param region
514    * @param splitA
515    * @param splitB
516    * @param returnMsgs
517    */
518   private void processSplitRegion(HRegionInfo region, HRegionInfo a, HRegionInfo b) {
519     synchronized (master.getRegionManager()) {
520       // Cancel any actions pending for the affected region.
521       // This prevents the master from sending a SPLIT message if the table
522       // has already split by the region server.
523       this.master.getRegionManager().endActions(region.getRegionName());
524       assignSplitDaughter(a);
525       assignSplitDaughter(b);
526       if (region.isMetaTable()) {
527         // A meta region has split.
528         this. master.getRegionManager().offlineMetaRegionWithStartKey(region.getStartKey());
529         this.master.getRegionManager().incrementNumMetaRegions();
530       }
531     }
532   }
533 
534   /*
535    * Assign new daughter-of-a-split UNLESS its already been assigned.
536    * It could have been assigned already in rare case where there was a large
537    * gap between insertion of the daughter region into .META. by the
538    * splitting regionserver and receipt of the split message in master (See
539    * HBASE-1784).
540    * @param hri Region to assign.
541    */
542   private void assignSplitDaughter(final HRegionInfo hri) {
543     MetaRegion mr =
544       this.master.getRegionManager().getFirstMetaRegionForRegion(hri);
545     Get g = new Get(hri.getRegionName());
546     g.addFamily(HConstants.CATALOG_FAMILY);
547     try {
548       HRegionInterface server =
549         this.master.getServerConnection().getHRegionConnection(mr.getServer());
550       Result r = server.get(mr.getRegionName(), g);
551       // If size > 3 -- presume regioninfo, startcode and server -- then presume
552       // that this daughter already assigned and return.
553       if (r.size() >= 3) return;
554     } catch (IOException e) {
555       LOG.warn("Failed get on " + HConstants.CATALOG_FAMILY_STR +
556         "; possible double-assignment?", e);
557     }
558     this.master.getRegionManager().setUnassigned(hri, false);
559   }
560 
561   /*
562    * Region server is reporting that a region is now opened
563    * @param serverInfo
564    * @param region
565    * @param returnMsgs
566    */
567   public void processRegionOpen(HServerInfo serverInfo,
568       HRegionInfo region, ArrayList<HMsg> returnMsgs) {
569     boolean duplicateAssignment = false;
570     synchronized (master.getRegionManager()) {
571       if (!this.master.getRegionManager().isUnassigned(region) &&
572           !this.master.getRegionManager().isPendingOpen(region.getRegionNameAsString())) {
573         if (region.isRootRegion()) {
574           // Root region
575           HServerAddress rootServer =
576             this.master.getRegionManager().getRootRegionLocation();
577           if (rootServer != null) {
578             if (rootServer.compareTo(serverInfo.getServerAddress()) == 0) {
579               // A duplicate open report from the correct server
580               return;
581             }
582             // We received an open report on the root region, but it is
583             // assigned to a different server
584             duplicateAssignment = true;
585           }
586         } else {
587           // Not root region. If it is not a pending region, then we are
588           // going to treat it as a duplicate assignment, although we can't
589           // tell for certain that's the case.
590           if (this.master.getRegionManager().isPendingOpen(
591               region.getRegionNameAsString())) {
592             // A duplicate report from the correct server
593             return;
594           }
595           duplicateAssignment = true;
596         }
597       }
598 
599       if (duplicateAssignment) {
600         LOG.warn("region server " + serverInfo.getServerAddress().toString() +
601           " should not have opened region " + Bytes.toString(region.getRegionName()));
602 
603         // This Region should not have been opened.
604         // Ask the server to shut it down, but don't report it as closed.
605         // Otherwise the HMaster will think the Region was closed on purpose,
606         // and then try to reopen it elsewhere; that's not what we want.
607         returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE_WITHOUT_REPORT,
608           region, "Duplicate assignment".getBytes()));
609       } else {
610         if (region.isRootRegion()) {
611           // it was assigned, and it's not a duplicate assignment, so take it out
612           // of the unassigned list.
613           this.master.getRegionManager().removeRegion(region);
614 
615           // Store the Root Region location (in memory)
616           HServerAddress rootServer = serverInfo.getServerAddress();
617           this.master.getServerConnection().setRootRegionLocation(
618             new HRegionLocation(region, rootServer));
619           this.master.getRegionManager().setRootRegionLocation(rootServer);
620         } else {
621           // Note that the table has been assigned and is waiting for the
622           // meta table to be updated.
623           this.master.getRegionManager().setOpen(region.getRegionNameAsString());
624           RegionServerOperation op =
625             new ProcessRegionOpen(master, serverInfo, region);
626           this.master.getRegionServerOperationQueue().put(op);
627         }
628       }
629     }
630   }
631 
632   /*
633    * @param region
634    * @throws Exception
635    */
636   public void processRegionClose(HRegionInfo region) {
637     synchronized (this.master.getRegionManager()) {
638       if (region.isRootRegion()) {
639         // Root region
640         this.master.getRegionManager().unsetRootRegion();
641         if (region.isOffline()) {
642           // Can't proceed without root region. Shutdown.
643           LOG.fatal("root region is marked offline");
644           this.master.shutdown();
645           return;
646         }
647 
648       } else if (region.isMetaTable()) {
649         // Region is part of the meta table. Remove it from onlineMetaRegions
650         this.master.getRegionManager().offlineMetaRegionWithStartKey(region.getStartKey());
651       }
652 
653       boolean offlineRegion =
654         this.master.getRegionManager().isOfflined(region.getRegionNameAsString());
655       boolean reassignRegion = !region.isOffline() && !offlineRegion;
656 
657       // NOTE: If the region was just being closed and not offlined, we cannot
658       //       mark the region unassignedRegions as that changes the ordering of
659       //       the messages we've received. In this case, a close could be
660       //       processed before an open resulting in the master not agreeing on
661       //       the region's state.
662       this.master.getRegionManager().setClosed(region.getRegionNameAsString());
663       RegionServerOperation op =
664         new ProcessRegionClose(master, region, offlineRegion, reassignRegion);
665       this.master.getRegionServerOperationQueue().put(op);
666     }
667   }
668 
669   /** Update a server load information because it's shutting down*/
670   private boolean removeServerInfo(final String serverName) {
671     boolean infoUpdated = false;
672     HServerInfo info = this.serversToServerInfo.remove(serverName);
673     // Only update load information once.
674     // This method can be called a couple of times during shutdown.
675     if (info != null) {
676       LOG.info("Removing server's info " + serverName);
677       this.master.getRegionManager().offlineMetaServer(info.getServerAddress());
678 
679       //HBASE-1928: Check whether this server has been transitioning the ROOT table
680       if (this.master.getRegionManager().isRootInTransitionOnThisServer(serverName)) {
681          this.master.getRegionManager().unsetRootRegion();
682          this.master.getRegionManager().reassignRootRegion();
683       }
684 
685       //HBASE-1928: Check whether this server has been transitioning the META table
686       HRegionInfo metaServerRegionInfo = this.master.getRegionManager().getMetaServerRegionInfo (serverName);
687       if (metaServerRegionInfo != null) {
688          this.master.getRegionManager().setUnassigned(metaServerRegionInfo, true);
689       }
690 
691       infoUpdated = true;
692       // update load information
693       updateLoadToServers(serverName, this.serversToLoad.remove(serverName));
694     }
695     return infoUpdated;
696   }
697 
698   private void updateLoadToServers(final String serverName,
699       final HServerLoad load) {
700     if (load == null) return;
701     synchronized (this.loadToServers) {
702       Set<String> servers = this.loadToServers.get(load);
703       if (servers != null) {
704         servers.remove(serverName);
705         if (servers.size() > 0)
706           this.loadToServers.put(load, servers);
707         else
708           this.loadToServers.remove(load);
709       }
710     }
711   }
712 
713   /**
714    * Compute the average load across all region servers.
715    * Currently, this uses a very naive computation - just uses the number of
716    * regions being served, ignoring stats about number of requests.
717    * @return the average load
718    */
719   public double getAverageLoad() {
720     int totalLoad = 0;
721     int numServers = 0;
722     double averageLoad = 0.0;
723     synchronized (serversToLoad) {
724       numServers = serversToLoad.size();
725       for (HServerLoad load : serversToLoad.values()) {
726         totalLoad += load.getNumberOfRegions();
727       }
728       averageLoad = (double)totalLoad / (double)numServers;
729     }
730     return averageLoad;
731   }
732 
733   /** @return the number of active servers */
734   public int numServers() {
735     return this.serversToServerInfo.size();
736   }
737 
738   /**
739    * @param name server name
740    * @return HServerInfo for the given server address
741    */
742   public HServerInfo getServerInfo(String name) {
743     return this.serversToServerInfo.get(name);
744   }
745 
746   /**
747    * @return Read-only map of servers to serverinfo.
748    */
749   public Map<String, HServerInfo> getServersToServerInfo() {
750     synchronized (this.serversToServerInfo) {
751       return Collections.unmodifiableMap(this.serversToServerInfo);
752     }
753   }
754 
755   /**
756    * @param hsa
757    * @return The HServerInfo whose HServerAddress is <code>hsa</code> or null
758    * if nothing found.
759    */
760   public HServerInfo getHServerInfo(final HServerAddress hsa) {
761     synchronized(this.serversToServerInfo) {
762       // TODO: This is primitive.  Do a better search.
763       for (Map.Entry<String, HServerInfo> e: this.serversToServerInfo.entrySet()) {
764         if (e.getValue().getServerAddress().equals(hsa)) return e.getValue();
765       }
766     }
767     return null;
768   }
769 
770   /**
771    * @return Read-only map of servers to load.
772    */
773   public Map<String, HServerLoad> getServersToLoad() {
774     synchronized (this.serversToLoad) {
775       return Collections.unmodifiableMap(serversToLoad);
776     }
777   }
778 
779   /**
780    * @return Read-only map of load to servers.
781    */
782   public SortedMap<HServerLoad, Set<String>> getLoadToServers() {
783     synchronized (this.loadToServers) {
784       return Collections.unmodifiableSortedMap(this.loadToServers);
785     }
786   }
787 
788   /**
789    * Wakes up threads waiting on serversToServerInfo
790    */
791   public void notifyServers() {
792     synchronized (this.serversToServerInfo) {
793       this.serversToServerInfo.notifyAll();
794     }
795   }
796 
797   /*
798    * Wait on regionservers to report in
799    * with {@link #regionServerReport(HServerInfo, HMsg[])} so they get notice
800    * the master is going down.  Waits until all region servers come back with
801    * a MSG_REGIONSERVER_STOP.
802    */
803   void letRegionServersShutdown() {
804     if (!master.checkFileSystem()) {
805       // Forget waiting for the region servers if the file system has gone
806       // away. Just exit as quickly as possible.
807       return;
808     }
809     synchronized (serversToServerInfo) {
810       while (serversToServerInfo.size() > 0) {
811         LOG.info("Waiting on following regionserver(s) to go down " +
812           this.serversToServerInfo.values());
813         try {
814           this.serversToServerInfo.wait(500);
815         } catch (InterruptedException e) {
816           // continue
817         }
818       }
819     }
820   }
821 
822   /** Watcher triggered when a RS znode is deleted */
823   private class ServerExpirer implements Watcher {
824     private HServerInfo server;
825 
826     ServerExpirer(final HServerInfo hsi) {
827       this.server = hsi;
828     }
829 
830     public void process(WatchedEvent event) {
831       if (!event.getType().equals(EventType.NodeDeleted)) {
832         LOG.warn("Unexpected event=" + event);
833         return;
834       }
835       LOG.info(this.server.getServerName() + " znode expired");
836       expireServer(this.server);
837     }
838   }
839 
840   /*
841    * Expire the passed server.  Add it to list of deadservers and queue a
842    * shutdown processing.
843    */
844   private synchronized void expireServer(final HServerInfo hsi) {
845     // First check a server to expire.  ServerName is of the form:
846     // <hostname> , <port> , <startcode>
847     String serverName = hsi.getServerName();
848     HServerInfo info = this.serversToServerInfo.get(serverName);
849     if (info == null) {
850       LOG.warn("No HServerInfo for " + serverName);
851       return;
852     }
853     if (this.deadServers.contains(serverName)) {
854       LOG.warn("Already processing shutdown of " + serverName);
855       return;
856     }
857     // Remove the server from the known servers lists and update load info
858     this.serversToServerInfo.remove(serverName);
859     HServerLoad load = this.serversToLoad.remove(serverName);
860     if (load != null) {
861       synchronized (this.loadToServers) {
862         Set<String> servers = this.loadToServers.get(load);
863         if (servers != null) {
864           servers.remove(serverName);
865           if (servers.isEmpty()) this.loadToServers.remove(load);
866         }
867       }
868     }
869     // Add to dead servers and queue a shutdown processing.
870     LOG.debug("Added=" + serverName +
871       " to dead servers, added shutdown processing operation");
872     this.deadServers.add(serverName);
873     this.master.getRegionServerOperationQueue().
874       put(new ProcessServerShutdown(master, info));
875   }
876 
877   /**
878    * @param serverName
879    */
880   void removeDeadServer(String serverName) {
881     this.deadServers.remove(serverName);
882   }
883 
884   /**
885    * @param serverName
886    * @return true if server is dead
887    */
888   public boolean isDead(final String serverName) {
889     return isDead(serverName, false);
890   }
891 
892   /**
893    * @param serverName Servername as either <code>host:port</code> or
894    * <code>host,port,startcode</code>.
895    * @param hostAndPortOnly True if <code>serverName</code> is host and
896    * port only (<code>host:port</code>) and if so, then we do a prefix compare
897    * (ignoring start codes) looking for dead server.
898    * @return true if server is dead
899    */
900   boolean isDead(final String serverName, final boolean hostAndPortOnly) {
901     return isDead(this.deadServers, serverName, hostAndPortOnly);
902   }
903 
904   static boolean isDead(final Set<String> deadServers,
905       final String serverName, final boolean hostAndPortOnly) {
906     return HServerInfo.isServer(deadServers, serverName, hostAndPortOnly);
907   }
908 
909   Set<String> getDeadServers() {
910     return this.deadServers;
911   }
912 
913   /**
914    * Add to the passed <code>m</code> servers that are loaded less than
915    * <code>l</code>.
916    * @param l
917    * @param m
918    */
919   void getLightServers(final HServerLoad l,
920       SortedMap<HServerLoad, Set<String>> m) {
921     synchronized (this.loadToServers) {
922       m.putAll(this.loadToServers.headMap(l));
923     }
924   }
925 
926   public boolean canAssignUserRegions() {
927     if (minimumServerCount == 0) {
928       return true;
929     }
930     return (numServers() >= minimumServerCount);
931   }
932 
933   public void setMinimumServerCount(int minimumServerCount) {
934     this.minimumServerCount = minimumServerCount;
935   }
936 }