View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.HashMap;
23  import java.util.HashSet;
24  import java.util.List;
25  import java.util.Map;
26  import java.util.Set;
27  import java.util.TreeMap;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.classification.InterfaceAudience;
32  import org.apache.hadoop.hbase.TableName;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.RegionTransition;
35  import org.apache.hadoop.hbase.Server;
36  import org.apache.hadoop.hbase.ServerLoad;
37  import org.apache.hadoop.hbase.ServerName;
38  import org.apache.hadoop.hbase.catalog.MetaReader;
39  import org.apache.hadoop.hbase.master.RegionState.State;
40  import org.apache.hadoop.hbase.util.Bytes;
41  import org.apache.hadoop.hbase.util.Pair;
42  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
43  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
44  import org.apache.zookeeper.KeeperException;
45  
46  import com.google.common.base.Preconditions;
47  
48  /**
49   * Region state accountant. It holds the states of all regions in the memory.
50   * In normal scenario, it should match the meta table and the true region states.
51   *
52   * This map is used by AssignmentManager to track region states.
53   */
54  @InterfaceAudience.Private
55  public class RegionStates {
56    private static final Log LOG = LogFactory.getLog(RegionStates.class);
57  
58    /**
59     * Regions currently in transition.
60     */
61    final HashMap<String, RegionState> regionsInTransition;
62  
63    /**
64     * Region encoded name to state map.
65     * All the regions should be in this map.
66     */
67    private final Map<String, RegionState> regionStates;
68  
69    /**
70     * Server to regions assignment map.
71     * Contains the set of regions currently assigned to a given server.
72     */
73    private final Map<ServerName, Set<HRegionInfo>> serverHoldings;
74  
75    /**
76     * Region to server assignment map.
77     * Contains the server a given region is currently assigned to.
78     */
79    private final TreeMap<HRegionInfo, ServerName> regionAssignments;
80  
81    private final ServerManager serverManager;
82    private final Server server;
83  
84    RegionStates(final Server master, final ServerManager serverManager) {
85      regionStates = new HashMap<String, RegionState>();
86      regionsInTransition = new HashMap<String, RegionState>();
87      serverHoldings = new HashMap<ServerName, Set<HRegionInfo>>();
88      regionAssignments = new TreeMap<HRegionInfo, ServerName>();
89      this.serverManager = serverManager;
90      this.server = master;
91    }
92  
93    /**
94     * @return an unmodifiable the region assignment map
95     */
96    @SuppressWarnings("unchecked")
97    public synchronized Map<HRegionInfo, ServerName> getRegionAssignments() {
98      return (Map<HRegionInfo, ServerName>)regionAssignments.clone();
99    }
100 
101   public synchronized ServerName getRegionServerOfRegion(HRegionInfo hri) {
102     return regionAssignments.get(hri);
103   }
104 
105   /**
106    * Get regions in transition and their states
107    */
108   @SuppressWarnings("unchecked")
109   public synchronized Map<String, RegionState> getRegionsInTransition() {
110     return (Map<String, RegionState>)regionsInTransition.clone();
111   }
112 
113   /**
114    * @return True if specified region in transition.
115    */
116   public synchronized boolean isRegionInTransition(final HRegionInfo hri) {
117     return regionsInTransition.containsKey(hri.getEncodedName());
118   }
119 
120   /**
121    * @return True if specified region in transition.
122    */
123   public synchronized boolean isRegionInTransition(final String regionName) {
124     return regionsInTransition.containsKey(regionName);
125   }
126 
127   /**
128    * @return True if any region in transition.
129    */
130   public synchronized boolean isRegionsInTransition() {
131     return !regionsInTransition.isEmpty();
132   }
133 
134   /**
135    * @return True if specified region assigned.
136    */
137   public synchronized boolean isRegionAssigned(final HRegionInfo hri) {
138     return regionAssignments.containsKey(hri);
139   }
140 
141   /**
142    * @return True if specified region is in specified state
143    */
144   public synchronized boolean isRegionInState(
145       final HRegionInfo hri, final State state) {
146     RegionState regionState = getRegionState(hri);
147     State s = regionState != null ? regionState.getState() : null;
148     return s == state;
149   }
150 
151   /**
152    * Wait for the state map to be updated by assignment manager.
153    */
154   public synchronized void waitForUpdate(
155       final long timeout) throws InterruptedException {
156     this.wait(timeout);
157   }
158 
159   /**
160    * Get region transition state
161    */
162   public synchronized RegionState
163       getRegionTransitionState(final HRegionInfo hri) {
164     return regionsInTransition.get(hri.getEncodedName());
165   }
166 
167   /**
168    * Get region transition state
169    */
170   public synchronized RegionState
171       getRegionTransitionState(final String regionName) {
172     return regionsInTransition.get(regionName);
173   }
174 
175   /**
176    * Add a list of regions to RegionStates. The initial state is OFFLINE.
177    * If any region is already in RegionStates, that region will be skipped.
178    */
179   public synchronized void createRegionStates(
180       final List<HRegionInfo> hris) {
181     for (HRegionInfo hri: hris) {
182       createRegionState(hri);
183     }
184   }
185 
186   /**
187    * Add a region to RegionStates. The initial state is OFFLINE.
188    * If it is already in RegionStates, this call has no effect,
189    * and the original state is returned.
190    */
191   public synchronized RegionState createRegionState(final HRegionInfo hri) {
192     String regionName = hri.getEncodedName();
193     RegionState regionState = regionStates.get(regionName);
194     if (regionState != null) {
195       LOG.warn("Tried to create a state of a region already in RegionStates, " +
196         "used existing state: " + regionState + ", ignored new state: state=OFFLINE, server=null");
197     } else {
198       regionState = new RegionState(hri, State.OFFLINE);
199       regionStates.put(regionName, regionState);
200     }
201     return regionState;
202   }
203 
204   /**
205    * Update a region state. It will be put in transition if not already there.
206    */
207   public synchronized RegionState updateRegionState(
208       final HRegionInfo hri, final State state) {
209     RegionState regionState = regionStates.get(hri.getEncodedName());
210     ServerName serverName = (regionState == null || state == State.CLOSED
211       || state == State.OFFLINE) ? null : regionState.getServerName();
212     return updateRegionState(hri, state, serverName);
213   }
214 
215   /**
216    * Update a region state. It will be put in transition if not already there.
217    *
218    * If we can't find the region info based on the region name in
219    * the transition, log a warning and return null.
220    */
221   public synchronized RegionState updateRegionState(
222       final RegionTransition transition, final State state) {
223     byte [] regionName = transition.getRegionName();
224     HRegionInfo regionInfo = getRegionInfo(regionName);
225     if (regionInfo == null) {
226       String prettyRegionName = HRegionInfo.prettyPrint(
227         HRegionInfo.encodeRegionName(regionName));
228       LOG.warn("Failed to find region " + prettyRegionName
229         + " in updating its state to " + state
230         + " based on region transition " + transition);
231       return null;
232     }
233     return updateRegionState(regionInfo, state,
234       transition.getServerName());
235   }
236 
237   /**
238    * Update a region state. It will be put in transition if not already there.
239    */
240   public synchronized RegionState updateRegionState(
241       final HRegionInfo hri, final State state, final ServerName serverName) {
242     ServerName newServerName = serverName;
243     if (serverName != null &&
244         (state == State.CLOSED || state == State.OFFLINE)) {
245       LOG.warn("Closed region " + hri.getShortNameToLog() + " still on "
246         + serverName + "? Ignored, reset it to null");
247       newServerName = null;
248     }
249 
250     if (state == State.FAILED_CLOSE || state == State.FAILED_OPEN) {
251       LOG.warn("Failed to transition " + hri.getShortNameToLog()
252         + " on " + serverName + ", set to " + state);
253     }
254 
255     String regionName = hri.getEncodedName();
256     RegionState regionState = new RegionState(
257       hri, state, System.currentTimeMillis(), newServerName);
258     RegionState oldState = regionStates.put(regionName, regionState);
259     if (oldState == null || oldState.getState() != regionState.getState()) {
260       LOG.info("Transitioned from " + oldState + " to " + regionState);
261     }
262     if (newServerName != null || (
263         state != State.PENDING_CLOSE && state != State.CLOSING)) {
264       regionsInTransition.put(regionName, regionState);
265     }
266 
267     // notify the change
268     this.notifyAll();
269     return regionState;
270   }
271 
272   /**
273    * A region is online, won't be in transition any more.
274    * We can't confirm it is really online on specified region server
275    * because it hasn't been put in region server's online region list yet.
276    */
277   public synchronized void regionOnline(
278       final HRegionInfo hri, final ServerName serverName) {
279     String regionName = hri.getEncodedName();
280     RegionState oldState = regionStates.get(regionName);
281     if (oldState == null) {
282       LOG.warn("Online a region not in RegionStates: " + hri.getShortNameToLog());
283     } else {
284       State state = oldState.getState();
285       ServerName sn = oldState.getServerName();
286       if (state != State.OPEN || sn == null || !sn.equals(serverName)) {
287         LOG.debug("Online a region " + hri.getShortNameToLog() + " with current state=" + state +
288           ", expected state=OPEN" + ", assigned to server: " + sn + " expected " + serverName);
289       }
290     }
291     updateRegionState(hri, State.OPEN, serverName);
292     regionsInTransition.remove(regionName);
293 
294     ServerName oldServerName = regionAssignments.put(hri, serverName);
295     if (!serverName.equals(oldServerName)) {
296       LOG.info("Onlined " + hri.getShortNameToLog() + " on " + serverName);
297       Set<HRegionInfo> regions = serverHoldings.get(serverName);
298       if (regions == null) {
299         regions = new HashSet<HRegionInfo>();
300         serverHoldings.put(serverName, regions);
301       }
302       regions.add(hri);
303       if (oldServerName != null) {
304         LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
305         serverHoldings.get(oldServerName).remove(hri);
306       }
307     }
308   }
309 
310   /**
311    * A region is offline, won't be in transition any more.
312    */
313   public void regionOffline(final HRegionInfo hri) {
314     regionOffline(hri, null);
315   }
316 
317   /**
318    * A region is offline, won't be in transition any more.
319    * Its state should be the specified expected state, which
320    * can be Split/Merged/Offline/null(=Offline) only.
321    */
322   public synchronized void regionOffline(
323       final HRegionInfo hri, final State expectedState) {
324     Preconditions.checkArgument(expectedState == null
325       || expectedState == State.OFFLINE || expectedState == State.SPLIT
326       || expectedState == State.MERGED, "Offlined region should be in state"
327         + " OFFLINE/SPLIT/MERGED instead of " + expectedState);
328     String regionName = hri.getEncodedName();
329     RegionState oldState = regionStates.get(regionName);
330     if (oldState == null) {
331       LOG.warn("Offline a region not in RegionStates: " + hri.getShortNameToLog());
332     } else if (LOG.isDebugEnabled()) {
333       State state = oldState.getState();
334       ServerName sn = oldState.getServerName();
335       if (state != State.OFFLINE
336           && state != State.SPLITTING && state != State.MERGING) {
337         LOG.debug("Offline a region " + hri.getShortNameToLog() + " with current state="
338           + state + ", expected state=OFFLINE/SPLITTING/MERGING");
339       }
340       if (sn != null && state == State.OFFLINE) {
341         LOG.debug("Offline a region " + hri.getShortNameToLog()
342           + " with current state=OFFLINE, assigned to server: "
343           + sn + ", expected null");
344       }
345     }
346     State newState = expectedState;
347     if (newState == null) newState = State.OFFLINE;
348     updateRegionState(hri, newState);
349     regionsInTransition.remove(regionName);
350 
351     ServerName oldServerName = regionAssignments.remove(hri);
352     if (oldServerName != null) {
353       LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
354       serverHoldings.get(oldServerName).remove(hri);
355     }
356   }
357 
358   /**
359    * A server is offline, all regions on it are dead.
360    */
361   public synchronized List<HRegionInfo> serverOffline(
362       final ZooKeeperWatcher watcher, final ServerName sn) {
363     // Clean up this server from map of servers to regions, and remove all regions
364     // of this server from online map of regions.
365     List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
366     Set<HRegionInfo> assignedRegions = serverHoldings.remove(sn);
367     if (assignedRegions == null) {
368       assignedRegions = new HashSet<HRegionInfo>();
369     }
370 
371     for (HRegionInfo region : assignedRegions) {
372       regionAssignments.remove(region);
373     }
374 
375     for (RegionState state : regionsInTransition.values()) {
376       HRegionInfo hri = state.getRegion();
377       if (assignedRegions.contains(hri)) {
378         // Region is open on this region server, but in transition.
379         // This region must be moving away from this server, or splitting/merging.
380         // SSH will handle it, either skip assigning, or re-assign.
381         LOG.info("Transitioning " + state + " will be handled by SSH for " + sn);
382         if (state.isSplitting() || state.isMerging()) {
383           LOG.info("Offline splitting/merging region " + state);
384           try {
385             // Delete the ZNode if exists
386             ZKAssign.deleteNodeFailSilent(watcher, hri);
387             regionOffline(hri);
388           } catch (KeeperException ke) {
389             server.abort("Unexpected ZK exception deleting node " + hri, ke);
390           }
391         }
392       } else if (sn.equals(state.getServerName())) {
393         // Region is in transition on this region server, and this
394         // region is not open on this server. So the region must be
395         // moving to this server from another one (i.e. opening or
396         // pending open on this server, was open on another one
397         if (state.isPendingOpen() || state.isOpening()) {
398           LOG.info("Found opening region " + state + " to be reassigned by SSH for " + sn);
399           rits.add(hri);
400         } else {
401           LOG.warn("THIS SHOULD NOT HAPPEN: unexpected state "
402             + state + " of region in transition on server " + sn);
403         }
404       }
405     }
406     assignedRegions.clear();
407     this.notifyAll();
408     return rits;
409   }
410 
411   /**
412    * Gets the online regions of the specified table.
413    * This method looks at the in-memory state.  It does not go to <code>.META.</code>.
414    * Only returns <em>online</em> regions.  If a region on this table has been
415    * closed during a disable, etc., it will be included in the returned list.
416    * So, the returned list may not necessarily be ALL regions in this table, its
417    * all the ONLINE regions in the table.
418    * @param tableName
419    * @return Online regions from <code>tableName</code>
420    */
421   public synchronized List<HRegionInfo> getRegionsOfTable(TableName tableName) {
422     List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
423     // boundary needs to have table's name but regionID 0 so that it is sorted
424     // before all table's regions.
425     HRegionInfo boundary = new HRegionInfo(tableName, null, null, false, 0L);
426     for (HRegionInfo hri: regionAssignments.tailMap(boundary).keySet()) {
427       if(!hri.getTableName().equals(tableName)) break;
428       tableRegions.add(hri);
429     }
430     return tableRegions;
431   }
432 
433 
434   /**
435    * Wait on region to clear regions-in-transition.
436    * <p>
437    * If the region isn't in transition, returns immediately.  Otherwise, method
438    * blocks until the region is out of transition.
439    */
440   public synchronized void waitOnRegionToClearRegionsInTransition(
441       final HRegionInfo hri) throws InterruptedException {
442     if (!isRegionInTransition(hri)) return;
443 
444     while(!server.isStopped() && isRegionInTransition(hri)) {
445       RegionState rs = getRegionState(hri);
446       LOG.info("Waiting on " + rs + " to clear regions-in-transition");
447       waitForUpdate(100);
448     }
449 
450     if (server.isStopped()) {
451       LOG.info("Giving up wait on region in " +
452         "transition because stoppable.isStopped is set");
453     }
454   }
455 
456   /**
457    * Waits until the specified region has completed assignment.
458    * <p>
459    * If the region is already assigned, returns immediately.  Otherwise, method
460    * blocks until the region is assigned.
461    */
462   public synchronized void waitForAssignment(
463       final HRegionInfo hri) throws InterruptedException {
464     if (!isRegionAssigned(hri)) return;
465 
466     while(!server.isStopped() && !isRegionAssigned(hri)) {
467       RegionState rs = getRegionState(hri);
468       LOG.info("Waiting on " + rs + " to be assigned");
469       waitForUpdate(100);
470     }
471 
472     if (server.isStopped()) {
473       LOG.info("Giving up wait on region " +
474         "assignment because stoppable.isStopped is set");
475     }
476   }
477 
478   /**
479    * Compute the average load across all region servers.
480    * Currently, this uses a very naive computation - just uses the number of
481    * regions being served, ignoring stats about number of requests.
482    * @return the average load
483    */
484   protected synchronized double getAverageLoad() {
485     int numServers = 0, totalLoad = 0;
486     for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
487       Set<HRegionInfo> regions = e.getValue();
488       ServerName serverName = e.getKey();
489       int regionCount = regions.size();
490       if (regionCount > 0 || serverManager.isServerOnline(serverName)) {
491         totalLoad += regionCount;
492         numServers++;
493       }
494     }
495     return numServers == 0 ? 0.0 :
496       (double)totalLoad / (double)numServers;
497   }
498 
499   /**
500    * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
501    * Can't let out original since it can change and at least the load balancer
502    * wants to iterate this exported list.  We need to synchronize on regions
503    * since all access to this.servers is under a lock on this.regions.
504    *
505    * @return A clone of current assignments by table.
506    */
507   protected Map<TableName, Map<ServerName, List<HRegionInfo>>>
508       getAssignmentsByTable() {
509     Map<TableName, Map<ServerName, List<HRegionInfo>>> result =
510       new HashMap<TableName, Map<ServerName,List<HRegionInfo>>>();
511     synchronized (this) {
512       if (!server.getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false)) {
513         Map<ServerName, List<HRegionInfo>> svrToRegions =
514           new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
515         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
516           svrToRegions.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
517         }
518         result.put(TableName.valueOf("ensemble"), svrToRegions);
519       } else {
520         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
521           for (HRegionInfo hri: e.getValue()) {
522             if (hri.isMetaRegion()) continue;
523             TableName tablename = hri.getTableName();
524             Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
525             if (svrToRegions == null) {
526               svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
527               result.put(tablename, svrToRegions);
528             }
529             List<HRegionInfo> regions = svrToRegions.get(e.getKey());
530             if (regions == null) {
531               regions = new ArrayList<HRegionInfo>();
532               svrToRegions.put(e.getKey(), regions);
533             }
534             regions.add(hri);
535           }
536         }
537       }
538     }
539 
540     Map<ServerName, ServerLoad>
541       onlineSvrs = serverManager.getOnlineServers();
542     // Take care of servers w/o assignments.
543     for (Map<ServerName, List<HRegionInfo>> map: result.values()) {
544       for (ServerName svr: onlineSvrs.keySet()) {
545         if (!map.containsKey(svr)) {
546           map.put(svr, new ArrayList<HRegionInfo>());
547         }
548       }
549     }
550     return result;
551   }
552 
553   protected synchronized RegionState getRegionState(final HRegionInfo hri) {
554     return regionStates.get(hri.getEncodedName());
555   }
556 
557   protected synchronized RegionState getRegionState(final String regionName) {
558     return regionStates.get(regionName);
559   }
560 
561   /**
562    * Get the HRegionInfo from cache, if not there, from the META table
563    * @param  regionName
564    * @return HRegionInfo for the region
565    */
566   protected HRegionInfo getRegionInfo(final byte [] regionName) {
567     String encodedName = HRegionInfo.encodeRegionName(regionName);
568     RegionState regionState = regionStates.get(encodedName);
569     if (regionState != null) {
570       return regionState.getRegion();
571     }
572 
573     try {
574       Pair<HRegionInfo, ServerName> p =
575         MetaReader.getRegion(server.getCatalogTracker(), regionName);
576       HRegionInfo hri = p == null ? null : p.getFirst();
577       if (hri != null) {
578         createRegionState(hri);
579       }
580       return hri;
581     } catch (IOException e) {
582       server.abort("Aborting because error occoured while reading " +
583         Bytes.toStringBinary(regionName) + " from .META.", e);
584       return null;
585     }
586   }
587 }