View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.HashMap;
23  import java.util.HashSet;
24  import java.util.List;
25  import java.util.Map;
26  import java.util.Set;
27  import java.util.TreeMap;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.classification.InterfaceAudience;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.RegionTransition;
34  import org.apache.hadoop.hbase.Server;
35  import org.apache.hadoop.hbase.ServerLoad;
36  import org.apache.hadoop.hbase.ServerName;
37  import org.apache.hadoop.hbase.catalog.MetaReader;
38  import org.apache.hadoop.hbase.master.RegionState.State;
39  import org.apache.hadoop.hbase.util.Bytes;
40  import org.apache.hadoop.hbase.util.Pair;
41  
42  /**
43   * Region state accountant. It holds the states of all regions in the memory.
44   * In normal scenario, it should match the meta table and the true region states.
45   *
46   * This map is used by AssignmentManager to track region states.
47   */
48  @InterfaceAudience.Private
49  public class RegionStates {
50    private static final Log LOG = LogFactory.getLog(RegionStates.class);
51  
52    /**
53     * Regions currently in transition.
54     */
55    final HashMap<String, RegionState> regionsInTransition;
56  
57    /**
58     * Region encoded name to state map.
59     * All the regions should be in this map.
60     */
61    private final Map<String, RegionState> regionStates;
62  
63    /**
64     * Server to regions assignment map.
65     * Contains the set of regions currently assigned to a given server.
66     */
67    private final Map<ServerName, Set<HRegionInfo>> serverHoldings;
68  
69    /**
70     * Region to server assignment map.
71     * Contains the server a given region is currently assigned to.
72     */
73    private final TreeMap<HRegionInfo, ServerName> regionAssignments;
74  
75    private final ServerManager serverManager;
76    private final Server server;
77  
78    RegionStates(final Server master, final ServerManager serverManager) {
79      regionStates = new HashMap<String, RegionState>();
80      regionsInTransition = new HashMap<String, RegionState>();
81      serverHoldings = new HashMap<ServerName, Set<HRegionInfo>>();
82      regionAssignments = new TreeMap<HRegionInfo, ServerName>();
83      this.serverManager = serverManager;
84      this.server = master;
85    }
86  
87    /**
88     * @return an unmodifiable the region assignment map
89     */
90    @SuppressWarnings("unchecked")
91    public synchronized Map<HRegionInfo, ServerName> getRegionAssignments() {
92      return (Map<HRegionInfo, ServerName>)regionAssignments.clone();
93    }
94  
95    public synchronized ServerName getRegionServerOfRegion(HRegionInfo hri) {
96      return regionAssignments.get(hri);
97    }
98  
99    /**
100    * Get regions in transition and their states
101    */
102   @SuppressWarnings("unchecked")
103   public synchronized Map<String, RegionState> getRegionsInTransition() {
104     return (Map<String, RegionState>)regionsInTransition.clone();
105   }
106 
107   /**
108    * @return True if specified region in transition.
109    */
110   public synchronized boolean isRegionInTransition(final HRegionInfo hri) {
111     return regionsInTransition.containsKey(hri.getEncodedName());
112   }
113 
114   /**
115    * @return True if specified region in transition.
116    */
117   public synchronized boolean isRegionInTransition(final String regionName) {
118     return regionsInTransition.containsKey(regionName);
119   }
120 
121   /**
122    * @return True if any region in transition.
123    */
124   public synchronized boolean isRegionsInTransition() {
125     return !regionsInTransition.isEmpty();
126   }
127 
128   /**
129    * @return True if specified region assigned.
130    */
131   public synchronized boolean isRegionAssigned(final HRegionInfo hri) {
132     return regionAssignments.containsKey(hri);
133   }
134 
135   /**
136    * @return True if specified region failed to open.
137    */
138   public synchronized boolean isRegionFailedToOpen(final HRegionInfo hri) {
139     RegionState regionState = getRegionTransitionState(hri);
140     State state = regionState != null ? regionState.getState() : null;
141     return state == State.FAILED_OPEN;
142   }
143 
144   /**
145    * @return True if specified region failed to close.
146    */
147   public synchronized boolean isRegionFailedToClose(final HRegionInfo hri) {
148     RegionState regionState = getRegionTransitionState(hri);
149     State state = regionState != null ? regionState.getState() : null;
150     return state == State.FAILED_CLOSE;
151   }
152 
153   /**
154    * Wait for the state map to be updated by assignment manager.
155    */
156   public synchronized void waitForUpdate(
157       final long timeout) throws InterruptedException {
158     this.wait(timeout);
159   }
160 
161   /**
162    * Get region transition state
163    */
164   public synchronized RegionState
165       getRegionTransitionState(final HRegionInfo hri) {
166     return regionsInTransition.get(hri.getEncodedName());
167   }
168 
169   /**
170    * Get region transition state
171    */
172   public synchronized RegionState
173       getRegionTransitionState(final String regionName) {
174     return regionsInTransition.get(regionName);
175   }
176 
177   /**
178    * Add a list of regions to RegionStates. The initial state is OFFLINE.
179    * If any region is already in RegionStates, that region will be skipped.
180    */
181   public synchronized void createRegionStates(
182       final List<HRegionInfo> hris) {
183     for (HRegionInfo hri: hris) {
184       createRegionState(hri);
185     }
186   }
187 
188   /**
189    * Add a region to RegionStates. The initial state is OFFLINE.
190    * If it is already in RegionStates, this call has no effect,
191    * and the original state is returned.
192    */
193   public synchronized RegionState createRegionState(final HRegionInfo hri) {
194     String regionName = hri.getEncodedName();
195     RegionState regionState = regionStates.get(regionName);
196     if (regionState != null) {
197       LOG.warn("Tried to create a state of a region already in RegionStates "
198         + hri + ", used existing state: " + regionState
199         + ", ignored new state: state=OFFLINE, server=null");
200     } else {
201       regionState = new RegionState(hri, State.OFFLINE);
202       regionStates.put(regionName, regionState);
203     }
204     return regionState;
205   }
206 
207   /**
208    * Update a region state. If it is not splitting,
209    * it will be put in transition if not already there.
210    */
211   public synchronized RegionState updateRegionState(
212       final HRegionInfo hri, final State state) {
213     RegionState regionState = regionStates.get(hri.getEncodedName());
214     ServerName serverName = (regionState == null || state == State.CLOSED
215       || state == State.OFFLINE) ? null : regionState.getServerName();
216     return updateRegionState(hri, state, serverName);
217   }
218 
219   /**
220    * Update a region state. If it is not splitting,
221    * it will be put in transition if not already there.
222    *
223    * If we can't find the region info based on the region name in
224    * the transition, log a warning and return null.
225    */
226   public synchronized RegionState updateRegionState(
227       final RegionTransition transition, final State state) {
228     byte[] regionName = transition.getRegionName();
229     HRegionInfo regionInfo = getRegionInfo(regionName);
230     if (regionInfo == null) {
231       String prettyRegionName = HRegionInfo.prettyPrint(
232         HRegionInfo.encodeRegionName(regionName));
233       LOG.warn("Failed to find region " + prettyRegionName
234         + " in updating its state to " + state
235         + " based on region transition " + transition);
236       return null;
237     }
238     return updateRegionState(regionInfo, state,
239       transition.getServerName());
240   }
241 
242   /**
243    * Update a region state. If it is not splitting,
244    * it will be put in transition if not already there.
245    */
246   public synchronized RegionState updateRegionState(
247       final HRegionInfo hri, final State state, final ServerName serverName) {
248     ServerName newServerName = serverName;
249     if (serverName != null &&
250         (state == State.CLOSED || state == State.OFFLINE)) {
251       LOG.warn("Closed region " + hri + " still on "
252         + serverName + "? Ignored, reset it to null");
253       newServerName = null;
254     }
255 
256     if (state == State.FAILED_CLOSE || state == State.FAILED_OPEN) {
257       LOG.warn("Failed to transition " + hri + " on " + serverName + ": " + state);
258     }
259 
260     String regionName = hri.getEncodedName();
261     RegionState regionState = new RegionState(
262       hri, state, System.currentTimeMillis(), newServerName);
263     RegionState oldState = regionStates.put(regionName, regionState);
264     LOG.info("Region " + hri + " transitioned from " + oldState + " to " + regionState);
265     if (state != State.SPLITTING && (newServerName != null
266         || (state != State.PENDING_CLOSE && state != State.CLOSING))) {
267       regionsInTransition.put(regionName, regionState);
268     }
269 
270     // notify the change
271     this.notifyAll();
272     return regionState;
273   }
274 
275   /**
276    * A region is online, won't be in transition any more.
277    * We can't confirm it is really online on specified region server
278    * because it hasn't been put in region server's online region list yet.
279    */
280   public synchronized void regionOnline(
281       final HRegionInfo hri, final ServerName serverName) {
282     String regionName = hri.getEncodedName();
283     RegionState oldState = regionStates.get(regionName);
284     if (oldState == null) {
285       LOG.warn("Online a region not in RegionStates: " + hri);
286     } else {
287       State state = oldState.getState();
288       ServerName sn = oldState.getServerName();
289       if (state != State.OPEN || sn == null || !sn.equals(serverName)) {
290         LOG.debug("Online a region with current state=" + state + ", expected state=OPEN"
291           + ", assigned to server: " + sn + " expected " + serverName);
292       }
293     }
294     updateRegionState(hri, State.OPEN, serverName);
295     regionsInTransition.remove(regionName);
296 
297     ServerName oldServerName = regionAssignments.put(hri, serverName);
298     if (!serverName.equals(oldServerName)) {
299       LOG.info("Onlined region " + hri + " on " + serverName);
300       Set<HRegionInfo> regions = serverHoldings.get(serverName);
301       if (regions == null) {
302         regions = new HashSet<HRegionInfo>();
303         serverHoldings.put(serverName, regions);
304       }
305       regions.add(hri);
306       if (oldServerName != null) {
307         LOG.info("Offlined region " + hri + " from " + oldServerName);
308         serverHoldings.get(oldServerName).remove(hri);
309       }
310     }
311   }
312 
313   /**
314    * A region is offline, won't be in transition any more.
315    */
316   public synchronized void regionOffline(final HRegionInfo hri) {
317     String regionName = hri.getEncodedName();
318     RegionState oldState = regionStates.get(regionName);
319     if (oldState == null) {
320       LOG.warn("Offline a region not in RegionStates: " + hri);
321     } else {
322       State state = oldState.getState();
323       ServerName sn = oldState.getServerName();
324       if (state != State.OFFLINE || sn != null) {
325         LOG.debug("Offline a region with current state=" + state + ", expected state=OFFLINE"
326           + ", assigned to server: " + sn + ", expected null");
327       }
328     }
329     updateRegionState(hri, State.OFFLINE);
330     regionsInTransition.remove(regionName);
331 
332     ServerName oldServerName = regionAssignments.remove(hri);
333     if (oldServerName != null) {
334       LOG.info("Offlined region " + hri + " from " + oldServerName);
335       serverHoldings.get(oldServerName).remove(hri);
336     }
337   }
338 
339   /**
340    * A server is offline, all regions on it are dead.
341    */
342   public synchronized List<HRegionInfo> serverOffline(final ServerName sn) {
343     // Clean up this server from map of servers to regions, and remove all regions
344     // of this server from online map of regions.
345     List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
346     Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
347     if (assignedRegions == null) {
348       assignedRegions = new HashSet<HRegionInfo>();
349     }
350 
351     for (HRegionInfo region : assignedRegions) {
352       regionAssignments.remove(region);
353     }
354 
355     for (RegionState state : regionsInTransition.values()) {
356       HRegionInfo hri = state.getRegion();
357       if (assignedRegions.contains(hri)) {
358         // Region is open on this region server, but in transition.
359         // This region must be moving away from this server.
360         // SSH will handle it, either skip assigning, or re-assign.
361         LOG.info("Transitioning region "
362           + state + " will be handled by SSH for " + sn);
363       } else if (sn.equals(state.getServerName())) {
364         // Region is in transition on this region server, and this
365         // region is not open on this server. So the region must be
366         // moving to this server from another one (i.e. opening or
367         // pending open on this server, was open on another one
368         if (state.isPendingOpen() || state.isOpening()) {
369           LOG.info("Found opening region "
370             + state + " to be reassigned by SSH for " + sn);
371           rits.add(hri);
372         } else {
373           LOG.warn("THIS SHOULD NOT HAPPEN: unexpected state "
374             + state + " of region in transition on server " + sn);
375         }
376       }
377     }
378     assignedRegions.clear();
379     this.notifyAll();
380     return rits;
381   }
382 
383   /**
384    * Gets the online regions of the specified table.
385    * This method looks at the in-memory state.  It does not go to <code>.META.</code>.
386    * Only returns <em>online</em> regions.  If a region on this table has been
387    * closed during a disable, etc., it will be included in the returned list.
388    * So, the returned list may not necessarily be ALL regions in this table, its
389    * all the ONLINE regions in the table.
390    * @param tableName
391    * @return Online regions from <code>tableName</code>
392    */
393   public synchronized List<HRegionInfo> getRegionsOfTable(byte[] tableName) {
394     List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
395     // boundary needs to have table's name but regionID 0 so that it is sorted
396     // before all table's regions.
397     HRegionInfo boundary = new HRegionInfo(tableName, null, null, false, 0L);
398     for (HRegionInfo hri: regionAssignments.tailMap(boundary).keySet()) {
399       if(!Bytes.equals(hri.getTableName(), tableName)) break;
400       tableRegions.add(hri);
401     }
402     return tableRegions;
403   }
404 
405 
406   /**
407    * Wait on region to clear regions-in-transition.
408    * <p>
409    * If the region isn't in transition, returns immediately.  Otherwise, method
410    * blocks until the region is out of transition.
411    */
412   public synchronized void waitOnRegionToClearRegionsInTransition(
413       final HRegionInfo hri) throws InterruptedException {
414     if (!isRegionInTransition(hri)) return;
415 
416     while(!server.isStopped() && isRegionInTransition(hri)) {
417       RegionState rs = getRegionState(hri);
418       LOG.info("Waiting on " + rs + " to clear regions-in-transition");
419       waitForUpdate(100);
420     }
421 
422     if (server.isStopped()) {
423       LOG.info("Giving up wait on region in " +
424         "transition because stoppable.isStopped is set");
425     }
426   }
427 
428   /**
429    * Waits until the specified region has completed assignment.
430    * <p>
431    * If the region is already assigned, returns immediately.  Otherwise, method
432    * blocks until the region is assigned.
433    */
434   public synchronized void waitForAssignment(
435       final HRegionInfo hri) throws InterruptedException {
436     if (!isRegionAssigned(hri)) return;
437 
438     while(!server.isStopped() && !isRegionAssigned(hri)) {
439       RegionState rs = getRegionState(hri);
440       LOG.info("Waiting on " + rs + " to be assigned");
441       waitForUpdate(100);
442     }
443 
444     if (server.isStopped()) {
445       LOG.info("Giving up wait on region " +
446         "assignment because stoppable.isStopped is set");
447     }
448   }
449 
450   /**
451    * Compute the average load across all region servers.
452    * Currently, this uses a very naive computation - just uses the number of
453    * regions being served, ignoring stats about number of requests.
454    * @return the average load
455    */
456   protected synchronized double getAverageLoad() {
457     int numServers = 0, totalLoad = 0;
458     for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
459       Set<HRegionInfo> regions = e.getValue();
460       ServerName serverName = e.getKey();
461       int regionCount = regions.size();
462       if (regionCount > 0 || serverManager.isServerOnline(serverName)) {
463         totalLoad += regionCount;
464         numServers++;
465       }
466     }
467     return numServers == 0 ? 0.0 :
468       (double)totalLoad / (double)numServers;
469   }
470 
471   /**
472    * This is an EXPENSIVE clone.  Cloning though is the safest thing to do.
473    * Can't let out original since it can change and at least the load balancer
474    * wants to iterate this exported list.  We need to synchronize on regions
475    * since all access to this.servers is under a lock on this.regions.
476    *
477    * @return A clone of current assignments by table.
478    */
479   protected Map<String, Map<ServerName, List<HRegionInfo>>> getAssignmentsByTable() {
480     Map<String, Map<ServerName, List<HRegionInfo>>> result =
481       new HashMap<String, Map<ServerName,List<HRegionInfo>>>();
482     synchronized (this) {
483       if (!server.getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false)) {
484         Map<ServerName, List<HRegionInfo>> svrToRegions =
485           new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
486         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
487           svrToRegions.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
488         }
489         result.put("ensemble", svrToRegions);
490       } else {
491         for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
492           for (HRegionInfo hri: e.getValue()) {
493             if (hri.isMetaRegion()) continue;
494             String tablename = hri.getTableNameAsString();
495             Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
496             if (svrToRegions == null) {
497               svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
498               result.put(tablename, svrToRegions);
499             }
500             List<HRegionInfo> regions = svrToRegions.get(e.getKey());
501             if (regions == null) {
502               regions = new ArrayList<HRegionInfo>();
503               svrToRegions.put(e.getKey(), regions);
504             }
505             regions.add(hri);
506           }
507         }
508       }
509     }
510 
511     Map<ServerName, ServerLoad>
512       onlineSvrs = serverManager.getOnlineServers();
513     // Take care of servers w/o assignments.
514     for (Map<ServerName, List<HRegionInfo>> map: result.values()) {
515       for (ServerName svr: onlineSvrs.keySet()) {
516         if (!map.containsKey(svr)) {
517           map.put(svr, new ArrayList<HRegionInfo>());
518         }
519       }
520     }
521     return result;
522   }
523 
524   protected synchronized RegionState getRegionState(final HRegionInfo hri) {
525     return regionStates.get(hri.getEncodedName());
526   }
527 
528   protected synchronized RegionState getRegionState(final String regionName) {
529     return regionStates.get(regionName);
530   }
531 
532   /**
533    * Get the HRegionInfo from cache, if not there, from the META table
534    * @param  regionName
535    * @return HRegionInfo for the region
536    */
537   protected HRegionInfo getRegionInfo(final byte [] regionName) {
538     String encodedName = HRegionInfo.encodeRegionName(regionName);
539     RegionState regionState = regionStates.get(encodedName);
540     if (regionState != null) {
541       return regionState.getRegion();
542     }
543 
544     try {
545       Pair<HRegionInfo, ServerName> p =
546         MetaReader.getRegion(server.getCatalogTracker(), regionName);
547       HRegionInfo hri = p == null ? null : p.getFirst();
548       if (hri != null) {
549         createRegionState(hri);
550       }
551       return hri;
552     } catch (IOException e) {
553       server.abort("Aborting because error occoured while reading " +
554         Bytes.toStringBinary(regionName) + " from .META.", e);
555       return null;
556     }
557   }
558 }