View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.lang.Thread.UncaughtExceptionHandler;
24  import java.lang.management.ManagementFactory;
25  import java.lang.management.MemoryUsage;
26  import java.lang.management.RuntimeMXBean;
27  import java.lang.reflect.Constructor;
28  import java.net.BindException;
29  import java.net.InetSocketAddress;
30  import java.util.ArrayList;
31  import java.util.Collection;
32  import java.util.Collections;
33  import java.util.Comparator;
34  import java.util.HashSet;
35  import java.util.Iterator;
36  import java.util.LinkedList;
37  import java.util.List;
38  import java.util.Map;
39  import java.util.Random;
40  import java.util.Set;
41  import java.util.SortedMap;
42  import java.util.SortedSet;
43  import java.util.TreeMap;
44  import java.util.TreeSet;
45  import java.util.concurrent.BlockingQueue;
46  import java.util.concurrent.ConcurrentHashMap;
47  import java.util.concurrent.LinkedBlockingQueue;
48  import java.util.concurrent.TimeUnit;
49  import java.util.concurrent.atomic.AtomicBoolean;
50  import java.util.concurrent.atomic.AtomicInteger;
51  import java.util.concurrent.locks.ReentrantReadWriteLock;
52  
53  import org.apache.commons.cli.CommandLine;
54  import org.apache.commons.cli.GnuParser;
55  import org.apache.commons.cli.Options;
56  import org.apache.commons.cli.ParseException;
57  import org.apache.commons.logging.Log;
58  import org.apache.commons.logging.LogFactory;
59  import org.apache.hadoop.conf.Configuration;
60  import org.apache.hadoop.fs.FileSystem;
61  import org.apache.hadoop.fs.Path;
62  import org.apache.hadoop.hbase.Chore;
63  import org.apache.hadoop.hbase.HBaseConfiguration;
64  import org.apache.hadoop.hbase.HConstants;
65  import org.apache.hadoop.hbase.HMsg;
66  import org.apache.hadoop.hbase.HRegionInfo;
67  import org.apache.hadoop.hbase.HRegionLocation;
68  import org.apache.hadoop.hbase.HServerAddress;
69  import org.apache.hadoop.hbase.HServerInfo;
70  import org.apache.hadoop.hbase.HServerLoad;
71  import org.apache.hadoop.hbase.HTableDescriptor;
72  import org.apache.hadoop.hbase.KeyValue;
73  import org.apache.hadoop.hbase.LeaseListener;
74  import org.apache.hadoop.hbase.Leases;
75  import org.apache.hadoop.hbase.LocalHBaseCluster;
76  import org.apache.hadoop.hbase.NotServingRegionException;
77  import org.apache.hadoop.hbase.RemoteExceptionHandler;
78  import org.apache.hadoop.hbase.UnknownRowLockException;
79  import org.apache.hadoop.hbase.UnknownScannerException;
80  import org.apache.hadoop.hbase.YouAreDeadException;
81  import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
82  import org.apache.hadoop.hbase.HMsg.Type;
83  import org.apache.hadoop.hbase.Leases.LeaseStillHeldException;
84  import org.apache.hadoop.hbase.client.Delete;
85  import org.apache.hadoop.hbase.client.Get;
86  import org.apache.hadoop.hbase.client.MultiPut;
87  import org.apache.hadoop.hbase.client.MultiPutResponse;
88  import org.apache.hadoop.hbase.client.Put;
89  import org.apache.hadoop.hbase.client.Result;
90  import org.apache.hadoop.hbase.client.Scan;
91  import org.apache.hadoop.hbase.client.ServerConnection;
92  import org.apache.hadoop.hbase.client.ServerConnectionManager;
93  import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
94  import org.apache.hadoop.hbase.ipc.HBaseRPC;
95  import org.apache.hadoop.hbase.ipc.HBaseRPCErrorHandler;
96  import org.apache.hadoop.hbase.ipc.HBaseRPCProtocolVersion;
97  import org.apache.hadoop.hbase.ipc.HBaseServer;
98  import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
99  import org.apache.hadoop.hbase.ipc.HRegionInterface;
100 import org.apache.hadoop.hbase.regionserver.metrics.RegionServerMetrics;
101 import org.apache.hadoop.hbase.regionserver.wal.HLog;
102 import org.apache.hadoop.hbase.replication.regionserver.Replication;
103 import org.apache.hadoop.hbase.util.Bytes;
104 import org.apache.hadoop.hbase.util.FSUtils;
105 import org.apache.hadoop.hbase.util.InfoServer;
106 import org.apache.hadoop.hbase.util.Pair;
107 import org.apache.hadoop.hbase.util.Sleeper;
108 import org.apache.hadoop.hbase.util.Threads;
109 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
110 import org.apache.hadoop.io.MapWritable;
111 import org.apache.hadoop.io.Writable;
112 import org.apache.hadoop.net.DNS;
113 import org.apache.hadoop.util.Progressable;
114 import org.apache.hadoop.util.StringUtils;
115 import org.apache.zookeeper.WatchedEvent;
116 import org.apache.zookeeper.Watcher;
117 import org.apache.zookeeper.Watcher.Event.EventType;
118 import org.apache.zookeeper.Watcher.Event.KeeperState;
119 
120 /**
121  * HRegionServer makes a set of HRegions available to clients.  It checks in with
122  * the HMaster. There are many HRegionServers in a single HBase deployment.
123  */
124 public class HRegionServer implements HRegionInterface,
125     HBaseRPCErrorHandler, Runnable, Watcher, Stoppable, OnlineRegions {
126   public static final Log LOG = LogFactory.getLog(HRegionServer.class);
      // Pre-built messages and an empty array used as the type token when
      // converting the outbound message list to an array in run().
127   private static final HMsg REPORT_EXITING = new HMsg(Type.MSG_REPORT_EXITING);
128   private static final HMsg REPORT_QUIESCED = new HMsg(Type.MSG_REPORT_QUIESCED);
129   private static final HMsg [] EMPTY_HMSG_ARRAY = new HMsg [] {};
130 
131   // Set when a report to the master comes back with a message asking us to
132   // shutdown.  Also set by call to stop when debugging or running unit tests
133   // of HRegionServer in isolation. We use AtomicBoolean rather than
134   // plain boolean so we can pass a reference to Chore threads.  Otherwise,
135   // Chore threads need to know about the hosting class.
136   protected final AtomicBoolean stopRequested = new AtomicBoolean(false);
137 
      // When this is set and no regions remain online, run() requests a stop
      // after its next successful report to the master.
138   protected final AtomicBoolean quiesced = new AtomicBoolean(false);
139 
140   // Go down hard.  Used if file system becomes unavailable and also in
141   // debugging and unit tests.
142   protected volatile boolean abortRequested;
143 
      // When set, run() exits without closing regions, closing the ZooKeeper
      // wrapper or joining service threads.
144   private volatile boolean killed = false;
145 
146   // If false, the file system has become unavailable
147   protected volatile boolean fsOk;
148 
      // Identity and load reported to the master; rebuilt by reinitialize().
149   protected HServerInfo serverInfo;
150   protected final Configuration conf;
151 
152   private final ServerConnection connection;
      // Flipped to true by run() once the root region location has been read
      // from ZooKeeper and handed to the connection.
153   protected final AtomicBoolean haveRootRegion = new AtomicBoolean(false);
      // Both are assigned in init() from the configuration sent by the master.
154   private FileSystem fs;
155   private Path rootDir;
156   private final Random rand = new Random();
157 
158   // Key is the Bytes.mapKey(byte []) of the region name byte array and the
159   // value is the HRegion serving it.  Always use Bytes.mapKey(byte []) to
160   // generate keys for this map.
161   protected final Map<Integer, HRegion> onlineRegions =
162     new ConcurrentHashMap<Integer, HRegion>();
163 
      // NOTE(review): no use of this lock is visible in this part of the file;
      // confirm what it guards before relying on it.
164   protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
      // Messages waiting to be sent to the master with the next report; see
      // addOutboundMsgs() and updateOutboundMsgs().
165   private final LinkedBlockingQueue<HMsg> outboundMsgs =
166     new LinkedBlockingQueue<HMsg>();
167 
      // Read from "hbase.client.retries.number"; run() checks the file system
      // each time the count of failed reports is a multiple of this value.
168   final int numRetries;
169   protected final int threadWakeFrequency;
      // Interval in milliseconds between reports to the master
      // ("hbase.regionserver.msginterval").
170   private final int msgInterval;
171 
      // Read from "hbase.regionserver.numregionstoreport".
172   protected final int numRegionsToReport;
173 
174   private final long maxScannerResultSize;
175 
176   // Remote HMaster
177   private HMasterRegionInterface hbaseMaster;
178 
179   // Server to handle client requests.  Default access so can be accessed by
180   // unit tests.
181   HBaseServer server;
182 
183   // Leases
184   private Leases leases;
185 
186   // Request counter.  Read and reset to zero by run() on every report.
      // NOTE(review): the reference is volatile but is never reassigned in the
      // visible part of the file; it could probably be final -- confirm.
187   private volatile AtomicInteger requestCount = new AtomicInteger();
188 
189   // Info server.  Default access so can be used by unit tests.  REGIONSERVER
190   // is name of the webapp and the attribute name used stuffing this instance
191   // into web context.
192   InfoServer infoServer;
193 
194   /** region server process name */
195   public static final String REGIONSERVER = "regionserver";
196 
197   /*
198    * Space is reserved by reinitialize() (called from the HRS constructor and
199    * from restart()) and then released when aborting to recover from an OOME.
200    * See HBASE-706.  TODO: Make this percentage of the heap or a minimum.
201    */
202   private final LinkedList<byte[]> reservedSpace = new LinkedList<byte []>();
203 
      // Created in init(), after the thread name has been set.
204   private RegionServerMetrics metrics;
205 
206   // Compactions
207   CompactSplitThread compactSplitThread;
208 
209   // Cache flushing
210   MemStoreFlusher cacheFlusher;
211 
212   /* Check for major compactions.
213    */
214   Chore majorCompactionChecker;
215 
216   // HLog and HLog roller.  log is protected rather than private to avoid
217   // eclipse warning when accessed by inner classes
218   protected volatile HLog hlog;
219   LogRoller hlogRoller;
220 
221   // flag set after we're done setting up server threads (used for testing)
222   protected volatile boolean isOnline;
223 
      // Open scanners by name.  NOTE(review): presumably keyed by the scanner
      // id handed to clients; confirm against the scanner methods.
224   final Map<String, InternalScanner> scanners =
225     new ConcurrentHashMap<String, InternalScanner>();
226 
      // Recreated by reinitializeZooKeeper(); closed at the end of run().
227   private ZooKeeperWrapper zooKeeperWrapper;
228 
229   // A sleeper that sleeps for msgInterval.
230   private final Sleeper sleeper;
231 
      // Initialised in the constructor from the region server lease period
      // setting (HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY).
232   private final long rpcTimeout;
233 
234   // Address passed in to constructor.  This is not always the address we run
235   // with.  For example, if passed port is 0, then we are to pick a port.  The
236   // actual address we run with is in the #serverInfo data member.
237   private final HServerAddress address;
238 
239   // The main region server thread.  Set at the top of run().
240   private Thread regionServerThread;
241 
      // Host name resolved in the constructor through DNS.getDefaultHost().
242   private final String machineName;
243 
244   // Replication-related attributes
245   private Replication replicationHandler;
246   // End of replication
247 
248   /**
249    * Starts a HRegionServer at the default location
250    * @param conf
251    * @throws IOException
252    */
253   public HRegionServer(Configuration conf) throws IOException {
254     machineName = DNS.getDefaultHost(
255         conf.get("hbase.regionserver.dns.interface","default"),
256         conf.get("hbase.regionserver.dns.nameserver","default"));
257     String addressStr = machineName + ":" +
258       conf.get(HConstants.REGIONSERVER_PORT,
259           Integer.toString(HConstants.DEFAULT_REGIONSERVER_PORT));
260     // This is not necessarily the address we will run with.  The address we
261     // use will be in #serverInfo data member.  For example, we may have been
262     // passed a port of 0 which means we should pick some ephemeral port to bind
263     // to.
264     address = new HServerAddress(addressStr);
265     LOG.info("My address is " + address);
266 
267     this.abortRequested = false;
268     this.fsOk = true;
269     this.conf = conf;
270     this.connection = ServerConnectionManager.getConnection(conf);
271 
272     this.isOnline = false;
273 
274     // Config'ed params
275     this.numRetries =  conf.getInt("hbase.client.retries.number", 2);
276     this.threadWakeFrequency = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY,
277         10 * 1000);
278     this.msgInterval = conf.getInt("hbase.regionserver.msginterval", 1 * 1000);
279 
280     sleeper = new Sleeper(this.msgInterval, this.stopRequested);
281 
282     this.maxScannerResultSize = conf.getLong(
283             HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY,
284             HConstants.DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE);
285 
286     // Task thread to process requests from Master
287     this.worker = new Worker();
288 
289     this.numRegionsToReport =
290       conf.getInt("hbase.regionserver.numregionstoreport", 10);
291 
292     this.rpcTimeout =
293       conf.getLong(HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY,
294           HConstants.DEFAULT_HBASE_REGIONSERVER_LEASE_PERIOD);
295 
296     reinitialize();
297   }
298 
299   /**
300    * Creates all of the state that needs to be reconstructed in case we are
301    * doing a restart. This is shared between the constructor and restart().
302    * Both call it.
303    * @throws IOException
304    */
305   private void reinitialize() throws IOException {
306     this.abortRequested = false;
307     this.stopRequested.set(false);
308 
309     // Server to handle client requests
310     this.server = HBaseRPC.getServer(this,
311         new Class<?>[]{HRegionInterface.class, HBaseRPCErrorHandler.class,
312         OnlineRegions.class}, address.getBindAddress(),
313         address.getPort(), conf.getInt("hbase.regionserver.handler.count", 10),
314         false, conf);
315     this.server.setErrorHandler(this);
316     // Address is giving a default IP for the moment. Will be changed after
317     // calling the master.
318     this.serverInfo = new HServerInfo(new HServerAddress(
319       new InetSocketAddress(address.getBindAddress(),
320       this.server.getListenerAddress().getPort())), System.currentTimeMillis(),
321       this.conf.getInt("hbase.regionserver.info.port", 60030), machineName);
322     if (this.serverInfo.getServerAddress() == null) {
323       throw new NullPointerException("Server address cannot be null; " +
324         "hbase-958 debugging");
325     }
326     reinitializeThreads();
327     reinitializeZooKeeper();
328     int nbBlocks = conf.getInt("hbase.regionserver.nbreservationblocks", 4);
329     for(int i = 0; i < nbBlocks; i++)  {
330       reservedSpace.add(new byte[HConstants.DEFAULT_SIZE_RESERVATION_BLOCK]);
331     }
332   }
333 
334   private void reinitializeZooKeeper() throws IOException {
335     zooKeeperWrapper =
336         ZooKeeperWrapper.createInstance(conf, serverInfo.getServerName());
337     zooKeeperWrapper.registerListener(this);
338     watchMasterAddress();
339   }
340 
341   private void reinitializeThreads() {
342     this.workerThread = new Thread(worker);
343 
344     // Cache flushing thread.
345     this.cacheFlusher = new MemStoreFlusher(conf, this);
346 
347     // Compaction thread
348     this.compactSplitThread = new CompactSplitThread(this);
349 
350     // Log rolling thread
351     this.hlogRoller = new LogRoller(this);
352 
353     // Background thread to check for major compactions; needed if region
354     // has not gotten updates in a while.  Make it run at a lesser frequency.
355     int multiplier = this.conf.getInt(HConstants.THREAD_WAKE_FREQUENCY +
356         ".multiplier", 1000);
357     this.majorCompactionChecker = new MajorCompactionChecker(this,
358       this.threadWakeFrequency * multiplier,  this.stopRequested);
359 
360     this.leases = new Leases(
361         (int) conf.getLong(HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY,
362             HConstants.DEFAULT_HBASE_REGIONSERVER_LEASE_PERIOD),
363         this.threadWakeFrequency);
364   }
365 
366   /**
367    * We register ourselves as a watcher on the master address ZNode. This is
368    * called by ZooKeeper when we get an event on that ZNode. When this method
369    * is called it means either our master has died, or a new one has come up.
370    * Either way we need to update our knowledge of the master.
371    * @param event WatchedEvent from ZooKeeper.
372    */
373   public void process(WatchedEvent event) {
374     EventType type = event.getType();
375     KeeperState state = event.getState();
376     LOG.info("Got ZooKeeper event, state: " + state + ", type: " +
377       type + ", path: " + event.getPath());
378 
379     // Ignore events if we're shutting down.
380     if (this.stopRequested.get()) {
381       LOG.debug("Ignoring ZooKeeper event while shutting down");
382       return;
383     }
384 
385     if (state == KeeperState.Expired) {
386       LOG.error("ZooKeeper session expired");
387       boolean restart =
388         this.conf.getBoolean("hbase.regionserver.restart.on.zk.expire", false);
389       if (restart) {
390         restart();
391       } else {
392         abort("ZooKeeper session expired");
393       }
394     } else if (type == EventType.NodeDeleted) {
395       watchMasterAddress();
396     } else if (type == EventType.NodeCreated) {
397       getMaster();
398 
399       // ZooKeeper watches are one time only, so we need to re-register our watch.
400       watchMasterAddress();
401     }
402   }
403 
404   private void watchMasterAddress() {
405     while (!stopRequested.get() && !zooKeeperWrapper.watchMasterAddress(this)) {
406       LOG.warn("Unable to set watcher on ZooKeeper master address. Retrying.");
407       sleeper.sleep();
408     }
409   }
410 
411   private void restart() {
412     abort("Restarting region server");
413     Threads.shutdown(regionServerThread);
414     boolean done = false;
415     while (!done) {
416       try {
417         reinitialize();
418         done = true;
419       } catch (IOException e) {
420         LOG.debug("Error trying to reinitialize ZooKeeper", e);
421       }
422     }
423     Thread t = new Thread(this);
424     String name = regionServerThread.getName();
425     t.setName(name);
426     t.start();
427   }
428 
429   /** @return ZooKeeperWrapper used by RegionServer. */
430   public ZooKeeperWrapper getZooKeeperWrapper() {
431     return zooKeeperWrapper;
432   }
433 
434   /**
435    * The HRegionServer sticks in this loop until closed. It repeatedly checks
436    * in with the HMaster, sending heartbeats and reports, and receiving HRegion
437    * load/unload instructions.
       * <p>
       * The method has three phases: (1) report for duty until the master
       * answers, then run init(); (2) loop sending a report to the master
       * every msgInterval (or sooner when there are outbound messages) and
       * queue the master's replies on toDo; (3) shut down leases, worker, RPC
       * server, info server, block cache and service threads, close regions
       * and the hlog as appropriate for a kill, abort or normal stop, and
       * finally stop the master proxy.
438    */
439   public void run() {
440     regionServerThread = Thread.currentThread();
        // Becomes true once a MSG_REGIONSERVER_QUIESCE has been queued so that
        // repeated quiesce messages are not queued again.
441     boolean quiesceRequested = false;
442     try {
          // Phase 1: keep reporting for duty until the master answers or a
          // stop is requested.
443       MapWritable w = null;
444       while (!stopRequested.get()) {
445         w = reportForDuty();
446         if (w != null) {
447           init(w);
448           break;
449         }
450         sleeper.sleep();
            // NOTE(review): this message is logged after the sleep has
            // already happened, although it reads as if the sleep is next.
451         LOG.warn("No response from master on reportForDuty. Sleeping and " +
452           "then trying again.");
453       }
          // Phase 2: heartbeat loop.
454       List<HMsg> outboundMessages = new ArrayList<HMsg>();
455       long lastMsg = 0;
456       // Now ask master what it wants us to do and tell it what we have done.
          // 'tries' counts failed reports since the last success; the loop has
          // no update clause because it is only changed inside the body.
457       for (int tries = 0; !stopRequested.get() && isHealthy();) {
458         // Try to get the root region location from ZooKeeper.
459         if (!haveRootRegion.get()) {
460           HServerAddress rootServer = zooKeeperWrapper.readRootRegionLocation();
461           if (rootServer != null) {
462             // By setting the root region location, we bypass the wait imposed on
463             // HTable for all regions being assigned.
464             this.connection.setRootRegionLocation(
465                 new HRegionLocation(HRegionInfo.ROOT_REGIONINFO, rootServer));
466             haveRootRegion.set(true);
467           }
468         }
469         long now = System.currentTimeMillis();
470         // Drop into the send loop if msgInterval has elapsed or if something
471         // to send.  If we fail talking to the master, then we'll sleep below
472         // on poll of the outboundMsgs blockingqueue.
473         if ((now - lastMsg) >= msgInterval || !outboundMessages.isEmpty()) {
474           try {
475             doMetrics();
                // Build the load report: request count since the last report
                // plus used and max heap in MB, then one entry per online region.
476             MemoryUsage memory =
477               ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
478             HServerLoad hsl = new HServerLoad(requestCount.get(),
479               (int)(memory.getUsed()/1024/1024),
480               (int)(memory.getMax()/1024/1024));
481             for (HRegion r: onlineRegions.values()) {
482               hsl.addRegionInfo(createRegionLoad(r));
483             }
484             this.serverInfo.setLoad(hsl);
485             this.requestCount.set(0);
486             addOutboundMsgs(outboundMessages);
487             HMsg msgs[] = this.hbaseMaster.regionServerReport(
488               serverInfo, outboundMessages.toArray(EMPTY_HMSG_ARRAY),
489               getMostLoadedRegions());
490             lastMsg = System.currentTimeMillis();
                // The report went through: drop what was sent from the queue
                // and from the local list.
491             updateOutboundMsgs(outboundMessages);
492             outboundMessages.clear();
493             if (this.quiesced.get() && onlineRegions.size() == 0) {
494               // We've just told the master we're exiting because we aren't
495               // serving any regions. So set the stop bit and exit.
496               LOG.info("Server quiesced and not serving any regions. " +
497                 "Starting shutdown");
498               stopRequested.set(true);
499               this.outboundMsgs.clear();
500               continue;
501             }
502 
503             // Queue up the HMaster's instruction stream for processing
                // NOTE(review): 'restart' is never set to true anywhere in this
                // method, so the tests on it below never fire.
504             boolean restart = false;
505             for(int i = 0;
506                 !restart && !stopRequested.get() && i < msgs.length;
507                 i++) {
508               LOG.info(msgs[i].toString());
                  // NOTE(review): the root region location is unset for every
                  // message received, whatever its type, while haveRootRegion
                  // is left as is -- confirm this is intended.
509               this.connection.unsetRootRegionLocation();
510               switch(msgs[i].getType()) {
511 
512               case MSG_REGIONSERVER_STOP:
513                 stopRequested.set(true);
514                 break;
515 
516               case MSG_REGIONSERVER_QUIESCE:
517                 if (!quiesceRequested) {
518                   try {
519                     toDo.put(new ToDoEntry(msgs[i]));
520                   } catch (InterruptedException e) {
                        // NOTE(review): the interrupt status is not restored
                        // before the exception is wrapped and rethrown.
521                     throw new RuntimeException("Putting into msgQueue was " +
522                         "interrupted.", e);
523                   }
524                   quiesceRequested = true;
525                 }
526                 break;
527 
528               default:
                    // Any other message is queued only while the file system
                    // is believed to be available; otherwise it is dropped.
529                 if (fsOk) {
530                   try {
531                     toDo.put(new ToDoEntry(msgs[i]));
532                   } catch (InterruptedException e) {
533                     throw new RuntimeException("Putting into msgQueue was " +
534                         "interrupted.", e);
535                   }
536                 }
537               }
538             }
539             // Reset tries count if we had a successful transaction.
540             tries = 0;
541 
542             if (restart || this.stopRequested.get()) {
543               toDo.clear();
544               continue;
545             }
546           } catch (Exception e) { // FindBugs REC_CATCH_EXCEPTION
547             // Two special exceptions could be printed out here,
548             // PleaseHoldException and YouAreDeadException
549             if (e instanceof IOException) {
550               e = RemoteExceptionHandler.checkIOException((IOException) e);
551             }
552             if (e instanceof YouAreDeadException) {
553               // This will be caught and handled as a fatal error below
554               throw e;
555             }
556             tries++;
                // 'tries' was just incremented, so the '> 0' test always holds;
                // the file system is checked every numRetries failures.
557             if (tries > 0 && (tries % this.numRetries) == 0) {
558               // Check filesystem every so often.
559               checkFileSystem();
560             }
561             if (this.stopRequested.get()) {
562               LOG.info("Stop requested, clearing toDo despite exception");
563               toDo.clear();
564               continue;
565             }
566             LOG.warn("Attempt=" + tries, e);
567             // No point retrying immediately; this is probably connection to
568             // master issue.  Doing below will cause us to sleep.
569             lastMsg = System.currentTimeMillis();
570           }
571         }
            // Wait for what is left of msgInterval, returning early if a
            // message for the master is queued in the meantime.
572         now = System.currentTimeMillis();
573         HMsg msg = this.outboundMsgs.poll((msgInterval - (now - lastMsg)),
574           TimeUnit.MILLISECONDS);
575         // If we got something, add it to list of things to send.
576         if (msg != null) outboundMessages.add(msg);
577         // Do some housekeeping before going back around
578         housekeeping();
579       } // for
580     } catch (Throwable t) {
          // Anything that escapes the loops aborts the server unless
          // checkOOME() reports that it dealt with it.
581       if (!checkOOME(t)) {
582         abort("Unhandled exception", t);
583       }
584     }
        // Phase 3: shutdown.  Runs for a normal stop, an abort and a kill.
585     this.leases.closeAfterLeasesExpire();
586     this.worker.stop();
587     this.server.stop();
588     if (this.infoServer != null) {
589       LOG.info("Stopping infoServer");
590       try {
591         this.infoServer.stop();
592       } catch (Exception e) {
            // NOTE(review): printStackTrace() bypasses LOG.
593         e.printStackTrace();
594       }
595     }
596     // Send cache a shutdown.
597     LruBlockCache c = (LruBlockCache)StoreFile.getBlockCache(this.conf);
598     if (c != null) c.shutdown();
599 
600     // Send interrupts to wake up threads if sleeping so they notice shutdown.
601     // TODO: Should we check they are alive?  If OOME could have exited already
602     cacheFlusher.interruptIfNecessary();
603     compactSplitThread.interruptIfNecessary();
604     hlogRoller.interruptIfNecessary();
605     this.majorCompactionChecker.interrupt();
606 
        // Three ways out: killed (touch nothing), aborted (close the hlog and
        // regions only if the file system is still usable, and tell the master
        // nothing), or a normal stop (close regions, delete the hlog and send
        // the master a final report).
607     if (killed) {
608       // Just skip out w/o closing regions.
609     } else if (abortRequested) {
610       if (this.fsOk) {
611         // Only try to clean up if the file system is available
612         try {
613           if (this.hlog != null) {
614             this.hlog.close();
615             LOG.info("On abort, closed hlog");
616           }
617         } catch (Throwable e) {
618           LOG.error("Unable to close log in abort",
619             RemoteExceptionHandler.checkThrowable(e));
620         }
621         closeAllRegions(); // Don't leave any open file handles
622       }
623       LOG.info("aborting server at: " + this.serverInfo.getServerName());
624     } else {
625       ArrayList<HRegion> closedRegions = closeAllRegions();
626       try {
627         if (this.hlog != null) {
628           hlog.closeAndDelete();
629         }
630       } catch (Throwable e) {
631         LOG.error("Close and delete failed",
632           RemoteExceptionHandler.checkThrowable(e));
633       }
634       try {
            // Slot 0 carries the exit notice; the remaining slots carry one
            // MSG_REPORT_CLOSE per region that was just closed.
635         HMsg[] exitMsg = new HMsg[closedRegions.size() + 1];
636         exitMsg[0] = REPORT_EXITING;
637         // Tell the master what regions we are/were serving
638         int i = 1;
639         for (HRegion region: closedRegions) {
640           exitMsg[i++] = new HMsg(HMsg.Type.MSG_REPORT_CLOSE,
641               region.getRegionInfo());
642         }
643 
644         LOG.info("telling master that region server is shutting down at: " +
645             serverInfo.getServerName());
            // NOTE(review): hbaseMaster is not null-checked here although it
            // is below; a NullPointerException would be caught by the
            // catch (Throwable) that follows and logged as a warning.
646         hbaseMaster.regionServerReport(serverInfo, exitMsg, (HRegionInfo[])null);
647       } catch (Throwable e) {
648         LOG.warn("Failed to send exiting message to master: ",
649           RemoteExceptionHandler.checkThrowable(e));
650       }
651       LOG.info("stopping server at: " + this.serverInfo.getServerName());
652     }
653 
654     // Make sure the proxy is down.
655     if (this.hbaseMaster != null) {
656       HBaseRPC.stopProxy(this.hbaseMaster);
657       this.hbaseMaster = null;
658     }
659 
        // A killed server leaves its ZooKeeper wrapper open and does not wait
        // for its service threads.
660     if (!killed) {
661       this.zooKeeperWrapper.close();
662       join();
663     }
664     LOG.info(Thread.currentThread().getName() + " exiting");
665   }
666 
667   /*
668    * Add to the passed <code>msgs</code> messages to pass to the master.
669    * @param msgs Current outboundMsgs array; we'll add messages to this List.
670    */
671   private void addOutboundMsgs(final List<HMsg> msgs) {
672     if (msgs.isEmpty()) {
673       this.outboundMsgs.drainTo(msgs);
674       return;
675     }
676     OUTER: for (HMsg m: this.outboundMsgs) {
677       for (HMsg mm: msgs) {
678         // Be careful don't add duplicates.
679         if (mm.equals(m)) {
680           continue OUTER;
681         }
682       }
683       msgs.add(m);
684     }
685   }
686 
687   /*
688    * Remove from this.outboundMsgs those messsages we sent the master.
689    * @param msgs Messages we sent the master.
690    */
691   private void updateOutboundMsgs(final List<HMsg> msgs) {
692     if (msgs.isEmpty()) return;
693     for (HMsg m: this.outboundMsgs) {
694       for (HMsg mm: msgs) {
695         if (mm.equals(m)) {
696           this.outboundMsgs.remove(m);
697           break;
698         }
699       }
700     }
701   }
702 
703   /*
704    * Run init. Sets up hlog and starts up all server threads.
705    * @param c Extra configuration.
706    */
707   protected void init(final MapWritable c) throws IOException {
708     try {
709       for (Map.Entry<Writable, Writable> e: c.entrySet()) {
710         String key = e.getKey().toString();
711         String value = e.getValue().toString();
712         if (LOG.isDebugEnabled()) {
713           LOG.debug("Config from master: " + key + "=" + value);
714         }
715         this.conf.set(key, value);
716       }
717       // Master may have sent us a new address with the other configs.
718       // Update our address in this case. See HBASE-719
719       String hra = conf.get("hbase.regionserver.address");
720       // TODO: The below used to be this.address != null.  Was broken by what
721       // looks like a mistake in:
722       //
723       // HBASE-1215 migration; metautils scan of meta region was broken; wouldn't see first row
724       // ------------------------------------------------------------------------
725       // r796326 | stack | 2009-07-21 07:40:34 -0700 (Tue, 21 Jul 2009) | 38 lines
726       if (hra != null) {
727         HServerAddress hsa = new HServerAddress (hra,
728           this.serverInfo.getServerAddress().getPort());
729         LOG.info("Master passed us address to use. Was=" +
730           this.serverInfo.getServerAddress() + ", Now=" + hra);
731         this.serverInfo.setServerAddress(hsa);
732       }
733 
734       // hack! Maps DFSClient => RegionServer for logs.  HDFS made this
735       // config param for task trackers, but we can piggyback off of it.
736       if (this.conf.get("mapred.task.id") == null) {
737         this.conf.set("mapred.task.id", 
738             "hb_rs_" + this.serverInfo.getServerName() + "_" +
739             System.currentTimeMillis());
740       }
741 
742       // Master sent us hbase.rootdir to use. Should be fully qualified
743       // path with file system specification included.  Set 'fs.defaultFS'
744       // to match the filesystem on hbase.rootdir else underlying hadoop hdfs
745       // accessors will be going against wrong filesystem (unless all is set
746       // to defaults).
747       this.conf.set("fs.defaultFS", this.conf.get("hbase.rootdir"));
748       // Get fs instance used by this RS
749       this.fs = FileSystem.get(this.conf);
750       this.rootDir = new Path(this.conf.get(HConstants.HBASE_DIR));
751       this.hlog = setupHLog();
752       // Init in here rather than in constructor after thread name has been set
753       this.metrics = new RegionServerMetrics();
754       startServiceThreads();
755       isOnline = true;
756     } catch (Throwable e) {
757       this.isOnline = false;
758       this.stopRequested.set(true);
759       throw convertThrowableToIOE(cleanup(e, "Failed init"),
760         "Region server startup failed");
761     }
762   }
763 
764   /*
765    * @param r Region to get RegionLoad for.
766    * @return RegionLoad instance.
767    * @throws IOException
768    */
769   private HServerLoad.RegionLoad createRegionLoad(final HRegion r) {
770     byte[] name = r.getRegionName();
771     int stores = 0;
772     int storefiles = 0;
773     int storefileSizeMB = 0;
774     int memstoreSizeMB = (int)(r.memstoreSize.get()/1024/1024);
775     int storefileIndexSizeMB = 0;
776     synchronized (r.stores) {
777       stores += r.stores.size();
778       for (Store store: r.stores.values()) {
779         storefiles += store.getStorefilesCount();
780         storefileSizeMB +=
781           (int)(store.getStorefilesSize()/1024/1024);
782         storefileIndexSizeMB +=
783           (int)(store.getStorefilesIndexSize()/1024/1024);
784       }
785     }
786     return new HServerLoad.RegionLoad(name, stores, storefiles,
787       storefileSizeMB, memstoreSizeMB, storefileIndexSizeMB);
788   }
789 
790   /**
791    * @param regionName
792    * @return An instance of RegionLoad.
793    * @throws IOException
794    */
795   public HServerLoad.RegionLoad createRegionLoad(final byte [] regionName) {
796     return createRegionLoad(this.onlineRegions.get(Bytes.mapKey(regionName)));
797   }
798 
799   /*
800    * Cleanup after Throwable caught invoking method.  Converts <code>t</code>
801    * to IOE if it isn't already.
802    * @param t Throwable
803    * @return Throwable converted to an IOE; methods can only let out IOEs.
804    */
805   private Throwable cleanup(final Throwable t) {
806     return cleanup(t, null);
807   }
808 
809   /*
810    * Cleanup after Throwable caught invoking method.  Converts <code>t</code>
811    * to IOE if it isn't already.
812    * @param t Throwable
813    * @param msg Message to log in error.  Can be null.
814    * @return Throwable converted to an IOE; methods can only let out IOEs.
815    */
816   private Throwable cleanup(final Throwable t, final String msg) {
817     // Don't log as error if NSRE; NSRE is 'normal' operation.
818     if (t instanceof NotServingRegionException) {
819       LOG.debug("NotServingRegionException; " +  t.getMessage());
820       return t;
821     }
822     if (msg == null) {
823       LOG.error("", RemoteExceptionHandler.checkThrowable(t));
824     } else {
825       LOG.error(msg, RemoteExceptionHandler.checkThrowable(t));
826     }
827     if (!checkOOME(t)) {
828       checkFileSystem();
829     }
830     return t;
831   }
832 
833   /*
834    * @param t
835    * @return Make <code>t</code> an IOE if it isn't already.
836    */
837   private IOException convertThrowableToIOE(final Throwable t) {
838     return convertThrowableToIOE(t, null);
839   }
840 
841   /*
842    * @param t
843    * @param msg Message to put in new IOE if passed <code>t</code> is not an IOE
844    * @return Make <code>t</code> an IOE if it isn't already.
845    */
846   private IOException convertThrowableToIOE(final Throwable t,
847       final String msg) {
848     return (t instanceof IOException? (IOException)t:
849       msg == null || msg.length() == 0?
850         new IOException(t): new IOException(msg, t));
851   }
852   /*
853    * Check if an OOME and if so, call abort.
854    * @param e
855    * @return True if we OOME'd and are aborting.
856    */
857   public boolean checkOOME(final Throwable e) {
858     boolean stop = false;
859     if (e instanceof OutOfMemoryError ||
860       (e.getCause() != null && e.getCause() instanceof OutOfMemoryError) ||
861       (e.getMessage() != null &&
862         e.getMessage().contains("java.lang.OutOfMemoryError"))) {
863       abort("OutOfMemoryError, aborting", e);
864       stop = true;
865     }
866     return stop;
867   }
868 
869 
870   /**
871    * Checks to see if the file system is still accessible.
872    * If not, sets abortRequested and stopRequested
873    *
874    * @return false if file system is not available
875    */
876   protected boolean checkFileSystem() {
877     if (this.fsOk && this.fs != null) {
878       try {
879         FSUtils.checkFileSystemAvailable(this.fs);
880       } catch (IOException e) {
881         abort("File System not available", e);
882         this.fsOk = false;
883       }
884     }
885     return this.fsOk;
886   }
887 
888   /*
889    * Inner class that runs on a long period checking if regions need major
890    * compaction.
891    */
892   private static class MajorCompactionChecker extends Chore {
893     private final HRegionServer instance;
894 
895     MajorCompactionChecker(final HRegionServer h,
896         final int sleepTime, final AtomicBoolean stopper) {
897       super("MajorCompactionChecker", sleepTime, stopper);
898       this.instance = h;
899       LOG.info("Runs every " + sleepTime + "ms");
900     }
901 
902     @Override
903     protected void chore() {
904       Set<Integer> keys = this.instance.onlineRegions.keySet();
905       for (Integer i: keys) {
906         HRegion r = this.instance.onlineRegions.get(i);
907         try {
908           if (r != null && r.isMajorCompaction()) {
909             // Queue a compaction.  Will recognize if major is needed.
910             this.instance.compactSplitThread.
911               compactionRequested(r, getName() + " requests major compaction");
912           }
913         } catch (IOException e) {
914           LOG.warn("Failed major compaction check on " + r, e);
915         }
916       }
917     }
918   }
919 
920   /**
921    * Report the status of the server. A server is online once all the startup
922    * is completed (setting up filesystem, starting service threads, etc.). This
923    * method is designed mostly to be useful in tests.
924    * @return true if online, false if not.
925    */
926   public boolean isOnline() {
927     return isOnline;
928   }
929 
930   private HLog setupHLog() throws IOException {
931     final Path oldLogDir = new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME);
932     Path logdir = new Path(rootDir, HLog.getHLogDirectoryName(this.serverInfo));
933     if (LOG.isDebugEnabled()) {
934       LOG.debug("Log dir " + logdir);
935     }
936     if (fs.exists(logdir)) {
937       throw new RegionServerRunningException("region server already " +
938         "running at " + this.serverInfo.getServerName() +
939         " because logdir " + logdir.toString() + " exists");
940     }
941     this.replicationHandler = new Replication(this.conf,this.serverInfo,
942         this.fs, logdir, oldLogDir, stopRequested);
943     HLog log = instantiateHLog(logdir, oldLogDir);
944     this.replicationHandler.addLogEntryVisitor(log);
945     return log;
946   }
947 
948   // instantiate
949   protected HLog instantiateHLog(Path logdir, Path oldLogDir) throws IOException {
950     return new HLog(this.fs, logdir, oldLogDir, this.conf, this.hlogRoller,
951       this.replicationHandler.getReplicationManager(),
952         this.serverInfo.getServerAddress().toString());
953   }
954 
955 
956   protected LogRoller getLogRoller() {
957     return hlogRoller;
958   }
959 
960   /*
961    * @param interval Interval since last time metrics were called.
962    */
963   protected void doMetrics() {
964     try {
965       metrics();
966     } catch (Throwable e) {
967       LOG.warn("Failed metrics", e);
968     }
969   }
970 
971   protected void metrics() {
972     this.metrics.regions.set(this.onlineRegions.size());
973     this.metrics.incrementRequests(this.requestCount.get());
974     // Is this too expensive every three seconds getting a lock on onlineRegions
975     // and then per store carried?  Can I make metrics be sloppier and avoid
976     // the synchronizations?
977     int stores = 0;
978     int storefiles = 0;
979     long memstoreSize = 0;
980     long storefileIndexSize = 0;
981     synchronized (this.onlineRegions) {
982       for (Map.Entry<Integer, HRegion> e: this.onlineRegions.entrySet()) {
983         HRegion r = e.getValue();
984         memstoreSize += r.memstoreSize.get();
985         synchronized (r.stores) {
986           stores += r.stores.size();
987           for(Map.Entry<byte [], Store> ee: r.stores.entrySet()) {
988             Store store = ee.getValue();
989             storefiles += store.getStorefilesCount();
990             storefileIndexSize += store.getStorefilesIndexSize();
991           }
992         }
993       }
994     }
995     this.metrics.stores.set(stores);
996     this.metrics.storefiles.set(storefiles);
997     this.metrics.memstoreSizeMB.set((int)(memstoreSize/(1024*1024)));
998     this.metrics.storefileIndexSizeMB.set((int)(storefileIndexSize/(1024*1024)));
999     this.metrics.compactionQueueSize.set(compactSplitThread.
1000       getCompactionQueueSize());
1001 
1002     LruBlockCache lruBlockCache = (LruBlockCache)StoreFile.getBlockCache(conf);
1003     if (lruBlockCache != null) {
1004       this.metrics.blockCacheCount.set(lruBlockCache.size());
1005       this.metrics.blockCacheFree.set(lruBlockCache.getFreeSize());
1006       this.metrics.blockCacheSize.set(lruBlockCache.getCurrentSize());
1007       double ratio = lruBlockCache.getStats().getHitRatio();
1008       int percent = (int) (ratio * 100);
1009       this.metrics.blockCacheHitRatio.set(percent);
1010     }
1011   }
1012 
1013   /**
1014    * @return Region server metrics instance.
1015    */
1016   public RegionServerMetrics getMetrics() {
1017     return this.metrics;
1018   }
1019 
1020   /*
1021    * Start maintanence Threads, Server, Worker and lease checker threads.
1022    * Install an UncaughtExceptionHandler that calls abort of RegionServer if we
1023    * get an unhandled exception.  We cannot set the handler on all threads.
1024    * Server's internal Listener thread is off limits.  For Server, if an OOME,
1025    * it waits a while then retries.  Meantime, a flush or a compaction that
1026    * tries to run should trigger same critical condition and the shutdown will
1027    * run.  On its way out, this server will shut down Server.  Leases are sort
1028    * of inbetween. It has an internal thread that while it inherits from
1029    * Chore, it keeps its own internal stop mechanism so needs to be stopped
1030    * by this hosting server.  Worker logs the exception and exits.
1031    */
1032   private void startServiceThreads() throws IOException {
1033     String n = Thread.currentThread().getName();
1034     UncaughtExceptionHandler handler = new UncaughtExceptionHandler() {
1035       public void uncaughtException(Thread t, Throwable e) {
1036         abort("Uncaught exception in service thread " + t.getName(), e);
1037       }
1038     };
1039     Threads.setDaemonThreadRunning(this.hlogRoller, n + ".logRoller",
1040         handler);
1041     Threads.setDaemonThreadRunning(this.cacheFlusher, n + ".cacheFlusher",
1042       handler);
1043     Threads.setDaemonThreadRunning(this.compactSplitThread, n + ".compactor",
1044         handler);
1045     Threads.setDaemonThreadRunning(this.workerThread, n + ".worker", handler);
1046     Threads.setDaemonThreadRunning(this.majorCompactionChecker,
1047         n + ".majorCompactionChecker", handler);
1048 
1049     // Leases is not a Thread. Internally it runs a daemon thread.  If it gets
1050     // an unhandled exception, it will just exit.
1051     this.leases.setName(n + ".leaseChecker");
1052     this.leases.start();
1053     // Put up info server.
1054     int port = this.conf.getInt("hbase.regionserver.info.port", 60030);
1055     // -1 is for disabling info server
1056     if (port >= 0) {
1057       String addr = this.conf.get("hbase.regionserver.info.bindAddress", "0.0.0.0");
1058       // check if auto port bind enabled
1059       boolean auto = this.conf.getBoolean("hbase.regionserver.info.port.auto",
1060           false);
1061       while (true) {
1062         try {
1063           this.infoServer = new InfoServer("regionserver", addr, port, false);
1064           this.infoServer.setAttribute("regionserver", this);
1065           this.infoServer.start();
1066           break;
1067         } catch (BindException e) {
1068           if (!auto){
1069             // auto bind disabled throw BindException
1070             throw e;
1071           }
1072           // auto bind enabled, try to use another port
1073           LOG.info("Failed binding http info server to port: " + port);
1074           port++;
1075           // update HRS server info port.
1076           this.serverInfo = new HServerInfo(this.serverInfo.getServerAddress(),
1077             this.serverInfo.getStartCode(), port,
1078             this.serverInfo.getHostname());
1079         }
1080       }
1081     }
1082 
1083     this.replicationHandler.startReplicationServices();
1084 
1085     // Start Server.  This service is like leases in that it internally runs
1086     // a thread.
1087     this.server.start();
1088     LOG.info("HRegionServer started at: " +
1089       this.serverInfo.getServerAddress().toString());
1090   }
1091 
1092   /*
1093    * Verify that server is healthy
1094    */
1095   private boolean isHealthy() {
1096     if (!fsOk) {
1097       // File system problem
1098       return false;
1099     }
1100     // Verify that all threads are alive
1101     if (!(leases.isAlive() && compactSplitThread.isAlive() &&
1102         cacheFlusher.isAlive() && hlogRoller.isAlive() &&
1103         workerThread.isAlive() && this.majorCompactionChecker.isAlive())) {
1104       // One or more threads are no longer alive - shut down
1105       stop();
1106       return false;
1107     }
1108     return true;
1109   }
1110 
1111   /*
1112    * Run some housekeeping tasks.
1113    */
1114   private void housekeeping() {
1115     // If the todo list has > 0 messages, iterate looking for open region
1116     // messages. Send the master a message that we're working on its
1117     // processing so it doesn't assign the region elsewhere.
1118     if (this.toDo.isEmpty()) {
1119       return;
1120     }
1121     // This iterator isn't safe if elements are gone and HRS.Worker could
1122     // remove them (it already checks for null there). Goes from oldest.
1123     for (ToDoEntry e: this.toDo) {
1124       if(e == null) {
1125         LOG.warn("toDo gave a null entry during iteration");
1126         break;
1127       }
1128       HMsg msg = e.msg;
1129       if (msg != null) {
1130         if (msg.isType(HMsg.Type.MSG_REGION_OPEN)) {
1131           addProcessingMessage(msg.getRegionInfo());
1132         }
1133       } else {
1134         LOG.warn("Message is empty: " + e);
1135       }
1136     }
1137   }
1138 
1139   /** @return the HLog */
1140   public HLog getLog() {
1141     return this.hlog;
1142   }
1143 
1144   /**
1145    * Sets a flag that will cause all the HRegionServer threads to shut down
1146    * in an orderly fashion.  Used by unit tests.
1147    */
1148   public void stop() {
1149     this.stopRequested.set(true);
1150     synchronized(this) {
1151       // Wakes run() if it is sleeping
1152       notifyAll(); // FindBugs NN_NAKED_NOTIFY
1153     }
1154   }
1155 
1156   /**
1157    * Cause the server to exit without closing the regions it is serving, the
1158    * log it is using and without notifying the master.
1159    * Used unit testing and on catastrophic events such as HDFS is yanked out
1160    * from under hbase or we OOME.
1161    * @param reason the reason we are aborting
1162    * @param cause the exception that caused the abort, or null
1163    */
1164   public void abort(String reason, Throwable cause) {
1165     if (cause != null) {
1166       LOG.fatal("Aborting region server " + this + ": " + reason, cause);
1167     } else {
1168       LOG.fatal("Aborting region server " + this + ": " + reason);
1169     }
1170     this.abortRequested = true;
1171     this.reservedSpace.clear();
1172     if (this.metrics != null) {
1173       LOG.info("Dump of metrics: " + this.metrics);
1174     }
1175     stop();
1176   }
1177   
1178   /**
1179    * @see HRegionServer#abort(String, Throwable)
1180    */
1181   public void abort(String reason) {
1182     abort(reason, null);
1183   }
1184 
1185   /*
1186    * Simulate a kill -9 of this server.
1187    * Exits w/o closing regions or cleaninup logs but it does close socket in
1188    * case want to bring up server on old hostname+port immediately.
1189    */
1190   protected void kill() {
1191     this.killed = true;
1192     abort("Simulated kill");
1193   }
1194 
1195   /**
1196    * Wait on all threads to finish.
1197    * Presumption is that all closes and stops have already been called.
1198    */
1199   protected void join() {
1200     Threads.shutdown(this.majorCompactionChecker);
1201     Threads.shutdown(this.workerThread);
1202     Threads.shutdown(this.cacheFlusher);
1203     Threads.shutdown(this.compactSplitThread);
1204     Threads.shutdown(this.hlogRoller);
1205     this.replicationHandler.join();
1206   }
1207 
1208   private boolean getMaster() {
1209     HServerAddress masterAddress = null;
1210     while (masterAddress == null) {
1211       if (stopRequested.get()) {
1212         return false;
1213       }
1214       try {
1215         masterAddress = zooKeeperWrapper.readMasterAddressOrThrow();
1216       } catch (IOException e) {
1217         LOG.warn("Unable to read master address from ZooKeeper. Retrying." +
1218                  " Error was:", e);
1219         sleeper.sleep();
1220       }
1221     }
1222 
1223     LOG.info("Telling master at " + masterAddress + " that we are up");
1224     HMasterRegionInterface master = null;
1225     while (!stopRequested.get() && master == null) {
1226       try {
1227         // Do initial RPC setup.  The final argument indicates that the RPC
1228         // should retry indefinitely.
1229         master = (HMasterRegionInterface)HBaseRPC.waitForProxy(
1230           HMasterRegionInterface.class, HBaseRPCProtocolVersion.versionID,
1231           masterAddress.getInetSocketAddress(), this.conf, -1, this.rpcTimeout);
1232       } catch (IOException e) {
1233         LOG.warn("Unable to connect to master. Retrying. Error was:", e);
1234         sleeper.sleep();
1235       }
1236     }
1237     this.hbaseMaster = master;
1238     return true;
1239   }
1240 
1241   /*
1242    * Let the master know we're here
1243    * Run initialization using parameters passed us by the master.
1244    */
1245   private MapWritable reportForDuty() {
1246     while (!stopRequested.get() && !getMaster()) {
1247       sleeper.sleep();
1248       LOG.warn("Unable to get master for initialization");
1249     }
1250 
1251     MapWritable result = null;
1252     long lastMsg = 0;
1253     while(!stopRequested.get()) {
1254       try {
1255         this.requestCount.set(0);
1256         MemoryUsage memory =
1257           ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
1258         HServerLoad hsl = new HServerLoad(0, (int)memory.getUsed()/1024/1024,
1259           (int)memory.getMax()/1024/1024);
1260         this.serverInfo.setLoad(hsl);
1261         if (LOG.isDebugEnabled())
1262           LOG.debug("sending initial server load: " + hsl);
1263         lastMsg = System.currentTimeMillis();
1264         boolean startCodeOk = false;
1265         while(!startCodeOk) {
1266           this.serverInfo = createServerInfoWithNewStartCode(this.serverInfo);
1267           startCodeOk = zooKeeperWrapper.writeRSLocation(this.serverInfo);
1268           if(!startCodeOk) {
1269            LOG.debug("Start code already taken, trying another one");
1270           }
1271         }
1272         result = this.hbaseMaster.regionServerStartup(this.serverInfo);
1273         break;
1274       } catch (IOException e) {
1275         LOG.warn("error telling master we are up", e);
1276       }
1277       sleeper.sleep(lastMsg);
1278     }
1279     return result;
1280   }
1281 
1282   private HServerInfo createServerInfoWithNewStartCode(final HServerInfo hsi) {
1283     return new HServerInfo(hsi.getServerAddress(), hsi.getInfoPort(),
1284       hsi.getHostname());
1285   }
1286 
1287   /* Add to the outbound message buffer */
1288   private void reportOpen(HRegionInfo region) {
1289     this.outboundMsgs.add(new HMsg(HMsg.Type.MSG_REPORT_OPEN, region));
1290   }
1291 
1292   /* Add to the outbound message buffer */
1293   private void reportClose(HRegionInfo region) {
1294     reportClose(region, null);
1295   }
1296 
1297   /* Add to the outbound message buffer */
1298   private void reportClose(final HRegionInfo region, final byte[] message) {
1299     this.outboundMsgs.add(new HMsg(HMsg.Type.MSG_REPORT_CLOSE, region, message));
1300   }
1301 
1302   /**
1303    * Add to the outbound message buffer
1304    *
1305    * When a region splits, we need to tell the master that there are two new
1306    * regions that need to be assigned.
1307    *
1308    * We do not need to inform the master about the old region, because we've
1309    * updated the meta or root regions, and the master will pick that up on its
1310    * next rescan of the root or meta tables.
1311    */
1312   void reportSplit(HRegionInfo oldRegion, HRegionInfo newRegionA,
1313       HRegionInfo newRegionB) {
1314     this.outboundMsgs.add(new HMsg(HMsg.Type.MSG_REPORT_SPLIT_INCLUDES_DAUGHTERS,
1315       oldRegion, newRegionA, newRegionB,
1316       Bytes.toBytes("Daughters; " +
1317           newRegionA.getRegionNameAsString() + ", " +
1318           newRegionB.getRegionNameAsString())));
1319   }
1320 
1321   //////////////////////////////////////////////////////////////////////////////
1322   // HMaster-given operations
1323   //////////////////////////////////////////////////////////////////////////////
1324 
1325   /*
1326    * Data structure to hold a HMsg and retries count.
1327    */
1328   private static final class ToDoEntry {
1329     protected final AtomicInteger tries = new AtomicInteger(0);
1330     protected final HMsg msg;
1331 
1332     ToDoEntry(final HMsg msg) {
1333       this.msg = msg;
1334     }
1335   }
1336 
1337   final BlockingQueue<ToDoEntry> toDo = new LinkedBlockingQueue<ToDoEntry>();
1338   private Worker worker;
1339   private Thread workerThread;
1340 
1341   /** Thread that performs long running requests from the master */
1342   class Worker implements Runnable {
1343     void stop() {
1344       synchronized(toDo) {
1345         toDo.notifyAll();
1346       }
1347     }
1348 
1349     public void run() {
1350       try {
1351         while(!stopRequested.get()) {
1352           ToDoEntry e = null;
1353           try {
1354             e = toDo.poll(threadWakeFrequency, TimeUnit.MILLISECONDS);
1355             if(e == null || stopRequested.get()) {
1356               continue;
1357             }
1358             LOG.info("Worker: " + e.msg);
1359             HRegion region = null;
1360             HRegionInfo info = e.msg.getRegionInfo();
1361             switch(e.msg.getType()) {
1362 
1363             case MSG_REGIONSERVER_QUIESCE:
1364               closeUserRegions();
1365               break;
1366 
1367             case MSG_REGION_OPEN:
1368               // Open a region
1369               if (!haveRootRegion.get() && !info.isRootRegion()) {
1370                 // root region is not online yet. requeue this task
1371                 LOG.info("putting region open request back into queue because" +
1372                     " root region is not yet available");
1373                 try {
1374                   toDo.put(e);
1375                 } catch (InterruptedException ex) {
1376                   LOG.warn("insertion into toDo queue was interrupted", ex);
1377                   break;
1378                 }
1379               }
1380               openRegion(info);
1381               break;
1382 
1383             case MSG_REGION_CLOSE:
1384               // Close a region
1385               closeRegion(e.msg.getRegionInfo(), true);
1386               break;
1387 
1388             case MSG_REGION_CLOSE_WITHOUT_REPORT:
1389               // Close a region, don't reply
1390               closeRegion(e.msg.getRegionInfo(), false);
1391               break;
1392 
1393             case MSG_REGION_SPLIT:
1394               region = getRegion(info.getRegionName());
1395               region.flushcache();
1396               region.shouldSplit(true);
1397               // force a compaction; split will be side-effect.
1398               compactSplitThread.compactionRequested(region,
1399                 e.msg.getType().name());
1400               break;
1401 
1402             case MSG_REGION_MAJOR_COMPACT:
1403             case MSG_REGION_COMPACT:
1404               // Compact a region
1405               region = getRegion(info.getRegionName());
1406               compactSplitThread.compactionRequested(region,
1407                 e.msg.isType(Type.MSG_REGION_MAJOR_COMPACT),
1408                 e.msg.getType().name());
1409               break;
1410 
1411             case MSG_REGION_FLUSH:
1412               region = getRegion(info.getRegionName());
1413               region.flushcache();
1414               break;
1415 
1416             case TESTING_MSG_BLOCK_RS:
1417               while (!stopRequested.get()) {
1418                 Threads.sleep(1000);
1419                 LOG.info("Regionserver blocked by " +
1420                   HMsg.Type.TESTING_MSG_BLOCK_RS + "; " + stopRequested.get());
1421               }
1422               break;
1423 
1424             default:
1425               throw new AssertionError(
1426                   "Impossible state during msg processing.  Instruction: "
1427                   + e.msg.toString());
1428             }
1429           } catch (InterruptedException ex) {
1430             LOG.warn("Processing Worker queue", ex);
1431           } catch (Exception ex) {
1432             if (ex instanceof IOException) {
1433               ex = RemoteExceptionHandler.checkIOException((IOException) ex);
1434             }
1435             if(e != null && e.tries.get() < numRetries) {
1436               LOG.warn(ex);
1437               e.tries.incrementAndGet();
1438               try {
1439                 toDo.put(e);
1440               } catch (InterruptedException ie) {
1441                 throw new RuntimeException("Putting into msgQueue was " +
1442                     "interrupted.", ex);
1443               }
1444             } else {
1445               LOG.error("unable to process message" +
1446                   (e != null ? (": " + e.msg.toString()) : ""), ex);
1447               if (!checkFileSystem()) {
1448                 break;
1449               }
1450             }
1451           }
1452         }
1453       } catch(Throwable t) {
1454         if (!checkOOME(t)) {
1455           LOG.fatal("Unhandled exception", t);
1456         }
1457       } finally {
1458         LOG.info("worker thread exiting");
1459       }
1460     }
1461   }
1462 
1463   void openRegion(final HRegionInfo regionInfo) {
1464     Integer mapKey = Bytes.mapKey(regionInfo.getRegionName());
1465     HRegion region = this.onlineRegions.get(mapKey);
1466     if (region == null) {
1467       try {
1468         region = instantiateRegion(regionInfo, this.hlog);
1469         // Startup a compaction early if one is needed, if region has references
1470         // or if a store has too many store files
1471         if (region.hasReferences() || region.hasTooManyStoreFiles()) {
1472           this.compactSplitThread.compactionRequested(region,
1473             region.hasReferences() ? "Region has references on open" :
1474                                      "Region has too many store files");
1475         }
1476       } catch (Throwable e) {
1477         Throwable t = cleanup(e,
1478           "Error opening " + regionInfo.getRegionNameAsString());
1479         // TODO: add an extra field in HRegionInfo to indicate that there is
1480         // an error. We can't do that now because that would be an incompatible
1481         // change that would require a migration
1482         reportClose(regionInfo, StringUtils.stringifyException(t).getBytes());
1483         return;
1484       }
1485       addToOnlineRegions(region);
1486     }
1487     reportOpen(regionInfo);
1488   }
1489 
1490   /*
1491    * @param regionInfo RegionInfo for the Region we're to instantiate and
1492    * initialize.
1493    * @param wal Set into here the regions' seqid.
1494    * @return
1495    * @throws IOException
1496    */
1497   protected HRegion instantiateRegion(final HRegionInfo regionInfo, final HLog wal)
1498   throws IOException {
1499     Path dir =
1500       HTableDescriptor.getTableDir(rootDir, regionInfo.getTableDesc().getName());
1501     HRegion r = HRegion.newHRegion(dir, this.hlog, this.fs, conf, regionInfo,
1502       this.cacheFlusher);
1503     long seqid = r.initialize(new Progressable() {
1504       public void progress() {
1505         addProcessingMessage(regionInfo);
1506       }
1507     });
1508     // If seqid  > current wal seqid, the wal seqid is updated.
1509     if (wal != null) wal.setSequenceNumber(seqid);
1510     return r;
1511   }
1512 
1513   /**
1514    * Add a MSG_REPORT_PROCESS_OPEN to the outbound queue.
1515    * This method is called while region is in the queue of regions to process
1516    * and then while the region is being opened, it is called from the Worker
1517    * thread that is running the region open.
1518    * @param hri Region to add the message for
1519    */
1520   public void addProcessingMessage(final HRegionInfo hri) {
1521     getOutboundMsgs().add(new HMsg(HMsg.Type.MSG_REPORT_PROCESS_OPEN, hri));
1522   }
1523 
1524   protected void closeRegion(final HRegionInfo hri, final boolean reportWhenCompleted)
1525   throws IOException {
1526     HRegion region = this.removeFromOnlineRegions(hri);
1527     if (region != null) {
1528       region.close();
1529       if(reportWhenCompleted) {
1530         reportClose(hri);
1531       }
1532     }
1533   }
1534 
1535   /** Called either when the master tells us to restart or from stop() */
1536   ArrayList<HRegion> closeAllRegions() {
1537     ArrayList<HRegion> regionsToClose = new ArrayList<HRegion>();
1538     this.lock.writeLock().lock();
1539     try {
1540       regionsToClose.addAll(onlineRegions.values());
1541       onlineRegions.clear();
1542     } finally {
1543       this.lock.writeLock().unlock();
1544     }
1545     // Close any outstanding scanners.  Means they'll get an UnknownScanner
1546     // exception next time they come in.
1547     for (Map.Entry<String, InternalScanner> e: this.scanners.entrySet()) {
1548       try {
1549         e.getValue().close();
1550       } catch (IOException ioe) {
1551         LOG.warn("Closing scanner " + e.getKey(), ioe);
1552       }
1553     }
1554     for (HRegion region: regionsToClose) {
1555       if (LOG.isDebugEnabled()) {
1556         LOG.debug("closing region " + Bytes.toString(region.getRegionName()));
1557       }
1558       try {
1559         region.close(abortRequested);
1560       } catch (Throwable e) {
1561         cleanup(e, "Error closing " + Bytes.toString(region.getRegionName()));
1562       }
1563     }
1564     return regionsToClose;
1565   }
1566 
1567   /*
1568    * Thread to run close of a region.
1569    */
1570   private static class RegionCloserThread extends Thread {
1571     private final HRegion r;
1572 
1573     protected RegionCloserThread(final HRegion r) {
1574       super(Thread.currentThread().getName() + ".regionCloser." + r.toString());
1575       this.r = r;
1576     }
1577 
1578     @Override
1579     public void run() {
1580       try {
1581         if (LOG.isDebugEnabled()) {
1582           LOG.debug("Closing region " + r.toString());
1583         }
1584         r.close();
1585       } catch (Throwable e) {
1586         LOG.error("Error closing region " + r.toString(),
1587           RemoteExceptionHandler.checkThrowable(e));
1588       }
1589     }
1590   }
1591 
1592   /** Called as the first stage of cluster shutdown. */
1593   void closeUserRegions() {
1594     ArrayList<HRegion> regionsToClose = new ArrayList<HRegion>();
1595     this.lock.writeLock().lock();
1596     try {
1597       synchronized (onlineRegions) {
1598         for (Iterator<Map.Entry<Integer, HRegion>> i =
1599             onlineRegions.entrySet().iterator(); i.hasNext();) {
1600           Map.Entry<Integer, HRegion> e = i.next();
1601           HRegion r = e.getValue();
1602           if (!r.getRegionInfo().isMetaRegion()) {
1603             regionsToClose.add(r);
1604             i.remove();
1605           }
1606         }
1607       }
1608     } finally {
1609       this.lock.writeLock().unlock();
1610     }
1611     // Run region closes in parallel.
1612     Set<Thread> threads = new HashSet<Thread>();
1613     try {
1614       for (final HRegion r : regionsToClose) {
1615         RegionCloserThread t = new RegionCloserThread(r);
1616         t.start();
1617         threads.add(t);
1618       }
1619     } finally {
1620       for (Thread t : threads) {
1621         while (t.isAlive()) {
1622           try {
1623             t.join();
1624           } catch (InterruptedException e) {
1625             e.printStackTrace();
1626           }
1627         }
1628       }
1629     }
1630     this.quiesced.set(true);
1631     if (onlineRegions.size() == 0) {
1632       outboundMsgs.add(REPORT_EXITING);
1633     } else {
1634       outboundMsgs.add(REPORT_QUIESCED);
1635     }
1636   }
1637 
1638   //
1639   // HRegionInterface
1640   //
1641 
1642   public HRegionInfo getRegionInfo(final byte [] regionName)
1643   throws NotServingRegionException {
1644     requestCount.incrementAndGet();
1645     return getRegion(regionName).getRegionInfo();
1646   }
1647 
1648 
1649   public Result getClosestRowBefore(final byte [] regionName,
1650     final byte [] row, final byte [] family)
1651   throws IOException {
1652     checkOpen();
1653     requestCount.incrementAndGet();
1654     try {
1655       // locate the region we're operating on
1656       HRegion region = getRegion(regionName);
1657       // ask the region for all the data
1658 
1659       Result r = region.getClosestRowBefore(row, family);
1660       return r;
1661     } catch (Throwable t) {
1662       throw convertThrowableToIOE(cleanup(t));
1663     }
1664   }
1665 
1666   /** {@inheritDoc} */
1667   public Result get(byte [] regionName, Get get) throws IOException {
1668     checkOpen();
1669     requestCount.incrementAndGet();
1670     try {
1671       HRegion region = getRegion(regionName);
1672       return region.get(get, getLockFromId(get.getLockId()));
1673     } catch(Throwable t) {
1674       throw convertThrowableToIOE(cleanup(t));
1675     }
1676   }
1677 
1678   public boolean exists(byte [] regionName, Get get) throws IOException {
1679     checkOpen();
1680     requestCount.incrementAndGet();
1681     try {
1682       HRegion region = getRegion(regionName);
1683       Result r = region.get(get, getLockFromId(get.getLockId()));
1684       return r != null && !r.isEmpty();
1685     } catch(Throwable t) {
1686       throw convertThrowableToIOE(cleanup(t));
1687     }
1688   }
1689 
1690   public void put(final byte [] regionName, final Put put)
1691   throws IOException {
1692     if (put.getRow() == null)
1693       throw new IllegalArgumentException("update has null row");
1694 
1695     checkOpen();
1696     this.requestCount.incrementAndGet();
1697     HRegion region = getRegion(regionName);
1698     try {
1699       if (!region.getRegionInfo().isMetaTable()) {
1700         this.cacheFlusher.reclaimMemStoreMemory();
1701       }
1702       boolean writeToWAL = put.getWriteToWAL();
1703       region.put(put, getLockFromId(put.getLockId()), writeToWAL);
1704     } catch (Throwable t) {
1705       throw convertThrowableToIOE(cleanup(t));
1706     }
1707   }
1708 
1709   public int put(final byte[] regionName, final List<Put> puts)
1710   throws IOException {
1711     checkOpen();
1712     HRegion region = null;
1713     try {
1714       region = getRegion(regionName);
1715       if (!region.getRegionInfo().isMetaTable()) {
1716         this.cacheFlusher.reclaimMemStoreMemory();
1717       }
1718       
1719       @SuppressWarnings("unchecked")
1720       Pair<Put, Integer>[] putsWithLocks = new Pair[puts.size()];
1721       
1722       int i = 0;
1723       for (Put p : puts) {
1724         Integer lock = getLockFromId(p.getLockId());
1725         putsWithLocks[i++] = new Pair<Put, Integer>(p, lock);
1726       }
1727       
1728       this.requestCount.addAndGet(puts.size());
1729       OperationStatusCode[] codes = region.put(putsWithLocks);
1730       for (i = 0; i < codes.length; i++) {
1731         if (codes[i] != OperationStatusCode.SUCCESS)
1732           return i;
1733       }
1734       return -1;
1735     } catch (Throwable t) {
1736       throw convertThrowableToIOE(cleanup(t));
1737     }
1738   }
1739 
1740   private boolean checkAndMutate(final byte[] regionName, final byte [] row,
1741       final byte [] family, final byte [] qualifier, final byte [] value,
1742       final Writable w, Integer lock) throws IOException {
1743     checkOpen();
1744     this.requestCount.incrementAndGet();
1745     HRegion region = getRegion(regionName);
1746     try {
1747       if (!region.getRegionInfo().isMetaTable()) {
1748         this.cacheFlusher.reclaimMemStoreMemory();
1749       }
1750       return region.checkAndMutate(row, family, qualifier, value, w, lock,
1751           true);
1752     } catch (Throwable t) {
1753       throw convertThrowableToIOE(cleanup(t));
1754     }
1755   }
1756 
1757 
1758   /**
1759    *
1760    * @param regionName
1761    * @param row
1762    * @param family
1763    * @param qualifier
1764    * @param value the expected value
1765    * @param put
1766    * @throws IOException
1767    * @return true if the new put was execute, false otherwise
1768    */
1769   public boolean checkAndPut(final byte[] regionName, final byte [] row,
1770       final byte [] family, final byte [] qualifier, final byte [] value,
1771       final Put put) throws IOException{
1772     return checkAndMutate(regionName, row, family, qualifier, value, put,
1773         getLockFromId(put.getLockId()));
1774   }
1775 
1776   /**
1777    *
1778    * @param regionName
1779    * @param row
1780    * @param family
1781    * @param qualifier
1782    * @param value the expected value
1783    * @param delete
1784    * @throws IOException
1785    * @return true if the new put was execute, false otherwise
1786    */
1787   public boolean checkAndDelete(final byte[] regionName, final byte [] row,
1788       final byte [] family, final byte [] qualifier, final byte [] value,
1789       final Delete delete) throws IOException{
1790     return checkAndMutate(regionName, row, family, qualifier, value, delete,
1791         getLockFromId(delete.getLockId()));
1792   }
1793 
1794   //
1795   // remote scanner interface
1796   //
1797 
1798   public long openScanner(byte [] regionName, Scan scan)
1799   throws IOException {
1800     checkOpen();
1801     NullPointerException npe = null;
1802     if (regionName == null) {
1803       npe = new NullPointerException("regionName is null");
1804     } else if (scan == null) {
1805       npe = new NullPointerException("scan is null");
1806     }
1807     if (npe != null) {
1808       throw new IOException("Invalid arguments to openScanner", npe);
1809     }
1810     requestCount.incrementAndGet();
1811     try {
1812       HRegion r = getRegion(regionName);
1813       return addScanner(r.getScanner(scan));
1814     } catch (Throwable t) {
1815       throw convertThrowableToIOE(cleanup(t, "Failed openScanner"));
1816     }
1817   }
1818 
1819   protected long addScanner(InternalScanner s) throws LeaseStillHeldException {
1820     long scannerId = -1L;
1821     scannerId = rand.nextLong();
1822     String scannerName = String.valueOf(scannerId);
1823     scanners.put(scannerName, s);
1824     this.leases.
1825       createLease(scannerName, new ScannerListener(scannerName));
1826     return scannerId;
1827   }
1828 
1829   public Result next(final long scannerId) throws IOException {
1830     Result [] res = next(scannerId, 1);
1831     if(res == null || res.length == 0) {
1832       return null;
1833     }
1834     return res[0];
1835   }
1836 
1837   public Result [] next(final long scannerId, int nbRows) throws IOException {
1838     try {
1839       String scannerName = String.valueOf(scannerId);
1840       InternalScanner s = this.scanners.get(scannerName);
1841       if (s == null) {
1842         throw new UnknownScannerException("Name: " + scannerName);
1843       }
1844       try {
1845         checkOpen();
1846       } catch (IOException e) {
1847         // If checkOpen failed, server not running or filesystem gone,
1848         // cancel this lease; filesystem is gone or we're closing or something.
1849         this.leases.cancelLease(scannerName);
1850         throw e;
1851       }
1852       this.leases.renewLease(scannerName);
1853       List<Result> results = new ArrayList<Result>(nbRows);
1854       long currentScanResultSize = 0;
1855       List<KeyValue> values = new ArrayList<KeyValue>();
1856       for (int i = 0; i < nbRows && currentScanResultSize < maxScannerResultSize; i++) {
1857         requestCount.incrementAndGet();
1858         // Collect values to be returned here
1859         boolean moreRows = s.next(values);
1860         if (!values.isEmpty()) {
1861           for (KeyValue kv : values) {
1862             currentScanResultSize += kv.heapSize();
1863           }
1864           results.add(new Result(values));
1865         }
1866         if (!moreRows) {
1867           break;
1868         }
1869         values.clear();
1870       }
1871       // Below is an ugly hack where we cast the InternalScanner to be a
1872       // HRegion.RegionScanner.  The alternative is to change InternalScanner
1873       // interface but its used everywhere whereas we just need a bit of info
1874       // from HRegion.RegionScanner, IF its filter if any is done with the scan
1875       // and wants to tell the client to stop the scan.  This is done by passing
1876       // a null result.
1877       return ((HRegion.RegionScanner)s).isFilterDone() && results.isEmpty()?
1878         null: results.toArray(new Result[0]);
1879     } catch (Throwable t) {
1880       if (t instanceof NotServingRegionException) {
1881         String scannerName = String.valueOf(scannerId);
1882         this.scanners.remove(scannerName);
1883       }
1884       throw convertThrowableToIOE(cleanup(t));
1885     }
1886   }
1887 
1888   public void close(final long scannerId) throws IOException {
1889     try {
1890       checkOpen();
1891       requestCount.incrementAndGet();
1892       String scannerName = String.valueOf(scannerId);
1893       InternalScanner s = scanners.remove(scannerName);
1894       if (s != null) {
1895         s.close();
1896         this.leases.cancelLease(scannerName);
1897       }
1898     } catch (Throwable t) {
1899       throw convertThrowableToIOE(cleanup(t));
1900     }
1901   }
1902 
1903   /**
1904    * Instantiated as a scanner lease.
1905    * If the lease times out, the scanner is closed
1906    */
1907   private class ScannerListener implements LeaseListener {
1908     private final String scannerName;
1909 
1910     ScannerListener(final String n) {
1911       this.scannerName = n;
1912     }
1913 
1914     public void leaseExpired() {
1915       LOG.info("Scanner " + this.scannerName + " lease expired");
1916       InternalScanner s = scanners.remove(this.scannerName);
1917       if (s != null) {
1918         try {
1919           s.close();
1920         } catch (IOException e) {
1921           LOG.error("Closing scanner", e);
1922         }
1923       }
1924     }
1925   }
1926 
1927   //
1928   // Methods that do the actual work for the remote API
1929   //
1930   public void delete(final byte [] regionName, final Delete delete)
1931   throws IOException {
1932     checkOpen();
1933     try {
1934       boolean writeToWAL = true;
1935       this.requestCount.incrementAndGet();
1936       HRegion region = getRegion(regionName);
1937       if (!region.getRegionInfo().isMetaTable()) {
1938         this.cacheFlusher.reclaimMemStoreMemory();
1939       }
1940       Integer lid = getLockFromId(delete.getLockId());
1941       region.delete(delete, lid, writeToWAL);
1942     } catch (Throwable t) {
1943       throw convertThrowableToIOE(cleanup(t));
1944     }
1945   }
1946 
1947   public int delete(final byte[] regionName, final List<Delete> deletes)
1948   throws IOException {
1949     // Count of Deletes processed.
1950     int i = 0;
1951     checkOpen();
1952     HRegion region = null;
1953     try {
1954       boolean writeToWAL = true;
1955       region = getRegion(regionName);
1956       if (!region.getRegionInfo().isMetaTable()) {
1957         this.cacheFlusher.reclaimMemStoreMemory();
1958       }
1959       int size = deletes.size();
1960       Integer[] locks = new Integer[size];
1961       for (Delete delete: deletes) {
1962         this.requestCount.incrementAndGet();
1963         locks[i] = getLockFromId(delete.getLockId());
1964         region.delete(delete, locks[i], writeToWAL);
1965         i++;
1966       }
1967     } catch (WrongRegionException ex) {
1968       LOG.debug("Batch deletes: " + i, ex);
1969       return i;
1970     } catch (NotServingRegionException ex) {
1971       return i;
1972     } catch (Throwable t) {
1973       throw convertThrowableToIOE(cleanup(t));
1974     }
1975     return -1;
1976   }
1977 
1978   public long lockRow(byte [] regionName, byte [] row)
1979   throws IOException {
1980     checkOpen();
1981     NullPointerException npe = null;
1982     if(regionName == null) {
1983       npe = new NullPointerException("regionName is null");
1984     } else if(row == null) {
1985       npe = new NullPointerException("row to lock is null");
1986     }
1987     if(npe != null) {
1988       IOException io = new IOException("Invalid arguments to lockRow");
1989       io.initCause(npe);
1990       throw io;
1991     }
1992     requestCount.incrementAndGet();
1993     try {
1994       HRegion region = getRegion(regionName);
1995       Integer r = region.obtainRowLock(row);
1996       long lockId = addRowLock(r,region);
1997       LOG.debug("Row lock " + lockId + " explicitly acquired by client");
1998       return lockId;
1999     } catch (Throwable t) {
2000       throw convertThrowableToIOE(cleanup(t,
2001         "Error obtaining row lock (fsOk: " + this.fsOk + ")"));
2002     }
2003   }
2004 
2005   protected long addRowLock(Integer r, HRegion region) throws LeaseStillHeldException {
2006     long lockId = -1L;
2007     lockId = rand.nextLong();
2008     String lockName = String.valueOf(lockId);
2009     rowlocks.put(lockName, r);
2010     this.leases.
2011       createLease(lockName, new RowLockListener(lockName, region));
2012     return lockId;
2013   }
2014 
2015   /**
2016    * Method to get the Integer lock identifier used internally
2017    * from the long lock identifier used by the client.
2018    * @param lockId long row lock identifier from client
2019    * @return intId Integer row lock used internally in HRegion
2020    * @throws IOException Thrown if this is not a valid client lock id.
2021    */
2022   Integer getLockFromId(long lockId)
2023   throws IOException {
2024     if (lockId == -1L) {
2025       return null;
2026     }
2027     String lockName = String.valueOf(lockId);
2028     Integer rl = rowlocks.get(lockName);
2029     if (rl == null) {
2030       throw new IOException("Invalid row lock");
2031     }
2032     this.leases.renewLease(lockName);
2033     return rl;
2034   }
2035 
2036   public void unlockRow(byte [] regionName, long lockId)
2037   throws IOException {
2038     checkOpen();
2039     NullPointerException npe = null;
2040     if(regionName == null) {
2041       npe = new NullPointerException("regionName is null");
2042     } else if(lockId == -1L) {
2043       npe = new NullPointerException("lockId is null");
2044     }
2045     if(npe != null) {
2046       IOException io = new IOException("Invalid arguments to unlockRow");
2047       io.initCause(npe);
2048       throw io;
2049     }
2050     requestCount.incrementAndGet();
2051     try {
2052       HRegion region = getRegion(regionName);
2053       String lockName = String.valueOf(lockId);
2054       Integer r = rowlocks.remove(lockName);
2055       if(r == null) {
2056         throw new UnknownRowLockException(lockName);
2057       }
2058       region.releaseRowLock(r);
2059       this.leases.cancelLease(lockName);
2060       LOG.debug("Row lock " + lockId + " has been explicitly released by client");
2061     } catch (Throwable t) {
2062       throw convertThrowableToIOE(cleanup(t));
2063     }
2064   }
2065 
2066   @Override
2067   public void bulkLoadHFile(
2068       String hfilePath, byte[] regionName, byte[] familyName)
2069   throws IOException {
2070     HRegion region = getRegion(regionName);
2071     region.bulkLoadHFile(hfilePath, familyName);
2072   }
2073 
2074   Map<String, Integer> rowlocks =
2075     new ConcurrentHashMap<String, Integer>();
2076 
2077   /**
2078    * Instantiated as a row lock lease.
2079    * If the lease times out, the row lock is released
2080    */
2081   private class RowLockListener implements LeaseListener {
2082     private final String lockName;
2083     private final HRegion region;
2084 
2085     RowLockListener(final String lockName, final HRegion region) {
2086       this.lockName = lockName;
2087       this.region = region;
2088     }
2089 
2090     public void leaseExpired() {
2091       LOG.info("Row Lock " + this.lockName + " lease expired");
2092       Integer r = rowlocks.remove(this.lockName);
2093       if(r != null) {
2094         region.releaseRowLock(r);
2095       }
2096     }
2097   }
2098 
2099   /** @return the info server */
2100   public InfoServer getInfoServer() {
2101     return infoServer;
2102   }
2103 
2104   /**
2105    * @return true if a stop has been requested.
2106    */
2107   public boolean isStopRequested() {
2108     return this.stopRequested.get();
2109   }
2110 
2111   /**
2112    *
2113    * @return the configuration
2114    */
2115   public Configuration getConfiguration() {
2116     return conf;
2117   }
2118 
2119   /** @return the write lock for the server */
2120   ReentrantReadWriteLock.WriteLock getWriteLock() {
2121     return lock.writeLock();
2122   }
2123 
2124   /**
2125    * @return Immutable list of this servers regions.
2126    */
2127   public Collection<HRegion> getOnlineRegions() {
2128     return Collections.unmodifiableCollection(onlineRegions.values());
2129   }
2130 
2131   public HRegion [] getOnlineRegionsAsArray() {
2132     return getOnlineRegions().toArray(new HRegion[0]);
2133   }
2134 
2135   /**
2136    * @return The HRegionInfos from online regions sorted
2137    */
2138   public SortedSet<HRegionInfo> getSortedOnlineRegionInfos() {
2139     SortedSet<HRegionInfo> result = new TreeSet<HRegionInfo>();
2140     synchronized(this.onlineRegions) {
2141       for (HRegion r: this.onlineRegions.values()) {
2142         result.add(r.getRegionInfo());
2143       }
2144     }
2145     return result;
2146   }
2147 
2148   public void addToOnlineRegions(final HRegion r) {
2149     this.lock.writeLock().lock();
2150     try {
2151       this.onlineRegions.put(Bytes.mapKey(r.getRegionInfo().getRegionName()), r);
2152     } finally {
2153       this.lock.writeLock().unlock();
2154     }
2155   }
2156 
2157   public HRegion removeFromOnlineRegions(HRegionInfo hri) {
2158     this.lock.writeLock().lock();
2159     HRegion toReturn = null;
2160     try {
2161       toReturn = onlineRegions.remove(Bytes.mapKey(hri.getRegionName()));
2162     } finally {
2163       this.lock.writeLock().unlock();
2164     }
2165     return toReturn;
2166   }
2167 
2168   /**
2169    * @return A new Map of online regions sorted by region size with the first
2170    * entry being the biggest.
2171    */
2172   public SortedMap<Long, HRegion> getCopyOfOnlineRegionsSortedBySize() {
2173     // we'll sort the regions in reverse
2174     SortedMap<Long, HRegion> sortedRegions = new TreeMap<Long, HRegion>(
2175         new Comparator<Long>() {
2176           public int compare(Long a, Long b) {
2177             return -1 * a.compareTo(b);
2178           }
2179         });
2180     // Copy over all regions. Regions are sorted by size with biggest first.
2181     synchronized (this.onlineRegions) {
2182       for (HRegion region : this.onlineRegions.values()) {
2183         sortedRegions.put(Long.valueOf(region.memstoreSize.get()), region);
2184       }
2185     }
2186     return sortedRegions;
2187   }
2188 
2189   /**
2190    * @param regionName
2191    * @return HRegion for the passed <code>regionName</code> or null if named
2192    * region is not member of the online regions.
2193    */
2194   public HRegion getOnlineRegion(final byte [] regionName) {
2195     return onlineRegions.get(Bytes.mapKey(regionName));
2196   }
2197 
2198   /** @return the request count */
2199   public AtomicInteger getRequestCount() {
2200     return this.requestCount;
2201   }
2202 
2203   /** @return reference to FlushRequester */
2204   public FlushRequester getFlushRequester() {
2205     return this.cacheFlusher;
2206   }
2207 
2208   /**
2209    * Protected utility method for safely obtaining an HRegion handle.
2210    * @param regionName Name of online {@link HRegion} to return
2211    * @return {@link HRegion} for <code>regionName</code>
2212    * @throws NotServingRegionException
2213    */
2214   protected HRegion getRegion(final byte [] regionName)
2215   throws NotServingRegionException {
2216     HRegion region = null;
2217     this.lock.readLock().lock();
2218     try {
2219       region = onlineRegions.get(Integer.valueOf(Bytes.hashCode(regionName)));
2220       if (region == null) {
2221         throw new NotServingRegionException(regionName);
2222       }
2223       return region;
2224     } finally {
2225       this.lock.readLock().unlock();
2226     }
2227   }
2228 
2229   /**
2230    * Get the top N most loaded regions this server is serving so we can
2231    * tell the master which regions it can reallocate if we're overloaded.
2232    * TODO: actually calculate which regions are most loaded. (Right now, we're
2233    * just grabbing the first N regions being served regardless of load.)
2234    */
2235   protected HRegionInfo[] getMostLoadedRegions() {
2236     ArrayList<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2237     synchronized (onlineRegions) {
2238       for (HRegion r : onlineRegions.values()) {
2239         if (r.isClosed() || r.isClosing()) {
2240           continue;
2241         }
2242         if (regions.size() < numRegionsToReport) {
2243           regions.add(r.getRegionInfo());
2244         } else {
2245           break;
2246         }
2247       }
2248     }
2249     return regions.toArray(new HRegionInfo[regions.size()]);
2250   }
2251 
2252   /**
2253    * Called to verify that this server is up and running.
2254    *
2255    * @throws IOException
2256    */
2257   protected void checkOpen() throws IOException {
2258     if (this.stopRequested.get() || this.abortRequested) {
2259       throw new IOException("Server not running" +
2260         (this.abortRequested? ", aborting": ""));
2261     }
2262     if (!fsOk) {
2263       throw new IOException("File system not available");
2264     }
2265   }
2266 
2267   /**
2268    * @return Returns list of non-closed regions hosted on this server.  If no
2269    * regions to check, returns an empty list.
2270    */
2271   protected Set<HRegion> getRegionsToCheck() {
2272     HashSet<HRegion> regionsToCheck = new HashSet<HRegion>();
2273     //TODO: is this locking necessary?
2274     lock.readLock().lock();
2275     try {
2276       regionsToCheck.addAll(this.onlineRegions.values());
2277     } finally {
2278       lock.readLock().unlock();
2279     }
2280     // Purge closed regions.
2281     for (final Iterator<HRegion> i = regionsToCheck.iterator(); i.hasNext();) {
2282       HRegion r = i.next();
2283       if (r.isClosed()) {
2284         i.remove();
2285       }
2286     }
2287     return regionsToCheck;
2288   }
2289 
2290   public long getProtocolVersion(final String protocol,
2291       final long clientVersion)
2292   throws IOException {
2293     if (protocol.equals(HRegionInterface.class.getName())) {
2294       return HBaseRPCProtocolVersion.versionID;
2295     }
2296     throw new IOException("Unknown protocol to name node: " + protocol);
2297   }
2298 
2299   /**
2300    * @return Queue to which you can add outbound messages.
2301    */
2302   protected LinkedBlockingQueue<HMsg> getOutboundMsgs() {
2303     return this.outboundMsgs;
2304   }
2305 
2306   /**
2307    * Return the total size of all memstores in every region.
2308    * @return memstore size in bytes
2309    */
2310   public long getGlobalMemStoreSize() {
2311     long total = 0;
2312     synchronized (onlineRegions) {
2313       for (HRegion region : onlineRegions.values()) {
2314         total += region.memstoreSize.get();
2315       }
2316     }
2317     return total;
2318   }
2319 
2320   /**
2321    * @return Return the leases.
2322    */
2323   protected Leases getLeases() {
2324     return leases;
2325   }
2326 
2327   /**
2328    * @return Return the rootDir.
2329    */
2330   protected Path getRootDir() {
2331     return rootDir;
2332   }
2333 
2334   /**
2335    * @return Return the fs.
2336    */
2337   protected FileSystem getFileSystem() {
2338     return fs;
2339   }
2340 
2341   /**
2342    * @return Info on port this server has bound to, etc.
2343    */
2344   public HServerInfo getServerInfo() { return this.serverInfo; }
2345 
2346   /** {@inheritDoc} */
2347   public long incrementColumnValue(byte [] regionName, byte [] row,
2348       byte [] family, byte [] qualifier, long amount, boolean writeToWAL)
2349   throws IOException {
2350     checkOpen();
2351 
2352     if (regionName == null) {
2353       throw new IOException("Invalid arguments to incrementColumnValue " +
2354       "regionName is null");
2355     }
2356     requestCount.incrementAndGet();
2357     try {
2358       HRegion region = getRegion(regionName);
2359       long retval = region.incrementColumnValue(row, family, qualifier, amount,
2360           writeToWAL);
2361 
2362       return retval;
2363     } catch (IOException e) {
2364       checkFileSystem();
2365       throw e;
2366     }
2367   }
2368 
2369   /** {@inheritDoc} */
2370   public HRegionInfo[] getRegionsAssignment() throws IOException {
2371     HRegionInfo[] regions = new HRegionInfo[onlineRegions.size()];
2372     Iterator<HRegion> ite = onlineRegions.values().iterator();
2373     for(int i = 0; ite.hasNext(); i++) {
2374       regions[i] = ite.next().getRegionInfo();
2375     }
2376     return regions;
2377   }
2378 
2379   /** {@inheritDoc} */
2380   public HServerInfo getHServerInfo() throws IOException {
2381     return serverInfo;
2382   }
2383 
2384   @Override
2385   public MultiPutResponse multiPut(MultiPut puts) throws IOException {
2386     MultiPutResponse resp = new MultiPutResponse();
2387 
2388     // do each region as it's own.
2389     for( Map.Entry<byte[], List<Put>> e: puts.puts.entrySet()) {
2390       int result = put(e.getKey(), e.getValue());
2391       resp.addResult(e.getKey(), result);
2392 
2393       e.getValue().clear(); // clear some RAM
2394     }
2395 
2396     return resp;
2397   }
2398 
2399   public String toString() {
2400     return this.serverInfo.toString();
2401   }
2402 
2403   /**
2404    * Interval at which threads should run
2405    * @return the interval
2406    */
2407   public int getThreadWakeFrequency() {
2408     return threadWakeFrequency;
2409   }
2410 
2411   //
2412   // Main program and support routines
2413   //
2414 
2415   /**
2416    * @param hrs
2417    * @return Thread the RegionServer is running in correctly named.
2418    * @throws IOException
2419    */
2420   public static Thread startRegionServer(final HRegionServer hrs)
2421   throws IOException {
2422     return startRegionServer(hrs,
2423       "regionserver" + hrs.getServerInfo().getServerAddress().getPort());
2424   }
2425 
2426   /**
2427    * @param hrs
2428    * @param name
2429    * @return Thread the RegionServer is running in correctly named.
2430    * @throws IOException
2431    */
2432   public static Thread startRegionServer(final HRegionServer hrs,
2433       final String name)
2434   throws IOException {
2435     Thread t = new Thread(hrs);
2436     t.setName(name);
2437     t.start();
2438     // Install shutdown hook that will catch signals and run an orderly shutdown
2439     // of the hrs.
2440     ShutdownHook.install(hrs.getConfiguration(),
2441       FileSystem.get(hrs.getConfiguration()), hrs, t);
2442     return t;
2443   }
2444 
2445   private static void printUsageAndExit() {
2446     printUsageAndExit(null);
2447   }
2448 
2449   private static void printUsageAndExit(final String message) {
2450     if (message != null) {
2451       System.err.println(message);
2452     }
2453     System.err.println("Usage: java org.apache.hbase.HRegionServer start|stop [-D <conf.param=value>]");
2454     System.exit(0);
2455   }
2456 
2457   /**
2458    * Utility for constructing an instance of the passed HRegionServer class.
2459    * @param regionServerClass
2460    * @param conf2
2461    * @return HRegionServer instance.
2462    */
2463   public static HRegionServer constructRegionServer(Class<? extends HRegionServer> regionServerClass,
2464       final Configuration conf2)  {
2465     try {
2466       Constructor<? extends HRegionServer> c =
2467         regionServerClass.getConstructor(Configuration.class);
2468       return c.newInstance(conf2);
2469     } catch (Exception e) {
2470       throw new RuntimeException("Failed construction of " +
2471         "Master: " + regionServerClass.toString(), e);
2472     }
2473   }
2474 
2475   @Override
2476   public void replicateLogEntries(HLog.Entry[] entries) throws IOException {
2477     this.replicationHandler.replicateLogEntries(entries);
2478   }
2479 
2480   /**
2481    * Do class main.
2482    * @param args
2483    * @param regionServerClass HRegionServer to instantiate.
2484    */
2485   protected static void doMain(final String [] args,
2486       final Class<? extends HRegionServer> regionServerClass) {
2487     Configuration conf = HBaseConfiguration.create();
2488 
2489     Options opt = new Options();
2490     opt.addOption("D", true, "Override HBase Configuration Settings");
2491     try {
2492       CommandLine cmd = new GnuParser().parse(opt, args);
2493 
2494       if (cmd.hasOption("D")) {
2495         for (String confOpt : cmd.getOptionValues("D")) {
2496           String[] kv = confOpt.split("=", 2);
2497           if (kv.length == 2) {
2498             conf.set(kv[0], kv[1]);
2499             LOG.debug("-D configuration override: " + kv[0] + "=" + kv[1]);
2500           } else {
2501             throw new ParseException("-D option format invalid: " + confOpt);
2502           }
2503         }
2504       }
2505 
2506       if (cmd.getArgList().contains("start")) {
2507         try {
2508           // If 'local', don't start a region server here.  Defer to
2509           // LocalHBaseCluster.  It manages 'local' clusters.
2510           if (LocalHBaseCluster.isLocal(conf)) {
2511             LOG.warn("Not starting a distinct region server because " +
2512               HConstants.CLUSTER_DISTRIBUTED + " is false");
2513           } else {
2514             RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
2515             if (runtime != null) {
2516               LOG.info("vmInputArguments=" + runtime.getInputArguments());
2517             }
2518             HRegionServer hrs = constructRegionServer(regionServerClass, conf);
2519             startRegionServer(hrs);
2520           }
2521         } catch (Throwable t) {
2522           LOG.error( "Can not start region server because "+
2523               StringUtils.stringifyException(t) );
2524           System.exit(-1);
2525         }
2526       } else if (cmd.getArgList().contains("stop")) {
2527         throw new ParseException("To shutdown the regionserver run " +
2528             "bin/hbase-daemon.sh stop regionserver or send a kill signal to" +
2529             "the regionserver pid");
2530       } else {
2531         throw new ParseException("Unknown argument(s): " +
2532             org.apache.commons.lang.StringUtils.join(cmd.getArgs(), " "));
2533       }
2534     } catch (ParseException e) {
2535       LOG.error("Could not parse", e);
2536       printUsageAndExit();
2537     }
2538   }
2539 
2540   /**
2541    * @param args
2542    */
2543   public static void main(String [] args) {
2544     Configuration conf = HBaseConfiguration.create();
2545     @SuppressWarnings("unchecked")
2546     Class<? extends HRegionServer> regionServerClass =
2547       (Class<? extends HRegionServer>) conf.getClass(HConstants.REGION_SERVER_IMPL,
2548         HRegionServer.class);
2549     doMain(args, regionServerClass);
2550   }
2551 
2552   public int getNumberOfOnlineRegions() {
2553     return onlineRegions.size();
2554   }
2555 }