1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.io.InterruptedIOException;
23  import java.lang.Thread.UncaughtExceptionHandler;
24  import java.lang.annotation.Retention;
25  import java.lang.annotation.RetentionPolicy;
26  import java.lang.management.ManagementFactory;
27  import java.lang.management.MemoryUsage;
28  import java.lang.reflect.Constructor;
29  import java.net.BindException;
30  import java.net.InetSocketAddress;
31  import java.util.ArrayList;
32  import java.util.Collection;
33  import java.util.Collections;
34  import java.util.Comparator;
35  import java.util.HashMap;
36  import java.util.HashSet;
37  import java.util.Iterator;
38  import java.util.List;
39  import java.util.Map;
40  import java.util.Map.Entry;
41  import java.util.NavigableMap;
42  import java.util.Random;
43  import java.util.Set;
44  import java.util.SortedMap;
45  import java.util.TreeMap;
46  import java.util.TreeSet;
47  import java.util.concurrent.atomic.AtomicLong;
48  import java.util.concurrent.ConcurrentHashMap;
49  import java.util.concurrent.ConcurrentMap;
50  import java.util.concurrent.ConcurrentSkipListMap;
51  import java.util.concurrent.locks.ReentrantReadWriteLock;
52  
53  import javax.management.ObjectName;
54  
55  import com.google.common.annotations.VisibleForTesting;
56  import com.google.common.collect.Maps;
57  
58  import org.apache.hadoop.hbase.util.ByteStringer;
59  import org.apache.commons.logging.Log;
60  import org.apache.commons.logging.LogFactory;
61  import org.apache.hadoop.hbase.classification.InterfaceAudience;
62  import org.apache.hadoop.conf.Configuration;
63  import org.apache.hadoop.fs.FileSystem;
64  import org.apache.hadoop.fs.Path;
65  import org.apache.hadoop.hbase.Cell;
66  import org.apache.hadoop.hbase.CellScannable;
67  import org.apache.hadoop.hbase.CellScanner;
68  import org.apache.hadoop.hbase.CellUtil;
69  import org.apache.hadoop.hbase.Chore;
70  import org.apache.hadoop.hbase.ClockOutOfSyncException;
71  import org.apache.hadoop.hbase.DoNotRetryIOException;
72  import org.apache.hadoop.hbase.HBaseConfiguration;
73  import org.apache.hadoop.hbase.HBaseIOException;
74  import org.apache.hadoop.hbase.HConstants;
75  import org.apache.hadoop.hbase.HRegionInfo;
76  import org.apache.hadoop.hbase.HTableDescriptor;
77  import org.apache.hadoop.hbase.HealthCheckChore;
78  import org.apache.hadoop.hbase.KeyValue;
79  import org.apache.hadoop.hbase.KeyValueUtil;
80  import org.apache.hadoop.hbase.NotServingRegionException;
81  import org.apache.hadoop.hbase.RemoteExceptionHandler;
82  import org.apache.hadoop.hbase.ServerName;
83  import org.apache.hadoop.hbase.Stoppable;
84  import org.apache.hadoop.hbase.TableDescriptors;
85  import org.apache.hadoop.hbase.TableName;
86  import org.apache.hadoop.hbase.UnknownScannerException;
87  import org.apache.hadoop.hbase.YouAreDeadException;
88  import org.apache.hadoop.hbase.ZNodeClearer;
89  import org.apache.hadoop.hbase.catalog.CatalogTracker;
90  import org.apache.hadoop.hbase.catalog.MetaEditor;
91  import org.apache.hadoop.hbase.catalog.MetaReader;
92  import org.apache.hadoop.hbase.client.Append;
93  import org.apache.hadoop.hbase.client.Delete;
94  import org.apache.hadoop.hbase.client.Get;
95  import org.apache.hadoop.hbase.client.HConnectionManager;
96  import org.apache.hadoop.hbase.client.Increment;
97  import org.apache.hadoop.hbase.client.Mutation;
98  import org.apache.hadoop.hbase.client.Put;
99  import org.apache.hadoop.hbase.client.Result;
100 import org.apache.hadoop.hbase.client.RowMutations;
101 import org.apache.hadoop.hbase.client.Scan;
102 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
103 import org.apache.hadoop.hbase.DroppedSnapshotException;
104 import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException;
105 import org.apache.hadoop.hbase.exceptions.OperationConflictException;
106 import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException;
107 import org.apache.hadoop.hbase.exceptions.RegionMovedException;
108 import org.apache.hadoop.hbase.exceptions.RegionOpeningException;
109 import org.apache.hadoop.hbase.exceptions.UnknownProtocolException;
110 import org.apache.hadoop.hbase.executor.ExecutorService;
111 import org.apache.hadoop.hbase.executor.ExecutorType;
112 import org.apache.hadoop.hbase.filter.ByteArrayComparable;
113 import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
114 import org.apache.hadoop.hbase.fs.HFileSystem;
115 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
116 import org.apache.hadoop.hbase.io.hfile.HFile;
117 import org.apache.hadoop.hbase.ipc.HBaseRPCErrorHandler;
118 import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
119 import org.apache.hadoop.hbase.ipc.PriorityFunction;
120 import org.apache.hadoop.hbase.ipc.RpcCallContext;
121 import org.apache.hadoop.hbase.ipc.RpcClient;
122 import org.apache.hadoop.hbase.ipc.RpcServer;
123 import org.apache.hadoop.hbase.ipc.RpcServer.BlockingServiceAndInterface;
124 import org.apache.hadoop.hbase.ipc.RpcServerInterface;
125 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
126 import org.apache.hadoop.hbase.ipc.ServerRpcController;
127 import org.apache.hadoop.hbase.master.RegionState.State;
128 import org.apache.hadoop.hbase.master.SplitLogManager;
129 import org.apache.hadoop.hbase.master.TableLockManager;
130 import org.apache.hadoop.hbase.procedure.RegionServerProcedureManagerHost;
131 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
132 import org.apache.hadoop.hbase.protobuf.RequestConverter;
133 import org.apache.hadoop.hbase.protobuf.ResponseConverter;
134 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
135 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CloseRegionRequest;
136 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CloseRegionResponse;
137 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CompactRegionRequest;
138 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CompactRegionResponse;
139 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.FlushRegionRequest;
140 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.FlushRegionResponse;
141 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetOnlineRegionRequest;
142 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetOnlineRegionResponse;
143 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoRequest;
144 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse;
145 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetServerInfoRequest;
146 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetServerInfoResponse;
147 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetStoreFileRequest;
148 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetStoreFileResponse;
149 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.MergeRegionsRequest;
150 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.MergeRegionsResponse;
151 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest;
152 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest.RegionOpenInfo;
153 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionResponse;
154 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionResponse.RegionOpeningState;
155 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ReplicateWALEntryRequest;
156 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ReplicateWALEntryResponse;
157 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.RollWALWriterRequest;
158 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.RollWALWriterResponse;
159 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.SplitRegionRequest;
160 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.SplitRegionResponse;
161 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.StopServerRequest;
162 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.StopServerResponse;
163 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.UpdateFavoredNodesRequest;
164 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.UpdateFavoredNodesResponse;
165 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.WALEntry;
166 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
167 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileRequest;
168 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileRequest.FamilyPath;
169 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileResponse;
170 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.Condition;
171 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall;
172 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceRequest;
173 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceResponse;
174 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest;
175 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetResponse;
176 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MultiRequest;
177 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MultiResponse;
178 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutateRequest;
179 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutateResponse;
180 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto;
181 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType;
182 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.RegionAction;
183 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.RegionActionResult;
184 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ResultOrException;
185 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
186 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
187 import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos;
188 import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionLoad;
189 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.Coprocessor;
190 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.NameStringPair;
191 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
192 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier;
193 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType;
194 import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.RequestHeader;
195 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.GetLastFlushedSequenceIdRequest;
196 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerReportRequest;
197 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest;
198 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupResponse;
199 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStatusService;
200 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
201 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
202 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRSFatalErrorRequest;
203 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
204 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
205 import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
206 import org.apache.hadoop.hbase.regionserver.HRegion.Operation;
207 import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException;
208 import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
209 import org.apache.hadoop.hbase.regionserver.handler.CloseMetaHandler;
210 import org.apache.hadoop.hbase.regionserver.handler.CloseRegionHandler;
211 import org.apache.hadoop.hbase.regionserver.handler.OpenMetaHandler;
212 import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler;
213 import org.apache.hadoop.hbase.regionserver.wal.HLog;
214 import org.apache.hadoop.hbase.regionserver.wal.HLogFactory;
215 import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
216 import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter;
217 import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
218 import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
219 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
220 import org.apache.hadoop.hbase.security.UserProvider;
221 import org.apache.hadoop.hbase.trace.SpanReceiverHost;
222 import org.apache.hadoop.hbase.util.Bytes;
223 import org.apache.hadoop.hbase.util.CompressionTest;
224 import org.apache.hadoop.hbase.util.ConfigUtil;
225 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
226 import org.apache.hadoop.hbase.util.FSTableDescriptors;
227 import org.apache.hadoop.hbase.util.FSUtils;
228 import org.apache.hadoop.hbase.util.InfoServer;
229 import org.apache.hadoop.hbase.util.JvmPauseMonitor;
230 import org.apache.hadoop.hbase.util.Pair;
231 import org.apache.hadoop.hbase.util.Sleeper;
232 import org.apache.hadoop.hbase.util.Strings;
233 import org.apache.hadoop.hbase.util.Threads;
234 import org.apache.hadoop.hbase.util.VersionInfo;
235 import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
236 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
237 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
238 import org.apache.hadoop.hbase.zookeeper.RecoveringRegionWatcher;
239 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
240 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
241 import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
242 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
243 import org.apache.hadoop.ipc.RemoteException;
244 import org.apache.hadoop.metrics.util.MBeanUtil;
245 import org.apache.hadoop.net.DNS;
246 import org.apache.hadoop.util.ReflectionUtils;
247 import org.apache.hadoop.util.StringUtils;
248 import org.apache.zookeeper.KeeperException;
249 import org.apache.zookeeper.KeeperException.NoNodeException;
250 import org.apache.zookeeper.data.Stat;
251 import org.cliffc.high_scale_lib.Counter;
252 
253 import com.google.protobuf.BlockingRpcChannel;
254 import com.google.protobuf.ByteString;
255 import com.google.protobuf.Descriptors;
256 import com.google.protobuf.Message;
257 import com.google.protobuf.RpcCallback;
258 import com.google.protobuf.RpcController;
259 import com.google.protobuf.Service;
260 import com.google.protobuf.ServiceException;
261 import com.google.protobuf.TextFormat;
262 
263 /**
264  * HRegionServer makes a set of HRegions available to clients. It checks in with
265  * the HMaster. There are many HRegionServers in a single HBase deployment.
266  */
267 @InterfaceAudience.Private
268 @SuppressWarnings("deprecation")
269 public class HRegionServer implements ClientProtos.ClientService.BlockingInterface,
270   AdminProtos.AdminService.BlockingInterface, Runnable, RegionServerServices,
271   HBaseRPCErrorHandler, LastSequenceId {
272 
273   public static final Log LOG = LogFactory.getLog(HRegionServer.class);
274 
275   private final Random rand;
276 
277   private final AtomicLong scannerIdGen = new AtomicLong(0L);
278 
279   /*
280    * Strings to be used in forming the exception message for
281    * RegionsAlreadyInTransitionException.
282    */
283   protected static final String OPEN = "OPEN";
284   protected static final String CLOSE = "CLOSE";
285 
286   // Region name vs. current action in progress:
287   //   true  - an open-region action is in progress
288   //   false - a close-region action is in progress
289   protected final ConcurrentMap<byte[], Boolean> regionsInTransitionInRS =
290     new ConcurrentSkipListMap<byte[], Boolean>(Bytes.BYTES_COMPARATOR);
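      // A sketch of the typical guard (openRegion/closeRegion follow this pattern;
      // encodedName stands in for the region's encoded name bytes):
      //   Boolean previous = regionsInTransitionInRS.putIfAbsent(encodedName, Boolean.TRUE);
      //   if (previous != null) { /* a conflicting open/close is already in progress */ }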
291 
292   /** RPC scheduler to use for the region server. */
293   public static final String REGION_SERVER_RPC_SCHEDULER_FACTORY_CLASS =
294       "hbase.region.server.rpc.scheduler.factory.class";
295 
296   protected long maxScannerResultSize;
297 
298   // Cache flushing
299   protected MemStoreFlusher cacheFlusher;
300 
301   // catalog tracker
302   protected CatalogTracker catalogTracker;
303 
304   // Watches ZooKeeper for a region coming out of recovering state
305   @SuppressWarnings("unused")
306   private RecoveringRegionWatcher recoveringRegionWatcher;
307 
308   /**
309    * Go here to get table descriptors.
310    */
311   protected TableDescriptors tableDescriptors;
312 
313   // Replication services. If no replication, this handler will be null.
314   protected ReplicationSourceService replicationSourceHandler;
315   protected ReplicationSinkService replicationSinkHandler;
316 
317   // Compactions
318   public CompactSplitThread compactSplitThread;
319 
320   final ConcurrentHashMap<String, RegionScannerHolder> scanners =
321       new ConcurrentHashMap<String, RegionScannerHolder>();
322 
323   /**
324    * Map of regions currently being served by this region server. Key is the
325    * encoded region name.  All access should be synchronized.
326    */
327   protected final Map<String, HRegion> onlineRegions =
328     new ConcurrentHashMap<String, HRegion>();
329 
330   /**
331    * Map of encoded region names to the DataNode locations they should be hosted on.
332    * We store the value as InetSocketAddress since this is used only in HDFS
333    * API (create() that takes favored nodes as hints for placing file blocks).
334    * We could have used ServerName here as the value class, but we'd need to
335    * convert it to InetSocketAddress at some point before the HDFS API call, and
336    * it seems a bit weird to store ServerName since ServerName refers to RegionServers
337    * and here we really mean DataNode locations.
338    */
339   protected final Map<String, InetSocketAddress[]> regionFavoredNodesMap =
340       new ConcurrentHashMap<String, InetSocketAddress[]>();
341 
342   /**
343    * Set of regions currently in recovering state, which means they can accept writes (edits
344    * from a previously failed region server) but not reads. A recovering region is also an online region.
345    */
346   protected final Map<String, HRegion> recoveringRegions = Collections
347       .synchronizedMap(new HashMap<String, HRegion>());
348 
349   // Leases
350   protected Leases leases;
351 
352   // Instance of the hbase executor service.
353   protected ExecutorService service;
354 
355   // Request counter. (Includes requests that are not serviced by regions.)
356   final Counter requestCount = new Counter();
357 
358   // If false, the file system has become unavailable
359   protected volatile boolean fsOk;
360   protected HFileSystem fs;
361 
362   // Set when a report to the master comes back with a message asking us to
363   // shutdown. Also set by call to stop when debugging or running unit tests
364   // of HRegionServer in isolation.
365   protected volatile boolean stopped = false;
366 
367   // Go down hard. Used if file system becomes unavailable and also in
368   // debugging and unit tests.
369   protected volatile boolean abortRequested;
370 
371   // region server static info like info port
372   private RegionServerInfo.Builder rsInfo;
373 
374   ConcurrentMap<String, Integer> rowlocks = new ConcurrentHashMap<String, Integer>();
375 
376   // A state before we go into stopped state.  At this stage we're closing user
377   // space regions.
378   private boolean stopping = false;
379 
380   private volatile boolean killed = false;
381 
382   protected final Configuration conf;
383 
384   private Path rootDir;
385 
386   protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
387 
388   final int numRetries;
389   protected final int threadWakeFrequency;
390   private final int msgInterval;
391 
392   protected final int numRegionsToReport;
393 
394   // Stub to do region server status calls against the master.
395   private volatile RegionServerStatusService.BlockingInterface rssStub;
396   // RPC client. Used to make the stub above that does region server status checking.
397   RpcClient rpcClient;
398 
399   // Server to handle client requests. Default access so can be accessed by
400   // unit tests.
401   RpcServerInterface rpcServer;
402 
403   private final InetSocketAddress isa;
404   private UncaughtExceptionHandler uncaughtExceptionHandler;
405 
406   // Info server. Default access so can be used by unit tests. REGIONSERVER
407   // is the name of the webapp and the attribute name used to stuff this
408   // instance into the web context.
409   InfoServer infoServer;
410   private JvmPauseMonitor pauseMonitor;
411 
412   /** region server process name */
413   public static final String REGIONSERVER = "regionserver";
414 
415   /** region server configuration name */
416   public static final String REGIONSERVER_CONF = "regionserver_conf";
417 
418   private MetricsRegionServer metricsRegionServer;
419   private SpanReceiverHost spanReceiverHost;
420 
421   /*
422    * Check for compaction requests.
423    */
424   Chore compactionChecker;
425 
426   /*
427    * Check for flushes
428    */
429   Chore periodicFlusher;
430 
431   // HLog and HLog roller. log is protected rather than private to avoid
432   // eclipse warning when accessed by inner classes
433   protected volatile HLog hlog;
434   // The meta updates are written to a different hlog. If this
435   // regionserver holds meta regions, then this field will be non-null.
436   protected volatile HLog hlogForMeta;
437 
438   LogRoller hlogRoller;
439   LogRoller metaHLogRoller;
440 
441   // flag set after we're done setting up server threads (used for testing)
442   protected volatile boolean isOnline;
443 
444   // zookeeper connection and watcher
445   private ZooKeeperWatcher zooKeeper;
446 
447   // master address tracker
448   private MasterAddressTracker masterAddressTracker;
449 
450   // Cluster Status Tracker
451   private ClusterStatusTracker clusterStatusTracker;
452 
453   // Log Splitting Worker
454   private SplitLogWorker splitLogWorker;
455 
456   // A sleeper that sleeps for msgInterval.
457   private final Sleeper sleeper;
458 
459   private final int rpcTimeout;
460 
461   private final RegionServerAccounting regionServerAccounting;
462 
463   // Cache configuration and block cache reference
464   final CacheConfig cacheConfig;
465 
466   /** The health check chore. */
467   private HealthCheckChore healthCheckChore;
468 
469   /** The nonce manager chore. */
470   private Chore nonceManagerChore;
471 
472   private Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
473 
474   /**
475    * The server name the Master sees us as.  It's made from the hostname the
476    * master passes us, the port, and the server startcode. Gets set after
477    * registration against the Master.  The hostname can differ from the hostname
478    * in {@link #isa} but usually doesn't if both servers resolve.
479    */
480   private ServerName serverNameFromMasterPOV;
481 
482   /**
483    * This server's startcode.
484    */
485   private final long startcode;
486 
487   /**
488    * Unique identifier for the cluster we are a part of.
489    */
490   private String clusterId;
491 
492   /**
493    * MX Bean for RegionServerInfo
494    */
495   private ObjectName mxBean = null;
496 
497   /**
498    * Chore to periodically clean the moved-region list.
499    */
500   private MovedRegionsCleaner movedRegionsCleaner;
501 
502   /**
503    * The lease timeout period for client scanners (milliseconds).
504    */
505   private final int scannerLeaseTimeoutPeriod;
506 
507   /**
508    * The reference to the priority extraction function
509    */
510   private final PriorityFunction priority;
511 
512   private RegionServerCoprocessorHost rsHost;
513 
514   private RegionServerProcedureManagerHost rspmHost;
515 
516   // Table level lock manager for locking for region operations
517   private TableLockManager tableLockManager;
518 
519   private final boolean useZKForAssignment;
520 
521   // Used for HBASE-11059 (ZK-less region assignment)
522   private ServerName serverName;
523 
524   /**
525    * Nonce manager. Nonces are used to make operations like increment and append idempotent
526    * in the case where client doesn't receive the response from a successful operation and
527    * retries. We track the successful ops for some time via a nonce sent by client and handle
528    * duplicate operations (currently, by failing them; in future we might use MVCC to return
529    * result). Nonces are also recovered from WAL during recovery; however, the caveats (from
530    * HBASE-3787) are:
531    * - WAL recovery is optimized, and under high load we won't read nearly nonce-timeout worth
532    *   of past records. If we don't read the records, we don't read and recover the nonces.
533    *   Some WALs within nonce-timeout at recovery may not even be present due to rolling/cleanup.
534    * - There's no WAL recovery during normal region move, so nonces will not be transferred.
535    * We can have a separate additional "Nonce WAL". It will just contain a bunch of numbers and
536    * won't be flushed on main path - because WAL itself also contains nonces, if we only flush
537    * it before memstore flush, for a given nonce we will either see it in the WAL (if it was
538    * never flushed to disk, it will be part of recovery), or we'll see it as part of the nonce
539    * log (or both occasionally, which doesn't matter). Nonce log file can be deleted after the
540    * latest nonce in it expired. It can also be recovered during move.
541    */
542   private final ServerNonceManager nonceManager;
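      // A sketch of the intended server-side flow, assuming ServerNonceManager's
      // startOperation/endOperation contract:
      //   if (nonceManager.startOperation(nonceGroup, nonce, this)) {
      //     boolean success = false;
      //     try { /* apply the mutation */ success = true; }
      //     finally { nonceManager.endOperation(nonceGroup, nonce, success); }
      //   } // else: duplicate of an operation we already saw; fail it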
543 
544   private UserProvider userProvider;
545 
546   /**
547    * Starts an HRegionServer at the default location
548    *
549    * @param conf
550    * @throws IOException
551    * @throws InterruptedException
552    */
553   public HRegionServer(Configuration conf)
554   throws IOException, InterruptedException {
555     this.fsOk = true;
556     this.conf = conf;
557     this.isOnline = false;
558     checkCodecs(this.conf);
559     this.userProvider = UserProvider.instantiate(conf);
560 
561     FSUtils.setupShortCircuitRead(this.conf);
562 
563     // Config'ed params
564     this.numRetries = this.conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
565         HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
566     this.threadWakeFrequency = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000);
567     this.msgInterval = conf.getInt("hbase.regionserver.msginterval", 3 * 1000);
568 
569     this.sleeper = new Sleeper(this.msgInterval, this);
570 
571     boolean isNoncesEnabled = conf.getBoolean(HConstants.HBASE_RS_NONCES_ENABLED, true);
572     this.nonceManager = isNoncesEnabled ? new ServerNonceManager(this.conf) : null;
573 
574     this.maxScannerResultSize = conf.getLong(
575       HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY,
576       HConstants.DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE);
577 
578     this.numRegionsToReport = conf.getInt(
579       "hbase.regionserver.numregionstoreport", 10);
580 
581     this.rpcTimeout = conf.getInt(
582       HConstants.HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY,
583       HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT);
584 
585     this.abortRequested = false;
586     this.stopped = false;
587 
588     this.scannerLeaseTimeoutPeriod = HBaseConfiguration.getInt(conf,
589       HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD,
590       HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY,
591       HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD);
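        // Note: this getInt overload reads the new hbase.client.scanner.timeout.period
        // key first and falls back to the deprecated hbase.regionserver.lease.period
        // key if only the old one is set.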
592 
593     // Server to handle client requests.
594     String hostname = conf.get("hbase.regionserver.ipc.address",
595       Strings.domainNamePointerToHostName(DNS.getDefaultHost(
596         conf.get("hbase.regionserver.dns.interface", "default"),
597         conf.get("hbase.regionserver.dns.nameserver", "default"))));
598     int port = conf.getInt(HConstants.REGIONSERVER_PORT,
599       HConstants.DEFAULT_REGIONSERVER_PORT);
600     // Creating the InetSocketAddress forces a DNS resolve.
601     InetSocketAddress initialIsa = new InetSocketAddress(hostname, port);
602     if (initialIsa.getAddress() == null) {
603       throw new IllegalArgumentException("Failed resolve of " + initialIsa);
604     }
605     this.rand = new Random(initialIsa.hashCode());
606     String name = "regionserver/" + initialIsa.toString();
607     // Set how many times to retry talking to another server over HConnection.
608     HConnectionManager.setServerSideHConnectionRetries(this.conf, name, LOG);
609     this.priority = new AnnotationReadingPriorityFunction(this);
610     RpcSchedulerFactory rpcSchedulerFactory;
611     try {
612       Class<?> rpcSchedulerFactoryClass = conf.getClass(
613           REGION_SERVER_RPC_SCHEDULER_FACTORY_CLASS,
614           SimpleRpcSchedulerFactory.class);
615       rpcSchedulerFactory = ((RpcSchedulerFactory) rpcSchedulerFactoryClass.newInstance());
616     } catch (InstantiationException e) {
617       throw new IllegalArgumentException(e);
618     } catch (IllegalAccessException e) {
619       throw new IllegalArgumentException(e);
620     }
621 
622     this.rpcServer = new RpcServer(this, name, getServices(),
623       /*HBaseRPCErrorHandler.class, OnlineRegions.class},*/
624       initialIsa, // BindAddress is IP we got for this server.
625       conf,
626       rpcSchedulerFactory.create(conf, this));
627 
628     // Set our address.
629     this.isa = this.rpcServer.getListenerAddress();
630 
631     this.rpcServer.setErrorHandler(this);
632     this.startcode = System.currentTimeMillis();
633     serverName = ServerName.valueOf(isa.getHostName(), isa.getPort(), startcode);
634     useZKForAssignment = ConfigUtil.useZKForAssignment(conf);
635 
636     // login the zookeeper client principal (if using security)
637     ZKUtil.loginClient(this.conf, "hbase.zookeeper.client.keytab.file",
638       "hbase.zookeeper.client.kerberos.principal", this.isa.getHostName());
639 
640     // login the server principal (if using secure Hadoop)
641     userProvider.login("hbase.regionserver.keytab.file",
642       "hbase.regionserver.kerberos.principal", this.isa.getHostName());
643     regionServerAccounting = new RegionServerAccounting();
644     cacheConfig = new CacheConfig(conf);
645     uncaughtExceptionHandler = new UncaughtExceptionHandler() {
646       @Override
647       public void uncaughtException(Thread t, Throwable e) {
648         abort("Uncaught exception in service thread " + t.getName(), e);
649       }
650     };
651 
652     this.rsInfo = RegionServerInfo.newBuilder();
653     // Put up the web UI. It may come up on a port other than the configured
654     // one if that port is occupied. Adjust serverInfo if this is the case.
655     this.rsInfo.setInfoPort(putUpWebUI());
656   }
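    // A minimal launch sketch (normal deployments go through `hbase regionserver`
    // and HRegionServerCommandLine; tests do roughly the same as below):
    //   Configuration conf = HBaseConfiguration.create();
    //   HRegionServer rs = new HRegionServer(conf);
    //   Threads.setDaemonThreadRunning(new Thread(rs), "regionserver");  // enters run()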
657 
658   @Override
659   public boolean registerService(Service instance) {
660     /*
661      * No stacking of instances is allowed for a single service name
662      */
663     Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
664     if (coprocessorServiceHandlers.containsKey(serviceDesc.getFullName())) {
665       LOG.error("Coprocessor service " + serviceDesc.getFullName()
666           + " already registered, rejecting request from " + instance);
667       return false;
668     }
669 
670     coprocessorServiceHandlers.put(serviceDesc.getFullName(), instance);
671     if (LOG.isDebugEnabled()) {
672       LOG.debug("Registered regionserver coprocessor service: service=" + serviceDesc.getFullName());
673     }
674     return true;
675   }
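    // Usage sketch for a coprocessor endpoint (MyProtos.MyService and handler
    // are hypothetical; registerService returns false on a duplicate name):
    //   Service svc = MyProtos.MyService.newReflectiveService(handler);
    //   regionServerServices.registerService(svc);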
676 
677   /**
678    * @return list of blocking services and the interfaces they implement that this server supports
679    */
680   private List<BlockingServiceAndInterface> getServices() {
681     List<BlockingServiceAndInterface> bssi = new ArrayList<BlockingServiceAndInterface>(2);
682     bssi.add(new BlockingServiceAndInterface(
683         ClientProtos.ClientService.newReflectiveBlockingService(this),
684         ClientProtos.ClientService.BlockingInterface.class));
685     bssi.add(new BlockingServiceAndInterface(
686         AdminProtos.AdminService.newReflectiveBlockingService(this),
687         AdminProtos.AdminService.BlockingInterface.class));
688     return bssi;
689   }
690 
691   /**
692    * Run test on configured codecs to make sure supporting libs are in place.
693    * @param c
694    * @throws IOException
695    */
696   private static void checkCodecs(final Configuration c) throws IOException {
697     // check to see if the codec list is available:
698     String [] codecs = c.getStrings("hbase.regionserver.codecs", (String[])null);
699     if (codecs == null) return;
700     for (String codec : codecs) {
701       if (!CompressionTest.testCompression(codec)) {
702         throw new IOException("Compression codec " + codec +
703           " not supported, aborting RS construction");
704       }
705     }
706   }
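    // For example, to refuse startup unless Snappy and LZO are usable, set in
    // hbase-site.xml:
    //   <property>
    //     <name>hbase.regionserver.codecs</name>
    //     <value>snappy,lzo</value>
    //   </property>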
707 
708   String getClusterId() {
709     return this.clusterId;
710   }
711 
712   @Override
713   public int getPriority(RequestHeader header, Message param) {
714     return priority.getPriority(header, param);
715   }
716 
717   @Retention(RetentionPolicy.RUNTIME)
718   protected @interface QosPriority {
719     int priority() default 0;
720   }
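    // Handler methods on this class are tagged with this annotation so that
    // AnnotationReadingPriorityFunction can route them to the priority handler
    // pool, e.g. (illustrative):
    //   @QosPriority(priority = HConstants.HIGH_QOS)
    //   public GetRegionInfoResponse getRegionInfo(RpcController controller,
    //       GetRegionInfoRequest request) throws ServiceException { ... }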
721 
722   PriorityFunction getPriority() {
723     return priority;
724   }
725 
726   RegionScanner getScanner(long scannerId) {
727     String scannerIdString = Long.toString(scannerId);
728     RegionScannerHolder scannerHolder = scanners.get(scannerIdString);
729     if (scannerHolder != null) {
730       return scannerHolder.s;
731     }
732     return null;
733   }
734 
735   /**
736    * All initialization needed before we go register with Master.
737    *
738    * Does not throw: on any failure it stops the RPC server and calls
739    * abort, since the server has already put up non-daemon threads.
740    */
741   private void preRegistrationInitialization() {
742     try {
743       initializeZooKeeper();
744       initializeThreads();
745     } catch (Throwable t) {
746       // Call stop on error, or the process will stick around forever since
747       // the server puts up non-daemon threads.
748       this.rpcServer.stop();
749       abort("Initialization of RS failed.  Hence aborting RS.", t);
750     }
751   }
752 
753   /**
754    * Bring up the connection to the zk ensemble, then wait until a master is
755    * available for this cluster, and after that wait until the cluster 'up'
756    * flag has been set. This is the order in which the master does things.
757    * Finally, put up a catalog tracker.
758    * @throws IOException
759    * @throws InterruptedException
760    */
761   private void initializeZooKeeper() throws IOException, InterruptedException {
762     // Open connection to zookeeper and set primary watcher
763     this.zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + ":" +
764       this.isa.getPort(), this);
765 
766     // Create the master address tracker, register with zk, and start it.  Then
767     // block until a master is available.  No point in starting up if no master
768     // running.
769     this.masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this);
770     this.masterAddressTracker.start();
771     blockAndCheckIfStopped(this.masterAddressTracker);
772 
773     // Wait on cluster being up.  Master will set this flag up in zookeeper
774     // when ready.
775     this.clusterStatusTracker = new ClusterStatusTracker(this.zooKeeper, this);
776     this.clusterStatusTracker.start();
777     blockAndCheckIfStopped(this.clusterStatusTracker);
778 
779     // Create the catalog tracker and start it;
780     this.catalogTracker = new CatalogTracker(this.zooKeeper, this.conf, this);
781     catalogTracker.start();
782 
783     // Retrieve clusterId
784     // Since cluster status is now up
785     // ID should have already been set by HMaster
786     try {
787       clusterId = ZKClusterId.readClusterIdZNode(this.zooKeeper);
788       if (clusterId == null) {
789         this.abort("Cluster ID has not been set");
790       }
791       LOG.info("ClusterId : "+clusterId);
792     } catch (KeeperException e) {
793       this.abort("Failed to retrieve Cluster ID",e);
794     }
795 
796     // watch for snapshots and other procedures
797     try {
798       rspmHost = new RegionServerProcedureManagerHost();
799       rspmHost.loadProcedures(conf);
800       rspmHost.initialize(this);
801     } catch (KeeperException e) {
802       this.abort("Failed to reach zk cluster when creating procedure handler.", e);
803     }
804     this.tableLockManager = TableLockManager.createTableLockManager(conf, zooKeeper,
805         ServerName.valueOf(isa.getHostName(), isa.getPort(), startcode));
806 
807     // register watcher for recovering regions
808     this.recoveringRegionWatcher = new RecoveringRegionWatcher(this.zooKeeper, this);
809   }
810 
811   /**
812    * Utility method to wait indefinitely for a znode to become available while checking
813    * if the region server is shut down
814    * @param tracker znode tracker to use
815    * @throws IOException any IO exception, or if the RS is stopped while waiting
816    * @throws InterruptedException
817    */
818   private void blockAndCheckIfStopped(ZooKeeperNodeTracker tracker)
819       throws IOException, InterruptedException {
820     while (tracker.blockUntilAvailable(this.msgInterval, false) == null) {
821       if (this.stopped) {
822         throw new IOException("Received the shutdown message while waiting.");
823       }
824     }
825   }
826 
827   /**
828    * @return false if cluster shutdown is in progress, true otherwise
829    */
830   private boolean isClusterUp() {
831     return this.clusterStatusTracker.isClusterUp();
832   }
833 
834   private void initializeThreads() throws IOException {
835     // Cache flushing thread.
836     this.cacheFlusher = new MemStoreFlusher(conf, this);
837 
838     // Compaction thread
839     this.compactSplitThread = new CompactSplitThread(this);
840 
841     // Background thread to check for compactions; needed if region has not gotten updates
842     // in a while. It will take care of not checking too frequently on a store-by-store basis.
843     this.compactionChecker = new CompactionChecker(this, this.threadWakeFrequency, this);
844     this.periodicFlusher = new PeriodicMemstoreFlusher(this.threadWakeFrequency, this);
845     // Health checker thread.
846     int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ,
847       HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
848     if (isHealthCheckerConfigured()) {
849       healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
850     }
851 
852     this.leases = new Leases(this.threadWakeFrequency);
853 
854     // Create the thread to clean the moved regions list
855     movedRegionsCleaner = MovedRegionsCleaner.createAndStart(this);
856 
857     if (this.nonceManager != null) {
858       // Create the chore that cleans up nonces.
859       nonceManagerChore = this.nonceManager.createCleanupChore(this);
860     }
861 
862     // Setup RPC client for master communication
863     rpcClient = new RpcClient(conf, clusterId, new InetSocketAddress(
864         this.isa.getAddress(), 0));
865     this.pauseMonitor = new JvmPauseMonitor(conf);
866     pauseMonitor.start();
867   }
868 
869   /**
870    * The HRegionServer sticks in this loop until closed.
871    */
872   @Override
873   public void run() {
874     try {
875       // Do pre-registration initializations; zookeeper, lease threads, etc.
876       preRegistrationInitialization();
877     } catch (Throwable e) {
878       abort("Fatal exception during initialization", e);
879     }
880 
881     try {
882       // Try and register with the Master; tell it we are here.  Break if the
883       // server is stopped, the cluster-up flag is down, or hdfs went wacky.
884       while (keepLooping()) {
885         RegionServerStartupResponse w = reportForDuty();
886         if (w == null) {
887           LOG.warn("reportForDuty failed; sleeping and then retrying.");
888           this.sleeper.sleep();
889         } else {
890           handleReportForDutyResponse(w);
891           break;
892         }
893       }
894 
895       // Initialize the RegionServerCoprocessorHost now that our ephemeral
896       // node was created by reportForDuty, in case any coprocessors want
897       // to use ZooKeeper
898       this.rsHost = new RegionServerCoprocessorHost(this, this.conf);
899 
900       if (!this.stopped && isHealthy()){
901         // start the snapshot handler and other procedure handlers,
902         // since the server is ready to run
903         rspmHost.start();
904       }
905 
906       // We registered with the Master.  Go into run mode.
907       long lastMsg = 0;
908       long oldRequestCount = -1;
909       // The main run loop.
910       while (!this.stopped && isHealthy()) {
911         if (!isClusterUp()) {
912           if (isOnlineRegionsEmpty()) {
913             stop("Exiting; cluster shutdown set and not carrying any regions");
914           } else if (!this.stopping) {
915             this.stopping = true;
916             LOG.info("Closing user regions");
917             closeUserRegions(this.abortRequested);
918           } else if (this.stopping) {
919             boolean allUserRegionsOffline = areAllUserRegionsOffline();
920             if (allUserRegionsOffline) {
921               // Set stopped if no more write requests to meta tables
922               // since last time we went around the loop.  Any open
923               // meta regions will be closed on our way out.
924               if (oldRequestCount == getWriteRequestCount()) {
925                 stop("Stopped; only catalog regions remaining online");
926                 break;
927               }
928               oldRequestCount = getWriteRequestCount();
929             } else {
930               // Make sure all regions have been closed -- some regions may
931               // not have gotten the close request because we were splitting
932               // at the time of the call to closeUserRegions.
933               closeUserRegions(this.abortRequested);
934             }
935             LOG.debug("Waiting on " + getOnlineRegionsAsPrintableString());
936           }
937         }
938         long now = System.currentTimeMillis();
939         if ((now - lastMsg) >= msgInterval) {
940           tryRegionServerReport(lastMsg, now);
941           lastMsg = System.currentTimeMillis();
942         }
943         if (!this.stopped) this.sleeper.sleep();
944       } // while
945     } catch (Throwable t) {
946       if (!checkOOME(t)) {
947         String prefix = t instanceof YouAreDeadException? "": "Unhandled: ";
948         abort(prefix + t.getMessage(), t);
949       }
950     }
951     // Run shutdown.
952     if (mxBean != null) {
953       MBeanUtil.unregisterMBean(mxBean);
954       mxBean = null;
955     }
956     if (this.leases != null) this.leases.closeAfterLeasesExpire();
957     this.rpcServer.stop();
958     if (this.splitLogWorker != null) {
959       splitLogWorker.stop();
960     }
961     if (this.infoServer != null) {
962       LOG.info("Stopping infoServer");
963       try {
964         this.infoServer.stop();
965       } catch (Exception e) {
966         LOG.error("Failed to stop infoServer", e);
967       }
968     }
969     // Send cache a shutdown.
970     if (cacheConfig.isBlockCacheEnabled()) {
971       cacheConfig.getBlockCache().shutdown();
972     }
973 
974     if (movedRegionsCleaner != null) {
975       movedRegionsCleaner.stop("Region Server stopping");
976     }
977 
978     // Send interrupts to wake up threads if sleeping so they notice shutdown.
979     // TODO: Should we check they are alive? If they OOME'd, they could have exited already.
980     if (this.cacheFlusher != null) this.cacheFlusher.interruptIfNecessary();
981     if (this.compactSplitThread != null) this.compactSplitThread.interruptIfNecessary();
982     if (this.hlogRoller != null) this.hlogRoller.interruptIfNecessary();
983     if (this.metaHLogRoller != null) this.metaHLogRoller.interruptIfNecessary();
984     if (this.compactionChecker != null)
985       this.compactionChecker.interrupt();
986     if (this.healthCheckChore != null) {
987       this.healthCheckChore.interrupt();
988     }
989     if (this.nonceManagerChore != null) {
990       this.nonceManagerChore.interrupt();
991     }
992 
993     // Stop the snapshot and other procedure handlers, forcefully killing all running tasks
994     if (rspmHost != null) {
995       rspmHost.stop(this.abortRequested || this.killed);
996     }
997 
998     if (this.killed) {
999       // Just skip out w/o closing regions.  Used when testing.
1000     } else if (abortRequested) {
1001       if (this.fsOk) {
1002         closeUserRegions(abortRequested); // Don't leave any open file handles
1003       }
1004       LOG.info("aborting server " + this.serverNameFromMasterPOV);
1005     } else {
1006       closeUserRegions(abortRequested);
1007       closeAllScanners();
1008       LOG.info("stopping server " + this.serverNameFromMasterPOV);
1009     }
1010     // Interrupt catalog tracker here in case any regions being opened out in
1011     // handlers are stuck waiting on meta.
1012     if (this.catalogTracker != null) this.catalogTracker.stop();
1013 
1014     // Closing the compactSplit thread before closing meta regions
1015     if (!this.killed && containsMetaTableRegions()) {
1016       if (!abortRequested || this.fsOk) {
1017         if (this.compactSplitThread != null) {
1018           this.compactSplitThread.join();
1019           this.compactSplitThread = null;
1020         }
1021         closeMetaTableRegions(abortRequested);
1022       }
1023     }
1024 
1025     if (!this.killed && this.fsOk) {
1026       waitOnAllRegionsToClose(abortRequested);
1027       LOG.info("stopping server " + this.serverNameFromMasterPOV +
1028         "; all regions closed.");
1029     }
1030 
1031     //fsOk flag may be changed when closing regions throws exception.
1032     if (this.fsOk) {
1033       closeWAL(!abortRequested);
1034     }
1035 
1036     // Make sure the proxy is down.
1037     if (this.rssStub != null) {
1038       this.rssStub = null;
1039     }
1040     if (this.rpcClient != null) {
1041       this.rpcClient.stop();
1042     }
1043     if (this.leases != null) {
1044       this.leases.close();
1045     }
1046     if (this.pauseMonitor != null) {
1047       this.pauseMonitor.stop();
1048     }
1049 
1050     if (!killed) {
1051       join();
1052     }
1053 
1054     try {
1055       deleteMyEphemeralNode();
1056     } catch (KeeperException e) {
1057       LOG.warn("Failed deleting my ephemeral node", e);
1058     }
1059     // We may have failed to delete the znode at the previous step, but
1060     //  we delete the file anyway: a second attempt to delete the znode is likely to fail again.
1061     ZNodeClearer.deleteMyEphemeralNodeOnDisk();
1062     if (this.zooKeeper != null) {
1063       this.zooKeeper.close();
1064     }
1065     LOG.info("stopping server " + this.serverNameFromMasterPOV +
1066       "; zookeeper connection closed.");
1067 
1068     LOG.info(Thread.currentThread().getName() + " exiting");
1069   }
1070 
1071   private boolean containsMetaTableRegions() {
1072     return onlineRegions.containsKey(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1073   }
1074 
1075   private boolean areAllUserRegionsOffline() {
1076     if (getNumberOfOnlineRegions() > 2) return false;
1077     boolean allUserRegionsOffline = true;
1078     for (Map.Entry<String, HRegion> e: this.onlineRegions.entrySet()) {
1079       if (!e.getValue().getRegionInfo().isMetaTable()) {
1080         allUserRegionsOffline = false;
1081         break;
1082       }
1083     }
1084     return allUserRegionsOffline;
1085   }
1086 
1087   /**
1088    * @return Current write count for all online regions.
1089    */
1090   private long getWriteRequestCount() {
1091     long writeCount = 0;
1092     for (Map.Entry<String, HRegion> e: this.onlineRegions.entrySet()) {
1093       writeCount += e.getValue().getWriteRequestsCount();
1094     }
1095     return writeCount;
1096   }
1097 
1098   @VisibleForTesting
1099   protected void tryRegionServerReport(long reportStartTime, long reportEndTime)
1100   throws IOException {
1101     RegionServerStatusService.BlockingInterface rss = rssStub;
1102     if (rss == null) {
1103       // the current server could be stopping.
1104       return;
1105     }
1106     ClusterStatusProtos.ServerLoad sl = buildServerLoad(reportStartTime, reportEndTime);
1107     try {
1108       RegionServerReportRequest.Builder request = RegionServerReportRequest.newBuilder();
1109       ServerName sn = ServerName.parseVersionedServerName(
1110         this.serverNameFromMasterPOV.getVersionedBytes());
1111       request.setServer(ProtobufUtil.toServerName(sn));
1112       request.setLoad(sl);
1113       rss.regionServerReport(null, request.build());
1114     } catch (ServiceException se) {
1115       IOException ioe = ProtobufUtil.getRemoteException(se);
1116       if (ioe instanceof YouAreDeadException) {
1117         // This will be caught and handled as a fatal error in run()
1118         throw ioe;
1119       }
1120       if (rssStub == rss) {
1121         rssStub = null;
1122       }
1123       // Couldn't connect to the master, get location from zk and reconnect
1124       // Method blocks until new master is found or we are stopped
1125       createRegionServerStatusStub();
1126     }
1127   }
1128 
1129   ClusterStatusProtos.ServerLoad buildServerLoad(long reportStartTime, long reportEndTime) {
1130     // We're getting the MetricsRegionServerWrapper here because the wrapper computes requests
1131     // per second, and other metrics. As long as metrics are part of ServerLoad it's best to use
1132     // the wrapper to compute those numbers in one place.
1133     // In the long term most of these should be moved off of ServerLoad and the heart beat.
1134     // Instead they should be stored in an HBase table so that external visibility into HBase is
1135     // improved; additionally, the load balancer will be able to take advantage of a more complete
1136     // history.
1137     MetricsRegionServerWrapper regionServerWrapper = this.metricsRegionServer.getRegionServerWrapper();
1138     Collection<HRegion> regions = getOnlineRegionsLocalContext();
1139     MemoryUsage memory =
1140       ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
1141 
1142     ClusterStatusProtos.ServerLoad.Builder serverLoad =
1143       ClusterStatusProtos.ServerLoad.newBuilder();
1144     serverLoad.setNumberOfRequests((int) regionServerWrapper.getRequestsPerSecond());
1145     serverLoad.setTotalNumberOfRequests((int) regionServerWrapper.getTotalRequestCount());
1146     serverLoad.setUsedHeapMB((int)(memory.getUsed() / 1024 / 1024));
1147     serverLoad.setMaxHeapMB((int) (memory.getMax() / 1024 / 1024));
1148     Set<String> coprocessors = this.hlog.getCoprocessorHost().getCoprocessors();
1149     for (String coprocessor : coprocessors) {
1150       serverLoad.addCoprocessors(
1151         Coprocessor.newBuilder().setName(coprocessor).build());
1152     }
1153     RegionLoad.Builder regionLoadBldr = RegionLoad.newBuilder();
1154     RegionSpecifier.Builder regionSpecifier = RegionSpecifier.newBuilder();
1155     for (HRegion region : regions) {
1156       serverLoad.addRegionLoads(createRegionLoad(region, regionLoadBldr, regionSpecifier));
1157     }
1158     serverLoad.setReportStartTime(reportStartTime);
1159     serverLoad.setReportEndTime(reportEndTime);
1160     if (this.infoServer != null) {
1161       serverLoad.setInfoServerPort(this.infoServer.getPort());
1162     } else {
1163       serverLoad.setInfoServerPort(-1);
1164     }
1165     return serverLoad.build();
1166   }
1167 
1168   String getOnlineRegionsAsPrintableString() {
1169     StringBuilder sb = new StringBuilder();
1170     for (HRegion r: this.onlineRegions.values()) {
1171       if (sb.length() > 0) sb.append(", ");
1172       sb.append(r.getRegionInfo().getEncodedName());
1173     }
1174     return sb.toString();
1175   }
1176 
1177   /**
1178    * Wait on regions close.
1179    */
1180   private void waitOnAllRegionsToClose(final boolean abort) {
1181     // Wait till all regions are closed before going out.
1182     int lastCount = -1;
1183     long previousLogTime = 0;
1184     Set<String> closedRegions = new HashSet<String>();
1185     while (!isOnlineRegionsEmpty()) {
1186       int count = getNumberOfOnlineRegions();
1187       // Only print a message if the count of regions has changed.
1188       if (count != lastCount) {
1189         // Log every second at most
1190         if (System.currentTimeMillis() > (previousLogTime + 1000)) {
1191           previousLogTime = System.currentTimeMillis();
1192           lastCount = count;
1193           LOG.info("Waiting on " + count + " regions to close");
1194           // Only print out the regions still closing if there are only a few,
1195           // else we will swamp the log.
1196           if (count < 10 && LOG.isDebugEnabled()) {
1197             LOG.debug(this.onlineRegions);
1198           }
1199         }
1200       }
1201       // Ensure all user regions have been sent a close. Use this to
1202       // protect against the case where an open comes in after we start the
1203       // iterator of onlineRegions to close all user regions.
1204       for (Map.Entry<String, HRegion> e : this.onlineRegions.entrySet()) {
1205         HRegionInfo hri = e.getValue().getRegionInfo();
1206         if (!this.regionsInTransitionInRS.containsKey(hri.getEncodedNameAsBytes())
1207             && !closedRegions.contains(hri.getEncodedName())) {
1208           closedRegions.add(hri.getEncodedName());
1209           // Don't update zk with this close transition; pass false.
1210           closeRegionIgnoreErrors(hri, abort);
1211         }
1212       }
1213       // No regions in RIT, we can stop waiting now.
1214       if (this.regionsInTransitionInRS.isEmpty()) {
1215         if (!isOnlineRegionsEmpty()) {
1216           LOG.info("Exiting even though online regions are not empty," +
1217               " because some regions failed to close");
1218         }
1219         break;
1220       }
1221       Threads.sleep(200);
1222     }
1223   }
1224 
1225   private void closeWAL(final boolean delete) {
1226     if (this.hlogForMeta != null) {
1227       // All hlogs (meta and non-meta) are in the same directory. Don't call
1228       // closeAndDelete here since that would delete all hlogs not just the
1229       // meta ones. We will just 'close' the hlog for meta here, and leave
1230       // the directory cleanup to the follow-on closeAndDelete call.
1231       try {
1232         this.hlogForMeta.close();
1233       } catch (Throwable e) {
1234         LOG.error("Metalog close and delete failed", RemoteExceptionHandler.checkThrowable(e));
1235       }
1236     }
1237     if (this.hlog != null) {
1238       try {
1239         if (delete) {
1240           hlog.closeAndDelete();
1241         } else {
1242           hlog.close();
1243         }
1244       } catch (Throwable e) {
1245         LOG.error("Close and delete failed", RemoteExceptionHandler.checkThrowable(e));
1246       }
1247     }
1248   }
1249 
1250   private void closeAllScanners() {
1251     // Close any outstanding scanners. Means they'll get an UnknownScanner
1252     // exception next time they come in.
1253     for (Map.Entry<String, RegionScannerHolder> e : this.scanners.entrySet()) {
1254       try {
1255         e.getValue().s.close();
1256       } catch (IOException ioe) {
1257         LOG.warn("Closing scanner " + e.getKey(), ioe);
1258       }
1259     }
1260   }
1261 
1262   /*
1263    * Run init. Sets up hlog and starts up all server threads.
1264    *
1265    * @param c Extra configuration.
1266    */
1267   protected void handleReportForDutyResponse(final RegionServerStartupResponse c)
1268   throws IOException {
1269     try {
1270       for (NameStringPair e : c.getMapEntriesList()) {
1271         String key = e.getName();
1272         // The hostname the master sees us as.
1273         if (key.equals(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER)) {
1274           String hostnameFromMasterPOV = e.getValue();
1275           this.serverNameFromMasterPOV = ServerName.valueOf(hostnameFromMasterPOV,
1276               this.isa.getPort(), this.startcode);
1277           if (!hostnameFromMasterPOV.equals(this.isa.getHostName())) {
1278             LOG.info("Master passed us a different hostname to use; was=" +
1279               this.isa.getHostName() + ", but now=" + hostnameFromMasterPOV);
1280           }
1281           continue;
1282         }
1283         String value = e.getValue();
1284         if (LOG.isDebugEnabled()) {
1285           LOG.debug("Config from master: " + key + "=" + value);
1286         }
1287         this.conf.set(key, value);
1288       }
1289 
1290       // hack! Maps DFSClient => RegionServer for logs.  HDFS made this
1291       // config param for task trackers, but we can piggyback off of it.
1292       if (this.conf.get("mapred.task.id") == null) {
1293         this.conf.set("mapred.task.id", "hb_rs_" +
1294           this.serverNameFromMasterPOV.toString());
1295       }
1296       // Set our ephemeral znode up in zookeeper now we have a name.
1297       createMyEphemeralNode();
1298 
1299       // Save it in a file; this will allow us to see if we crashed.
1300       ZNodeClearer.writeMyEphemeralNodeOnDisk(getMyEphemeralNodePath());
1301 
1302       // Master sent us hbase.rootdir to use. Should be fully qualified
1303       // path with file system specification included. Set 'fs.defaultFS'
1304       // to match the filesystem on hbase.rootdir else underlying hadoop hdfs
1305       // accessors will be going against wrong filesystem (unless all is set
1306       // to defaults).
1307       FSUtils.setFsDefault(this.conf, FSUtils.getRootDir(this.conf));
1308       // Get fs instance used by this RS. If hbase-level checksum verification is
1309       // enabled, automatically switch off hdfs-level checksum verification.
1310       boolean useHBaseChecksum = conf.getBoolean(HConstants.HBASE_CHECKSUM_VERIFICATION, true);
1311       this.fs = new HFileSystem(this.conf, useHBaseChecksum);
1312       this.rootDir = FSUtils.getRootDir(this.conf);
1313       this.tableDescriptors = new FSTableDescriptors(this.conf, this.fs, this.rootDir, true, false);
1314       this.hlog = setupWALAndReplication();
1315       // Init in here rather than in constructor after thread name has been set
1316       this.metricsRegionServer = new MetricsRegionServer(new MetricsRegionServerWrapperImpl(this));
1317 
1318       spanReceiverHost = SpanReceiverHost.getInstance(getConfiguration());
1319 
1320       startServiceThreads();
1321       LOG.info("Serving as " + this.serverNameFromMasterPOV +
1322         ", RpcServer on " + this.isa +
1323         ", sessionid=0x" +
1324         Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()));
1325       isOnline = true;
1326     } catch (Throwable e) {
1327       this.isOnline = false;
1328       stop("Failed initialization");
1329       throw convertThrowableToIOE(cleanup(e, "Failed init"),
1330           "Region server startup failed");
1331     } finally {
1332       sleeper.skipSleepCycle();
1333     }
1334   }
1335 
1336   private void createMyEphemeralNode() throws KeeperException, IOException {
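         // Serialized RegionServerInfo, prefixed with the PB magic marker so
         // readers can recognize the encoding.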
1337     byte[] data = ProtobufUtil.prependPBMagic(rsInfo.build().toByteArray());
1338     ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper,
1339       getMyEphemeralNodePath(), data);
1340   }
1341 
1342   private void deleteMyEphemeralNode() throws KeeperException {
1343     ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath());
1344   }
1345 
1346   @Override
1347   public RegionServerAccounting getRegionServerAccounting() {
1348     return regionServerAccounting;
1349   }
1350 
1351   @Override
1352   public TableLockManager getTableLockManager() {
1353     return tableLockManager;
1354   }
1355 
1356   /*
1357    * @param r Region to get RegionLoad for.
1358    * @param regionLoadBldr the RegionLoad.Builder, can be null
1359    * @param regionSpecifier the RegionSpecifier.Builder, can be null
1360    * @return RegionLoad instance.
1363    */
1364   private RegionLoad createRegionLoad(final HRegion r, RegionLoad.Builder regionLoadBldr,
1365       RegionSpecifier.Builder regionSpecifier) {
1366     byte[] name = r.getRegionName();
1367     int stores = 0;
1368     int storefiles = 0;
1369     int storeUncompressedSizeMB = 0;
1370     int storefileSizeMB = 0;
1371     int memstoreSizeMB = (int) (r.memstoreSize.get() / 1024 / 1024);
1372     int storefileIndexSizeMB = 0;
1373     int rootIndexSizeKB = 0;
1374     int totalStaticIndexSizeKB = 0;
1375     int totalStaticBloomSizeKB = 0;
1376     long totalCompactingKVs = 0;
1377     long currentCompactedKVs = 0;
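         // Aggregate the per-store metrics under the stores lock so we read a
         // consistent snapshot.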
1378     synchronized (r.stores) {
1379       stores += r.stores.size();
1380       for (Store store : r.stores.values()) {
1381         storefiles += store.getStorefilesCount();
1382         storeUncompressedSizeMB += (int) (store.getStoreSizeUncompressed()
1383             / 1024 / 1024);
1384         storefileSizeMB += (int) (store.getStorefilesSize() / 1024 / 1024);
1385         storefileIndexSizeMB += (int) (store.getStorefilesIndexSize() / 1024 / 1024);
1386         CompactionProgress progress = store.getCompactionProgress();
1387         if (progress != null) {
1388           totalCompactingKVs += progress.totalCompactingKVs;
1389           currentCompactedKVs += progress.currentCompactedKVs;
1390         }
1391 
1392         rootIndexSizeKB +=
1393             (int) (store.getStorefilesIndexSize() / 1024);
1394 
1395         totalStaticIndexSizeKB +=
1396           (int) (store.getTotalStaticIndexSize() / 1024);
1397 
1398         totalStaticBloomSizeKB +=
1399           (int) (store.getTotalStaticBloomSize() / 1024);
1400       }
1401     }
1402     float dataLocality =
1403         r.getHDFSBlocksDistribution().getBlockLocalityIndex(serverName.getHostname());
1404     if (regionLoadBldr == null) {
1405       regionLoadBldr = RegionLoad.newBuilder();
1406     }
1407     if (regionSpecifier == null) {
1408       regionSpecifier = RegionSpecifier.newBuilder();
1409     }
1410     regionSpecifier.setType(RegionSpecifierType.REGION_NAME);
1411     regionSpecifier.setValue(ByteStringer.wrap(name));
1412     regionLoadBldr.setRegionSpecifier(regionSpecifier.build())
1413       .setStores(stores)
1414       .setStorefiles(storefiles)
1415       .setStoreUncompressedSizeMB(storeUncompressedSizeMB)
1416       .setStorefileSizeMB(storefileSizeMB)
1417       .setMemstoreSizeMB(memstoreSizeMB)
1418       .setStorefileIndexSizeMB(storefileIndexSizeMB)
1419       .setRootIndexSizeKB(rootIndexSizeKB)
1420       .setTotalStaticIndexSizeKB(totalStaticIndexSizeKB)
1421       .setTotalStaticBloomSizeKB(totalStaticBloomSizeKB)
1422       .setReadRequestsCount(r.readRequestsCount.get())
1423       .setWriteRequestsCount(r.writeRequestsCount.get())
1424       .setTotalCompactingKVs(totalCompactingKVs)
1425       .setCurrentCompactedKVs(currentCompactedKVs)
1426       .setCompleteSequenceId(r.completeSequenceId)
1427       .setDataLocality(dataLocality);
1428 
1429     return regionLoadBldr.build();
1430   }
1431 
1432   /**
1433    * @param encodedRegionName Encoded name of the region whose load to report.
1434    * @return An instance of RegionLoad.
1435    */
1436   public RegionLoad createRegionLoad(final String encodedRegionName) {
1437     HRegion r = this.onlineRegions.get(encodedRegionName);
1439     return r != null ? createRegionLoad(r, null, null) : null;
1440   }
1441 
1442   /*
1443    * Inner class that runs periodically, checking if regions need compaction.
1444    */
1445   private static class CompactionChecker extends Chore {
1446     private final HRegionServer instance;
1447     private final int majorCompactPriority;
1448     private final static int DEFAULT_PRIORITY = Integer.MAX_VALUE;
1449     private long iteration = 0;
1450 
1451     CompactionChecker(final HRegionServer h, final int sleepTime,
1452         final Stoppable stopper) {
1453       super("CompactionChecker", sleepTime, h);
1454       this.instance = h;
1455       LOG.info(this.getName() + " runs every " + StringUtils.formatTime(sleepTime));
1456 
1457       /* MajorCompactPriority is configurable.
1458        * If not set, the compaction will use default priority.
1459        */
1460       this.majorCompactPriority = this.instance.conf.
1461         getInt("hbase.regionserver.compactionChecker.majorCompactPriority",
1462         DEFAULT_PRIORITY);
1463     }
1464 
1465     @Override
1466     protected void chore() {
1467       for (HRegion r : this.instance.onlineRegions.values()) {
1468         if (r == null)
1469           continue;
1470         for (Store s : r.getStores().values()) {
1471           try {
1472             long multiplier = s.getCompactionCheckMultiplier();
1473             assert multiplier > 0;
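                 // Only check this store every 'multiplier' iterations of the chore.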
1474             if (iteration % multiplier != 0) continue;
1475             if (s.needsCompaction()) {
1476               // Queue a compaction. Will recognize if major is needed.
1477               this.instance.compactSplitThread.requestSystemCompaction(r, s, getName()
1478                   + " requests compaction");
1479             } else if (s.isMajorCompaction()) {
1480               if (majorCompactPriority == DEFAULT_PRIORITY
1481                   || majorCompactPriority > r.getCompactPriority()) {
1482                 this.instance.compactSplitThread.requestCompaction(r, s, getName()
1483                     + " requests major compaction; use default priority", null);
1484               } else {
1485                 this.instance.compactSplitThread.requestCompaction(r, s, getName()
1486                     + " requests major compaction; use configured priority",
1487                   this.majorCompactPriority, null);
1488               }
1489             }
1490           } catch (IOException e) {
1491             LOG.warn("Failed major compaction check on " + r, e);
1492           }
1493         }
1494       }
1495       iteration = (iteration == Long.MAX_VALUE) ? 0 : (iteration + 1);
1496     }
1497   }
1498 
1499   class PeriodicMemstoreFlusher extends Chore {
1500     final HRegionServer server;
1501     final static int RANGE_OF_DELAY = 20000; //millisec
1502     final static int MIN_DELAY_TIME = 3000; //millisec
1503     public PeriodicMemstoreFlusher(int cacheFlushInterval, final HRegionServer server) {
1504       super(server.getServerName() + "-MemstoreFlusherChore", cacheFlushInterval, server);
1505       this.server = server;
1506     }
1507 
1508     @Override
1509     protected void chore() {
1510       for (HRegion r : this.server.onlineRegions.values()) {
1511         if (r == null)
1512           continue;
1513         if (r.shouldFlush()) {
1514           FlushRequester requester = server.getFlushRequester();
1515           if (requester != null) {
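                 // Pick a delay in [MIN_DELAY_TIME, MIN_DELAY_TIME + RANGE_OF_DELAY),
                 // i.e. 3s to 23s, so flushes are spread out.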
1516             long randomDelay = rand.nextInt(RANGE_OF_DELAY) + MIN_DELAY_TIME;
1517             LOG.info(getName() + " requesting flush for region " + r.getRegionNameAsString() +
1518                 " after a delay of " + randomDelay);
1519             //Throttle the flushes by putting a delay. If we don't throttle, and there
1520             //is a balanced write-load on the regions in a table, we might end up
1521             //overwhelming the filesystem with too many flushes at once.
1522             requester.requestDelayedFlush(r, randomDelay);
1523           }
1524         }
1525       }
1526     }
1527   }
1528 
1529   /**
1530    * Report the status of the server. A server is online once all of its startup
1531    * has completed (setting up filesystem, starting service threads, etc.). This
1532    * method is designed mostly to be useful in tests.
1533    *
1534    * @return true if online, false if not.
1535    */
1536   public boolean isOnline() {
1537     return isOnline;
1538   }
1539 
1540   /**
1541    * Setup WAL log and replication if enabled.
1542    * Replication setup is done in here because it wants to be hooked up to WAL.
1543    * @return A WAL instance.
1544    * @throws IOException
1545    */
1546   private HLog setupWALAndReplication() throws IOException {
1547     final Path oldLogDir = new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME);
1548     final String logName
1549       = HLogUtil.getHLogDirectoryName(this.serverNameFromMasterPOV.toString());
1550 
1551     Path logdir = new Path(rootDir, logName);
1552     if (LOG.isDebugEnabled()) LOG.debug("logdir=" + logdir);
1553     if (this.fs.exists(logdir)) {
1554       throw new RegionServerRunningException("Region server has already " +
1555         "created directory at " + this.serverNameFromMasterPOV.toString());
1556     }
1557 
1558     // Instantiate replication manager if replication enabled.  Pass it the
1559     // log directories.
1560     createNewReplicationInstance(conf, this, this.fs, logdir, oldLogDir);
1561 
1562     return instantiateHLog(rootDir, logName);
1563   }
1564 
1565   private HLog getMetaWAL() throws IOException {
1566     if (this.hlogForMeta != null) return this.hlogForMeta;
1567     final String logName = HLogUtil.getHLogDirectoryName(this.serverNameFromMasterPOV.toString());
1568     Path logdir = new Path(rootDir, logName);
1569     if (LOG.isDebugEnabled()) LOG.debug("logdir=" + logdir);
1570     this.hlogForMeta = HLogFactory.createMetaHLog(this.fs.getBackingFs(), rootDir, logName,
1571       this.conf, getMetaWALActionListeners(), this.serverNameFromMasterPOV.toString());
1572     return this.hlogForMeta;
1573   }
1574 
1575   /**
1576    * Called by {@link #setupWALAndReplication()} creating WAL instance.
1577    * @param rootdir
1578    * @param logName
1579    * @return WAL instance.
1580    * @throws IOException
1581    */
1582   protected HLog instantiateHLog(Path rootdir, String logName) throws IOException {
1583     return HLogFactory.createHLog(this.fs.getBackingFs(), rootdir, logName, this.conf,
1584       getWALActionListeners(), this.serverNameFromMasterPOV.toString());
1585   }
1586 
1587   /**
1588    * Called by {@link #instantiateHLog(Path, String)} setting up WAL instance.
1589    * Add any {@link WALActionsListener}s you want inserted before WAL startup.
1590    * @return List of WALActionsListener that will be passed in to
1591    * {@link org.apache.hadoop.hbase.regionserver.wal.FSHLog} on construction.
1592    */
1593   protected List<WALActionsListener> getWALActionListeners() {
1594     List<WALActionsListener> listeners = new ArrayList<WALActionsListener>();
1595     // Log roller.
1596     this.hlogRoller = new LogRoller(this, this);
1597     listeners.add(this.hlogRoller);
1598     if (this.replicationSourceHandler != null &&
1599         this.replicationSourceHandler.getWALActionsListener() != null) {
1600       // Replication handler is an implementation of WALActionsListener.
1601       listeners.add(this.replicationSourceHandler.getWALActionsListener());
1602     }
1603     return listeners;
1604   }
1605 
1606   protected List<WALActionsListener> getMetaWALActionListeners() {
1607     List<WALActionsListener> listeners = new ArrayList<WALActionsListener>();
1608     // Use a temporary log roller so that metaHLogRoller is already running by the
1609     // time the field becomes non-null.
1610     MetaLogRoller tmpLogRoller = new MetaLogRoller(this, this);
1611     String n = Thread.currentThread().getName();
1612     Threads.setDaemonThreadRunning(tmpLogRoller.getThread(),
1613         n + "-MetaLogRoller", uncaughtExceptionHandler);
1614     this.metaHLogRoller = tmpLogRoller;
1615     tmpLogRoller = null;
1616     listeners.add(this.metaHLogRoller);
1617     return listeners;
1618   }
1619 
1620   protected LogRoller getLogRoller() {
1621     return hlogRoller;
1622   }
1623 
1624   public MetricsRegionServer getMetrics() {
1625     return this.metricsRegionServer;
1626   }
1627 
1628   /**
1629    * @return Master address tracker instance.
1630    */
1631   public MasterAddressTracker getMasterAddressTracker() {
1632     return this.masterAddressTracker;
1633   }
1634 
1635   /*
1636    * Start maintenance Threads, Server, Worker and lease checker threads.
1637    * Install an UncaughtExceptionHandler that calls abort on the RegionServer if
1638    * we get an unhandled exception. We cannot set the handler on all threads.
1639    * The Server's internal Listener thread is off limits. For the Server, on an
1640    * OOME it waits a while then retries. Meantime, a flush or a compaction that
1641    * tries to run should hit the same critical condition and run the shutdown.
1642    * On its way out, this server will shut down the Server. Leases is somewhere
1643    * in between: it runs an internal thread that, while it inherits from Chore,
1644    * keeps its own stop mechanism, so it needs to be stopped by this hosting
1645    * server. Worker logs the exception and exits.
1646    */
1647   private void startServiceThreads() throws IOException {
1648     String n = Thread.currentThread().getName();
1649     // Start executor services
1650     this.service = new ExecutorService(getServerName().toShortString());
1651     this.service.startExecutorService(ExecutorType.RS_OPEN_REGION,
1652       conf.getInt("hbase.regionserver.executor.openregion.threads", 3));
1653     this.service.startExecutorService(ExecutorType.RS_OPEN_META,
1654       conf.getInt("hbase.regionserver.executor.openmeta.threads", 1));
1655     this.service.startExecutorService(ExecutorType.RS_CLOSE_REGION,
1656       conf.getInt("hbase.regionserver.executor.closeregion.threads", 3));
1657     this.service.startExecutorService(ExecutorType.RS_CLOSE_META,
1658       conf.getInt("hbase.regionserver.executor.closemeta.threads", 1));
1659     if (conf.getBoolean(StoreScanner.STORESCANNER_PARALLEL_SEEK_ENABLE, false)) {
1660       this.service.startExecutorService(ExecutorType.RS_PARALLEL_SEEK,
1661         conf.getInt("hbase.storescanner.parallel.seek.threads", 10));
1662     }
1663     this.service.startExecutorService(ExecutorType.RS_LOG_REPLAY_OPS,
1664       conf.getInt("hbase.regionserver.wal.max.splitters", SplitLogWorker.DEFAULT_MAX_SPLITTERS));
1665 
1666     Threads.setDaemonThreadRunning(this.hlogRoller.getThread(), n + ".logRoller",
1667         uncaughtExceptionHandler);
1668     this.cacheFlusher.start(uncaughtExceptionHandler);
1669     Threads.setDaemonThreadRunning(this.compactionChecker.getThread(), n +
1670       ".compactionChecker", uncaughtExceptionHandler);
1671     Threads.setDaemonThreadRunning(this.periodicFlusher.getThread(), n +
1672         ".periodicFlusher", uncaughtExceptionHandler);
1673     if (this.healthCheckChore != null) {
1674       Threads.setDaemonThreadRunning(this.healthCheckChore.getThread(), n + ".healthChecker",
1675             uncaughtExceptionHandler);
1676     }
1677     if (this.nonceManagerChore != null) {
1678       Threads.setDaemonThreadRunning(this.nonceManagerChore.getThread(), n + ".nonceCleaner",
1679             uncaughtExceptionHandler);
1680     }
1681 
1682     // Leases is not a Thread. Internally it runs a daemon thread. If it gets
1683     // an unhandled exception, it will just exit.
1684     this.leases.setName(n + ".leaseChecker");
1685     this.leases.start();
1686 
1687     if (this.replicationSourceHandler == this.replicationSinkHandler &&
1688         this.replicationSourceHandler != null) {
1689       this.replicationSourceHandler.startReplicationService();
1690     } else {
1691       if (this.replicationSourceHandler != null) {
1692         this.replicationSourceHandler.startReplicationService();
1693       }
1694       if (this.replicationSinkHandler != null) {
1695         this.replicationSinkHandler.startReplicationService();
1696       }
1697     }
1698 
1699     // Start Server.  This service is like leases in that it internally runs
1700     // a thread.
1701     this.rpcServer.start();
1702 
1703     // Create the log splitting worker and start it
1704     // set a smaller retries to fast fail otherwise splitlogworker could be blocked for
1705     // quite a while inside HConnection layer. The worker won't be available for other
1706     // tasks even after current task is preempted after a split task times out.
1707     Configuration sinkConf = HBaseConfiguration.create(conf);
1708     sinkConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
1709       conf.getInt("hbase.log.replay.retries.number", 8)); // 8 retries take about 23 seconds
1710     sinkConf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY,
1711       conf.getInt("hbase.log.replay.rpc.timeout", 30000)); // default 30 seconds
1712     sinkConf.setInt("hbase.client.serverside.retries.multiplier", 1);
1713     this.splitLogWorker = new SplitLogWorker(this.zooKeeper, sinkConf, this, this);
1714     splitLogWorker.start();
1715   }
1716 
1717   /**
1718    * Puts up the webui.
1719    * @return Returns final port -- maybe different from what we started with.
1720    * @throws IOException
1721    */
1722   private int putUpWebUI() throws IOException {
1723     int port = this.conf.getInt(HConstants.REGIONSERVER_INFO_PORT, 60030);
1724     // -1 is for disabling info server
1725     if (port < 0) return port;
1726     String addr = this.conf.get("hbase.regionserver.info.bindAddress", "0.0.0.0");
1727     // check if auto port bind enabled
1728     boolean auto = this.conf.getBoolean(HConstants.REGIONSERVER_INFO_PORT_AUTO,
1729         false);
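         // Keep bumping the port until we bind successfully (only when auto port
         // bind is enabled; otherwise the BindException is rethrown below).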
1730     while (true) {
1731       try {
1732         this.infoServer = new InfoServer("regionserver", addr, port, false, this.conf);
1733         this.infoServer.addServlet("status", "/rs-status", RSStatusServlet.class);
1734         this.infoServer.addServlet("dump", "/dump", RSDumpServlet.class);
1735         this.infoServer.setAttribute(REGIONSERVER, this);
1736         this.infoServer.setAttribute(REGIONSERVER_CONF, conf);
1737         this.infoServer.start();
1738         break;
1739       } catch (BindException e) {
1740         if (!auto) {
1741           // auto bind disabled; rethrow the BindException
1742           LOG.error("Failed binding http info server to port: " + port);
1743           throw e;
1744         }
1745         // auto bind enabled, try to use another port
1746         LOG.info("Failed binding http info server to port: " + port);
1747         port++;
1748       }
1749     }
1750     return this.infoServer.getPort();
1751   }
1752 
1753   /*
1754    * Verify that server is healthy
1755    */
1756   private boolean isHealthy() {
1757     if (!fsOk) {
1758       // File system problem
1759       return false;
1760     }
1761     // Verify that all threads are alive
1762     if (!(leases.isAlive()
1763         && cacheFlusher.isAlive() && hlogRoller.isAlive()
1764         && this.compactionChecker.isAlive()
1765         && this.periodicFlusher.isAlive())) {
1766       stop("One or more threads are no longer alive -- stop");
1767       return false;
1768     }
1769     if (metaHLogRoller != null && !metaHLogRoller.isAlive()) {
1770       stop("Meta HLog roller thread is no longer alive -- stop");
1771       return false;
1772     }
1773     return true;
1774   }
1775 
1776   public HLog getWAL() {
1777     try {
1778       return getWAL(null);
1779     } catch (IOException e) {
1780       LOG.warn("getWAL threw exception " + e);
1781       return null;
1782     }
1783   }
1784 
1785   @Override
1786   public HLog getWAL(HRegionInfo regionInfo) throws IOException {
1787     //TODO: at some point this should delegate to the HLogFactory
1788     //currently, we don't care about the region as much as we care about the
1789     //table (hence checking the table name below).
1790     //_ROOT_ and hbase:meta regions have a separate WAL.
1791     if (regionInfo != null && regionInfo.isMetaTable()) {
1792       return getMetaWAL();
1793     }
1794     return this.hlog;
1795   }
1796 
1797   @Override
1798   public CatalogTracker getCatalogTracker() {
1799     return this.catalogTracker;
1800   }
1801 
1802   @Override
1803   public void stop(final String msg) {
1804     if (!this.stopped) {
1805       try {
1806         if (this.rsHost != null) {
1807           this.rsHost.preStop(msg);
1808         }
1809         this.stopped = true;
1810         LOG.info("STOPPED: " + msg);
1811         // Wakes run() if it is sleeping
1812         sleeper.skipSleepCycle();
1813       } catch (IOException exp) {
1814         LOG.warn("The region server did not stop", exp);
1815       }
1816     }
1817   }
1818 
1819   public void waitForServerOnline() {
1820     while (!isOnline() && !isStopped()) {
1821       sleeper.sleep();
1822     }
1823   }
1824 
1825   @Override
1826   public void postOpenDeployTasks(final HRegion r, final CatalogTracker ct)
1827   throws KeeperException, IOException {
1828     checkOpen();
1829     LOG.info("Post open deploy tasks for region=" + r.getRegionNameAsString());
1830     // Do checks to see if we need to compact (references or too many files)
1831     for (Store s : r.getStores().values()) {
1832       if (s.hasReferences() || s.needsCompaction()) {
1833         this.compactSplitThread.requestSystemCompaction(r, s, "Opening Region");
1834       }
1835     }
1836     long openSeqNum = r.getOpenSeqNum();
1837     if (openSeqNum == HConstants.NO_SEQNUM) {
1838       // If we opened a region, we should have read some sequence number from it.
1839       LOG.error("No sequence number found when opening " + r.getRegionNameAsString());
1840       openSeqNum = 0;
1841     }
1842 
1843     // Update flushed sequence id of a recovering region in ZK
1844     updateRecoveringRegionLastFlushedSequenceId(r);
1845 
1846     if (useZKForAssignment) {
1847       if (r.getRegionInfo().isMetaRegion()) {
1848         LOG.info("Updating zk with meta location");
1849         // The state field is for ZK-less assignment.
1850         // For ZK-based assignment, always set it to OPEN.
1851         MetaRegionTracker.setMetaLocation(getZooKeeper(), this.serverNameFromMasterPOV, State.OPEN);
1852       } else {
1853         MetaEditor.updateRegionLocation(ct, r.getRegionInfo(), this.serverNameFromMasterPOV,
1854           openSeqNum);
1855       }
1856     }
1857     if (!useZKForAssignment
1858         && !reportRegionStateTransition(TransitionCode.OPENED, openSeqNum, r.getRegionInfo())) {
1859       throw new IOException("Failed to report opened region to master: "
1860           + r.getRegionNameAsString());
1861     }
1862 
1863     LOG.info("Finished post open deploy task for " + r.getRegionNameAsString());
1865   }
1866 
1867   @Override
1868   public RpcServerInterface getRpcServer() {
1869     return rpcServer;
1870   }
1871 
1872   /**
1873    * Cause the server to exit without closing the regions it is serving, or the log
1874    * it is using, and without notifying the master. Used in unit testing and on
1875    * catastrophic events such as HDFS being yanked out from under hbase, or an OOME.
1876    *
1877    * @param reason
1878    *          the reason we are aborting
1879    * @param cause
1880    *          the exception that caused the abort, or null
1881    */
1882   @Override
1883   public void abort(String reason, Throwable cause) {
1884     String msg = "ABORTING region server " + this + ": " + reason;
1885     if (cause != null) {
1886       LOG.fatal(msg, cause);
1887     } else {
1888       LOG.fatal(msg);
1889     }
1890     this.abortRequested = true;
1891     // HBASE-4014: show list of coprocessors that were loaded to help debug
1892     // regionserver crashes. Note that we're implicitly using
1893     // java.util.HashSet's toString() method to print the coprocessor names.
1894     LOG.fatal("RegionServer abort: loaded coprocessors are: " +
1895         CoprocessorHost.getLoadedCoprocessors());
1896     // Do our best to report our abort to the master, but this may not work
1897     try {
1898       if (cause != null) {
1899         msg += "\nCause:\n" + StringUtils.stringifyException(cause);
1900       }
1901       // Report to the master but only if we have already registered with the master.
1902       if (rssStub != null && this.serverNameFromMasterPOV != null) {
1903         ReportRSFatalErrorRequest.Builder builder =
1904           ReportRSFatalErrorRequest.newBuilder();
1905         ServerName sn =
1906           ServerName.parseVersionedServerName(this.serverNameFromMasterPOV.getVersionedBytes());
1907         builder.setServer(ProtobufUtil.toServerName(sn));
1908         builder.setErrorMessage(msg);
1909         rssStub.reportRSFatalError(null, builder.build());
1910       }
1911     } catch (Throwable t) {
1912       LOG.warn("Unable to report fatal error to master", t);
1913     }
1914     stop(reason);
1915   }
1916 
1917   /**
1918    * @see HRegionServer#abort(String, Throwable)
1919    */
1920   public void abort(String reason) {
1921     abort(reason, null);
1922   }
1923 
1924   @Override
1925   public boolean isAborted() {
1926     return this.abortRequested;
1927   }
1928 
1929   /*
1930    * Simulate a kill -9 of this server. Exits w/o closing regions or cleaning up
1931    * logs, but it does close the socket in case we want to bring up a server on the
1932    * old hostname+port immediately.
1933    */
1934   protected void kill() {
1935     this.killed = true;
1936     abort("Simulated kill");
1937   }
1938 
1939   /**
1940    * Wait on all threads to finish. Presumption is that all closes and stops
1941    * have already been called.
1942    */
1943   protected void join() {
1944     if (this.nonceManagerChore != null) {
1945       Threads.shutdown(this.nonceManagerChore.getThread());
1946     }
1947     if (this.compactionChecker != null) {
1948       Threads.shutdown(this.compactionChecker.getThread());
1949     }
1950     if (this.periodicFlusher != null) {
1951       Threads.shutdown(this.periodicFlusher.getThread());
1952     }
1953     if (this.cacheFlusher != null) {
1954       this.cacheFlusher.join();
1955     }
1956     if (this.healthCheckChore != null) {
1957       Threads.shutdown(this.healthCheckChore.getThread());
1958     }
1959     if (this.spanReceiverHost != null) {
1960       this.spanReceiverHost.closeReceivers();
1961     }
1962     if (this.hlogRoller != null) {
1963       Threads.shutdown(this.hlogRoller.getThread());
1964     }
1965     if (this.metaHLogRoller != null) {
1966       Threads.shutdown(this.metaHLogRoller.getThread());
1967     }
1968     if (this.compactSplitThread != null) {
1969       this.compactSplitThread.join();
1970     }
1971     if (this.service != null) this.service.shutdown();
1972     if (this.replicationSourceHandler != null &&
1973         this.replicationSourceHandler == this.replicationSinkHandler) {
1974       this.replicationSourceHandler.stopReplicationService();
1975     } else {
1976       if (this.replicationSourceHandler != null) {
1977         this.replicationSourceHandler.stopReplicationService();
1978       }
1979       if (this.replicationSinkHandler != null) {
1980         this.replicationSinkHandler.stopReplicationService();
1981       }
1982     }
1983   }
1984 
1985   @Override
1986   public boolean reportRegionStateTransition(TransitionCode code, HRegionInfo... hris) {
1987     return reportRegionStateTransition(code, HConstants.NO_SEQNUM, hris);
1988   }
1989 
1990   @Override
1991   public boolean reportRegionStateTransition(TransitionCode code, long openSeqNum, HRegionInfo... hris) {
1992     ReportRegionStateTransitionRequest.Builder builder = ReportRegionStateTransitionRequest.newBuilder();
1993     builder.setServer(ProtobufUtil.toServerName(serverName));
1994     RegionStateTransition.Builder transition = builder.addTransitionBuilder();
1995     transition.setTransitionCode(code);
1996     if (code == TransitionCode.OPENED && openSeqNum >= 0) {
1997       transition.setOpenSeqNum(openSeqNum);
1998     }
1999     for (HRegionInfo hri : hris) {
2000       transition.addRegionInfo(HRegionInfo.convert(hri));
2001     }
2002     ReportRegionStateTransitionRequest request = builder.build();
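         // Keep retrying until the master accepts the report or this server is
         // stopping; the stub is rebuilt if the RPC fails.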
2003     while (keepLooping()) {
2004       RegionServerStatusService.BlockingInterface rss = rssStub;
2005       try {
2006         if (rss == null) {
2007           createRegionServerStatusStub();
2008           continue;
2009         }
2010         ReportRegionStateTransitionResponse response = rss.reportRegionStateTransition(null, request);
2011         if (response.hasErrorMessage()) {
2012           LOG.info("Failed to transition " + hris[0] + " to " + code + ": "
2013               + response.getErrorMessage());
2014           return false;
2015         }
2016         return true;
2017       } catch (ServiceException se) {
2018         IOException ioe = ProtobufUtil.getRemoteException(se);
2019         LOG.info("Failed to report region transition, will retry", ioe);
2020         if (rssStub == rss) {
2021           rssStub = null;
2022         }
2023       }
2024     }
2025     return false;
2026   }
2027 
2028   /**
2029    * Get the current master from ZooKeeper and open the RPC connection to it.
2030    *
2031    * Method will block until a master is available. You can break from this
2032    * block by requesting the server stop.
2033    *
2034    * @return master + port, or null if server has been stopped
2035    */
2036   private synchronized ServerName createRegionServerStatusStub() {
2038     if (rssStub != null) {
2039       return masterAddressTracker.getMasterAddress();
2040     }
2041     ServerName sn = null;
2042     long previousLogTime = 0;
2043     RegionServerStatusService.BlockingInterface master = null;
2044     boolean refresh = false; // for the first time, use cached data
2045     RegionServerStatusService.BlockingInterface intf = null;
2046     while (keepLooping() && master == null) {
2047       sn = this.masterAddressTracker.getMasterAddress(refresh);
2048       if (sn == null) {
2049         if (!keepLooping()) {
2050           // give up with no connection.
2051           LOG.debug("No master found and cluster is stopped; bailing out");
2052           return null;
2053         }
2054         LOG.debug("No master found; retry");
2055         previousLogTime = System.currentTimeMillis();
2056         refresh = true; // let's try pull it from ZK directly
2057         sleeper.sleep();
2058         continue;
2059       }
2060 
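           // The constructed address is discarded; creating it resolves the
           // master's hostname eagerly.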
2061       new InetSocketAddress(sn.getHostname(), sn.getPort());
2062       try {
2063         BlockingRpcChannel channel =
2064             this.rpcClient.createBlockingRpcChannel(sn, userProvider.getCurrent(), this.rpcTimeout);
2065         intf = RegionServerStatusService.newBlockingStub(channel);
2066         break;
2067       } catch (IOException e) {
2068         e = e instanceof RemoteException ?
2069             ((RemoteException)e).unwrapRemoteException() : e;
2070         if (e instanceof ServerNotRunningYetException) {
2071           if (System.currentTimeMillis() > (previousLogTime+1000)){
2072             LOG.info("Master isn't available yet, retrying");
2073             previousLogTime = System.currentTimeMillis();
2074           }
2075         } else {
2076           if (System.currentTimeMillis() > (previousLogTime + 1000)) {
2077             LOG.warn("Unable to connect to master. Retrying. Error was:", e);
2078             previousLogTime = System.currentTimeMillis();
2079           }
2080         }
2081         try {
2082           Thread.sleep(200);
2083         } catch (InterruptedException ignored) {
2084         }
2085       }
2086     }
2087     rssStub = intf;
2088     return sn;
2089   }
2090 
2091   /**
2092    * @return True if we should break loop because cluster is going down or
2093    * this server has been stopped or hdfs has gone bad.
2094    */
2095   private boolean keepLooping() {
2096     return !this.stopped && isClusterUp();
2097   }
2098 
2099   /*
2100    * Let the master know we're here. Run initialization using parameters passed
2101    * to us by the master.
2102    * @return A Map of key/value configurations we got from the Master else
2103    * null if we failed to register.
2104    * @throws IOException
2105    */
2106   private RegionServerStartupResponse reportForDuty() throws IOException {
2107     ServerName masterServerName = createRegionServerStatusStub();
2108     if (masterServerName == null) return null;
2109     RegionServerStartupResponse result = null;
2110     try {
2111       this.requestCount.set(0);
2112       LOG.info("reportForDuty to master=" + masterServerName + " with port=" + this.isa.getPort() +
2113         ", startcode=" + this.startcode);
2114       long now = EnvironmentEdgeManager.currentTimeMillis();
2115       int port = this.isa.getPort();
2116       RegionServerStartupRequest.Builder request = RegionServerStartupRequest.newBuilder();
2117       request.setPort(port);
2118       request.setServerStartCode(this.startcode);
2119       request.setServerCurrentTime(now);
2120       result = this.rssStub.regionServerStartup(null, request.build());
2121     } catch (ServiceException se) {
2122       IOException ioe = ProtobufUtil.getRemoteException(se);
2123       if (ioe instanceof ClockOutOfSyncException) {
2124         LOG.fatal("Master rejected startup because clock is out of sync", ioe);
2125         // Re-throw IOE will cause RS to abort
2126         throw ioe;
2127       } else if (ioe instanceof ServerNotRunningYetException) {
2128         LOG.debug("Master is not running yet");
2129       } else {
2130         LOG.warn("error telling master we are up", se);
2131       }
2132     }
2133     return result;
2134   }
2135 
2136   @Override
2137   public long getLastSequenceId(byte[] region) {
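         // -1 doubles as the 'unknown' sentinel when the master cannot be reached.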
2138     long lastFlushedSequenceId = -1L;
2139     try {
2140       GetLastFlushedSequenceIdRequest req = RequestConverter
2141           .buildGetLastFlushedSequenceIdRequest(region);
2142       lastFlushedSequenceId = rssStub.getLastFlushedSequenceId(null, req)
2143           .getLastFlushedSequenceId();
2144     } catch (ServiceException e) {
2145       lastFlushedSequenceId = -1L;
2146       LOG.warn("Unable to connect to the master to check the last flushed sequence id", e);
2147     }
2148     return lastFlushedSequenceId;
2149   }
2150 
2151   /**
2152    * Closes all regions.  Called on our way out.
2153    * Assumes that it's not possible for new regions to be added to onlineRegions
2154    * while this method runs.
2155    */
2156   protected void closeAllRegions(final boolean abort) {
2157     closeUserRegions(abort);
2158     closeMetaTableRegions(abort);
2159   }
2160 
2161   /**
2162    * Close meta region if we carry it
2163    * @param abort Whether we're running an abort.
2164    */
2165   void closeMetaTableRegions(final boolean abort) {
2166     HRegion meta = null;
2167     this.lock.writeLock().lock();
2168     try {
2169       for (Map.Entry<String, HRegion> e: onlineRegions.entrySet()) {
2170         HRegionInfo hri = e.getValue().getRegionInfo();
2171         if (hri.isMetaRegion()) {
2172           meta = e.getValue();
2173         }
2174         if (meta != null) break;
2175       }
2176     } finally {
2177       this.lock.writeLock().unlock();
2178     }
2179     if (meta != null) closeRegionIgnoreErrors(meta.getRegionInfo(), abort);
2180   }
2181 
2182   /**
2183    * Schedule closes on all user regions.
2184    * Should be safe calling multiple times because it won't close regions
2185    * that are already closed or that are closing.
2186    * @param abort Whether we're running an abort.
2187    */
2188   void closeUserRegions(final boolean abort) {
2189     this.lock.writeLock().lock();
2190     try {
2191       for (Map.Entry<String, HRegion> e: this.onlineRegions.entrySet()) {
2192         HRegion r = e.getValue();
2193         if (!r.getRegionInfo().isMetaTable() && r.isAvailable()) {
2194           // Don't update zk with this close transition; pass false.
2195           closeRegionIgnoreErrors(r.getRegionInfo(), abort);
2196         }
2197       }
2198     } finally {
2199       this.lock.writeLock().unlock();
2200     }
2201   }
2202 
2203   /** @return the info server */
2204   public InfoServer getInfoServer() {
2205     return infoServer;
2206   }
2207 
2208   /**
2209    * @return true if a stop has been requested.
2210    */
2211   @Override
2212   public boolean isStopped() {
2213     return this.stopped;
2214   }
2215 
2216   @Override
2217   public boolean isStopping() {
2218     return this.stopping;
2219   }
2220 
2221   @Override
2222   public Map<String, HRegion> getRecoveringRegions() {
2223     return this.recoveringRegions;
2224   }
2225 
2226   /**
2227    *
2228    * @return the configuration
2229    */
2230   @Override
2231   public Configuration getConfiguration() {
2232     return conf;
2233   }
2234 
2235   /** @return the write lock for the server */
2236   ReentrantReadWriteLock.WriteLock getWriteLock() {
2237     return lock.writeLock();
2238   }
2239 
2240   public int getNumberOfOnlineRegions() {
2241     return this.onlineRegions.size();
2242   }
2243 
2244   boolean isOnlineRegionsEmpty() {
2245     return this.onlineRegions.isEmpty();
2246   }
2247 
2248   /**
2249    * For tests, web ui and metrics.
2250    * This method will only work if HRegionServer is in the same JVM as client;
2251    * HRegion cannot be serialized to cross an rpc.
2252    */
2253   public Collection<HRegion> getOnlineRegionsLocalContext() {
2254     Collection<HRegion> regions = this.onlineRegions.values();
2255     return Collections.unmodifiableCollection(regions);
2256   }
2257 
2258   @Override
2259   public void addToOnlineRegions(HRegion region) {
2260     this.onlineRegions.put(region.getRegionInfo().getEncodedName(), region);
2261   }
2262 
2263   /**
2264    * @return A new Map of online regions sorted by region size with the first entry being the
2265    * biggest.  If two regions are the same size, then the last one found wins; i.e. this method
2266    * may NOT return all regions.
2267    */
2268   SortedMap<Long, HRegion> getCopyOfOnlineRegionsSortedBySize() {
2269     // we'll sort the regions in reverse
2270     SortedMap<Long, HRegion> sortedRegions = new TreeMap<Long, HRegion>(
2271         new Comparator<Long>() {
2272           @Override
2273           public int compare(Long a, Long b) {
2274             return -1 * a.compareTo(b);
2275           }
2276         });
2277     // Copy over all regions. Regions are sorted by size with biggest first.
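         // Note: regions with identical memstore sizes collide on the key, so the
         // last one put wins (see method javadoc).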
2278     for (HRegion region : this.onlineRegions.values()) {
2279       sortedRegions.put(region.memstoreSize.get(), region);
2280     }
2281     return sortedRegions;
2282   }
2283 
2284   /**
2285    * @return time stamp in millis of when this region server was started
2286    */
2287   public long getStartcode() {
2288     return this.startcode;
2289   }
2290 
2291   /** @return reference to FlushRequester */
2292   @Override
2293   public FlushRequester getFlushRequester() {
2294     return this.cacheFlusher;
2295   }
2296 
2297   /**
2298    * Get the top N most loaded regions this server is serving so we can tell the
2299    * master which regions it can reallocate if we're overloaded. TODO: actually
2300    * calculate which regions are most loaded. (Right now, we're just grabbing
2301    * the first N regions being served regardless of load.)
2302    */
2303   protected HRegionInfo[] getMostLoadedRegions() {
2304     ArrayList<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2305     for (HRegion r : onlineRegions.values()) {
2306       if (!r.isAvailable()) {
2307         continue;
2308       }
2309       if (regions.size() < numRegionsToReport) {
2310         regions.add(r.getRegionInfo());
2311       } else {
2312         break;
2313       }
2314     }
2315     return regions.toArray(new HRegionInfo[regions.size()]);
2316   }
2317 
2318   @Override
2319   public Leases getLeases() {
2320     return leases;
2321   }
2322 
2323   /**
2324    * @return Return the rootDir.
2325    */
2326   protected Path getRootDir() {
2327     return rootDir;
2328   }
2329 
2330   /**
2331    * @return Return the fs.
2332    */
2333   @Override
2334   public FileSystem getFileSystem() {
2335     return fs;
2336   }
2337 
2338   @Override
2339   public String toString() {
2340     return getServerName().toString();
2341   }
2342 
2343   /**
2344    * Interval at which threads should run
2345    *
2346    * @return the interval
2347    */
2348   public int getThreadWakeFrequency() {
2349     return threadWakeFrequency;
2350   }
2351 
2352   @Override
2353   public ZooKeeperWatcher getZooKeeper() {
2354     return zooKeeper;
2355   }
2356 
2357   @Override
2358   public ServerName getServerName() {
2359     // Our servername could change after we talk to the master.
2360     return this.serverNameFromMasterPOV == null?
2361         ServerName.valueOf(this.isa.getHostName(), this.isa.getPort(), this.startcode) :
2362         this.serverNameFromMasterPOV;
2363   }
2364 
2365   @Override
2366   public CompactionRequestor getCompactionRequester() {
2367     return this.compactSplitThread;
2368   }
2369 
2370   public ZooKeeperWatcher getZooKeeperWatcher() {
2371     return this.zooKeeper;
2372   }
2373 
2374   public RegionServerCoprocessorHost getCoprocessorHost(){
2375     return this.rsHost;
2376   }
2377 
2378   @Override
2379   public ConcurrentMap<byte[], Boolean> getRegionsInTransitionInRS() {
2380     return this.regionsInTransitionInRS;
2381   }
2382 
2383   @Override
2384   public ExecutorService getExecutorService() {
2385     return service;
2386   }
2387 
2388   //
2389   // Main program and support routines
2390   //
2391 
2392   /**
2393    * Load the replication service objects, if any
2394    */
2395   static private void createNewReplicationInstance(Configuration conf,
2396     HRegionServer server, FileSystem fs, Path logDir, Path oldLogDir) throws IOException{
2397 
2398     // If replication is not enabled, then return immediately.
2399     if (!conf.getBoolean(HConstants.REPLICATION_ENABLE_KEY,
2400         HConstants.REPLICATION_ENABLE_DEFAULT)) {
2401       return;
2402     }
2403 
2404     // read in the name of the source replication class from the config file.
2405     String sourceClassname = conf.get(HConstants.REPLICATION_SOURCE_SERVICE_CLASSNAME,
2406                                HConstants.REPLICATION_SERVICE_CLASSNAME_DEFAULT);
2407 
2408     // read in the name of the sink replication class from the config file.
2409     String sinkClassname = conf.get(HConstants.REPLICATION_SINK_SERVICE_CLASSNAME,
2410                              HConstants.REPLICATION_SERVICE_CLASSNAME_DEFAULT);
2411 
2412     // If both the sink and the source class names are the same, then instantiate
2413     // only one object.
2414     if (sourceClassname.equals(sinkClassname)) {
2415       server.replicationSourceHandler = (ReplicationSourceService)
2416                                          newReplicationInstance(sourceClassname,
2417                                          conf, server, fs, logDir, oldLogDir);
2418       server.replicationSinkHandler = (ReplicationSinkService)
2419                                          server.replicationSourceHandler;
2420     } else {
2421       server.replicationSourceHandler = (ReplicationSourceService)
2422                                          newReplicationInstance(sourceClassname,
2423                                          conf, server, fs, logDir, oldLogDir);
2424       server.replicationSinkHandler = (ReplicationSinkService)
2425                                          newReplicationInstance(sinkClassname,
2426                                          conf, server, fs, logDir, oldLogDir);
2427     }
2428   }
2429 
2430   static private ReplicationService newReplicationInstance(String classname,
2431     Configuration conf, HRegionServer server, FileSystem fs, Path logDir,
2432     Path oldLogDir) throws IOException{
2433 
2434     Class<?> clazz = null;
2435     try {
2436       ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
2437       clazz = Class.forName(classname, true, classLoader);
2438     } catch (java.lang.ClassNotFoundException nfe) {
2439       throw new IOException("Could not find class for " + classname);
2440     }
2441 
2442     // create an instance of the replication object.
2443     ReplicationService service = (ReplicationService)
2444                               ReflectionUtils.newInstance(clazz, conf);
2445     service.initialize(server, fs, logDir, oldLogDir);
2446     return service;
2447   }
2448 
2449   /**
2450    * @param hrs
2451    * @return Thread the RegionServer is running in correctly named.
2452    * @throws IOException
2453    */
2454   public static Thread startRegionServer(final HRegionServer hrs)
2455       throws IOException {
2456     return startRegionServer(hrs, "regionserver" + hrs.isa.getPort());
2457   }
2458 
2459   /**
2460    * @param hrs
2461    * @param name
2462    * @return Thread the RegionServer is running in correctly named.
2463    * @throws IOException
2464    */
2465   public static Thread startRegionServer(final HRegionServer hrs,
2466       final String name) throws IOException {
2467     Thread t = new Thread(hrs);
2468     t.setName(name);
2469     t.start();
2470     // Install shutdown hook that will catch signals and run an orderly shutdown
2471     // of the hrs.
2472     ShutdownHook.install(hrs.getConfiguration(), FileSystem.get(hrs
2473         .getConfiguration()), hrs, t);
2474     return t;
2475   }
2476 
2477   /**
2478    * Utility for constructing an instance of the passed HRegionServer class.
2479    *
2480    * @param regionServerClass
2481    * @param conf2
2482    * @return HRegionServer instance.
2483    */
2484   public static HRegionServer constructRegionServer(
2485       Class<? extends HRegionServer> regionServerClass,
2486       final Configuration conf2) {
2487     try {
2488       Constructor<? extends HRegionServer> c = regionServerClass
2489           .getConstructor(Configuration.class);
2490       return c.newInstance(conf2);
2491     } catch (Exception e) {
2492       throw new RuntimeException("Failed construction of " + "Regionserver: "
2493           + regionServerClass.toString(), e);
2494     }
2495   }
2496 
2497   /**
2498    * @see org.apache.hadoop.hbase.regionserver.HRegionServerCommandLine
2499    */
2500   public static void main(String[] args) throws Exception {
2501     VersionInfo.logVersion();
2502     Configuration conf = HBaseConfiguration.create();
2503     @SuppressWarnings("unchecked")
2504     Class<? extends HRegionServer> regionServerClass = (Class<? extends HRegionServer>) conf
2505         .getClass(HConstants.REGION_SERVER_IMPL, HRegionServer.class);
2506 
2507     new HRegionServerCommandLine(regionServerClass).doMain(args);
2508   }
2509 
2510   /**
2511    * Gets the online regions of the specified table.
2512    * This method looks at the in-memory onlineRegions.  It does not go to <code>hbase:meta</code>.
2513    * Only returns <em>online</em> regions.  If a region on this table has been
2514    * closed during a disable, etc., it will not be included in the returned list.
2515    * So, the returned list may not necessarily be ALL regions in this table; it is
2516    * all the ONLINE regions in the table.
2517    * @param tableName
2518    * @return Online regions from <code>tableName</code>
2519    */
2520   @Override
2521   public List<HRegion> getOnlineRegions(TableName tableName) {
2522     List<HRegion> tableRegions = new ArrayList<HRegion>();
2523     synchronized (this.onlineRegions) {
2524       for (HRegion region: this.onlineRegions.values()) {
2525         HRegionInfo regionInfo = region.getRegionInfo();
2526         if (regionInfo.getTable().equals(tableName)) {
2527           tableRegions.add(region);
2528         }
2529       }
2530     }
2531     return tableRegions;
2532   }
2533 
2534   // used by org/apache/hbase/tmpl/regionserver/RSStatusTmpl.jamon (HBASE-4070).
2535   public String[] getCoprocessors() {
2536     TreeSet<String> coprocessors = new TreeSet<String>(
2537         this.hlog.getCoprocessorHost().getCoprocessors());
2538     Collection<HRegion> regions = getOnlineRegionsLocalContext();
2539     for (HRegion region: regions) {
2540       coprocessors.addAll(region.getCoprocessorHost().getCoprocessors());
2541     }
2542     return coprocessors.toArray(new String[coprocessors.size()]);
2543   }
2544 
2545   /**
2546    * Instantiated as a scanner lease. If the lease times out, the scanner is
2547    * closed
2548    */
2549   private class ScannerListener implements LeaseListener {
2550     private final String scannerName;
2551 
2552     ScannerListener(final String n) {
2553       this.scannerName = n;
2554     }
2555 
2556     @Override
2557     public void leaseExpired() {
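           // Remove the holder first: any client call arriving after this point
           // gets an UnknownScannerException.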
2558       RegionScannerHolder rsh = scanners.remove(this.scannerName);
2559       if (rsh != null) {
2560         RegionScanner s = rsh.s;
2561         LOG.info("Scanner " + this.scannerName + " lease expired on region "
2562             + s.getRegionInfo().getRegionNameAsString());
2563         try {
2564           HRegion region = getRegion(s.getRegionInfo().getRegionName());
2565           if (region != null && region.getCoprocessorHost() != null) {
2566             region.getCoprocessorHost().preScannerClose(s);
2567           }
2568 
2569           s.close();
2570           if (region != null && region.getCoprocessorHost() != null) {
2571             region.getCoprocessorHost().postScannerClose(s);
2572           }
2573         } catch (IOException e) {
2574           LOG.error("Closing scanner for "
2575               + s.getRegionInfo().getRegionNameAsString(), e);
2576         }
2577       } else {
2578         LOG.info("Scanner " + this.scannerName + " lease expired");
2579       }
2580     }
2581   }
2582 
2583   /**
2584    * Called to verify that this server is up and running.
2585    *
2586    * @throws IOException
2587    */
2588   protected void checkOpen() throws IOException {
2589     if (this.stopped || this.abortRequested) {
2590       throw new RegionServerStoppedException("Server " + getServerName() +
2591         " not running" + (this.abortRequested ? ", aborting" : ""));
2592     }
2593     if (!fsOk) {
2594       throw new RegionServerStoppedException("File system not available");
2595     }
2596   }
2597 
2598 
2599   /**
2600    * Try to close the region, logs a warning on failure but continues.
2601    * @param region Region to close
2602    */
2603   private void closeRegionIgnoreErrors(HRegionInfo region, final boolean abort) {
2604     try {
2605       if (!closeRegion(region.getEncodedName(), abort, false, -1, null)) {
2606         LOG.warn("Failed to close " + region.getRegionNameAsString() +
2607             " - ignoring and continuing");
2608       }
2609     } catch (IOException e) {
2610       LOG.warn("Failed to close " + region.getRegionNameAsString() +
2611           " - ignoring and continuing", e);
2612     }
2613   }
2614 
2615   /**
2616    * Asynchronously close a region; can be called from the master or internally by the
2617    * regionserver when stopping. If called from the master, the region will update the znode status.
2618    *
2619    * <p>
2620    * If an opening was in progress, this method will cancel it, but will not start a new close. The
2621    * coprocessors are not called in this case. A NotServingRegionException exception is thrown.
2622    * </p>
2623    *
2624    * <p>
2625    *   If a close was in progress, this new request will be ignored, and an exception thrown.
2626    * </p>
2627    *
2628    * @param encodedName Region to close
2629    * @param abort True if we are aborting
2630    * @param zk True if we are to update zk about the region close; if the close
2631    * was orchestrated by the master, then update zk.  If the close is being run by
2632    * the regionserver because it's going down, don't update zk.
2633    * @param versionOfClosingNode the version of znode to compare when RS transitions the znode from
2634    *   CLOSING state.
2635    * @return True if closed a region.
2636    * @throws NotServingRegionException if the region is not online
2637    * @throws RegionAlreadyInTransitionException if the region is already closing
2638    */
2639   protected boolean closeRegion(String encodedName, final boolean abort,
2640       final boolean zk, final int versionOfClosingNode, final ServerName sn)
2641       throws NotServingRegionException, RegionAlreadyInTransitionException {
2642     // Check for permissions to close.
2643     HRegion actualRegion = this.getFromOnlineRegions(encodedName);
2644     if ((actualRegion != null) && (actualRegion.getCoprocessorHost() != null)) {
2645       try {
2646         actualRegion.getCoprocessorHost().preClose(false);
2647       } catch (IOException exp) {
2648         LOG.warn("Unable to close region: the coprocessor launched an error ", exp);
2649         return false;
2650       }
2651     }
2652 
2653     final Boolean previous = this.regionsInTransitionInRS.putIfAbsent(encodedName.getBytes(),
2654         Boolean.FALSE);
2655 
2656     if (Boolean.TRUE.equals(previous)) {
2657       LOG.info("Received CLOSE for the region:" + encodedName + " , which we are already " +
2658           "trying to OPEN. Cancelling OPENING.");
2659       if (!regionsInTransitionInRS.replace(encodedName.getBytes(), previous, Boolean.FALSE)){
2660         // The replace failed. That should be an exceptional case, but theoretically it can happen.
2661         // We're going to try to do a standard close then.
2662         LOG.warn("The opening for region " + encodedName + " was done before we could cancel it." +
2663             " Doing a standard close now");
2664         return closeRegion(encodedName, abort, zk, versionOfClosingNode, sn);
2665       }
2666       // Let's get the region from the online region list again
2667       actualRegion = this.getFromOnlineRegions(encodedName);
2668       if (actualRegion == null) { // Not online after all; cancelling the open is enough.
2669         LOG.info("The opening previously in progress has been cancelled by a CLOSE request.");
2670         // The master deletes the znode when it receives this exception.
2671         throw new RegionAlreadyInTransitionException("The region " + encodedName +
2672           " was opening but not yet served. Opening is cancelled.");
2673       }
2674     } else if (Boolean.FALSE.equals(previous)) {
2675       LOG.info("Received CLOSE for the region: " + encodedName +
2676         " ,which we are already trying to CLOSE, but not completed yet");
2677       // The master will retry till the region is closed. We need to do this since
2678       // the region could fail to close somehow. If we mark the region closed in master
2679       // while it is not, there could be data loss.
2680       // If the region is stuck in closing for a while, and the master runs out of retries,
2681       // master will move the region to failed_to_close. Later on, if the region
2682       // is indeed closed, master can properly re-assign it.
2683       throw new RegionAlreadyInTransitionException("The region " + encodedName +
2684         " was already closing. New CLOSE request is ignored.");
2685     }
2686 
2687     if (actualRegion == null) {
2688       LOG.error("Received CLOSE for a region which is not online, and we're not opening.");
2689       this.regionsInTransitionInRS.remove(encodedName.getBytes());
2690       // The master deletes the znode when it receives this exception.
2691       throw new NotServingRegionException("The region " + encodedName +
2692           " is not online, and is not opening.");
2693     }
2694 
2695     CloseRegionHandler crh;
2696     final HRegionInfo hri = actualRegion.getRegionInfo();
2697     if (hri.isMetaRegion()) {
2698       crh = new CloseMetaHandler(this, this, hri, abort, zk, versionOfClosingNode);
2699     } else {
2700       crh = new CloseRegionHandler(this, this, hri, abort, zk, versionOfClosingNode, sn);
2701     }
2702     this.service.submit(crh);
2703     return true;
2704   }
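
       /*
        * The in-transition bookkeeping above is a small state machine keyed by
        * encoded region name: no entry = no transition, TRUE = opening, FALSE =
        * closing. A sketch of the decision table (illustrative helper only; it
        * mirrors, but is not called by, closeRegion above):
        */
       private static String describeCloseDecision(final Boolean inTransition) {
         if (inTransition == null) {
           return "no transition in progress: start a fresh close";
         }
         return inTransition.booleanValue()
             ? "opening: try to cancel the open instead of starting a new close"
             : "closing: reject the duplicate CLOSE via RegionAlreadyInTransitionException";
       }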
2705 
2706   /**
2707    * @param regionName region name in binary form
2708    * @return HRegion for the passed binary <code>regionName</code> or null if the
2709    *         named region is not a member of the online regions.
2710    */
2711   public HRegion getOnlineRegion(final byte[] regionName) {
2712     String encodedRegionName = HRegionInfo.encodeRegionName(regionName);
2713     return this.onlineRegions.get(encodedRegionName);
2714   }
2715 
2716   public InetSocketAddress[] getRegionBlockLocations(final String encodedRegionName) {
2717     return this.regionFavoredNodesMap.get(encodedRegionName);
2718   }
2719 
2720   @Override
2721   public HRegion getFromOnlineRegions(final String encodedRegionName) {
2722     return this.onlineRegions.get(encodedRegionName);
2723   }
2724 
2725 
2726   @Override
2727   public boolean removeFromOnlineRegions(final HRegion r, ServerName destination) {
2728     HRegion toReturn = this.onlineRegions.remove(r.getRegionInfo().getEncodedName());
2729 
2730     if (destination != null) {
2731       HLog wal = getWAL();
2732       long closeSeqNum = wal.getEarliestMemstoreSeqNum(r.getRegionInfo().getEncodedNameAsBytes());
2733       if (closeSeqNum == HConstants.NO_SEQNUM) {
2734         // No edits in WAL for this region; get the sequence number when the region was opened.
2735         closeSeqNum = r.getOpenSeqNum();
2736         if (closeSeqNum == HConstants.NO_SEQNUM) {
2737           closeSeqNum = 0;
2738         }
2739       }
2740       addToMovedRegions(r.getRegionInfo().getEncodedName(), destination, closeSeqNum);
2741     }
2742     this.regionFavoredNodesMap.remove(r.getRegionInfo().getEncodedName());
2743     return toReturn != null;
2744   }
2745 
2746   /**
2747    * Protected utility method for safely obtaining an HRegion handle.
2748    *
2749    * @param regionName
2750    *          Name of online {@link HRegion} to return
2751    * @return {@link HRegion} for <code>regionName</code>
2752    * @throws NotServingRegionException
2753    */
2754   protected HRegion getRegion(final byte[] regionName)
2755       throws NotServingRegionException {
2756     String encodedRegionName = HRegionInfo.encodeRegionName(regionName);
2757     return getRegionByEncodedName(regionName, encodedRegionName);
2758   }
2759 
2760   protected HRegion getRegionByEncodedName(String encodedRegionName)
2761       throws NotServingRegionException {
2762     return getRegionByEncodedName(null, encodedRegionName);
2763   }
2764 
2765   protected HRegion getRegionByEncodedName(byte[] regionName, String encodedRegionName)
2766     throws NotServingRegionException {
2767     HRegion region = this.onlineRegions.get(encodedRegionName);
2768     if (region == null) {
2769       MovedRegionInfo moveInfo = getMovedRegion(encodedRegionName);
2770       if (moveInfo != null) {
2771         throw new RegionMovedException(moveInfo.getServerName(), moveInfo.getSeqNum());
2772       }
2773       Boolean isOpening = this.regionsInTransitionInRS.get(Bytes.toBytes(encodedRegionName));
2774       String regionNameStr = regionName == null?
2775         encodedRegionName: Bytes.toStringBinary(regionName);
2776       if (isOpening != null && isOpening.booleanValue()) {
2777         throw new RegionOpeningException("Region " + regionNameStr +
2778           " is opening on " + this.serverNameFromMasterPOV);
2779       }
2780       throw new NotServingRegionException("Region " + regionNameStr +
2781         " is not online on " + this.serverNameFromMasterPOV);
2782     }
2783     return region;
2784   }
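
       /*
        * Both RegionMovedException and RegionOpeningException must extend
        * NotServingRegionException for the method above to compile with its
        * declared throws clause, so a caller that only cares about presence
        * can collapse all three outcomes. Illustrative sketch (hypothetical
        * helper, not part of the upstream class):
        */
       private HRegion getRegionOrNull(final String encodedRegionName) {
         try {
           return getRegionByEncodedName(encodedRegionName);
         } catch (NotServingRegionException e) {
           // Covers moved, opening, and plain not-online cases alike.
           return null;
         }
       }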
2785 
2786   /*
2787    * Clean up after a Throwable is caught while invoking a method. Converts <code>t</code> to
2788    * an IOE if it isn't already.
2789    *
2790    * @param t Throwable
2791    *
2792    * @return Throwable converted to an IOE; methods can only let out IOEs.
2793    */
2794   protected Throwable cleanup(final Throwable t) {
2795     return cleanup(t, null);
2796   }
2797 
2798   /*
2799    * Clean up after a Throwable is caught while invoking a method. Converts <code>t</code> to
2800    * an IOE if it isn't already.
2801    *
2802    * @param t Throwable
2803    *
2804    * @param msg Message to log in error. Can be null.
2805    *
2806    * @return Throwable converted to an IOE; methods can only let out IOEs.
2807    */
2808   protected Throwable cleanup(final Throwable t, final String msg) {
2809     // Don't log as error if NSRE; NSRE is 'normal' operation.
2810     if (t instanceof NotServingRegionException) {
2811       LOG.debug("NotServingRegionException; " + t.getMessage());
2812       return t;
2813     }
2814     if (msg == null) {
2815       LOG.error("", RemoteExceptionHandler.checkThrowable(t));
2816     } else {
2817       LOG.error(msg, RemoteExceptionHandler.checkThrowable(t));
2818     }
2819     if (!checkOOME(t)) {
2820       checkFileSystem();
2821     }
2822     return t;
2823   }
2824 
2825   /*
2826    * @param t
2827    *
2828    * @param msg Message to put in new IOE if passed <code>t</code> is not an IOE
2829    *
2830    * @return <code>t</code> as an IOE if it isn't one already.
2831    */
2832   protected IOException convertThrowableToIOE(final Throwable t, final String msg) {
2833     return (t instanceof IOException ? (IOException) t : msg == null
2834         || msg.length() == 0 ? new IOException(t) : new IOException(msg, t));
2835   }
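
       /*
        * The nested ternary above unrolls to the following equivalent branches;
        * shown only as a readability sketch of the same conversion rules:
        */
       private static IOException convertThrowableToIOEUnrolled(final Throwable t, final String msg) {
         if (t instanceof IOException) {
           return (IOException) t;          // already an IOE: pass through
         }
         if (msg == null || msg.length() == 0) {
           return new IOException(t);       // no message: wrap the cause only
         }
         return new IOException(msg, t);    // wrap with the supplied message
       }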
2836 
2837   /*
2838    * Check if the Throwable is an OOME and, if so, abort immediately to avoid creating more objects.
2839    *
2840    * @param e
2841    *
2842    * @return True if we OOME'd and are aborting.
2843    */
2844   @Override
2845   public boolean checkOOME(final Throwable e) {
2846     boolean stop = false;
2847     try {
2848       if (e instanceof OutOfMemoryError
2849           || (e.getCause() != null && e.getCause() instanceof OutOfMemoryError)
2850           || (e.getMessage() != null && e.getMessage().contains(
2851               "java.lang.OutOfMemoryError"))) {
2852         stop = true;
2853         LOG.fatal(
2854           "Run out of memory; HRegionServer will abort itself immediately", e);
2855       }
2856     } finally {
2857       if (stop) {
2858         Runtime.getRuntime().halt(1);
2859       }
2860     }
2861     return stop;
2862   }
2863 
2864   /**
2865    * Checks to see if the file system is still accessible. If not, sets
2866    * abortRequested and stopRequested.
2867    *
2868    * @return false if file system is not available
2869    */
2870   public boolean checkFileSystem() {
2871     if (this.fsOk && this.fs != null) {
2872       try {
2873         FSUtils.checkFileSystemAvailable(this.fs);
2874       } catch (IOException e) {
2875         abort("File System not available", e);
2876         this.fsOk = false;
2877       }
2878     }
2879     return this.fsOk;
2880   }
2881 
2882   protected long addScanner(RegionScanner s, HRegion r) throws LeaseStillHeldException {
2883     long scannerId = this.scannerIdGen.incrementAndGet();
2884     String scannerName = String.valueOf(scannerId);
2885 
2886     RegionScannerHolder existing =
2887       scanners.putIfAbsent(scannerName, new RegionScannerHolder(s, r));
2888     assert existing == null : "scannerId must be unique within regionserver's whole lifecycle!";
2889 
2890     this.leases.createLease(scannerName, this.scannerLeaseTimeoutPeriod,
2891         new ScannerListener(scannerName));
2892 
2893     return scannerId;
2894   }
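
       /*
        * Usage sketch for the registration above (hypothetical caller; "scanner"
        * and "region" stand in for real instances). The returned id doubles as
        * the lease name, so lease expiry and scanner lookup share one key:
        *
        *   long id = addScanner(scanner, region);
        *   String name = String.valueOf(id);          // key into the scanners map
        *   RegionScannerHolder holder = scanners.get(name);
        */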
2895 
2896   // Start Client methods
2897 
2898   /**
2899    * Get data from a table.
2900    *
2901    * @param controller the RPC controller
2902    * @param request the get request
2903    * @throws ServiceException
2904    */
2905   @Override
2906   public GetResponse get(final RpcController controller,
2907       final GetRequest request) throws ServiceException {
2908     long before = EnvironmentEdgeManager.currentTimeMillis();
2909     try {
2910       checkOpen();
2911       requestCount.increment();
2912       HRegion region = getRegion(request.getRegion());
2913 
2914       GetResponse.Builder builder = GetResponse.newBuilder();
2915       ClientProtos.Get get = request.getGet();
2916       Boolean existence = null;
2917       Result r = null;
2918 
2919       if (get.hasClosestRowBefore() && get.getClosestRowBefore()) {
2920         if (get.getColumnCount() != 1) {
2921           throw new DoNotRetryIOException(
2922             "get ClosestRowBefore supports one and only one family now, not "
2923               + get.getColumnCount() + " families");
2924         }
2925         byte[] row = get.getRow().toByteArray();
2926         byte[] family = get.getColumn(0).getFamily().toByteArray();
2927         r = region.getClosestRowBefore(row, family);
2928       } else {
2929         Get clientGet = ProtobufUtil.toGet(get);
2930         if (get.getExistenceOnly() && region.getCoprocessorHost() != null) {
2931           existence = region.getCoprocessorHost().preExists(clientGet);
2932         }
2933         if (existence == null) {
2934           r = region.get(clientGet);
2935           if (get.getExistenceOnly()) {
2936             boolean exists = r.getExists();
2937             if (region.getCoprocessorHost() != null) {
2938               exists = region.getCoprocessorHost().postExists(clientGet, exists);
2939             }
2940             existence = exists;
2941           }
2942         }
2943       }
2944       if (existence != null) {
2945         ClientProtos.Result pbr = ProtobufUtil.toResult(existence);
2946         builder.setResult(pbr);
2947       } else if (r != null) {
2948         ClientProtos.Result pbr = ProtobufUtil.toResult(r);
2949         builder.setResult(pbr);
2950       }
2951       return builder.build();
2952     } catch (IOException ie) {
2953       throw new ServiceException(ie);
2954     } finally {
2955       metricsRegionServer.updateGet(EnvironmentEdgeManager.currentTimeMillis() - before);
2956     }
2957   }
2958 
2959 
2960   /**
2961    * Mutate data in a table.
2962    *
2963    * @param rpcc the RPC controller
2964    * @param request the mutate request
2965    * @throws ServiceException
2966    */
2967   @Override
2968   public MutateResponse mutate(final RpcController rpcc,
2969       final MutateRequest request) throws ServiceException {
2970     // The RPC controller is how we bring in data via the back door; it is un-protobuf'ed data.
2971     // It is also the conduit via which we pass back data.
2972     PayloadCarryingRpcController controller = (PayloadCarryingRpcController)rpcc;
2973     CellScanner cellScanner = controller != null? controller.cellScanner(): null;
2974     // Clear scanner so we are not holding on to reference across call.
2975     if (controller != null) controller.setCellScanner(null);
2976     try {
2977       checkOpen();
2978       requestCount.increment();
2979       HRegion region = getRegion(request.getRegion());
2980       MutateResponse.Builder builder = MutateResponse.newBuilder();
2981       MutationProto mutation = request.getMutation();
2982       if (!region.getRegionInfo().isMetaTable()) {
2983         cacheFlusher.reclaimMemStoreMemory();
2984       }
2985       long nonceGroup = request.hasNonceGroup()
2986           ? request.getNonceGroup() : HConstants.NO_NONCE;
2987       Result r = null;
2988       Boolean processed = null;
2989       MutationType type = mutation.getMutateType();
2990       switch (type) {
2991       case APPEND:
2992         // TODO: this doesn't actually check anything.
2993         r = append(region, mutation, cellScanner, nonceGroup);
2994         break;
2995       case INCREMENT:
2996         // TODO: this doesn't actually check anything.
2997         r = increment(region, mutation, cellScanner, nonceGroup);
2998         break;
2999       case PUT:
3000         Put put = ProtobufUtil.toPut(mutation, cellScanner);
3001         if (request.hasCondition()) {
3002           Condition condition = request.getCondition();
3003           byte[] row = condition.getRow().toByteArray();
3004           byte[] family = condition.getFamily().toByteArray();
3005           byte[] qualifier = condition.getQualifier().toByteArray();
3006           CompareOp compareOp = CompareOp.valueOf(condition.getCompareType().name());
3007           ByteArrayComparable comparator =
3008             ProtobufUtil.toComparator(condition.getComparator());
3009           if (region.getCoprocessorHost() != null) {
3010             processed = region.getCoprocessorHost().preCheckAndPut(
3011               row, family, qualifier, compareOp, comparator, put);
3012           }
3013           if (processed == null) {
3014             boolean result = region.checkAndMutate(row, family,
3015               qualifier, compareOp, comparator, put, true);
3016             if (region.getCoprocessorHost() != null) {
3017               result = region.getCoprocessorHost().postCheckAndPut(row, family,
3018                 qualifier, compareOp, comparator, put, result);
3019             }
3020             processed = result;
3021           }
3022         } else {
3023           region.put(put);
3024           processed = Boolean.TRUE;
3025         }
3026         break;
3027       case DELETE:
3028         Delete delete = ProtobufUtil.toDelete(mutation, cellScanner);
3029         if (request.hasCondition()) {
3030           Condition condition = request.getCondition();
3031           byte[] row = condition.getRow().toByteArray();
3032           byte[] family = condition.getFamily().toByteArray();
3033           byte[] qualifier = condition.getQualifier().toByteArray();
3034           CompareOp compareOp = CompareOp.valueOf(condition.getCompareType().name());
3035           ByteArrayComparable comparator =
3036             ProtobufUtil.toComparator(condition.getComparator());
3037           if (region.getCoprocessorHost() != null) {
3038             processed = region.getCoprocessorHost().preCheckAndDelete(
3039               row, family, qualifier, compareOp, comparator, delete);
3040           }
3041           if (processed == null) {
3042             boolean result = region.checkAndMutate(row, family,
3043               qualifier, compareOp, comparator, delete, true);
3044             if (region.getCoprocessorHost() != null) {
3045               result = region.getCoprocessorHost().postCheckAndDelete(row, family,
3046                 qualifier, compareOp, comparator, delete, result);
3047             }
3048             processed = result;
3049           }
3050         } else {
3051           region.delete(delete);
3052           processed = Boolean.TRUE;
3053         }
3054         break;
3055       default:
3056         throw new DoNotRetryIOException(
3057           "Unsupported mutate type: " + type.name());
3058       }
3059       if (processed != null) builder.setProcessed(processed.booleanValue());
3060       addResult(builder, r, controller);
3061       return builder.build();
3062     } catch (IOException ie) {
3063       checkFileSystem();
3064       throw new ServiceException(ie);
3065     }
3066   }
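
       /*
        * The PUT and DELETE branches above repeat the same check-and-mutate
        * shape. A sketch of the shared core (hypothetical helper; coprocessor
        * pre/post hooks omitted, and it assumes HRegion.checkAndMutate accepts
        * any Mutation, as the calls above with both Put and Delete imply):
        */
       private boolean checkAndMutateCore(final HRegion region, final Condition condition,
           final org.apache.hadoop.hbase.client.Mutation mutation) throws IOException {
         byte[] row = condition.getRow().toByteArray();
         byte[] family = condition.getFamily().toByteArray();
         byte[] qualifier = condition.getQualifier().toByteArray();
         CompareOp compareOp = CompareOp.valueOf(condition.getCompareType().name());
         ByteArrayComparable comparator = ProtobufUtil.toComparator(condition.getComparator());
         // Apply the mutation only if the condition on (row, family, qualifier) holds.
         return region.checkAndMutate(row, family, qualifier, compareOp, comparator, mutation, true);
       }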
3067 
3068 
3069   /**
3070    * @return True if current call supports cellblocks
3071    */
3072   private boolean isClientCellBlockSupport() {
3073     RpcCallContext context = RpcServer.getCurrentCall();
3074     return context != null && context.isClientCellBlockSupport();
3075   }
3076 
3077   private void addResult(final MutateResponse.Builder builder,
3078       final Result result, final PayloadCarryingRpcController rpcc) {
3079     if (result == null) return;
3080     if (isClientCellBlockSupport()) {
3081       builder.setResult(ProtobufUtil.toResultNoData(result));
3082       rpcc.setCellScanner(result.cellScanner());
3083     } else {
3084       ClientProtos.Result pbr = ProtobufUtil.toResult(result);
3085       builder.setResult(pbr);
3086     }
3087   }
3088 
3089   //
3090   // remote scanner interface
3091   //
3092 
3093   /**
3094    * Scan data in a table.
3095    *
3096    * @param controller the RPC controller
3097    * @param request the scan request
3098    * @throws ServiceException
3099    */
3100   @Override
3101   public ScanResponse scan(final RpcController controller, final ScanRequest request)
3102   throws ServiceException {
3103     Leases.Lease lease = null;
3104     String scannerName = null;
3105     try {
3106       if (!request.hasScannerId() && !request.hasScan()) {
3107         throw new DoNotRetryIOException(
3108           "Missing required input: scannerId or scan");
3109       }
3110       long scannerId = -1;
3111       if (request.hasScannerId()) {
3112         scannerId = request.getScannerId();
3113         scannerName = String.valueOf(scannerId);
3114       }
3115       try {
3116         checkOpen();
3117       } catch (IOException e) {
3118         // If checkOpen failed, the server is not running or the filesystem is gone;
3119         // cancel this lease since we're shutting down anyway.
3120         if (scannerName != null) {
3121           try {
3122             leases.cancelLease(scannerName);
3123           } catch (LeaseException le) {
3124             LOG.info("Server shutting down and client tried to access missing scanner " +
3125               scannerName);
3126           }
3127         }
3128         throw e;
3129       }
3130       requestCount.increment();
3131 
3132       int ttl = 0;
3133       HRegion region = null;
3134       RegionScanner scanner = null;
3135       RegionScannerHolder rsh = null;
3136       boolean moreResults = true;
3137       boolean closeScanner = false;
3138       ScanResponse.Builder builder = ScanResponse.newBuilder();
3139       if (request.hasCloseScanner()) {
3140         closeScanner = request.getCloseScanner();
3141       }
3142       int rows = closeScanner ? 0 : 1;
3143       if (request.hasNumberOfRows()) {
3144         rows = request.getNumberOfRows();
3145       }
3146       if (request.hasScannerId()) {
3147         rsh = scanners.get(scannerName);
3148         if (rsh == null) {
3149           LOG.info("Client tried to access missing scanner " + scannerName);
3150           throw new UnknownScannerException(
3151             "Name: " + scannerName + ", already closed?");
3152         }
3153         scanner = rsh.s;
3154         HRegionInfo hri = scanner.getRegionInfo();
3155         region = getRegion(hri.getRegionName());
3156         if (region != rsh.r) { // Yes, should be the same instance
3157           throw new NotServingRegionException("Region was re-opened after the scanner"
3158             + scannerName + " was created: " + hri.getRegionNameAsString());
3159         }
3160       } else {
3161         region = getRegion(request.getRegion());
3162         ClientProtos.Scan protoScan = request.getScan();
3163         boolean isLoadingCfsOnDemandSet = protoScan.hasLoadColumnFamiliesOnDemand();
3164         Scan scan = ProtobufUtil.toScan(protoScan);
3165         // if the request doesn't set this, get the default region setting.
3166         if (!isLoadingCfsOnDemandSet) {
3167           scan.setLoadColumnFamiliesOnDemand(region.isLoadingCfsOnDemandDefault());
3168         }
3169         scan.getAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE);
3170         region.prepareScanner(scan);
3171         if (region.getCoprocessorHost() != null) {
3172           scanner = region.getCoprocessorHost().preScannerOpen(scan);
3173         }
3174         if (scanner == null) {
3175           scanner = region.getScanner(scan);
3176         }
3177         if (region.getCoprocessorHost() != null) {
3178           scanner = region.getCoprocessorHost().postScannerOpen(scan, scanner);
3179         }
3180         scannerId = addScanner(scanner, region);
3181         scannerName = String.valueOf(scannerId);
3182         ttl = this.scannerLeaseTimeoutPeriod;
3183       }
3184 
3185       if (rows > 0) {
3186         // If nextCallSeq does not match, throw an Exception straight away. This needs to be
3187         // performed even before checking the lease.
3188         // See HBASE-5974
3189         if (request.hasNextCallSeq()) {
3190           if (rsh == null) {
3191             rsh = scanners.get(scannerName);
3192           }
3193           if (rsh != null) {
3194             if (request.getNextCallSeq() != rsh.nextCallSeq) {
3195               throw new OutOfOrderScannerNextException("Expected nextCallSeq: " + rsh.nextCallSeq
3196                 + " But the nextCallSeq got from client: " + request.getNextCallSeq() +
3197                 "; request=" + TextFormat.shortDebugString(request));
3198             }
3199             // Increment the nextCallSeq value which is the next expected from client.
3200             rsh.nextCallSeq++;
3201           }
3202         }
3203         try {
3204           // Remove the lease while it's being processed in the server; protects against the case
3205           // where processing of the request takes longer than the lease expiration time.
3206           lease = leases.removeLease(scannerName);
3207           List<Result> results = new ArrayList<Result>(rows);
3208           long currentScanResultSize = 0;
3209           long totalKvSize = 0;
3210 
3211           boolean done = false;
3212           // Call coprocessor. Get region info from scanner.
3213           if (region != null && region.getCoprocessorHost() != null) {
3214             Boolean bypass = region.getCoprocessorHost().preScannerNext(
3215               scanner, results, rows);
3216             if (!results.isEmpty()) {
3217               for (Result r : results) {
3218                 if (maxScannerResultSize < Long.MAX_VALUE) {
3219                   for (Cell cell : r.rawCells()) {
3220                     KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
3221                     currentScanResultSize += kv.heapSize();
3222                     totalKvSize += kv.getLength();
3223                   }
3224                 }
3225               }
3226             }
3227             if (bypass != null && bypass.booleanValue()) {
3228               done = true;
3229             }
3230           }
3231 
3232           if (!done) {
3233             long maxResultSize = scanner.getMaxResultSize();
3234             if (maxResultSize <= 0) {
3235               maxResultSize = maxScannerResultSize;
3236             }
3237             List<Cell> values = new ArrayList<Cell>();
3238             region.startRegionOperation(Operation.SCAN);
3239             try {
3240               int i = 0;
3241               synchronized(scanner) {
3242                 while (i < rows) {
3243                   // Stop collecting results if a result-size limit is set and we have exceeded it
3244                   if ((maxScannerResultSize < Long.MAX_VALUE) &&
3245                       (currentScanResultSize >= maxResultSize)) {
3246                     break;
3247                   }
3248                   // Collect values to be returned here
3249                   boolean moreRows = scanner.nextRaw(values);
3250                   if (!values.isEmpty()) {
3251                     for (Cell cell : values) {
3252                       KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
3253                       currentScanResultSize += kv.heapSize();
3254                       totalKvSize += kv.getLength();
3255                     }
3256                     results.add(Result.create(values));
3257                     i++;
3258                   }
3259                   if (!moreRows) {
3260                     break;
3261                   }
3262                   values.clear();
3263                 }
3264               }
3265               region.readRequestsCount.add(i);
3266               region.getMetrics().updateScanNext(totalKvSize);
3267             } finally {
3268               region.closeRegionOperation();
3269             }
3270 
3271             // coprocessor postNext hook
3272             if (region != null && region.getCoprocessorHost() != null) {
3273               region.getCoprocessorHost().postScannerNext(scanner, results, rows, true);
3274             }
3275           }
3276 
3277           // If the scanner's filter - if any - is done with the scan,
3278           // tell the client to stop the scan. This is done by passing
3279           // a null result and setting moreResults to false.
3280           if (scanner.isFilterDone() && results.isEmpty()) {
3281             moreResults = false;
3282             results = null;
3283           } else {
3284             addResults(builder, results, controller);
3285           }
3286         } finally {
3287           // We're done. On the way out, re-add the lease removed above.
3288           // Re-adding resets the expiration time on the lease.
3289           if (scanners.containsKey(scannerName)) {
3290             if (lease != null) leases.addLease(lease);
3291             ttl = this.scannerLeaseTimeoutPeriod;
3292           }
3293         }
3294       }
3295 
3296       if (!moreResults || closeScanner) {
3297         ttl = 0;
3298         moreResults = false;
3299         if (region != null && region.getCoprocessorHost() != null) {
3300           if (region.getCoprocessorHost().preScannerClose(scanner)) {
3301             return builder.build(); // bypass
3302           }
3303         }
3304         rsh = scanners.remove(scannerName);
3305         if (rsh != null) {
3306           scanner = rsh.s;
3307           scanner.close();
3308           leases.cancelLease(scannerName);
3309           if (region != null && region.getCoprocessorHost() != null) {
3310             region.getCoprocessorHost().postScannerClose(scanner);
3311           }
3312         }
3313       }
3314 
3315       if (ttl > 0) {
3316         builder.setTtl(ttl);
3317       }
3318       builder.setScannerId(scannerId);
3319       builder.setMoreResults(moreResults);
3320       return builder.build();
3321     } catch (IOException ie) {
3322       if (scannerName != null && ie instanceof NotServingRegionException) {
3323         RegionScannerHolder rsh = scanners.remove(scannerName);
3324         if (rsh != null) {
3325           try {
3326             RegionScanner scanner = rsh.s;
3327             LOG.warn(scannerName + " encountered " + ie.getMessage() + ", closing ...");
3328             scanner.close();
3329             leases.cancelLease(scannerName);
3330           } catch (IOException e) {
3331             LOG.warn("Getting exception closing " + scannerName, e);
3332           }
3333         }
3334       }
3335       throw new ServiceException(ie);
3336     }
3337   }
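
       /*
        * Client-side view of the nextCallSeq handshake enforced above (sketch
        * only; see HBASE-5974). The client echoes the sequence it expects next,
        * so a retried RPC whose original already executed is rejected as out of
        * order instead of silently skipping rows:
        *
        *   long seq = 0;
        *   ScanRequest next = ScanRequest.newBuilder()
        *       .setScannerId(id)              // id returned when the scanner opened
        *       .setNumberOfRows(100)
        *       .setNextCallSeq(seq++)         // must equal rsh.nextCallSeq server-side
        *       .build();
        */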
3338 
3339   private void addResults(final ScanResponse.Builder builder, final List<Result> results,
3340       final RpcController controller) {
3341     if (results == null || results.isEmpty()) return;
3342     if (isClientCellBlockSupport()) {
3343       for (Result res : results) {
3344         builder.addCellsPerResult(res.size());
3345       }
3346       ((PayloadCarryingRpcController)controller).
3347         setCellScanner(CellUtil.createCellScanner(results));
3348     } else {
3349       for (Result res: results) {
3350         ClientProtos.Result pbr = ProtobufUtil.toResult(res);
3351         builder.addResults(pbr);
3352       }
3353     }
3354   }
3355 
3356   /**
3357    * Atomically bulk load several HFiles into an open region
3358    * @return true if successful, false if it failed recoverably (no action taken)
3359    * @throws IOException if failed unrecoverably
3360    */
3361   @Override
3362   public BulkLoadHFileResponse bulkLoadHFile(final RpcController controller,
3363       final BulkLoadHFileRequest request) throws ServiceException {
3364     try {
3365       checkOpen();
3366       requestCount.increment();
3367       HRegion region = getRegion(request.getRegion());
3368       List<Pair<byte[], String>> familyPaths = new ArrayList<Pair<byte[], String>>();
3369       for (FamilyPath familyPath: request.getFamilyPathList()) {
3370         familyPaths.add(new Pair<byte[], String>(familyPath.getFamily().toByteArray(),
3371           familyPath.getPath()));
3372       }
3373       boolean bypass = false;
3374       if (region.getCoprocessorHost() != null) {
3375         bypass = region.getCoprocessorHost().preBulkLoadHFile(familyPaths);
3376       }
3377       boolean loaded = false;
3378       if (!bypass) {
3379         loaded = region.bulkLoadHFiles(familyPaths, request.getAssignSeqNum());
3380       }
3381       if (region.getCoprocessorHost() != null) {
3382         loaded = region.getCoprocessorHost().postBulkLoadHFile(familyPaths, loaded);
3383       }
3384       BulkLoadHFileResponse.Builder builder = BulkLoadHFileResponse.newBuilder();
3385       builder.setLoaded(loaded);
3386       return builder.build();
3387     } catch (IOException ie) {
3388       throw new ServiceException(ie);
3389     }
3390   }
3391 
3392   @Override
3393   public CoprocessorServiceResponse execService(final RpcController controller,
3394       final CoprocessorServiceRequest request) throws ServiceException {
3395     try {
3396       checkOpen();
3397       requestCount.increment();
3398       HRegion region = getRegion(request.getRegion());
3399       Message result = execServiceOnRegion(region, request.getCall());
3400       CoprocessorServiceResponse.Builder builder =
3401           CoprocessorServiceResponse.newBuilder();
3402       builder.setRegion(RequestConverter.buildRegionSpecifier(
3403           RegionSpecifierType.REGION_NAME, region.getRegionName()));
3404       builder.setValue(
3405           builder.getValueBuilder().setName(result.getClass().getName())
3406               .setValue(result.toByteString()));
3407       return builder.build();
3408     } catch (IOException ie) {
3409       throw new ServiceException(ie);
3410     }
3411   }
3412 
3413   private Message execServiceOnRegion(HRegion region,
3414       final ClientProtos.CoprocessorServiceCall serviceCall) throws IOException {
3415     // ignore the passed in controller (from the serialized call)
3416     ServerRpcController execController = new ServerRpcController();
3417     Message result = region.execService(execController, serviceCall);
3418     if (execController.getFailedOn() != null) {
3419       throw execController.getFailedOn();
3420     }
3421     return result;
3422   }
3423 
3424   @Override
3425   public CoprocessorServiceResponse execRegionServerService(final RpcController controller,
3426       final CoprocessorServiceRequest serviceRequest) throws ServiceException {
3427     try {
3428       ServerRpcController execController = new ServerRpcController();
3429       CoprocessorServiceCall call = serviceRequest.getCall();
3430       String serviceName = call.getServiceName();
3431       String methodName = call.getMethodName();
3432       if (!coprocessorServiceHandlers.containsKey(serviceName)) {
3433         throw new UnknownProtocolException(null,
3434             "No registered coprocessor service found for name " + serviceName);
3435       }
3436       Service service = coprocessorServiceHandlers.get(serviceName);
3437       Descriptors.ServiceDescriptor serviceDesc = service.getDescriptorForType();
3438       Descriptors.MethodDescriptor methodDesc = serviceDesc.findMethodByName(methodName);
3439       if (methodDesc == null) {
3440         throw new UnknownProtocolException(service.getClass(), "Unknown method " + methodName
3441             + " called on service " + serviceName);
3442       }
3443       Message request =
3444           service.getRequestPrototype(methodDesc).newBuilderForType().mergeFrom(call.getRequest())
3445               .build();
3446       final Message.Builder responseBuilder =
3447           service.getResponsePrototype(methodDesc).newBuilderForType();
3448       service.callMethod(methodDesc, controller, request, new RpcCallback<Message>() {
3449         @Override
3450         public void run(Message message) {
3451           if (message != null) {
3452             responseBuilder.mergeFrom(message);
3453           }
3454         }
3455       });
3456       Message execResult = responseBuilder.build();
3457       if (execController.getFailedOn() != null) {
3458         throw execController.getFailedOn();
3459       }
3460       ClientProtos.CoprocessorServiceResponse.Builder builder =
3461           ClientProtos.CoprocessorServiceResponse.newBuilder();
3462       builder.setRegion(RequestConverter.buildRegionSpecifier(RegionSpecifierType.REGION_NAME,
3463         HConstants.EMPTY_BYTE_ARRAY));
3464       builder.setValue(builder.getValueBuilder().setName(execResult.getClass().getName())
3465           .setValue(execResult.toByteString()));
3466       return builder.build();
3467     } catch (IOException ie) {
3468       throw new ServiceException(ie);
3469     }
3470   }
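
       /*
        * The dispatch above is purely descriptor-driven, so the regionserver
        * needs no compile-time knowledge of an endpoint's request/response
        * types. In miniature (illustrative; "bytes" stands for a serialized
        * request):
        *
        *   Descriptors.MethodDescriptor md =
        *       service.getDescriptorForType().findMethodByName(methodName);
        *   Message req = service.getRequestPrototype(md)
        *       .newBuilderForType().mergeFrom(bytes).build();
        *   service.callMethod(md, controller, req, callback);
        */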
3471 
3472   /**
3473    * @return Return the object that implements the replication
3474    * source service.
3475    */
3476   public ReplicationSourceService getReplicationSourceService() {
3477     return replicationSourceHandler;
3478   }
3479 
3480   /**
3481    * @return Return the object that implements the replication
3482    * sink service.
3483    */
3484   public ReplicationSinkService getReplicationSinkService() {
3485     return replicationSinkHandler;
3486   }
3487 
3488   /**
3489    * Execute multiple actions on a table: get, mutate, and/or execCoprocessor
3490    *
3491    * @param rpcc the RPC controller
3492    * @param request the multi request
3493    * @throws ServiceException
3494    */
3495   @Override
3496   public MultiResponse multi(final RpcController rpcc, final MultiRequest request)
3497   throws ServiceException {
3498     try {
3499       checkOpen();
3500     } catch (IOException ie) {
3501       throw new ServiceException(ie);
3502     }
3503 
3504     // rpc controller is how we bring in data via the back door;  it is unprotobuf'ed data.
3505     // It is also the conduit via which we pass back data.
3506     PayloadCarryingRpcController controller = (PayloadCarryingRpcController)rpcc;
3507     CellScanner cellScanner = controller != null ? controller.cellScanner(): null;
3508     if (controller != null) controller.setCellScanner(null);
3509 
3510     long nonceGroup = request.hasNonceGroup() ? request.getNonceGroup() : HConstants.NO_NONCE;
3511 
3512     // this will contain all the cells that we need to return. It's created later, if needed.
3513     List<CellScannable> cellsToReturn = null;
3514     MultiResponse.Builder responseBuilder = MultiResponse.newBuilder();
3515     RegionActionResult.Builder regionActionResultBuilder = RegionActionResult.newBuilder();
3516     Boolean processed = null;
3517 
3518     for (RegionAction regionAction : request.getRegionActionList()) {
3519       this.requestCount.add(regionAction.getActionCount());
3520       HRegion region;
3521       regionActionResultBuilder.clear();
3522       try {
3523         region = getRegion(regionAction.getRegion());
3524       } catch (IOException e) {
3525         regionActionResultBuilder.setException(ResponseConverter.buildException(e));
3526         responseBuilder.addRegionActionResult(regionActionResultBuilder.build());
3527         continue;  // For this region it's a failure.
3528       }
3529 
3530       if (regionAction.hasAtomic() && regionAction.getAtomic()) {
3531         // How does this call happen?  It may need some work to play well w/ the surroundings.
3532         // Need to return an item per Action along w/ Action index.  TODO.
3533         try {
3534           if (request.hasCondition()) {
3535             Condition condition = request.getCondition();
3536             byte[] row = condition.getRow().toByteArray();
3537             byte[] family = condition.getFamily().toByteArray();
3538             byte[] qualifier = condition.getQualifier().toByteArray();
3539             CompareOp compareOp = CompareOp.valueOf(condition.getCompareType().name());
3540             ByteArrayComparable comparator =
3541               ProtobufUtil.toComparator(condition.getComparator());
3542             processed = checkAndRowMutate(region, regionAction.getActionList(),
3543               cellScanner, row, family, qualifier, compareOp, comparator);
3544           } else {
3545             mutateRows(region, regionAction.getActionList(), cellScanner);
3546             processed = Boolean.TRUE;
3547           }
3548         } catch (IOException e) {
3549           // Since the batch is atomic, treat any failure as a failure of the whole batch.
3550           regionActionResultBuilder.setException(ResponseConverter.buildException(e));
3551         }
3552       } else {
3553         // doNonAtomicRegionMutation manages the exception internally
3554         cellsToReturn = doNonAtomicRegionMutation(region, regionAction, cellScanner,
3555             regionActionResultBuilder, cellsToReturn, nonceGroup);
3556       }
3557       responseBuilder.addRegionActionResult(regionActionResultBuilder.build());
3558     }
3559     // Load the controller with the Cells to return.
3560     if (cellsToReturn != null && !cellsToReturn.isEmpty() && controller != null) {
3561       controller.setCellScanner(CellUtil.createCellScanner(cellsToReturn));
3562     }
3563     if (processed != null) responseBuilder.setProcessed(processed);
3564     return responseBuilder.build();
3565   }
3566 
3567   /**
3568    * Run through the regionMutation <code>actions</code> and, per Mutation, do the work; when
3569    * done, add an instance of a {@link ResultOrException} that corresponds to each Mutation.
3570    * @param region
3571    * @param actions
3572    * @param cellScanner
3573    * @param builder
3574    * @param cellsToReturn  Could be null. May be allocated in this method.  This is what this
3575    * method returns as a 'result'.
3576    * @return The <code>cellsToReturn</code> list, possibly allocated in this method.
3577    */
3578   private List<CellScannable> doNonAtomicRegionMutation(final HRegion region,
3579       final RegionAction actions, final CellScanner cellScanner,
3580       final RegionActionResult.Builder builder, List<CellScannable> cellsToReturn, long nonceGroup) {
3581     // Gather up CONTIGUOUS Puts and Deletes in this mutations List.  Idea is that rather than do
3582     // one at a time, we instead pass them in batch.  Be aware that the corresponding
3583     // ResultOrException instance that matches each Put or Delete is then added down in the
3584     // doBatchOp call.  We should stay aligned even though the Puts and Deletes are deferred/batched.
3585     List<ClientProtos.Action> mutations = null;
3586     for (ClientProtos.Action action: actions.getActionList()) {
3587       ClientProtos.ResultOrException.Builder resultOrExceptionBuilder = null;
3588       try {
3589         Result r = null;
3590         if (action.hasGet()) {
3591           Get get = ProtobufUtil.toGet(action.getGet());
3592           r = region.get(get);
3593         } else if (action.hasServiceCall()) {
3594           resultOrExceptionBuilder = ResultOrException.newBuilder();
3595           try {
3596             Message result = execServiceOnRegion(region, action.getServiceCall());
3597             ClientProtos.CoprocessorServiceResult.Builder serviceResultBuilder =
3598                 ClientProtos.CoprocessorServiceResult.newBuilder();
3599             resultOrExceptionBuilder.setServiceResult(
3600                 serviceResultBuilder.setValue(
3601                   serviceResultBuilder.getValueBuilder()
3602                     .setName(result.getClass().getName())
3603                     .setValue(result.toByteString())));
3604           } catch (IOException ioe) {
3605             resultOrExceptionBuilder.setException(ResponseConverter.buildException(ioe));
3606           }
3607         } else if (action.hasMutation()) {
3608           MutationType type = action.getMutation().getMutateType();
3609           if (type != MutationType.PUT && type != MutationType.DELETE && mutations != null &&
3610               !mutations.isEmpty()) {
3611             // Flush out any Puts or Deletes already collected.
3612             doBatchOp(builder, region, mutations, cellScanner);
3613             mutations.clear();
3614           }
3615           switch (type) {
3616           case APPEND:
3617             r = append(region, action.getMutation(), cellScanner, nonceGroup);
3618             break;
3619           case INCREMENT:
3620             r = increment(region, action.getMutation(), cellScanner,  nonceGroup);
3621             break;
3622           case PUT:
3623           case DELETE:
3624             // Collect the individual mutations and apply in a batch
3625             if (mutations == null) {
3626               mutations = new ArrayList<ClientProtos.Action>(actions.getActionCount());
3627             }
3628             mutations.add(action);
3629             break;
3630           default:
3631             throw new DoNotRetryIOException("Unsupported mutate type: " + type.name());
3632           }
3633         } else {
3634           throw new HBaseIOException("Unexpected Action type");
3635         }
3636         if (r != null) {
3637           ClientProtos.Result pbResult = null;
3638           if (isClientCellBlockSupport()) {
3639             pbResult = ProtobufUtil.toResultNoData(r);
3640             //  Hard to guess the size here.  Just make a rough guess.
3641             if (cellsToReturn == null) cellsToReturn = new ArrayList<CellScannable>();
3642             cellsToReturn.add(r);
3643           } else {
3644             pbResult = ProtobufUtil.toResult(r);
3645           }
3646           resultOrExceptionBuilder =
3647             ClientProtos.ResultOrException.newBuilder().setResult(pbResult);
3648         }
3649         // Could get to here and there was no result and no exception.  Presumes we added
3650         // a Put or Delete to the collecting Mutations List for adding later.  In this
3651         // case the corresponding ResultOrException instance for the Put or Delete will be added
3652         // down in the doBatchOp method call rather than up here.
3653       } catch (IOException ie) {
3654         resultOrExceptionBuilder = ResultOrException.newBuilder().
3655           setException(ResponseConverter.buildException(ie));
3656       }
3657       if (resultOrExceptionBuilder != null) {
3658         // Propagate index.
3659         resultOrExceptionBuilder.setIndex(action.getIndex());
3660         builder.addResultOrException(resultOrExceptionBuilder.build());
3661       }
3662     }
3663     // Finish up any outstanding mutations
3664     if (mutations != null && !mutations.isEmpty()) {
3665       doBatchOp(builder, region, mutations, cellScanner);
3666     }
3667     return cellsToReturn;
3668   }
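
       /*
        * The contiguous-batching rule above in miniature (illustrative helper,
        * not called by the method above): only PUT and DELETE are buffered; any
        * other action type flushes the buffer first, which keeps each buffered
        * mutation's ResultOrException aligned with its action index.
        */
       private static boolean isBatchableMutation(final MutationType type) {
         return type == MutationType.PUT || type == MutationType.DELETE;
       }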
3669 
3670 // End Client methods
3671 // Start Admin methods
3672 
3673   @Override
3674   @QosPriority(priority=HConstants.HIGH_QOS)
3675   public GetRegionInfoResponse getRegionInfo(final RpcController controller,
3676       final GetRegionInfoRequest request) throws ServiceException {
3677     try {
3678       checkOpen();
3679       requestCount.increment();
3680       HRegion region = getRegion(request.getRegion());
3681       HRegionInfo info = region.getRegionInfo();
3682       GetRegionInfoResponse.Builder builder = GetRegionInfoResponse.newBuilder();
3683       builder.setRegionInfo(HRegionInfo.convert(info));
3684       if (request.hasCompactionState() && request.getCompactionState()) {
3685         builder.setCompactionState(region.getCompactionState());
3686       }
3687       builder.setIsRecovering(region.isRecovering());
3688       return builder.build();
3689     } catch (IOException ie) {
3690       throw new ServiceException(ie);
3691     }
3692   }
3693 
3694   @Override
3695   public GetStoreFileResponse getStoreFile(final RpcController controller,
3696       final GetStoreFileRequest request) throws ServiceException {
3697     try {
3698       checkOpen();
3699       HRegion region = getRegion(request.getRegion());
3700       requestCount.increment();
3701       Set<byte[]> columnFamilies;
3702       if (request.getFamilyCount() == 0) {
3703         columnFamilies = region.getStores().keySet();
3704       } else {
3705         columnFamilies = new TreeSet<byte[]>(Bytes.BYTES_RAWCOMPARATOR);
3706         for (ByteString cf: request.getFamilyList()) {
3707           columnFamilies.add(cf.toByteArray());
3708         }
3709       }
3710       int nCF = columnFamilies.size();
3711       List<String>  fileList = region.getStoreFileList(
3712         columnFamilies.toArray(new byte[nCF][]));
3713       GetStoreFileResponse.Builder builder = GetStoreFileResponse.newBuilder();
3714       builder.addAllStoreFile(fileList);
3715       return builder.build();
3716     } catch (IOException ie) {
3717       throw new ServiceException(ie);
3718     }
3719   }
3720 
3721   @Override
3722   @QosPriority(priority=HConstants.HIGH_QOS)
3723   public GetOnlineRegionResponse getOnlineRegion(final RpcController controller,
3724       final GetOnlineRegionRequest request) throws ServiceException {
3725     try {
3726       checkOpen();
3727       requestCount.increment();
3728       List<HRegionInfo> list = new ArrayList<HRegionInfo>(onlineRegions.size());
3729       for (HRegion region: this.onlineRegions.values()) {
3730         list.add(region.getRegionInfo());
3731       }
3732       Collections.sort(list);
3733       return ResponseConverter.buildGetOnlineRegionResponse(list);
3734     } catch (IOException ie) {
3735       throw new ServiceException(ie);
3736     }
3737   }
3738 
3739   // Region open/close direct RPCs
3740 
3741   /**
3742    * Open asynchronously a region or a set of regions on the region server.
3743    *
3744    * The opening is coordinated by ZooKeeper, and this method requires the znode to be created
3745    *  before being called. As a consequence, this method should be called only from the master.
3746    * <p>
3747    * The different managed states for the region are:<ul>
3748    *  <li>region not opened: the region opening will start asynchronously.</li>
3749    *  <li>a close is already in progress: this is considered as an error.</li>
3750    *  <li>an open is already in progress: this new open request will be ignored. This is important
3751    *  because the Master can issue the request multiple times if it crashes.</li>
3752    *  <li>the region is already opened: this new open request will be ignored.</li>
3753    *  </ul>
3754    * </p>
3755    * <p>
3756    * Bulk assign: If there is more than one region to open, it will be considered a bulk assign.
3757    * For a single region opening, errors are sent through a ServiceException. For bulk assign,
3758    * errors are put in the response as FAILED_OPENING.
3759    * </p>
3760    * @param controller the RPC controller
3761    * @param request the request
3762    * @throws ServiceException
3763    */
3764   @Override
3765   @QosPriority(priority=HConstants.HIGH_QOS)
3766   public OpenRegionResponse openRegion(final RpcController controller,
3767       final OpenRegionRequest request) throws ServiceException {
3768     try {
3769       checkOpen();
3770     } catch (IOException ie) {
3771       throw new ServiceException(ie);
3772     }
3773     requestCount.increment();
3774     if (request.hasServerStartCode() && this.serverNameFromMasterPOV != null) {
3775       // check that we are the same server that this RPC is intended for.
3776       long serverStartCode = request.getServerStartCode();
3777       if (this.serverNameFromMasterPOV.getStartcode() !=  serverStartCode) {
3778         throw new ServiceException(new DoNotRetryIOException("This RPC was intended for a " +
3779             "different server with startCode: " + serverStartCode + ", this server is: "
3780             + this.serverNameFromMasterPOV));
3781       }
3782     }
3783     OpenRegionResponse.Builder builder = OpenRegionResponse.newBuilder();
3784     final int regionCount = request.getOpenInfoCount();
3785     final Map<TableName, HTableDescriptor> htds =
3786         new HashMap<TableName, HTableDescriptor>(regionCount);
3787     final boolean isBulkAssign = regionCount > 1;
3788     for (RegionOpenInfo regionOpenInfo : request.getOpenInfoList()) {
3789       final HRegionInfo region = HRegionInfo.convert(regionOpenInfo.getRegion());
3790 
3791       int versionOfOfflineNode = -1;
3792       if (regionOpenInfo.hasVersionOfOfflineNode()) {
3793         versionOfOfflineNode = regionOpenInfo.getVersionOfOfflineNode();
3794       }
3795       HTableDescriptor htd;
3796       try {
3797         final HRegion onlineRegion = getFromOnlineRegions(region.getEncodedName());
3798         if (onlineRegion != null) {
3799           // Check if the region can actually be opened.
3800           if (onlineRegion.getCoprocessorHost() != null) {
3801             onlineRegion.getCoprocessorHost().preOpen();
3802           }
3803           // See HBASE-5094. Cross-check with hbase:meta whether this RS still owns
3804           // the region.
3805           Pair<HRegionInfo, ServerName> p = MetaReader.getRegion(
3806               this.catalogTracker, region.getRegionName());
3807           if (this.getServerName().equals(p.getSecond())) {
3808             Boolean closing = regionsInTransitionInRS.get(region.getEncodedNameAsBytes());
3809             // The map regionsInTransitionInRS has an entry for a region only if the region
3810             // is in transition on this RS, so here closing can be null. If not null, it can
3811             // be true or false. True means the region is opening on this RS, while false
3812             // means the region is closing. Only return ALREADY_OPENED if not closing (i.e.
3813             // not in transition any more, or still transitioning to open).
3814             if (!Boolean.FALSE.equals(closing)
3815                 && getFromOnlineRegions(region.getEncodedName()) != null) {
3816               LOG.warn("Attempted open of " + region.getEncodedName()
3817                 + " but already online on this server");
3818               builder.addOpeningState(RegionOpeningState.ALREADY_OPENED);
3819               continue;
3820             }
3821           } else {
3822             LOG.warn("The region " + region.getEncodedName() + " is online on this server" +
3823                 " but hbase:meta does not have this server - continue opening.");
3824             removeFromOnlineRegions(onlineRegion, null);
3825           }
3826         }
3827         LOG.info("Open " + region.getRegionNameAsString());
3828         htd = htds.get(region.getTable());
3829         if (htd == null) {
3830           htd = this.tableDescriptors.get(region.getTable());
3831           htds.put(region.getTable(), htd);
3832         }
3833 
3834         final Boolean previous = this.regionsInTransitionInRS.putIfAbsent(
3835             region.getEncodedNameAsBytes(), Boolean.TRUE);
3836 
3837         if (Boolean.FALSE.equals(previous)) {
3838           // There is a close in progress. We need to mark this open as failed in ZK.
3839           OpenRegionHandler.
3840               tryTransitionFromOfflineToFailedOpen(this, region, versionOfOfflineNode);
3841 
3842           throw new RegionAlreadyInTransitionException("Received OPEN for the region:" +
3843               region.getRegionNameAsString() + " , which we are already trying to CLOSE ");
3844         }
3845 
3846         if (Boolean.TRUE.equals(previous)) {
3847           // An open is in progress. This is supported, but let's log this.
3848           LOG.info("Receiving OPEN for the region:" +
3849               region.getRegionNameAsString() + " , which we are already trying to OPEN" +
3850               " - ignoring this new request for this region.");
3851         }
3852 
3853         // We are opening this region. If it moves back and forth for whatever reason, we don't
3854         // want to keep returning the stale moved record while we are opening, or if we close again.
3855         removeFromMovedRegions(region.getEncodedName());
3856 
3857         if (previous == null) {
3858           // check if the region to be opened is marked in recovering state in ZK
3859           if (SplitLogManager.isRegionMarkedRecoveringInZK(this.zooKeeper,
3860                 region.getEncodedName())) {
3861             // check if the current region open is for distributedLogReplay. This check is to support
3862             // rolling restart/upgrade where we want the Master and RS to see the same configuration
3863             if (!regionOpenInfo.hasOpenForDistributedLogReplay()
3864                   || regionOpenInfo.getOpenForDistributedLogReplay()) {
3865               this.recoveringRegions.put(region.getEncodedName(), null);
3866             } else {
3867               // remove the stale recovering region from ZK when the region is opened not for
3868               // recovery, which can happen when distributedLogReplay is switched from on to off.
3869               List<String> tmpRegions = new ArrayList<String>();
3870               tmpRegions.add(region.getEncodedName());
3871               SplitLogManager.deleteRecoveringRegionZNodes(this.zooKeeper, tmpRegions);
3872             }
3873           }
3874           // If there is no action in progress, we can submit a specific handler.
3875           // Need to pass the expected version in the constructor.
3876           if (region.isMetaRegion()) {
3877             this.service.submit(new OpenMetaHandler(this, this, region, htd,
3878                 versionOfOfflineNode));
3879           } else {
3880             updateRegionFavoredNodesMapping(region.getEncodedName(),
3881                 regionOpenInfo.getFavoredNodesList());
3882             this.service.submit(new OpenRegionHandler(this, this, region, htd,
3883                 versionOfOfflineNode));
3884           }
3885         }
3886 
3887         builder.addOpeningState(RegionOpeningState.OPENED);
3888 
3889       } catch (KeeperException zooKeeperEx) {
3890         LOG.error("Can't retrieve recovering state from zookeeper", zooKeeperEx);
3891         throw new ServiceException(zooKeeperEx);
3892       } catch (IOException ie) {
3893         LOG.warn("Failed opening region " + region.getRegionNameAsString(), ie);
3894         if (isBulkAssign) {
3895           builder.addOpeningState(RegionOpeningState.FAILED_OPENING);
3896         } else {
3897           throw new ServiceException(ie);
3898         }
3899       }
3900     }
3901 
3902     return builder.build();
3903   }
3904 
3905   @Override
3906   public void updateRegionFavoredNodesMapping(String encodedRegionName,
3907       List<org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName> favoredNodes) {
3908     InetSocketAddress[] addr = new InetSocketAddress[favoredNodes.size()];
3909     // Refer to the comment on the declaration of regionFavoredNodesMap on why
3910     // it is a map of region name to InetSocketAddress[]
3911     for (int i = 0; i < favoredNodes.size(); i++) {
3912       addr[i] = InetSocketAddress.createUnresolved(favoredNodes.get(i).getHostName(),
3913           favoredNodes.get(i).getPort());
3914     }
3915     regionFavoredNodesMap.put(encodedRegionName, addr);
3916   }
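
       // Illustrative sketch (not part of the original source): createUnresolved() above
       // deliberately avoids a DNS lookup; a consumer of the map would resolve an address
       // only at the point of use, e.g.:
       //
       //   InetSocketAddress[] nodes = getFavoredNodesForRegion(encodedRegionName);
       //   if (nodes != null) {
       //     for (InetSocketAddress unresolved : nodes) {
       //       InetSocketAddress resolved =
       //           new InetSocketAddress(unresolved.getHostName(), unresolved.getPort());
       //       // use 'resolved' as a favored-node placement hint
       //     }
       //   }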
3917 
3918   /**
3919    * Return the favored nodes for a region given its encoded name. Look at the
3920    * comment around {@link #regionFavoredNodesMap} on why it is InetSocketAddress[]
3921    * @param encodedRegionName
3922    * @return array of favored locations
3923    */
3924   @Override
3925   public InetSocketAddress[] getFavoredNodesForRegion(String encodedRegionName) {
3926     return regionFavoredNodesMap.get(encodedRegionName);
3927   }
3928 
3929   /**
3930    * Close a region on the region server.
3931    *
3932    * @param controller the RPC controller
3933    * @param request the request
3934    * @throws ServiceException
3935    */
3936   @Override
3937   @QosPriority(priority=HConstants.HIGH_QOS)
3938   public CloseRegionResponse closeRegion(final RpcController controller,
3939       final CloseRegionRequest request) throws ServiceException {
3940     int versionOfClosingNode = -1;
3941     if (request.hasVersionOfClosingNode()) {
3942       versionOfClosingNode = request.getVersionOfClosingNode();
3943     }
3944     boolean zk = request.getTransitionInZK();
3945     final ServerName sn = (request.hasDestinationServer() ?
3946       ProtobufUtil.toServerName(request.getDestinationServer()) : null);
3947 
3948     try {
3949       checkOpen();
3950       if (request.hasServerStartCode() && this.serverNameFromMasterPOV != null) {
3951         // check that we are the same server that this RPC is intended for.
3952         long serverStartCode = request.getServerStartCode();
3953         if (this.serverNameFromMasterPOV.getStartcode() !=  serverStartCode) {
3954           throw new ServiceException(new DoNotRetryIOException("This RPC was intended for a " +
3955               "different server with startCode: " + serverStartCode + ", this server is: "
3956               + this.serverNameFromMasterPOV));
3957         }
3958       }
3959       final String encodedRegionName = ProtobufUtil.getRegionEncodedName(request.getRegion());
3960 
3961       // Can be null if we're calling close on a region that's not online
3962       final HRegion region = this.getFromOnlineRegions(encodedRegionName);
3963       if ((region != null) && (region.getCoprocessorHost() != null)) {
3964         region.getCoprocessorHost().preClose(false);
3965       }
3966 
3967       requestCount.increment();
3968       LOG.info("Close " + encodedRegionName + ", via zk=" + (zk ? "yes" : "no") +
3969         ", znode version=" + versionOfClosingNode + ", on " + sn);
3970 
3971       boolean closed = closeRegion(encodedRegionName, false, zk, versionOfClosingNode, sn);
3972       CloseRegionResponse.Builder builder = CloseRegionResponse.newBuilder().setClosed(closed);
3973       return builder.build();
3974     } catch (IOException ie) {
3975       throw new ServiceException(ie);
3976     }
3977   }
3978 
3979   /**
3980    * Flush a region on the region server.
3981    *
3982    * @param controller the RPC controller
3983    * @param request the request
3984    * @throws ServiceException
3985    */
3986   @Override
3987   @QosPriority(priority=HConstants.HIGH_QOS)
3988   public FlushRegionResponse flushRegion(final RpcController controller,
3989       final FlushRegionRequest request) throws ServiceException {
3990     try {
3991       checkOpen();
3992       requestCount.increment();
3993       HRegion region = getRegion(request.getRegion());
3994       LOG.info("Flushing " + region.getRegionNameAsString());
3995       boolean shouldFlush = true;
3996       if (request.hasIfOlderThanTs()) {
3997         shouldFlush = region.getLastFlushTime() < request.getIfOlderThanTs();
3998       }
3999       FlushRegionResponse.Builder builder = FlushRegionResponse.newBuilder();
4000       if (shouldFlush) {
4001         long startTime = EnvironmentEdgeManager.currentTimeMillis();
4002         HRegion.FlushResult flushResult = region.flushcache();
4003         if (flushResult.isFlushSucceeded()) {
4004           long endTime = EnvironmentEdgeManager.currentTimeMillis();
4005           metricsRegionServer.updateFlushTime(endTime - startTime);
4006         }
4007         boolean compactionNeeded = flushResult.isCompactionNeeded();
4008         if (compactionNeeded) {
4009           this.compactSplitThread.requestSystemCompaction(region,
4010               "Compaction through user triggered flush");
4011         }
4012         builder.setFlushed(compactionNeeded);
4013       }
4014       builder.setLastFlushTime(region.getLastFlushTime());
4015       return builder.build();
4016     } catch (DroppedSnapshotException ex) {
4017       // Cache flush can fail in a few places. If it fails in a critical
4018       // section, we get a DroppedSnapshotException and a replay of hlog
4019       // is required. Currently the only way to do this is a restart of
4020       // the server.
4021       abort("Replay of HLog required. Forcing server shutdown", ex);
4022       throw new ServiceException(ex);
4023     } catch (IOException ie) {
4024       throw new ServiceException(ie);
4025     }
4026   }
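
       // Illustrative request sketch (not in the original source; names follow the
       // generated protobuf API used above, 'rs' and 'regionName' are hypothetical):
       // ifOlderThanTs lets a caller skip the flush when the region flushed recently.
       //
       //   FlushRegionRequest req = FlushRegionRequest.newBuilder()
       //       .setRegion(RequestConverter.buildRegionSpecifier(
       //           RegionSpecifierType.REGION_NAME, regionName))
       //       .setIfOlderThanTs(System.currentTimeMillis() - 60 * 1000) // only if none in last minute
       //       .build();
       //   FlushRegionResponse resp = rs.flushRegion(null, req);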
4027 
4028   /**
4029    * Split a region on the region server.
4030    *
4031    * @param controller the RPC controller
4032    * @param request the request
4033    * @throws ServiceException
4034    */
4035   @Override
4036   @QosPriority(priority=HConstants.HIGH_QOS)
4037   public SplitRegionResponse splitRegion(final RpcController controller,
4038       final SplitRegionRequest request) throws ServiceException {
4039     try {
4040       checkOpen();
4041       requestCount.increment();
4042       HRegion region = getRegion(request.getRegion());
4043       region.startRegionOperation(Operation.SPLIT_REGION);
4044       LOG.info("Splitting " + region.getRegionNameAsString());
4045       long startTime = EnvironmentEdgeManager.currentTimeMillis();
4046       HRegion.FlushResult flushResult = region.flushcache();
4047       if (flushResult.isFlushSucceeded()) {
4048         long endTime = EnvironmentEdgeManager.currentTimeMillis();
4049         metricsRegionServer.updateFlushTime(endTime - startTime);
4050       }
4051       byte[] splitPoint = null;
4052       if (request.hasSplitPoint()) {
4053         splitPoint = request.getSplitPoint().toByteArray();
4054       }
4055       region.forceSplit(splitPoint);
4056       compactSplitThread.requestSplit(region, region.checkSplit());
4057       return SplitRegionResponse.newBuilder().build();
4058     } catch (IOException ie) {
4059       throw new ServiceException(ie);
4060     }
4061   }
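
       // Request sketch (illustrative; 'regionName' and 'splitKey' are hypothetical):
       // the split point is optional -- when omitted, region.checkSplit() above picks one.
       //
       //   SplitRegionRequest req = SplitRegionRequest.newBuilder()
       //       .setRegion(RequestConverter.buildRegionSpecifier(
       //           RegionSpecifierType.REGION_NAME, regionName))
       //       .setSplitPoint(ByteStringer.wrap(splitKey)) // optional explicit split key
       //       .build();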
4062 
4063   /**
4064    * Merge regions on the region server.
4065    *
4066    * @param controller the RPC controller
4067    * @param request the request
4068    * @return merge regions response
4069    * @throws ServiceException
4070    */
4071   @Override
4072   @QosPriority(priority = HConstants.HIGH_QOS)
4073   public MergeRegionsResponse mergeRegions(final RpcController controller,
4074       final MergeRegionsRequest request) throws ServiceException {
4075     try {
4076       checkOpen();
4077       requestCount.increment();
4078       HRegion regionA = getRegion(request.getRegionA());
4079       HRegion regionB = getRegion(request.getRegionB());
4080       boolean forcible = request.getForcible();
4081       regionA.startRegionOperation(Operation.MERGE_REGION);
4082       regionB.startRegionOperation(Operation.MERGE_REGION);
4083       LOG.info("Receiving merge request for " + regionA + ", " + regionB
4084           + ", forcible=" + forcible);
4085       long startTime = EnvironmentEdgeManager.currentTimeMillis();
4086       HRegion.FlushResult flushResult = regionA.flushcache();
4087       if (flushResult.isFlushSucceeded()) {
4088         long endTime = EnvironmentEdgeManager.currentTimeMillis();
4089         metricsRegionServer.updateFlushTime(endTime - startTime);
4090       }
4091       startTime = EnvironmentEdgeManager.currentTimeMillis();
4092       flushResult = regionB.flushcache();
4093       if (flushResult.isFlushSucceeded()) {
4094         long endTime = EnvironmentEdgeManager.currentTimeMillis();
4095         metricsRegionServer.updateFlushTime(endTime - startTime);
4096       }
4097       compactSplitThread.requestRegionsMerge(regionA, regionB, forcible);
4098       return MergeRegionsResponse.newBuilder().build();
4099     } catch (IOException ie) {
4100       throw new ServiceException(ie);
4101     }
4102   }
4103 
4104   /**
4105    * Compact a region on the region server.
4106    *
4107    * @param controller the RPC controller
4108    * @param request the request
4109    * @throws ServiceException
4110    */
4111   @Override
4112   @QosPriority(priority=HConstants.HIGH_QOS)
4113   public CompactRegionResponse compactRegion(final RpcController controller,
4114       final CompactRegionRequest request) throws ServiceException {
4115     try {
4116       checkOpen();
4117       requestCount.increment();
4118       HRegion region = getRegion(request.getRegion());
4119       region.startRegionOperation(Operation.COMPACT_REGION);
4120       LOG.info("Compacting " + region.getRegionNameAsString());
4121       boolean major = false;
4122       byte [] family = null;
4123       Store store = null;
4124       if (request.hasFamily()) {
4125         family = request.getFamily().toByteArray();
4126         store = region.getStore(family);
4127         if (store == null) {
4128           throw new ServiceException(new IOException("column family " + Bytes.toString(family) +
4129             " does not exist in region " + region.getRegionNameAsString()));
4130         }
4131       }
4132       if (request.hasMajor()) {
4133         major = request.getMajor();
4134       }
4135       if (major) {
4136         if (family != null) {
4137           store.triggerMajorCompaction();
4138         } else {
4139           region.triggerMajorCompaction();
4140         }
4141       }
4142 
4143       String familyLogMsg = (family != null) ? " for column family: " + Bytes.toString(family) : "";
4144       LOG.trace("User-triggered compaction requested for region " +
4145         region.getRegionNameAsString() + familyLogMsg);
4146       String log = "User-triggered " + (major ? "major " : "") + "compaction" + familyLogMsg;
4147       if (family != null) {
4148         compactSplitThread.requestCompaction(region, store, log,
4149           Store.PRIORITY_USER, null);
4150       } else {
4151         compactSplitThread.requestCompaction(region, log,
4152           Store.PRIORITY_USER, null);
4153       }
4154       return CompactRegionResponse.newBuilder().build();
4155     } catch (IOException ie) {
4156       throw new ServiceException(ie);
4157     }
4158   }
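
       // Request sketch (illustrative; 'regionName' and 'family' are hypothetical): a
       // user-triggered major compaction of a single column family.
       //
       //   CompactRegionRequest req = CompactRegionRequest.newBuilder()
       //       .setRegion(RequestConverter.buildRegionSpecifier(
       //           RegionSpecifierType.REGION_NAME, regionName))
       //       .setMajor(true)                       // takes the triggerMajorCompaction() path above
       //       .setFamily(ByteStringer.wrap(family)) // omit to compact all families
       //       .build();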
4159 
4160   /**
4161    * Replicate WAL entries on the region server.
4162    *
4163    * @param controller the RPC controller
4164    * @param request the request
4165    * @throws ServiceException
4166    */
4167   @Override
4168   @QosPriority(priority=HConstants.REPLICATION_QOS)
4169   public ReplicateWALEntryResponse replicateWALEntry(final RpcController controller,
4170       final ReplicateWALEntryRequest request)
4171   throws ServiceException {
4172     try {
4173       if (replicationSinkHandler != null) {
4174         checkOpen();
4175         requestCount.increment();
4176         this.replicationSinkHandler.replicateLogEntries(request.getEntryList(),
4177           ((PayloadCarryingRpcController)controller).cellScanner());
4178       }
4179       return ReplicateWALEntryResponse.newBuilder().build();
4180     } catch (IOException ie) {
4181       throw new ServiceException(ie);
4182     }
4183   }
4184 
4185   /**
4186    * Replay the given changes when distributedLogReplay replays WAL edits from a failed RS.
4187    * The guarantee is that the given mutations will be durable on the receiving RS if this
4188    * method returns without any exception.
4189    * @param controller the RPC controller
4190    * @param request the request
4191    * @throws ServiceException
4192    */
4193   @Override
4194   @QosPriority(priority = HConstants.REPLAY_QOS)
4195   public ReplicateWALEntryResponse replay(final RpcController controller,
4196       final ReplicateWALEntryRequest request) throws ServiceException {
4197     long before = EnvironmentEdgeManager.currentTimeMillis();
4198     CellScanner cells = ((PayloadCarryingRpcController) controller).cellScanner();
4199     try {
4200       checkOpen();
4201       List<WALEntry> entries = request.getEntryList();
4202       if (entries == null || entries.isEmpty()) {
4203         // empty input
4204         return ReplicateWALEntryResponse.newBuilder().build();
4205       }
4206       HRegion region = this.getRegionByEncodedName(
4207         entries.get(0).getKey().getEncodedRegionName().toStringUtf8());
4208       RegionCoprocessorHost coprocessorHost = region.getCoprocessorHost();
4209       List<Pair<HLogKey, WALEdit>> walEntries = new ArrayList<Pair<HLogKey, WALEdit>>();
4210       List<HLogSplitter.MutationReplay> mutations = new ArrayList<HLogSplitter.MutationReplay>();
4211       // when tags are enabled, we need to tag replay edits with the log sequence number
4212       boolean needAddReplayTag = (HFile.getFormatVersion(this.conf) >= 3);
4213       for (WALEntry entry : entries) {
4214         if (nonceManager != null) {
4215           long nonceGroup = entry.getKey().hasNonceGroup()
4216               ? entry.getKey().getNonceGroup() : HConstants.NO_NONCE;
4217           long nonce = entry.getKey().hasNonce() ? entry.getKey().getNonce() : HConstants.NO_NONCE;
4218           nonceManager.reportOperationFromWal(nonceGroup, nonce, entry.getKey().getWriteTime());
4219         }
4220         Pair<HLogKey, WALEdit> walEntry = (coprocessorHost == null) ? null :
4221           new Pair<HLogKey, WALEdit>();
4222         List<HLogSplitter.MutationReplay> edits = HLogSplitter.getMutationsFromWALEntry(entry,
4223           cells, walEntry, needAddReplayTag);
4224         if (coprocessorHost != null) {
4225           // Start coprocessor replay here. The coprocessor hooks fire once per WALEdit
4226           // rather than once per KeyValue.
4227           if (coprocessorHost.preWALRestore(region.getRegionInfo(), walEntry.getFirst(),
4228             walEntry.getSecond())) {
4229             // if the coprocessor bypasses this log entry, skip it
4230             continue;
4231           }
4232           walEntries.add(walEntry);
4233         }
4234         mutations.addAll(edits);
4235       }
4236 
4237       if (!mutations.isEmpty()) {
4238         OperationStatus[] result = doReplayBatchOp(region, mutations);
4239         // check if it's a partial success
4240         for (int i = 0; result != null && i < result.length; i++) {
4241           if (result[i] != OperationStatus.SUCCESS) {
4242             throw new IOException(result[i].getExceptionMsg());
4243           }
4244         }
4245       }
4246       if (coprocessorHost != null) {
4247         for (Pair<HLogKey, WALEdit> wal : walEntries) {
4248           coprocessorHost.postWALRestore(region.getRegionInfo(), wal.getFirst(),
4249             wal.getSecond());
4250         }
4251       }
4252       return ReplicateWALEntryResponse.newBuilder().build();
4253     } catch (IOException ie) {
4254       throw new ServiceException(ie);
4255     } finally {
4256       metricsRegionServer.updateReplay(EnvironmentEdgeManager.currentTimeMillis() - before);
4257     }
4258   }
4259 
4260   /**
4261    * Roll the WAL writer of the region server.
4262    * @param controller the RPC controller
4263    * @param request the request
4264    * @throws ServiceException
4265    */
4266   @Override
4267   public RollWALWriterResponse rollWALWriter(final RpcController controller,
4268       final RollWALWriterRequest request) throws ServiceException {
4269     try {
4270       checkOpen();
4271       requestCount.increment();
4272       if (this.rsHost != null) {
4273         this.rsHost.preRollWALWriterRequest();
4274       }
4275       HLog wal = this.getWAL();
4276       byte[][] regionsToFlush = wal.rollWriter(true);
4277       RollWALWriterResponse.Builder builder = RollWALWriterResponse.newBuilder();
4278       if (regionsToFlush != null) {
4279         for (byte[] region: regionsToFlush) {
4280           builder.addRegionToFlush(ByteStringer.wrap(region));
4281         }
4282       }
4283       return builder.build();
4284     } catch (IOException ie) {
4285       throw new ServiceException(ie);
4286     }
4287   }
4288 
4289   /**
4290    * Stop the region server.
4291    *
4292    * @param controller the RPC controller
4293    * @param request the request
4294    * @throws ServiceException
4295    */
4296   @Override
4297   public StopServerResponse stopServer(final RpcController controller,
4298       final StopServerRequest request) throws ServiceException {
4299     requestCount.increment();
4300     String reason = request.getReason();
4301     stop(reason);
4302     return StopServerResponse.newBuilder().build();
4303   }
4304 
4305   /**
4306    * Get some information of the region server.
4307    *
4308    * @param controller the RPC controller
4309    * @param request the request
4310    * @throws ServiceException
4311    */
4312   @Override
4313   public GetServerInfoResponse getServerInfo(final RpcController controller,
4314       final GetServerInfoRequest request) throws ServiceException {
4315     try {
4316       checkOpen();
4317     } catch (IOException ie) {
4318       throw new ServiceException(ie);
4319     }
4320     ServerName serverName = getServerName();
4321     requestCount.increment();
4322     return ResponseConverter.buildGetServerInfoResponse(serverName, rsInfo.getInfoPort());
4323   }
4324 
4325 // End Admin methods
4326 
4327   /**
4328    * Find the HRegion based on a region specifier
4329    *
4330    * @param regionSpecifier the region specifier
4331    * @return the corresponding region
4332    * @throws IOException if the specifier is not null
4333    *    but the region could not be found
4334    */
4335   protected HRegion getRegion(
4336       final RegionSpecifier regionSpecifier) throws IOException {
4337     return getRegionByEncodedName(regionSpecifier.getValue().toByteArray(),
4338         ProtobufUtil.getRegionEncodedName(regionSpecifier));
4339   }
4340 
4341   /**
4342    * Execute an append mutation.
4343    *
4344    * @param region the region to append to
4345    * @param m the append mutation proto
4346    * @param cellScanner the cell data backing the mutation
4347    * @return the Result of the append, possibly supplied by a RegionObserver
4348    * preAppend hook if the default operation was bypassed
4349    * @throws IOException
4350    */
4351   protected Result append(final HRegion region,
4352       final MutationProto m, final CellScanner cellScanner, long nonceGroup) throws IOException {
4353     long before = EnvironmentEdgeManager.currentTimeMillis();
4354     Append append = ProtobufUtil.toAppend(m, cellScanner);
4355     Result r = null;
4356     if (region.getCoprocessorHost() != null) {
4357       r = region.getCoprocessorHost().preAppend(append);
4358     }
4359     if (r == null) {
4360       long nonce = startNonceOperation(m, nonceGroup);
4361       boolean success = false;
4362       try {
4363         r = region.append(append, nonceGroup, nonce);
4364         success = true;
4365       } finally {
4366         endNonceOperation(m, nonceGroup, success);
4367       }
4368       if (region.getCoprocessorHost() != null) {
4369         region.getCoprocessorHost().postAppend(append, r);
4370       }
4371     }
4372     metricsRegionServer.updateAppend(EnvironmentEdgeManager.currentTimeMillis() - before);
4373     return r;
4374   }
4375 
4376   /**
4377    * Execute an increment mutation.
4378    *
4379    * @param region the region to increment
4380    * @param mutation the increment mutation proto
4381    * @return the Result
4382    * @throws IOException
4383    */
4384   protected Result increment(final HRegion region, final MutationProto mutation,
4385       final CellScanner cells, long nonceGroup) throws IOException {
4386     long before = EnvironmentEdgeManager.currentTimeMillis();
4387     Increment increment = ProtobufUtil.toIncrement(mutation, cells);
4388     Result r = null;
4389     if (region.getCoprocessorHost() != null) {
4390       r = region.getCoprocessorHost().preIncrement(increment);
4391     }
4392     if (r == null) {
4393       long nonce = startNonceOperation(mutation, nonceGroup);
4394       boolean success = false;
4395       try {
4396         r = region.increment(increment, nonceGroup, nonce);
4397         success = true;
4398       } finally {
4399         endNonceOperation(mutation, nonceGroup, success);
4400       }
4401       if (region.getCoprocessorHost() != null) {
4402         r = region.getCoprocessorHost().postIncrement(increment, r);
4403       }
4404     }
4405     metricsRegionServer.updateIncrement(EnvironmentEdgeManager.currentTimeMillis() - before);
4406     return r;
4407   }
4408 
4409   /**
4410    * Starts the nonce operation for a mutation, if needed.
4411    * @param mutation Mutation.
4412    * @param nonceGroup Nonce group from the request.
4413    * @return Nonce used (can be NO_NONCE).
4414    */
4415   private long startNonceOperation(final MutationProto mutation, long nonceGroup)
4416       throws IOException, OperationConflictException {
4417     if (nonceManager == null || !mutation.hasNonce()) return HConstants.NO_NONCE;
4418     boolean canProceed = false;
4419     try {
4420       canProceed = nonceManager.startOperation(nonceGroup, mutation.getNonce(), this);
4421     } catch (InterruptedException ex) {
4422       throw new InterruptedIOException("Nonce start operation interrupted");
4423     }
4424     if (!canProceed) {
4425       // TODO: instead, we could convert append/increment to get w/mvcc
4426       String message = "The operation with nonce {" + nonceGroup + ", " + mutation.getNonce()
4427           + "} on row [" + Bytes.toString(mutation.getRow().toByteArray())
4428           + "] may have already completed";
4429       throw new OperationConflictException(message);
4430     }
4431     return mutation.getNonce();
4432   }
4433 
4434   /**
4435    * Ends nonce operation for a mutation, if needed.
4436    * @param mutation Mutation.
4437    * @param nonceGroup Nonce group from the request. Always 0 in initial implementation.
4438    * @param success Whether the operation for this nonce has succeeded.
4439    */
4440   private void endNonceOperation(final MutationProto mutation, long nonceGroup,
4441       boolean success) {
4442     if (nonceManager == null || !mutation.hasNonce()) return;
4443     nonceManager.endOperation(nonceGroup, mutation.getNonce(), success);
4444   }
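
       // The two helpers above are always used as a pair; the pattern (mirroring
       // append/increment in this class) is:
       //
       //   long nonce = startNonceOperation(mutation, nonceGroup); // throws OperationConflictException
       //   boolean success = false;                                //   if a duplicate already ran
       //   try {
       //     // ... perform the mutation with (nonceGroup, nonce) ...
       //     success = true;
       //   } finally {
       //     endNonceOperation(mutation, nonceGroup, success);     // record the outcome for retries
       //   }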
4445 
4446   @Override
4447   public ServerNonceManager getNonceManager() {
4448     return this.nonceManager;
4449   }
4450 
4451   /**
4452    * Execute a list of Put/Delete mutations.
4453    *
4454    * @param builder the response builder that per-action results are added to
4455    * @param region the region the mutations are applied to
4456    * @param mutations the list of Put/Delete actions
4457    */
4458   protected void doBatchOp(final RegionActionResult.Builder builder, final HRegion region,
4459       final List<ClientProtos.Action> mutations, final CellScanner cells) {
4460     Mutation[] mArray = new Mutation[mutations.size()];
4461     long before = EnvironmentEdgeManager.currentTimeMillis();
4462     boolean batchContainsPuts = false, batchContainsDelete = false;
4463     try {
4464       int i = 0;
4465       for (ClientProtos.Action action: mutations) {
4466         MutationProto m = action.getMutation();
4467         Mutation mutation;
4468         if (m.getMutateType() == MutationType.PUT) {
4469           mutation = ProtobufUtil.toPut(m, cells);
4470           batchContainsPuts = true;
4471         } else {
4472           mutation = ProtobufUtil.toDelete(m, cells);
4473           batchContainsDelete = true;
4474         }
4475         mArray[i++] = mutation;
4476       }
4477 
4478       if (!region.getRegionInfo().isMetaTable()) {
4479         cacheFlusher.reclaimMemStoreMemory();
4480       }
4481 
4482       OperationStatus[] codes = region.batchMutate(mArray);
4483       for (i = 0; i < codes.length; i++) {
4484         int index = mutations.get(i).getIndex();
4485         Exception e = null;
4486         switch (codes[i].getOperationStatusCode()) {
4487           case SUCCESS:
4488             builder.addResultOrException(getResultOrException(ClientProtos.Result.getDefaultInstance(), index));
4489             break;
4490 
4491           case BAD_FAMILY:
4492             e = new NoSuchColumnFamilyException(codes[i].getExceptionMsg());
4493             builder.addResultOrException(getResultOrException(e, index));
4494             break;
4495 
4496           case SANITY_CHECK_FAILURE:
4497             e = new FailedSanityCheckException(codes[i].getExceptionMsg());
4498             builder.addResultOrException(getResultOrException(e, index));
4499             break;
4500 
4501           default:
4502             e = new DoNotRetryIOException(codes[i].getExceptionMsg());
4503             builder.addResultOrException(getResultOrException(e, index));
4504             break;
4505         }
4506       }
4507     } catch (IOException ie) {
4508       for (int i = 0; i < mutations.size(); i++) {
4509         builder.addResultOrException(getResultOrException(ie, mutations.get(i).getIndex()));
4510       }
4511     }
4512     long after = EnvironmentEdgeManager.currentTimeMillis();
4513     if (batchContainsPuts) {
4514       metricsRegionServer.updatePut(after - before);
4515     }
4516     if (batchContainsDelete) {
4517       metricsRegionServer.updateDelete(after - before);
4518     }
4519   }
4520   private static ResultOrException getResultOrException(final ClientProtos.Result r,
4521       final int index) {
4522     return getResultOrException(ResponseConverter.buildActionResult(r), index);
4523   }
4524   private static ResultOrException getResultOrException(final Exception e, final int index) {
4525     return getResultOrException(ResponseConverter.buildActionResult(e), index);
4526   }
4527 
4528   private static ResultOrException getResultOrException(final ResultOrException.Builder builder,
4529       final int index) {
4530     return builder.setIndex(index).build();
4531   }
4532 
4533   /**
4534    * Execute a list of Put/Delete mutations. The function returns OperationStatus instead of
4535    * constructing a MultiResponse, saving a loop when the caller doesn't need a MultiResponse.
4536    * @param region the region to replay the mutations on
4537    * @param mutations the list of WAL-replay mutations
4538    * @return an array of OperationStatus which internally contains the OperationStatusCode and the
4539    *         exceptionMessage if any
4540    * @throws IOException
4541    */
4542   protected OperationStatus [] doReplayBatchOp(final HRegion region,
4543       final List<HLogSplitter.MutationReplay> mutations) throws IOException {
4544 
4545     long before = EnvironmentEdgeManager.currentTimeMillis();
4546     boolean batchContainsPuts = false, batchContainsDelete = false;
4547     try {
4548       for (Iterator<HLogSplitter.MutationReplay> it = mutations.iterator(); it.hasNext();) {
4549         HLogSplitter.MutationReplay m = it.next();
4550         if (m.type == MutationType.PUT) {
4551           batchContainsPuts = true;
4552         } else {
4553           batchContainsDelete = true;
4554         }
4555         NavigableMap<byte[], List<Cell>> map = m.mutation.getFamilyCellMap();
4556         List<Cell> metaCells = map.get(WALEdit.METAFAMILY);
4557         if (metaCells != null && !metaCells.isEmpty()) {
4558           for (Cell metaCell : metaCells) {
4559             CompactionDescriptor compactionDesc = WALEdit.getCompaction(metaCell);
4560             if (compactionDesc != null) {
4561               region.completeCompactionMarker(compactionDesc);
4562             }
4563           }
4564           it.remove();
4565         }
4566       }
4567       requestCount.add(mutations.size());
4568       if (!region.getRegionInfo().isMetaTable()) {
4569         cacheFlusher.reclaimMemStoreMemory();
4570       }
4571       return region.batchReplay(mutations.toArray(
4572         new HLogSplitter.MutationReplay[mutations.size()]));
4573     } finally {
4574       long after = EnvironmentEdgeManager.currentTimeMillis();
4575       if (batchContainsPuts) {
4576         metricsRegionServer.updatePut(after - before);
4577       }
4578       if (batchContainsDelete) {
4579         metricsRegionServer.updateDelete(after - before);
4580       }
4581     }
4582   }
4583 
4584   /**
4585    * Atomically apply a list of Put/Delete actions to a single row.
4586    *
4587    * @param region the region holding the row
4588    * @param actions the Put/Delete actions making up the atomic mutation
4589    * @param cellScanner if non-null, the mutation data -- the Cell content.
4590    * @throws IOException
4591    */
4592   protected void mutateRows(final HRegion region, final List<ClientProtos.Action> actions,
4593       final CellScanner cellScanner)
4594   throws IOException {
4595     if (!region.getRegionInfo().isMetaTable()) {
4596       cacheFlusher.reclaimMemStoreMemory();
4597     }
4598     RowMutations rm = null;
4599     for (ClientProtos.Action action: actions) {
4600       if (action.hasGet()) {
4601         throw new DoNotRetryIOException("Atomic put and/or delete only, not a Get=" +
4602           action.getGet());
4603       }
4604       MutationType type = action.getMutation().getMutateType();
4605       if (rm == null) {
4606         rm = new RowMutations(action.getMutation().getRow().toByteArray());
4607       }
4608       switch (type) {
4609       case PUT:
4610         rm.add(ProtobufUtil.toPut(action.getMutation(), cellScanner));
4611         break;
4612       case DELETE:
4613         rm.add(ProtobufUtil.toDelete(action.getMutation(), cellScanner));
4614         break;
4615       default:
4616           throw new DoNotRetryIOException("Atomic put and/or delete only, not " + type.name());
4617       }
4618     }
4619     region.mutateRow(rm);
4620   }
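
       // Client-side sketch of what arrives here (hypothetical, for illustration): the
       // actions correspond to a RowMutations built and submitted by a client, e.g.
       //
       //   RowMutations rm = new RowMutations(row);
       //   rm.add(new Put(row).add(family, qualA, value));        // becomes a PUT action
       //   rm.add(new Delete(row).deleteColumns(family, qualB));  // becomes a DELETE action
       //   table.mutateRow(rm); // served by mutateRows(...) above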
4621 
4622   /**
4623    * Atomically apply a list of Put/Delete actions to a single row if a check on one
4624    * of the row's cells passes.
4625    * @param region the region holding the row
4626    * @param actions the Put/Delete actions making up the atomic mutation
4627    * @param cellScanner if non-null, the mutation data -- the Cell content.
4628    * @param row the row to check and mutate
4629    * @param family the column family of the checked cell
4630    * @param qualifier the column qualifier of the checked cell
4631    * @param compareOp the comparison operator for the check
4632    * @param comparator the comparator supplying the expected value
4633    * @return true if the check passed and the mutations were applied
4634    */
4635   private boolean checkAndRowMutate(final HRegion region, final List<ClientProtos.Action> actions,
4636       final CellScanner cellScanner, byte[] row, byte[] family, byte[] qualifier,
4637       CompareOp compareOp, ByteArrayComparable comparator) throws IOException {
4638     if (!region.getRegionInfo().isMetaTable()) {
4639       cacheFlusher.reclaimMemStoreMemory();
4640     }
4641     RowMutations rm = null;
4642     for (ClientProtos.Action action: actions) {
4643       if (action.hasGet()) {
4644         throw new DoNotRetryIOException("Atomic put and/or delete only, not a Get=" +
4645             action.getGet());
4646       }
4647       MutationType type = action.getMutation().getMutateType();
4648       if (rm == null) {
4649         rm = new RowMutations(action.getMutation().getRow().toByteArray());
4650       }
4651       switch (type) {
4652       case PUT:
4653         rm.add(ProtobufUtil.toPut(action.getMutation(), cellScanner));
4654         break;
4655       case DELETE:
4656         rm.add(ProtobufUtil.toDelete(action.getMutation(), cellScanner));
4657         break;
4658       default:
4659         throw new DoNotRetryIOException("Atomic put and/or delete only, not " + type.name());
4660       }
4661     }
4662     return region.checkAndRowMutate(row, family, qualifier, compareOp, comparator, rm, true);
4663   }
4664 
4665   private static class MovedRegionInfo {
4666     private final ServerName serverName;
4667     private final long seqNum;
4668     private final long ts;
4669 
4670     public MovedRegionInfo(ServerName serverName, long closeSeqNum) {
4671       this.serverName = serverName;
4672       this.seqNum = closeSeqNum;
4673       ts = EnvironmentEdgeManager.currentTimeMillis();
4674     }
4675 
4676     public ServerName getServerName() {
4677       return serverName;
4678     }
4679 
4680     public long getSeqNum() {
4681       return seqNum;
4682     }
4683 
4684     public long getMoveTime() {
4685       return ts;
4686     }
4687   }
4688 
4689   // This map contains all the regions that we closed for a move.
4690   // We record the time of the move, as we don't want to keep stale entries forever.
4691   protected Map<String, MovedRegionInfo> movedRegions =
4692       new ConcurrentHashMap<String, MovedRegionInfo>(3000);
4693 
4694   // We need a timeout; otherwise we risk serving wrong information, which would double
4695   // the number of network calls instead of reducing them.
4696   private static final int TIMEOUT_REGION_MOVED = (2 * 60 * 1000);
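       // (2 * 60 * 1000 ms = 120000 ms = 2 minutes; getMovedRegion treats older entries as expired.)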
4697 
4698   protected void addToMovedRegions(String encodedName, ServerName destination, long closeSeqNum) {
4699     if (ServerName.isSameHostnameAndPort(destination, this.getServerName())) {
4700       LOG.warn("Not adding moved region record: " + encodedName + " to self.");
4701       return;
4702     }
4703     LOG.info("Adding moved region record: " + encodedName + " to "
4704         + destination.getServerName() + ":" + destination.getPort()
4705         + " as of " + closeSeqNum);
4706     movedRegions.put(encodedName, new MovedRegionInfo(destination, closeSeqNum));
4707   }
4708 
4709   private void removeFromMovedRegions(String encodedName) {
4710     movedRegions.remove(encodedName);
4711   }
4712 
4713   private MovedRegionInfo getMovedRegion(final String encodedRegionName) {
4714     MovedRegionInfo dest = movedRegions.get(encodedRegionName);
4715 
4716     long now = EnvironmentEdgeManager.currentTimeMillis();
4717     if (dest != null) {
4718       if (dest.getMoveTime() > (now - TIMEOUT_REGION_MOVED)) {
4719         return dest;
4720       } else {
4721         movedRegions.remove(encodedRegionName);
4722       }
4723     }
4724 
4725     return null;
4726   }
4727 
4728   /**
4729    * Remove the expired entries from the moved regions list.
4730    */
4731   protected void cleanMovedRegions() {
4732     final long cutOff = EnvironmentEdgeManager.currentTimeMillis() - TIMEOUT_REGION_MOVED;
4733     Iterator<Entry<String, MovedRegionInfo>> it = movedRegions.entrySet().iterator();
4734 
4735     while (it.hasNext()){
4736       Map.Entry<String, MovedRegionInfo> e = it.next();
4737       if (e.getValue().getMoveTime() < cutOff) {
4738         it.remove();
4739       }
4740     }
4741   }
4742 
4743   /**
4744    * Creates a Chore thread to clean the moved region cache.
4745    */
4746   protected static class MovedRegionsCleaner extends Chore implements Stoppable {
4747     private HRegionServer regionServer;
4748     Stoppable stoppable;
4749 
4750     private MovedRegionsCleaner(
4751       HRegionServer regionServer, Stoppable stoppable){
4752       super("MovedRegionsCleaner for region server " + regionServer, TIMEOUT_REGION_MOVED, stoppable);
4753       this.regionServer = regionServer;
4754       this.stoppable = stoppable;
4755     }
4756 
4757     static MovedRegionsCleaner createAndStart(HRegionServer rs){
4758       Stoppable stoppable = new Stoppable() {
4759         private volatile boolean isStopped = false;
4760         @Override public void stop(String why) { isStopped = true;}
4761         @Override public boolean isStopped() {return isStopped;}
4762       };
4763 
4764       return new MovedRegionsCleaner(rs, stoppable);
4765     }
4766 
4767     @Override
4768     protected void chore() {
4769       regionServer.cleanMovedRegions();
4770     }
4771 
4772     @Override
4773     public void stop(String why) {
4774       stoppable.stop(why);
4775     }
4776 
4777     @Override
4778     public boolean isStopped() {
4779       return stoppable.isStopped();
4780     }
4781   }
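
       // Note: the chore period above equals TIMEOUT_REGION_MOVED, so a stale entry can
       // survive in the map for up to roughly two timeout periods between cleanings;
       // getMovedRegion() filters by age, so callers never see an expired record.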
4782 
4783   private String getMyEphemeralNodePath() {
4784     return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
4785   }
4786 
4787   /**
4788    * Holder class which holds the RegionScanner and nextCallSeq together.
4789    */
4790   private static class RegionScannerHolder {
4791     private RegionScanner s;
4792     private long nextCallSeq = 0L;
4793     private HRegion r;
4794 
4795     public RegionScannerHolder(RegionScanner s, HRegion r) {
4796       this.s = s;
4797       this.r = r;
4798     }
4799   }
4800 
4801   private boolean isHealthCheckerConfigured() {
4802     String healthScriptLocation = this.conf.get(HConstants.HEALTH_SCRIPT_LOC);
4803     return org.apache.commons.lang.StringUtils.isNotBlank(healthScriptLocation);
4804   }
4805 
4806   /**
4807    * @return the underlying {@link CompactSplitThread} for the servers
4808    */
4809   public CompactSplitThread getCompactSplitThread() {
4810     return this.compactSplitThread;
4811   }
4812 
4813   /**
4814    * A helper function to store the last flushed sequence id with the previous failed RS for a
4815    * recovering region. The id is used to skip WAL edits which have already been flushed. Since
4816    * a flushed sequence id is only valid per RS, we associate the id with the corresponding failed RS.
4817    * @throws KeeperException
4818    * @throws IOException
4819    */
4820   private void updateRecoveringRegionLastFlushedSequenceId(HRegion r) throws KeeperException,
4821       IOException {
4822     if (!r.isRecovering()) {
4823       // return immediately for non-recovering regions
4824       return;
4825     }
4826 
4827     HRegionInfo region = r.getRegionInfo();
4828     ZooKeeperWatcher zkw = getZooKeeper();
4829     String previousRSName = this.getLastFailedRSFromZK(region.getEncodedName());
4830     Map<byte[], Long> maxSeqIdInStores = r.getMaxStoreSeqIdForLogReplay();
4831     long minSeqIdForLogReplay = -1;
4832     for (Long storeSeqIdForReplay : maxSeqIdInStores.values()) {
4833       if (minSeqIdForLogReplay == -1 || storeSeqIdForReplay < minSeqIdForLogReplay) {
4834         minSeqIdForLogReplay = storeSeqIdForReplay;
4835       }
4836     }
4837 
4838     try {
4839       long lastRecordedFlushedSequenceId = -1;
4840       String nodePath = ZKUtil.joinZNode(this.zooKeeper.recoveringRegionsZNode,
4841         region.getEncodedName());
4842       // recovering-region level
4843       byte[] data = ZKUtil.getData(zkw, nodePath);
4844       if (data != null) {
4845         lastRecordedFlushedSequenceId = SplitLogManager.parseLastFlushedSequenceIdFrom(data);
4846       }
4847       if (data == null || lastRecordedFlushedSequenceId < minSeqIdForLogReplay) {
4848         ZKUtil.setData(zkw, nodePath, ZKUtil.positionToByteArray(minSeqIdForLogReplay));
4849       }
4850       if (previousRSName != null) {
4851         // one level deeper for the failed RS
4852         nodePath = ZKUtil.joinZNode(nodePath, previousRSName);
4853         ZKUtil.setData(zkw, nodePath,
4854           ZKUtil.regionSequenceIdsToByteArray(minSeqIdForLogReplay, maxSeqIdInStores));
4855         LOG.debug("Update last flushed sequence id of region " + region.getEncodedName() + " for "
4856             + previousRSName);
4857       } else {
4858         LOG.warn("Can't find failed region server for recovering region " + region.getEncodedName());
4859       }
4860     } catch (NoNodeException ignore) {
4861       LOG.debug("Region " + region.getEncodedName() +
4862         " must have completed recovery because its recovery znode has been removed", ignore);
4863     }
4864   }
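
       // ZNode layout used by the method above (reconstructed from the code, for orientation):
       //
       //   /hbase/recovering-regions/<encodedRegionName>             last flushed seq id of the region
       //   /hbase/recovering-regions/<encodedRegionName>/<failedRS>  per-store seq ids for the failed RS
       //
       // The region-level value is only ever advanced, never lowered (see the comparison
       // against lastRecordedFlushedSequenceId above).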
4865 
4866   /**
4867    * Return the last failed RS name under /hbase/recovering-regions/encodedRegionName
4868    * @param encodedRegionName the encoded name of the recovering region
4869    * @throws KeeperException
4870    */
4871   private String getLastFailedRSFromZK(String encodedRegionName) throws KeeperException {
4872     String result = null;
4873     long maxZxid = 0;
4874     ZooKeeperWatcher zkw = this.getZooKeeper();
4875     String nodePath = ZKUtil.joinZNode(zkw.recoveringRegionsZNode, encodedRegionName);
4876     List<String> failedServers = ZKUtil.listChildrenNoWatch(zkw, nodePath);
4877     if (failedServers == null || failedServers.isEmpty()) {
4878       return result;
4879     }
4880     for (String failedServer : failedServers) {
4881       String rsPath = ZKUtil.joinZNode(nodePath, failedServer);
4882       Stat stat = new Stat();
4883       ZKUtil.getDataNoWatch(zkw, rsPath, stat);
4884       if (maxZxid < stat.getCzxid()) {
4885         maxZxid = stat.getCzxid();
4886         result = failedServer;
4887       }
4888     }
4889     return result;
4890   }
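
       // Why czxid: a znode's czxid is the ZooKeeper transaction id at its creation, and
       // transaction ids increase monotonically, so the child with the largest czxid was
       // created last -- i.e. it names the most recent RS to fail with this region.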
4891 
4892   @Override
4893   public UpdateFavoredNodesResponse updateFavoredNodes(RpcController controller,
4894       UpdateFavoredNodesRequest request) throws ServiceException {
4895     List<UpdateFavoredNodesRequest.RegionUpdateInfo> openInfoList = request.getUpdateInfoList();
4896     UpdateFavoredNodesResponse.Builder respBuilder = UpdateFavoredNodesResponse.newBuilder();
4897     for (UpdateFavoredNodesRequest.RegionUpdateInfo regionUpdateInfo : openInfoList) {
4898       HRegionInfo hri = HRegionInfo.convert(regionUpdateInfo.getRegion());
4899       updateRegionFavoredNodesMapping(hri.getEncodedName(),
4900           regionUpdateInfo.getFavoredNodesList());
4901     }
4902     respBuilder.setResponse(openInfoList.size());
4903     return respBuilder.build();
4904   }
4905 
4906   /**
4907    * @return The cache config instance used by the regionserver.
4908    */
4909   public CacheConfig getCacheConfig() {
4910     return this.cacheConfig;
4911   }
4912 
4913 }