1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import java.io.IOException;
22 import java.io.InterruptedIOException;
23 import java.lang.Thread.UncaughtExceptionHandler;
24 import java.lang.management.ManagementFactory;
25 import java.lang.management.MemoryUsage;
26 import java.lang.reflect.Constructor;
27 import java.net.BindException;
28 import java.net.InetAddress;
29 import java.net.InetSocketAddress;
30 import java.util.ArrayList;
31 import java.util.Collection;
32 import java.util.Collections;
33 import java.util.Comparator;
34 import java.util.HashMap;
35 import java.util.HashSet;
36 import java.util.Iterator;
37 import java.util.List;
38 import java.util.Map;
39 import java.util.Map.Entry;
40 import java.util.Set;
41 import java.util.SortedMap;
42 import java.util.TreeMap;
43 import java.util.TreeSet;
44 import java.util.concurrent.ConcurrentHashMap;
45 import java.util.concurrent.ConcurrentMap;
46 import java.util.concurrent.ConcurrentSkipListMap;
47 import java.util.concurrent.atomic.AtomicBoolean;
48 import java.util.concurrent.atomic.AtomicReference;
49 import java.util.concurrent.locks.ReentrantReadWriteLock;
50
51 import javax.management.MalformedObjectNameException;
52 import javax.management.ObjectName;
53 import javax.servlet.http.HttpServlet;
54
55 import org.apache.commons.lang.math.RandomUtils;
56 import org.apache.commons.logging.Log;
57 import org.apache.commons.logging.LogFactory;
58 import org.apache.hadoop.conf.Configuration;
59 import org.apache.hadoop.fs.FileSystem;
60 import org.apache.hadoop.fs.Path;
61 import org.apache.hadoop.hbase.Chore;
62 import org.apache.hadoop.hbase.ClockOutOfSyncException;
63 import org.apache.hadoop.hbase.CoordinatedStateManager;
64 import org.apache.hadoop.hbase.CoordinatedStateManagerFactory;
65 import org.apache.hadoop.hbase.HBaseConfiguration;
66 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
67 import org.apache.hadoop.hbase.HConstants;
68 import org.apache.hadoop.hbase.HRegionInfo;
69 import org.apache.hadoop.hbase.HealthCheckChore;
70 import org.apache.hadoop.hbase.MetaTableAccessor;
71 import org.apache.hadoop.hbase.NotServingRegionException;
72 import org.apache.hadoop.hbase.RemoteExceptionHandler;
73 import org.apache.hadoop.hbase.ServerName;
74 import org.apache.hadoop.hbase.Stoppable;
75 import org.apache.hadoop.hbase.TableDescriptors;
76 import org.apache.hadoop.hbase.TableName;
77 import org.apache.hadoop.hbase.YouAreDeadException;
78 import org.apache.hadoop.hbase.ZNodeClearer;
79 import org.apache.hadoop.hbase.classification.InterfaceAudience;
80 import org.apache.hadoop.hbase.client.ClusterConnection;
81 import org.apache.hadoop.hbase.client.ConnectionFactory;
82 import org.apache.hadoop.hbase.client.ConnectionUtils;
83 import org.apache.hadoop.hbase.conf.ConfigurationManager;
84 import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
85 import org.apache.hadoop.hbase.coordination.CloseRegionCoordination;
86 import org.apache.hadoop.hbase.coordination.SplitLogWorkerCoordination;
87 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
88 import org.apache.hadoop.hbase.exceptions.RegionMovedException;
89 import org.apache.hadoop.hbase.exceptions.RegionOpeningException;
90 import org.apache.hadoop.hbase.exceptions.UnknownProtocolException;
91 import org.apache.hadoop.hbase.executor.ExecutorService;
92 import org.apache.hadoop.hbase.executor.ExecutorType;
93 import org.apache.hadoop.hbase.fs.HFileSystem;
94 import org.apache.hadoop.hbase.http.InfoServer;
95 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
96 import org.apache.hadoop.hbase.ipc.RpcClient;
97 import org.apache.hadoop.hbase.ipc.RpcClientFactory;
98 import org.apache.hadoop.hbase.ipc.RpcServerInterface;
99 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
100 import org.apache.hadoop.hbase.ipc.ServerRpcController;
101 import org.apache.hadoop.hbase.master.HMaster;
102 import org.apache.hadoop.hbase.master.RegionState.State;
103 import org.apache.hadoop.hbase.master.TableLockManager;
104 import org.apache.hadoop.hbase.procedure.RegionServerProcedureManagerHost;
105 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
106 import org.apache.hadoop.hbase.protobuf.RequestConverter;
107 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
108 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall;
109 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceRequest;
110 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceResponse;
111 import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos;
112 import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionLoad;
113 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.Coprocessor;
114 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.NameStringPair;
115 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
116 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier;
117 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType;
118 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.GetLastFlushedSequenceIdRequest;
119 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerReportRequest;
120 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest;
121 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupResponse;
122 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStatusService;
123 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
124 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
125 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRSFatalErrorRequest;
126 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
127 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
128 import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
129 import org.apache.hadoop.hbase.regionserver.handler.CloseMetaHandler;
130 import org.apache.hadoop.hbase.regionserver.handler.CloseRegionHandler;
131 import org.apache.hadoop.hbase.wal.DefaultWALProvider;
132 import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL;
133 import org.apache.hadoop.hbase.wal.WAL;
134 import org.apache.hadoop.hbase.wal.WALFactory;
135 import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
136 import org.apache.hadoop.hbase.replication.regionserver.ReplicationLoad;
137 import org.apache.hadoop.hbase.security.UserProvider;
138 import org.apache.hadoop.hbase.trace.SpanReceiverHost;
139 import org.apache.hadoop.hbase.util.Addressing;
140 import org.apache.hadoop.hbase.util.ByteStringer;
141 import org.apache.hadoop.hbase.util.Bytes;
142 import org.apache.hadoop.hbase.util.CompressionTest;
143 import org.apache.hadoop.hbase.util.ConfigUtil;
144 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
145 import org.apache.hadoop.hbase.util.FSTableDescriptors;
146 import org.apache.hadoop.hbase.util.FSUtils;
147 import org.apache.hadoop.hbase.util.HasThread;
148 import org.apache.hadoop.hbase.util.JSONBean;
149 import org.apache.hadoop.hbase.util.JvmPauseMonitor;
150 import org.apache.hadoop.hbase.util.Sleeper;
151 import org.apache.hadoop.hbase.util.Threads;
152 import org.apache.hadoop.hbase.util.VersionInfo;
153 import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
154 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
155 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
156 import org.apache.hadoop.hbase.zookeeper.RecoveringRegionWatcher;
157 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
158 import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
159 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
160 import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
161 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
162 import org.apache.hadoop.ipc.RemoteException;
163 import org.apache.hadoop.metrics.util.MBeanUtil;
164 import org.apache.hadoop.util.ReflectionUtils;
165 import org.apache.hadoop.util.StringUtils;
166 import org.apache.zookeeper.KeeperException;
167 import org.apache.zookeeper.KeeperException.NoNodeException;
168 import org.apache.zookeeper.data.Stat;
169
170 import com.google.common.annotations.VisibleForTesting;
171 import com.google.common.collect.Maps;
172 import com.google.protobuf.BlockingRpcChannel;
173 import com.google.protobuf.Descriptors;
174 import com.google.protobuf.Message;
175 import com.google.protobuf.RpcCallback;
176 import com.google.protobuf.RpcController;
177 import com.google.protobuf.Service;
178 import com.google.protobuf.ServiceException;
179
180
181
182
183
184 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
185 @SuppressWarnings("deprecation")
186 public class HRegionServer extends HasThread implements
187 RegionServerServices, LastSequenceId {
188
// Logger shared by this class.
189 public static final Log LOG = LogFactory.getLog(HRegionServer.class);
190
191
192
193
194
// String constants "OPEN" and "CLOSE".
// NOTE(review): no use of these is visible in this part of the file; presumably they label
// region open/close operations — confirm against callers.
195 protected static final String OPEN = "OPEN";
196 protected static final String CLOSE = "CLOSE";
197
198
199
200
// Concurrent sorted map keyed by byte[] (ordered with Bytes.BYTES_COMPARATOR).
// NOTE(review): presumably tracks regions currently being opened/closed on this server, with the
// Boolean distinguishing the two — confirm; the meaning of the value is not visible here.
201 protected final ConcurrentMap<byte[], Boolean> regionsInTransitionInRS =
202 new ConcurrentSkipListMap<byte[], Boolean>(Bytes.BYTES_COMPARATOR);
203
204
// Created in initializeThreads(); interrupted (if necessary) during shutdown in run().
205 protected MemStoreFlusher cacheFlusher;
206
// Stopped during shutdown in run() when non-null; where it is created is not visible in this part
// of the file.
207 protected HeapMemoryManager hMemManager;
208
209
210
211
212
213
// Created lazily by setupClusterConnection() via createClusterConnection(); closed in run()
// during shutdown if it is not already closed.
214 protected ClusterConnection clusterConnection;
215
216
217
218
219
220
221
// Created together with clusterConnection in setupClusterConnection(); stopped in run().
222 protected MetaTableLocator metaTableLocator;
223
224
// Assigned in initializeZooKeeper(); never read in the visible code, hence the suppression.
225 @SuppressWarnings("unused")
226 private RecoveringRegionWatcher recoveringRegionWatcher;
227
228
229
230
// Set in the constructor to an FSTableDescriptors built over conf, fs and rootDir.
231 protected TableDescriptors tableDescriptors;
232
233
// Replication source and sink services.
// NOTE(review): their initialization is not visible in this part of the file.
234 protected ReplicationSourceService replicationSourceHandler;
235 protected ReplicationSinkService replicationSinkHandler;
236
237
// Created in initializeThreads() and registered as a configuration observer. During shutdown
// run() interrupts it and, before closing meta regions, joins it and sets this field to null.
238 public CompactSplitThread compactSplitThread;
239
240
241
242
243
// Regions currently hosted by this server. containsMetaTableRegions() looks entries up by
// HRegionInfo#getEncodedName(), so keys are encoded region names.
244 protected final Map<String, HRegion> onlineRegions =
245 new ConcurrentHashMap<String, HRegion>();
246
247
248
249
250
251
252
253
254
255
// Concurrent map from a String key to an array of socket addresses.
// NOTE(review): presumably encoded region name -> favored datanode addresses — confirm; not used
// in the visible code.
256 protected final Map<String, InetSocketAddress[]> regionFavoredNodesMap =
257 new ConcurrentHashMap<String, InetSocketAddress[]>();
258
259
260
261
262
// Synchronized HashMap of String -> HRegion.
// NOTE(review): presumably regions that are in recovery (log replay) — confirm; not used in the
// visible code.
263 protected final Map<String, HRegion> recoveringRegions = Collections
264 .synchronizedMap(new HashMap<String, HRegion>());
265
266
// Created in initializeThreads() with threadWakeFrequency; run() calls closeAfterLeasesExpire()
// and later close() on it during shutdown.
267 protected Leases leases;
268
269
// Executor service named after this server's short name; created in the constructor.
270 protected ExecutorService service;
271
272
// Set to true in the constructor. During shutdown run() only closes regions on abort, waits for
// regions to close, and shuts the WAL down while this is true.
273 protected volatile boolean fsOk;
// HFileSystem created in the constructor; checksum verification is driven by
// HConstants.HBASE_CHECKSUM_VERIFICATION.
274 protected HFileSystem fs;
275
276
277
278
// Read by blockAndCheckIfStopped() to break out of its wait with an IOException.
279 private volatile boolean stopped = false;
280
281
282
// Passed as the abort flag to closeUserRegions()/closeMetaTableRegions()/waitOnAllRegionsToClose()
// in run(), and selects the abort branch of the shutdown sequence.
283 private volatile boolean abortRequested;
284
// NOTE(review): not used in the visible code.
285 ConcurrentMap<String, Integer> rowlocks = new ConcurrentHashMap<String, Integer>();
286
287
288
// Set to true by the main loop in run() once it has started closing user regions after the
// cluster was seen as down. Not volatile.
289 private boolean stopping = false;
290
// When true, run() skips closing regions and stopping service threads during shutdown.
291 private volatile boolean killed = false;
292
293 protected final Configuration conf;
294
// FSUtils.getRootDir(conf), assigned in the constructor.
295 private Path rootDir;
296
// NOTE(review): not used in the visible code.
297 protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
298
// From HConstants.HBASE_CLIENT_RETRIES_NUMBER.
299 final int numRetries;
// From HConstants.THREAD_WAKE_FREQUENCY, default 10 * 1000; used as the period for the chores and
// leases created in initializeThreads().
300 protected final int threadWakeFrequency;
// From "hbase.regionserver.msginterval", default 3 * 1000; period of the sleeper and the minimum
// gap between reports sent from the main loop in run().
301 protected final int msgInterval;
302
// From "hbase.regionserver.numregionstoreport", default 10.
303 protected final int numRegionsToReport;
304
305
// Stub used by tryRegionServerReport() to report to the master; it is nulled and re-created
// there when a report fails with anything other than YouAreDeadException.
306 private volatile RegionServerStatusService.BlockingInterface rssStub;
307
// Created in initializeThreads(); closed during shutdown in run().
308 RpcClient rpcClient;
309
// Set in the constructor to a handler that aborts this server on an uncaught exception.
310 private UncaughtExceptionHandler uncaughtExceptionHandler;
311
312
313
314
// Web UI server; configureInfoServer() adds the status servlet to it, buildServerLoad() reports
// its port (or -1 when null) and run() stops it during shutdown.
315 protected InfoServer infoServer;
// Created and started in preRegistrationInitialization(); stopped in run().
316 private JvmPauseMonitor pauseMonitor;
317
318
// Process name returned by getProcessName() and attribute name under which this server is
// published to the info server.
319 public static final String REGIONSERVER = "regionserver";
320
// Source of the request figures used by buildServerLoad().
// NOTE(review): its initialization is not visible in this part of the file.
321 MetricsRegionServer metricsRegionServer;
// Obtained from SpanReceiverHost.getInstance(...) in the constructor.
322 private SpanReceiverHost spanReceiverHost;
323
324
325
326
// Created in initializeThreads() with threadWakeFrequency; interrupted in run() during shutdown.
327 Chore compactionChecker;
328
329
330
331
// Created in initializeThreads() with threadWakeFrequency.
332 Chore periodicFlusher;
333
// NOTE(review): initialization is not visible in this part of the file.
334 protected volatile WALFactory walFactory;
335
336
337
// Created as the last step of the constructor.
338 final LogRoller walRoller;
339
// NOTE(review): presumably the roller for the meta WAL, set lazily — confirm; not used in the
// visible code.
340 final AtomicReference<LogRoller> metawalRoller = new AtomicReference<LogRoller>();
341
342
// NOTE(review): presumably flipped to true once the server is online — confirm; not used in the
// visible code.
343 final AtomicBoolean online = new AtomicBoolean(false);
344
345
// Created in the constructor unless "hbase.testing.nocluster" is true; closed at the very end of
// run().
346 protected ZooKeeperWatcher zooKeeper;
347
348
// Created and started in the constructor; initializeZooKeeper() blocks until it is available.
349 private MasterAddressTracker masterAddressTracker;
350
351
// Created and started in the constructor; initializeZooKeeper() blocks until it is available and
// isClusterUp() delegates to it.
352 protected ClusterStatusTracker clusterStatusTracker;
353
354
// Stopped in run() during shutdown when non-null; creation is not visible in this part of the
// file.
355 private SplitLogWorker splitLogWorker;
356
357
// Sleeper with period msgInterval; used between reportForDuty retries and main-loop iterations.
358 protected final Sleeper sleeper;
359
// From HConstants.HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY.
360 private final int operationTimeout;
361
362 private final RegionServerAccounting regionServerAccounting;
363
364
// When non-null and the block cache is enabled, run() shuts the block cache down on exit.
365 protected CacheConfig cacheConfig;
366
367
// Created in preRegistrationInitialization() only when isHealthCheckerConfigured() is true;
// interrupted in run() during shutdown.
368 private HealthCheckChore healthCheckChore;
369
370
// Created in initializeThreads() when nonceManager is non-null; interrupted in run().
371 private Chore nonceManagerChore;
372
// Coprocessor services registered through registerService(), keyed by the service descriptor's
// full name. Plain HashMap (not a concurrent map).
373 private Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
374
375
376
377
378
379
// Built in the constructor from the RPC server's host name and port plus startcode.
380 protected ServerName serverName;
381
382
383
384
// System.currentTimeMillis() taken in the constructor right after the RPC services are created.
385 protected final long startcode;
386
387
388
389
// Read from ZooKeeper in initializeZooKeeper(); returned by getClusterId().
390 private String clusterId;
391
392
393
394
// Unregistered and reset to null at the start of the shutdown sequence in run().
395 private ObjectName mxBean = null;
396
397
398
399
// Created and started in initializeThreads(); stopped in run().
400 private MovedRegionsCleaner movedRegionsCleaner;
401
402
// Created in initializeThreads() only when the configured refresh period is > 0; interrupted in
// run().
403 private StorefileRefresherChore storefileRefresher;
404
// Created in run() after the ephemeral node has been created.
405 private RegionServerCoprocessorHost rsHost;
406
// Created, loaded and initialized in initializeZooKeeper(); started in run() after the server has
// reported for duty, and stopped during shutdown.
407 private RegionServerProcedureManagerHost rspmHost;
408
409
// Created in the constructor unless "hbase.testing.nocluster" is true.
410 protected TableLockManager tableLockManager;
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
// Null when HConstants.HBASE_RS_NONCES_ENABLED is false (it defaults to true).
430 final ServerNonceManager nonceManager;
431
// Instantiated from conf in the constructor and passed to login().
432 private UserProvider userProvider;
433
// Created in the constructor through createRpcServices(); supplies the host/port used for
// serverName and is started at the end of the constructor.
434 protected final RSRpcServices rpcServices;
435
// Initialized and started in the constructor unless "hbase.testing.nocluster" is true.
436 protected BaseCoordinatedStateManager csm;
437
// Result of ConfigUtil.useZKForAssignment(conf).
438 private final boolean useZKForAssignment;
439
440
441
442
443
// Created in the constructor; registerConfigurationObservers() registers compactSplitThread
// with it.
444 protected final ConfigurationManager configurationManager;
445
446
447
448
449
450
451
/**
 * Creates a region server whose coordinated state manager is obtained from
 * {@link CoordinatedStateManagerFactory#getCoordinatedStateManager(Configuration)}.
 *
 * @param conf configuration handed both to the factory and to the two-argument constructor
 * @throws IOException propagated from the two-argument constructor
 * @throws InterruptedException propagated from the two-argument constructor
 */
public HRegionServer(Configuration conf) throws IOException, InterruptedException {
  this(conf, CoordinatedStateManagerFactory.getCoordinatedStateManager(conf));
}
455
456
457
458
459
460
461
462
/**
 * Builds the region server: reads its settings from {@code conf}, creates the RPC services,
 * logs in, sets up the file system and table descriptors and, unless
 * "hbase.testing.nocluster" is true, connects to ZooKeeper and starts the coordinated state
 * manager and the master-address and cluster-status trackers. Finally the RPC services are
 * started, the web UI is put up and the WAL roller is created.
 *
 * Note that several overridable methods (createRpcServices, login, canUpdateTableDescriptor,
 * getProcessName, canCreateBaseZNode) are invoked from here.
 *
 * @param conf configuration to read settings from
 * @param csm coordinated state manager; cast to {@link BaseCoordinatedStateManager}
 * @throws IOException e.g. when a codec listed in "hbase.regionserver.codecs" fails its test
 * @throws InterruptedException declared by this constructor
 */
463 public HRegionServer(Configuration conf, CoordinatedStateManager csm)
464 throws IOException, InterruptedException {
465 this.fsOk = true;
466 this.conf = conf;
// Fail construction early if a required compression codec is unusable.
467 checkCodecs(this.conf);
468 this.userProvider = UserProvider.instantiate(conf);
469 FSUtils.setupShortCircuitRead(this.conf);
470
471
472 this.numRetries = this.conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
473 HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
474 this.threadWakeFrequency = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000);
475 this.msgInterval = conf.getInt("hbase.regionserver.msginterval", 3 * 1000);
476
477 this.sleeper = new Sleeper(this.msgInterval, this);
478
// The nonce manager stays null when nonces are disabled; later code null-checks it.
479 boolean isNoncesEnabled = conf.getBoolean(HConstants.HBASE_RS_NONCES_ENABLED, true);
480 this.nonceManager = isNoncesEnabled ? new ServerNonceManager(this.conf) : null;
481
482 this.numRegionsToReport = conf.getInt(
483 "hbase.regionserver.numregionstoreport", 10);
484
485 this.operationTimeout = conf.getInt(
486 HConstants.HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY,
487 HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT);
488
489 this.abortRequested = false;
490 this.stopped = false;
491
// The RPC services provide the address from which this server's name is derived.
492 rpcServices = createRpcServices();
493 this.startcode = System.currentTimeMillis();
494 String hostName = rpcServices.isa.getHostName();
495 serverName = ServerName.valueOf(hostName, rpcServices.isa.getPort(), startcode);
496
497
// Log in as a ZooKeeper client, then as the region server itself.
498 ZKUtil.loginClient(this.conf, "hbase.zookeeper.client.keytab.file",
499 "hbase.zookeeper.client.kerberos.principal", hostName);
500
501 login(userProvider, hostName);
502
503 regionServerAccounting = new RegionServerAccounting();
// Any uncaught exception in a thread using this handler aborts the whole server.
504 uncaughtExceptionHandler = new UncaughtExceptionHandler() {
505 @Override
506 public void uncaughtException(Thread t, Throwable e) {
507 abort("Uncaught exception in service thread " + t.getName(), e);
508 }
509 };
510
511 useZKForAssignment = ConfigUtil.useZKForAssignment(conf);
512
513
514
515
// Make the HBase root directory's file system the default one in conf before creating fs.
516 FSUtils.setFsDefault(this.conf, FSUtils.getRootDir(this.conf));
517
518
519 boolean useHBaseChecksum = conf.getBoolean(HConstants.HBASE_CHECKSUM_VERIFICATION, true);
520 this.fs = new HFileSystem(this.conf, useHBaseChecksum);
521 this.rootDir = FSUtils.getRootDir(this.conf);
522 this.tableDescriptors = new FSTableDescriptors(
523 this.conf, this.fs, this.rootDir, !canUpdateTableDescriptor(), false);
524
525 service = new ExecutorService(getServerName().toShortString());
526 spanReceiverHost = SpanReceiverHost.getInstance(getConfiguration());
527
528
// Everything that needs ZooKeeper is skipped when "hbase.testing.nocluster" is set.
529 if (!conf.getBoolean("hbase.testing.nocluster", false)) {
530
531 zooKeeper = new ZooKeeperWatcher(conf, getProcessName() + ":" +
532 rpcServices.isa.getPort(), this, canCreateBaseZNode());
533
534 this.csm = (BaseCoordinatedStateManager) csm;
535 this.csm.initialize(this);
536 this.csm.start();
537
538 tableLockManager = TableLockManager.createTableLockManager(
539 conf, zooKeeper, serverName);
540
541 masterAddressTracker = new MasterAddressTracker(getZooKeeper(), this);
542 masterAddressTracker.start();
543
544 clusterStatusTracker = new ClusterStatusTracker(zooKeeper, this);
545 clusterStatusTracker.start();
546 }
547 this.configurationManager = new ConfigurationManager();
548
549 rpcServices.start();
550 putUpWebUI();
551 this.walRoller = new LogRoller(this, this);
552 }
553
554 protected void login(UserProvider user, String host) throws IOException {
555 user.login("hbase.regionserver.keytab.file",
556 "hbase.regionserver.kerberos.principal", host);
557 }
558
559 protected void waitForMasterActive(){
560 }
561
562 protected String getProcessName() {
563 return REGIONSERVER;
564 }
565
566 protected boolean canCreateBaseZNode() {
567 return false;
568 }
569
570 protected boolean canUpdateTableDescriptor() {
571 return false;
572 }
573
574 protected RSRpcServices createRpcServices() throws IOException {
575 return new RSRpcServices(this);
576 }
577
578 protected void configureInfoServer() {
579 infoServer.addServlet("rs-status", "/rs-status", RSStatusServlet.class);
580 infoServer.setAttribute(REGIONSERVER, this);
581 }
582
583 protected Class<? extends HttpServlet> getDumpServlet() {
584 return RSDumpServlet.class;
585 }
586
587 protected void doMetrics() {
588 }
589
590 @Override
591 public boolean registerService(Service instance) {
592
593
594
595 Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
596 if (coprocessorServiceHandlers.containsKey(serviceDesc.getFullName())) {
597 LOG.error("Coprocessor service " + serviceDesc.getFullName()
598 + " already registered, rejecting request from " + instance);
599 return false;
600 }
601
602 coprocessorServiceHandlers.put(serviceDesc.getFullName(), instance);
603 if (LOG.isDebugEnabled()) {
604 LOG.debug("Registered regionserver coprocessor service: service=" + serviceDesc.getFullName());
605 }
606 return true;
607 }
608
609
610
611
612
613
614
615 @VisibleForTesting
616 protected ClusterConnection createClusterConnection() throws IOException {
617
618
619
620 return ConnectionUtils.createShortCircuitHConnection(
621 ConnectionFactory.createConnection(conf), serverName, rpcServices, rpcServices);
622 }
623
624
625
626
627
628
629 private static void checkCodecs(final Configuration c) throws IOException {
630
631 String [] codecs = c.getStrings("hbase.regionserver.codecs", (String[])null);
632 if (codecs == null) return;
633 for (String codec : codecs) {
634 if (!CompressionTest.testCompression(codec)) {
635 throw new IOException("Compression codec " + codec +
636 " not supported, aborting RS construction");
637 }
638 }
639 }
640
641 public String getClusterId() {
642 return this.clusterId;
643 }
644
645
646
647
648
649 protected synchronized void setupClusterConnection() throws IOException {
650 if (clusterConnection == null) {
651 clusterConnection = createClusterConnection();
652 metaTableLocator = new MetaTableLocator();
653 }
654 }
655
656
657
658
659
660
661
662 private void preRegistrationInitialization(){
663 try {
664 setupClusterConnection();
665
666
667 if (isHealthCheckerConfigured()) {
668 int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ,
669 HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
670 healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
671 }
672 this.pauseMonitor = new JvmPauseMonitor(conf);
673 pauseMonitor.start();
674
675 initializeZooKeeper();
676 if (!isStopped() && !isAborted()) {
677 initializeThreads();
678 }
679 } catch (Throwable t) {
680
681
682 this.rpcServices.stop();
683 abort("Initialization of RS failed. Hence aborting RS.", t);
684 }
685 }
686
687
688
689
690
691
692
693
694
695 private void initializeZooKeeper() throws IOException, InterruptedException {
696
697
698
699 blockAndCheckIfStopped(this.masterAddressTracker);
700
701
702
703 blockAndCheckIfStopped(this.clusterStatusTracker);
704
705
706
707
708 try {
709 clusterId = ZKClusterId.readClusterIdZNode(this.zooKeeper);
710 if (clusterId == null) {
711 this.abort("Cluster ID has not been set");
712 }
713 LOG.info("ClusterId : "+clusterId);
714 } catch (KeeperException e) {
715 this.abort("Failed to retrieve Cluster ID",e);
716 }
717
718
719
720
721
722 waitForMasterActive();
723 if (isStopped() || isAborted()) {
724 return;
725 }
726
727
728 try {
729 rspmHost = new RegionServerProcedureManagerHost();
730 rspmHost.loadProcedures(conf);
731 rspmHost.initialize(this);
732 } catch (KeeperException e) {
733 this.abort("Failed to reach zk cluster when creating procedure handler.", e);
734 }
735
736 this.recoveringRegionWatcher = new RecoveringRegionWatcher(this.zooKeeper, this);
737 }
738
739
740
741
742
743
744
745
746 private void blockAndCheckIfStopped(ZooKeeperNodeTracker tracker)
747 throws IOException, InterruptedException {
748 while (tracker.blockUntilAvailable(this.msgInterval, false) == null) {
749 if (this.stopped) {
750 throw new IOException("Received the shutdown message while waiting.");
751 }
752 }
753 }
754
755
756
757
758 private boolean isClusterUp() {
759 return clusterStatusTracker != null && clusterStatusTracker.isClusterUp();
760 }
761
762 private void initializeThreads() throws IOException {
763
764 this.cacheFlusher = new MemStoreFlusher(conf, this);
765
766
767 this.compactSplitThread = new CompactSplitThread(this);
768
769
770
771 this.compactionChecker = new CompactionChecker(this, this.threadWakeFrequency, this);
772 this.periodicFlusher = new PeriodicMemstoreFlusher(this.threadWakeFrequency, this);
773 this.leases = new Leases(this.threadWakeFrequency);
774
775
776 movedRegionsCleaner = MovedRegionsCleaner.createAndStart(this);
777
778 if (this.nonceManager != null) {
779
780 nonceManagerChore = this.nonceManager.createCleanupChore(this);
781 }
782
783
784 rpcClient = RpcClientFactory.createClient(conf, clusterId, new InetSocketAddress(
785 rpcServices.isa.getAddress(), 0));
786
787 int storefileRefreshPeriod = conf.getInt(
788 StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD
789 , StorefileRefresherChore.DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD);
790 if (storefileRefreshPeriod > 0) {
791 this.storefileRefresher = new StorefileRefresherChore(storefileRefreshPeriod, this, this);
792 }
793 registerConfigurationObservers();
794 }
795
796 private void registerConfigurationObservers() {
797
798 configurationManager.registerObserver(this.compactSplitThread);
799 }
800
801
802
803
/**
 * Body of the region server thread. In order it:
 * 1. runs preRegistrationInitialization();
 * 2. installs the shutdown hook, creates this server's ephemeral node and the coprocessor host;
 * 3. calls reportForDuty() until it gets a response and handles it;
 * 4. loops, sending a report via tryRegionServerReport() at most once per msgInterval and
 *    reacting to a cluster shutdown, until the server is stopped or unhealthy;
 * 5. performs the shutdown sequence: stops helpers, closes regions (depending on the killed /
 *    abortRequested / fsOk flags), shuts down the WAL, RPC and ZooKeeper.
 * A Throwable from steps 2-4 aborts the server unless rpcServices.checkOOME(t) returns true.
 */
804 @Override
805 public void run() {
806 try {
807
808 preRegistrationInitialization();
809 } catch (Throwable e) {
810 abort("Fatal exception during initialization", e);
811 }
812
813 try {
// Skipped entirely when initialization already stopped or aborted the server.
814 if (!isStopped() && !isAborted()) {
815 ShutdownHook.install(conf, fs, this, Thread.currentThread());
816
817 createMyEphemeralNode();
818
819
820 this.rsHost = new RegionServerCoprocessorHost(this, this.conf);
821 }
822
823
824
// Retry reportForDuty() until it returns a response, sleeping between attempts.
825 while (keepLooping()) {
826 RegionServerStartupResponse w = reportForDuty();
827 if (w == null) {
828 LOG.warn("reportForDuty failed; sleeping and then retrying.");
829 this.sleeper.sleep();
830 } else {
831 handleReportForDutyResponse(w);
832 break;
833 }
834 }
835
836 if (!isStopped() && isHealthy()){
837
838
839 rspmHost.start();
840 }
841
842
843 long lastMsg = System.currentTimeMillis();
// Write-request count seen on the previous pass; -1 means "none recorded yet".
844 long oldRequestCount = -1;
845
// Main loop.
846 while (!isStopped() && isHealthy()) {
847 if (!isClusterUp()) {
848 if (isOnlineRegionsEmpty()) {
849 stop("Exiting; cluster shutdown set and not carrying any regions");
850 } else if (!this.stopping) {
// First pass after the cluster went down: start closing user regions.
851 this.stopping = true;
852 LOG.info("Closing user regions");
853 closeUserRegions(this.abortRequested);
854 } else if (this.stopping) {
855 boolean allUserRegionsOffline = areAllUserRegionsOffline();
856 if (allUserRegionsOffline) {
857
858
859
// Only stop once the write-request count is unchanged between two consecutive passes.
860 if (oldRequestCount == getWriteRequestCount()) {
861 stop("Stopped; only catalog regions remaining online");
862 break;
863 }
864 oldRequestCount = getWriteRequestCount();
865 } else {
866
867
868
// User regions are still online; ask for them to be closed again.
869 closeUserRegions(this.abortRequested);
870 }
871 LOG.debug("Waiting on " + getOnlineRegionsAsPrintableString());
872 }
873 }
// Report to the master at most once per msgInterval.
874 long now = System.currentTimeMillis();
875 if ((now - lastMsg) >= msgInterval) {
876 tryRegionServerReport(lastMsg, now);
877 lastMsg = System.currentTimeMillis();
878 doMetrics();
879 }
880 if (!isStopped() && !isAborted()) {
881 this.sleeper.sleep();
882 }
883 }
884 } catch (Throwable t) {
// Abort unless checkOOME(t) returns true; YouAreDeadException gets no "Unhandled: " prefix.
885 if (!rpcServices.checkOOME(t)) {
886 String prefix = t instanceof YouAreDeadException? "": "Unhandled: ";
887 abort(prefix + t.getMessage(), t);
888 }
889 }
890
// ---- Shutdown sequence: runs whether the loop above exited normally or via abort. ----
891 if (mxBean != null) {
892 MBeanUtil.unregisterMBean(mxBean);
893 mxBean = null;
894 }
895 if (this.leases != null) this.leases.closeAfterLeasesExpire();
896 if (this.splitLogWorker != null) {
897 splitLogWorker.stop();
898 }
899 if (this.infoServer != null) {
900 LOG.info("Stopping infoServer");
901 try {
902 this.infoServer.stop();
903 } catch (Exception e) {
// Best effort: a failure to stop the web UI must not prevent the rest of the shutdown.
904 LOG.error("Failed to stop infoServer", e);
905 }
906 }
907
908 if (cacheConfig != null && cacheConfig.isBlockCacheEnabled()) {
909 cacheConfig.getBlockCache().shutdown();
910 }
911
912 if (movedRegionsCleaner != null) {
913 movedRegionsCleaner.stop("Region Server stopping");
914 }
915
916
917
// Stop or interrupt the background workers; each may be null if initialization did not get
// that far.
918 if(this.hMemManager != null) this.hMemManager.stop();
919 if (this.cacheFlusher != null) this.cacheFlusher.interruptIfNecessary();
920 if (this.compactSplitThread != null) this.compactSplitThread.interruptIfNecessary();
921 if (this.compactionChecker != null)
922 this.compactionChecker.interrupt();
923 if (this.healthCheckChore != null) {
924 this.healthCheckChore.interrupt();
925 }
926 if (this.nonceManagerChore != null) {
927 this.nonceManagerChore.interrupt();
928 }
929 if (this.storefileRefresher != null) {
930 this.storefileRefresher.interrupt();
931 }
932
933
934 if (rspmHost != null) {
935 rspmHost.stop(this.abortRequested || this.killed);
936 }
937
// Close user regions: never when killed; on abort only if the file system is still ok.
938 if (this.killed) {
939
940 } else if (abortRequested) {
941 if (this.fsOk) {
942 closeUserRegions(abortRequested);
943 }
944 LOG.info("aborting server " + this.serverName);
945 } else {
946 closeUserRegions(abortRequested);
947 LOG.info("stopping server " + this.serverName);
948 }
949
950
951 if (this.metaTableLocator != null) this.metaTableLocator.stop();
952 if (this.clusterConnection != null && !clusterConnection.isClosed()) {
953 try {
954 this.clusterConnection.close();
955 } catch (IOException e) {
956
957
// Log and carry on with the shutdown.
958 LOG.warn("Attempt to close server's short circuit HConnection failed.", e);
959 }
960 }
961
962
// Meta regions are closed after user regions, and only after the compaction/split thread has
// been joined.
963 if (!this.killed && containsMetaTableRegions()) {
964 if (!abortRequested || this.fsOk) {
965 if (this.compactSplitThread != null) {
966 this.compactSplitThread.join();
967 this.compactSplitThread = null;
968 }
969 closeMetaTableRegions(abortRequested);
970 }
971 }
972
973 if (!this.killed && this.fsOk) {
974 waitOnAllRegionsToClose(abortRequested);
975 LOG.info("stopping server " + this.serverName +
976 "; all regions closed.");
977 }
978
979
// The argument passed to shutdownWAL is true only when this is not an abort.
980 if (this.fsOk) {
981 shutdownWAL(!abortRequested);
982 }
983
984
985 if (this.rssStub != null) {
986 this.rssStub = null;
987 }
988 if (this.rpcClient != null) {
989 this.rpcClient.close();
990 }
991 if (this.leases != null) {
992 this.leases.close();
993 }
994 if (this.pauseMonitor != null) {
995 this.pauseMonitor.stop();
996 }
997
998 if (!killed) {
999 stopServiceThreads();
1000 }
1001
1002 if (this.rpcServices != null) {
1003 this.rpcServices.stop();
1004 }
1005
1006 try {
1007 deleteMyEphemeralNode();
1008 } catch (KeeperException.NoNodeException nn) {
// The node is already gone; nothing to do.
1009 } catch (KeeperException e) {
1010 LOG.warn("Failed deleting my ephemeral node", e);
1011 }
1012
1013
1014 ZNodeClearer.deleteMyEphemeralNodeOnDisk();
1015
1016 if (this.zooKeeper != null) {
1017 this.zooKeeper.close();
1018 }
1019 LOG.info("stopping server " + this.serverName +
1020 "; zookeeper connection closed.");
1021
1022 LOG.info(Thread.currentThread().getName() + " exiting");
1023 }
1024
1025 private boolean containsMetaTableRegions() {
1026 return onlineRegions.containsKey(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1027 }
1028
1029 private boolean areAllUserRegionsOffline() {
1030 if (getNumberOfOnlineRegions() > 2) return false;
1031 boolean allUserRegionsOffline = true;
1032 for (Map.Entry<String, HRegion> e: this.onlineRegions.entrySet()) {
1033 if (!e.getValue().getRegionInfo().isMetaTable()) {
1034 allUserRegionsOffline = false;
1035 break;
1036 }
1037 }
1038 return allUserRegionsOffline;
1039 }
1040
1041
1042
1043
1044 private long getWriteRequestCount() {
1045 int writeCount = 0;
1046 for (Map.Entry<String, HRegion> e: this.onlineRegions.entrySet()) {
1047 writeCount += e.getValue().getWriteRequestsCount();
1048 }
1049 return writeCount;
1050 }
1051
1052 @VisibleForTesting
1053 protected void tryRegionServerReport(long reportStartTime, long reportEndTime)
1054 throws IOException {
1055 RegionServerStatusService.BlockingInterface rss = rssStub;
1056 if (rss == null) {
1057
1058 return;
1059 }
1060 ClusterStatusProtos.ServerLoad sl = buildServerLoad(reportStartTime, reportEndTime);
1061 try {
1062 RegionServerReportRequest.Builder request = RegionServerReportRequest.newBuilder();
1063 ServerName sn = ServerName.parseVersionedServerName(
1064 this.serverName.getVersionedBytes());
1065 request.setServer(ProtobufUtil.toServerName(sn));
1066 request.setLoad(sl);
1067 rss.regionServerReport(null, request.build());
1068 } catch (ServiceException se) {
1069 IOException ioe = ProtobufUtil.getRemoteException(se);
1070 if (ioe instanceof YouAreDeadException) {
1071
1072 throw ioe;
1073 }
1074 if (rssStub == rss) {
1075 rssStub = null;
1076 }
1077
1078
1079 createRegionServerStatusStub();
1080 }
1081 }
1082
1083 ClusterStatusProtos.ServerLoad buildServerLoad(long reportStartTime, long reportEndTime)
1084 throws IOException {
1085
1086
1087
1088
1089
1090
1091
1092 MetricsRegionServerWrapper regionServerWrapper = this.metricsRegionServer.getRegionServerWrapper();
1093 Collection<HRegion> regions = getOnlineRegionsLocalContext();
1094 MemoryUsage memory =
1095 ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
1096
1097 ClusterStatusProtos.ServerLoad.Builder serverLoad =
1098 ClusterStatusProtos.ServerLoad.newBuilder();
1099 serverLoad.setNumberOfRequests((int) regionServerWrapper.getRequestsPerSecond());
1100 serverLoad.setTotalNumberOfRequests((int) regionServerWrapper.getTotalRequestCount());
1101 serverLoad.setUsedHeapMB((int)(memory.getUsed() / 1024 / 1024));
1102 serverLoad.setMaxHeapMB((int) (memory.getMax() / 1024 / 1024));
1103 Set<String> coprocessors = getWAL(null).getCoprocessorHost().getCoprocessors();
1104 for (String coprocessor : coprocessors) {
1105 serverLoad.addCoprocessors(
1106 Coprocessor.newBuilder().setName(coprocessor).build());
1107 }
1108 RegionLoad.Builder regionLoadBldr = RegionLoad.newBuilder();
1109 RegionSpecifier.Builder regionSpecifier = RegionSpecifier.newBuilder();
1110 for (HRegion region : regions) {
1111 serverLoad.addRegionLoads(createRegionLoad(region, regionLoadBldr, regionSpecifier));
1112 for (String coprocessor :
1113 getWAL(region.getRegionInfo()).getCoprocessorHost().getCoprocessors()) {
1114 serverLoad.addCoprocessors(Coprocessor.newBuilder().setName(coprocessor).build());
1115 }
1116 }
1117 serverLoad.setReportStartTime(reportStartTime);
1118 serverLoad.setReportEndTime(reportEndTime);
1119 if (this.infoServer != null) {
1120 serverLoad.setInfoServerPort(this.infoServer.getPort());
1121 } else {
1122 serverLoad.setInfoServerPort(-1);
1123 }
1124
1125
1126
1127 ReplicationSourceService rsources = getReplicationSourceService();
1128
1129 if (rsources != null) {
1130
1131 ReplicationLoad rLoad = rsources.refreshAndGetReplicationLoad();
1132 if (rLoad != null) {
1133 serverLoad.setReplLoadSink(rLoad.getReplicationLoadSink());
1134 for (ClusterStatusProtos.ReplicationLoadSource rLS : rLoad.getReplicationLoadSourceList()) {
1135 serverLoad.addReplLoadSource(rLS);
1136 }
1137 }
1138 }
1139
1140 return serverLoad.build();
1141 }
1142
1143 String getOnlineRegionsAsPrintableString() {
1144 StringBuilder sb = new StringBuilder();
1145 for (HRegion r: this.onlineRegions.values()) {
1146 if (sb.length() > 0) sb.append(", ");
1147 sb.append(r.getRegionInfo().getEncodedName());
1148 }
1149 return sb.toString();
1150 }
1151
1152
1153
1154
1155 private void waitOnAllRegionsToClose(final boolean abort) {
1156
1157 int lastCount = -1;
1158 long previousLogTime = 0;
1159 Set<String> closedRegions = new HashSet<String>();
1160 boolean interrupted = false;
1161 try {
1162 while (!isOnlineRegionsEmpty()) {
1163 int count = getNumberOfOnlineRegions();
1164
1165 if (count != lastCount) {
1166
1167 if (System.currentTimeMillis() > (previousLogTime + 1000)) {
1168 previousLogTime = System.currentTimeMillis();
1169 lastCount = count;
1170 LOG.info("Waiting on " + count + " regions to close");
1171
1172
1173 if (count < 10 && LOG.isDebugEnabled()) {
1174 LOG.debug(this.onlineRegions);
1175 }
1176 }
1177 }
1178
1179
1180
1181 for (Map.Entry<String, HRegion> e : this.onlineRegions.entrySet()) {
1182 HRegionInfo hri = e.getValue().getRegionInfo();
1183 if (!this.regionsInTransitionInRS.containsKey(hri.getEncodedNameAsBytes())
1184 && !closedRegions.contains(hri.getEncodedName())) {
1185 closedRegions.add(hri.getEncodedName());
1186
1187 closeRegionIgnoreErrors(hri, abort);
1188 }
1189 }
1190
1191 if (this.regionsInTransitionInRS.isEmpty()) {
1192 if (!isOnlineRegionsEmpty()) {
1193 LOG.info("We were exiting though online regions are not empty," +
1194 " because some regions failed closing");
1195 }
1196 break;
1197 }
1198 if (sleep(200)) {
1199 interrupted = true;
1200 }
1201 }
1202 } finally {
1203 if (interrupted) {
1204 Thread.currentThread().interrupt();
1205 }
1206 }
1207 }
1208
1209 private boolean sleep(long millis) {
1210 boolean interrupted = false;
1211 try {
1212 Thread.sleep(millis);
1213 } catch (InterruptedException e) {
1214 LOG.warn("Interrupted while sleeping");
1215 interrupted = true;
1216 }
1217 return interrupted;
1218 }
1219
1220 private void shutdownWAL(final boolean close) {
1221 if (this.walFactory != null) {
1222 try {
1223 if (close) {
1224 walFactory.close();
1225 } else {
1226 walFactory.shutdown();
1227 }
1228 } catch (Throwable e) {
1229 e = RemoteExceptionHandler.checkThrowable(e);
1230 LOG.error("Shutdown / close of WAL failed: " + e);
1231 LOG.debug("Shutdown / close exception details:", e);
1232 }
1233 }
1234 }
1235
1236
1237
1238
1239
1240
1241 protected void handleReportForDutyResponse(final RegionServerStartupResponse c)
1242 throws IOException {
1243 try {
1244 for (NameStringPair e : c.getMapEntriesList()) {
1245 String key = e.getName();
1246
1247 if (key.equals(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER)) {
1248 String hostnameFromMasterPOV = e.getValue();
1249 this.serverName = ServerName.valueOf(hostnameFromMasterPOV,
1250 rpcServices.isa.getPort(), this.startcode);
1251 if (!hostnameFromMasterPOV.equals(rpcServices.isa.getHostName())) {
1252 LOG.info("Master passed us a different hostname to use; was=" +
1253 rpcServices.isa.getHostName() + ", but now=" + hostnameFromMasterPOV);
1254 }
1255 continue;
1256 }
1257 String value = e.getValue();
1258 if (LOG.isDebugEnabled()) {
1259 LOG.info("Config from master: " + key + "=" + value);
1260 }
1261 this.conf.set(key, value);
1262 }
1263
1264
1265
1266 if (this.conf.get("mapreduce.task.attempt.id") == null) {
1267 this.conf.set("mapreduce.task.attempt.id", "hb_rs_" +
1268 this.serverName.toString());
1269 }
1270
1271
1272 ZNodeClearer.writeMyEphemeralNodeOnDisk(getMyEphemeralNodePath());
1273
1274 this.cacheConfig = new CacheConfig(conf);
1275 this.walFactory = setupWALAndReplication();
1276
1277 this.metricsRegionServer = new MetricsRegionServer(new MetricsRegionServerWrapperImpl(this));
1278
1279 startServiceThreads();
1280 startHeapMemoryManager();
1281 LOG.info("Serving as " + this.serverName +
1282 ", RpcServer on " + rpcServices.isa +
1283 ", sessionid=0x" +
1284 Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()));
1285
1286
1287 synchronized (online) {
1288 online.set(true);
1289 online.notifyAll();
1290 }
1291 } catch (Throwable e) {
1292 stop("Failed initialization");
1293 throw convertThrowableToIOE(cleanup(e, "Failed init"),
1294 "Region server startup failed");
1295 } finally {
1296 sleeper.skipSleepCycle();
1297 }
1298 }
1299
1300 private void startHeapMemoryManager() {
1301 this.hMemManager = HeapMemoryManager.create(this.conf, this.cacheFlusher, this);
1302 if (this.hMemManager != null) {
1303 this.hMemManager.start();
1304 }
1305 }
1306
1307 private void createMyEphemeralNode() throws KeeperException, IOException {
1308 RegionServerInfo.Builder rsInfo = RegionServerInfo.newBuilder();
1309 rsInfo.setInfoPort(infoServer != null ? infoServer.getPort() : -1);
1310 byte[] data = ProtobufUtil.prependPBMagic(rsInfo.build().toByteArray());
1311 ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper,
1312 getMyEphemeralNodePath(), data);
1313 }
1314
1315 private void deleteMyEphemeralNode() throws KeeperException {
1316 ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath());
1317 }
1318
1319 @Override
1320 public RegionServerAccounting getRegionServerAccounting() {
1321 return regionServerAccounting;
1322 }
1323
1324 @Override
1325 public TableLockManager getTableLockManager() {
1326 return tableLockManager;
1327 }
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337 private RegionLoad createRegionLoad(final HRegion r, RegionLoad.Builder regionLoadBldr,
1338 RegionSpecifier.Builder regionSpecifier) {
1339 byte[] name = r.getRegionName();
1340 int stores = 0;
1341 int storefiles = 0;
1342 int storeUncompressedSizeMB = 0;
1343 int storefileSizeMB = 0;
1344 int memstoreSizeMB = (int) (r.memstoreSize.get() / 1024 / 1024);
1345 int storefileIndexSizeMB = 0;
1346 int rootIndexSizeKB = 0;
1347 int totalStaticIndexSizeKB = 0;
1348 int totalStaticBloomSizeKB = 0;
1349 long totalCompactingKVs = 0;
1350 long currentCompactedKVs = 0;
1351 synchronized (r.stores) {
1352 stores += r.stores.size();
1353 for (Store store : r.stores.values()) {
1354 storefiles += store.getStorefilesCount();
1355 storeUncompressedSizeMB += (int) (store.getStoreSizeUncompressed()
1356 / 1024 / 1024);
1357 storefileSizeMB += (int) (store.getStorefilesSize() / 1024 / 1024);
1358 storefileIndexSizeMB += (int) (store.getStorefilesIndexSize() / 1024 / 1024);
1359 CompactionProgress progress = store.getCompactionProgress();
1360 if (progress != null) {
1361 totalCompactingKVs += progress.totalCompactingKVs;
1362 currentCompactedKVs += progress.currentCompactedKVs;
1363 }
1364
1365 rootIndexSizeKB +=
1366 (int) (store.getStorefilesIndexSize() / 1024);
1367
1368 totalStaticIndexSizeKB +=
1369 (int) (store.getTotalStaticIndexSize() / 1024);
1370
1371 totalStaticBloomSizeKB +=
1372 (int) (store.getTotalStaticBloomSize() / 1024);
1373 }
1374 }
1375 float dataLocality =
1376 r.getHDFSBlocksDistribution().getBlockLocalityIndex(serverName.getHostname());
1377 if (regionLoadBldr == null) {
1378 regionLoadBldr = RegionLoad.newBuilder();
1379 }
1380 if (regionSpecifier == null) {
1381 regionSpecifier = RegionSpecifier.newBuilder();
1382 }
1383 regionSpecifier.setType(RegionSpecifierType.REGION_NAME);
1384 regionSpecifier.setValue(ByteStringer.wrap(name));
1385 regionLoadBldr.setRegionSpecifier(regionSpecifier.build())
1386 .setStores(stores)
1387 .setStorefiles(storefiles)
1388 .setStoreUncompressedSizeMB(storeUncompressedSizeMB)
1389 .setStorefileSizeMB(storefileSizeMB)
1390 .setMemstoreSizeMB(memstoreSizeMB)
1391 .setStorefileIndexSizeMB(storefileIndexSizeMB)
1392 .setRootIndexSizeKB(rootIndexSizeKB)
1393 .setTotalStaticIndexSizeKB(totalStaticIndexSizeKB)
1394 .setTotalStaticBloomSizeKB(totalStaticBloomSizeKB)
1395 .setReadRequestsCount(r.readRequestsCount.get())
1396 .setWriteRequestsCount(r.writeRequestsCount.get())
1397 .setTotalCompactingKVs(totalCompactingKVs)
1398 .setCurrentCompactedKVs(currentCompactedKVs)
1399 .setCompleteSequenceId(r.lastFlushSeqId)
1400 .setDataLocality(dataLocality);
1401
1402 return regionLoadBldr.build();
1403 }
1404
1405
1406
1407
1408
1409 public RegionLoad createRegionLoad(final String encodedRegionName) {
1410 HRegion r = null;
1411 r = this.onlineRegions.get(encodedRegionName);
1412 return r != null ? createRegionLoad(r, null, null) : null;
1413 }
1414
1415
1416
1417
1418 private static class CompactionChecker extends Chore {
1419 private final HRegionServer instance;
1420 private final int majorCompactPriority;
1421 private final static int DEFAULT_PRIORITY = Integer.MAX_VALUE;
1422 private long iteration = 0;
1423
1424 CompactionChecker(final HRegionServer h, final int sleepTime,
1425 final Stoppable stopper) {
1426 super("CompactionChecker", sleepTime, h);
1427 this.instance = h;
1428 LOG.info(this.getName() + " runs every " + StringUtils.formatTime(sleepTime));
1429
1430
1431
1432
1433 this.majorCompactPriority = this.instance.conf.
1434 getInt("hbase.regionserver.compactionChecker.majorCompactPriority",
1435 DEFAULT_PRIORITY);
1436 }
1437
1438 @Override
1439 protected void chore() {
1440 for (HRegion r : this.instance.onlineRegions.values()) {
1441 if (r == null)
1442 continue;
1443 for (Store s : r.getStores().values()) {
1444 try {
1445 long multiplier = s.getCompactionCheckMultiplier();
1446 assert multiplier > 0;
1447 if (iteration % multiplier != 0) continue;
1448 if (s.needsCompaction()) {
1449
1450 this.instance.compactSplitThread.requestSystemCompaction(r, s, getName()
1451 + " requests compaction");
1452 } else if (s.isMajorCompaction()) {
1453 if (majorCompactPriority == DEFAULT_PRIORITY
1454 || majorCompactPriority > r.getCompactPriority()) {
1455 this.instance.compactSplitThread.requestCompaction(r, s, getName()
1456 + " requests major compaction; use default priority", null);
1457 } else {
1458 this.instance.compactSplitThread.requestCompaction(r, s, getName()
1459 + " requests major compaction; use configured priority",
1460 this.majorCompactPriority, null);
1461 }
1462 }
1463 } catch (IOException e) {
1464 LOG.warn("Failed major compaction check on " + r, e);
1465 }
1466 }
1467 }
1468 iteration = (iteration == Long.MAX_VALUE) ? 0 : (iteration + 1);
1469 }
1470 }
1471
1472 static class PeriodicMemstoreFlusher extends Chore {
1473 final HRegionServer server;
1474 final static int RANGE_OF_DELAY = 20000;
1475 final static int MIN_DELAY_TIME = 3000;
1476 public PeriodicMemstoreFlusher(int cacheFlushInterval, final HRegionServer server) {
1477 super(server.getServerName() + "-MemstoreFlusherChore", cacheFlushInterval, server);
1478 this.server = server;
1479 }
1480
1481 @Override
1482 protected void chore() {
1483 for (HRegion r : this.server.onlineRegions.values()) {
1484 if (r == null)
1485 continue;
1486 if (r.shouldFlush()) {
1487 FlushRequester requester = server.getFlushRequester();
1488 if (requester != null) {
1489 long randomDelay = RandomUtils.nextInt(RANGE_OF_DELAY) + MIN_DELAY_TIME;
1490 LOG.info(getName() + " requesting flush for region " + r.getRegionNameAsString() +
1491 " after a delay of " + randomDelay);
1492
1493
1494
1495 requester.requestDelayedFlush(r, randomDelay);
1496 }
1497 }
1498 }
1499 }
1500 }
1501
1502
1503
1504
1505
1506
1507
1508
1509 public boolean isOnline() {
1510 return online.get();
1511 }
1512
1513
1514
1515
1516
1517
1518
1519 private WALFactory setupWALAndReplication() throws IOException {
1520
1521 final Path oldLogDir = new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME);
1522 final String logName = DefaultWALProvider.getWALDirectoryName(this.serverName.toString());
1523
1524 Path logdir = new Path(rootDir, logName);
1525 if (LOG.isDebugEnabled()) LOG.debug("logdir=" + logdir);
1526 if (this.fs.exists(logdir)) {
1527 throw new RegionServerRunningException("Region server has already " +
1528 "created directory at " + this.serverName.toString());
1529 }
1530
1531
1532
1533 createNewReplicationInstance(conf, this, this.fs, logdir, oldLogDir);
1534
1535
1536 final List<WALActionsListener> listeners = new ArrayList<WALActionsListener>();
1537 listeners.add(new MetricsWAL());
1538 if (this.replicationSourceHandler != null &&
1539 this.replicationSourceHandler.getWALActionsListener() != null) {
1540
1541 listeners.add(this.replicationSourceHandler.getWALActionsListener());
1542 }
1543
1544 return new WALFactory(conf, listeners, serverName.toString());
1545 }
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555 protected LogRoller ensureMetaWALRoller() {
1556
1557
1558 LogRoller roller = metawalRoller.get();
1559 if (null == roller) {
1560 LogRoller tmpLogRoller = new LogRoller(this, this);
1561 String n = Thread.currentThread().getName();
1562 Threads.setDaemonThreadRunning(tmpLogRoller.getThread(),
1563 n + "-MetaLogRoller", uncaughtExceptionHandler);
1564 if (metawalRoller.compareAndSet(null, tmpLogRoller)) {
1565 roller = tmpLogRoller;
1566 } else {
1567
1568 Threads.shutdown(tmpLogRoller.getThread());
1569 roller = metawalRoller.get();
1570 }
1571 }
1572 return roller;
1573 }
1574
1575 public MetricsRegionServer getRegionServerMetrics() {
1576 return this.metricsRegionServer;
1577 }
1578
1579
1580
1581
1582 public MasterAddressTracker getMasterAddressTracker() {
1583 return this.masterAddressTracker;
1584 }
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
/**
 * Starts the executor services, background threads and workers of this region server.
 * <p>
 * In order: the region open/close, parallel-seek (only if enabled) and log-replay
 * executors; the WAL roller, cache flusher, compaction checker and periodic flusher
 * threads; the optional health-check, nonce-cleaner and store-file-refresher threads;
 * the lease checker; the replication services; and finally the split log worker.
 *
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private void startServiceThreads() throws IOException {
  // Executor services; pool sizes come from the configuration with the defaults below.
  this.service.startExecutorService(ExecutorType.RS_OPEN_REGION,
    conf.getInt("hbase.regionserver.executor.openregion.threads", 3));
  this.service.startExecutorService(ExecutorType.RS_OPEN_META,
    conf.getInt("hbase.regionserver.executor.openmeta.threads", 1));
  this.service.startExecutorService(ExecutorType.RS_CLOSE_REGION,
    conf.getInt("hbase.regionserver.executor.closeregion.threads", 3));
  this.service.startExecutorService(ExecutorType.RS_CLOSE_META,
    conf.getInt("hbase.regionserver.executor.closemeta.threads", 1));
  // The parallel-seek executor exists only when the feature is switched on.
  if (conf.getBoolean(StoreScanner.STORESCANNER_PARALLEL_SEEK_ENABLE, false)) {
    this.service.startExecutorService(ExecutorType.RS_PARALLEL_SEEK,
      conf.getInt("hbase.storescanner.parallel.seek.threads", 10));
  }
  this.service.startExecutorService(ExecutorType.RS_LOG_REPLAY_OPS, conf.getInt(
    "hbase.regionserver.wal.max.splitters", SplitLogWorkerCoordination.DEFAULT_MAX_SPLITTERS));

  // Background threads, all started as daemons with the shared uncaught-exception handler.
  Threads.setDaemonThreadRunning(this.walRoller.getThread(), getName() + ".logRoller",
    uncaughtExceptionHandler);
  this.cacheFlusher.start(uncaughtExceptionHandler);
  Threads.setDaemonThreadRunning(this.compactionChecker.getThread(), getName() +
    ".compactionChecker", uncaughtExceptionHandler);
  Threads.setDaemonThreadRunning(this.periodicFlusher.getThread(), getName() +
    ".periodicFlusher", uncaughtExceptionHandler);
  // The next three chores are optional and started only when they were created.
  if (this.healthCheckChore != null) {
    Threads.setDaemonThreadRunning(this.healthCheckChore.getThread(), getName() + ".healthChecker",
      uncaughtExceptionHandler);
  }
  if (this.nonceManagerChore != null) {
    Threads.setDaemonThreadRunning(this.nonceManagerChore.getThread(), getName() + ".nonceCleaner",
      uncaughtExceptionHandler);
  }
  if (this.storefileRefresher != null) {
    Threads.setDaemonThreadRunning(this.storefileRefresher.getThread(), getName() + ".storefileRefresher",
      uncaughtExceptionHandler);
  }

  // The lease checker is started directly rather than through Threads.setDaemonThreadRunning.
  this.leases.setName(getName() + ".leaseChecker");
  this.leases.start();

  // When source and sink are the same object, start it only once.
  if (this.replicationSourceHandler == this.replicationSinkHandler &&
      this.replicationSourceHandler != null) {
    this.replicationSourceHandler.startReplicationService();
  } else {
    if (this.replicationSourceHandler != null) {
      this.replicationSourceHandler.startReplicationService();
    }
    if (this.replicationSinkHandler != null) {
      this.replicationSinkHandler.startReplicationService();
    }
  }

  // The split log worker gets its own copy of the configuration with log-replay specific
  // client retry count and RPC timeout, and a server-side retries multiplier of 1.
  Configuration sinkConf = HBaseConfiguration.create(conf);
  sinkConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
    conf.getInt("hbase.log.replay.retries.number", 8));
  sinkConf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY,
    conf.getInt("hbase.log.replay.rpc.timeout", 30000));
  sinkConf.setInt("hbase.client.serverside.retries.multiplier", 1);
  this.splitLogWorker = new SplitLogWorker(this, sinkConf, this, this, walFactory);
  splitLogWorker.start();
}
1665
1666
1667
1668
1669
1670
1671 private int putUpWebUI() throws IOException {
1672 int port = this.conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
1673 HConstants.DEFAULT_REGIONSERVER_INFOPORT);
1674
1675 if (port < 0) return port;
1676 String addr = this.conf.get("hbase.regionserver.info.bindAddress", "0.0.0.0");
1677 if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) {
1678 String msg =
1679 "Failed to start http info server. Address " + addr
1680 + " does not belong to this host. Correct configuration parameter: "
1681 + "hbase.regionserver.info.bindAddress";
1682 LOG.error(msg);
1683 throw new IOException(msg);
1684 }
1685
1686 boolean auto = this.conf.getBoolean(HConstants.REGIONSERVER_INFO_PORT_AUTO,
1687 false);
1688 while (true) {
1689 try {
1690 this.infoServer = new InfoServer(getProcessName(), addr, port, false, this.conf);
1691 infoServer.addServlet("dump", "/dump", getDumpServlet());
1692 configureInfoServer();
1693 this.infoServer.start();
1694 break;
1695 } catch (BindException e) {
1696 if (!auto) {
1697
1698 LOG.error("Failed binding http info server to port: " + port);
1699 throw e;
1700 }
1701
1702 LOG.info("Failed binding http info server to port: " + port);
1703 port++;
1704 }
1705 }
1706 port = this.infoServer.getPort();
1707 conf.setInt(HConstants.REGIONSERVER_INFO_PORT, port);
1708 int masterInfoPort = conf.getInt(HConstants.MASTER_INFO_PORT,
1709 HConstants.DEFAULT_MASTER_INFOPORT);
1710 conf.setInt("hbase.master.info.port.orig", masterInfoPort);
1711 conf.setInt(HConstants.MASTER_INFO_PORT, port);
1712 return port;
1713 }
1714
1715
1716
1717
1718 private boolean isHealthy() {
1719 if (!fsOk) {
1720
1721 return false;
1722 }
1723
1724 if (!(leases.isAlive()
1725 && cacheFlusher.isAlive() && walRoller.isAlive()
1726 && this.compactionChecker.isAlive()
1727 && this.periodicFlusher.isAlive())) {
1728 stop("One or more threads are no longer alive -- stop");
1729 return false;
1730 }
1731 final LogRoller metawalRoller = this.metawalRoller.get();
1732 if (metawalRoller != null && !metawalRoller.isAlive()) {
1733 stop("Meta WAL roller thread is no longer alive -- stop");
1734 return false;
1735 }
1736 return true;
1737 }
1738
1739 private static final byte[] UNSPECIFIED_REGION = new byte[]{};
1740
1741 @Override
1742 public WAL getWAL(HRegionInfo regionInfo) throws IOException {
1743 WAL wal;
1744 LogRoller roller = walRoller;
1745
1746 if (regionInfo != null && regionInfo.isMetaTable()) {
1747 roller = ensureMetaWALRoller();
1748 wal = walFactory.getMetaWAL(regionInfo.getEncodedNameAsBytes());
1749 } else if (regionInfo == null) {
1750 wal = walFactory.getWAL(UNSPECIFIED_REGION);
1751 } else {
1752 wal = walFactory.getWAL(regionInfo.getEncodedNameAsBytes());
1753 }
1754 roller.addWAL(wal);
1755 return wal;
1756 }
1757
1758 @Override
1759 public ClusterConnection getConnection() {
1760 return this.clusterConnection;
1761 }
1762
1763 @Override
1764 public MetaTableLocator getMetaTableLocator() {
1765 return this.metaTableLocator;
1766 }
1767
1768 @Override
1769 public void stop(final String msg) {
1770 if (!this.stopped) {
1771 try {
1772 if (this.rsHost != null) {
1773 this.rsHost.preStop(msg);
1774 }
1775 this.stopped = true;
1776 LOG.info("STOPPED: " + msg);
1777
1778 sleeper.skipSleepCycle();
1779 } catch (IOException exp) {
1780 LOG.warn("The region server did not stop", exp);
1781 }
1782 }
1783 }
1784
1785 public void waitForServerOnline(){
1786 while (!isStopped() && !isOnline()) {
1787 synchronized (online) {
1788 try {
1789 online.wait(msgInterval);
1790 } catch (InterruptedException ie) {
1791 Thread.currentThread().interrupt();
1792 break;
1793 }
1794 }
1795 }
1796 }
1797
1798 @Override
1799 public void postOpenDeployTasks(final HRegion r)
1800 throws KeeperException, IOException {
1801 rpcServices.checkOpen();
1802 LOG.info("Post open deploy tasks for " + r.getRegionNameAsString());
1803
1804 for (Store s : r.getStores().values()) {
1805 if (s.hasReferences() || s.needsCompaction()) {
1806 this.compactSplitThread.requestSystemCompaction(r, s, "Opening Region");
1807 }
1808 }
1809 long openSeqNum = r.getOpenSeqNum();
1810 if (openSeqNum == HConstants.NO_SEQNUM) {
1811
1812 LOG.error("No sequence number found when opening " + r.getRegionNameAsString());
1813 openSeqNum = 0;
1814 }
1815
1816
1817 updateRecoveringRegionLastFlushedSequenceId(r);
1818
1819
1820 if (r.getRegionInfo().isMetaRegion()) {
1821 MetaTableLocator.setMetaLocation(getZooKeeper(), serverName, State.OPEN);
1822 } else if (useZKForAssignment) {
1823 MetaTableAccessor.updateRegionLocation(getConnection(), r.getRegionInfo(),
1824 this.serverName, openSeqNum);
1825 }
1826 if (!useZKForAssignment && !reportRegionStateTransition(
1827 TransitionCode.OPENED, openSeqNum, r.getRegionInfo())) {
1828 throw new IOException("Failed to report opened region to master: "
1829 + r.getRegionNameAsString());
1830 }
1831
1832 LOG.debug("Finished post open deploy task for " + r.getRegionNameAsString());
1833 }
1834
1835 @Override
1836 public boolean reportRegionStateTransition(TransitionCode code, HRegionInfo... hris) {
1837 return reportRegionStateTransition(code, HConstants.NO_SEQNUM, hris);
1838 }
1839
1840 @Override
1841 public boolean reportRegionStateTransition(
1842 TransitionCode code, long openSeqNum, HRegionInfo... hris) {
1843 ReportRegionStateTransitionRequest.Builder builder =
1844 ReportRegionStateTransitionRequest.newBuilder();
1845 builder.setServer(ProtobufUtil.toServerName(serverName));
1846 RegionStateTransition.Builder transition = builder.addTransitionBuilder();
1847 transition.setTransitionCode(code);
1848 if (code == TransitionCode.OPENED && openSeqNum >= 0) {
1849 transition.setOpenSeqNum(openSeqNum);
1850 }
1851 for (HRegionInfo hri: hris) {
1852 transition.addRegionInfo(HRegionInfo.convert(hri));
1853 }
1854 ReportRegionStateTransitionRequest request = builder.build();
1855 while (keepLooping()) {
1856 RegionServerStatusService.BlockingInterface rss = rssStub;
1857 try {
1858 if (rss == null) {
1859 createRegionServerStatusStub();
1860 continue;
1861 }
1862 ReportRegionStateTransitionResponse response =
1863 rss.reportRegionStateTransition(null, request);
1864 if (response.hasErrorMessage()) {
1865 LOG.info("Failed to transition " + hris[0]
1866 + " to " + code + ": " + response.getErrorMessage());
1867 return false;
1868 }
1869 return true;
1870 } catch (ServiceException se) {
1871 IOException ioe = ProtobufUtil.getRemoteException(se);
1872 LOG.info("Failed to report region transition, will retry", ioe);
1873 if (rssStub == rss) {
1874 rssStub = null;
1875 }
1876 }
1877 }
1878 return false;
1879 }
1880
1881 @Override
1882 public RpcServerInterface getRpcServer() {
1883 return rpcServices.rpcServer;
1884 }
1885
1886 @VisibleForTesting
1887 public RSRpcServices getRSRpcServices() {
1888 return rpcServices;
1889 }
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901 @Override
1902 public void abort(String reason, Throwable cause) {
1903 String msg = "ABORTING region server " + this + ": " + reason;
1904 if (cause != null) {
1905 LOG.fatal(msg, cause);
1906 } else {
1907 LOG.fatal(msg);
1908 }
1909 this.abortRequested = true;
1910
1911
1912
1913 LOG.fatal("RegionServer abort: loaded coprocessors are: " +
1914 CoprocessorHost.getLoadedCoprocessors());
1915
1916 try {
1917 LOG.info("Dump of metrics as JSON on abort: " + JSONBean.dumpRegionServerMetrics());
1918 } catch (MalformedObjectNameException | IOException e) {
1919 LOG.warn("Failed dumping metrics", e);
1920 }
1921
1922
1923 try {
1924 if (cause != null) {
1925 msg += "\nCause:\n" + StringUtils.stringifyException(cause);
1926 }
1927
1928 if (rssStub != null && this.serverName != null) {
1929 ReportRSFatalErrorRequest.Builder builder =
1930 ReportRSFatalErrorRequest.newBuilder();
1931 ServerName sn =
1932 ServerName.parseVersionedServerName(this.serverName.getVersionedBytes());
1933 builder.setServer(ProtobufUtil.toServerName(sn));
1934 builder.setErrorMessage(msg);
1935 rssStub.reportRSFatalError(null, builder.build());
1936 }
1937 } catch (Throwable t) {
1938 LOG.warn("Unable to report fatal error to master", t);
1939 }
1940 stop(reason);
1941 }
1942
1943
1944
1945
1946 public void abort(String reason) {
1947 abort(reason, null);
1948 }
1949
1950 @Override
1951 public boolean isAborted() {
1952 return this.abortRequested;
1953 }
1954
1955
1956
1957
1958
1959
1960 protected void kill() {
1961 this.killed = true;
1962 abort("Simulated kill");
1963 }
1964
1965
1966
1967
1968
1969 protected void stopServiceThreads() {
1970 if (this.nonceManagerChore != null) {
1971 Threads.shutdown(this.nonceManagerChore.getThread());
1972 }
1973 if (this.compactionChecker != null) {
1974 Threads.shutdown(this.compactionChecker.getThread());
1975 }
1976 if (this.periodicFlusher != null) {
1977 Threads.shutdown(this.periodicFlusher.getThread());
1978 }
1979 if (this.cacheFlusher != null) {
1980 this.cacheFlusher.join();
1981 }
1982 if (this.healthCheckChore != null) {
1983 Threads.shutdown(this.healthCheckChore.getThread());
1984 }
1985 if (this.spanReceiverHost != null) {
1986 this.spanReceiverHost.closeReceivers();
1987 }
1988 if (this.walRoller != null) {
1989 Threads.shutdown(this.walRoller.getThread());
1990 }
1991 final LogRoller metawalRoller = this.metawalRoller.get();
1992 if (metawalRoller != null) {
1993 Threads.shutdown(metawalRoller.getThread());
1994 }
1995 if (this.compactSplitThread != null) {
1996 this.compactSplitThread.join();
1997 }
1998 if (this.service != null) this.service.shutdown();
1999 if (this.replicationSourceHandler != null &&
2000 this.replicationSourceHandler == this.replicationSinkHandler) {
2001 this.replicationSourceHandler.stopReplicationService();
2002 } else {
2003 if (this.replicationSourceHandler != null) {
2004 this.replicationSourceHandler.stopReplicationService();
2005 }
2006 if (this.replicationSinkHandler != null) {
2007 this.replicationSinkHandler.stopReplicationService();
2008 }
2009 }
2010 if (this.storefileRefresher != null) {
2011 Threads.shutdown(this.storefileRefresher.getThread());
2012 }
2013 }
2014
2015
2016
2017
2018
2019 ReplicationSourceService getReplicationSourceService() {
2020 return replicationSourceHandler;
2021 }
2022
2023
2024
2025
2026
2027 ReplicationSinkService getReplicationSinkService() {
2028 return replicationSinkHandler;
2029 }
2030
2031
2032
2033
2034
2035
2036
2037
2038
/**
 * Looks up the current master and creates the stub used to talk to its region server
 * status service, retrying while {@code keepLooping()} holds.
 * <p>
 * If a stub already exists, nothing is created and the master address known to the
 * tracker is returned. On exit {@code rssStub} is assigned whatever stub was obtained,
 * which is {@code null} when the loop ended before a connection succeeded. An interrupt
 * received while sleeping between attempts is restored on the calling thread.
 *
 * @return the master's server name as last read from the tracker; {@code null} if no
 *         master was found. Note that a non-null name does not imply that
 *         {@code rssStub} is non-null.
 */
private synchronized ServerName createRegionServerStatusStub() {
  // Somebody else already created the stub; just report the master's address.
  if (rssStub != null) {
    return masterAddressTracker.getMasterAddress();
  }
  ServerName sn = null;
  long previousLogTime = 0;
  boolean refresh = false; // passed to the tracker; set after the first failed lookup
  RegionServerStatusService.BlockingInterface intf = null;
  boolean interrupted = false;
  try {
    while (keepLooping()) {
      sn = this.masterAddressTracker.getMasterAddress(refresh);
      if (sn == null) {
        if (!keepLooping()) {
          // Stop requested or cluster down while no master is known: give up.
          LOG.debug("No master found and cluster is stopped; bailing out");
          return null;
        }
        // Log at most once a second while waiting for a master.
        if (System.currentTimeMillis() > (previousLogTime + 1000)) {
          LOG.debug("No master found; retry");
          previousLogTime = System.currentTimeMillis();
        }
        refresh = true; // ask the tracker to refresh on the next lookup
        if (sleep(200)) {
          interrupted = true;
        }
        continue;
      }

      // When this process is the active master itself, use its RPC services directly
      // instead of going through an RPC channel.
      if (this instanceof HMaster && sn.equals(getServerName())) {
        intf = ((HMaster)this).getMasterRpcServices();
        break;
      }
      try {
        BlockingRpcChannel channel =
          this.rpcClient.createBlockingRpcChannel(sn, userProvider.getCurrent(), operationTimeout);
        intf = RegionServerStatusService.newBlockingStub(channel);
        break;
      } catch (IOException e) {
        // Log at most once a second; the retry itself happens regardless.
        if (System.currentTimeMillis() > (previousLogTime + 1000)) {
          e = e instanceof RemoteException ?
            ((RemoteException)e).unwrapRemoteException() : e;
          if (e instanceof ServerNotRunningYetException) {
            LOG.info("Master isn't available yet, retrying");
          } else {
            LOG.warn("Unable to connect to master. Retrying. Error was:", e);
          }
          previousLogTime = System.currentTimeMillis();
        }
        if (sleep(200)) {
          interrupted = true;
        }
      }
    }
  } finally {
    // Restore the interrupt status swallowed by sleep().
    if (interrupted) {
      Thread.currentThread().interrupt();
    }
  }
  rssStub = intf;
  return sn;
}
2102
2103
2104
2105
2106
2107 private boolean keepLooping() {
2108 return !this.stopped && isClusterUp();
2109 }
2110
2111
2112
2113
2114
2115
2116
2117
2118 private RegionServerStartupResponse reportForDuty() throws IOException {
2119 ServerName masterServerName = createRegionServerStatusStub();
2120 if (masterServerName == null) return null;
2121 RegionServerStartupResponse result = null;
2122 try {
2123 rpcServices.requestCount.set(0);
2124 LOG.info("reportForDuty to master=" + masterServerName + " with port="
2125 + rpcServices.isa.getPort() + ", startcode=" + this.startcode);
2126 long now = EnvironmentEdgeManager.currentTime();
2127 int port = rpcServices.isa.getPort();
2128 RegionServerStartupRequest.Builder request = RegionServerStartupRequest.newBuilder();
2129 request.setPort(port);
2130 request.setServerStartCode(this.startcode);
2131 request.setServerCurrentTime(now);
2132 result = this.rssStub.regionServerStartup(null, request.build());
2133 } catch (ServiceException se) {
2134 IOException ioe = ProtobufUtil.getRemoteException(se);
2135 if (ioe instanceof ClockOutOfSyncException) {
2136 LOG.fatal("Master rejected startup because clock is out of sync", ioe);
2137
2138 throw ioe;
2139 } else if (ioe instanceof ServerNotRunningYetException) {
2140 LOG.debug("Master is not running yet");
2141 } else {
2142 LOG.warn("error telling master we are up", se);
2143 }
2144 }
2145 return result;
2146 }
2147
2148 @Override
2149 public long getLastSequenceId(byte[] encodedRegionName) {
2150 long lastFlushedSequenceId = -1L;
2151 try {
2152 GetLastFlushedSequenceIdRequest req = RequestConverter
2153 .buildGetLastFlushedSequenceIdRequest(encodedRegionName);
2154 RegionServerStatusService.BlockingInterface rss = rssStub;
2155 if (rss == null) {
2156 createRegionServerStatusStub();
2157 rss = rssStub;
2158 if (rss == null) {
2159
2160 LOG.warn("Unable to connect to the master to check "
2161 + "the last flushed sequence id");
2162 return -1L;
2163 }
2164 }
2165 lastFlushedSequenceId = rss.getLastFlushedSequenceId(null, req)
2166 .getLastFlushedSequenceId();
2167 } catch (ServiceException e) {
2168 lastFlushedSequenceId = -1l;
2169 LOG.warn("Unable to connect to the master to check "
2170 + "the last flushed sequence id", e);
2171 }
2172 return lastFlushedSequenceId;
2173 }
2174
2175
2176
2177
2178
2179
2180 protected void closeAllRegions(final boolean abort) {
2181 closeUserRegions(abort);
2182 closeMetaTableRegions(abort);
2183 }
2184
2185
2186
2187
2188
2189 void closeMetaTableRegions(final boolean abort) {
2190 HRegion meta = null;
2191 this.lock.writeLock().lock();
2192 try {
2193 for (Map.Entry<String, HRegion> e: onlineRegions.entrySet()) {
2194 HRegionInfo hri = e.getValue().getRegionInfo();
2195 if (hri.isMetaRegion()) {
2196 meta = e.getValue();
2197 }
2198 if (meta != null) break;
2199 }
2200 } finally {
2201 this.lock.writeLock().unlock();
2202 }
2203 if (meta != null) closeRegionIgnoreErrors(meta.getRegionInfo(), abort);
2204 }
2205
2206
2207
2208
2209
2210
2211
2212 void closeUserRegions(final boolean abort) {
2213 this.lock.writeLock().lock();
2214 try {
2215 for (Map.Entry<String, HRegion> e: this.onlineRegions.entrySet()) {
2216 HRegion r = e.getValue();
2217 if (!r.getRegionInfo().isMetaTable() && r.isAvailable()) {
2218
2219 closeRegionIgnoreErrors(r.getRegionInfo(), abort);
2220 }
2221 }
2222 } finally {
2223 this.lock.writeLock().unlock();
2224 }
2225 }
2226
2227
2228 public InfoServer getInfoServer() {
2229 return infoServer;
2230 }
2231
2232
2233
2234
2235 @Override
2236 public boolean isStopped() {
2237 return this.stopped;
2238 }
2239
2240 @Override
2241 public boolean isStopping() {
2242 return this.stopping;
2243 }
2244
2245 @Override
2246 public Map<String, HRegion> getRecoveringRegions() {
2247 return this.recoveringRegions;
2248 }
2249
2250
2251
2252
2253
2254 @Override
2255 public Configuration getConfiguration() {
2256 return conf;
2257 }
2258
2259
2260 ReentrantReadWriteLock.WriteLock getWriteLock() {
2261 return lock.writeLock();
2262 }
2263
2264 public int getNumberOfOnlineRegions() {
2265 return this.onlineRegions.size();
2266 }
2267
2268 boolean isOnlineRegionsEmpty() {
2269 return this.onlineRegions.isEmpty();
2270 }
2271
2272
2273
2274
2275
2276
2277 public Collection<HRegion> getOnlineRegionsLocalContext() {
2278 Collection<HRegion> regions = this.onlineRegions.values();
2279 return Collections.unmodifiableCollection(regions);
2280 }
2281
2282 @Override
2283 public void addToOnlineRegions(HRegion region) {
2284 this.onlineRegions.put(region.getRegionInfo().getEncodedName(), region);
2285 configurationManager.registerObserver(region);
2286 }
2287
2288
2289
2290
2291
2292
2293 SortedMap<Long, HRegion> getCopyOfOnlineRegionsSortedBySize() {
2294
2295 SortedMap<Long, HRegion> sortedRegions = new TreeMap<Long, HRegion>(
2296 new Comparator<Long>() {
2297 @Override
2298 public int compare(Long a, Long b) {
2299 return -1 * a.compareTo(b);
2300 }
2301 });
2302
2303 for (HRegion region : this.onlineRegions.values()) {
2304 sortedRegions.put(region.memstoreSize.get(), region);
2305 }
2306 return sortedRegions;
2307 }
2308
2309
2310
2311
2312 public long getStartcode() {
2313 return this.startcode;
2314 }
2315
2316
2317 @Override
2318 public FlushRequester getFlushRequester() {
2319 return this.cacheFlusher;
2320 }
2321
2322
2323
2324
2325
2326
2327
2328 protected HRegionInfo[] getMostLoadedRegions() {
2329 ArrayList<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2330 for (HRegion r : onlineRegions.values()) {
2331 if (!r.isAvailable()) {
2332 continue;
2333 }
2334 if (regions.size() < numRegionsToReport) {
2335 regions.add(r.getRegionInfo());
2336 } else {
2337 break;
2338 }
2339 }
2340 return regions.toArray(new HRegionInfo[regions.size()]);
2341 }
2342
2343 @Override
2344 public Leases getLeases() {
2345 return leases;
2346 }
2347
2348
2349
2350
2351 protected Path getRootDir() {
2352 return rootDir;
2353 }
2354
2355
2356
2357
2358 @Override
2359 public FileSystem getFileSystem() {
2360 return fs;
2361 }
2362
2363 @Override
2364 public String toString() {
2365 return getServerName().toString();
2366 }
2367
2368
2369
2370
2371
2372
2373 public int getThreadWakeFrequency() {
2374 return threadWakeFrequency;
2375 }
2376
2377 @Override
2378 public ZooKeeperWatcher getZooKeeper() {
2379 return zooKeeper;
2380 }
2381
2382 @Override
2383 public BaseCoordinatedStateManager getCoordinatedStateManager() {
2384 return csm;
2385 }
2386
2387 @Override
2388 public ServerName getServerName() {
2389 return serverName;
2390 }
2391
2392 @Override
2393 public CompactionRequestor getCompactionRequester() {
2394 return this.compactSplitThread;
2395 }
2396
2397 public RegionServerCoprocessorHost getRegionServerCoprocessorHost(){
2398 return this.rsHost;
2399 }
2400
2401 @Override
2402 public ConcurrentMap<byte[], Boolean> getRegionsInTransitionInRS() {
2403 return this.regionsInTransitionInRS;
2404 }
2405
2406 @Override
2407 public ExecutorService getExecutorService() {
2408 return service;
2409 }
2410
2411
2412
2413
2414
2415
2416
2417
2418 static private void createNewReplicationInstance(Configuration conf,
2419 HRegionServer server, FileSystem fs, Path logDir, Path oldLogDir) throws IOException{
2420
2421
2422 if (!conf.getBoolean(HConstants.REPLICATION_ENABLE_KEY,
2423 HConstants.REPLICATION_ENABLE_DEFAULT)) {
2424 return;
2425 }
2426
2427
2428 String sourceClassname = conf.get(HConstants.REPLICATION_SOURCE_SERVICE_CLASSNAME,
2429 HConstants.REPLICATION_SERVICE_CLASSNAME_DEFAULT);
2430
2431
2432 String sinkClassname = conf.get(HConstants.REPLICATION_SINK_SERVICE_CLASSNAME,
2433 HConstants.REPLICATION_SERVICE_CLASSNAME_DEFAULT);
2434
2435
2436
2437 if (sourceClassname.equals(sinkClassname)) {
2438 server.replicationSourceHandler = (ReplicationSourceService)
2439 newReplicationInstance(sourceClassname,
2440 conf, server, fs, logDir, oldLogDir);
2441 server.replicationSinkHandler = (ReplicationSinkService)
2442 server.replicationSourceHandler;
2443 } else {
2444 server.replicationSourceHandler = (ReplicationSourceService)
2445 newReplicationInstance(sourceClassname,
2446 conf, server, fs, logDir, oldLogDir);
2447 server.replicationSinkHandler = (ReplicationSinkService)
2448 newReplicationInstance(sinkClassname,
2449 conf, server, fs, logDir, oldLogDir);
2450 }
2451 }
2452
2453 static private ReplicationService newReplicationInstance(String classname,
2454 Configuration conf, HRegionServer server, FileSystem fs, Path logDir,
2455 Path oldLogDir) throws IOException{
2456
2457 Class<?> clazz = null;
2458 try {
2459 ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
2460 clazz = Class.forName(classname, true, classLoader);
2461 } catch (java.lang.ClassNotFoundException nfe) {
2462 throw new IOException("Could not find class for " + classname);
2463 }
2464
2465
2466 ReplicationService service = (ReplicationService)
2467 ReflectionUtils.newInstance(clazz, conf);
2468 service.initialize(server, fs, logDir, oldLogDir);
2469 return service;
2470 }
2471
2472
2473
2474
2475
2476
2477
2478
2479 public static HRegionServer constructRegionServer(
2480 Class<? extends HRegionServer> regionServerClass,
2481 final Configuration conf2, CoordinatedStateManager cp) {
2482 try {
2483 Constructor<? extends HRegionServer> c = regionServerClass
2484 .getConstructor(Configuration.class, CoordinatedStateManager.class);
2485 return c.newInstance(conf2, cp);
2486 } catch (Exception e) {
2487 throw new RuntimeException("Failed construction of " + "Regionserver: "
2488 + regionServerClass.toString(), e);
2489 }
2490 }
2491
2492
2493
2494
2495 public static void main(String[] args) throws Exception {
2496 VersionInfo.logVersion();
2497 Configuration conf = HBaseConfiguration.create();
2498 @SuppressWarnings("unchecked")
2499 Class<? extends HRegionServer> regionServerClass = (Class<? extends HRegionServer>) conf
2500 .getClass(HConstants.REGION_SERVER_IMPL, HRegionServer.class);
2501
2502 new HRegionServerCommandLine(regionServerClass).doMain(args);
2503 }
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515 @Override
2516 public List<HRegion> getOnlineRegions(TableName tableName) {
2517 List<HRegion> tableRegions = new ArrayList<HRegion>();
2518 synchronized (this.onlineRegions) {
2519 for (HRegion region: this.onlineRegions.values()) {
2520 HRegionInfo regionInfo = region.getRegionInfo();
2521 if(regionInfo.getTable().equals(tableName)) {
2522 tableRegions.add(region);
2523 }
2524 }
2525 }
2526 return tableRegions;
2527 }
2528
2529
2530 public String[] getRegionServerCoprocessors() {
2531 TreeSet<String> coprocessors = new TreeSet<String>();
2532 try {
2533 coprocessors.addAll(getWAL(null).getCoprocessorHost().getCoprocessors());
2534 } catch (IOException exception) {
2535 LOG.warn("Exception attempting to fetch wal coprocessor information for the common wal; " +
2536 "skipping.");
2537 LOG.debug("Exception details for failure to fetch wal coprocessor information.", exception);
2538 }
2539 Collection<HRegion> regions = getOnlineRegionsLocalContext();
2540 for (HRegion region: regions) {
2541 coprocessors.addAll(region.getCoprocessorHost().getCoprocessors());
2542 try {
2543 coprocessors.addAll(getWAL(region.getRegionInfo()).getCoprocessorHost().getCoprocessors());
2544 } catch (IOException exception) {
2545 LOG.warn("Exception attempting to fetch wal coprocessor information for region " + region +
2546 "; skipping.");
2547 LOG.debug("Exception details for failure to fetch wal coprocessor information.", exception);
2548 }
2549 }
2550 return coprocessors.toArray(new String[coprocessors.size()]);
2551 }
2552
2553
2554
2555
2556
2557 private void closeRegionIgnoreErrors(HRegionInfo region, final boolean abort) {
2558 try {
2559 CloseRegionCoordination.CloseRegionDetails details =
2560 csm.getCloseRegionCoordination().getDetaultDetails();
2561 if (!closeRegion(region.getEncodedName(), abort, details, null)) {
2562 LOG.warn("Failed to close " + region.getRegionNameAsString() +
2563 " - ignoring and continuing");
2564 }
2565 } catch (IOException e) {
2566 LOG.warn("Failed to close " + region.getRegionNameAsString() +
2567 " - ignoring and continuing", e);
2568 }
2569 }
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591 protected boolean closeRegion(String encodedName, final boolean abort,
2592 CloseRegionCoordination.CloseRegionDetails crd, final ServerName sn)
2593 throws NotServingRegionException, RegionAlreadyInTransitionException {
2594
2595 HRegion actualRegion = this.getFromOnlineRegions(encodedName);
2596 if ((actualRegion != null) && (actualRegion.getCoprocessorHost() != null)) {
2597 try {
2598 actualRegion.getCoprocessorHost().preClose(false);
2599 } catch (IOException exp) {
2600 LOG.warn("Unable to close region: the coprocessor launched an error ", exp);
2601 return false;
2602 }
2603 }
2604
2605 final Boolean previous = this.regionsInTransitionInRS.putIfAbsent(encodedName.getBytes(),
2606 Boolean.FALSE);
2607
2608 if (Boolean.TRUE.equals(previous)) {
2609 LOG.info("Received CLOSE for the region:" + encodedName + " , which we are already " +
2610 "trying to OPEN. Cancelling OPENING.");
2611 if (!regionsInTransitionInRS.replace(encodedName.getBytes(), previous, Boolean.FALSE)){
2612
2613
2614 LOG.warn("The opening for region " + encodedName + " was done before we could cancel it." +
2615 " Doing a standard close now");
2616 return closeRegion(encodedName, abort, crd, sn);
2617 }
2618
2619 actualRegion = this.getFromOnlineRegions(encodedName);
2620 if (actualRegion == null) {
2621 LOG.info("The opening previously in progress has been cancelled by a CLOSE request.");
2622
2623 throw new RegionAlreadyInTransitionException("The region " + encodedName +
2624 " was opening but not yet served. Opening is cancelled.");
2625 }
2626 } else if (Boolean.FALSE.equals(previous)) {
2627 LOG.info("Received CLOSE for the region: " + encodedName +
2628 ", which we are already trying to CLOSE, but not completed yet");
2629
2630
2631
2632
2633
2634
2635 throw new RegionAlreadyInTransitionException("The region " + encodedName +
2636 " was already closing. New CLOSE request is ignored.");
2637 }
2638
2639 if (actualRegion == null) {
2640 LOG.error("Received CLOSE for a region which is not online, and we're not opening.");
2641 this.regionsInTransitionInRS.remove(encodedName.getBytes());
2642
2643 throw new NotServingRegionException("The region " + encodedName +
2644 " is not online, and is not opening.");
2645 }
2646
2647 CloseRegionHandler crh;
2648 final HRegionInfo hri = actualRegion.getRegionInfo();
2649 if (hri.isMetaRegion()) {
2650 crh = new CloseMetaHandler(this, this, hri, abort,
2651 csm.getCloseRegionCoordination(), crd);
2652 } else {
2653 crh = new CloseRegionHandler(this, this, hri, abort,
2654 csm.getCloseRegionCoordination(), crd, sn);
2655 }
2656 this.service.submit(crh);
2657 return true;
2658 }
2659
2660
2661
2662
2663
2664
2665 public HRegion getOnlineRegion(final byte[] regionName) {
2666 String encodedRegionName = HRegionInfo.encodeRegionName(regionName);
2667 return this.onlineRegions.get(encodedRegionName);
2668 }
2669
2670 public InetSocketAddress[] getRegionBlockLocations(final String encodedRegionName) {
2671 return this.regionFavoredNodesMap.get(encodedRegionName);
2672 }
2673
2674 @Override
2675 public HRegion getFromOnlineRegions(final String encodedRegionName) {
2676 return this.onlineRegions.get(encodedRegionName);
2677 }
2678
2679
2680 @Override
2681 public boolean removeFromOnlineRegions(final HRegion r, ServerName destination) {
2682 HRegion toReturn = this.onlineRegions.remove(r.getRegionInfo().getEncodedName());
2683
2684 if (destination != null) {
2685 try {
2686 WAL wal = getWAL(r.getRegionInfo());
2687 long closeSeqNum = wal.getEarliestMemstoreSeqNum(r.getRegionInfo().getEncodedNameAsBytes());
2688 if (closeSeqNum == HConstants.NO_SEQNUM) {
2689
2690 closeSeqNum = r.getOpenSeqNum();
2691 if (closeSeqNum == HConstants.NO_SEQNUM) {
2692 closeSeqNum = 0;
2693 }
2694 }
2695 addToMovedRegions(r.getRegionInfo().getEncodedName(), destination, closeSeqNum);
2696 } catch (IOException exception) {
2697 LOG.error("Could not retrieve WAL information for region " + r.getRegionInfo() +
2698 "; not adding to moved regions.");
2699 LOG.debug("Exception details for failure to get wal", exception);
2700 }
2701 }
2702 this.regionFavoredNodesMap.remove(r.getRegionInfo().getEncodedName());
2703 return toReturn != null;
2704 }
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714 protected HRegion getRegion(final byte[] regionName)
2715 throws NotServingRegionException {
2716 String encodedRegionName = HRegionInfo.encodeRegionName(regionName);
2717 return getRegionByEncodedName(regionName, encodedRegionName);
2718 }
2719
2720 public HRegion getRegionByEncodedName(String encodedRegionName)
2721 throws NotServingRegionException {
2722 return getRegionByEncodedName(null, encodedRegionName);
2723 }
2724
2725 protected HRegion getRegionByEncodedName(byte[] regionName, String encodedRegionName)
2726 throws NotServingRegionException {
2727 HRegion region = this.onlineRegions.get(encodedRegionName);
2728 if (region == null) {
2729 MovedRegionInfo moveInfo = getMovedRegion(encodedRegionName);
2730 if (moveInfo != null) {
2731 throw new RegionMovedException(moveInfo.getServerName(), moveInfo.getSeqNum());
2732 }
2733 Boolean isOpening = this.regionsInTransitionInRS.get(Bytes.toBytes(encodedRegionName));
2734 String regionNameStr = regionName == null?
2735 encodedRegionName: Bytes.toStringBinary(regionName);
2736 if (isOpening != null && isOpening.booleanValue()) {
2737 throw new RegionOpeningException("Region " + regionNameStr +
2738 " is opening on " + this.serverName);
2739 }
2740 throw new NotServingRegionException("Region " + regionNameStr +
2741 " is not online on " + this.serverName);
2742 }
2743 return region;
2744 }
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756 private Throwable cleanup(final Throwable t, final String msg) {
2757
2758 if (t instanceof NotServingRegionException) {
2759 LOG.debug("NotServingRegionException; " + t.getMessage());
2760 return t;
2761 }
2762 if (msg == null) {
2763 LOG.error("", RemoteExceptionHandler.checkThrowable(t));
2764 } else {
2765 LOG.error(msg, RemoteExceptionHandler.checkThrowable(t));
2766 }
2767 if (!rpcServices.checkOOME(t)) {
2768 checkFileSystem();
2769 }
2770 return t;
2771 }
2772
2773
2774
2775
2776
2777
2778
2779
2780 protected IOException convertThrowableToIOE(final Throwable t, final String msg) {
2781 return (t instanceof IOException ? (IOException) t : msg == null
2782 || msg.length() == 0 ? new IOException(t) : new IOException(msg, t));
2783 }
2784
2785
2786
2787
2788
2789
2790
2791 public boolean checkFileSystem() {
2792 if (this.fsOk && this.fs != null) {
2793 try {
2794 FSUtils.checkFileSystemAvailable(this.fs);
2795 } catch (IOException e) {
2796 abort("File System not available", e);
2797 this.fsOk = false;
2798 }
2799 }
2800 return this.fsOk;
2801 }
2802
2803 @Override
2804 public void updateRegionFavoredNodesMapping(String encodedRegionName,
2805 List<org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName> favoredNodes) {
2806 InetSocketAddress[] addr = new InetSocketAddress[favoredNodes.size()];
2807
2808
2809 for (int i = 0; i < favoredNodes.size(); i++) {
2810 addr[i] = InetSocketAddress.createUnresolved(favoredNodes.get(i).getHostName(),
2811 favoredNodes.get(i).getPort());
2812 }
2813 regionFavoredNodesMap.put(encodedRegionName, addr);
2814 }
2815
2816
2817
2818
2819
2820
2821
2822 @Override
2823 public InetSocketAddress[] getFavoredNodesForRegion(String encodedRegionName) {
2824 return regionFavoredNodesMap.get(encodedRegionName);
2825 }
2826
2827 @Override
2828 public ServerNonceManager getNonceManager() {
2829 return this.nonceManager;
2830 }
2831
2832 private static class MovedRegionInfo {
2833 private final ServerName serverName;
2834 private final long seqNum;
2835 private final long ts;
2836
2837 public MovedRegionInfo(ServerName serverName, long closeSeqNum) {
2838 this.serverName = serverName;
2839 this.seqNum = closeSeqNum;
2840 ts = EnvironmentEdgeManager.currentTime();
2841 }
2842
2843 public ServerName getServerName() {
2844 return serverName;
2845 }
2846
2847 public long getSeqNum() {
2848 return seqNum;
2849 }
2850
2851 public long getMoveTime() {
2852 return ts;
2853 }
2854 }
2855
2856
2857
2858 protected Map<String, MovedRegionInfo> movedRegions =
2859 new ConcurrentHashMap<String, MovedRegionInfo>(3000);
2860
2861
2862
2863 private static final int TIMEOUT_REGION_MOVED = (2 * 60 * 1000);
2864
2865 protected void addToMovedRegions(String encodedName, ServerName destination, long closeSeqNum) {
2866 if (ServerName.isSameHostnameAndPort(destination, this.getServerName())) {
2867 LOG.warn("Not adding moved region record: " + encodedName + " to self.");
2868 return;
2869 }
2870 LOG.info("Adding moved region record: "
2871 + encodedName + " to " + destination + " as of " + closeSeqNum);
2872 movedRegions.put(encodedName, new MovedRegionInfo(destination, closeSeqNum));
2873 }
2874
2875 void removeFromMovedRegions(String encodedName) {
2876 movedRegions.remove(encodedName);
2877 }
2878
2879 private MovedRegionInfo getMovedRegion(final String encodedRegionName) {
2880 MovedRegionInfo dest = movedRegions.get(encodedRegionName);
2881
2882 long now = EnvironmentEdgeManager.currentTime();
2883 if (dest != null) {
2884 if (dest.getMoveTime() > (now - TIMEOUT_REGION_MOVED)) {
2885 return dest;
2886 } else {
2887 movedRegions.remove(encodedRegionName);
2888 }
2889 }
2890
2891 return null;
2892 }
2893
2894
2895
2896
2897 protected void cleanMovedRegions() {
2898 final long cutOff = System.currentTimeMillis() - TIMEOUT_REGION_MOVED;
2899 Iterator<Entry<String, MovedRegionInfo>> it = movedRegions.entrySet().iterator();
2900
2901 while (it.hasNext()){
2902 Map.Entry<String, MovedRegionInfo> e = it.next();
2903 if (e.getValue().getMoveTime() < cutOff) {
2904 it.remove();
2905 }
2906 }
2907 }
2908
2909
2910
2911
2912 protected static class MovedRegionsCleaner extends Chore implements Stoppable {
2913 private HRegionServer regionServer;
2914 Stoppable stoppable;
2915
2916 private MovedRegionsCleaner(
2917 HRegionServer regionServer, Stoppable stoppable){
2918 super("MovedRegionsCleaner for region "+regionServer, TIMEOUT_REGION_MOVED, stoppable);
2919 this.regionServer = regionServer;
2920 this.stoppable = stoppable;
2921 }
2922
2923 static MovedRegionsCleaner createAndStart(HRegionServer rs){
2924 Stoppable stoppable = new Stoppable() {
2925 private volatile boolean isStopped = false;
2926 @Override public void stop(String why) { isStopped = true;}
2927 @Override public boolean isStopped() {return isStopped;}
2928 };
2929
2930 return new MovedRegionsCleaner(rs, stoppable);
2931 }
2932
2933 @Override
2934 protected void chore() {
2935 regionServer.cleanMovedRegions();
2936 }
2937
2938 @Override
2939 public void stop(String why) {
2940 stoppable.stop(why);
2941 }
2942
2943 @Override
2944 public boolean isStopped() {
2945 return stoppable.isStopped();
2946 }
2947 }
2948
2949 private String getMyEphemeralNodePath() {
2950 return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
2951 }
2952
2953 private boolean isHealthCheckerConfigured() {
2954 String healthScriptLocation = this.conf.get(HConstants.HEALTH_SCRIPT_LOC);
2955 return org.apache.commons.lang.StringUtils.isNotBlank(healthScriptLocation);
2956 }
2957
2958
2959
2960
2961 public CompactSplitThread getCompactSplitThread() {
2962 return this.compactSplitThread;
2963 }
2964
2965
2966
2967
2968
2969
2970
2971
2972 private void updateRecoveringRegionLastFlushedSequenceId(HRegion r) throws KeeperException,
2973 IOException {
2974 if (!r.isRecovering()) {
2975
2976 return;
2977 }
2978
2979 HRegionInfo region = r.getRegionInfo();
2980 ZooKeeperWatcher zkw = getZooKeeper();
2981 String previousRSName = this.getLastFailedRSFromZK(region.getEncodedName());
2982 Map<byte[], Long> maxSeqIdInStores = r.getMaxStoreSeqIdForLogReplay();
2983 long minSeqIdForLogReplay = -1;
2984 for (Long storeSeqIdForReplay : maxSeqIdInStores.values()) {
2985 if (minSeqIdForLogReplay == -1 || storeSeqIdForReplay < minSeqIdForLogReplay) {
2986 minSeqIdForLogReplay = storeSeqIdForReplay;
2987 }
2988 }
2989
2990 try {
2991 long lastRecordedFlushedSequenceId = -1;
2992 String nodePath = ZKUtil.joinZNode(this.zooKeeper.recoveringRegionsZNode,
2993 region.getEncodedName());
2994
2995 byte[] data;
2996 try {
2997 data = ZKUtil.getData(zkw, nodePath);
2998 } catch (InterruptedException e) {
2999 throw new InterruptedIOException();
3000 }
3001 if (data != null) {
3002 lastRecordedFlushedSequenceId = ZKSplitLog.parseLastFlushedSequenceIdFrom(data);
3003 }
3004 if (data == null || lastRecordedFlushedSequenceId < minSeqIdForLogReplay) {
3005 ZKUtil.setData(zkw, nodePath, ZKUtil.positionToByteArray(minSeqIdForLogReplay));
3006 }
3007 if (previousRSName != null) {
3008
3009 nodePath = ZKUtil.joinZNode(nodePath, previousRSName);
3010 ZKUtil.setData(zkw, nodePath,
3011 ZKUtil.regionSequenceIdsToByteArray(minSeqIdForLogReplay, maxSeqIdInStores));
3012 LOG.debug("Update last flushed sequence id of region " + region.getEncodedName() + " for "
3013 + previousRSName);
3014 } else {
3015 LOG.warn("Can't find failed region server for recovering region " +
3016 region.getEncodedName());
3017 }
3018 } catch (NoNodeException ignore) {
3019 LOG.debug("Region " + region.getEncodedName() +
3020 " must have completed recovery because its recovery znode has been removed", ignore);
3021 }
3022 }
3023
3024
3025
3026
3027
3028
3029 private String getLastFailedRSFromZK(String encodedRegionName) throws KeeperException {
3030 String result = null;
3031 long maxZxid = 0;
3032 ZooKeeperWatcher zkw = this.getZooKeeper();
3033 String nodePath = ZKUtil.joinZNode(zkw.recoveringRegionsZNode, encodedRegionName);
3034 List<String> failedServers = ZKUtil.listChildrenNoWatch(zkw, nodePath);
3035 if (failedServers == null || failedServers.isEmpty()) {
3036 return result;
3037 }
3038 for (String failedServer : failedServers) {
3039 String rsPath = ZKUtil.joinZNode(nodePath, failedServer);
3040 Stat stat = new Stat();
3041 ZKUtil.getDataNoWatch(zkw, rsPath, stat);
3042 if (maxZxid < stat.getCzxid()) {
3043 maxZxid = stat.getCzxid();
3044 result = failedServer;
3045 }
3046 }
3047 return result;
3048 }
3049
3050 public CoprocessorServiceResponse execRegionServerService(final RpcController controller,
3051 final CoprocessorServiceRequest serviceRequest) throws ServiceException {
3052 try {
3053 ServerRpcController execController = new ServerRpcController();
3054 CoprocessorServiceCall call = serviceRequest.getCall();
3055 String serviceName = call.getServiceName();
3056 String methodName = call.getMethodName();
3057 if (!coprocessorServiceHandlers.containsKey(serviceName)) {
3058 throw new UnknownProtocolException(null,
3059 "No registered coprocessor service found for name " + serviceName);
3060 }
3061 Service service = coprocessorServiceHandlers.get(serviceName);
3062 Descriptors.ServiceDescriptor serviceDesc = service.getDescriptorForType();
3063 Descriptors.MethodDescriptor methodDesc = serviceDesc.findMethodByName(methodName);
3064 if (methodDesc == null) {
3065 throw new UnknownProtocolException(service.getClass(), "Unknown method " + methodName
3066 + " called on service " + serviceName);
3067 }
3068 Message request =
3069 service.getRequestPrototype(methodDesc).newBuilderForType().mergeFrom(call.getRequest())
3070 .build();
3071 final Message.Builder responseBuilder =
3072 service.getResponsePrototype(methodDesc).newBuilderForType();
3073 service.callMethod(methodDesc, controller, request, new RpcCallback<Message>() {
3074 @Override
3075 public void run(Message message) {
3076 if (message != null) {
3077 responseBuilder.mergeFrom(message);
3078 }
3079 }
3080 });
3081 Message execResult = responseBuilder.build();
3082 if (execController.getFailedOn() != null) {
3083 throw execController.getFailedOn();
3084 }
3085 ClientProtos.CoprocessorServiceResponse.Builder builder =
3086 ClientProtos.CoprocessorServiceResponse.newBuilder();
3087 builder.setRegion(RequestConverter.buildRegionSpecifier(RegionSpecifierType.REGION_NAME,
3088 HConstants.EMPTY_BYTE_ARRAY));
3089 builder.setValue(builder.getValueBuilder().setName(execResult.getClass().getName())
3090 .setValue(execResult.toByteString()));
3091 return builder.build();
3092 } catch (IOException ie) {
3093 throw new ServiceException(ie);
3094 }
3095 }
3096
3097
3098
3099
3100 public CacheConfig getCacheConfig() {
3101 return this.cacheConfig;
3102 }
3103
3104
3105
3106
3107 protected ConfigurationManager getConfigurationManager() {
3108 return configurationManager;
3109 }
3110
3111
3112
3113
3114 public void updateConfiguration() {
3115 LOG.info("Reloading the configuration from disk.");
3116
3117 conf.reloadConfiguration();
3118 configurationManager.notifyAllObservers(conf);
3119 }
3120
3121 @Override
3122 public HeapMemoryManager getHeapMemoryManager() {
3123 return hMemManager;
3124 }
3125 }