1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import java.io.IOException;
22 import java.util.concurrent.atomic.AtomicBoolean;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.classification.InterfaceAudience;
27 import org.apache.hadoop.hbase.ZNodeClearer;
28 import org.apache.hadoop.hbase.exceptions.DeserializationException;
29 import org.apache.hadoop.hbase.Server;
30 import org.apache.hadoop.hbase.ServerName;
31 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
32 import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
33 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
34 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
35 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
36 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
37 import org.apache.zookeeper.KeeperException;
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 @InterfaceAudience.Private
53 class ActiveMasterManager extends ZooKeeperListener {
54 private static final Log LOG = LogFactory.getLog(ActiveMasterManager.class);
55
56 final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean(false);
57
58 private final ServerName sn;
59 private final Server master;
60
61
62
63
64
65
66 ActiveMasterManager(ZooKeeperWatcher watcher, ServerName sn, Server master) {
67 super(watcher);
68 this.sn = sn;
69 this.master = master;
70 }
71
72 @Override
73 public void nodeCreated(String path) {
74 handle(path);
75 }
76
77 @Override
78 public void nodeDeleted(String path) {
79 handle(path);
80 }
81
82 void handle(final String path) {
83 if (path.equals(watcher.getMasterAddressZNode()) && !master.isStopped()) {
84 handleMasterNodeChange();
85 }
86 }
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101 private void handleMasterNodeChange() {
102
103 try {
104 synchronized(clusterHasActiveMaster) {
105 if (ZKUtil.watchAndCheckExists(watcher, watcher.getMasterAddressZNode())) {
106
107 LOG.debug("A master is now available");
108 clusterHasActiveMaster.set(true);
109 } else {
110
111 LOG.debug("No master available. Notifying waiting threads");
112 clusterHasActiveMaster.set(false);
113
114 clusterHasActiveMaster.notifyAll();
115 }
116 }
117 } catch (KeeperException ke) {
118 master.abort("Received an unexpected KeeperException, aborting", ke);
119 }
120 }
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135 boolean blockUntilBecomingActiveMaster(MonitoredTask startupStatus,
136 ClusterStatusTracker clusterStatusTracker) {
137 while (true) {
138 startupStatus.setStatus("Trying to register in ZK as active master");
139
140
141 try {
142 String backupZNode =
143 ZKUtil.joinZNode(this.watcher.backupMasterAddressesZNode, this.sn.toString());
144 if (MasterAddressTracker.setMasterAddress(this.watcher,
145 this.watcher.getMasterAddressZNode(), this.sn)) {
146
147
148
149 if (ZKUtil.checkExists(this.watcher, backupZNode) != -1) {
150 LOG.info("Deleting ZNode for " + backupZNode + " from backup master directory");
151 ZKUtil.deleteNodeFailSilent(this.watcher, backupZNode);
152 }
153
154 ZNodeClearer.writeMyEphemeralNodeOnDisk(this.sn.toString());
155
156
157 startupStatus.setStatus("Successfully registered as active master.");
158 this.clusterHasActiveMaster.set(true);
159 LOG.info("Master=" + this.sn);
160 return true;
161 }
162
163
164
165 this.clusterHasActiveMaster.set(true);
166
167
168
169
170
171
172
173
174
175 LOG.info("Adding ZNode for " + backupZNode + " in backup master directory");
176 MasterAddressTracker.setMasterAddress(this.watcher, backupZNode, this.sn);
177
178 String msg;
179 byte[] bytes =
180 ZKUtil.getDataAndWatch(this.watcher, this.watcher.getMasterAddressZNode());
181 if (bytes == null) {
182 msg = ("A master was detected, but went down before its address " +
183 "could be read. Attempting to become the next active master");
184 } else {
185 ServerName currentMaster;
186 try {
187 currentMaster = ServerName.parseFrom(bytes);
188 } catch (DeserializationException e) {
189 LOG.warn("Failed parse", e);
190
191 continue;
192 }
193 if (ServerName.isSameHostnameAndPort(currentMaster, this.sn)) {
194 msg = ("Current master has this master's address, " +
195 currentMaster + "; master was restarted? Deleting node.");
196
197 ZKUtil.deleteNode(this.watcher, this.watcher.getMasterAddressZNode());
198
199
200
201 ZNodeClearer.deleteMyEphemeralNodeOnDisk();
202 } else {
203 msg = "Another master is the active master, " + currentMaster +
204 "; waiting to become the next active master";
205 }
206 }
207 LOG.info(msg);
208 startupStatus.setStatus(msg);
209 } catch (KeeperException ke) {
210 master.abort("Received an unexpected KeeperException, aborting", ke);
211 return false;
212 }
213 synchronized (this.clusterHasActiveMaster) {
214 while (this.clusterHasActiveMaster.get() && !this.master.isStopped()) {
215 try {
216 this.clusterHasActiveMaster.wait();
217 } catch (InterruptedException e) {
218
219
220 LOG.debug("Interrupted waiting for master to die", e);
221 }
222 }
223 if (!clusterStatusTracker.isClusterUp()) {
224 this.master.stop(
225 "Cluster went down before this master became active");
226 }
227 if (this.master.isStopped()) {
228 return false;
229 }
230
231 }
232 }
233 }
234
235
236
237
238 public boolean isActiveMaster() {
239 try {
240 if (ZKUtil.checkExists(watcher, watcher.getMasterAddressZNode()) >= 0) {
241 return true;
242 }
243 }
244 catch (KeeperException ke) {
245 LOG.info("Received an unexpected KeeperException when checking " +
246 "isActiveMaster : "+ ke);
247 }
248 return false;
249 }
250
251 public void stop() {
252 try {
253
254 ServerName activeMaster = null;
255 try {
256 activeMaster = MasterAddressTracker.getMasterAddress(this.watcher);
257 } catch (IOException e) {
258 LOG.warn("Failed get of master address: " + e.toString());
259 }
260 if (activeMaster != null && activeMaster.equals(this.sn)) {
261 ZKUtil.deleteNode(watcher, watcher.getMasterAddressZNode());
262
263
264 ZNodeClearer.deleteMyEphemeralNodeOnDisk();
265 }
266 } catch (KeeperException e) {
267 LOG.error(this.watcher.prefix("Error deleting our own master address node"), e);
268 }
269 }
270 }