1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.procedure;
19
20 import java.io.IOException;
21 import java.util.Arrays;
22 import java.util.List;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.classification.InterfaceAudience;
27 import org.apache.hadoop.classification.InterfaceStability;
28 import org.apache.hadoop.hbase.errorhandling.ForeignException;
29 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
30 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
31 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
32 import org.apache.zookeeper.KeeperException;
33
34 import com.google.protobuf.InvalidProtocolBufferException;
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54 @InterfaceAudience.Public
55 @InterfaceStability.Evolving
56 public class ZKProcedureMemberRpcs implements ProcedureMemberRpcs {
57
58 private static final Log LOG = LogFactory.getLog(ZKProcedureMemberRpcs.class);
59 private final String memberName;
60
61 protected ProcedureMember member;
62 private ZKProcedureUtil zkController;
63
64
65
66
67
68
69
70
71
72 public ZKProcedureMemberRpcs(ZooKeeperWatcher watcher,
73 String procType, String memberName) throws KeeperException {
74 this.zkController = new ZKProcedureUtil(watcher, procType, memberName) {
75 @Override
76 public void nodeCreated(String path) {
77 if (!isInProcedurePath(path)) {
78 return;
79 }
80
81 LOG.info("Received created event:" + path);
82
83 if (isAcquiredNode(path)) {
84 waitForNewProcedures();
85 return;
86 } else if (isAbortNode(path)) {
87 watchForAbortedProcedures();
88 return;
89 }
90 String parent = ZKUtil.getParent(path);
91
92 if (isReachedNode(parent)) {
93 receivedReachedGlobalBarrier(path);
94 return;
95 } else if (isAbortNode(parent)) {
96 abort(path);
97 return;
98 } else if (isAcquiredNode(parent)) {
99 startNewSubprocedure(path);
100 } else {
101 LOG.debug("Ignoring created notification for node:" + path);
102 }
103 }
104
105 @Override
106 public void nodeChildrenChanged(String path) {
107 if (path.equals(this.acquiredZnode)) {
108 LOG.info("Received procedure start children changed event: " + path);
109 waitForNewProcedures();
110 } else if (path.equals(this.abortZnode)) {
111 LOG.info("Received procedure abort children changed event: " + path);
112 watchForAbortedProcedures();
113 }
114 }
115 };
116 this.memberName = memberName;
117 }
118
119 public ZKProcedureUtil getZkController() {
120 return zkController;
121 }
122
123 @Override
124 public String getMemberName() {
125 return memberName;
126 }
127
128
129
130
131
132 private void receivedReachedGlobalBarrier(String path) {
133 LOG.debug("Recieved reached global barrier:" + path);
134 String procName = ZKUtil.getNodeName(path);
135 this.member.receivedReachedGlobalBarrier(procName);
136 }
137
138 private void watchForAbortedProcedures() {
139 LOG.debug("Checking for aborted procedures on node: '" + zkController.getAbortZnode() + "'");
140 try {
141
142 for (String node : ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(),
143 zkController.getAbortZnode())) {
144 String abortNode = ZKUtil.joinZNode(zkController.getAbortZnode(), node);
145 abort(abortNode);
146 }
147 } catch (KeeperException e) {
148 member.controllerConnectionFailure("Failed to list children for abort node:"
149 + zkController.getAbortZnode(), new IOException(e));
150 }
151 }
152
153 private void waitForNewProcedures() {
154
155 LOG.debug("Looking for new procedures under znode:'" + zkController.getAcquiredBarrier() + "'");
156 List<String> runningProcedures = null;
157 try {
158 runningProcedures = ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(),
159 zkController.getAcquiredBarrier());
160 if (runningProcedures == null) {
161 LOG.debug("No running procedures.");
162 return;
163 }
164 } catch (KeeperException e) {
165 member.controllerConnectionFailure("General failure when watching for new procedures",
166 new IOException(e));
167 }
168 if (runningProcedures == null) {
169 LOG.debug("No running procedures.");
170 return;
171 }
172 for (String procName : runningProcedures) {
173
174 String path = ZKUtil.joinZNode(zkController.getAcquiredBarrier(), procName);
175 startNewSubprocedure(path);
176 }
177 }
178
179
180
181
182
183
184
185
186 private synchronized void startNewSubprocedure(String path) {
187 LOG.debug("Found procedure znode: " + path);
188 String opName = ZKUtil.getNodeName(path);
189
190 String abortZNode = zkController.getAbortZNode(opName);
191 try {
192 if (ZKUtil.watchAndCheckExists(zkController.getWatcher(), abortZNode)) {
193 LOG.debug("Not starting:" + opName + " because we already have an abort notification.");
194 return;
195 }
196 } catch (KeeperException e) {
197 member.controllerConnectionFailure("Failed to get the abort znode (" + abortZNode
198 + ") for procedure :" + opName, new IOException(e));
199 return;
200 }
201
202
203 Subprocedure subproc = null;
204 try {
205 byte[] data = ZKUtil.getData(zkController.getWatcher(), path);
206 LOG.debug("start proc data length is " + data.length);
207 if (!ProtobufUtil.isPBMagicPrefix(data)) {
208 String msg = "Data in for starting procuedure " + opName + " is illegally formatted. "
209 + "Killing the procedure.";
210 LOG.error(msg);
211 throw new IllegalArgumentException(msg);
212 }
213 data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length);
214 LOG.debug("Found data for znode:" + path);
215 subproc = member.createSubprocedure(opName, data);
216 member.submitSubprocedure(subproc);
217 } catch (IllegalArgumentException iae ) {
218 LOG.error("Illegal argument exception", iae);
219 sendMemberAborted(subproc, new ForeignException(getMemberName(), iae));
220 } catch (IllegalStateException ise) {
221 LOG.error("Illegal state exception ", ise);
222 sendMemberAborted(subproc, new ForeignException(getMemberName(), ise));
223 } catch (KeeperException e) {
224 member.controllerConnectionFailure("Failed to get data for new procedure:" + opName,
225 new IOException(e));
226 }
227 }
228
229
230
231
232
233
234
235 @Override
236 public void sendMemberAcquired(Subprocedure sub) throws IOException {
237 String procName = sub.getName();
238 try {
239 LOG.debug("Member: '" + memberName + "' joining acquired barrier for procedure (" + procName
240 + ") in zk");
241 String acquiredZNode = ZKUtil.joinZNode(ZKProcedureUtil.getAcquireBarrierNode(
242 zkController, procName), memberName);
243 ZKUtil.createAndFailSilent(zkController.getWatcher(), acquiredZNode);
244
245
246 String reachedBarrier = zkController.getReachedBarrierNode(procName);
247 LOG.debug("Watch for global barrier reached:" + reachedBarrier);
248 if (ZKUtil.watchAndCheckExists(zkController.getWatcher(), reachedBarrier)) {
249 receivedReachedGlobalBarrier(reachedBarrier);
250 }
251 } catch (KeeperException e) {
252 member.controllerConnectionFailure("Failed to acquire barrier for procedure: "
253 + procName + " and member: " + memberName, new IOException(e));
254 }
255 }
256
257
258
259
260 @Override
261 public void sendMemberCompleted(Subprocedure sub) throws IOException {
262 String procName = sub.getName();
263 LOG.debug("Marking procedure '" + procName + "' completed for member '" + memberName
264 + "' in zk");
265 String joinPath = ZKUtil.joinZNode(zkController.getReachedBarrierNode(procName), memberName);
266 try {
267 ZKUtil.createAndFailSilent(zkController.getWatcher(), joinPath);
268 } catch (KeeperException e) {
269 member.controllerConnectionFailure("Failed to post zk node:" + joinPath
270 + " to join procedure barrier.", new IOException(e));
271 }
272 }
273
274
275
276
277
278 @Override
279 public void sendMemberAborted(Subprocedure sub, ForeignException ee) {
280 if (sub == null) {
281 LOG.error("Failed due to null subprocedure", ee);
282 return;
283 }
284 String procName = sub.getName();
285 LOG.debug("Aborting procedure (" + procName + ") in zk");
286 String procAbortZNode = zkController.getAbortZNode(procName);
287 try {
288 String source = (ee.getSource() == null) ? memberName: ee.getSource();
289 byte[] errorInfo = ProtobufUtil.prependPBMagic(ForeignException.serialize(source, ee));
290 ZKUtil.createAndFailSilent(zkController.getWatcher(), procAbortZNode, errorInfo);
291 LOG.debug("Finished creating abort znode:" + procAbortZNode);
292 } catch (KeeperException e) {
293
294
295 zkController.logZKTree(zkController.getBaseZnode());
296 member.controllerConnectionFailure("Failed to post zk node:" + procAbortZNode
297 + " to abort procedure", new IOException(e));
298 }
299 }
300
301
302
303
304
305 protected void abort(String abortZNode) {
306 LOG.debug("Aborting procedure member for znode " + abortZNode);
307 String opName = ZKUtil.getNodeName(abortZNode);
308 try {
309 byte[] data = ZKUtil.getData(zkController.getWatcher(), abortZNode);
310
311
312 ForeignException ee;
313 try {
314 if (!ProtobufUtil.isPBMagicPrefix(data)) {
315 String msg = "Illegally formatted data in abort node for proc " + opName
316 + ". Killing the procedure.";
317 LOG.error(msg);
318
319 ee = new ForeignException(getMemberName(), new IllegalArgumentException(msg));
320 } else {
321 data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length);
322 ee = ForeignException.deserialize(data);
323 }
324 } catch (InvalidProtocolBufferException e) {
325 LOG.warn("Got an error notification for op:" + opName
326 + " but we can't read the information. Killing the procedure.");
327
328 ee = new ForeignException(getMemberName(), e);
329 }
330
331 this.member.receiveAbortProcedure(opName, ee);
332 } catch (KeeperException e) {
333 member.controllerConnectionFailure("Failed to get data for abort znode:" + abortZNode
334 + zkController.getAbortZnode(), new IOException(e));
335 }
336 }
337
338 public void start(ProcedureMember listener) {
339 LOG.debug("Starting procedure member '" + this.memberName + "'");
340 this.member = listener;
341 watchForAbortedProcedures();
342 waitForNewProcedures();
343 }
344
345 @Override
346 public void close() throws IOException {
347 zkController.close();
348 }
349
350 }