1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.replication;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.Collections;
24 import java.util.HashMap;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.Set;
28 import java.util.TreeMap;
29 import java.util.UUID;
30 import java.util.concurrent.ConcurrentHashMap;
31 import java.util.concurrent.ConcurrentMap;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.hbase.Abortable;
37 import org.apache.hadoop.hbase.ServerName;
38 import org.apache.hadoop.hbase.exceptions.DeserializationException;
39 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
40 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
41 import org.apache.hadoop.hbase.util.Bytes;
42 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
43 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
44 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
45 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
46 import org.apache.zookeeper.KeeperException;
47 import org.apache.zookeeper.KeeperException.AuthFailedException;
48 import org.apache.zookeeper.KeeperException.ConnectionLossException;
49 import org.apache.zookeeper.KeeperException.SessionExpiredException;
50
51 import com.google.protobuf.InvalidProtocolBufferException;
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79 public class ReplicationPeersZKImpl extends ReplicationStateZKBase implements ReplicationPeers {
80
81
82 private Map<String, ReplicationPeer> peerClusters;
83 private final String tableCFsNodeName;
84
85 private static final Log LOG = LogFactory.getLog(ReplicationPeersZKImpl.class);
86
/**
 * Builds the ZooKeeper-backed peer registry with an empty set of connected peers.
 *
 * @param zk watcher handed to the base class
 * @param conf configuration handed to the base class; also read for
 *          {@code zookeeper.znode.replication.peers.tableCFs} (default {@code "tableCFs"})
 * @param abortable handed to the base class
 */
public ReplicationPeersZKImpl(final ZooKeeperWatcher zk, final Configuration conf,
    Abortable abortable) {
  super(zk, conf, abortable);
  this.peerClusters = new ConcurrentHashMap<String, ReplicationPeer>();
  this.tableCFsNodeName = conf.get("zookeeper.znode.replication.peers.tableCFs", "tableCFs");
}
93
94 @Override
95 public void init() throws ReplicationException {
96 try {
97 if (ZKUtil.checkExists(this.zookeeper, this.peersZNode) < 0) {
98 ZKUtil.createWithParents(this.zookeeper, this.peersZNode);
99 }
100 } catch (KeeperException e) {
101 throw new ReplicationException("Could not initialize replication peers", e);
102 }
103 connectExistingPeers();
104 }
105
106 @Override
107 public void addPeer(String id, String clusterKey) throws ReplicationException {
108 addPeer(id, clusterKey, null);
109 }
110
111 @Override
112 public void addPeer(String id, String clusterKey, String tableCFs) throws ReplicationException {
113 try {
114 if (peerExists(id)) {
115 throw new IllegalArgumentException("Cannot add a peer with id=" + id
116 + " because that id already exists.");
117 }
118 ZKUtil.createWithParents(this.zookeeper, this.peersZNode);
119 ZKUtil.createAndWatch(this.zookeeper, ZKUtil.joinZNode(this.peersZNode, id),
120 toByteArray(clusterKey));
121
122
123
124 ZKUtil.createNodeIfNotExistsAndWatch(this.zookeeper, getPeerStateNode(id),
125 ENABLED_ZNODE_BYTES);
126
127
128 String tableCFsStr = (tableCFs == null) ? "" : tableCFs;
129 ZKUtil.createNodeIfNotExistsAndWatch(this.zookeeper, getTableCFsNode(id),
130 Bytes.toBytes(tableCFsStr));
131 } catch (KeeperException e) {
132 throw new ReplicationException("Could not add peer with id=" + id
133 + ", clusterKey=" + clusterKey, e);
134 }
135 }
136
137 @Override
138 public void removePeer(String id) throws ReplicationException {
139 try {
140 if (!peerExists(id)) {
141 throw new IllegalArgumentException("Cannot remove peer with id=" + id
142 + " because that id does not exist.");
143 }
144 ZKUtil.deleteNodeRecursively(this.zookeeper, ZKUtil.joinZNode(this.peersZNode, id));
145 } catch (KeeperException e) {
146 throw new ReplicationException("Could not remove peer with id=" + id, e);
147 }
148 }
149
150 @Override
151 public void enablePeer(String id) throws ReplicationException {
152 changePeerState(id, ZooKeeperProtos.ReplicationState.State.ENABLED);
153 LOG.info("peer " + id + " is enabled");
154 }
155
156 @Override
157 public void disablePeer(String id) throws ReplicationException {
158 changePeerState(id, ZooKeeperProtos.ReplicationState.State.DISABLED);
159 LOG.info("peer " + id + " is disabled");
160 }
161
162 @Override
163 public String getPeerTableCFsConfig(String id) throws ReplicationException {
164 try {
165 if (!peerExists(id)) {
166 throw new IllegalArgumentException("peer " + id + " doesn't exist");
167 }
168 try {
169 return Bytes.toString(ZKUtil.getData(this.zookeeper, getTableCFsNode(id)));
170 } catch (Exception e) {
171 throw new ReplicationException(e);
172 }
173 } catch (KeeperException e) {
174 throw new ReplicationException("Unable to get tableCFs of the peer with id=" + id, e);
175 }
176 }
177
178 @Override
179 public void setPeerTableCFsConfig(String id, String tableCFsStr) throws ReplicationException {
180 try {
181 if (!peerExists(id)) {
182 throw new IllegalArgumentException("Cannot set peer tableCFs because id=" + id
183 + " does not exist.");
184 }
185 String tableCFsZKNode = getTableCFsNode(id);
186 byte[] tableCFs = Bytes.toBytes(tableCFsStr);
187 if (ZKUtil.checkExists(this.zookeeper, tableCFsZKNode) != -1) {
188 ZKUtil.setData(this.zookeeper, tableCFsZKNode, tableCFs);
189 } else {
190 ZKUtil.createAndWatch(this.zookeeper, tableCFsZKNode, tableCFs);
191 }
192 LOG.info("Peer tableCFs with id= " + id + " is now " + tableCFsStr);
193 } catch (KeeperException e) {
194 throw new ReplicationException("Unable to change tableCFs of the peer with id=" + id, e);
195 }
196 }
197
198 @Override
199 public Map<String, List<String>> getTableCFs(String id) throws IllegalArgumentException {
200 ReplicationPeer replicationPeer = this.peerClusters.get(id);
201 if (replicationPeer == null) {
202 throw new IllegalArgumentException("Peer with id= " + id + " is not connected");
203 }
204 return replicationPeer.getTableCFs();
205 }
206
207 @Override
208 public boolean getStatusOfConnectedPeer(String id) {
209 ReplicationPeer replicationPeer = this.peerClusters.get(id);
210 if (replicationPeer == null) {
211 throw new IllegalArgumentException("Peer with id= " + id + " is not connected");
212 }
213 return replicationPeer.getPeerEnabled().get();
214 }
215
216 @Override
217 public boolean getStatusOfPeerFromBackingStore(String id) throws ReplicationException {
218 try {
219 if (!peerExists(id)) {
220 throw new IllegalArgumentException("peer " + id + " doesn't exist");
221 }
222 String peerStateZNode = getPeerStateNode(id);
223 try {
224 return ReplicationPeer.isStateEnabled(ZKUtil.getData(this.zookeeper, peerStateZNode));
225 } catch (KeeperException e) {
226 throw new ReplicationException(e);
227 } catch (DeserializationException e) {
228 throw new ReplicationException(e);
229 }
230 } catch (KeeperException e) {
231 throw new ReplicationException("Unable to get status of the peer with id=" + id +
232 " from backing store", e);
233 }
234 }
235
236 @Override
237 public boolean connectToPeer(String peerId) throws ReplicationException {
238 if (peerClusters == null) {
239 return false;
240 }
241 if (this.peerClusters.containsKey(peerId)) {
242 return false;
243 }
244 ReplicationPeer peer = null;
245 try {
246 peer = getPeer(peerId);
247 } catch (Exception e) {
248 throw new ReplicationException("Error connecting to peer with id=" + peerId, e);
249 }
250 if (peer == null) {
251 return false;
252 }
253 ((ConcurrentMap<String, ReplicationPeer>) peerClusters).putIfAbsent(peerId, peer);
254 LOG.info("Added new peer cluster " + peer.getClusterKey());
255 return true;
256 }
257
258 @Override
259 public void disconnectFromPeer(String peerId) {
260 ReplicationPeer rp = this.peerClusters.get(peerId);
261 if (rp != null) {
262 rp.getZkw().close();
263 ((ConcurrentMap<String, ReplicationPeer>) peerClusters).remove(peerId, rp);
264 }
265 }
266
267 @Override
268 public Map<String, String> getAllPeerClusterKeys() {
269 Map<String, String> peers = new TreeMap<String, String>();
270 List<String> ids = null;
271 try {
272 ids = ZKUtil.listChildrenNoWatch(this.zookeeper, this.peersZNode);
273 for (String id : ids) {
274 byte[] bytes = ZKUtil.getData(this.zookeeper, ZKUtil.joinZNode(this.peersZNode, id));
275 String clusterKey = null;
276 try {
277 clusterKey = parsePeerFrom(bytes);
278 } catch (DeserializationException de) {
279 LOG.warn("Failed parse of clusterid=" + id + " znode content, continuing.");
280 continue;
281 }
282 peers.put(id, clusterKey);
283 }
284 } catch (KeeperException e) {
285 this.abortable.abort("Cannot get the list of peers ", e);
286 }
287 return peers;
288 }
289
290 @Override
291 public List<ServerName> getRegionServersOfConnectedPeer(String peerId) {
292 if (this.peerClusters.size() == 0) {
293 return Collections.emptyList();
294 }
295 ReplicationPeer peer = this.peerClusters.get(peerId);
296 if (peer == null) {
297 return Collections.emptyList();
298 }
299
300
301
302
303 synchronized (peer) {
304 List<ServerName> addresses;
305 try {
306 addresses = fetchSlavesAddresses(peer.getZkw());
307 }
308 catch (KeeperException ke) {
309 if (LOG.isDebugEnabled()) {
310 LOG.debug("Fetch salves addresses failed.", ke);
311 }
312 reconnectPeer(ke, peer);
313 addresses = Collections.emptyList();
314 }
315 peer.setRegionServers(addresses);
316 }
317
318 return peer.getRegionServers();
319 }
320
321 @Override
322 public UUID getPeerUUID(String peerId) {
323 ReplicationPeer peer = this.peerClusters.get(peerId);
324 if (peer == null) {
325 return null;
326 }
327 UUID peerUUID = null;
328
329
330
331
332 synchronized (peer) {
333 try {
334 peerUUID = ZKClusterId.getUUIDForCluster(peer.getZkw());
335 } catch (KeeperException ke) {
336 reconnectPeer(ke, peer);
337 }
338 }
339 return peerUUID;
340 }
341
342 @Override
343 public Set<String> getConnectedPeers() {
344 return this.peerClusters.keySet();
345 }
346
347 @Override
348 public Configuration getPeerConf(String peerId) throws ReplicationException {
349 String znode = ZKUtil.joinZNode(this.peersZNode, peerId);
350 byte[] data = null;
351 try {
352 data = ZKUtil.getData(this.zookeeper, znode);
353 } catch (KeeperException e) {
354 throw new ReplicationException("Error getting configuration for peer with id="
355 + peerId, e);
356 }
357 if (data == null) {
358 LOG.error("Could not get configuration for peer because it doesn't exist. peerId=" + peerId);
359 return null;
360 }
361 String otherClusterKey = "";
362 try {
363 otherClusterKey = parsePeerFrom(data);
364 } catch (DeserializationException e) {
365 LOG.warn("Failed to parse cluster key from peerId=" + peerId
366 + ", specifically the content from the following znode: " + znode);
367 return null;
368 }
369
370 Configuration otherConf = new Configuration(this.conf);
371 try {
372 ZKUtil.applyClusterKeyToConf(otherConf, otherClusterKey);
373 } catch (IOException e) {
374 LOG.error("Can't get peer configuration for peerId=" + peerId + " because:", e);
375 return null;
376 }
377 return otherConf;
378 }
379
380
381
382
383 @Override
384 public List<String> getAllPeerIds() {
385 List<String> ids = null;
386 try {
387 ids = ZKUtil.listChildrenAndWatchThem(this.zookeeper, this.peersZNode);
388 } catch (KeeperException e) {
389 this.abortable.abort("Cannot get the list of peers ", e);
390 }
391 return ids;
392 }
393
394 @Override
395 public long getTimestampOfLastChangeToPeer(String peerId) {
396 ReplicationPeer peer = this.peerClusters.get(peerId);
397 if (peer == null) {
398 throw new IllegalArgumentException("Unknown peer id: " + peerId);
399 }
400 return peer.getLastRegionserverUpdate();
401 }
402
403
404
405
406
407 private void connectExistingPeers() throws ReplicationException {
408 List<String> znodes = null;
409 try {
410 znodes = ZKUtil.listChildrenNoWatch(this.zookeeper, this.peersZNode);
411 } catch (KeeperException e) {
412 throw new ReplicationException("Error getting the list of peer clusters.", e);
413 }
414 if (znodes != null) {
415 for (String z : znodes) {
416 connectToPeer(z);
417 }
418 }
419 }
420
421
422
423
424
425
426 private void reconnectPeer(KeeperException ke, ReplicationPeer peer) {
427 if (ke instanceof ConnectionLossException || ke instanceof SessionExpiredException
428 || ke instanceof AuthFailedException) {
429 LOG.warn("Lost the ZooKeeper connection for peer " + peer.getClusterKey(), ke);
430 try {
431 peer.reloadZkWatcher();
432 peer.getZkw().registerListener(new PeerRegionServerListener(peer));
433 } catch (IOException io) {
434 LOG.warn("Creation of ZookeeperWatcher failed for peer " + peer.getClusterKey(), io);
435 }
436 }
437 }
438
439
440
441
442
443
444 private static List<ServerName> fetchSlavesAddresses(ZooKeeperWatcher zkw)
445 throws KeeperException {
446 List<String> children = ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.rsZNode);
447 if (children == null) {
448 return Collections.emptyList();
449 }
450 List<ServerName> addresses = new ArrayList<ServerName>(children.size());
451 for (String child : children) {
452 addresses.add(ServerName.parseServerName(child));
453 }
454 return addresses;
455 }
456
457 private String getTableCFsNode(String id) {
458 return ZKUtil.joinZNode(this.peersZNode, ZKUtil.joinZNode(id, this.tableCFsNodeName));
459 }
460
461 private String getPeerStateNode(String id) {
462 return ZKUtil.joinZNode(this.peersZNode, ZKUtil.joinZNode(id, this.peerStateNodeName));
463 }
464
465
466
467
468
469
470 private void changePeerState(String id, ZooKeeperProtos.ReplicationState.State state)
471 throws ReplicationException {
472 try {
473 if (!peerExists(id)) {
474 throw new IllegalArgumentException("Cannot enable/disable peer because id=" + id
475 + " does not exist.");
476 }
477 String peerStateZNode = getPeerStateNode(id);
478 byte[] stateBytes =
479 (state == ZooKeeperProtos.ReplicationState.State.ENABLED) ? ENABLED_ZNODE_BYTES
480 : DISABLED_ZNODE_BYTES;
481 if (ZKUtil.checkExists(this.zookeeper, peerStateZNode) != -1) {
482 ZKUtil.setData(this.zookeeper, peerStateZNode, stateBytes);
483 } else {
484 ZKUtil.createAndWatch(this.zookeeper, peerStateZNode, stateBytes);
485 }
486 LOG.info("Peer with id= " + id + " is now " + state.name());
487 } catch (KeeperException e) {
488 throw new ReplicationException("Unable to change state of the peer with id=" + id, e);
489 }
490 }
491
492
493
494
495
496
497
498 private ReplicationPeer getPeer(String peerId) throws ReplicationException {
499 Configuration peerConf = getPeerConf(peerId);
500 if (peerConf == null) {
501 return null;
502 }
503 if (this.ourClusterKey.equals(ZKUtil.getZooKeeperClusterKey(peerConf))) {
504 LOG.debug("Not connecting to " + peerId + " because it's us");
505 return null;
506 }
507
508 ReplicationPeer peer =
509 new ReplicationPeer(peerConf, peerId);
510 try {
511 peer.startStateTracker(this.zookeeper, this.getPeerStateNode(peerId));
512 } catch (KeeperException e) {
513 throw new ReplicationException("Error starting the peer state tracker for peerId=" +
514 peerId, e);
515 }
516
517 try {
518 peer.startTableCFsTracker(this.zookeeper, this.getTableCFsNode(peerId));
519 } catch (KeeperException e) {
520 throw new ReplicationException("Error starting the peer tableCFs tracker for peerId=" +
521 peerId, e);
522 }
523
524 peer.getZkw().registerListener(new PeerRegionServerListener(peer));
525 return peer;
526 }
527
528
529
530
531
532
533 private static String parsePeerFrom(final byte[] bytes) throws DeserializationException {
534 if (ProtobufUtil.isPBMagicPrefix(bytes)) {
535 int pblen = ProtobufUtil.lengthOfPBMagic();
536 ZooKeeperProtos.ReplicationPeer.Builder builder =
537 ZooKeeperProtos.ReplicationPeer.newBuilder();
538 ZooKeeperProtos.ReplicationPeer peer;
539 try {
540 peer = builder.mergeFrom(bytes, pblen, bytes.length - pblen).build();
541 } catch (InvalidProtocolBufferException e) {
542 throw new DeserializationException(e);
543 }
544 return peer.getClusterkey();
545 } else {
546 if (bytes.length > 0) {
547 return Bytes.toString(bytes);
548 }
549 return "";
550 }
551 }
552
553
554
555
556
557
558
559 private static byte[] toByteArray(final String clusterKey) {
560 byte[] bytes =
561 ZooKeeperProtos.ReplicationPeer.newBuilder().setClusterkey(clusterKey).build()
562 .toByteArray();
563 return ProtobufUtil.prependPBMagic(bytes);
564 }
565
566
567
568
569 public static class PeerRegionServerListener extends ZooKeeperListener {
570
571 private ReplicationPeer peer;
572 private String regionServerListNode;
573
574 public PeerRegionServerListener(ReplicationPeer replicationPeer) {
575 super(replicationPeer.getZkw());
576 this.peer = replicationPeer;
577 this.regionServerListNode = peer.getZkw().rsZNode;
578 }
579
580 public PeerRegionServerListener(String regionServerListNode, ZooKeeperWatcher zkw) {
581 super(zkw);
582 this.regionServerListNode = regionServerListNode;
583 }
584
585 @Override
586 public synchronized void nodeChildrenChanged(String path) {
587 if (path.equals(regionServerListNode)) {
588 try {
589 LOG.info("Detected change to peer regionservers, fetching updated list");
590 peer.setRegionServers(fetchSlavesAddresses(peer.getZkw()));
591 } catch (KeeperException e) {
592 LOG.fatal("Error reading slave addresses", e);
593 }
594 }
595 }
596
597 }
598 }