View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.replication;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Collections;
24  import java.util.HashMap;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.Set;
28  import java.util.TreeMap;
29  import java.util.UUID;
30  import java.util.concurrent.ConcurrentHashMap;
31  import java.util.concurrent.ConcurrentMap;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.hbase.Abortable;
37  import org.apache.hadoop.hbase.ServerName;
38  import org.apache.hadoop.hbase.exceptions.DeserializationException;
39  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
40  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
41  import org.apache.hadoop.hbase.util.Bytes;
42  import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
43  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
44  import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
45  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
46  import org.apache.zookeeper.KeeperException;
47  import org.apache.zookeeper.KeeperException.AuthFailedException;
48  import org.apache.zookeeper.KeeperException.ConnectionLossException;
49  import org.apache.zookeeper.KeeperException.SessionExpiredException;
50  
51  import com.google.protobuf.InvalidProtocolBufferException;
52  
53  /**
54   * This class provides an implementation of the ReplicationPeers interface using Zookeeper. The
55   * peers znode contains a list of all peer replication clusters and the current replication state of
56   * those clusters. It has one child peer znode for each peer cluster. The peer znode is named with
57   * the cluster id provided by the user in the HBase shell. The value of the peer znode contains the
58   * peers cluster key provided by the user in the HBase Shell. The cluster key contains a list of
59   * zookeeper quorum peers, the client port for the zookeeper quorum, and the base znode for HBase.
60   * For example:
61   *
62   *  /hbase/replication/peers/1 [Value: zk1.host.com,zk2.host.com,zk3.host.com:2181:/hbase]
63   *  /hbase/replication/peers/2 [Value: zk5.host.com,zk6.host.com,zk7.host.com:2181:/hbase]
64   *
65   * Each of these peer znodes has a child znode that indicates whether or not replication is enabled
66   * on that peer cluster. These peer-state znodes do not have child znodes and simply contain a
67   * boolean value (i.e. ENABLED or DISABLED). This value is read/maintained by the
68   * ReplicationPeer.PeerStateTracker class. For example:
69   *
70   * /hbase/replication/peers/1/peer-state [Value: ENABLED]
71   *
72   * Each of these peer znodes has a child znode that indicates which data will be replicated
73   * to the peer cluster. These peer-tableCFs znodes do not have child znodes and only have a
74   * table/cf list config. This value is read/maintained by the ReplicationPeer.TableCFsTracker
75   * class. For example:
76   *
77   * /hbase/replication/peers/1/tableCFs [Value: "table1; table2:cf1,cf3; table3:cfx,cfy"]
78   */
79  public class ReplicationPeersZKImpl extends ReplicationStateZKBase implements ReplicationPeers {
80  
81    // Map of peer clusters keyed by their id
82    private Map<String, ReplicationPeer> peerClusters;
83    private final String tableCFsNodeName;
84  
85    private static final Log LOG = LogFactory.getLog(ReplicationPeersZKImpl.class);
86  
87    public ReplicationPeersZKImpl(final ZooKeeperWatcher zk, final Configuration conf,
88        Abortable abortable) {
89      super(zk, conf, abortable);
90      this.tableCFsNodeName = conf.get("zookeeper.znode.replication.peers.tableCFs", "tableCFs");
91      this.peerClusters = new ConcurrentHashMap<String, ReplicationPeer>();
92    }
93  
94    @Override
95    public void init() throws ReplicationException {
96      try {
97        if (ZKUtil.checkExists(this.zookeeper, this.peersZNode) < 0) {
98          ZKUtil.createWithParents(this.zookeeper, this.peersZNode);
99        }
100     } catch (KeeperException e) {
101       throw new ReplicationException("Could not initialize replication peers", e);
102     }
103     connectExistingPeers();
104   }
105 
106   @Override
107   public void addPeer(String id, String clusterKey) throws ReplicationException {
108     addPeer(id, clusterKey, null);
109   }
110 
111   @Override
112   public void addPeer(String id, String clusterKey, String tableCFs) throws ReplicationException {
113     try {
114       if (peerExists(id)) {
115         throw new IllegalArgumentException("Cannot add a peer with id=" + id
116             + " because that id already exists.");
117       }
118       ZKUtil.createWithParents(this.zookeeper, this.peersZNode);
119       ZKUtil.createAndWatch(this.zookeeper, ZKUtil.joinZNode(this.peersZNode, id),
120         toByteArray(clusterKey));
121       // There is a race b/w PeerWatcher and ReplicationZookeeper#add method to create the
122       // peer-state znode. This happens while adding a peer.
123       // The peer state data is set as "ENABLED" by default.
124       ZKUtil.createNodeIfNotExistsAndWatch(this.zookeeper, getPeerStateNode(id),
125         ENABLED_ZNODE_BYTES);
126       // A peer is enabled by default
127 
128       String tableCFsStr = (tableCFs == null) ? "" : tableCFs;
129       ZKUtil.createNodeIfNotExistsAndWatch(this.zookeeper, getTableCFsNode(id),
130                     Bytes.toBytes(tableCFsStr));
131     } catch (KeeperException e) {
132       throw new ReplicationException("Could not add peer with id=" + id
133           + ", clusterKey=" + clusterKey, e);
134     }
135   }
136 
137   @Override
138   public void removePeer(String id) throws ReplicationException {
139     try {
140       if (!peerExists(id)) {
141         throw new IllegalArgumentException("Cannot remove peer with id=" + id
142             + " because that id does not exist.");
143       }
144       ZKUtil.deleteNodeRecursively(this.zookeeper, ZKUtil.joinZNode(this.peersZNode, id));
145     } catch (KeeperException e) {
146       throw new ReplicationException("Could not remove peer with id=" + id, e);
147     }
148   }
149 
150   @Override
151   public void enablePeer(String id) throws ReplicationException {
152     changePeerState(id, ZooKeeperProtos.ReplicationState.State.ENABLED);
153     LOG.info("peer " + id + " is enabled");
154   }
155 
156   @Override
157   public void disablePeer(String id) throws ReplicationException {
158     changePeerState(id, ZooKeeperProtos.ReplicationState.State.DISABLED);
159     LOG.info("peer " + id + " is disabled");
160   }
161 
162   @Override
163   public String getPeerTableCFsConfig(String id) throws ReplicationException {
164     try {
165       if (!peerExists(id)) {
166         throw new IllegalArgumentException("peer " + id + " doesn't exist");
167       }
168       try {
169         return Bytes.toString(ZKUtil.getData(this.zookeeper, getTableCFsNode(id)));
170       } catch (Exception e) {
171         throw new ReplicationException(e);
172       }
173     } catch (KeeperException e) {
174       throw new ReplicationException("Unable to get tableCFs of the peer with id=" + id, e);
175     }
176   }
177 
178   @Override
179   public void setPeerTableCFsConfig(String id, String tableCFsStr) throws ReplicationException {
180     try {
181       if (!peerExists(id)) {
182         throw new IllegalArgumentException("Cannot set peer tableCFs because id=" + id
183             + " does not exist.");
184       }
185       String tableCFsZKNode = getTableCFsNode(id);
186       byte[] tableCFs = Bytes.toBytes(tableCFsStr);
187       if (ZKUtil.checkExists(this.zookeeper, tableCFsZKNode) != -1) {
188         ZKUtil.setData(this.zookeeper, tableCFsZKNode, tableCFs);
189       } else {
190         ZKUtil.createAndWatch(this.zookeeper, tableCFsZKNode, tableCFs);
191       }
192       LOG.info("Peer tableCFs with id= " + id + " is now " + tableCFsStr);
193     } catch (KeeperException e) {
194       throw new ReplicationException("Unable to change tableCFs of the peer with id=" + id, e);
195     }
196   }
197 
198   @Override
199   public Map<String, List<String>> getTableCFs(String id) throws IllegalArgumentException {
200     ReplicationPeer replicationPeer = this.peerClusters.get(id);
201     if (replicationPeer == null) {
202       throw new IllegalArgumentException("Peer with id= " + id + " is not connected");
203     }
204     return replicationPeer.getTableCFs();
205   }
206 
207   @Override
208   public boolean getStatusOfConnectedPeer(String id) {
209     ReplicationPeer replicationPeer = this.peerClusters.get(id);
210     if (replicationPeer == null) {
211       throw new IllegalArgumentException("Peer with id= " + id + " is not connected");
212     } 
213     return replicationPeer.getPeerEnabled().get();
214   }
215 
216   @Override
217   public boolean getStatusOfPeerFromBackingStore(String id) throws ReplicationException {
218     try {
219       if (!peerExists(id)) {
220         throw new IllegalArgumentException("peer " + id + " doesn't exist");
221       }
222       String peerStateZNode = getPeerStateNode(id);
223       try {
224         return ReplicationPeer.isStateEnabled(ZKUtil.getData(this.zookeeper, peerStateZNode));
225       } catch (KeeperException e) {
226         throw new ReplicationException(e);
227       } catch (DeserializationException e) {
228         throw new ReplicationException(e);
229       }
230     } catch (KeeperException e) {
231       throw new ReplicationException("Unable to get status of the peer with id=" + id +
232           " from backing store", e);
233     }
234   }
235 
236   @Override
237   public boolean connectToPeer(String peerId) throws ReplicationException {
238     if (peerClusters == null) {
239       return false;
240     }
241     if (this.peerClusters.containsKey(peerId)) {
242       return false;
243     }
244     ReplicationPeer peer = null;
245     try {
246       peer = getPeer(peerId);
247     } catch (Exception e) {
248       throw new ReplicationException("Error connecting to peer with id=" + peerId, e);
249     }
250     if (peer == null) {
251       return false;
252     }
253     ((ConcurrentMap<String, ReplicationPeer>) peerClusters).putIfAbsent(peerId, peer);
254     LOG.info("Added new peer cluster " + peer.getClusterKey());
255     return true;
256   }
257 
258   @Override
259   public void disconnectFromPeer(String peerId) {
260     ReplicationPeer rp = this.peerClusters.get(peerId);
261     if (rp != null) {
262       rp.getZkw().close();
263       ((ConcurrentMap<String, ReplicationPeer>) peerClusters).remove(peerId, rp);
264     }
265   }
266 
267   @Override
268   public Map<String, String> getAllPeerClusterKeys() {
269     Map<String, String> peers = new TreeMap<String, String>();
270     List<String> ids = null;
271     try {
272       ids = ZKUtil.listChildrenNoWatch(this.zookeeper, this.peersZNode);
273       for (String id : ids) {
274         byte[] bytes = ZKUtil.getData(this.zookeeper, ZKUtil.joinZNode(this.peersZNode, id));
275         String clusterKey = null;
276         try {
277           clusterKey = parsePeerFrom(bytes);
278         } catch (DeserializationException de) {
279           LOG.warn("Failed parse of clusterid=" + id + " znode content, continuing.");
280           continue;
281         }
282         peers.put(id, clusterKey);
283       }
284     } catch (KeeperException e) {
285       this.abortable.abort("Cannot get the list of peers ", e);
286     }
287     return peers;
288   }
289 
290   @Override
291   public List<ServerName> getRegionServersOfConnectedPeer(String peerId) {
292     if (this.peerClusters.size() == 0) {
293       return Collections.emptyList();
294     }
295     ReplicationPeer peer = this.peerClusters.get(peerId);
296     if (peer == null) {
297       return Collections.emptyList();
298     }
299     // Synchronize peer cluster connection attempts to avoid races and rate
300     // limit connections when multiple replication sources try to connect to
301     // the peer cluster. If the peer cluster is down we can get out of control
302     // over time.
303     synchronized (peer) {
304       List<ServerName> addresses;
305       try {
306         addresses = fetchSlavesAddresses(peer.getZkw());
307       } 
308       catch (KeeperException ke) {
309         if (LOG.isDebugEnabled()) {
310           LOG.debug("Fetch salves addresses failed.", ke);
311         }
312         reconnectPeer(ke, peer);
313         addresses = Collections.emptyList();
314       }
315       peer.setRegionServers(addresses);
316     }
317     
318     return peer.getRegionServers();
319   }
320 
321   @Override
322   public UUID getPeerUUID(String peerId) {
323     ReplicationPeer peer = this.peerClusters.get(peerId);
324     if (peer == null) {
325       return null;
326     }
327     UUID peerUUID = null;
328     // Synchronize peer cluster connection attempts to avoid races and rate
329     // limit connections when multiple replication sources try to connect to
330     // the peer cluster. If the peer cluster is down we can get out of control
331     // over time.
332     synchronized (peer) {
333       try {
334         peerUUID = ZKClusterId.getUUIDForCluster(peer.getZkw());
335       } catch (KeeperException ke) {
336         reconnectPeer(ke, peer);
337       }
338     }
339     return peerUUID;
340   }
341 
342   @Override
343   public Set<String> getConnectedPeers() {
344     return this.peerClusters.keySet();
345   }
346 
347   @Override
348   public Configuration getPeerConf(String peerId) throws ReplicationException {
349     String znode = ZKUtil.joinZNode(this.peersZNode, peerId);
350     byte[] data = null;
351     try {
352       data = ZKUtil.getData(this.zookeeper, znode);
353     } catch (KeeperException e) {
354       throw new ReplicationException("Error getting configuration for peer with id="
355           + peerId, e);
356     }
357     if (data == null) {
358       LOG.error("Could not get configuration for peer because it doesn't exist. peerId=" + peerId);
359       return null;
360     }
361     String otherClusterKey = "";
362     try {
363       otherClusterKey = parsePeerFrom(data);
364     } catch (DeserializationException e) {
365       LOG.warn("Failed to parse cluster key from peerId=" + peerId
366           + ", specifically the content from the following znode: " + znode);
367       return null;
368     }
369 
370     Configuration otherConf = new Configuration(this.conf);
371     try {
372       ZKUtil.applyClusterKeyToConf(otherConf, otherClusterKey);
373     } catch (IOException e) {
374       LOG.error("Can't get peer configuration for peerId=" + peerId + " because:", e);
375       return null;
376     }
377     return otherConf;
378   }
379 
380   /**
381    * List all registered peer clusters and set a watch on their znodes.
382    */
383   @Override
384   public List<String> getAllPeerIds() {
385     List<String> ids = null;
386     try {
387       ids = ZKUtil.listChildrenAndWatchThem(this.zookeeper, this.peersZNode);
388     } catch (KeeperException e) {
389       this.abortable.abort("Cannot get the list of peers ", e);
390     }
391     return ids;
392   }
393 
394   @Override
395   public long getTimestampOfLastChangeToPeer(String peerId) {
396     ReplicationPeer peer = this.peerClusters.get(peerId);
397     if (peer == null) {
398       throw new IllegalArgumentException("Unknown peer id: " + peerId);
399     }
400     return peer.getLastRegionserverUpdate();
401   }
402 
403   /**
404    * A private method used during initialization. This method attempts to connect to all registered
405    * peer clusters. This method does not set a watch on the peer cluster znodes.
406    */
407   private void connectExistingPeers() throws ReplicationException {
408     List<String> znodes = null;
409     try {
410       znodes = ZKUtil.listChildrenNoWatch(this.zookeeper, this.peersZNode);
411     } catch (KeeperException e) {
412       throw new ReplicationException("Error getting the list of peer clusters.", e);
413     }
414     if (znodes != null) {
415       for (String z : znodes) {
416         connectToPeer(z);
417       }
418     }
419   }
420 
421   /**
422    * A private method used to re-establish a zookeeper session with a peer cluster.
423    * @param ke
424    * @param peer
425    */
426   private void reconnectPeer(KeeperException ke, ReplicationPeer peer) {
427     if (ke instanceof ConnectionLossException || ke instanceof SessionExpiredException
428         || ke instanceof AuthFailedException) {
429       LOG.warn("Lost the ZooKeeper connection for peer " + peer.getClusterKey(), ke);
430       try {
431         peer.reloadZkWatcher();
432         peer.getZkw().registerListener(new PeerRegionServerListener(peer));
433       } catch (IOException io) {
434         LOG.warn("Creation of ZookeeperWatcher failed for peer " + peer.getClusterKey(), io);
435       }
436     }
437   }
438 
439   /**
440    * Get the list of all the region servers from the specified peer
441    * @param zkw zk connection to use
442    * @return list of region server addresses or an empty list if the slave is unavailable
443    */
444   private static List<ServerName> fetchSlavesAddresses(ZooKeeperWatcher zkw)
445       throws KeeperException {
446     List<String> children = ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.rsZNode);
447     if (children == null) {
448       return Collections.emptyList();
449     }
450     List<ServerName> addresses = new ArrayList<ServerName>(children.size());
451     for (String child : children) {
452       addresses.add(ServerName.parseServerName(child));
453     }
454     return addresses;
455   }
456 
457   private String getTableCFsNode(String id) {
458     return ZKUtil.joinZNode(this.peersZNode, ZKUtil.joinZNode(id, this.tableCFsNodeName));
459   }
460 
461   private String getPeerStateNode(String id) {
462     return ZKUtil.joinZNode(this.peersZNode, ZKUtil.joinZNode(id, this.peerStateNodeName));
463   }
464 
465   /**
466    * Update the state znode of a peer cluster.
467    * @param id
468    * @param state
469    */
470   private void changePeerState(String id, ZooKeeperProtos.ReplicationState.State state)
471       throws ReplicationException {
472     try {
473       if (!peerExists(id)) {
474         throw new IllegalArgumentException("Cannot enable/disable peer because id=" + id
475             + " does not exist.");
476       }
477       String peerStateZNode = getPeerStateNode(id);
478       byte[] stateBytes =
479           (state == ZooKeeperProtos.ReplicationState.State.ENABLED) ? ENABLED_ZNODE_BYTES
480               : DISABLED_ZNODE_BYTES;
481       if (ZKUtil.checkExists(this.zookeeper, peerStateZNode) != -1) {
482         ZKUtil.setData(this.zookeeper, peerStateZNode, stateBytes);
483       } else {
484         ZKUtil.createAndWatch(this.zookeeper, peerStateZNode, stateBytes);
485       }
486       LOG.info("Peer with id= " + id + " is now " + state.name());
487     } catch (KeeperException e) {
488       throw new ReplicationException("Unable to change state of the peer with id=" + id, e);
489     }
490   }
491 
492   /**
493    * Helper method to connect to a peer
494    * @param peerId peer's identifier
495    * @return object representing the peer
496    * @throws ReplicationException
497    */
498   private ReplicationPeer getPeer(String peerId) throws ReplicationException {
499     Configuration peerConf = getPeerConf(peerId);
500     if (peerConf == null) {
501       return null;
502     }
503     if (this.ourClusterKey.equals(ZKUtil.getZooKeeperClusterKey(peerConf))) {
504       LOG.debug("Not connecting to " + peerId + " because it's us");
505       return null;
506     }
507 
508     ReplicationPeer peer =
509         new ReplicationPeer(peerConf, peerId);
510     try {
511       peer.startStateTracker(this.zookeeper, this.getPeerStateNode(peerId));
512     } catch (KeeperException e) {
513       throw new ReplicationException("Error starting the peer state tracker for peerId=" +
514           peerId, e);
515     }
516 
517     try {
518       peer.startTableCFsTracker(this.zookeeper, this.getTableCFsNode(peerId));
519     } catch (KeeperException e) {
520       throw new ReplicationException("Error starting the peer tableCFs tracker for peerId=" +
521           peerId, e);
522     }
523 
524     peer.getZkw().registerListener(new PeerRegionServerListener(peer));
525     return peer;
526   }
527 
528   /**
529    * @param bytes Content of a peer znode.
530    * @return ClusterKey parsed from the passed bytes.
531    * @throws DeserializationException
532    */
533   private static String parsePeerFrom(final byte[] bytes) throws DeserializationException {
534     if (ProtobufUtil.isPBMagicPrefix(bytes)) {
535       int pblen = ProtobufUtil.lengthOfPBMagic();
536       ZooKeeperProtos.ReplicationPeer.Builder builder =
537           ZooKeeperProtos.ReplicationPeer.newBuilder();
538       ZooKeeperProtos.ReplicationPeer peer;
539       try {
540         peer = builder.mergeFrom(bytes, pblen, bytes.length - pblen).build();
541       } catch (InvalidProtocolBufferException e) {
542         throw new DeserializationException(e);
543       }
544       return peer.getClusterkey();
545     } else {
546       if (bytes.length > 0) {
547         return Bytes.toString(bytes);
548       }
549       return "";
550     }
551   }
552 
553   /**
554    * @param clusterKey
555    * @return Serialized protobuf of <code>clusterKey</code> with pb magic prefix prepended suitable
556    *         for use as content of a this.peersZNode; i.e. the content of PEER_ID znode under
557    *         /hbase/replication/peers/PEER_ID
558    */
559   private static byte[] toByteArray(final String clusterKey) {
560     byte[] bytes =
561         ZooKeeperProtos.ReplicationPeer.newBuilder().setClusterkey(clusterKey).build()
562             .toByteArray();
563     return ProtobufUtil.prependPBMagic(bytes);
564   }
565 
566   /**
567    * Tracks changes to the list of region servers in a peer's cluster.
568    */
569   public static class PeerRegionServerListener extends ZooKeeperListener {
570 
571     private ReplicationPeer peer;
572     private String regionServerListNode;
573 
574     public PeerRegionServerListener(ReplicationPeer replicationPeer) {
575       super(replicationPeer.getZkw());
576       this.peer = replicationPeer;
577       this.regionServerListNode = peer.getZkw().rsZNode;
578     }
579 
580     public PeerRegionServerListener(String regionServerListNode, ZooKeeperWatcher zkw) {
581       super(zkw);
582       this.regionServerListNode = regionServerListNode;
583     }
584 
585     @Override
586     public synchronized void nodeChildrenChanged(String path) {
587       if (path.equals(regionServerListNode)) {
588         try {
589           LOG.info("Detected change to peer regionservers, fetching updated list");
590           peer.setRegionServers(fetchSlavesAddresses(peer.getZkw()));
591         } catch (KeeperException e) {
592           LOG.fatal("Error reading slave addresses", e);
593         }
594       }
595     }
596 
597   }
598 }