View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.zookeeper;
20  
21  import java.io.Closeable;
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.List;
25  import java.util.concurrent.CopyOnWriteArrayList;
26  import java.util.concurrent.CountDownLatch;
27  
28  import org.apache.commons.logging.Log;
29  import org.apache.commons.logging.LogFactory;
30  import org.apache.hadoop.hbase.classification.InterfaceAudience;
31  import org.apache.hadoop.hbase.util.Threads;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.hbase.Abortable;
34  import org.apache.hadoop.hbase.HConstants;
35  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
36  import org.apache.hadoop.security.UserGroupInformation;
37  import org.apache.zookeeper.KeeperException;
38  import org.apache.zookeeper.WatchedEvent;
39  import org.apache.zookeeper.Watcher;
40  import org.apache.zookeeper.ZooDefs;
41  import org.apache.zookeeper.ZooDefs.Ids;
42  import org.apache.zookeeper.ZooDefs.Perms;
43  import org.apache.zookeeper.data.ACL;
44  import org.apache.zookeeper.data.Id;
45  import org.apache.zookeeper.data.Stat;
46  
47  /**
48   * Acts as the single ZooKeeper Watcher.  One instance of this is instantiated
49   * for each Master, RegionServer, and client process.
50   *
51   * <p>This is the only class that implements {@link Watcher}.  Other internal
52   * classes which need to be notified of ZooKeeper events must register with
53   * the local instance of this watcher via {@link #registerListener}.
54   *
55   * <p>This class also holds and manages the connection to ZooKeeper.  Code to
56   * deal with connection related events and exceptions are handled here.
57   */
58  @InterfaceAudience.Private
59  public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
60    private static final Log LOG = LogFactory.getLog(ZooKeeperWatcher.class);
61  
62    // Identifier for this watcher (for logging only).  It is made of the prefix
63    // passed on construction and the zookeeper sessionid.
64    private String prefix;
65    private String identifier;
66  
67    // zookeeper quorum
68    private String quorum;
69  
70    // zookeeper connection
71    private RecoverableZooKeeper recoverableZooKeeper;
72  
73    // abortable in case of zk failure
74    protected Abortable abortable;
75    // Used if abortable is null
76    private boolean aborted = false;
77  
78    // listeners to be notified
79    private final List<ZooKeeperListener> listeners =
80      new CopyOnWriteArrayList<ZooKeeperListener>();
81  
82    // Used by ZKUtil:waitForZKConnectionIfAuthenticating to wait for SASL
83    // negotiation to complete
84    public CountDownLatch saslLatch = new CountDownLatch(1);
85  
86    // node names
87  
88    // base znode for this cluster
89    public String baseZNode;
90    // znode containing location of server hosting meta region
91    public String metaServerZNode;
92    // znode containing ephemeral nodes of the regionservers
93    public String rsZNode;
94    // znode containing ephemeral nodes of the draining regionservers
95    public String drainingZNode;
96    // znode of currently active master
97    private String masterAddressZNode;
98    // znode of this master in backup master directory, if not the active master
99    public String backupMasterAddressesZNode;
100   // znode containing the current cluster state
101   public String clusterStateZNode;
102   // znode used for region transitioning and assignment
103   public String assignmentZNode;
104   // znode used for table disabling/enabling
105   public String tableZNode;
106   // znode containing the unique cluster ID
107   public String clusterIdZNode;
108   // znode used for log splitting work assignment
109   public String splitLogZNode;
110   // znode containing the state of the load balancer
111   public String balancerZNode;
112   // znode containing the lock for the tables
113   public String tableLockZNode;
114   // znode containing the state of recovering regions
115   public String recoveringRegionsZNode;
116   // znode containing namespace descriptors
117   public static String namespaceZNode = "namespace";
118 
119   // Certain ZooKeeper nodes need to be world-readable
120   public static final ArrayList<ACL> CREATOR_ALL_AND_WORLD_READABLE =
121     new ArrayList<ACL>() { {
122       add(new ACL(ZooDefs.Perms.READ,ZooDefs.Ids.ANYONE_ID_UNSAFE));
123       add(new ACL(ZooDefs.Perms.ALL,ZooDefs.Ids.AUTH_IDS));
124     }};
125 
126   private final Configuration conf;
127 
128   private final Exception constructorCaller;
129 
130   /**
131    * Instantiate a ZooKeeper connection and watcher.
132    * @param identifier string that is passed to RecoverableZookeeper to be used as
133    * identifier for this instance. Use null for default.
134    * @throws IOException
135    * @throws ZooKeeperConnectionException
136    */
137   public ZooKeeperWatcher(Configuration conf, String identifier,
138       Abortable abortable) throws ZooKeeperConnectionException, IOException {
139     this(conf, identifier, abortable, false);
140   }
141 
142   /**
143    * Instantiate a ZooKeeper connection and watcher.
144    * @param conf
145    * @param identifier string that is passed to RecoverableZookeeper to be used as identifier for
146    *          this instance. Use null for default.
147    * @param abortable Can be null if there is on error there is no host to abort: e.g. client
148    *          context.
149    * @param canCreateBaseZNode
150    * @throws IOException
151    * @throws ZooKeeperConnectionException
152    */
153   public ZooKeeperWatcher(Configuration conf, String identifier,
154       Abortable abortable, boolean canCreateBaseZNode)
155   throws IOException, ZooKeeperConnectionException {
156     this.conf = conf;
157     // Capture a stack trace now.  Will print it out later if problem so we can
158     // distingush amongst the myriad ZKWs.
159     try {
160       throw new Exception("ZKW CONSTRUCTOR STACK TRACE FOR DEBUGGING");
161     } catch (Exception e) {
162       this.constructorCaller = e;
163     }
164     this.quorum = ZKConfig.getZKQuorumServersString(conf);
165     this.prefix = identifier;
166     // Identifier will get the sessionid appended later below down when we
167     // handle the syncconnect event.
168     this.identifier = identifier + "0x0";
169     this.abortable = abortable;
170     setNodeNames(conf);
171     this.recoverableZooKeeper = ZKUtil.connect(conf, quorum, this, identifier);
172     if (canCreateBaseZNode) {
173       createBaseZNodes();
174     }
175   }
176 
177   private void createBaseZNodes() throws ZooKeeperConnectionException {
178     try {
179       // Create all the necessary "directories" of znodes
180       ZKUtil.createWithParents(this, baseZNode);
181       if (conf.getBoolean("hbase.assignment.usezk", true)) {
182         ZKUtil.createAndFailSilent(this, assignmentZNode);
183       }
184       ZKUtil.createAndFailSilent(this, rsZNode);
185       ZKUtil.createAndFailSilent(this, drainingZNode);
186       ZKUtil.createAndFailSilent(this, tableZNode);
187       ZKUtil.createAndFailSilent(this, splitLogZNode);
188       ZKUtil.createAndFailSilent(this, backupMasterAddressesZNode);
189       ZKUtil.createAndFailSilent(this, tableLockZNode);
190       ZKUtil.createAndFailSilent(this, recoveringRegionsZNode);
191     } catch (KeeperException e) {
192       throw new ZooKeeperConnectionException(
193           prefix("Unexpected KeeperException creating base node"), e);
194     }
195   }
196 
197   /** Returns whether the znode is supposed to be readable by the client
198    * and DOES NOT contain sensitive information (world readable).*/
199   public boolean isClientReadable(String node) {
200     // Developer notice: These znodes are world readable. DO NOT add more znodes here UNLESS
201     // all clients need to access this data to work. Using zk for sharing data to clients (other
202     // than service lookup case is not a recommended design pattern.
203     return
204         node.equals(baseZNode) ||
205         node.equals(metaServerZNode) ||
206         node.equals(getMasterAddressZNode()) ||
207         node.equals(clusterIdZNode)||
208         node.equals(rsZNode) ||
209         // /hbase/table and /hbase/table/foo is allowed, /hbase/table-lock is not
210         node.equals(tableZNode) ||
211         node.startsWith(tableZNode + "/");
212   }
213 
214   /**
215    * On master start, we check the znode ACLs under the root directory and set the ACLs properly
216    * if needed. If the cluster goes from an unsecure setup to a secure setup, this step is needed
217    * so that the existing znodes created with open permissions are now changed with restrictive
218    * perms.
219    */
220   public void checkAndSetZNodeAcls() {
221     if (!ZKUtil.isSecureZooKeeper(getConfiguration())) {
222       return;
223     }
224 
225     // Check the base znodes permission first. Only do the recursion if base znode's perms are not
226     // correct.
227     try {
228       List<ACL> actualAcls = recoverableZooKeeper.getAcl(baseZNode, new Stat());
229 
230       if (!isBaseZnodeAclSetup(actualAcls)) {
231         LOG.info("setting znode ACLs");
232         setZnodeAclsRecursive(baseZNode);
233       }
234     } catch(KeeperException.NoNodeException nne) {
235       return;
236     } catch(InterruptedException ie) {
237       interruptedException(ie);
238     } catch (IOException e) {
239       LOG.warn("Received exception while checking and setting zookeeper ACLs", e);
240     } catch (KeeperException e) {
241       LOG.warn("Received exception while checking and setting zookeeper ACLs", e);
242     }
243   }
244 
245   /**
246    * Set the znode perms recursively. This will do post-order recursion, so that baseZnode ACLs
247    * will be set last in case the master fails in between.
248    * @param znode
249    */
250   private void setZnodeAclsRecursive(String znode) throws KeeperException, InterruptedException {
251     List<String> children = recoverableZooKeeper.getChildren(znode, false);
252 
253     for (String child : children) {
254       setZnodeAclsRecursive(ZKUtil.joinZNode(znode, child));
255     }
256     List<ACL> acls = ZKUtil.createACL(this, znode, true);
257     LOG.info("Setting ACLs for znode:" + znode + " , acl:" + acls);
258     recoverableZooKeeper.setAcl(znode, acls, -1);
259   }
260 
261   /**
262    * Checks whether the ACLs returned from the base znode (/hbase) is set for secure setup.
263    * @param acls acls from zookeeper
264    * @return whether ACLs are set for the base znode
265    * @throws IOException
266    */
267   private boolean isBaseZnodeAclSetup(List<ACL> acls) throws IOException {
268     String superUser = conf.get("hbase.superuser");
269 
270     // this assumes that current authenticated user is the same as zookeeper client user
271     // configured via JAAS
272     String hbaseUser = UserGroupInformation.getCurrentUser().getShortUserName();
273 
274     if (acls.isEmpty()) {
275       return false;
276     }
277 
278     for (ACL acl : acls) {
279       int perms = acl.getPerms();
280       Id id = acl.getId();
281       // We should only set at most 3 possible ACLs for 3 Ids. One for everyone, one for superuser
282       // and one for the hbase user
283       if (Ids.ANYONE_ID_UNSAFE.equals(id)) {
284         if (perms != Perms.READ) {
285           return false;
286         }
287       } else if (superUser != null && new Id("sasl", superUser).equals(id)) {
288         if (perms != Perms.ALL) {
289           return false;
290         }
291       } else if (new Id("sasl", hbaseUser).equals(id)) {
292         if (perms != Perms.ALL) {
293           return false;
294         }
295       } else {
296         return false;
297       }
298     }
299     return true;
300   }
301 
302   @Override
303   public String toString() {
304     return this.identifier + ", quorum=" + quorum + ", baseZNode=" + baseZNode;
305   }
306 
307   /**
308    * Adds this instance's identifier as a prefix to the passed <code>str</code>
309    * @param str String to amend.
310    * @return A new string with this instance's identifier as prefix: e.g.
311    * if passed 'hello world', the returned string could be
312    */
313   public String prefix(final String str) {
314     return this.toString() + " " + str;
315   }
316 
317   /**
318    * Set the local variable node names using the specified configuration.
319    */
320   private void setNodeNames(Configuration conf) {
321     baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
322         HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
323     metaServerZNode = ZKUtil.joinZNode(baseZNode,
324         conf.get("zookeeper.znode.metaserver", "meta-region-server"));
325     rsZNode = ZKUtil.joinZNode(baseZNode,
326         conf.get("zookeeper.znode.rs", "rs"));
327     drainingZNode = ZKUtil.joinZNode(baseZNode,
328         conf.get("zookeeper.znode.draining.rs", "draining"));
329     masterAddressZNode = ZKUtil.joinZNode(baseZNode,
330         conf.get("zookeeper.znode.master", "master"));
331     backupMasterAddressesZNode = ZKUtil.joinZNode(baseZNode,
332         conf.get("zookeeper.znode.backup.masters", "backup-masters"));
333     clusterStateZNode = ZKUtil.joinZNode(baseZNode,
334         conf.get("zookeeper.znode.state", "running"));
335     assignmentZNode = ZKUtil.joinZNode(baseZNode,
336         conf.get("zookeeper.znode.unassigned", "region-in-transition"));
337     tableZNode = ZKUtil.joinZNode(baseZNode,
338         conf.get("zookeeper.znode.tableEnableDisable", "table"));
339     clusterIdZNode = ZKUtil.joinZNode(baseZNode,
340         conf.get("zookeeper.znode.clusterId", "hbaseid"));
341     splitLogZNode = ZKUtil.joinZNode(baseZNode,
342         conf.get("zookeeper.znode.splitlog", HConstants.SPLIT_LOGDIR_NAME));
343     balancerZNode = ZKUtil.joinZNode(baseZNode,
344         conf.get("zookeeper.znode.balancer", "balancer"));
345     tableLockZNode = ZKUtil.joinZNode(baseZNode,
346         conf.get("zookeeper.znode.tableLock", "table-lock"));
347     recoveringRegionsZNode = ZKUtil.joinZNode(baseZNode,
348         conf.get("zookeeper.znode.recovering.regions", "recovering-regions"));
349     namespaceZNode = ZKUtil.joinZNode(baseZNode,
350         conf.get("zookeeper.znode.namespace", "namespace"));
351   }
352 
353   /**
354    * Register the specified listener to receive ZooKeeper events.
355    * @param listener
356    */
357   public void registerListener(ZooKeeperListener listener) {
358     listeners.add(listener);
359   }
360 
361   /**
362    * Register the specified listener to receive ZooKeeper events and add it as
363    * the first in the list of current listeners.
364    * @param listener
365    */
366   public void registerListenerFirst(ZooKeeperListener listener) {
367     listeners.add(0, listener);
368   }
369 
370   public void unregisterListener(ZooKeeperListener listener) {
371     listeners.remove(listener);
372   }
373 
374   /**
375    * Clean all existing listeners
376    */
377   public void unregisterAllListeners() {
378     listeners.clear();
379   }
380 
381   /**
382    * Get a copy of current registered listeners
383    */
384   public List<ZooKeeperListener> getListeners() {
385     return new ArrayList<ZooKeeperListener>(listeners);
386   }
387 
388   /**
389    * @return The number of currently registered listeners
390    */
391   public int getNumberOfListeners() {
392     return listeners.size();
393   }
394 
395   /**
396    * Get the connection to ZooKeeper.
397    * @return connection reference to zookeeper
398    */
399   public RecoverableZooKeeper getRecoverableZooKeeper() {
400     return recoverableZooKeeper;
401   }
402 
403   public void reconnectAfterExpiration() throws IOException, KeeperException, InterruptedException {
404     recoverableZooKeeper.reconnectAfterExpiration();
405   }
406 
407   /**
408    * Get the quorum address of this instance.
409    * @return quorum string of this zookeeper connection instance
410    */
411   public String getQuorum() {
412     return quorum;
413   }
414 
415   /**
416    * @return the base znode of this zookeeper connection instance.
417    */
418   public String getBaseZNode() {
419     return baseZNode;
420   }
421 
422   /**
423    * Method called from ZooKeeper for events and connection status.
424    * <p>
425    * Valid events are passed along to listeners.  Connection status changes
426    * are dealt with locally.
427    */
428   @Override
429   public void process(WatchedEvent event) {
430     LOG.debug(prefix("Received ZooKeeper Event, " +
431         "type=" + event.getType() + ", " +
432         "state=" + event.getState() + ", " +
433         "path=" + event.getPath()));
434 
435     switch(event.getType()) {
436 
437       // If event type is NONE, this is a connection status change
438       case None: {
439         connectionEvent(event);
440         break;
441       }
442 
443       // Otherwise pass along to the listeners
444 
445       case NodeCreated: {
446         for(ZooKeeperListener listener : listeners) {
447           listener.nodeCreated(event.getPath());
448         }
449         break;
450       }
451 
452       case NodeDeleted: {
453         for(ZooKeeperListener listener : listeners) {
454           listener.nodeDeleted(event.getPath());
455         }
456         break;
457       }
458 
459       case NodeDataChanged: {
460         for(ZooKeeperListener listener : listeners) {
461           listener.nodeDataChanged(event.getPath());
462         }
463         break;
464       }
465 
466       case NodeChildrenChanged: {
467         for(ZooKeeperListener listener : listeners) {
468           listener.nodeChildrenChanged(event.getPath());
469         }
470         break;
471       }
472     }
473   }
474 
475   // Connection management
476 
477   /**
478    * Called when there is a connection-related event via the Watcher callback.
479    * <p>
480    * If Disconnected or Expired, this should shutdown the cluster. But, since
481    * we send a KeeperException.SessionExpiredException along with the abort
482    * call, it's possible for the Abortable to catch it and try to create a new
483    * session with ZooKeeper. This is what the client does in HCM.
484    * <p>
485    * @param event
486    */
487   private void connectionEvent(WatchedEvent event) {
488     switch(event.getState()) {
489       case SyncConnected:
490         // Now, this callback can be invoked before the this.zookeeper is set.
491         // Wait a little while.
492         long finished = System.currentTimeMillis() +
493           this.conf.getLong("hbase.zookeeper.watcher.sync.connected.wait", 2000);
494         while (System.currentTimeMillis() < finished) {
495           Threads.sleep(1);
496           if (this.recoverableZooKeeper != null) break;
497         }
498         if (this.recoverableZooKeeper == null) {
499           LOG.error("ZK is null on connection event -- see stack trace " +
500             "for the stack trace when constructor was called on this zkw",
501             this.constructorCaller);
502           throw new NullPointerException("ZK is null");
503         }
504         this.identifier = this.prefix + "-0x" +
505           Long.toHexString(this.recoverableZooKeeper.getSessionId());
506         // Update our identifier.  Otherwise ignore.
507         LOG.debug(this.identifier + " connected");
508         break;
509 
510       // Abort the server if Disconnected or Expired
511       case Disconnected:
512         LOG.debug(prefix("Received Disconnected from ZooKeeper, ignoring"));
513         break;
514 
515       case Expired:
516         String msg = prefix(this.identifier + " received expired from " +
517           "ZooKeeper, aborting");
518         // TODO: One thought is to add call to ZooKeeperListener so say,
519         // ZooKeeperNodeTracker can zero out its data values.
520         if (this.abortable != null) {
521           this.abortable.abort(msg, new KeeperException.SessionExpiredException());
522         }
523         break;
524 
525       case ConnectedReadOnly:
526       case SaslAuthenticated:
527       case AuthFailed:
528         break;
529 
530       default:
531         throw new IllegalStateException("Received event is not valid: " + event.getState());
532     }
533   }
534 
535   /**
536    * Forces a synchronization of this ZooKeeper client connection.
537    * <p>
538    * Executing this method before running other methods will ensure that the
539    * subsequent operations are up-to-date and consistent as of the time that
540    * the sync is complete.
541    * <p>
542    * This is used for compareAndSwap type operations where we need to read the
543    * data of an existing node and delete or transition that node, utilizing the
544    * previously read version and data.  We want to ensure that the version read
545    * is up-to-date from when we begin the operation.
546    */
547   public void sync(String path) throws KeeperException {
548     this.recoverableZooKeeper.sync(path, null, null);
549   }
550 
551   /**
552    * Handles KeeperExceptions in client calls.
553    * <p>
554    * This may be temporary but for now this gives one place to deal with these.
555    * <p>
556    * TODO: Currently this method rethrows the exception to let the caller handle
557    * <p>
558    * @param ke
559    * @throws KeeperException
560    */
561   public void keeperException(KeeperException ke)
562   throws KeeperException {
563     LOG.error(prefix("Received unexpected KeeperException, re-throwing exception"), ke);
564     throw ke;
565   }
566 
567   /**
568    * Handles InterruptedExceptions in client calls.
569    * <p>
570    * This may be temporary but for now this gives one place to deal with these.
571    * <p>
572    * TODO: Currently, this method does nothing.
573    *       Is this ever expected to happen?  Do we abort or can we let it run?
574    *       Maybe this should be logged as WARN?  It shouldn't happen?
575    * <p>
576    * @param ie
577    */
578   public void interruptedException(InterruptedException ie) {
579     LOG.debug(prefix("Received InterruptedException, doing nothing here"), ie);
580     // At least preserver interrupt.
581     Thread.currentThread().interrupt();
582     // no-op
583   }
584 
585   /**
586    * Close the connection to ZooKeeper.
587    *
588    * @throws InterruptedException
589    */
590   @Override
591   public void close() {
592     try {
593       if (recoverableZooKeeper != null) {
594         recoverableZooKeeper.close();
595       }
596     } catch (InterruptedException e) {
597       Thread.currentThread().interrupt();
598     }
599   }
600 
601   public Configuration getConfiguration() {
602     return conf;
603   }
604 
605   @Override
606   public void abort(String why, Throwable e) {
607     if (this.abortable != null) this.abortable.abort(why, e);
608     else this.aborted = true;
609   }
610 
611   @Override
612   public boolean isAborted() {
613     return this.abortable == null? this.aborted: this.abortable.isAborted();
614   }
615 
616   /**
617    * @return Path to the currently active master.
618    */
619   public String getMasterAddressZNode() {
620     return this.masterAddressZNode;
621   }
622 
623 }