View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.IOException;
22  import java.util.concurrent.atomic.AtomicBoolean;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.ZNodeClearer;
28  import org.apache.hadoop.hbase.exceptions.DeserializationException;
29  import org.apache.hadoop.hbase.Server;
30  import org.apache.hadoop.hbase.ServerName;
31  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
32  import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
33  import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
34  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
35  import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
36  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
37  import org.apache.zookeeper.KeeperException;
38  
39  /**
40   * Handles everything on master-side related to master election.
41   *
42   * <p>Listens and responds to ZooKeeper notifications on the master znode,
43   * both <code>nodeCreated</code> and <code>nodeDeleted</code>.
44   *
45   * <p>Contains blocking methods which will hold up backup masters, waiting
46   * for the active master to fail.
47   *
48   * <p>This class is instantiated in the HMaster constructor and the method
49   * #blockUntilBecomingActiveMaster() is called to wait until becoming
50   * the active master of the cluster.
51   */
52  @InterfaceAudience.Private
53  class ActiveMasterManager extends ZooKeeperListener {
54    private static final Log LOG = LogFactory.getLog(ActiveMasterManager.class);
55  
56    final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean(false);
57  
58    private final ServerName sn;
59    private final Server master;
60  
61    /**
62     * @param watcher
63     * @param sn ServerName
64     * @param master In an instance of a Master.
65     */
66    ActiveMasterManager(ZooKeeperWatcher watcher, ServerName sn, Server master) {
67      super(watcher);
68      this.sn = sn;
69      this.master = master;
70    }
71  
72    @Override
73    public void nodeCreated(String path) {
74      handle(path);
75    }
76  
77    @Override
78    public void nodeDeleted(String path) {
79      handle(path);
80    }
81  
82    void handle(final String path) {
83      if (path.equals(watcher.getMasterAddressZNode()) && !master.isStopped()) {
84        handleMasterNodeChange();
85      }
86    }
87  
88    /**
89     * Handle a change in the master node.  Doesn't matter whether this was called
90     * from a nodeCreated or nodeDeleted event because there are no guarantees
91     * that the current state of the master node matches the event at the time of
92     * our next ZK request.
93     *
94     * <p>Uses the watchAndCheckExists method which watches the master address node
95     * regardless of whether it exists or not.  If it does exist (there is an
96     * active master), it returns true.  Otherwise it returns false.
97     *
98     * <p>A watcher is set which guarantees that this method will get called again if
99     * there is another change in the master node.
100    */
101   private void handleMasterNodeChange() {
102     // Watch the node and check if it exists.
103     try {
104       synchronized(clusterHasActiveMaster) {
105         if (ZKUtil.watchAndCheckExists(watcher, watcher.getMasterAddressZNode())) {
106           // A master node exists, there is an active master
107           LOG.debug("A master is now available");
108           clusterHasActiveMaster.set(true);
109         } else {
110           // Node is no longer there, cluster does not have an active master
111           LOG.debug("No master available. Notifying waiting threads");
112           clusterHasActiveMaster.set(false);
113           // Notify any thread waiting to become the active master
114           clusterHasActiveMaster.notifyAll();
115         }
116       }
117     } catch (KeeperException ke) {
118       master.abort("Received an unexpected KeeperException, aborting", ke);
119     }
120   }
121 
122   /**
123    * Block until becoming the active master.
124    *
125    * Method blocks until there is not another active master and our attempt
126    * to become the new active master is successful.
127    *
128    * This also makes sure that we are watching the master znode so will be
129    * notified if another master dies.
130    * @param startupStatus
131    * @return True if no issue becoming active master else false if another
132    * master was running or if some other problem (zookeeper, stop flag has been
133    * set on this Master)
134    */
135   boolean blockUntilBecomingActiveMaster(MonitoredTask startupStatus,
136     ClusterStatusTracker clusterStatusTracker) {
137     while (true) {
138       startupStatus.setStatus("Trying to register in ZK as active master");
139       // Try to become the active master, watch if there is another master.
140       // Write out our ServerName as versioned bytes.
141       try {
142         String backupZNode =
143             ZKUtil.joinZNode(this.watcher.backupMasterAddressesZNode, this.sn.toString());
144         if (MasterAddressTracker.setMasterAddress(this.watcher,
145             this.watcher.getMasterAddressZNode(), this.sn)) {
146 
147           // If we were a backup master before, delete our ZNode from the backup
148           // master directory since we are the active now)
149           if (ZKUtil.checkExists(this.watcher, backupZNode) != -1) {
150             LOG.info("Deleting ZNode for " + backupZNode + " from backup master directory");
151             ZKUtil.deleteNodeFailSilent(this.watcher, backupZNode);
152           }
153           // Save the znode in a file, this will allow to check if we crash in the launch scripts
154           ZNodeClearer.writeMyEphemeralNodeOnDisk(this.sn.toString());
155 
156           // We are the master, return
157           startupStatus.setStatus("Successfully registered as active master.");
158           this.clusterHasActiveMaster.set(true);
159           LOG.info("Master=" + this.sn);
160           return true;
161         }
162 
163         // There is another active master running elsewhere or this is a restart
164         // and the master ephemeral node has not expired yet.
165         this.clusterHasActiveMaster.set(true);
166 
167         /*
168         * Add a ZNode for ourselves in the backup master directory since we are
169         * not the active master.
170         *
171         * If we become the active master later, ActiveMasterManager will delete
172         * this node explicitly.  If we crash before then, ZooKeeper will delete
173         * this node for us since it is ephemeral.
174         */
175         LOG.info("Adding ZNode for " + backupZNode + " in backup master directory");
176         MasterAddressTracker.setMasterAddress(this.watcher, backupZNode, this.sn);
177 
178         String msg;
179         byte[] bytes =
180           ZKUtil.getDataAndWatch(this.watcher, this.watcher.getMasterAddressZNode());
181         if (bytes == null) {
182           msg = ("A master was detected, but went down before its address " +
183             "could be read.  Attempting to become the next active master");
184         } else {
185           ServerName currentMaster;
186           try {
187             currentMaster = ServerName.parseFrom(bytes);
188           } catch (DeserializationException e) {
189             LOG.warn("Failed parse", e);
190             // Hopefully next time around we won't fail the parse.  Dangerous.
191             continue;
192           }
193           if (ServerName.isSameHostnameAndPort(currentMaster, this.sn)) {
194             msg = ("Current master has this master's address, " +
195               currentMaster + "; master was restarted? Deleting node.");
196             // Hurry along the expiration of the znode.
197             ZKUtil.deleteNode(this.watcher, this.watcher.getMasterAddressZNode());
198 
199             // We may have failed to delete the znode at the previous step, but
200             //  we delete the file anyway: a second attempt to delete the znode is likely to fail again.
201             ZNodeClearer.deleteMyEphemeralNodeOnDisk();
202           } else {
203             msg = "Another master is the active master, " + currentMaster +
204               "; waiting to become the next active master";
205           }
206         }
207         LOG.info(msg);
208         startupStatus.setStatus(msg);
209       } catch (KeeperException ke) {
210         master.abort("Received an unexpected KeeperException, aborting", ke);
211         return false;
212       }
213       synchronized (this.clusterHasActiveMaster) {
214         while (this.clusterHasActiveMaster.get() && !this.master.isStopped()) {
215           try {
216             this.clusterHasActiveMaster.wait();
217           } catch (InterruptedException e) {
218             // We expect to be interrupted when a master dies,
219             //  will fall out if so
220             LOG.debug("Interrupted waiting for master to die", e);
221           }
222         }
223         if (!clusterStatusTracker.isClusterUp()) {
224           this.master.stop(
225             "Cluster went down before this master became active");
226         }
227         if (this.master.isStopped()) {
228           return false;
229         }
230         // there is no active master so we can try to become active master again
231       }
232     }
233   }
234 
235   /**
236    * @return True if cluster has an active master.
237    */
238   public boolean isActiveMaster() {
239     try {
240       if (ZKUtil.checkExists(watcher, watcher.getMasterAddressZNode()) >= 0) {
241         return true;
242       }
243     }
244     catch (KeeperException ke) {
245       LOG.info("Received an unexpected KeeperException when checking " +
246           "isActiveMaster : "+ ke);
247     }
248     return false;
249   }
250 
251   public void stop() {
252     try {
253       // If our address is in ZK, delete it on our way out
254       ServerName activeMaster = null;
255       try {
256         activeMaster = MasterAddressTracker.getMasterAddress(this.watcher);
257       } catch (IOException e) {
258         LOG.warn("Failed get of master address: " + e.toString());
259       }
260       if (activeMaster != null &&  activeMaster.equals(this.sn)) {
261         ZKUtil.deleteNode(watcher, watcher.getMasterAddressZNode());
262         // We may have failed to delete the znode at the previous step, but
263         //  we delete the file anyway: a second attempt to delete the znode is likely to fail again.
264         ZNodeClearer.deleteMyEphemeralNodeOnDisk();
265       }
266     } catch (KeeperException e) {
267       LOG.error(this.watcher.prefix("Error deleting our own master address node"), e);
268     }
269   }
270 }