View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.master;
21  
22  import java.io.IOException;
23  import java.util.Set;
24  import java.util.concurrent.BlockingQueue;
25  import java.util.concurrent.CopyOnWriteArraySet;
26  import java.util.concurrent.DelayQueue;
27  import java.util.concurrent.PriorityBlockingQueue;
28  import java.util.concurrent.TimeUnit;
29  import java.util.concurrent.atomic.AtomicBoolean;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.hbase.HConstants;
35  import org.apache.hadoop.hbase.HMsg;
36  import org.apache.hadoop.hbase.HServerInfo;
37  import org.apache.hadoop.hbase.HServerAddress;
38  import org.apache.hadoop.hbase.RemoteExceptionHandler;
39  import org.apache.hadoop.hbase.util.Sleeper;
40  import org.apache.hadoop.ipc.RemoteException;
41  
42  /**
43   * Keeps up the queue of {@link RegionServerOperation}s.
44   * Has both live queue and a temporary put-aside queue; if processing of the
45   * live todo queue fails for some reason, we'll add the item back on the delay
46   * queue for retry later.  Call {@link #shutdown()} to effect a cleanup of
47   * queues when done.  Listen to this queue by registering
48   * {@link RegionServerOperationListener}s.
49   * @see #registerRegionServerOperationListener(RegionServerOperationListener)
50   * @see #unregisterRegionServerOperationListener(RegionServerOperationListener)
51   */
52  public class RegionServerOperationQueue {
53    // TODO: Build up the junit test of this class.
54    private final Log LOG = LogFactory.getLog(this.getClass());
55  
56    /**
57     * Enums returned by {@link RegionServerOperationQueue#process()};
58     */
59    public static enum ProcessingResultCode {
60      /**
61       * Operation was processed successfully.
62       */
63      PROCESSED,
64      /**
65       * Nothing to do.
66       */
67      NOOP,
68      /**
69       * Operation was put-aside for now.  Will be retried later.
70       */
71      REQUEUED,
72      /**
73       * Failed processing of the operation.
74       */
75      FAILED,
76      /**
77       * Operation was requeued but we failed its processing for some reason
78       * (Bad filesystem?).
79       */
80      REQUEUED_BUT_PROBLEM
81    };
82  
83    /*
84     * Do not put items directly on this queue. Use {@link #putOnDelayQueue(RegionServerOperation)}.
85     * It makes sure the expiration on the RegionServerOperation added is updated.
86     */
87    private final DelayQueue<RegionServerOperation> delayedToDoQueue =
88      new DelayQueue<RegionServerOperation>();
89    private final BlockingQueue<RegionServerOperation> toDoQueue =
90      new PriorityBlockingQueue<RegionServerOperation>();
91    private final Set<RegionServerOperationListener> listeners =
92      new CopyOnWriteArraySet<RegionServerOperationListener>();
93    private final int threadWakeFrequency;
94    private final AtomicBoolean closed;
95    private final Sleeper sleeper;
96  
97    RegionServerOperationQueue(final Configuration c, final AtomicBoolean closed) {
98      this.threadWakeFrequency = c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000);
99      this.closed = closed;
100     this.sleeper = new Sleeper(this.threadWakeFrequency, this.closed);
101   }
102 
103   public void put(final RegionServerOperation op) {
104     try {
105       this.toDoQueue.put(op);
106     } catch (InterruptedException e) {
107       LOG.warn("Insertion into todo queue interrupted; putting on delay queue", e);
108       putOnDelayQueue(op);
109     }
110   }
111 
112   /**
113    * Try to get an operation off of the queue and process it.
114    * @return {@link ProcessingResultCode#PROCESSED},
115    * {@link ProcessingResultCode#REQUEUED},
116    * {@link ProcessingResultCode#REQUEUED_BUT_PROBLEM}
117    */
118   public synchronized ProcessingResultCode process() {
119     RegionServerOperation op = null;
120     // Only process the delayed queue if root region is online.  If offline,
121     // the operation to put it online is probably in the toDoQueue.  Process
122     // it first.
123     if (toDoQueue.isEmpty()) {
124       op = delayedToDoQueue.poll();
125     }
126     if (op == null) {
127       try {
128         op = toDoQueue.poll(threadWakeFrequency, TimeUnit.MILLISECONDS);
129       } catch (InterruptedException e) {
130         LOG.debug("Interrupted", e);
131       }
132     }
133 
134     // At this point, if there's still no todo operation, or we're supposed to
135     // be closed, return.
136     if (op == null || closed.get()) {
137       return ProcessingResultCode.NOOP;
138     }
139 
140     try {
141       if (LOG.isDebugEnabled()) {
142         LOG.debug("Processing todo: " + op.toString());
143       }
144       if (!process(op)) {
145         // Add it back on the queue.
146         putOnDelayQueue(op);
147       } else if (op.process()) {
148         processed(op);
149       } else {
150         // Operation would have blocked because not all meta regions are
151         // online. This could cause a deadlock, because this thread is waiting
152         // for the missing meta region(s) to come back online, but since it
153         // is waiting, it cannot process the meta region online operation it
154         // is waiting for. So put this operation back on the queue for now.
155         if (toDoQueue.size() == 0) {
156           // The queue is currently empty so wait for a while to see if what
157           // we need comes in first
158           this.sleeper.sleep();
159         }
160         try {
161           if (LOG.isDebugEnabled()) {
162             LOG.debug("Put " + op.toString() + " back on queue");
163           }
164           toDoQueue.put(op);
165         } catch (InterruptedException e) {
166           throw new RuntimeException(
167             "Putting into toDoQueue was interrupted.", e);
168         }
169       }
170     } catch (Exception ex) {
171       // There was an exception performing the operation.
172       if (ex instanceof RemoteException) {
173         try {
174           ex = RemoteExceptionHandler.decodeRemoteException(
175             (RemoteException)ex);
176         } catch (IOException e) {
177           ex = e;
178           LOG.warn("main processing loop: " + op.toString(), e);
179         }
180       }
181       LOG.warn("Failed processing: " + op.toString() +
182         "; putting onto delayed todo queue", ex);
183       putOnDelayQueue(op);
184       return ProcessingResultCode.REQUEUED_BUT_PROBLEM;
185     }
186     return ProcessingResultCode.REQUEUED;
187   }
188 
189   void putOnDelayQueue(final RegionServerOperation op) {
190     op.resetExpiration();
191     this.delayedToDoQueue.put(op);
192   }
193 
194   /**
195    * Clean up the queues.
196    */
197   public synchronized void shutdown() {
198     this.toDoQueue.clear();
199     this.delayedToDoQueue.clear();
200   }
201 
202   /**
203    * @param l Register this listener of RegionServerOperation events.
204    */
205   public void registerRegionServerOperationListener(final RegionServerOperationListener l) {
206     this.listeners.add(l);
207   }
208 
209   /**
210    * @param l Unregister this listener for RegionServerOperation events.
211    * @return True if this listener was registered.
212    */
213   public boolean unregisterRegionServerOperationListener(final RegionServerOperationListener l) {
214     return this.listeners.remove(l);
215   }
216 
217   /*
218    * Tell listeners that we processed a RegionServerOperation.
219    * @param op Operation to tell the world about.
220    */
221   private void processed(final RegionServerOperation op) {
222     if (this.listeners.isEmpty()) return;
223     for (RegionServerOperationListener listener: this.listeners) {
224       listener.processed(op);
225     }
226   }
227 
228   /**
229    * Called for each message passed the master.  Most of the messages that come
230    * in here will go on to become {@link #process(RegionServerOperation)}s but
231    * others like {@linke HMsg.Type#MSG_REPORT_PROCESS_OPEN} go no further;
232    * only in here can you see them come in.
233    * @param serverInfo Server we got the message from.
234    * @param incomingMsg The message received.
235    * @return True to continue processing, false to skip.
236    */
237   boolean process(final HServerInfo serverInfo,
238       final HMsg incomingMsg) {
239     if (this.listeners.isEmpty()) return true;
240     for (RegionServerOperationListener listener: this.listeners) {
241       if (!listener.process(serverInfo, incomingMsg)) return false;
242     }
243     return true;
244   }
245 
246   /*
247    * Tell listeners that we processed a RegionServerOperation.
248    * @param op Operation to tell the world about.
249    */
250   private boolean process(final RegionServerOperation op) throws IOException {
251     if (this.listeners.isEmpty()) return true;
252     for (RegionServerOperationListener listener: this.listeners) {
253       if (!listener.process(op)) return false;
254     }
255     return true;
256   }
257 }