View Javadoc

1   package org.apache.jcs.auxiliary.lateral;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.util.Iterator;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.jcs.auxiliary.lateral.behavior.ILateralCacheManager;
27  import org.apache.jcs.engine.CacheConstants;
28  
29  /***
30   * Used to monitor and repair any failed connection for the lateral cache
31   * service. By default the monitor operates in a failure driven mode. That is,
32   * it goes into a wait state until there is an error. Upon the notification of a
33   * connection error, the monitor changes to operate in a time driven mode. That
34   * is, it attempts to recover the connections on a periodic basis. When all
35   * failed connections are restored, it changes back to the failure driven mode.
36   *
37   */
38  public class LateralCacheMonitor
39      implements Runnable
40  {
41      private final static Log log = LogFactory.getLog( LateralCacheMonitor.class );
42  
43      private static long idlePeriod = 20 * 1000;
44  
45      // minimum 20 seconds.
46      //private static long idlePeriod = 3*1000; // for debugging.
47  
48      // Must make sure LateralCacheMonitor is started before any lateral error
49      // can be detected!
50      private boolean alright = true;
51  
52      private final static int ERROR = 1;
53  
54      private static int mode = ERROR;
55  
56      private ILateralCacheManager manager;
57  
58      /***
59       * Configures the idle period between repairs.
60       *
61       * @param idlePeriod
62       *            The new idlePeriod value
63       */
64      public static void setIdlePeriod( long idlePeriod )
65      {
66          if ( idlePeriod > LateralCacheMonitor.idlePeriod )
67          {
68              LateralCacheMonitor.idlePeriod = idlePeriod;
69          }
70      }
71  
72      /***
73       * Allows close classes, ie testers to set the idle period to something
74       * testable.
75       *
76       * @param idlePeriod
77       */
78      protected static void forceShortIdlePeriod( long idlePeriod )
79      {
80          LateralCacheMonitor.idlePeriod = idlePeriod;
81      }
82  
83      /*** Constructor for the LateralCacheMonitor object
84       * <p>
85       * It's the clients responsibility to decide how many
86       * of these there will be.
87       *
88       * @param manager
89       */
90      public LateralCacheMonitor( ILateralCacheManager manager )
91      {
92          this.manager = manager;
93      }
94  
95      /***
96       * Notifies the cache monitor that an error occurred, and kicks off the
97       * error recovery process.
98       */
99      public void notifyError()
100     {
101         bad();
102         synchronized ( this )
103         {
104             notify();
105         }
106     }
107 
108     /***
109      * Main processing method for the LateralCacheMonitor object
110      */
111     public void run()
112     {
113         do
114         {
115             if ( mode == ERROR )
116             {
117                 if ( log.isDebugEnabled() )
118                 {
119                     if ( alright )
120                     {
121                         log.debug( "ERROR DRIVEN MODE: alright = " + alright
122                             + ", connection monitor will wait for an error." );
123                     }
124                     else
125                     {
126                         log.debug( "ERROR DRIVEN MODE: alright = " + alright + " connection monitor running." );
127                     }
128                 }
129 
130                 if ( alright )
131                 {
132                     synchronized ( this )
133                     {
134                         if ( alright )
135                         {
136                             // Failure driven mode.
137                             try
138                             {
139                                 wait();
140                                 // wake up only if there is an error.
141                             }
142                             catch ( InterruptedException ignore )
143                             {
144                                 //no op, this is expected
145                             }
146                         }
147                     }
148                 }
149             }
150             else
151             {
152                 log.debug( "TIME DRIVEN MODE: connection monitor will sleep for " + idlePeriod + " after this run." );
153                 // Time driven mode: sleep between each round of recovery
154                 // attempt.
155                 // will need to test not just check status
156             }
157 
158             // The "alright" flag must be false here.
159             // Simply presume we can fix all the errors until proven otherwise.
160             synchronized ( this )
161             {
162                 alright = true;
163             }
164 
165             if ( log.isDebugEnabled() )
166             {
167                 log.debug( "Cache monitor running." );
168             }
169 
170             // Monitor each LateralCacheManager instance one after the other.
171             // Each LateralCacheManager corresponds to one lateral connection.
172             log.info( "LateralCacheManager.instances.size() = " + manager.getInstances().size() );
173             //for
174             int cnt = 0;
175             Iterator itr = manager.getInstances().values().iterator();
176             while ( itr.hasNext() )
177             {
178                 cnt++;
179                 ILateralCacheManager mgr = (ILateralCacheManager) itr.next();
180                 try
181                 {
182                     // If any cache is in error, it strongly suggests all caches
183                     // managed by the
184                     // same LateralCacheManager instance are in error. So we fix
185                     // them once and for all.
186                     //for
187                     //log.info( "\n " + cnt + "- mgr.lca.getTcpServer() = " + mgr.lca.getTcpServer() + " mgr = " + mgr );
188                     log.info( "\n " + cnt + "- mgr.getCaches().size() = " + mgr.getCaches().size() );
189 
190                     if ( mgr.getCaches().size() == 0 )
191                     {
192                         // there is probably a problem.
193                         // monitor may be running when we just started up and
194                         // there
195                         // is not a cache yet.
196                         // if this is error driven mode, mark as bad,
197                         // otherwise we will come back around argain.
198                         if ( mode == ERROR )
199                         {
200                             bad();
201                         }
202                     }
203 
204                     Iterator itr2 = mgr.getCaches().values().iterator();
205 
206                     while ( itr2.hasNext() )
207                     {
208                         LateralCacheNoWait c = (LateralCacheNoWait) itr2.next();
209                         if ( c.getStatus() == CacheConstants.STATUS_ERROR )
210                         {
211                             log.info( "found LateralCacheNoWait in error, " + c.toString() );
212 
213                             LateralCacheRestore repairer = new LateralCacheRestore( mgr );
214                             // If we can't fix them, just skip and re-try in the
215                             // next round.
216                             if ( repairer.canFix() )
217                             {
218                                 repairer.fix();
219                             }
220                             else
221                             {
222                                 bad();
223                             }
224                             //break;
225                         }
226                         else
227                         {
228                             log.info( "Lateral Cache No Wait not in error" );
229                         }
230                     }
231                 }
232                 catch ( Exception ex )
233                 {
234                     bad();
235                     // Problem encountered in fixing the caches managed by a
236                     // LateralCacheManager instance.
237                     // Soldier on to the next LateralCacheManager instance.
238                     log.error( "Problem encountered in fixing the caches", ex );
239                 }
240             }
241 
242             try
243             {
244                 // don't want to sleep after waking from an error
245                 // run immediately and sleep here.
246                 if ( log.isDebugEnabled() )
247                 {
248                     log.debug( "Lateral cache monitor sleeping for " + idlePeriod + " between runs." );
249                 }
250 
251                 Thread.sleep( idlePeriod );
252             }
253             catch ( InterruptedException ex )
254             {
255                 // ignore;
256             }
257         }
258         while ( true );
259     }
260 
261     /***
262      * Sets the "alright" flag to false in a critial section.
263      */
264     private void bad()
265     {
266         if ( alright )
267         {
268             synchronized ( this )
269             {
270                 alright = false;
271             }
272         }
273     }
274 }