%line | %branch | |||||||||
---|---|---|---|---|---|---|---|---|---|---|
org.apache.jcs.auxiliary.remote.RemoteCacheFailoverRunner |
|
|
1 | package org.apache.jcs.auxiliary.remote; |
|
2 | ||
3 | /* |
|
4 | * Licensed to the Apache Software Foundation (ASF) under one |
|
5 | * or more contributor license agreements. See the NOTICE file |
|
6 | * distributed with this work for additional information |
|
7 | * regarding copyright ownership. The ASF licenses this file |
|
8 | * to you under the Apache License, Version 2.0 (the |
|
9 | * "License"); you may not use this file except in compliance |
|
10 | * with the License. You may obtain a copy of the License at |
|
11 | * |
|
12 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
13 | * |
|
14 | * Unless required by applicable law or agreed to in writing, |
|
15 | * software distributed under the License is distributed on an |
|
16 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
|
17 | * KIND, either express or implied. See the License for the |
|
18 | * specific language governing permissions and limitations |
|
19 | * under the License. |
|
20 | */ |
|
21 | ||
22 | import org.apache.commons.logging.Log; |
|
23 | import org.apache.commons.logging.LogFactory; |
|
24 | import org.apache.jcs.engine.CacheConstants; |
|
25 | import org.apache.jcs.engine.behavior.ICache; |
|
26 | import org.apache.jcs.engine.behavior.ICompositeCacheManager; |
|
27 | ||
28 | /** |
|
29 | * The RemoteCacheFailoverRunner tries to establish a connection with a failover |
|
30 | * server, if any are defined. Once a failover connectin is made, it will |
|
31 | * attempt to replace the failover with the primary remote server. |
|
32 | * <p> |
|
33 | * It works by switching out the RemoteCacheNoWait inside the Facade. |
|
34 | * <p> |
|
35 | * The client (i.e. the CompositeCache) has a reference to a RemoteCacheNoWaitFacade. |
|
36 | * This facade is created by the RemoteCacheFactory. The factory maintains a set |
|
37 | * of managers, one for each remote server. Typically, there will only be one |
|
38 | * manager. |
|
39 | * <p> |
|
40 | * If you use multiple remote servers, you may want to set one or more as |
|
41 | * failovers. If a local cache cannot connect to the primary server, or loses |
|
42 | * its connection to the primary server, it will attempt to restore that |
|
43 | * connection in the background. If failovers are defined, the Failover runner |
|
44 | * will try to connect to a failover until the primary is restored. |
|
45 | * |
|
46 | */ |
|
47 | public class RemoteCacheFailoverRunner |
|
48 | implements Runnable |
|
49 | { |
|
50 | 0 | private final static Log log = LogFactory.getLog( RemoteCacheFailoverRunner.class ); |
51 | ||
52 | private RemoteCacheNoWaitFacade facade; |
|
53 | ||
54 | 0 | private static long idlePeriod = 20 * 1000; |
55 | ||
56 | 0 | private boolean alright = true; |
57 | ||
58 | private ICompositeCacheManager cacheMgr; |
|
59 | ||
60 | /** |
|
61 | * Constructor for the RemoteCacheFailoverRunner object. This allows the |
|
62 | * FailoverRunner to modify the facade that the CompositeCache references. |
|
63 | * |
|
64 | * @param facade |
|
65 | * the facade the CompositeCache talks to. |
|
66 | * @param cacheMgr |
|
67 | */ |
|
68 | public RemoteCacheFailoverRunner( RemoteCacheNoWaitFacade facade, ICompositeCacheManager cacheMgr ) |
|
69 | 0 | { |
70 | 0 | this.facade = facade; |
71 | 0 | this.cacheMgr = cacheMgr; |
72 | 0 | } |
73 | ||
74 | /** |
|
75 | * Notifies the cache monitor that an error occurred, and kicks off the |
|
76 | * error recovery process. |
|
77 | */ |
|
78 | public void notifyError() |
|
79 | { |
|
80 | 0 | bad(); |
81 | 0 | synchronized ( this ) |
82 | { |
|
83 | 0 | notify(); |
84 | 0 | } |
85 | 0 | } |
86 | ||
87 | /** |
|
88 | * Main processing method for the RemoteCacheFailoverRunner object. |
|
89 | * <p> |
|
90 | * If we do not have a connection with any failover server, this will try to |
|
91 | * connect one at a time. If no connection can be made, it goes to sleep for |
|
92 | * a while (20 seconds). |
|
93 | * <p> |
|
94 | * Once a connection with a failover is made, we will try to reconnect to |
|
95 | * the primary server. |
|
96 | * <p> |
|
97 | * The primary server is the first server defined in the FailoverServers |
|
98 | * list. |
|
99 | */ |
|
100 | public void run() |
|
101 | { |
|
102 | // start the main work of connecting to a failover and then restoring |
|
103 | // the primary. |
|
104 | 0 | connectAndRestore(); |
105 | ||
106 | 0 | if ( log.isInfoEnabled() ) |
107 | { |
|
108 | 0 | log.info( "Exiting failover runner. Failover index = " + facade.remoteCacheAttributes.getFailoverIndex() ); |
109 | 0 | if ( facade.remoteCacheAttributes.getFailoverIndex() <= 0 ) |
110 | { |
|
111 | 0 | log.info( "Failover index is <= 0, meaning we are not " + "connected to a failover server." ); |
112 | 0 | } |
113 | 0 | else if ( facade.remoteCacheAttributes.getFailoverIndex() > 0 ) |
114 | { |
|
115 | 0 | log.info( "Failover index is > 0, meaning we are " + "connected to a failover server." ); |
116 | } |
|
117 | // log if we are alright or not. |
|
118 | } |
|
119 | 0 | return; |
120 | } |
|
121 | ||
122 | /** |
|
123 | * This is the main loop. If there are failovers defined, then this will |
|
124 | * continue until the primary is re-connected. If no failovers are defined, |
|
125 | * this will exit automatically. |
|
126 | */ |
|
127 | private void connectAndRestore() |
|
128 | { |
|
129 | do |
|
130 | { |
|
131 | 0 | log.info( "Remote cache FAILOVER RUNNING." ); |
132 | ||
133 | // there is no active listener |
|
134 | 0 | if ( !alright ) |
135 | { |
|
136 | // Monitor each RemoteCacheManager instance one after the other. |
|
137 | // Each RemoteCacheManager corresponds to one remote connection. |
|
138 | 0 | String[] failovers = facade.remoteCacheAttributes.getFailovers(); |
139 | // we should probably check to see if there are any failovers, |
|
140 | // even though the caller |
|
141 | // should have already. |
|
142 | ||
143 | 0 | if ( failovers == null ) |
144 | { |
|
145 | 0 | log.warn( "Remote is misconfigured, failovers was null." ); |
146 | 0 | return; |
147 | } |
|
148 | 0 | else if ( failovers.length == 1 ) |
149 | { |
|
150 | // if there is only the primary, return out of this |
|
151 | 0 | if ( log.isInfoEnabled() ) |
152 | { |
|
153 | 0 | log.info( "No failovers defined, exiting failover runner." ); |
154 | 0 | return; |
155 | } |
|
156 | } |
|
157 | ||
158 | 0 | int fidx = facade.remoteCacheAttributes.getFailoverIndex(); |
159 | 0 | log.debug( "fidx = " + fidx + " failovers.length = " + failovers.length ); |
160 | ||
161 | // shouldn't we see if the primary is back up? |
|
162 | // If we don't check the primary, if it gets connected in the |
|
163 | // background, |
|
164 | // we will disconnect it only to put it right back |
|
165 | 0 | int i = fidx; // + 1; // +1 skips the primary |
166 | 0 | if ( log.isDebugEnabled() ) |
167 | { |
|
168 | 0 | log.debug( "stating at failover i = " + i ); |
169 | } |
|
170 | ||
171 | // try them one at a time until successful |
|
172 | 0 | for ( ; i < failovers.length && !alright; i++ ) |
173 | { |
|
174 | 0 | String server = failovers[i]; |
175 | 0 | if ( log.isDebugEnabled() ) |
176 | { |
|
177 | 0 | log.debug( "Trying server [" + server + "] at failover index i = " + i ); |
178 | } |
|
179 | ||
180 | 0 | RemoteCacheAttributes rca = null; |
181 | try |
|
182 | { |
|
183 | 0 | rca = (RemoteCacheAttributes) facade.remoteCacheAttributes.copy(); |
184 | 0 | rca.setRemoteHost( server.substring( 0, server.indexOf( ":" ) ) ); |
185 | 0 | rca.setRemotePort( Integer.parseInt( server.substring( server.indexOf( ":" ) + 1 ) ) ); |
186 | 0 | RemoteCacheManager rcm = RemoteCacheManager.getInstance( rca, cacheMgr ); |
187 | ||
188 | 0 | if ( log.isDebugEnabled() ) |
189 | { |
|
190 | 0 | log.debug( "RemoteCacheAttributes for failover = " + rca.toString() ); |
191 | } |
|
192 | ||
193 | // add a listener if there are none, need to tell rca |
|
194 | // what number it is at |
|
195 | 0 | ICache ic = rcm.getCache( rca.getCacheName() ); |
196 | 0 | if ( ic != null ) |
197 | { |
|
198 | 0 | if ( ic.getStatus() == CacheConstants.STATUS_ALIVE ) |
199 | { |
|
200 | // may need to do this more gracefully |
|
201 | 0 | log.debug( "reseting no wait" ); |
202 | 0 | facade.noWaits = new RemoteCacheNoWait[1]; |
203 | 0 | facade.noWaits[0] = (RemoteCacheNoWait) ic; |
204 | 0 | facade.remoteCacheAttributes.setFailoverIndex( i ); |
205 | ||
206 | 0 | synchronized ( this ) |
207 | { |
|
208 | 0 | if ( log.isDebugEnabled() ) |
209 | { |
|
210 | 0 | log.debug( "setting ALRIGHT to true" ); |
211 | 0 | if ( i > 0 ) |
212 | { |
|
213 | 0 | log.debug( "Moving to Primary Recovery Mode, failover index = " + i ); |
214 | 0 | } |
215 | else |
|
216 | { |
|
217 | 0 | if ( log.isInfoEnabled() ) |
218 | { |
|
219 | 0 | String message = "No need to connect to failover, the primary server is back up."; |
220 | 0 | log.info( message ); |
221 | } |
|
222 | } |
|
223 | } |
|
224 | ||
225 | 0 | alright = true; |
226 | ||
227 | 0 | if ( log.isInfoEnabled() ) |
228 | { |
|
229 | 0 | log.info( "CONNECTED to host = [" + rca.getRemoteHost() + "] port = [" |
230 | + rca.getRemotePort() + "]" ); |
|
231 | } |
|
232 | 0 | } |
233 | 0 | } |
234 | } |
|
235 | else |
|
236 | { |
|
237 | 0 | log.info( "noWait is null" ); |
238 | } |
|
239 | } |
|
240 | 0 | catch ( Exception ex ) |
241 | { |
|
242 | 0 | bad(); |
243 | // Problem encountered in fixing the caches managed by a |
|
244 | // RemoteCacheManager instance. |
|
245 | // Soldier on to the next RemoteCacheManager instance. |
|
246 | 0 | if ( i == 0 ) |
247 | { |
|
248 | 0 | log.warn( "FAILED to connect, as expected, to primary" + rca.getRemoteHost() + ":" |
249 | + rca.getRemotePort(), ex ); |
|
250 | 0 | } |
251 | else |
|
252 | { |
|
253 | 0 | log.error( "FAILED to connect to failover [" + rca.getRemoteHost() + ":" |
254 | + rca.getRemotePort() + "]", ex ); |
|
255 | } |
|
256 | 0 | } |
257 | } |
|
258 | 0 | } |
259 | // end if !alright |
|
260 | // get here if while index >0 and alright, meaning that we are |
|
261 | // connected to some backup server. |
|
262 | else |
|
263 | { |
|
264 | 0 | if ( log.isDebugEnabled() ) |
265 | { |
|
266 | 0 | log.debug( "ALRIGHT is true " ); |
267 | } |
|
268 | 0 | if ( log.isInfoEnabled() ) |
269 | { |
|
270 | 0 | log.info( "Failover runner is in primary recovery mode. Failover index = " |
271 | + facade.remoteCacheAttributes.getFailoverIndex() + "\n" + "Will now try to reconnect to primary server." ); |
|
272 | } |
|
273 | } |
|
274 | ||
275 | 0 | boolean primaryRestoredSuccessfully = false; |
276 | // if we are not connected to the primary, try. |
|
277 | 0 | if ( facade.remoteCacheAttributes.getFailoverIndex() > 0 ) |
278 | { |
|
279 | 0 | primaryRestoredSuccessfully = restorePrimary(); |
280 | 0 | if ( log.isDebugEnabled() ) |
281 | { |
|
282 | 0 | log.debug( "Primary recovery success state = " + primaryRestoredSuccessfully ); |
283 | } |
|
284 | } |
|
285 | ||
286 | 0 | if ( !primaryRestoredSuccessfully ) |
287 | { |
|
288 | // Time driven mode: sleep between each round of recovery |
|
289 | // attempt. |
|
290 | try |
|
291 | { |
|
292 | 0 | log.warn( "Failed to reconnect to primary server. Cache failover runner is going to sleep for " |
293 | + idlePeriod + " milliseconds." ); |
|
294 | 0 | Thread.sleep( idlePeriod ); |
295 | } |
|
296 | 0 | catch ( InterruptedException ex ) |
297 | { |
|
298 | // ignore; |
|
299 | 0 | } |
300 | } |
|
301 | ||
302 | // try to bring the listener back to the primary |
|
303 | } |
|
304 | 0 | while ( facade.remoteCacheAttributes.getFailoverIndex() > 0 || !alright ); |
305 | // continue if the primary is not restored or if things are not alright. |
|
306 | ||
307 | 0 | } |
308 | ||
309 | /** |
|
310 | * Try to restore the primary server. |
|
311 | * <p> |
|
312 | * Once primary is restored the failover listener must be deregistered. |
|
313 | * <p> |
|
314 | * The primary server is the first server defined in the FailoverServers |
|
315 | * list. |
|
316 | * |
|
317 | * @return boolean value indicating whether the restoration was successful |
|
318 | */ |
|
319 | private boolean restorePrimary() |
|
320 | { |
|
321 | // try to move back to the primary |
|
322 | 0 | String[] failovers = facade.remoteCacheAttributes.getFailovers(); |
323 | 0 | String server = failovers[0]; |
324 | ||
325 | 0 | if ( log.isInfoEnabled() ) |
326 | { |
|
327 | 0 | log.info( "Trying to restore connection to primary remote server [" + server + "]" ); |
328 | } |
|
329 | ||
330 | try |
|
331 | { |
|
332 | 0 | RemoteCacheAttributes rca = (RemoteCacheAttributes) facade.remoteCacheAttributes.copy(); |
333 | 0 | rca.setRemoteHost( server.substring( 0, server.indexOf( ":" ) ) ); |
334 | 0 | rca.setRemotePort( Integer.parseInt( server.substring( server.indexOf( ":" ) + 1 ) ) ); |
335 | 0 | RemoteCacheManager rcm = RemoteCacheManager.getInstance( rca, cacheMgr ); |
336 | ||
337 | // add a listener if there are none, need to tell rca what number it |
|
338 | // is at |
|
339 | 0 | ICache ic = rcm.getCache( rca.getCacheName() ); |
340 | // by default the listener id should be 0, else it will be the |
|
341 | // listener |
|
342 | // originally associated with the remote cache. either way is fine. |
|
343 | // We just don't want the listener id from a failover being used. |
|
344 | // If the remote server was rebooted this could be a problem if new |
|
345 | // locals were also added. |
|
346 | ||
347 | 0 | if ( ic != null ) |
348 | { |
|
349 | 0 | if ( ic.getStatus() == CacheConstants.STATUS_ALIVE ) |
350 | { |
|
351 | try |
|
352 | { |
|
353 | // we could have more than one listener registered right |
|
354 | // now. |
|
355 | // this will not result in a loop, only duplication |
|
356 | // stop duplicate listening. |
|
357 | 0 | if ( facade.noWaits[0] != null && facade.noWaits[0].getStatus() == CacheConstants.STATUS_ALIVE ) |
358 | { |
|
359 | 0 | int fidx = facade.remoteCacheAttributes.getFailoverIndex(); |
360 | ||
361 | 0 | if ( fidx > 0 ) |
362 | { |
|
363 | 0 | String serverOld = failovers[fidx]; |
364 | ||
365 | 0 | if ( log.isDebugEnabled() ) |
366 | { |
|
367 | 0 | log.debug( "Failover Index = " + fidx + " the server at that index is [" |
368 | + serverOld + "]" ); |
|
369 | } |
|
370 | ||
371 | 0 | if ( serverOld != null ) |
372 | { |
|
373 | // create attributes that reflect the |
|
374 | // previous failed over configuration. |
|
375 | 0 | RemoteCacheAttributes rcaOld = (RemoteCacheAttributes) facade.remoteCacheAttributes.copy(); |
376 | 0 | rcaOld.setRemoteHost( serverOld.substring( 0, serverOld.indexOf( ":" ) ) ); |
377 | 0 | rcaOld.setRemotePort( Integer.parseInt( serverOld.substring( serverOld |
378 | .indexOf( ":" ) + 1 ) ) ); |
|
379 | 0 | RemoteCacheManager rcmOld = RemoteCacheManager.getInstance( rcaOld, cacheMgr ); |
380 | ||
381 | 0 | if ( rcmOld != null ) |
382 | { |
|
383 | // manager can remove by name if |
|
384 | // necessary |
|
385 | 0 | rcmOld.removeRemoteCacheListener( rcaOld ); |
386 | } |
|
387 | 0 | if ( log.isInfoEnabled() ) |
388 | { |
|
389 | 0 | log.info( "Successfully deregistered from FAILOVER remote server = " |
390 | + serverOld ); |
|
391 | } |
|
392 | } |
|
393 | 0 | } |
394 | 0 | else if ( fidx == 0 ) |
395 | { |
|
396 | // this should never happen. If there are no |
|
397 | // failovers this shouldn't get called. |
|
398 | 0 | if ( log.isDebugEnabled() ) |
399 | { |
|
400 | 0 | log.debug( "No need to restore primary, it is already restored." ); |
401 | 0 | return true; |
402 | } |
|
403 | } |
|
404 | 0 | else if ( fidx < 0 ) |
405 | { |
|
406 | // this should never happen |
|
407 | 0 | log.warn( "Failover index is less than 0, this shouldn't happen" ); |
408 | } |
|
409 | } |
|
410 | } |
|
411 | 0 | catch ( Exception e ) |
412 | { |
|
413 | // TODO, should try again, or somehow stop the listener |
|
414 | 0 | log.error( |
415 | "Trouble trying to deregister old failover listener prior to restoring the primary = " |
|
416 | + server, e ); |
|
417 | 0 | } |
418 | ||
419 | // Restore primary |
|
420 | // may need to do this more gracefully, letting the failover finish in the background |
|
421 | 0 | RemoteCacheNoWait failoverNoWait = facade.noWaits[0]; |
422 | ||
423 | // swap in a new one |
|
424 | 0 | facade.noWaits = new RemoteCacheNoWait[1]; |
425 | 0 | facade.noWaits[0] = (RemoteCacheNoWait) ic; |
426 | 0 | facade.remoteCacheAttributes.setFailoverIndex( 0 ); |
427 | ||
428 | 0 | if ( log.isInfoEnabled() ) |
429 | { |
|
430 | 0 | log.info( "Successfully reconnected to PRIMARY remote server. Substituted primary for failoverNoWait [" + failoverNoWait + "]" ); |
431 | } |
|
432 | 0 | return true; |
433 | } |
|
434 | ||
435 | // else alright |
|
436 | // if the failover index was at 0 here, we would be in a bad |
|
437 | // situation, unless there were just |
|
438 | // no failovers configured. |
|
439 | 0 | if ( log.isDebugEnabled() ) |
440 | { |
|
441 | 0 | log.debug( "Primary server status in error, not connected." ); |
442 | 0 | } |
443 | } |
|
444 | else |
|
445 | { |
|
446 | 0 | if ( log.isDebugEnabled() ) |
447 | { |
|
448 | 0 | log.debug( "Primary server is null, not connected." ); |
449 | } |
|
450 | } |
|
451 | } |
|
452 | 0 | catch ( Exception ex ) |
453 | { |
|
454 | 0 | log.error( ex ); |
455 | 0 | } |
456 | 0 | return false; |
457 | } |
|
458 | ||
459 | /** |
|
460 | * Sets the "alright" flag to false in a critical section. This flag |
|
461 | * indicates whether or not we are connected to any server at all. If we are |
|
462 | * connected to a secondary server, then alright will be true, but we will |
|
463 | * continue to try to restore the connection with the primary server. |
|
464 | * <p> |
|
465 | * The primary server is the first server defined in the FailoverServers |
|
466 | * list. |
|
467 | */ |
|
468 | private void bad() |
|
469 | { |
|
470 | 0 | if ( alright ) |
471 | { |
|
472 | 0 | synchronized ( this ) |
473 | { |
|
474 | 0 | alright = false; |
475 | 0 | } |
476 | } |
|
477 | 0 | } |
478 | } |
This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |