1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.client;
19
import static org.junit.Assert.*;

import java.io.EOFException;
import java.io.IOException;
import java.io.SyncFailedException;
import java.net.ConnectException;
import java.net.SocketTimeoutException;
import java.nio.channels.ClosedChannelException;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.exceptions.ConnectionClosingException;
import org.apache.hadoop.hbase.exceptions.PreemptiveFastFailException;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.ipc.RemoteException;
import org.junit.After;
import org.junit.Test;
import org.junit.experimental.categories.Category;
53
54 @Category({ SmallTests.class })
55 public class TestFastFailWithoutTestUtil {
56 private static final Log LOG = LogFactory.getLog(TestFastFailWithoutTestUtil.class);
57
58 @Test
59 public void testInterceptorFactoryMethods() {
60 Configuration conf = HBaseConfiguration.create();
61 conf.setBoolean(HConstants.HBASE_CLIENT_FAST_FAIL_MODE_ENABLED, true);
62 RetryingCallerInterceptorFactory interceptorFactory = new RetryingCallerInterceptorFactory(
63 conf);
64
65 RetryingCallerInterceptor interceptorBeforeCast = interceptorFactory
66 .build();
67 assertTrue("We should be getting a PreemptiveFastFailInterceptor",
68 interceptorBeforeCast instanceof PreemptiveFastFailInterceptor);
69 PreemptiveFastFailInterceptor interceptor = (PreemptiveFastFailInterceptor) interceptorBeforeCast;
70
71 RetryingCallerInterceptorContext contextBeforeCast = interceptor
72 .createEmptyContext();
73 assertTrue(
74 "We should be getting a FastFailInterceptorContext since we are interacting with the"
75 + " PreemptiveFastFailInterceptor",
76 contextBeforeCast instanceof FastFailInterceptorContext);
77
78 FastFailInterceptorContext context = (FastFailInterceptorContext) contextBeforeCast;
79 assertTrue(context != null);
80
81 conf = HBaseConfiguration.create();
82 interceptorFactory = new RetryingCallerInterceptorFactory(conf);
83
84 interceptorBeforeCast = interceptorFactory.build();
85 assertTrue(
86 "We should be getting a NoOpRetryableCallerInterceptor since we disabled PFFE",
87 interceptorBeforeCast instanceof NoOpRetryableCallerInterceptor);
88
89 contextBeforeCast = interceptorBeforeCast.createEmptyContext();
90 assertTrue(
91 "We should be getting a NoOpRetryingInterceptorContext from NoOpRetryableCallerInterceptor",
92 contextBeforeCast instanceof NoOpRetryingInterceptorContext);
93
94 assertTrue(context != null);
95 }
96
97 @Test
98 public void testInterceptorContextClear() {
99 PreemptiveFastFailInterceptor interceptor = createPreemptiveInterceptor();
100 FastFailInterceptorContext context = (FastFailInterceptorContext) interceptor
101 .createEmptyContext();
102 context.clear();
103 assertFalse(context.getCouldNotCommunicateWithServer().booleanValue());
104 assertEquals(context.didTry(), false);
105 assertEquals(context.getFailureInfo(), null);
106 assertEquals(context.getServer(), null);
107 assertEquals(context.getTries(), 0);
108 }
109
110 @Test
111 public void testInterceptorContextPrepare() throws IOException {
112 PreemptiveFastFailInterceptor interceptor = TestFastFailWithoutTestUtil
113 .createPreemptiveInterceptor();
114 FastFailInterceptorContext context = (FastFailInterceptorContext) interceptor
115 .createEmptyContext();
116 RetryingCallable<?> callable = new RegionServerCallable<Boolean>(null,
117 null, null) {
118 @Override
119 public Boolean call(int callTimeout) throws Exception {
120 return true;
121 }
122
123 @Override
124 protected HRegionLocation getLocation() {
125 return new HRegionLocation(null, ServerName.valueOf("localhost", 1234,
126 987654321));
127 }
128 };
129 context.prepare(callable);
130 ServerName server = getSomeServerName();
131 assertEquals(context.getServer(), server);
132 context.clear();
133 context.prepare(callable, 2);
134 assertEquals(context.getServer(), server);
135 }
136
137 @Test
138 public void testInterceptorIntercept50Times() throws IOException,
139 InterruptedException {
140 for (int i = 0; i < 50; i++) {
141 testInterceptorIntercept();
142 }
143 }
144
145 public void testInterceptorIntercept() throws IOException,
146 InterruptedException {
147 Configuration conf = HBaseConfiguration.create();
148 long CLEANUP_TIMEOUT = 50;
149 long FAST_FAIL_THRESHOLD = 10;
150 conf.setBoolean(HConstants.HBASE_CLIENT_FAST_FAIL_MODE_ENABLED, true);
151 conf.setLong(HConstants.HBASE_CLIENT_FAST_FAIL_CLEANUP_MS_DURATION_MS,
152 CLEANUP_TIMEOUT);
153 conf.setLong(HConstants.HBASE_CLIENT_FAST_FAIL_THREASHOLD_MS,
154 FAST_FAIL_THRESHOLD);
155
156 PreemptiveFastFailInterceptor interceptor = TestFastFailWithoutTestUtil
157 .createPreemptiveInterceptor(conf);
158 FastFailInterceptorContext context = (FastFailInterceptorContext) interceptor
159 .createEmptyContext();
160
161 RetryingCallable<?> callable = getDummyRetryingCallable(getSomeServerName());
162
163
164 int tries = 0;
165 context.prepare(callable, tries);
166 interceptor.intercept(context);
167 interceptor.handleFailure(context, new ConnectException(
168 "Failed to connect to server"));
169 interceptor.updateFailureInfo(context);
170 assertTrue("Interceptor should have updated didTry to true",
171 context.didTry());
172 assertTrue(
173 "The call shouldn't have been successful if there was a ConnectException",
174 context.getCouldNotCommunicateWithServer().booleanValue());
175 assertNull(
176 "Once a failure is identified, the first time the FailureInfo is generated for the server,"
177 + " but it is not assigned to the context yet. It would be assigned on the next"
178 + " intercept.", context.getFailureInfo());
179 assertEquals(context.getTries(), tries);
180 assertFalse(
181 "We are still in the first attempt and so we dont set this variable to true yet.",
182 context.isRetryDespiteFastFailMode());
183
184 Thread.sleep(FAST_FAIL_THRESHOLD + 1);
185
186
187
188 tries++;
189
190 context.prepare(callable, tries);
191 interceptor.intercept(context);
192 interceptor.handleFailure(context, new ConnectException(
193 "Failed to connect to server"));
194 interceptor.updateFailureInfo(context);
195 assertTrue("didTru should remain true", context.didTry());
196 assertTrue(
197 "The call shouldn't have been successful if there was a ConnectException",
198 context.getCouldNotCommunicateWithServer().booleanValue());
199 assertNotNull(
200 "The context this time is updated with a failureInfo, since we already gave it a try.",
201 context.getFailureInfo());
202 assertEquals(context.getTries(), tries);
203 assertTrue(
204 "Since we are alone here we would be given the permission to retryDespiteFailures.",
205 context.isRetryDespiteFastFailMode());
206 context.clear();
207
208 Thread.sleep(CLEANUP_TIMEOUT);
209
210
211 tries++;
212
213 context.clear();
214 context.prepare(callable, tries);
215 interceptor.occasionallyCleanupFailureInformation();
216 assertNull("The cleanup should have cleared the server",
217 interceptor.repeatedFailuresMap.get(context.getServer()));
218 interceptor.intercept(context);
219 interceptor.handleFailure(context, new ConnectException(
220 "Failed to connect to server"));
221 interceptor.updateFailureInfo(context);
222 assertTrue("didTru should remain true", context.didTry());
223 assertTrue(
224 "The call shouldn't have been successful if there was a ConnectException",
225 context.getCouldNotCommunicateWithServer().booleanValue());
226 assertNull("The failureInfo is cleared off from the maps.",
227 context.getFailureInfo());
228 assertEquals(context.getTries(), tries);
229 assertFalse(
230 "Since we are alone here we would be given the permission to retryDespiteFailures.",
231 context.isRetryDespiteFastFailMode());
232 context.clear();
233
234 }
235
236 private <T> RetryingCallable<T> getDummyRetryingCallable(
237 ServerName someServerName) {
238 return new RegionServerCallable<T>(null, null, null) {
239 @Override
240 public T call(int callTimeout) throws Exception {
241 return null;
242 }
243
244 @Override
245 protected HRegionLocation getLocation() {
246 return new HRegionLocation(null, serverName);
247 }
248 };
249 }
250
251 @Test
252 public void testExceptionsIdentifiedByInterceptor() throws IOException {
253 Throwable[] networkexceptions = new Throwable[] {
254 new ConnectException("Mary is unwell"),
255 new SocketTimeoutException("Mike is too late"),
256 new ClosedChannelException(),
257 new SyncFailedException("Dave is not on the same page"),
258 new TimeoutException("Mike is late again"),
259 new EOFException("This is the end... "),
260 new ConnectionClosingException("Its closing") };
261 final String INDUCED = "Induced";
262 Throwable[] nonNetworkExceptions = new Throwable[] {
263 new IOException("Bob died"),
264 new RemoteException("Bob's cousin died", null),
265 new NoSuchMethodError(INDUCED), new NullPointerException(INDUCED),
266 new DoNotRetryIOException(INDUCED), new Error(INDUCED) };
267
268 Configuration conf = HBaseConfiguration.create();
269 long CLEANUP_TIMEOUT = 0;
270 long FAST_FAIL_THRESHOLD = 1000000;
271 conf.setBoolean(HConstants.HBASE_CLIENT_FAST_FAIL_MODE_ENABLED, true);
272 conf.setLong(HConstants.HBASE_CLIENT_FAST_FAIL_CLEANUP_MS_DURATION_MS,
273 CLEANUP_TIMEOUT);
274 conf.setLong(HConstants.HBASE_CLIENT_FAST_FAIL_THREASHOLD_MS,
275 FAST_FAIL_THRESHOLD);
276 for (Throwable e : networkexceptions) {
277 PreemptiveFastFailInterceptor interceptor = TestFastFailWithoutTestUtil
278 .createPreemptiveInterceptor(conf);
279 FastFailInterceptorContext context = (FastFailInterceptorContext) interceptor
280 .createEmptyContext();
281
282 RetryingCallable<?> callable = getDummyRetryingCallable(getSomeServerName());
283 context.prepare(callable, 0);
284 interceptor.intercept(context);
285 interceptor.handleFailure(context, e);
286 interceptor.updateFailureInfo(context);
287 assertTrue(
288 "The call shouldn't have been successful if there was a ConnectException",
289 context.getCouldNotCommunicateWithServer().booleanValue());
290 }
291 for (Throwable e : nonNetworkExceptions) {
292 try {
293 PreemptiveFastFailInterceptor interceptor = TestFastFailWithoutTestUtil
294 .createPreemptiveInterceptor(conf);
295 FastFailInterceptorContext context = (FastFailInterceptorContext) interceptor
296 .createEmptyContext();
297
298 RetryingCallable<?> callable = getDummyRetryingCallable(getSomeServerName());
299 context.prepare(callable, 0);
300 interceptor.intercept(context);
301 interceptor.handleFailure(context, e);
302 interceptor.updateFailureInfo(context);
303 assertFalse(
304 "The call shouldn't have been successful if there was a ConnectException",
305 context.getCouldNotCommunicateWithServer().booleanValue());
306 } catch (NoSuchMethodError t) {
307 assertTrue("Exception not induced", t.getMessage().contains(INDUCED));
308 } catch (NullPointerException t) {
309 assertTrue("Exception not induced", t.getMessage().contains(INDUCED));
310 } catch (DoNotRetryIOException t) {
311 assertTrue("Exception not induced", t.getMessage().contains(INDUCED));
312 } catch (Error t) {
313 assertTrue("Exception not induced", t.getMessage().contains(INDUCED));
314 }
315 }
316 }
317
318 protected static PreemptiveFastFailInterceptor createPreemptiveInterceptor(
319 Configuration conf) {
320 conf.setBoolean(HConstants.HBASE_CLIENT_FAST_FAIL_MODE_ENABLED, true);
321 RetryingCallerInterceptorFactory interceptorFactory = new RetryingCallerInterceptorFactory(
322 conf);
323 RetryingCallerInterceptor interceptorBeforeCast = interceptorFactory
324 .build();
325 return (PreemptiveFastFailInterceptor) interceptorBeforeCast;
326 }
327
328 static PreemptiveFastFailInterceptor createPreemptiveInterceptor() {
329 Configuration conf = HBaseConfiguration.create();
330 conf.setBoolean(HConstants.HBASE_CLIENT_FAST_FAIL_MODE_ENABLED, true);
331 return createPreemptiveInterceptor(conf);
332 }
333
334 @Test(timeout = 120000)
335 public void testPreemptiveFastFailException50Times()
336 throws InterruptedException, ExecutionException {
337 for (int i = 0; i < 50; i++) {
338 testPreemptiveFastFailException();
339 }
340 }
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
/**
 * Runs one round of the two-thread scenario against a shared interceptor.
 * <p>
 * Two callers that always fail with {@link #exception} are submitted to the executor: the
 * "privileged" one is expected to end with a {@link RetriesExhaustedException}, the
 * "non-privileged" one with a {@link PreemptiveFastFailException}. Afterwards the server must
 * be in the interceptor's failure map; a third caller that does not fail must then cause the
 * server to be removed from that map.
 *
 * @throws InterruptedException if interrupted while waiting for one of the submitted tasks
 * @throws ExecutionException if one of the submitted tasks ends with an unexpected exception
 */
private void testPreemptiveFastFailException() throws InterruptedException,
    ExecutionException {
  LOG.debug("Setting up the counters to start the test");
  priviRetryCounter.set(0);
  nonPriviRetryCounter.set(0);
  done.set(0);

  // Fresh latches for every retry slot; both arrays have RETRIES + 1 entries.
  for (int slot = 0; slot < RETRIES + 1; slot++) {
    latches[slot] = new CountDownLatch(1);
    latches2[slot] = new CountDownLatch(1);
  }

  final PreemptiveFastFailInterceptor sharedInterceptor = getInterceptor();

  final RpcRetryingCaller<Void> priviCaller =
      getRpcRetryingCaller(PAUSE_TIME, RETRIES, sharedInterceptor);
  final RpcRetryingCaller<Void> nonPriviCaller =
      getRpcRetryingCaller(PAUSE_TIME, RETRIES, sharedInterceptor);

  LOG.debug("Submitting the thread 1");
  Future<Boolean> priviOutcome = executor.submit(new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      // True only when the caller ran out of retries.
      boolean retriesExhausted = false;
      try {
        isPriviThreadLocal.get().set(true);
        priviCaller.callWithRetries(getRetryingCallable(serverName, exception),
            CLEANUP_TIMEOUT);
      } catch (RetriesExhaustedException expected) {
        retriesExhausted = true;
      } catch (PreemptiveFastFailException unexpected) {
        retriesExhausted = false;
      }
      return retriesExhausted;
    }
  });

  LOG.debug("Submitting the thread 2");
  Future<Boolean> nonPriviOutcome = executor.submit(new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      // True only when the caller was cut short by a fast-fail exception.
      boolean fastFailed = false;
      try {
        isPriviThreadLocal.get().set(false);
        nonPriviCaller.callWithRetries(getRetryingCallable(serverName, exception),
            CLEANUP_TIMEOUT);
      } catch (PreemptiveFastFailException expected) {
        fastFailed = true;
      }
      return fastFailed;
    }
  });

  LOG.debug("Waiting for Thread 2 to finish");
  assertTrue(nonPriviOutcome.get());
  LOG.debug("Waiting for Thread 1 to finish");
  assertTrue(priviOutcome.get());

  // Both callers failed, so the server has to be tracked as failing at this point.
  assertTrue(sharedInterceptor.isServerInFailureMap(serverName));

  // A caller whose callable does not throw (null exception) should clear the server again.
  final RpcRetryingCaller<Void> priviCallerNew =
      getRpcRetryingCaller(PAUSE_TIME, RETRIES, sharedInterceptor);
  Future<Boolean> recoveryOutcome = executor.submit(new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      priviCallerNew.callWithRetries(getRetryingCallable(serverName, null), CLEANUP_TIMEOUT);
      return false;
    }
  });
  recoveryOutcome.get();

  assertFalse("The server was supposed to be removed from the map",
      sharedInterceptor.isServerInFailureMap(serverName));
}
470
471 ExecutorService executor = Executors.newCachedThreadPool();
472
473
474
475
476 final int PAUSE_TIME = 10;
477 final int RETRIES = 3;
478 final int CLEANUP_TIMEOUT = 10000;
479 final long FAST_FAIL_THRESHOLD = PAUSE_TIME / 1;
480
481
482
483
484 final CountDownLatch[] latches = new CountDownLatch[RETRIES + 1];
485 final CountDownLatch[] latches2 = new CountDownLatch[RETRIES + 1];
486 final AtomicInteger done = new AtomicInteger(0);
487
488
489
490
491 final AtomicInteger priviRetryCounter = new AtomicInteger();
492 final AtomicInteger nonPriviRetryCounter = new AtomicInteger();
493 final ServerName serverName = getSomeServerName();
494
495
496
497
498 public final ThreadLocal<AtomicBoolean> isPriviThreadLocal = new ThreadLocal<AtomicBoolean>() {
499 @Override
500 public AtomicBoolean initialValue() {
501 return new AtomicBoolean(true);
502 }
503 };
504 final Exception exception = new ConnectionClosingException("The current connection is closed");
505
506 public PreemptiveFastFailInterceptor getInterceptor() {
507 final Configuration conf = HBaseConfiguration.create();
508 conf.setBoolean(HConstants.HBASE_CLIENT_FAST_FAIL_MODE_ENABLED, true);
509 conf.setLong(HConstants.HBASE_CLIENT_FAST_FAIL_CLEANUP_MS_DURATION_MS,
510 CLEANUP_TIMEOUT);
511 conf.setLong(HConstants.HBASE_CLIENT_FAST_FAIL_THREASHOLD_MS,
512 FAST_FAIL_THRESHOLD);
513
514 return new PreemptiveFastFailInterceptor(
515 conf) {
516 @Override
517 public void updateFailureInfo(RetryingCallerInterceptorContext context) {
518 boolean pffe = false;
519 if (!isPriviThreadLocal.get().get()) {
520 pffe = !((FastFailInterceptorContext)context).isRetryDespiteFastFailMode();
521 }
522 if (isPriviThreadLocal.get().get()) {
523 try {
524
525
526 if (done.get() <= 1) {
527 latches2[priviRetryCounter.get()].await();
528 }
529 } catch (InterruptedException e) {
530 fail();
531 }
532 }
533 super.updateFailureInfo(context);
534 if (!isPriviThreadLocal.get().get()) {
535 if (pffe) done.incrementAndGet();
536 latches2[nonPriviRetryCounter.get()].countDown();
537 }
538 }
539
540 @Override
541 public void intercept(RetryingCallerInterceptorContext context)
542 throws PreemptiveFastFailException {
543 if (!isPriviThreadLocal.get().get()) {
544 try {
545 latches[nonPriviRetryCounter.getAndIncrement()].await();
546 } catch (InterruptedException e) {
547 fail();
548 }
549 }
550 super.intercept(context);
551 }
552
553 @Override
554 public void handleFailure(RetryingCallerInterceptorContext context,
555 Throwable t) throws IOException {
556 super.handleFailure(context, t);
557 if (isPriviThreadLocal.get().get()) {
558 latches[priviRetryCounter.getAndIncrement()].countDown();
559 }
560 }
561 };
562 }
563
564 public RpcRetryingCaller<Void> getRpcRetryingCaller(int pauseTime,
565 int retries, RetryingCallerInterceptor interceptor) {
566 return new RpcRetryingCaller<Void>(pauseTime, retries, interceptor, 9) {
567 @Override
568 public Void callWithRetries(RetryingCallable<Void> callable,
569 int callTimeout) throws IOException, RuntimeException {
570 Void ret = super.callWithRetries(callable, callTimeout);
571 return ret;
572 }
573 };
574 }
575
576 protected static ServerName getSomeServerName() {
577 return ServerName.valueOf("localhost", 1234, 987654321);
578 }
579
580 private RegionServerCallable<Void> getRetryingCallable(
581 final ServerName serverName, final Exception e) {
582 return new RegionServerCallable<Void>(null, null, null) {
583 @Override
584 public void prepare(boolean reload) throws IOException {
585 this.location = new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO,
586 serverName);
587 }
588
589 @Override
590 public Void call(int callTimeout) throws Exception {
591 if (e != null)
592 throw e;
593 return null;
594 }
595
596 @Override
597 protected HRegionLocation getLocation() {
598 return new HRegionLocation(null, serverName);
599 }
600
601 @Override
602 public void throwable(Throwable t, boolean retrying) {
603
604 }
605
606 @Override
607 public long sleep(long pause, int tries) {
608 return ConnectionUtils.getPauseTime(pause, tries + 1);
609 }
610 };
611 }
612 }