1   /*
2    * Copyright 2011 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   * http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.coprocessor;
22  
23  import static org.junit.Assert.assertFalse;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.IOException;
28  
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.hbase.HBaseTestingUtility;
31  import org.apache.hadoop.hbase.MediumTests;
32  import org.apache.hadoop.hbase.client.HTable;
33  import org.apache.hadoop.hbase.client.Put;
34  import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
35  import org.apache.hadoop.hbase.regionserver.HRegionServer;
36  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
37  import org.apache.hadoop.hbase.util.Bytes;
38  import org.junit.AfterClass;
39  import org.junit.BeforeClass;
40  import org.junit.Test;
41  import org.junit.experimental.categories.Category;
42  
43  /**
44   * Tests unhandled exceptions thrown by coprocessors running on regionserver.
45   * Expected result is that the master will remove the buggy coprocessor from
46   * its set of coprocessors and throw a org.apache.hadoop.hbase.DoNotRetryIOException
47   * back to the client.
48   * (HBASE-4014).
49   */
50  @Category(MediumTests.class)
51  public class TestRegionServerCoprocessorExceptionWithRemove {
52    public static class BuggyRegionObserver extends SimpleRegionObserver {
53      @SuppressWarnings("null")
54      @Override
55      public void prePut(final ObserverContext<RegionCoprocessorEnvironment> c,
56                         final Put put, final WALEdit edit,
57                         final boolean writeToWAL) {
58        String tableName =
59            c.getEnvironment().getRegion().getRegionInfo().getTableNameAsString();
60        if (tableName.equals("observed_table")) {
61          Integer i = null;
62          i = i + 1;
63        }
64      }
65    }
66  
67    private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
68  
69    @BeforeClass
70    public static void setupBeforeClass() throws Exception {
71      // set configure to indicate which cp should be loaded
72      Configuration conf = TEST_UTIL.getConfiguration();
73      conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
74          BuggyRegionObserver.class.getName());
75      TEST_UTIL.startMiniCluster(2);
76    }
77  
78    @AfterClass
79    public static void teardownAfterClass() throws Exception {
80      TEST_UTIL.shutdownMiniCluster();
81    }
82  
83    @Test(timeout=60000)
84    public void testExceptionFromCoprocessorDuringPut()
85        throws IOException {
86      // Set watches on the zookeeper nodes for all of the regionservers in the
87      // cluster. When we try to write to TEST_TABLE, the buggy coprocessor will
88      // cause a NullPointerException, which will cause the regionserver (which
89      // hosts the region we attempted to write to) to abort. In turn, this will
90      // cause the nodeDeleted() method of the DeadRegionServer tracker to
91      // execute, which will set the rsZKNodeDeleted flag to true, which will
92      // pass this test.
93  
94      byte[] TEST_TABLE = Bytes.toBytes("observed_table");
95      byte[] TEST_FAMILY = Bytes.toBytes("aaa");
96  
97      HTable table = TEST_UTIL.createTable(TEST_TABLE, TEST_FAMILY);
98      TEST_UTIL.createMultiRegions(table, TEST_FAMILY);
99      TEST_UTIL.waitUntilAllRegionsAssigned(TEST_TABLE);
100     // Note which regionServer that should survive the buggy coprocessor's
101     // prePut().
102     HRegionServer regionServer =
103         TEST_UTIL.getRSForFirstRegionInTable(TEST_TABLE);
104 
105     // same logic as {@link TestMasterCoprocessorExceptionWithRemove},
106     // but exception will be RetriesExhaustedWithDetailException rather
107     // than DoNotRetryIOException. The latter exception is what the RegionServer
108     // will have actually thrown, but the client will wrap this in a
109     // RetriesExhaustedWithDetailException.
110     // We will verify that "DoNotRetryIOException" appears in the text of the
111     // the exception's detailMessage.
112     boolean threwDNRE = false;
113     try {
114       final byte[] ROW = Bytes.toBytes("aaa");
115       Put put = new Put(ROW);
116       put.add(TEST_FAMILY, ROW, ROW);
117       table.put(put);
118     } catch (RetriesExhaustedWithDetailsException e) {
119       // below, could call instead :
120       // startsWith("Failed 1 action: DoNotRetryIOException.")
121       // But that might be too brittle if client-side
122       // DoNotRetryIOException-handler changes its message.
123       assertTrue(e.getMessage().contains("DoNotRetryIOException"));
124       threwDNRE = true;
125     } finally {
126       assertTrue(threwDNRE);
127     }
128 
129     // Wait 3 seconds for the regionserver to abort: expected result is that
130     // it will survive and not abort.
131     for (int i = 0; i < 3; i++) {
132       assertFalse(regionServer.isAborted());
133       try {
134         Thread.sleep(1000);
135       } catch (InterruptedException e) {
136         fail("InterruptedException while waiting for regionserver " +
137             "zk node to be deleted.");
138       }
139     }
140     table.close();
141   }
142 
143   @org.junit.Rule
144   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
145     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
146 }
147