1   /**
2    * Copyright 2007 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.Collection;
24  import java.util.List;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.HBaseClusterTestCase;
29  import org.apache.hadoop.hbase.HColumnDescriptor;
30  import org.apache.hadoop.hbase.HConstants;
31  import org.apache.hadoop.hbase.HTableDescriptor;
32  import org.apache.hadoop.hbase.LocalHBaseCluster;
33  import org.apache.hadoop.hbase.client.HBaseAdmin;
34  import org.apache.hadoop.hbase.client.HTable;
35  import org.apache.hadoop.hbase.client.Put;
36  import org.apache.hadoop.hbase.client.Result;
37  import org.apache.hadoop.hbase.client.ResultScanner;
38  import org.apache.hadoop.hbase.client.Scan;
39  import org.apache.hadoop.hbase.util.Bytes;
40  import org.apache.hadoop.hbase.util.JVMClusterUtil;
41  
42  /**
43   * Tests region server failover when a region server exits both cleanly and
44   * when it aborts.
45   */
46  public class DisabledTestRegionServerExit extends HBaseClusterTestCase {
47    final Log LOG = LogFactory.getLog(this.getClass().getName());
48    HTable table;
49  
50    /** constructor */
51    public DisabledTestRegionServerExit() {
52      super(2);
53      conf.setInt("ipc.client.connect.max.retries", 5); // reduce ipc retries
54      conf.setInt("ipc.client.timeout", 10000);         // and ipc timeout
55      conf.setInt("hbase.client.pause", 10000);         // increase client timeout
56      conf.setInt("hbase.client.retries.number", 10);   // increase HBase retries
57    }
58  
59    /**
60     * Test abort of region server.
61     * @throws IOException
62     */
63    public void testAbort() throws IOException {
64      // When the META table can be opened, the region servers are running
65      new HTable(conf, HConstants.META_TABLE_NAME);
66      // Create table and add a row.
67      final String tableName = getName();
68      byte [] row = createTableAndAddRow(tableName);
69      // Start up a new region server to take over serving of root and meta
70      // after we shut down the current meta/root host.
71      this.cluster.startRegionServer();
72      // Now abort the meta region server and wait for it to go down and come back
73      stopOrAbortMetaRegionServer(true);
74      // Verify that everything is back up.
75      LOG.info("Starting up the verification thread for " + getName());
76      Thread t = startVerificationThread(tableName, row);
77      t.start();
78      threadDumpingJoin(t);
79    }
80  
81    /**
82     * Test abort of region server.
83     * Test is flakey up on hudson.  Needs work.
84     * @throws IOException
85     */
86    public void testCleanExit() throws IOException {
87      // When the META table can be opened, the region servers are running
88      new HTable(this.conf, HConstants.META_TABLE_NAME);
89      // Create table and add a row.
90      final String tableName = getName();
91      byte [] row = createTableAndAddRow(tableName);
92      // Start up a new region server to take over serving of root and meta
93      // after we shut down the current meta/root host.
94      this.cluster.startRegionServer();
95      // Now abort the meta region server and wait for it to go down and come back
96      stopOrAbortMetaRegionServer(false);
97      // Verify that everything is back up.
98      LOG.info("Starting up the verification thread for " + getName());
99      Thread t = startVerificationThread(tableName, row);
100     t.start();
101     threadDumpingJoin(t);
102   }
103 
104   private byte [] createTableAndAddRow(final String tableName)
105   throws IOException {
106     HTableDescriptor desc = new HTableDescriptor(tableName);
107     desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
108     HBaseAdmin admin = new HBaseAdmin(conf);
109     admin.createTable(desc);
110     // put some values in the table
111     this.table = new HTable(conf, tableName);
112     byte [] row = Bytes.toBytes("row1");
113     Put put = new Put(row);
114     put.add(HConstants.CATALOG_FAMILY, null, Bytes.toBytes(tableName));
115     table.put(put);
116     return row;
117   }
118 
119   /*
120    * Stop the region server serving the meta region and wait for the meta region
121    * to get reassigned. This is always the most problematic case.
122    *
123    * @param abort set to true if region server should be aborted, if false it
124    * is just shut down.
125    */
126   private void stopOrAbortMetaRegionServer(boolean abort) {
127     List<JVMClusterUtil.RegionServerThread> regionThreads =
128       cluster.getRegionServerThreads();
129 
130     int server = -1;
131     for (int i = 0; i < regionThreads.size() && server == -1; i++) {
132       HRegionServer s = regionThreads.get(i).getRegionServer();
133       Collection<HRegion> regions = s.getOnlineRegionsLocalContext();
134       for (HRegion r : regions) {
135         if (Bytes.equals(r.getTableDesc().getName(),
136             HConstants.META_TABLE_NAME)) {
137           server = i;
138         }
139       }
140     }
141     if (server == -1) {
142       LOG.fatal("could not find region server serving meta region");
143       fail();
144     }
145     if (abort) {
146       this.cluster.abortRegionServer(server);
147 
148     } else {
149       this.cluster.stopRegionServer(server);
150     }
151     LOG.info(this.cluster.waitOnRegionServer(server) + " has been " +
152         (abort ? "aborted" : "shut down"));
153   }
154 
155   /*
156    * Run verification in a thread so I can concurrently run a thread-dumper
157    * while we're waiting (because in this test sometimes the meta scanner
158    * looks to be be stuck).
159    * @param tableName Name of table to find.
160    * @param row Row we expect to find.
161    * @return Verification thread.  Caller needs to calls start on it.
162    */
163   private Thread startVerificationThread(final String tableName,
164       final byte [] row) {
165     Runnable runnable = new Runnable() {
166       public void run() {
167         try {
168           // Now try to open a scanner on the meta table. Should stall until
169           // meta server comes back up.
170           HTable t = new HTable(conf, HConstants.META_TABLE_NAME);
171           Scan scan = new Scan();
172           scan.addFamily(HConstants.CATALOG_FAMILY);
173 
174           ResultScanner s = t.getScanner(scan);
175           s.close();
176 
177         } catch (IOException e) {
178           LOG.fatal("could not re-open meta table because", e);
179           fail();
180         }
181         ResultScanner scanner = null;
182         try {
183           // Verify that the client can find the data after the region has moved
184           // to a different server
185           Scan scan = new Scan();
186           scan.addFamily(HConstants.CATALOG_FAMILY);
187 
188           scanner = table.getScanner(scan);
189           LOG.info("Obtained scanner " + scanner);
190           for (Result r : scanner) {
191             assertTrue(Bytes.equals(r.getRow(), row));
192             assertEquals(1, r.size());
193             byte[] bytes = r.value();
194             assertNotNull(bytes);
195             assertTrue(tableName.equals(Bytes.toString(bytes)));
196           }
197           LOG.info("Success!");
198         } catch (Exception e) {
199           e.printStackTrace();
200           fail();
201         } finally {
202           if (scanner != null) {
203             LOG.info("Closing scanner " + scanner);
204             scanner.close();
205           }
206         }
207       }
208     };
209     return new Thread(runnable);
210   }
211 }