/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.test;

import com.google.common.base.Joiner;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.RollWALWriterRequest;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;


/**
 * This is an integration test for replication. It is derived from
 * {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList}, which creates a large
 * circular linked list in one cluster and verifies that the data is correct in a sink cluster.
 * The test handles creating the tables and schema and setting up replication.
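 * <p>
 * A sketch of a typical command-line invocation follows; the ZooKeeper quorum hosts,
 * ports, znodes, and output directory are illustrative values, not defaults. The
 * {@code -s}, {@code -r}, and {@code -d} options are the required options defined below:
 * <pre>
 * hbase org.apache.hadoop.hbase.test.IntegrationTestReplication \
 *     -s srczk1:2181:/hbase -r sinkzk1:2181:/hbase -d /tmp/replication-output
 * </pre>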
 */
public class IntegrationTestReplication extends IntegrationTestBigLinkedList {
  protected String sourceClusterIdString;
  protected String sinkClusterIdString;
  protected int numIterations;
  protected int numMappers;
  protected long numNodes;
  protected String outputDir;
  protected int numReducers;
  protected int generateVerifyGap;
  protected Integer width;
  protected Integer wrapMultiplier;
  protected boolean noReplicationSetup = false;

  private final String SOURCE_CLUSTER_OPT = "sourceCluster";
  private final String DEST_CLUSTER_OPT = "destCluster";
  private final String ITERATIONS_OPT = "iterations";
  private final String NUM_MAPPERS_OPT = "numMappers";
  private final String OUTPUT_DIR_OPT = "outputDir";
  private final String NUM_REDUCERS_OPT = "numReducers";
  private final String NO_REPLICATION_SETUP_OPT = "noReplicationSetup";

  /**
   * The gap (in seconds) from when data is finished being generated at the source
   * to when it can be verified. This is the replication lag we are willing to tolerate.
   */
  private final String GENERATE_VERIFY_GAP_OPT = "generateVerifyGap";

  /**
   * The width of the linked list.
   * See {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList} for more details
   */
  private final String WIDTH_OPT = "width";

  /**
   * The number of rows after which the linked list points to the first row.
   * See {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList} for more details
   */
  private final String WRAP_MULTIPLIER_OPT = "wrapMultiplier";

  /**
   * The number of nodes in the test setup. This has to be a multiple of WRAP_MULTIPLIER * WIDTH
   * in order to ensure that the linked list is complete.
   * See {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList} for more details
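   * <p>
   * For example, with the defaults defined below ({@code DEFAULT_WIDTH} = 1000000 and
   * {@code DEFAULT_WRAP_MULTIPLIER} = 25), the number of nodes must be a multiple of 25000000.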
   */
  private final String NUM_NODES_OPT = "numNodes";

  private final int DEFAULT_NUM_MAPPERS = 1;
  private final int DEFAULT_NUM_REDUCERS = 1;
  private final int DEFAULT_NUM_ITERATIONS = 1;
  private final int DEFAULT_GENERATE_VERIFY_GAP = 60;
  private final int DEFAULT_WIDTH = 1000000;
  private final int DEFAULT_WRAP_MULTIPLIER = 25;
  private final int DEFAULT_NUM_NODES = DEFAULT_WIDTH * DEFAULT_WRAP_MULTIPLIER;

  /**
   * Wrapper around an HBase ClusterID allowing us
   * to get admin connections and configurations for it
   */
  protected class ClusterID {
    private final Configuration configuration;
    private HConnection connection = null;

    /**
     * This creates a new ClusterID wrapper that will automatically build connections and
     * configurations to be able to talk to the specified cluster
     *
     * @param base the base configuration that this class will add to
     * @param key the cluster key in the form of zk_quorum:zk_port:zk_parent_node
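     *
     * <p>A minimal usage sketch (the quorum host, port, and znode below are illustrative):
     * <pre>
     * ClusterID source = new ClusterID(conf, "zk1.example.com:2181:/hbase");
     * Configuration sourceConf = source.getConfiguration();
     * </pre>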
     */
    public ClusterID(Configuration base,
                     String key) {
      configuration = new Configuration(base);
      String[] parts = key.split(":");
      configuration.set(HConstants.ZOOKEEPER_QUORUM, parts[0]);
      configuration.set(HConstants.ZOOKEEPER_CLIENT_PORT, parts[1]);
      configuration.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parts[2]);
    }

    @Override
    public String toString() {
      return Joiner.on(":").join(configuration.get(HConstants.ZOOKEEPER_QUORUM),
                                 configuration.get(HConstants.ZOOKEEPER_CLIENT_PORT),
                                 configuration.get(HConstants.ZOOKEEPER_ZNODE_PARENT));
    }

    public Configuration getConfiguration() {
      return this.configuration;
    }

    public HConnection getConnection() throws Exception {
      if (this.connection == null) {
        this.connection = HConnectionManager.createConnection(this.configuration);
      }
      return this.connection;
    }

    public void closeConnection() throws Exception {
      this.connection.close();
      this.connection = null;
    }

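    /**
     * Note: this overloads rather than overrides {@link Object#equals(Object)};
     * two ClusterIDs are considered equal when their cluster keys match, ignoring case.
     */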
    public boolean equals(ClusterID other) {
      return this.toString().equalsIgnoreCase(other.toString());
    }
  }

  /**
   * The main runner loop for the test. It uses
   * {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList}
   * for the generation and verification of the linked list. It is heavily based on
   * {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList.Loop}
   */
  protected class VerifyReplicationLoop extends Configured implements Tool {
    private final Log LOG = LogFactory.getLog(VerifyReplicationLoop.class);
    protected ClusterID source;
    protected ClusterID sink;

    IntegrationTestBigLinkedList integrationTestBigLinkedList;

    /**
     * This tears down any pre-existing tables and rebuilds the tables and schemas on
     * the source cluster. It then sets up replication from the source to the sink cluster
     * using the {@link org.apache.hadoop.hbase.client.replication.ReplicationAdmin}
     * connection.
     *
     * @throws Exception
     */
    protected void setupTablesAndReplication() throws Exception {
      TableName tableName = getTableName(source.getConfiguration());

      ClusterID[] clusters = {source, sink};

      // delete any old tables in the source and sink
      for (ClusterID cluster : clusters) {
        HBaseAdmin admin = new HBaseAdmin(cluster.getConnection());
        try {
          if (admin.tableExists(tableName)) {
            if (admin.isTableEnabled(tableName)) {
              admin.disableTable(tableName);
            }

            /*
             * TODO: This is a workaround for a replication bug (HBASE-13416).
             * When we recreate a table that has recently been deleted, the
             * contents of the old logs are replayed even though they should
             * not be. Rolling the WAL writers below ensures that the logs are
             * flushed before the table gets deleted. Eventually the bug should
             * be fixed and this workaround removed.
             */
            Set<ServerName> regionServers = new TreeSet<ServerName>();
            for (HRegionLocation location: admin.getConnection().locateRegions(tableName)) {
              regionServers.add(location.getServerName());
            }
            for (ServerName server : regionServers) {
              source.getConnection().getAdmin(server).rollWALWriter(null,
                RollWALWriterRequest.newBuilder().build());
            }

            admin.deleteTable(tableName);
          }
        } finally {
          admin.close();
        }
      }

      // create the schema
      Generator generator = new Generator();
      generator.setConf(source.getConfiguration());
      generator.createSchema();

      // set up replication on the source
      if (!source.equals(sink)) {
        ReplicationAdmin replicationAdmin = new ReplicationAdmin(source.getConfiguration());
        // remove any old replication peers
        for (String oldPeer : replicationAdmin.listPeerConfigs().keySet()) {
          replicationAdmin.removePeer(oldPeer);
        }

        // set the sink to be the target
        ReplicationPeerConfig peerConfig = new ReplicationPeerConfig();
        peerConfig.setClusterKey(sink.toString());

        // set the test table to be the table to replicate
        HashMap<TableName, ArrayList<String>> toReplicate =
            new HashMap<TableName, ArrayList<String>>();
        toReplicate.put(tableName, new ArrayList<String>(0));

        replicationAdmin.addPeer("TestPeer", peerConfig, toReplicate);

        replicationAdmin.enableTableRep(tableName);
        replicationAdmin.close();
      }

      for (ClusterID cluster : clusters) {
        cluster.closeConnection();
      }
    }

    protected void waitForReplication() throws Exception {
      // TODO: we shouldn't be sleeping here. It would be better to query the region servers
      // and wait for them to report 0 replication lag.
      Thread.sleep(generateVerifyGap * 1000L); // use a long literal to avoid int overflow
    }

    /**
     * Run the {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList.Generator} in the
     * source cluster. This assumes that the tables have been set up via setupTablesAndReplication.
     *
     * @throws Exception
     */
    protected void runGenerator() throws Exception {
      Path outputPath = new Path(outputDir);
      UUID uuid = UUID.randomUUID(); // create a random UUID
      Path generatorOutput = new Path(outputPath, uuid.toString());

      Generator generator = new Generator();
      generator.setConf(source.getConfiguration());

      int retCode = generator.run(numMappers, numNodes, generatorOutput, width, wrapMultiplier);
      if (retCode > 0) {
        throw new RuntimeException("Generator failed with return code: " + retCode);
      }
    }

    /**
     * Run the {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList.Verify}
     * job in the sink cluster. If replication is working properly, the data written at the
     * source cluster should be available in the sink cluster after a reasonable gap.
     *
     * @param expectedNumNodes the number of nodes we are expecting to see in the sink cluster
     * @throws Exception
     */
    protected void runVerify(long expectedNumNodes) throws Exception {
      Path outputPath = new Path(outputDir);
      UUID uuid = UUID.randomUUID(); // create a random UUID
      Path iterationOutput = new Path(outputPath, uuid.toString());

      Verify verify = new Verify();
      verify.setConf(sink.getConfiguration());

      int retCode = verify.run(iterationOutput, numReducers);
      if (retCode > 0) {
        throw new RuntimeException("Verify.run failed with return code: " + retCode);
      }

      if (!verify.verify(expectedNumNodes)) {
        throw new RuntimeException("Verify.verify failed");
      }

      LOG.info("Verify finished with success. Total nodes=" + expectedNumNodes);
    }

    /**
     * The main test runner
     *
     * This test has 4 steps:
     *  1: setupTablesAndReplication
     *  2: generate the data into the source cluster
     *  3: wait for replication to propagate
     *  4: verify that the data is available in the sink cluster
     *
     * @param args should be empty
     * @return 0 on success
     * @throws Exception on an error
     */
    @Override
    public int run(String[] args) throws Exception {
      source = new ClusterID(getConf(), sourceClusterIdString);
      sink = new ClusterID(getConf(), sinkClusterIdString);

      if (!noReplicationSetup) {
        setupTablesAndReplication();
      }
      // use a long: numNodes is a long and the expected total accumulates across iterations
      long expectedNumNodes = 0;
      for (int i = 0; i < numIterations; i++) {
        LOG.info("Starting iteration = " + i);

        expectedNumNodes += numMappers * numNodes;

        runGenerator();
        waitForReplication();
        runVerify(expectedNumNodes);
      }

      /*
       * We always return 0 because exceptions are thrown when there is an error
       * in the verification step.
       */
      return 0;
    }
  }

  @Override
  protected void addOptions() {
    super.addOptions();
    addRequiredOptWithArg("s", SOURCE_CLUSTER_OPT,
                          "Cluster ID of the source cluster (e.g. localhost:2181:/hbase)");
    addRequiredOptWithArg("r", DEST_CLUSTER_OPT,
                          "Cluster ID of the sink cluster (e.g. localhost:2182:/hbase)");
    addRequiredOptWithArg("d", OUTPUT_DIR_OPT,
                          "Temporary directory where to write keys for the test");

    addOptWithArg("nm", NUM_MAPPERS_OPT,
                  "Number of mappers (default: " + DEFAULT_NUM_MAPPERS + ")");
362     addOptWithArg("nr", NUM_REDUCERS_OPT,
363                   "Number of reducers (default: " + DEFAULT_NUM_MAPPERS + ")");
    addOptNoArg("nrs", NO_REPLICATION_SETUP_OPT,
                  "Don't set up tables or configure replication before starting the test");
    addOptWithArg("n", NUM_NODES_OPT,
                  "Number of nodes. This should be a multiple of width * wrapMultiplier." +
                  " (default: " + DEFAULT_NUM_NODES + ")");
    addOptWithArg("i", ITERATIONS_OPT, "Number of iterations to run (default: " +
                  DEFAULT_NUM_ITERATIONS + ")");
    addOptWithArg("t", GENERATE_VERIFY_GAP_OPT,
                  "Gap between generate and verify steps in seconds (default: " +
                  DEFAULT_GENERATE_VERIFY_GAP + ")");
    addOptWithArg("w", WIDTH_OPT,
                  "Width of the linked list chain (default: " + DEFAULT_WIDTH + ")");
    addOptWithArg("wm", WRAP_MULTIPLIER_OPT, "How many times to wrap around (default: " +
                  DEFAULT_WRAP_MULTIPLIER + ")");
  }

  @Override
  protected void processOptions(CommandLine cmd) {
    processBaseOptions(cmd);

    sourceClusterIdString = cmd.getOptionValue(SOURCE_CLUSTER_OPT);
    sinkClusterIdString = cmd.getOptionValue(DEST_CLUSTER_OPT);
    outputDir = cmd.getOptionValue(OUTPUT_DIR_OPT);

    // This uses parseInt from org.apache.hadoop.hbase.util.AbstractHBaseTool
    numMappers = parseInt(cmd.getOptionValue(NUM_MAPPERS_OPT,
                                             Integer.toString(DEFAULT_NUM_MAPPERS)),
                          1, Integer.MAX_VALUE);
    numReducers = parseInt(cmd.getOptionValue(NUM_REDUCERS_OPT,
                                              Integer.toString(DEFAULT_NUM_REDUCERS)),
                           1, Integer.MAX_VALUE);
    numNodes = parseInt(cmd.getOptionValue(NUM_NODES_OPT, Integer.toString(DEFAULT_NUM_NODES)),
                        1, Integer.MAX_VALUE);
    generateVerifyGap = parseInt(cmd.getOptionValue(GENERATE_VERIFY_GAP_OPT,
                                                    Integer.toString(DEFAULT_GENERATE_VERIFY_GAP)),
                                 1, Integer.MAX_VALUE);
    numIterations = parseInt(cmd.getOptionValue(ITERATIONS_OPT,
                                                Integer.toString(DEFAULT_NUM_ITERATIONS)),
                             1, Integer.MAX_VALUE);
    width = parseInt(cmd.getOptionValue(WIDTH_OPT, Integer.toString(DEFAULT_WIDTH)),
                     1, Integer.MAX_VALUE);
    wrapMultiplier = parseInt(cmd.getOptionValue(WRAP_MULTIPLIER_OPT,
                                                 Integer.toString(DEFAULT_WRAP_MULTIPLIER)),
                              1, Integer.MAX_VALUE);

    if (cmd.hasOption(NO_REPLICATION_SETUP_OPT)) {
      noReplicationSetup = true;
    }

    if (numNodes % (width * wrapMultiplier) != 0) {
      throw new RuntimeException("numNodes must be a multiple of width * wrapMultiplier");
    }
  }

  @Override
  public int runTestFromCommandLine() throws Exception {
    VerifyReplicationLoop tool = new VerifyReplicationLoop();
    tool.integrationTestBigLinkedList = this;
    return ToolRunner.run(getConf(), tool, null);
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    IntegrationTestingUtility.setUseDistributedCluster(conf);
    int ret = ToolRunner.run(conf, new IntegrationTestReplication(), args);
    System.exit(ret);
  }
}