View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.util.Set;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.conf.Configuration;
26  import org.apache.hadoop.fs.Path;
27  import org.apache.hadoop.hbase.HBaseConfiguration;
28  import org.apache.hadoop.hbase.IntegrationTestBase;
29  import org.apache.hadoop.hbase.IntegrationTestingUtility;
30  import org.apache.hadoop.hbase.IntegrationTests;
31  import org.apache.hadoop.hbase.TableName;
32  import org.apache.hadoop.util.ToolRunner;
33  import org.junit.After;
34  import org.junit.Before;
35  import org.junit.experimental.categories.Category;
36  
37  /**
38   * An integration test to test {@link TableSnapshotInputFormat} which enables
39   * reading directly from snapshot files without going through hbase servers.
40   *
41   * This test creates a table and loads the table with the rows ranging from
42   * 'aaa' to 'zzz', and for each row, sets the columns f1:(null) and f2:(null) to be
43   * the same as the row value.
44   * <pre>
45   * aaa, f1: => aaa
46   * aaa, f2: => aaa
47   * aab, f1: => aab
48   * ....
49   * zzz, f2: => zzz
50   * </pre>
51   *
52   * Then the test creates a snapshot from this table, and overrides the values in the original
53   * table with values 'after_snapshot_value'. The test, then runs a mapreduce job over the snapshot
54   * with a scan start row 'bbb' and stop row 'yyy'. The data is saved in a single reduce output file, and
55   * inspected later to verify that the MR job has seen all the values from the snapshot.
56   *
57   * <p> These parameters can be used to configure the job:
58   * <br>"IntegrationTestTableSnapshotInputFormat.table" =&gt; the name of the table
59   * <br>"IntegrationTestTableSnapshotInputFormat.snapshot" =&gt; the name of the snapshot
60   * <br>"IntegrationTestTableSnapshotInputFormat.numRegions" =&gt; number of regions in the table to be created
61   * <br>"IntegrationTestTableSnapshotInputFormat.tableDir" =&gt; temporary directory to restore the snapshot files
62   *
63   */
64  @Category(IntegrationTests.class)
65  // Not runnable as a unit test. See TestTableSnapshotInputFormat
66  public class IntegrationTestTableSnapshotInputFormat extends IntegrationTestBase {
67  
68    private static final Log LOG = LogFactory.getLog(IntegrationTestTableSnapshotInputFormat.class);
69  
70    private static final String TABLE_NAME_KEY = "IntegrationTestTableSnapshotInputFormat.table";
71    private static final String DEFAULT_TABLE_NAME = "IntegrationTestTableSnapshotInputFormat";
72  
73    private static final String SNAPSHOT_NAME_KEY = "IntegrationTestTableSnapshotInputFormat.snapshot";
74  
75  
76    private static final String NUM_REGIONS_KEY = "IntegrationTestTableSnapshotInputFormat.numRegions";
77    private static final int DEFAULT_NUM_REGIONS = 32;
78  
79    private static final String TABLE_DIR_KEY = "IntegrationTestTableSnapshotInputFormat.tableDir";
80  
81    private IntegrationTestingUtility util;
82  
83    @Override
84    public void setConf(Configuration conf) {
85      super.setConf(conf);
86      util = getTestingUtil(conf);
87    }
88  
89    @Override
90    @Before
91    public void setUp() throws Exception {
92      super.setUp();
93      util = getTestingUtil(getConf());
94      util.initializeCluster(1);
95      this.setConf(util.getConfiguration());
96    }
97  
98    @Override
99    @After
100   public void cleanUp() throws Exception {
101     util.restoreCluster();
102   }
103 
104   @Override
105   public void setUpCluster() throws Exception {
106   }
107 
108   @Override
109   public int runTestFromCommandLine() throws Exception {
110     Configuration conf = getConf();
111     TableName tableName = TableName.valueOf(conf.get(TABLE_NAME_KEY, DEFAULT_TABLE_NAME));
112     String snapshotName = conf.get(SNAPSHOT_NAME_KEY, tableName.getQualifierAsString()
113       + "_snapshot_" + System.currentTimeMillis());
114     int numRegions = conf.getInt(NUM_REGIONS_KEY, DEFAULT_NUM_REGIONS);
115     String tableDirStr = conf.get(TABLE_DIR_KEY);
116     Path tableDir;
117     if (tableDirStr == null) {
118       tableDir = util.getDataTestDirOnTestFS(tableName.getQualifierAsString());
119     } else {
120       tableDir = new Path(tableDirStr);
121     }
122 
123     /* We create the table using HBaseAdmin#createTable(), which will create the table
124      * with desired number of regions. We pass bbb as startKey and yyy as endKey, so if
125      * desiredNumRegions is > 2, we create regions empty - bbb and yyy - empty, and we
126      * create numRegions - 2 regions between bbb - yyy. The test uses a Scan with startRow
127      * bbb and endRow yyy, so, we expect the first and last region to be filtered out in
128      * the input format, and we expect numRegions - 2 splits between bbb and yyy.
129      */
130     int expectedNumSplits = numRegions > 2 ? numRegions - 2 : numRegions;
131 
132     TestTableSnapshotInputFormat.doTestWithMapReduce(util, tableName, snapshotName, tableDir,
133       numRegions, expectedNumSplits, false);
134 
135     return 0;
136   }
137 
138   @Override // CM is not intended to be run with this test
139   public String getTablename() {
140     return null;
141   }
142 
143   @Override
144   protected Set<String> getColumnFamilies() {
145     return null;
146   }
147 
148   public static void main(String[] args) throws Exception {
149     Configuration conf = HBaseConfiguration.create();
150     IntegrationTestingUtility.setUseDistributedCluster(conf);
151     int ret = ToolRunner.run(conf, new IntegrationTestTableSnapshotInputFormat(), args);
152     System.exit(ret);
153   }
154 
155 }