/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Tests the table import and table export MR job functionality
 */
@Category(MediumTests.class)
public class TestImportExport {
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private static final byte[] ROW1 = Bytes.toBytes("row1");
  private static final byte[] ROW2 = Bytes.toBytes("row2");
  private static final String FAMILYA_STRING = "a";
  private static final String FAMILYB_STRING = "b";
  private static final byte[] FAMILYA = Bytes.toBytes(FAMILYA_STRING);
  private static final byte[] FAMILYB = Bytes.toBytes(FAMILYB_STRING);
  private static final byte[] QUAL = Bytes.toBytes("q");
  private static final String OUTPUT_DIR = "outputdir";
  private static String FQ_OUTPUT_DIR;
  private static final String EXPORT_BATCH_SIZE = "100";

  private static long now = System.currentTimeMillis();

  @BeforeClass
  public static void beforeClass() throws Exception {
    UTIL.startMiniCluster();
    UTIL.startMiniMapReduceCluster();
    FQ_OUTPUT_DIR =
        new Path(OUTPUT_DIR).makeQualified(FileSystem.get(UTIL.getConfiguration())).toString();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    UTIL.shutdownMiniMapReduceCluster();
    UTIL.shutdownMiniCluster();
  }

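  // Runs both before and after each test so that every test starts with, and
  // leaves behind, a clean output directory.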
  @Before
  @After
  public void cleanup() throws Exception {
    FileSystem fs = FileSystem.get(UTIL.getConfiguration());
    fs.delete(new Path(OUTPUT_DIR), true);
  }

  /**
   * Runs an export job with the specified command line args
   * @param args the command line arguments to pass to the export job
   * @return true if the job completed successfully
   * @throws IOException
   * @throws InterruptedException
   * @throws ClassNotFoundException
   */
  boolean runExport(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // Copy the configuration so that each job gets its own temp dirs.
    // GenericOptionsParser folds any -D options into the configuration and
    // returns the remaining positional arguments.
    GenericOptionsParser opts = new GenericOptionsParser(new Configuration(UTIL.getConfiguration()), args);
    Configuration conf = opts.getConfiguration();
    args = opts.getRemainingArgs();
    Job job = Export.createSubmittableJob(conf, args);
    job.waitForCompletion(false);
    return job.isSuccessful();
  }

  /**
   * Runs an import job with the specified command line args
   * @param args the command line arguments to pass to the import job
   * @return true if the job completed successfully
   * @throws IOException
   * @throws InterruptedException
   * @throws ClassNotFoundException
   */
  boolean runImport(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // Copy the configuration so that each job gets its own temp dirs.
    GenericOptionsParser opts = new GenericOptionsParser(new Configuration(UTIL.getConfiguration()), args);
    Configuration conf = opts.getConfiguration();
    args = opts.getRemainingArgs();
    Job job = Import.createSubmittableJob(conf, args);
    job.waitForCompletion(false);
    return job.isSuccessful();
  }

  /**
   * Test a simple export/import round trip with column family renaming
   * @throws Exception
   */
  @Test
  public void testSimpleCase() throws Exception {
    String EXPORT_TABLE = "exportSimpleCase";
    HTable t = UTIL.createTable(Bytes.toBytes(EXPORT_TABLE), FAMILYA, 3);
    Put p = new Put(ROW1);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now+1, QUAL);
    p.add(FAMILYA, QUAL, now+2, QUAL);
    t.put(p);
    p = new Put(ROW2);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now+1, QUAL);
    p.add(FAMILYA, QUAL, now+2, QUAL);
    t.put(p);

    String[] args = new String[] {
        EXPORT_TABLE,
        FQ_OUTPUT_DIR,
        "1000", // max number of key versions per key to export
    };
    assertTrue(runExport(args));

    String IMPORT_TABLE = "importTableSimpleCase";
    t = UTIL.createTable(Bytes.toBytes(IMPORT_TABLE), FAMILYB, 3);
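    // Import.CF_RENAME_PROP maps source column families to new names on
    // import; "a:b" rewrites every cell exported from family "a" into
    // family "b".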
    args = new String[] {
        "-D" + Import.CF_RENAME_PROP + "=" + FAMILYA_STRING + ":" + FAMILYB_STRING,
        IMPORT_TABLE,
        FQ_OUTPUT_DIR
    };
    assertTrue(runImport(args));

    Get g = new Get(ROW1);
    g.setMaxVersions();
    Result r = t.get(g);
    assertEquals(3, r.size());
    g = new Get(ROW2);
    g.setMaxVersions();
    r = t.get(g);
    assertEquals(3, r.size());
  }

  /**
   * Test export of the meta table
   *
   * @throws Exception
   */
  @Test
  public void testMetaExport() throws Exception {
    String EXPORT_TABLE = TableName.META_TABLE_NAME.getNameAsString();
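    // The trailing positional args to Export are <versions>, <starttime> and
    // <endtime>; this exports a single version of each cell in the meta table.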
    String[] args = new String[] { EXPORT_TABLE, FQ_OUTPUT_DIR, "1", "0", "0" };
    assertTrue(runExport(args));
  }

  /**
   * Test export scanner batching
   */
  @Test
  public void testExportScannerBatching() throws Exception {
    String BATCH_TABLE = "exportWithBatch";
    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(BATCH_TABLE));
    desc.addFamily(new HColumnDescriptor(FAMILYA)
        .setMaxVersions(1)
    );
    UTIL.getHBaseAdmin().createTable(desc);
    HTable t = new HTable(UTIL.getConfiguration(), BATCH_TABLE);

    Put p = new Put(ROW1);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now+1, QUAL);
    p.add(FAMILYA, QUAL, now+2, QUAL);
    p.add(FAMILYA, QUAL, now+3, QUAL);
    p.add(FAMILYA, QUAL, now+4, QUAL);
    t.put(p);

    String[] args = new String[] {
        "-D" + Export.EXPORT_BATCHING + "=" + EXPORT_BATCH_SIZE,  // added scanner batching arg.
        BATCH_TABLE,
        FQ_OUTPUT_DIR
    };
    assertTrue(runExport(args));

    FileSystem fs = FileSystem.get(UTIL.getConfiguration());
    fs.delete(new Path(FQ_OUTPUT_DIR), true);
  }

  @Test
  public void testWithDeletes() throws Exception {
    String EXPORT_TABLE = "exportWithDeletes";
    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(EXPORT_TABLE));
    desc.addFamily(new HColumnDescriptor(FAMILYA)
        .setMaxVersions(5)
        .setKeepDeletedCells(true)
    );
    UTIL.getHBaseAdmin().createTable(desc);
    HTable t = new HTable(UTIL.getConfiguration(), EXPORT_TABLE);

    Put p = new Put(ROW1);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now+1, QUAL);
    p.add(FAMILYA, QUAL, now+2, QUAL);
    p.add(FAMILYA, QUAL, now+3, QUAL);
    p.add(FAMILYA, QUAL, now+4, QUAL);
    t.put(p);

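    // Delete(row, ts) adds a family delete marker covering all cells with
    // timestamp <= now+3; deleteColumns(family, qualifier, ts) adds a
    // delete-columns marker covering versions with timestamp <= now+2.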
    Delete d = new Delete(ROW1, now+3);
    t.delete(d);
    d = new Delete(ROW1);
    d.deleteColumns(FAMILYA, QUAL, now+2);
    t.delete(d);

    String[] args = new String[] {
        "-D" + Export.RAW_SCAN + "=true",
        EXPORT_TABLE,
        FQ_OUTPUT_DIR,
        "1000", // max number of key versions per key to export
    };
    assertTrue(runExport(args));

    String IMPORT_TABLE = "importWithDeletes";
    desc = new HTableDescriptor(TableName.valueOf(IMPORT_TABLE));
    desc.addFamily(new HColumnDescriptor(FAMILYA)
        .setMaxVersions(5)
        .setKeepDeletedCells(true)
    );
    UTIL.getHBaseAdmin().createTable(desc);
    t.close();
    t = new HTable(UTIL.getConfiguration(), IMPORT_TABLE);
    args = new String[] {
        IMPORT_TABLE,
        FQ_OUTPUT_DIR
    };
    assertTrue(runImport(args));

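    // A raw scan returns the delete markers along with the cells they mask.
    // KeyValues within the row sort with the family delete marker first (its
    // qualifier is empty), then by descending timestamp, with the delete
    // marker sorting ahead of the put at the same timestamp.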
    Scan s = new Scan();
    s.setMaxVersions();
    s.setRaw(true);
    ResultScanner scanner = t.getScanner(s);
    Result r = scanner.next();
    KeyValue[] res = r.raw();
    assertTrue(res[0].isDeleteFamily());
    assertEquals(now+4, res[1].getTimestamp());
    assertEquals(now+3, res[2].getTimestamp());
    assertTrue(res[3].isDelete());
    assertEquals(now+2, res[4].getTimestamp());
    assertEquals(now+1, res[5].getTimestamp());
    assertEquals(now, res[6].getTimestamp());
    t.close();
  }

  /**
   * Create a simple table, run an Export job on it, Import with filtering on,
   * verify counts, then attempt an Import with an invalid filter.
   */
  @Test
  public void testWithFilter() throws Exception {
    // Create simple table to export
    String EXPORT_TABLE = "exportSimpleCase_ImportWithFilter";
    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(EXPORT_TABLE));
    desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
    UTIL.getHBaseAdmin().createTable(desc);
    HTable exportTable = new HTable(UTIL.getConfiguration(), EXPORT_TABLE);

    Put p = new Put(ROW1);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now + 1, QUAL);
    p.add(FAMILYA, QUAL, now + 2, QUAL);
    p.add(FAMILYA, QUAL, now + 3, QUAL);
    p.add(FAMILYA, QUAL, now + 4, QUAL);
    exportTable.put(p);

    // Export the simple table
    String[] args = new String[] { EXPORT_TABLE, FQ_OUTPUT_DIR, "1000" };
    assertTrue(runExport(args));

    // Import to a new table
    String IMPORT_TABLE = "importWithFilter";
    desc = new HTableDescriptor(TableName.valueOf(IMPORT_TABLE));
    desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
    UTIL.getHBaseAdmin().createTable(desc);

    HTable importTable = new HTable(UTIL.getConfiguration(), IMPORT_TABLE);
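    // Import can filter cells as it writes them: FILTER_CLASS_CONF_KEY names
    // the Filter implementation to apply and FILTER_ARGS_CONF_KEY supplies
    // its constructor arguments (here, the row prefix to keep).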
    args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + PrefixFilter.class.getName(),
        "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1), IMPORT_TABLE, FQ_OUTPUT_DIR,
        "1000" };
    assertTrue(runImport(args));

    // get the cell count of the source table for rows matching the filter
    PrefixFilter filter = new PrefixFilter(ROW1);
    int count = getCount(exportTable, filter);

    Assert.assertEquals("Unexpected cell count between export and import tables", count,
      getCount(importTable, null));

    // and then test that a broken command doesn't bork everything - easier here because we
    // don't need to re-run the export job. Filter is abstract, so instantiating it fails
    // and the import job should not succeed.
    args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + Filter.class.getName(),
        "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1), EXPORT_TABLE,
        FQ_OUTPUT_DIR, "1000" };
    assertFalse(runImport(args));

    // cleanup
    exportTable.close();
    importTable.close();
  }

  /**
   * Count the number of KeyValues in the specified table matching the given filter
   * @param table the table to scan
   * @param filter the filter to apply, or null to count every KeyValue
   * @return the number of matching KeyValues
   * @throws IOException
   */
  private int getCount(HTable table, Filter filter) throws IOException {
    Scan scan = new Scan();
    scan.setFilter(filter);
    ResultScanner results = table.getScanner(scan);
    int count = 0;
    for (Result res : results) {
      count += res.size();
    }
    results.close();
    return count;
  }
}