View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.mapreduce;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertTrue;
23  
24  import java.io.IOException;
25  import java.net.URL;
26  
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.fs.FileSystem;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.Cell;
31  import org.apache.hadoop.hbase.CellUtil;
32  import org.apache.hadoop.hbase.HBaseTestingUtility;
33  import org.apache.hadoop.hbase.HColumnDescriptor;
34  import org.apache.hadoop.hbase.HTableDescriptor;
35  import org.apache.hadoop.hbase.KeyValue;
36  import org.apache.hadoop.hbase.MediumTests;
37  import org.apache.hadoop.hbase.TableName;
38  import org.apache.hadoop.hbase.client.Delete;
39  import org.apache.hadoop.hbase.client.Get;
40  import org.apache.hadoop.hbase.client.HTable;
41  import org.apache.hadoop.hbase.client.Put;
42  import org.apache.hadoop.hbase.client.Result;
43  import org.apache.hadoop.hbase.client.ResultScanner;
44  import org.apache.hadoop.hbase.client.Scan;
45  import org.apache.hadoop.hbase.filter.Filter;
46  import org.apache.hadoop.hbase.filter.PrefixFilter;
47  import org.apache.hadoop.hbase.util.Bytes;
48  import org.apache.hadoop.mapreduce.Job;
49  import org.apache.hadoop.util.GenericOptionsParser;
50  import org.junit.After;
51  import org.junit.AfterClass;
52  import org.junit.Assert;
53  import org.junit.Before;
54  import org.junit.BeforeClass;
55  import org.junit.Test;
56  import org.junit.experimental.categories.Category;
57  
58  /**
59   * Tests the table import and table export MR job functionality
60   */
61  @Category(MediumTests.class)
62  public class TestImportExport {
63    private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
64    private static final byte[] ROW1 = Bytes.toBytes("row1");
65    private static final byte[] ROW2 = Bytes.toBytes("row2");
66    private static final String FAMILYA_STRING = "a";
67    private static final String FAMILYB_STRING = "b";
68    private static final byte[] FAMILYA = Bytes.toBytes(FAMILYA_STRING);
69    private static final byte[] FAMILYB = Bytes.toBytes(FAMILYB_STRING);
70    private static final byte[] QUAL = Bytes.toBytes("q");
71    private static final String OUTPUT_DIR = "outputdir";
72    private static String FQ_OUTPUT_DIR;
73    private static final String EXPORT_BATCH_SIZE = "100";
74  
75    private static long now = System.currentTimeMillis();
76  
77    @BeforeClass
78    public static void beforeClass() throws Exception {
79      UTIL.startMiniCluster();
80      UTIL.startMiniMapReduceCluster();
81      FQ_OUTPUT_DIR =  new Path(OUTPUT_DIR).makeQualified(FileSystem.get(UTIL.getConfiguration())).toString();
82    }
83  
84    @AfterClass
85    public static void afterClass() throws Exception {
86      UTIL.shutdownMiniMapReduceCluster();
87      UTIL.shutdownMiniCluster();
88    }
89  
90    @Before
91    @After
92    public void cleanup() throws Exception {
93      FileSystem fs = FileSystem.get(UTIL.getConfiguration());
94      fs.delete(new Path(OUTPUT_DIR), true);
95    }
96  
97    /**
98     * Runs an export job with the specified command line args
99     * @param args
100    * @return true if job completed successfully
101    * @throws IOException
102    * @throws InterruptedException
103    * @throws ClassNotFoundException
104    */
105   boolean runExport(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
106     // need to make a copy of the configuration because to make sure different temp dirs are used.
107     GenericOptionsParser opts = new GenericOptionsParser(new Configuration(UTIL.getConfiguration()), args);
108     Configuration conf = opts.getConfiguration();
109     args = opts.getRemainingArgs();
110     Job job = Export.createSubmittableJob(conf, args);
111     job.waitForCompletion(false);
112     return job.isSuccessful();
113   }
114 
115   /**
116    * Runs an import job with the specified command line args
117    * @param args
118    * @return true if job completed successfully
119    * @throws IOException
120    * @throws InterruptedException
121    * @throws ClassNotFoundException
122    */
123   boolean runImport(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
124     // need to make a copy of the configuration because to make sure different temp dirs are used.
125     GenericOptionsParser opts = new GenericOptionsParser(new Configuration(UTIL.getConfiguration()), args);
126     Configuration conf = opts.getConfiguration();
127     args = opts.getRemainingArgs();
128     Job job = Import.createSubmittableJob(conf, args);
129     job.waitForCompletion(false);
130     return job.isSuccessful();
131   }
132 
133   /**
134    * Test simple replication case with column mapping
135    * @throws Exception
136    */
137   @Test
138   public void testSimpleCase() throws Exception {
139     String EXPORT_TABLE = "exportSimpleCase";
140     HTable t = UTIL.createTable(Bytes.toBytes(EXPORT_TABLE), FAMILYA, 3);
141     Put p = new Put(ROW1);
142     p.add(FAMILYA, QUAL, now, QUAL);
143     p.add(FAMILYA, QUAL, now+1, QUAL);
144     p.add(FAMILYA, QUAL, now+2, QUAL);
145     t.put(p);
146     p = new Put(ROW2);
147     p.add(FAMILYA, QUAL, now, QUAL);
148     p.add(FAMILYA, QUAL, now+1, QUAL);
149     p.add(FAMILYA, QUAL, now+2, QUAL);
150     t.put(p);
151 
152     String[] args = new String[] {
153         EXPORT_TABLE,
154         FQ_OUTPUT_DIR,
155         "1000", // max number of key versions per key to export
156     };
157     assertTrue(runExport(args));
158 
159     String IMPORT_TABLE = "importTableSimpleCase";
160     t = UTIL.createTable(Bytes.toBytes(IMPORT_TABLE), FAMILYB, 3);
161     args = new String[] {
162         "-D" + Import.CF_RENAME_PROP + "="+FAMILYA_STRING+":"+FAMILYB_STRING,
163         IMPORT_TABLE,
164         FQ_OUTPUT_DIR
165     };
166     assertTrue(runImport(args));
167 
168     Get g = new Get(ROW1);
169     g.setMaxVersions();
170     Result r = t.get(g);
171     assertEquals(3, r.size());
172     g = new Get(ROW2);
173     g.setMaxVersions();
174     r = t.get(g);
175     assertEquals(3, r.size());
176   }
177 
178   /**
179    * Test export hbase:meta table
180    *
181    * @throws Exception
182    */
183   @Test
184   public void testMetaExport() throws Exception {
185     String EXPORT_TABLE = TableName.META_TABLE_NAME.getNameAsString();
186     String[] args = new String[] { EXPORT_TABLE, FQ_OUTPUT_DIR, "1", "0", "0" };
187     assertTrue(runExport(args));
188   }
189 
190   /**
191    * Test import data from 0.94 exported file
192    * @throws Exception
193    */
194   @Test
195   public void testImport94Table() throws Exception {
196     URL url = TestImportExport.class.getResource(
197         "exportedTableIn94Format");
198     Path importPath = new Path(url.getPath());
199     FileSystem fs = FileSystem.get(UTIL.getConfiguration());
200     fs.copyFromLocalFile(importPath, new Path(FQ_OUTPUT_DIR + Path.SEPARATOR
201         + "exportedTableIn94Format"));
202     String IMPORT_TABLE = "importTableExportedFrom94";
203     HTable t = UTIL.createTable(Bytes.toBytes(IMPORT_TABLE), Bytes.toBytes("f1"), 3);
204     String[] args = new String[] {
205         "-Dhbase.import.version=0.94" ,
206         IMPORT_TABLE, FQ_OUTPUT_DIR
207     };
208     assertTrue(runImport(args));
209 
210     /* exportedTableIn94Format contains 5 rows
211      ROW         COLUMN+CELL
212      r1          column=f1:c1, timestamp=1383766761171, value=val1
213      r2          column=f1:c1, timestamp=1383766771642, value=val2
214      r3          column=f1:c1, timestamp=1383766777615, value=val3
215      r4          column=f1:c1, timestamp=1383766785146, value=val4
216      r5          column=f1:c1, timestamp=1383766791506, value=val5
217      */
218     assertEquals(5, UTIL.countRows(t));
219     t.close();
220   }
221 
222   /**
223    * Test export scanner batching
224    */
225    @Test
226    public void testExportScannerBatching() throws Exception {
227     String BATCH_TABLE = "exportWithBatch";
228     HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(BATCH_TABLE));
229     desc.addFamily(new HColumnDescriptor(FAMILYA)
230         .setMaxVersions(1)
231     );
232     UTIL.getHBaseAdmin().createTable(desc);
233     HTable t = new HTable(UTIL.getConfiguration(), BATCH_TABLE);
234 
235     Put p = new Put(ROW1);
236     p.add(FAMILYA, QUAL, now, QUAL);
237     p.add(FAMILYA, QUAL, now+1, QUAL);
238     p.add(FAMILYA, QUAL, now+2, QUAL);
239     p.add(FAMILYA, QUAL, now+3, QUAL);
240     p.add(FAMILYA, QUAL, now+4, QUAL);
241     t.put(p);
242 
243     String[] args = new String[] {
244         "-D" + Export.EXPORT_BATCHING + "=" + EXPORT_BATCH_SIZE,  // added scanner batching arg.
245         BATCH_TABLE,
246         FQ_OUTPUT_DIR
247     };
248     assertTrue(runExport(args));
249 
250     FileSystem fs = FileSystem.get(UTIL.getConfiguration());
251     fs.delete(new Path(FQ_OUTPUT_DIR), true);
252   }
253 
254   @Test
255   public void testWithDeletes() throws Exception {
256     String EXPORT_TABLE = "exportWithDeletes";
257     HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(EXPORT_TABLE));
258     desc.addFamily(new HColumnDescriptor(FAMILYA)
259         .setMaxVersions(5)
260         .setKeepDeletedCells(true)
261     );
262     UTIL.getHBaseAdmin().createTable(desc);
263     HTable t = new HTable(UTIL.getConfiguration(), EXPORT_TABLE);
264 
265     Put p = new Put(ROW1);
266     p.add(FAMILYA, QUAL, now, QUAL);
267     p.add(FAMILYA, QUAL, now+1, QUAL);
268     p.add(FAMILYA, QUAL, now+2, QUAL);
269     p.add(FAMILYA, QUAL, now+3, QUAL);
270     p.add(FAMILYA, QUAL, now+4, QUAL);
271     t.put(p);
272 
273     Delete d = new Delete(ROW1, now+3);
274     t.delete(d);
275     d = new Delete(ROW1);
276     d.deleteColumns(FAMILYA, QUAL, now+2);
277     t.delete(d);
278 
279     String[] args = new String[] {
280         "-D" + Export.RAW_SCAN + "=true",
281         EXPORT_TABLE,
282         FQ_OUTPUT_DIR,
283         "1000", // max number of key versions per key to export
284     };
285     assertTrue(runExport(args));
286 
287     String IMPORT_TABLE = "importWithDeletes";
288     desc = new HTableDescriptor(TableName.valueOf(IMPORT_TABLE));
289     desc.addFamily(new HColumnDescriptor(FAMILYA)
290         .setMaxVersions(5)
291         .setKeepDeletedCells(true)
292     );
293     UTIL.getHBaseAdmin().createTable(desc);
294     t.close();
295     t = new HTable(UTIL.getConfiguration(), IMPORT_TABLE);
296     args = new String[] {
297         IMPORT_TABLE,
298         FQ_OUTPUT_DIR
299     };
300     assertTrue(runImport(args));
301 
302     Scan s = new Scan();
303     s.setMaxVersions();
304     s.setRaw(true);
305     ResultScanner scanner = t.getScanner(s);
306     Result r = scanner.next();
307     Cell[] res = r.rawCells();
308     assertTrue(CellUtil.isDeleteFamily(res[0]));
309     assertEquals(now+4, res[1].getTimestamp());
310     assertEquals(now+3, res[2].getTimestamp());
311     assertTrue(CellUtil.isDelete(res[3]));
312     assertEquals(now+2, res[4].getTimestamp());
313     assertEquals(now+1, res[5].getTimestamp());
314     assertEquals(now, res[6].getTimestamp());
315     t.close();
316   }
317 
318   /**
319    * Create a simple table, run an Export Job on it, Import with filtering on,  verify counts,
320    * attempt with invalid values.
321    */
322   @Test
323   public void testWithFilter() throws Exception {
324     // Create simple table to export
325     String EXPORT_TABLE = "exportSimpleCase_ImportWithFilter";
326     HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(EXPORT_TABLE));
327     desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
328     UTIL.getHBaseAdmin().createTable(desc);
329     HTable exportTable = new HTable(UTIL.getConfiguration(), EXPORT_TABLE);
330 
331     Put p = new Put(ROW1);
332     p.add(FAMILYA, QUAL, now, QUAL);
333     p.add(FAMILYA, QUAL, now + 1, QUAL);
334     p.add(FAMILYA, QUAL, now + 2, QUAL);
335     p.add(FAMILYA, QUAL, now + 3, QUAL);
336     p.add(FAMILYA, QUAL, now + 4, QUAL);
337     exportTable.put(p);
338 
339     // Export the simple table
340     String[] args = new String[] { EXPORT_TABLE, FQ_OUTPUT_DIR, "1000" };
341     assertTrue(runExport(args));
342 
343     // Import to a new table
344     String IMPORT_TABLE = "importWithFilter";
345     desc = new HTableDescriptor(TableName.valueOf(IMPORT_TABLE));
346     desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
347     UTIL.getHBaseAdmin().createTable(desc);
348 
349     HTable importTable = new HTable(UTIL.getConfiguration(), IMPORT_TABLE);
350     args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + PrefixFilter.class.getName(),
351         "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1), IMPORT_TABLE, FQ_OUTPUT_DIR,
352         "1000" };
353     assertTrue(runImport(args));
354 
355     // get the count of the source table for that time range
356     PrefixFilter filter = new PrefixFilter(ROW1);
357     int count = getCount(exportTable, filter);
358 
359     Assert.assertEquals("Unexpected row count between export and import tables", count,
360       getCount(importTable, null));
361 
362     // and then test that a broken command doesn't bork everything - easier here because we don't
363     // need to re-run the export job
364 
365     args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + Filter.class.getName(),
366         "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1) + "", EXPORT_TABLE,
367         FQ_OUTPUT_DIR, "1000" };
368     assertFalse(runImport(args));
369 
370     // cleanup
371     exportTable.close();
372     importTable.close();
373   }
374 
375   /**
376    * Count the number of keyvalues in the specified table for the given timerange
377    * @param start
378    * @param end
379    * @param table
380    * @return
381    * @throws IOException
382    */
383   private int getCount(HTable table, Filter filter) throws IOException {
384     Scan scan = new Scan();
385     scan.setFilter(filter);
386     ResultScanner results = table.getScanner(scan);
387     int count = 0;
388     for (Result res : results) {
389       count += res.size();
390     }
391     results.close();
392     return count;
393   }
394 }