/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Round-trip tests for the {@link Export} and {@link Import} MapReduce tools:
 * a simple export/import with a column family rename, an export of .META.,
 * a raw export/import of delete markers, and import-side filtering.
 */
@Category(MediumTests.class)
public class TestImportExport {
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private static final byte[] ROW1 = Bytes.toBytes("row1");
  private static final byte[] ROW2 = Bytes.toBytes("row2");
  private static final String FAMILYA_STRING = "a";
  private static final String FAMILYB_STRING = "b";
  private static final byte[] FAMILYA = Bytes.toBytes(FAMILYA_STRING);
  private static final byte[] FAMILYB = Bytes.toBytes(FAMILYB_STRING);
  private static final byte[] QUAL = Bytes.toBytes("q");
  private static final String OUTPUT_DIR = "outputdir";

  private static long now = System.currentTimeMillis();

  @BeforeClass
  public static void beforeClass() throws Exception {
    UTIL.startMiniCluster();
    UTIL.startMiniMapReduceCluster();
    UTIL.getConfiguration().set("mapred.job.tracker", "local");
  }

  @AfterClass
  public static void afterClass() throws Exception {
    UTIL.shutdownMiniMapReduceCluster();
    UTIL.shutdownMiniCluster();
  }

  @Before
  @After
  public void cleanup() throws Exception {
    FileSystem fs = FileSystem.get(UTIL.getConfiguration());
    fs.delete(new Path(OUTPUT_DIR), true);
  }

  /**
   * Test a simple export/import round-trip, renaming the column family on
   * import.
   * @throws Exception
   */
  @Test
  public void testSimpleCase() throws Exception {
    String EXPORT_TABLE = "exportSimpleCase";
    HTable t = UTIL.createTable(Bytes.toBytes(EXPORT_TABLE), FAMILYA);
    Put p = new Put(ROW1);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now + 1, QUAL);
    p.add(FAMILYA, QUAL, now + 2, QUAL);
    t.put(p);
    p = new Put(ROW2);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now + 1, QUAL);
    p.add(FAMILYA, QUAL, now + 2, QUAL);
    t.put(p);

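    // Positional arguments to Export, as exercised below:
    //   <tablename> <outputdir> [<versions> [<starttime> [<endtime>]]]
    // "1000" caps the number of versions exported per cell.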
    String[] args = new String[] {
        EXPORT_TABLE,
        OUTPUT_DIR,
        "1000"
    };
    GenericOptionsParser opts =
        new GenericOptionsParser(new Configuration(UTIL.getConfiguration()), args);
    Configuration conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    Job job = Export.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());

    String IMPORT_TABLE = "importTableSimpleCase";
    t = UTIL.createTable(Bytes.toBytes(IMPORT_TABLE), FAMILYB);
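    // Import.CF_RENAME_PROP remaps families on import; the value (as used
    // here) is a comma-separated list of "sourceFamily:destinationFamily"
    // pairs, so everything written to family 'a' is imported into 'b'.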
    args = new String[] {
        "-D" + Import.CF_RENAME_PROP + "=" + FAMILYA_STRING + ":" + FAMILYB_STRING,
        IMPORT_TABLE,
        OUTPUT_DIR
    };

    opts = new GenericOptionsParser(new Configuration(UTIL.getConfiguration()), args);
    conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    job = Import.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());

    Get g = new Get(ROW1);
    g.setMaxVersions();
    Result r = t.get(g);
    assertEquals(3, r.size());
    g = new Get(ROW2);
    g.setMaxVersions();
    r = t.get(g);
    assertEquals(3, r.size());
    t.close();
  }

  /**
   * Test exporting the .META. catalog table.
   *
   * @throws Exception
   */
  @Test
  public void testMetaExport() throws Exception {
    String EXPORT_TABLE = ".META.";
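    // Positional arguments, as exercised below: one version per cell, and a
    // start/end time of 0 for the scan's time range. The test only verifies
    // that the export job itself completes successfully.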
    String[] args = new String[] { EXPORT_TABLE, OUTPUT_DIR, "1", "0", "0" };
    GenericOptionsParser opts = new GenericOptionsParser(new Configuration(
        UTIL.getConfiguration()), args);
    Configuration conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    Job job = Export.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());
  }

  @Test
  public void testWithDeletes() throws Exception {
    String EXPORT_TABLE = "exportWithDeletes";
    HTableDescriptor desc = new HTableDescriptor(EXPORT_TABLE);
    desc.addFamily(new HColumnDescriptor(FAMILYA)
        .setMaxVersions(5)
        // retain delete markers and deleted cells so the raw-scan export
        // below can see them
        .setKeepDeletedCells(true)
    );
    UTIL.getHBaseAdmin().createTable(desc);
    HTable t = new HTable(UTIL.getConfiguration(), EXPORT_TABLE);

    Put p = new Put(ROW1);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now + 1, QUAL);
    p.add(FAMILYA, QUAL, now + 2, QUAL);
    p.add(FAMILYA, QUAL, now + 3, QUAL);
    p.add(FAMILYA, QUAL, now + 4, QUAL);
    t.put(p);

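    // Two flavors of delete marker: a row-level Delete constructed with a
    // timestamp lays down family delete markers covering cells at or older
    // than now+3, while deleteColumns() adds a column delete marker covering
    // all versions of a:q at or older than now+2.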
    Delete d = new Delete(ROW1, now + 3);
    t.delete(d);
    d = new Delete(ROW1);
    d.deleteColumns(FAMILYA, QUAL, now + 2);
    t.delete(d);

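    // Export.RAW_SCAN makes the export run a raw scan, so the delete markers
    // and the cells they cover are written out instead of being filtered away.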
    String[] args = new String[] {
        "-D" + Export.RAW_SCAN + "=true",
        EXPORT_TABLE,
        OUTPUT_DIR,
        "1000"
    };

    GenericOptionsParser opts =
        new GenericOptionsParser(new Configuration(UTIL.getConfiguration()), args);
    Configuration conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    Job job = Export.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());

    String IMPORT_TABLE = "importWithDeletes";
    desc = new HTableDescriptor(IMPORT_TABLE);
    desc.addFamily(new HColumnDescriptor(FAMILYA)
        .setMaxVersions(5)
        .setKeepDeletedCells(true)
    );
    UTIL.getHBaseAdmin().createTable(desc);
    t.close();
    t = new HTable(UTIL.getConfiguration(), IMPORT_TABLE);
    args = new String[] {
        IMPORT_TABLE,
        OUTPUT_DIR
    };

    opts = new GenericOptionsParser(new Configuration(UTIL.getConfiguration()), args);
    conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    job = Import.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());

    Scan s = new Scan();
    s.setMaxVersions();
    s.setRaw(true);
    ResultScanner scanner = t.getScanner(s);
    Result r = scanner.next();
    KeyValue[] res = r.raw();
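    // In a raw scan the delete markers come back alongside the data. Within a
    // row, KeyValues sort newest-first, delete markers sort ahead of puts at
    // the same timestamp, and the family delete marker (empty qualifier)
    // sorts ahead of everything in a:q, giving the layout asserted below.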
    assertTrue(res[0].isDeleteFamily());
    assertEquals(now + 4, res[1].getTimestamp());
    assertEquals(now + 3, res[2].getTimestamp());
    assertTrue(res[3].isDelete());
    assertEquals(now + 2, res[4].getTimestamp());
    assertEquals(now + 1, res[5].getTimestamp());
    assertEquals(now, res[6].getTimestamp());
    scanner.close();
    t.close();
  }

  @Test
  public void testWithFilter() throws Exception {
    String EXPORT_TABLE = "exportSimpleCase_ImportWithFilter";
    HTableDescriptor desc = new HTableDescriptor(EXPORT_TABLE);
    desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
    UTIL.getHBaseAdmin().createTable(desc);
    HTable exportTable = new HTable(UTIL.getConfiguration(), EXPORT_TABLE);

    Put p = new Put(ROW1);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now + 1, QUAL);
    p.add(FAMILYA, QUAL, now + 2, QUAL);
    p.add(FAMILYA, QUAL, now + 3, QUAL);
    p.add(FAMILYA, QUAL, now + 4, QUAL);
    exportTable.put(p);

    String[] args = new String[] { EXPORT_TABLE, OUTPUT_DIR, "1000" };

    GenericOptionsParser opts = new GenericOptionsParser(new Configuration(
        UTIL.getConfiguration()), args);
    Configuration conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    Job job = Export.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());

    String IMPORT_TABLE = "importWithFilter";
    desc = new HTableDescriptor(IMPORT_TABLE);
    desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
    UTIL.getHBaseAdmin().createTable(desc);

    HTable importTable = new HTable(UTIL.getConfiguration(), IMPORT_TABLE);
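    // FILTER_CLASS_CONF_KEY names a Filter to apply on the import side, and
    // FILTER_ARGS_CONF_KEY supplies its constructor arguments; here a
    // PrefixFilter built from ROW1 should let only row1's cells through.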
    args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + PrefixFilter.class.getName(),
        "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1), IMPORT_TABLE, OUTPUT_DIR,
        "1000" };

    opts = new GenericOptionsParser(new Configuration(UTIL.getConfiguration()), args);
    conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    job = Import.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());

    // confirm the import and export tables hold the same number of cells
    // matching the ROW1 prefix
    PrefixFilter filter = new PrefixFilter(ROW1);
    int count = getCount(exportTable, filter);

    Assert.assertEquals("Unexpected row count between export and import tables", count,
      getCount(importTable, null));

    // now verify that a misconfigured filter fails the job rather than
    // silently importing everything; this is convenient to test here because
    // the export output can be reused

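    // Filter itself is not an instantiable, concrete filter class, so the
    // import job's attempt to construct it should fail and the job should be
    // reported unsuccessful.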
    args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + Filter.class.getName(),
        "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1), EXPORT_TABLE,
        OUTPUT_DIR, "1000" };

    opts = new GenericOptionsParser(new Configuration(UTIL.getConfiguration()), args);
    conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    job = Import.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertFalse("Job succeeded, but it had a non-instantiable filter!", job.isSuccessful());

    // cleanup
    exportTable.close();
    importTable.close();
  }

  /**
   * Count the number of KeyValues in the given table that pass the given
   * filter.
   * @param table the table to scan
   * @param filter the filter to apply, or null to count everything
   * @return the number of KeyValues seen by the scan
   * @throws IOException
   */
  private int getCount(HTable table, Filter filter) throws IOException {
    Scan scan = new Scan();
    scan.setFilter(filter);
    ResultScanner results = table.getScanner(scan);
    int count = 0;
    for (Result res : results) {
      count += res.size();
    }
    results.close();
    return count;
  }
}