View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.mapreduce;
19  
20  import static org.junit.Assert.assertEquals;
21  
22  import java.io.IOException;
23  import java.util.Arrays;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.fs.FileSystem;
28  import org.apache.hadoop.fs.Path;
29  import org.apache.hadoop.hbase.Cell;
30  import org.apache.hadoop.hbase.CellUtil;
31  import org.apache.hadoop.hbase.HBaseTestingUtility;
32  import org.apache.hadoop.hbase.TableName;
33  import org.apache.hadoop.hbase.client.HTable;
34  import org.apache.hadoop.hbase.client.Put;
35  import org.apache.hadoop.hbase.client.Result;
36  import org.apache.hadoop.hbase.client.ResultScanner;
37  import org.apache.hadoop.hbase.client.Scan;
38  import org.apache.hadoop.hbase.mapreduce.SyncTable.SyncMapper.Counter;
39  import org.apache.hadoop.hbase.testclassification.LargeTests;
40  import org.apache.hadoop.hbase.util.Bytes;
41  import org.apache.hadoop.mapreduce.Counters;
42  import org.junit.AfterClass;
43  import org.junit.Assert;
44  import org.junit.BeforeClass;
45  import org.junit.Test;
46  import org.junit.experimental.categories.Category;
47  
48  import com.google.common.base.Throwables;
49  
50  /**
51   * Basic test for the SyncTable M/R tool
52   */
53  @Category(LargeTests.class)
54  public class TestSyncTable {
55    
56    private static final Log LOG = LogFactory.getLog(TestSyncTable.class);
57    
58    private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();  
59    
60    @BeforeClass
61    public static void beforeClass() throws Exception {
62      TEST_UTIL.startMiniCluster(3);
63      TEST_UTIL.startMiniMapReduceCluster();
64    }
65    
66    @AfterClass
67    public static void afterClass() throws Exception {
68      TEST_UTIL.shutdownMiniMapReduceCluster();
69      TEST_UTIL.shutdownMiniCluster();
70    }
71    
72    private static byte[][] generateSplits(int numRows, int numRegions) {
73      byte[][] splitRows = new byte[numRegions-1][];
74      for (int i = 1; i < numRegions; i++) {
75        splitRows[i-1] = Bytes.toBytes(numRows * i / numRegions);
76      }
77      return splitRows;
78    }
79    
80    @Test
81    public void testSyncTable() throws Exception {
82      String sourceTableName = "testSourceTable";
83      String targetTableName = "testTargetTable";
84      Path testDir = TEST_UTIL.getDataTestDirOnTestFS("testSyncTable");
85      
86      writeTestData(sourceTableName, targetTableName);
87      hashSourceTable(sourceTableName, testDir);
88      Counters syncCounters = syncTables(sourceTableName, targetTableName, testDir);
89      assertEqualTables(90, sourceTableName, targetTableName);
90      
91      assertEquals(60, syncCounters.findCounter(Counter.ROWSWITHDIFFS).getValue());
92      assertEquals(10, syncCounters.findCounter(Counter.SOURCEMISSINGROWS).getValue());
93      assertEquals(10, syncCounters.findCounter(Counter.TARGETMISSINGROWS).getValue());
94      assertEquals(50, syncCounters.findCounter(Counter.SOURCEMISSINGCELLS).getValue());
95      assertEquals(50, syncCounters.findCounter(Counter.TARGETMISSINGCELLS).getValue());
96      assertEquals(20, syncCounters.findCounter(Counter.DIFFERENTCELLVALUES).getValue());
97      
98      TEST_UTIL.deleteTable(sourceTableName);
99      TEST_UTIL.deleteTable(targetTableName);
100     TEST_UTIL.cleanupDataTestDirOnTestFS();
101   }
102 
103   private void assertEqualTables(int expectedRows, String sourceTableName, String targetTableName) 
104       throws Exception {
105     HTable sourceTable = new HTable(TEST_UTIL.getConfiguration(),
106       TableName.valueOf(sourceTableName));
107     HTable targetTable = new HTable(TEST_UTIL.getConfiguration(),
108       TableName.valueOf(targetTableName));
109     
110     ResultScanner sourceScanner = sourceTable.getScanner(new Scan());
111     ResultScanner targetScanner = targetTable.getScanner(new Scan());
112     
113     for (int i = 0; i < expectedRows; i++) {
114       Result sourceRow = sourceScanner.next();
115       Result targetRow = targetScanner.next();
116       
117       LOG.debug("SOURCE row: " + (sourceRow == null ? "null" : Bytes.toInt(sourceRow.getRow()))
118           + " cells:" + sourceRow);
119       LOG.debug("TARGET row: " + (targetRow == null ? "null" : Bytes.toInt(targetRow.getRow()))
120           + " cells:" + targetRow);
121       
122       if (sourceRow == null) {
123         Assert.fail("Expected " + expectedRows
124             + " source rows but only found " + i); 
125       }
126       if (targetRow == null) {
127         Assert.fail("Expected " + expectedRows
128             + " target rows but only found " + i); 
129       }
130       Cell[] sourceCells = sourceRow.rawCells();
131       Cell[] targetCells = targetRow.rawCells();
132       if (sourceCells.length != targetCells.length) {
133         LOG.debug("Source cells: " + Arrays.toString(sourceCells));
134         LOG.debug("Target cells: " + Arrays.toString(targetCells));
135         Assert.fail("Row " + Bytes.toInt(sourceRow.getRow())
136             + " has " + sourceCells.length
137             + " cells in source table but " + targetCells.length
138             + " cells in target table");
139       }
140       for (int j = 0; j < sourceCells.length; j++) {
141         Cell sourceCell = sourceCells[j];
142         Cell targetCell = targetCells[j];
143         try {
144           if (!CellUtil.matchingRow(sourceCell, targetCell)) {
145             Assert.fail("Rows don't match");
146           }
147           if (!CellUtil.matchingFamily(sourceCell, targetCell)) {
148             Assert.fail("Families don't match");
149           }
150           if (!CellUtil.matchingQualifier(sourceCell, targetCell)) {
151             Assert.fail("Qualifiers don't match");
152           }
153           if (!CellUtil.matchingTimestamp(sourceCell, targetCell)) {
154             Assert.fail("Timestamps don't match");
155           }
156           if (!CellUtil.matchingValue(sourceCell, targetCell)) {
157             Assert.fail("Values don't match");
158           }
159         } catch (Throwable t) {
160           LOG.debug("Source cell: " + sourceCell + " target cell: " + targetCell);
161           Throwables.propagate(t);
162         }
163       }
164     }
165     Result sourceRow = sourceScanner.next();
166     if (sourceRow != null) {
167       Assert.fail("Source table has more than " + expectedRows
168           + " rows.  Next row: " + Bytes.toInt(sourceRow.getRow()));
169     }
170     Result targetRow = targetScanner.next();
171     if (targetRow != null) {
172       Assert.fail("Target table has more than " + expectedRows
173           + " rows.  Next row: " + Bytes.toInt(targetRow.getRow()));
174     }
175     sourceScanner.close();
176     targetScanner.close();
177     sourceTable.close();
178     targetTable.close();
179   }
180 
181   private Counters syncTables(String sourceTableName, String targetTableName,
182       Path testDir) throws Exception {
183     SyncTable syncTable = new SyncTable(TEST_UTIL.getConfiguration());
184     int code = syncTable.run(new String[] { 
185         testDir.toString(),
186         sourceTableName,
187         targetTableName
188         });
189     assertEquals("sync table job failed", 0, code);
190     
191     LOG.info("Sync tables completed");
192     return syncTable.counters;
193   }
194 
195   private void hashSourceTable(String sourceTableName, Path testDir)
196       throws Exception, IOException {
197     int numHashFiles = 3;
198     long batchSize = 100;  // should be 2 batches per region
199     int scanBatch = 1;
200     HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
201     int code = hashTable.run(new String[] { 
202         "--batchsize=" + batchSize,
203         "--numhashfiles=" + numHashFiles,
204         "--scanbatch=" + scanBatch,
205         sourceTableName,
206         testDir.toString()});
207     assertEquals("hash table job failed", 0, code);
208     
209     FileSystem fs = TEST_UTIL.getTestFileSystem();
210     
211     HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
212     assertEquals(sourceTableName, tableHash.tableName);
213     assertEquals(batchSize, tableHash.batchSize);
214     assertEquals(numHashFiles, tableHash.numHashFiles);
215     assertEquals(numHashFiles - 1, tableHash.partitions.size());
216 
217     LOG.info("Hash table completed");
218   }
219 
220   private void writeTestData(String sourceTableName, String targetTableName)
221       throws Exception {
222     final byte[] family = Bytes.toBytes("family");
223     final byte[] column1 = Bytes.toBytes("c1");
224     final byte[] column2 = Bytes.toBytes("c2");
225     final byte[] value1 = Bytes.toBytes("val1");
226     final byte[] value2 = Bytes.toBytes("val2");
227     final byte[] value3 = Bytes.toBytes("val3");
228     
229     int numRows = 100;
230     int sourceRegions = 10;
231     int targetRegions = 6;
232     
233     HTable sourceTable = TEST_UTIL.createTable(TableName.valueOf(sourceTableName),
234         family, generateSplits(numRows, sourceRegions));
235 
236     HTable targetTable = TEST_UTIL.createTable(TableName.valueOf(targetTableName),
237         family, generateSplits(numRows, targetRegions));
238 
239     long timestamp = 1430764183454L;
240 
241     int rowIndex = 0;
242     // a bunch of identical rows
243     for (; rowIndex < 40; rowIndex++) {
244       Put sourcePut = new Put(Bytes.toBytes(rowIndex));
245       sourcePut.add(family, column1, timestamp, value1);
246       sourcePut.add(family, column2, timestamp, value2);
247       sourceTable.put(sourcePut);
248      
249       Put targetPut = new Put(Bytes.toBytes(rowIndex));
250       targetPut.add(family, column1, timestamp, value1);
251       targetPut.add(family, column2, timestamp, value2);
252       targetTable.put(targetPut);
253     }
254     // some rows only in the source table
255     // ROWSWITHDIFFS: 10
256     // TARGETMISSINGROWS: 10
257     // TARGETMISSINGCELLS: 20
258     for (; rowIndex < 50; rowIndex++) {
259       Put put = new Put(Bytes.toBytes(rowIndex));
260       put.add(family, column1, timestamp, value1);
261       put.add(family, column2, timestamp, value2);
262       sourceTable.put(put);
263     }
264     // some rows only in the target table
265     // ROWSWITHDIFFS: 10
266     // SOURCEMISSINGROWS: 10
267     // SOURCEMISSINGCELLS: 20
268     for (; rowIndex < 60; rowIndex++) {
269       Put put = new Put(Bytes.toBytes(rowIndex));
270       put.add(family, column1, timestamp, value1);
271       put.add(family, column2, timestamp, value2);
272       targetTable.put(put);
273     }
274     // some rows with 1 missing cell in target table
275     // ROWSWITHDIFFS: 10
276     // TARGETMISSINGCELLS: 10
277     for (; rowIndex < 70; rowIndex++) {
278       Put sourcePut = new Put(Bytes.toBytes(rowIndex));
279       sourcePut.add(family, column1, timestamp, value1);
280       sourcePut.add(family, column2, timestamp, value2);
281       sourceTable.put(sourcePut);
282 
283       Put targetPut = new Put(Bytes.toBytes(rowIndex));
284       targetPut.add(family, column1, timestamp, value1);
285       targetTable.put(targetPut);
286     }
287     // some rows with 1 missing cell in source table
288     // ROWSWITHDIFFS: 10
289     // SOURCEMISSINGCELLS: 10
290     for (; rowIndex < 80; rowIndex++) {
291       Put sourcePut = new Put(Bytes.toBytes(rowIndex));
292       sourcePut.add(family, column1, timestamp, value1);
293       sourceTable.put(sourcePut);
294 
295       Put targetPut = new Put(Bytes.toBytes(rowIndex));
296       targetPut.add(family, column1, timestamp, value1);
297       targetPut.add(family, column2, timestamp, value2);
298       targetTable.put(targetPut);
299     }
300     // some rows differing only in timestamp
301     // ROWSWITHDIFFS: 10
302     // SOURCEMISSINGCELLS: 20
303     // TARGETMISSINGCELLS: 20
304     for (; rowIndex < 90; rowIndex++) {
305       Put sourcePut = new Put(Bytes.toBytes(rowIndex));
306       sourcePut.add(family, column1, timestamp, column1);
307       sourcePut.add(family, column2, timestamp, value2);
308       sourceTable.put(sourcePut);
309 
310       Put targetPut = new Put(Bytes.toBytes(rowIndex));
311       targetPut.add(family, column1, timestamp+1, column1);
312       targetPut.add(family, column2, timestamp-1, value2);
313       targetTable.put(targetPut);
314     }
315     // some rows with different values
316     // ROWSWITHDIFFS: 10
317     // DIFFERENTCELLVALUES: 20
318     for (; rowIndex < numRows; rowIndex++) {
319       Put sourcePut = new Put(Bytes.toBytes(rowIndex));
320       sourcePut.add(family, column1, timestamp, value1);
321       sourcePut.add(family, column2, timestamp, value2);
322       sourceTable.put(sourcePut);
323       
324       Put targetPut = new Put(Bytes.toBytes(rowIndex));
325       targetPut.add(family, column1, timestamp, value3);
326       targetPut.add(family, column2, timestamp, value3);
327       targetTable.put(targetPut);
328     }
329     
330     sourceTable.close();
331     targetTable.close();
332   }
333   
334 
335 }