1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import static org.junit.Assert.assertEquals;
21
22 import java.io.IOException;
23 import java.util.Arrays;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.apache.hadoop.fs.FileSystem;
28 import org.apache.hadoop.fs.Path;
29 import org.apache.hadoop.hbase.Cell;
30 import org.apache.hadoop.hbase.CellUtil;
31 import org.apache.hadoop.hbase.HBaseTestingUtility;
32 import org.apache.hadoop.hbase.TableName;
33 import org.apache.hadoop.hbase.client.HTable;
34 import org.apache.hadoop.hbase.client.Put;
35 import org.apache.hadoop.hbase.client.Result;
36 import org.apache.hadoop.hbase.client.ResultScanner;
37 import org.apache.hadoop.hbase.client.Scan;
38 import org.apache.hadoop.hbase.mapreduce.SyncTable.SyncMapper.Counter;
39 import org.apache.hadoop.hbase.testclassification.LargeTests;
40 import org.apache.hadoop.hbase.util.Bytes;
41 import org.apache.hadoop.mapreduce.Counters;
42 import org.junit.AfterClass;
43 import org.junit.Assert;
44 import org.junit.BeforeClass;
45 import org.junit.Test;
46 import org.junit.experimental.categories.Category;
47
48 import com.google.common.base.Throwables;
49
50
51
52
53 @Category(LargeTests.class)
54 public class TestSyncTable {
55
56 private static final Log LOG = LogFactory.getLog(TestSyncTable.class);
57
58 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
59
60 @BeforeClass
61 public static void beforeClass() throws Exception {
62 TEST_UTIL.startMiniCluster(3);
63 TEST_UTIL.startMiniMapReduceCluster();
64 }
65
66 @AfterClass
67 public static void afterClass() throws Exception {
68 TEST_UTIL.shutdownMiniMapReduceCluster();
69 TEST_UTIL.shutdownMiniCluster();
70 }
71
72 private static byte[][] generateSplits(int numRows, int numRegions) {
73 byte[][] splitRows = new byte[numRegions-1][];
74 for (int i = 1; i < numRegions; i++) {
75 splitRows[i-1] = Bytes.toBytes(numRows * i / numRegions);
76 }
77 return splitRows;
78 }
79
80 @Test
81 public void testSyncTable() throws Exception {
82 String sourceTableName = "testSourceTable";
83 String targetTableName = "testTargetTable";
84 Path testDir = TEST_UTIL.getDataTestDirOnTestFS("testSyncTable");
85
86 writeTestData(sourceTableName, targetTableName);
87 hashSourceTable(sourceTableName, testDir);
88 Counters syncCounters = syncTables(sourceTableName, targetTableName, testDir);
89 assertEqualTables(90, sourceTableName, targetTableName);
90
91 assertEquals(60, syncCounters.findCounter(Counter.ROWSWITHDIFFS).getValue());
92 assertEquals(10, syncCounters.findCounter(Counter.SOURCEMISSINGROWS).getValue());
93 assertEquals(10, syncCounters.findCounter(Counter.TARGETMISSINGROWS).getValue());
94 assertEquals(50, syncCounters.findCounter(Counter.SOURCEMISSINGCELLS).getValue());
95 assertEquals(50, syncCounters.findCounter(Counter.TARGETMISSINGCELLS).getValue());
96 assertEquals(20, syncCounters.findCounter(Counter.DIFFERENTCELLVALUES).getValue());
97
98 TEST_UTIL.deleteTable(sourceTableName);
99 TEST_UTIL.deleteTable(targetTableName);
100 TEST_UTIL.cleanupDataTestDirOnTestFS();
101 }
102
103 private void assertEqualTables(int expectedRows, String sourceTableName, String targetTableName)
104 throws Exception {
105 HTable sourceTable = new HTable(TEST_UTIL.getConfiguration(),
106 TableName.valueOf(sourceTableName));
107 HTable targetTable = new HTable(TEST_UTIL.getConfiguration(),
108 TableName.valueOf(targetTableName));
109
110 ResultScanner sourceScanner = sourceTable.getScanner(new Scan());
111 ResultScanner targetScanner = targetTable.getScanner(new Scan());
112
113 for (int i = 0; i < expectedRows; i++) {
114 Result sourceRow = sourceScanner.next();
115 Result targetRow = targetScanner.next();
116
117 LOG.debug("SOURCE row: " + (sourceRow == null ? "null" : Bytes.toInt(sourceRow.getRow()))
118 + " cells:" + sourceRow);
119 LOG.debug("TARGET row: " + (targetRow == null ? "null" : Bytes.toInt(targetRow.getRow()))
120 + " cells:" + targetRow);
121
122 if (sourceRow == null) {
123 Assert.fail("Expected " + expectedRows
124 + " source rows but only found " + i);
125 }
126 if (targetRow == null) {
127 Assert.fail("Expected " + expectedRows
128 + " target rows but only found " + i);
129 }
130 Cell[] sourceCells = sourceRow.rawCells();
131 Cell[] targetCells = targetRow.rawCells();
132 if (sourceCells.length != targetCells.length) {
133 LOG.debug("Source cells: " + Arrays.toString(sourceCells));
134 LOG.debug("Target cells: " + Arrays.toString(targetCells));
135 Assert.fail("Row " + Bytes.toInt(sourceRow.getRow())
136 + " has " + sourceCells.length
137 + " cells in source table but " + targetCells.length
138 + " cells in target table");
139 }
140 for (int j = 0; j < sourceCells.length; j++) {
141 Cell sourceCell = sourceCells[j];
142 Cell targetCell = targetCells[j];
143 try {
144 if (!CellUtil.matchingRow(sourceCell, targetCell)) {
145 Assert.fail("Rows don't match");
146 }
147 if (!CellUtil.matchingFamily(sourceCell, targetCell)) {
148 Assert.fail("Families don't match");
149 }
150 if (!CellUtil.matchingQualifier(sourceCell, targetCell)) {
151 Assert.fail("Qualifiers don't match");
152 }
153 if (!CellUtil.matchingTimestamp(sourceCell, targetCell)) {
154 Assert.fail("Timestamps don't match");
155 }
156 if (!CellUtil.matchingValue(sourceCell, targetCell)) {
157 Assert.fail("Values don't match");
158 }
159 } catch (Throwable t) {
160 LOG.debug("Source cell: " + sourceCell + " target cell: " + targetCell);
161 Throwables.propagate(t);
162 }
163 }
164 }
165 Result sourceRow = sourceScanner.next();
166 if (sourceRow != null) {
167 Assert.fail("Source table has more than " + expectedRows
168 + " rows. Next row: " + Bytes.toInt(sourceRow.getRow()));
169 }
170 Result targetRow = targetScanner.next();
171 if (targetRow != null) {
172 Assert.fail("Target table has more than " + expectedRows
173 + " rows. Next row: " + Bytes.toInt(targetRow.getRow()));
174 }
175 sourceScanner.close();
176 targetScanner.close();
177 sourceTable.close();
178 targetTable.close();
179 }
180
181 private Counters syncTables(String sourceTableName, String targetTableName,
182 Path testDir) throws Exception {
183 SyncTable syncTable = new SyncTable(TEST_UTIL.getConfiguration());
184 int code = syncTable.run(new String[] {
185 testDir.toString(),
186 sourceTableName,
187 targetTableName
188 });
189 assertEquals("sync table job failed", 0, code);
190
191 LOG.info("Sync tables completed");
192 return syncTable.counters;
193 }
194
195 private void hashSourceTable(String sourceTableName, Path testDir)
196 throws Exception, IOException {
197 int numHashFiles = 3;
198 long batchSize = 100;
199 int scanBatch = 1;
200 HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
201 int code = hashTable.run(new String[] {
202 "--batchsize=" + batchSize,
203 "--numhashfiles=" + numHashFiles,
204 "--scanbatch=" + scanBatch,
205 sourceTableName,
206 testDir.toString()});
207 assertEquals("hash table job failed", 0, code);
208
209 FileSystem fs = TEST_UTIL.getTestFileSystem();
210
211 HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
212 assertEquals(sourceTableName, tableHash.tableName);
213 assertEquals(batchSize, tableHash.batchSize);
214 assertEquals(numHashFiles, tableHash.numHashFiles);
215 assertEquals(numHashFiles - 1, tableHash.partitions.size());
216
217 LOG.info("Hash table completed");
218 }
219
220 private void writeTestData(String sourceTableName, String targetTableName)
221 throws Exception {
222 final byte[] family = Bytes.toBytes("family");
223 final byte[] column1 = Bytes.toBytes("c1");
224 final byte[] column2 = Bytes.toBytes("c2");
225 final byte[] value1 = Bytes.toBytes("val1");
226 final byte[] value2 = Bytes.toBytes("val2");
227 final byte[] value3 = Bytes.toBytes("val3");
228
229 int numRows = 100;
230 int sourceRegions = 10;
231 int targetRegions = 6;
232
233 HTable sourceTable = TEST_UTIL.createTable(TableName.valueOf(sourceTableName),
234 family, generateSplits(numRows, sourceRegions));
235
236 HTable targetTable = TEST_UTIL.createTable(TableName.valueOf(targetTableName),
237 family, generateSplits(numRows, targetRegions));
238
239 long timestamp = 1430764183454L;
240
241 int rowIndex = 0;
242
243 for (; rowIndex < 40; rowIndex++) {
244 Put sourcePut = new Put(Bytes.toBytes(rowIndex));
245 sourcePut.add(family, column1, timestamp, value1);
246 sourcePut.add(family, column2, timestamp, value2);
247 sourceTable.put(sourcePut);
248
249 Put targetPut = new Put(Bytes.toBytes(rowIndex));
250 targetPut.add(family, column1, timestamp, value1);
251 targetPut.add(family, column2, timestamp, value2);
252 targetTable.put(targetPut);
253 }
254
255
256
257
258 for (; rowIndex < 50; rowIndex++) {
259 Put put = new Put(Bytes.toBytes(rowIndex));
260 put.add(family, column1, timestamp, value1);
261 put.add(family, column2, timestamp, value2);
262 sourceTable.put(put);
263 }
264
265
266
267
268 for (; rowIndex < 60; rowIndex++) {
269 Put put = new Put(Bytes.toBytes(rowIndex));
270 put.add(family, column1, timestamp, value1);
271 put.add(family, column2, timestamp, value2);
272 targetTable.put(put);
273 }
274
275
276
277 for (; rowIndex < 70; rowIndex++) {
278 Put sourcePut = new Put(Bytes.toBytes(rowIndex));
279 sourcePut.add(family, column1, timestamp, value1);
280 sourcePut.add(family, column2, timestamp, value2);
281 sourceTable.put(sourcePut);
282
283 Put targetPut = new Put(Bytes.toBytes(rowIndex));
284 targetPut.add(family, column1, timestamp, value1);
285 targetTable.put(targetPut);
286 }
287
288
289
290 for (; rowIndex < 80; rowIndex++) {
291 Put sourcePut = new Put(Bytes.toBytes(rowIndex));
292 sourcePut.add(family, column1, timestamp, value1);
293 sourceTable.put(sourcePut);
294
295 Put targetPut = new Put(Bytes.toBytes(rowIndex));
296 targetPut.add(family, column1, timestamp, value1);
297 targetPut.add(family, column2, timestamp, value2);
298 targetTable.put(targetPut);
299 }
300
301
302
303
304 for (; rowIndex < 90; rowIndex++) {
305 Put sourcePut = new Put(Bytes.toBytes(rowIndex));
306 sourcePut.add(family, column1, timestamp, column1);
307 sourcePut.add(family, column2, timestamp, value2);
308 sourceTable.put(sourcePut);
309
310 Put targetPut = new Put(Bytes.toBytes(rowIndex));
311 targetPut.add(family, column1, timestamp+1, column1);
312 targetPut.add(family, column2, timestamp-1, value2);
313 targetTable.put(targetPut);
314 }
315
316
317
318 for (; rowIndex < numRows; rowIndex++) {
319 Put sourcePut = new Put(Bytes.toBytes(rowIndex));
320 sourcePut.add(family, column1, timestamp, value1);
321 sourcePut.add(family, column2, timestamp, value2);
322 sourceTable.put(sourcePut);
323
324 Put targetPut = new Put(Bytes.toBytes(rowIndex));
325 targetPut.add(family, column1, timestamp, value3);
326 targetPut.add(family, column2, timestamp, value3);
327 targetTable.put(targetPut);
328 }
329
330 sourceTable.close();
331 targetTable.close();
332 }
333
334
335 }