1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.mapreduce;
21
22 import static org.junit.Assert.assertEquals;
23 import static org.junit.Assert.assertFalse;
24 import static org.junit.Assert.assertNotSame;
25 import static org.junit.Assert.assertTrue;
26
27 import java.io.IOException;
28 import java.util.Arrays;
29 import java.util.Random;
30
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 import org.apache.hadoop.conf.Configuration;
34 import org.apache.hadoop.fs.FileStatus;
35 import org.apache.hadoop.fs.FileSystem;
36 import org.apache.hadoop.fs.Path;
37 import org.apache.hadoop.hbase.HBaseConfiguration;
38 import org.apache.hadoop.hbase.HBaseTestingUtility;
39 import org.apache.hadoop.hbase.HConstants;
40 import org.apache.hadoop.hbase.KeyValue;
41 import org.apache.hadoop.hbase.PerformanceEvaluation;
42 import org.apache.hadoop.hbase.client.HBaseAdmin;
43 import org.apache.hadoop.hbase.client.HTable;
44 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
45 import org.apache.hadoop.hbase.util.Bytes;
46 import org.apache.hadoop.io.NullWritable;
47 import org.apache.hadoop.mapreduce.Job;
48 import org.apache.hadoop.mapreduce.Mapper;
49 import org.apache.hadoop.mapreduce.RecordWriter;
50 import org.apache.hadoop.mapreduce.TaskAttemptContext;
51 import org.apache.hadoop.mapreduce.TaskAttemptID;
52 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
53 import org.junit.Before;
54 import org.junit.Test;
55 import org.mockito.Mockito;
56
57
58
59
60
61
62
63
64 public class TestHFileOutputFormat {
65 private final static int ROWSPERSPLIT = 1024;
66
67 private static final byte[] FAMILY_NAME = PerformanceEvaluation.FAMILY_NAME;
68 private static final byte[] TABLE_NAME = Bytes.toBytes("TestTable");
69
70 private HBaseTestingUtility util = new HBaseTestingUtility();
71
72 private static Log LOG = LogFactory.getLog(TestHFileOutputFormat.class);
73
74
75
76
77 static class RandomKVGeneratingMapper
78 extends Mapper<NullWritable, NullWritable,
79 ImmutableBytesWritable, KeyValue> {
80
81 private int keyLength;
82 private static final int KEYLEN_DEFAULT=10;
83 private static final String KEYLEN_CONF="randomkv.key.length";
84
85 private int valLength;
86 private static final int VALLEN_DEFAULT=10;
87 private static final String VALLEN_CONF="randomkv.val.length";
88
89 @Override
90 protected void setup(Context context) throws IOException,
91 InterruptedException {
92 super.setup(context);
93
94 Configuration conf = context.getConfiguration();
95 keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
96 valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
97 }
98
99 protected void map(
100 NullWritable n1, NullWritable n2,
101 Mapper<NullWritable, NullWritable,
102 ImmutableBytesWritable,KeyValue>.Context context)
103 throws java.io.IOException ,InterruptedException
104 {
105
106 byte keyBytes[] = new byte[keyLength];
107 byte valBytes[] = new byte[valLength];
108
109 Random random = new Random(System.currentTimeMillis());
110 for (int i = 0; i < ROWSPERSPLIT; i++) {
111
112 random.nextBytes(keyBytes);
113 random.nextBytes(valBytes);
114 ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
115
116 KeyValue kv = new KeyValue(keyBytes, PerformanceEvaluation.FAMILY_NAME,
117 PerformanceEvaluation.QUALIFIER_NAME, valBytes);
118 context.write(key, kv);
119 }
120 }
121 }
122
123 @Before
124 public void cleanupDir() throws IOException {
125 util.cleanupTestDir();
126 }
127
128
129 private void setupRandomGeneratorMapper(Job job) {
130 job.setInputFormatClass(NMapInputFormat.class);
131 job.setMapperClass(RandomKVGeneratingMapper.class);
132 job.setMapOutputKeyClass(ImmutableBytesWritable.class);
133 job.setMapOutputValueClass(KeyValue.class);
134 }
135
136
137
138
139
140
141 @Test
142 public void test_LATEST_TIMESTAMP_isReplaced()
143 throws IOException, InterruptedException {
144 Configuration conf = new Configuration(this.util.getConfiguration());
145 RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
146 TaskAttemptContext context = null;
147 Path dir =
148 HBaseTestingUtility.getTestDir("test_LATEST_TIMESTAMP_isReplaced");
149 try {
150 Job job = new Job(conf);
151 FileOutputFormat.setOutputPath(job, dir);
152 context = new TaskAttemptContext(job.getConfiguration(),
153 new TaskAttemptID());
154 HFileOutputFormat hof = new HFileOutputFormat();
155 writer = hof.getRecordWriter(context);
156 final byte [] b = Bytes.toBytes("b");
157
158
159
160 KeyValue kv = new KeyValue(b, b, b);
161 KeyValue original = kv.clone();
162 writer.write(new ImmutableBytesWritable(), kv);
163 assertFalse(original.equals(kv));
164 assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
165 assertTrue(original.matchingColumn(kv.getFamily(), kv.getQualifier()));
166 assertNotSame(original.getTimestamp(), kv.getTimestamp());
167 assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
168
169
170
171 kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
172 original = kv.clone();
173 writer.write(new ImmutableBytesWritable(), kv);
174 assertTrue(original.equals(kv));
175 } finally {
176 if (writer != null && context != null) writer.close(context);
177 dir.getFileSystem(conf).delete(dir, true);
178 }
179 }
180
181
182
183
184 @Test
185 public void testWritingPEData() throws Exception {
186 Configuration conf = util.getConfiguration();
187 Path testDir = HBaseTestingUtility.getTestDir("testWritingPEData");
188 FileSystem fs = testDir.getFileSystem(conf);
189
190
191 conf.setInt("io.sort.mb", 20);
192
193 conf.setLong("hbase.hregion.max.filesize", 64 * 1024);
194
195 Job job = new Job(conf, "testWritingPEData");
196 setupRandomGeneratorMapper(job);
197
198
199 byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
200 byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
201
202 Arrays.fill(startKey, (byte)0);
203 Arrays.fill(endKey, (byte)0xff);
204
205 job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
206
207 SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
208 SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
209 job.setReducerClass(KeyValueSortReducer.class);
210 job.setOutputFormatClass(HFileOutputFormat.class);
211 job.setNumReduceTasks(4);
212
213 FileOutputFormat.setOutputPath(job, testDir);
214 assertTrue(job.waitForCompletion(false));
215 FileStatus [] files = fs.listStatus(testDir);
216 assertTrue(files.length > 0);
217 }
218
219 @Test
220 public void testJobConfiguration() throws Exception {
221 Job job = new Job();
222 HTable table = Mockito.mock(HTable.class);
223 byte[][] mockKeys = new byte[][] {
224 HConstants.EMPTY_BYTE_ARRAY,
225 Bytes.toBytes("aaa"),
226 Bytes.toBytes("ggg"),
227 Bytes.toBytes("zzz")
228 };
229 Mockito.doReturn(mockKeys).when(table).getStartKeys();
230
231 HFileOutputFormat.configureIncrementalLoad(job, table);
232 assertEquals(job.getNumReduceTasks(), 4);
233 }
234
235 private byte [][] generateRandomStartKeys(int numKeys) {
236 Random random = new Random();
237 byte[][] ret = new byte[numKeys][];
238
239 ret[0] = HConstants.EMPTY_BYTE_ARRAY;
240 for (int i = 1; i < numKeys; i++) {
241 ret[i] = PerformanceEvaluation.generateValue(random);
242 }
243 return ret;
244 }
245
246 @Test
247 public void testMRIncrementalLoad() throws Exception {
248 doIncrementalLoadTest(false);
249 }
250
251 @Test
252 public void testMRIncrementalLoadWithSplit() throws Exception {
253 doIncrementalLoadTest(true);
254 }
255
256 private void doIncrementalLoadTest(
257 boolean shouldChangeRegions) throws Exception {
258 Configuration conf = util.getConfiguration();
259 Path testDir = HBaseTestingUtility.getTestDir("testLocalMRIncrementalLoad");
260 byte[][] startKeys = generateRandomStartKeys(5);
261
262 try {
263 util.startMiniCluster();
264 HBaseAdmin admin = new HBaseAdmin(conf);
265 HTable table = util.createTable(TABLE_NAME, FAMILY_NAME);
266 int numRegions = util.createMultiRegions(
267 util.getConfiguration(), table, FAMILY_NAME,
268 startKeys);
269 assertEquals("Should make 5 regions",
270 numRegions, 5);
271 assertEquals("Should start with empty table",
272 0, util.countRows(table));
273
274
275 util.startMiniMapReduceCluster();
276 runIncrementalPELoad(conf, table, testDir);
277
278 assertEquals("HFOF should not touch actual table",
279 0, util.countRows(table));
280
281 if (shouldChangeRegions) {
282 LOG.info("Changing regions in table");
283 admin.disableTable(table.getTableName());
284 byte[][] newStartKeys = generateRandomStartKeys(15);
285 util.createMultiRegions(util.getConfiguration(),
286 table, FAMILY_NAME, newStartKeys);
287 admin.enableTable(table.getTableName());
288 while (table.getRegionsInfo().size() != 15 ||
289 !admin.isTableAvailable(table.getTableName())) {
290 Thread.sleep(1000);
291 LOG.info("Waiting for new region assignment to happen");
292 }
293 }
294
295
296 new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
297
298
299 int expectedRows = conf.getInt("mapred.map.tasks", 1) * ROWSPERSPLIT;
300 assertEquals("LoadIncrementalHFiles should put expected data in table",
301 expectedRows, util.countRows(table));
302 String tableDigestBefore = util.checksumRows(table);
303
304
305 admin.disableTable(TABLE_NAME);
306 while (table.getRegionsInfo().size() != 0) {
307 Thread.sleep(1000);
308 LOG.info("Waiting for table to disable");
309 }
310 admin.enableTable(TABLE_NAME);
311 util.waitTableAvailable(TABLE_NAME, 30000);
312
313 assertEquals("Data should remain after reopening of regions",
314 tableDigestBefore, util.checksumRows(table));
315 } finally {
316 util.shutdownMiniMapReduceCluster();
317 util.shutdownMiniCluster();
318 }
319 }
320
321
322
323 private void runIncrementalPELoad(
324 Configuration conf, HTable table, Path outDir)
325 throws Exception {
326 Job job = new Job(conf, "testLocalMRIncrementalLoad");
327 setupRandomGeneratorMapper(job);
328 HFileOutputFormat.configureIncrementalLoad(job, table);
329 FileOutputFormat.setOutputPath(job, outDir);
330
331 assertEquals(table.getRegionsInfo().size(),
332 job.getNumReduceTasks());
333
334 assertTrue(job.waitForCompletion(true));
335 }
336
337 public static void main(String args[]) throws Exception {
338 new TestHFileOutputFormat().manualTest(args);
339 }
340
341 public void manualTest(String args[]) throws Exception {
342 Configuration conf = HBaseConfiguration.create();
343 util = new HBaseTestingUtility(conf);
344 if ("newtable".equals(args[0])) {
345 byte[] tname = args[1].getBytes();
346 HTable table = util.createTable(tname, FAMILY_NAME);
347 HBaseAdmin admin = new HBaseAdmin(conf);
348 admin.disableTable(tname);
349 util.createMultiRegions(conf, table, FAMILY_NAME,
350 generateRandomStartKeys(5));
351 admin.enableTable(tname);
352 } else if ("incremental".equals(args[0])) {
353 byte[] tname = args[1].getBytes();
354 HTable table = new HTable(conf, tname);
355 Path outDir = new Path("incremental-out");
356 runIncrementalPELoad(conf, table, outDir);
357 } else {
358 throw new RuntimeException(
359 "usage: TestHFileOutputFormat newtable | incremental");
360 }
361 }
362 }