/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;

/**
 * Simple test for {@link HFileOutputFormat}.
 * Sets up and runs a mapreduce job that writes HFile output, then loads the
 * result into a table and verifies the data, using keys and values like
 * those of {@link PerformanceEvaluation}.
 */
public class TestHFileOutputFormat {
  private final static int ROWSPERSPLIT = 1024;

  private static final byte[] FAMILY_NAME = PerformanceEvaluation.FAMILY_NAME;
  private static final byte[] TABLE_NAME = Bytes.toBytes("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static Log LOG = LogFactory.getLog(TestHFileOutputFormat.class);

  /**
   * Simple mapper that makes KeyValue output.
   */
  static class RandomKVGeneratingMapper
  extends Mapper<NullWritable, NullWritable,
                 ImmutableBytesWritable, KeyValue> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT = 10;
    private static final String KEYLEN_CONF = "randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT = 10;
    private static final String VALLEN_CONF = "randomkv.val.length";

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

    @Override
    protected void map(
        NullWritable n1, NullWritable n2,
        Context context)
        throws IOException, InterruptedException {

      byte[] keyBytes = new byte[keyLength];
      byte[] valBytes = new byte[valLength];

      int taskId = context.getTaskAttemptID().getTaskID().getId();
      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

      Random random = new Random();
      for (int i = 0; i < ROWSPERSPLIT; i++) {
        random.nextBytes(keyBytes);
        // Ensure that unique tasks generate unique keys
        keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
        random.nextBytes(valBytes);

        ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
        KeyValue kv = new KeyValue(keyBytes, PerformanceEvaluation.FAMILY_NAME,
            PerformanceEvaluation.QUALIFIER_NAME, valBytes);
        context.write(key, kv);
      }
    }
  }

  @Before
  public void cleanupDir() throws IOException {
    util.cleanupTestDir();
  }

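  // Wires the random KeyValue generator into a job: NMapInputFormat creates
  // the configured number of map tasks without any real input, and each one
  // emits ROWSPERSPLIT random rows via RandomKVGeneratingMapper.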
  private void setupRandomGeneratorMapper(Job job) {
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }

  /**
   * Test that {@link HFileOutputFormat} RecordWriter amends timestamps if
   * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   */
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced()
  throws IOException, InterruptedException {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir =
      HBaseTestingUtility.getTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = new TaskAttemptContext(job.getConfiguration(),
          new TaskAttemptID());
      HFileOutputFormat hof = new HFileOutputFormat();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

      // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
      // changed by call to write.  Check all in kv is same but ts.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
      assertTrue(original.matchingColumn(kv.getFamily(), kv.getQualifier()));
      // assertNotSame on boxed longs only checks reference identity and
      // passes vacuously; compare the primitive values instead.
      assertFalse(original.getTimestamp() == kv.getTimestamp());
      assertFalse(kv.getTimestamp() == HConstants.LATEST_TIMESTAMP);

      // Test 2. Now test passing a kv that has an explicit ts.  It should not
      // be changed by the call to write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * Run small MR job.
   */
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = HBaseTestingUtility.getTestDir("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Set down this value or we OOME in eclipse.
    conf.setInt("io.sort.mb", 20);
    // Force the writer to roll HFiles often so the job writes a few files.
    conf.setLong("hbase.hregion.max.filesize", 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);

    // This partitioner doesn't work well for random keys but using it anyway
    // just to demonstrate how to configure it.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte)0);
    Arrays.fill(endKey, (byte)0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setNumReduceTasks(4);

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

  @Test
  public void testJobConfiguration() throws Exception {
    Job job = new Job();
    HTable table = Mockito.mock(HTable.class);
    byte[][] mockKeys = new byte[][] {
      HConstants.EMPTY_BYTE_ARRAY,
      Bytes.toBytes("aaa"),
      Bytes.toBytes("ggg"),
      Bytes.toBytes("zzz")
    };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();

    HFileOutputFormat.configureIncrementalLoad(job, table);
    // One reducer per region start key.
    assertEquals(4, job.getNumReduceTasks());
  }

  private byte[][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // first region start key is always empty
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] = PerformanceEvaluation.generateValue(random);
    }
    return ret;
  }

  @Test
  public void testMRIncrementalLoad() throws Exception {
    doIncrementalLoadTest(false);
  }

  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    doIncrementalLoadTest(true);
  }

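  // End-to-end flow: start a mini cluster, write HFiles with an MR job,
  // bulk-load them with LoadIncrementalHFiles (optionally re-splitting the
  // table in between), then verify row counts and checksums survive a
  // region reopen.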
  private void doIncrementalLoadTest(
      boolean shouldChangeRegions) throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = HBaseTestingUtility.getTestDir("testLocalMRIncrementalLoad");
    byte[][] startKeys = generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILY_NAME);
      int numRegions = util.createMultiRegions(
          util.getConfiguration(), table, FAMILY_NAME,
          startKeys);
      assertEquals("Should make 5 regions",
          5, numRegions);
      assertEquals("Should start with empty table",
          0, util.countRows(table));

      // Generate the bulk load files
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table",
          0, util.countRows(table));

      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getTableName());
        while (util.getMiniHBaseCluster().getMaster().getAssignmentManager().
            isRegionsInTransition()) {
          Threads.sleep(1000);
          LOG.info("Waiting on table to finish disabling");
        }
        byte[][] newStartKeys = generateRandomStartKeys(15);
        util.createMultiRegions(util.getConfiguration(),
            table, FAMILY_NAME, newStartKeys);
        admin.enableTable(table.getTableName());
        while (table.getRegionsInfo().size() != 15 ||
            !admin.isTableAvailable(table.getTableName())) {
          Thread.sleep(1000);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = conf.getInt("mapred.map.tasks", 1) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));
      String tableDigestBefore = util.checksumRows(table);

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(1000);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME, 30000);
      assertEquals("Data should remain after reopening of regions",
          tableDigestBefore, util.checksumRows(table));
    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

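  // Runs the incremental-load MR job: random-KV mappers feed reducers set up
  // by configureIncrementalLoad (one reducer per region), which write HFiles
  // under outDir.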
  private void runIncrementalPELoad(
      Configuration conf, HTable table, Path outDir)
  throws Exception {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    setupRandomGeneratorMapper(job);
    HFileOutputFormat.configureIncrementalLoad(job, table);
    FileOutputFormat.setOutputPath(job, outDir);

    assertEquals(table.getRegionsInfo().size(),
        job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }

  public static void main(String args[]) throws Exception {
    new TestHFileOutputFormat().manualTest(args);
  }

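  /**
   * Manual driver for exercising this test against a real cluster:
   * "newtable TABLENAME" creates a pre-split table, and
   * "incremental TABLENAME" runs the incremental-load MR job against an
   * existing table, writing HFiles to ./incremental-out.
   */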
  public void manualTest(String args[]) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      byte[] tname = Bytes.toBytes(args[1]);
      HTable table = util.createTable(tname, FAMILY_NAME);
      HBaseAdmin admin = new HBaseAdmin(conf);
      admin.disableTable(tname);
      util.createMultiRegions(conf, table, FAMILY_NAME,
          generateRandomStartKeys(5));
      admin.enableTable(tname);
    } else if ("incremental".equals(args[0])) {
      byte[] tname = Bytes.toBytes(args[1]);
      HTable table = new HTable(conf, tname);
      Path outDir = new Path("incremental-out");
      runIncrementalPELoad(conf, table, outDir);
    } else {
      throw new RuntimeException(
          "usage: TestHFileOutputFormat newtable | incremental");
    }
  }
}