1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce.hadoopbackport;
19
20 import java.io.File;
21 import java.io.FileOutputStream;
22 import java.io.IOException;
23 import java.io.StringReader;
24 import java.lang.reflect.Array;
25 import java.util.ArrayList;
26 import java.util.Arrays;
27
28 import org.apache.commons.io.FileUtils;
29 import org.apache.commons.io.IOUtils;
30 import org.apache.hadoop.conf.Configuration;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.FileUtil;
33 import org.apache.hadoop.fs.Path;
34 import org.apache.hadoop.hbase.SmallTests;
35 import org.apache.hadoop.io.LongWritable;
36 import org.apache.hadoop.io.NullWritable;
37 import org.apache.hadoop.io.SequenceFile;
38 import org.apache.hadoop.io.Writable;
39 import org.apache.hadoop.util.ReflectionUtils;
40 import org.apache.hadoop.util.Tool;
41 import org.junit.AfterClass;
42 import org.junit.BeforeClass;
43 import org.junit.Test;
44 import org.junit.experimental.categories.Category;
45
46 import static org.junit.Assert.*;
47
48
49
50
51 @Category(SmallTests.class)
52 public class TestInputSamplerTool {
53
54 private static final int NUM_REDUCES = 4;
55
56 private static final String input1Str =
57 "2\n"
58 +"...5\n"
59 +"......8\n";
60 private static final String input2Str =
61 "2\n"
62 +".3\n"
63 +"..4\n"
64 +"...5\n"
65 +"....6\n"
66 +".....7\n"
67 +"......8\n"
68 +".......9\n";
69
70 private static File tempDir;
71 private static String input1, input2, output;
72
73 @BeforeClass
74 public static void beforeClass() throws IOException {
75 tempDir = FileUtil.createLocalTempFile(
76 new File(FileUtils.getTempDirectory(), TestInputSamplerTool.class.getName() + "-tmp-"),
77 "", false);
78 tempDir.delete();
79 tempDir.mkdirs();
80 assertTrue(tempDir.exists());
81 assertTrue(tempDir.isDirectory());
82
83 input1 = tempDir.getAbsolutePath() + "/input1";
84 input2 = tempDir.getAbsolutePath() + "/input2";
85 output = tempDir.getAbsolutePath() + "/output";
86
87 IOUtils.copy(new StringReader(input1Str), new FileOutputStream(input1));
88 IOUtils.copy(new StringReader(input2Str), new FileOutputStream(input2));
89 }
90
91 @AfterClass
92 public static void afterClass() throws IOException {
93 final File td = tempDir;
94 if (td != null && td.exists()) {
95 FileUtil.fullyDelete(tempDir);
96 }
97 }
98
99 @Test
100 public void testIncorrectParameters() throws Exception {
101 Tool tool = new InputSampler<Object,Object>(new Configuration());
102
103 int result = tool.run(new String[] { "-r" });
104 assertTrue(result != 0);
105
106 result = tool.run(new String[] { "-r", "not-a-number" });
107 assertTrue(result != 0);
108
109
110 result = tool.run(new String[] { "-r", "1" });
111 assertTrue(result != 0);
112
113 try {
114 result = tool.run(new String[] { "-inFormat", "java.lang.Object" });
115 fail("ClassCastException expected");
116 } catch (ClassCastException cce) {
117
118 }
119
120 try {
121 result = tool.run(new String[] { "-keyClass", "java.lang.Object" });
122 fail("ClassCastException expected");
123 } catch (ClassCastException cce) {
124
125 }
126
127 result = tool.run(new String[] { "-splitSample", "1", });
128 assertTrue(result != 0);
129
130 result = tool.run(new String[] { "-splitRandom", "1.0", "2", "xxx" });
131 assertTrue(result != 0);
132
133 result = tool.run(new String[] { "-splitInterval", "yyy", "5" });
134 assertTrue(result != 0);
135
136
137 result = tool.run(new String[] { "-r", "2", "-splitInterval", "11.0f", "0", "input" });
138 assertTrue(result != 0);
139 }
140
141 @Test
142 public void testSplitSample() throws Exception {
143 Tool tool = new InputSampler<Object,Object>(new Configuration());
144 int result = tool.run(new String[] { "-r", Integer.toString(NUM_REDUCES),
145 "-splitSample", "10", "100",
146 input1, input2, output });
147 assertEquals(0, result);
148
149 Object[] partitions = readPartitions(output);
150 assertArrayEquals(
151 new LongWritable[] { new LongWritable(2L), new LongWritable(7L), new LongWritable(20L),},
152 partitions);
153 }
154
155 @Test
156 @SuppressWarnings("unchecked")
157 public void testSplitRamdom() throws Exception {
158 Tool tool = new InputSampler<Object,Object>(new Configuration());
159 int result = tool.run(new String[] { "-r", Integer.toString(NUM_REDUCES),
160
161
162 "-splitRandom", "0.999f", "20", "100",
163 input1, input2, output });
164 assertEquals(0, result);
165 Object[] partitions = readPartitions(output);
166
167 assertEquals(3, partitions.length);
168
169 Object[] sortedPartitions = Arrays.copyOf(partitions, partitions.length);
170 Arrays.sort(sortedPartitions, new LongWritable.Comparator());
171 assertArrayEquals(sortedPartitions, partitions);
172 }
173
174 @Test
175 public void testSplitInterval() throws Exception {
176 Tool tool = new InputSampler<Object,Object>(new Configuration());
177 int result = tool.run(new String[] { "-r", Integer.toString(NUM_REDUCES),
178 "-splitInterval", "0.5f", "0",
179 input1, input2, output });
180 assertEquals(0, result);
181 Object[] partitions = readPartitions(output);
182 assertArrayEquals(new LongWritable[] { new LongWritable(7L), new LongWritable(9L),
183 new LongWritable(35L),}, partitions);
184 }
185
186 private Object[] readPartitions(String filePath) throws Exception {
187 Configuration conf = new Configuration();
188 TotalOrderPartitioner.setPartitionFile(conf, new Path(filePath));
189 Object[] partitions = readPartitions(FileSystem.getLocal(conf), new Path(filePath),
190 LongWritable.class, conf);
191 return partitions;
192 }
193
194 private Object[] readPartitions(FileSystem fs, Path p, Class<?> keyClass,
195 Configuration conf) throws IOException {
196 SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
197 ArrayList<Object> parts = new ArrayList<Object>();
198 Writable key = (Writable)ReflectionUtils.newInstance(keyClass, conf);
199 NullWritable value = NullWritable.get();
200 while (reader.next(key, value)) {
201 parts.add(key);
202 key = (Writable)ReflectionUtils.newInstance(keyClass, conf);
203 }
204 reader.close();
205 return parts.toArray((Object[])Array.newInstance(keyClass, parts.size()));
206 }
207 }