1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.mapreduce.hadoopbackport;
19  
20  import java.io.File;
21  import java.io.FileOutputStream;
22  import java.io.IOException;
23  import java.io.StringReader;
24  import java.lang.reflect.Array;
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  
28  import org.apache.commons.io.FileUtils;
29  import org.apache.commons.io.IOUtils;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.FileUtil;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.SmallTests;
35  import org.apache.hadoop.io.LongWritable;
36  import org.apache.hadoop.io.NullWritable;
37  import org.apache.hadoop.io.SequenceFile;
38  import org.apache.hadoop.io.Writable;
39  import org.apache.hadoop.util.ReflectionUtils;
40  import org.apache.hadoop.util.Tool;
41  import org.junit.AfterClass;
42  import org.junit.BeforeClass;
43  import org.junit.Test;
44  import org.junit.experimental.categories.Category;
45  
46  import static org.junit.Assert.*;
47  
48  /**
49   * Tests {@link InputSampler} as a {@link Tool}.
50   */
51  @Category(SmallTests.class)
52  public class TestInputSamplerTool {
53  
54    private static final int NUM_REDUCES = 4;
55  
56    private static final String input1Str =
57       "2\n"
58      +"...5\n"
59      +"......8\n";
60    private static final String input2Str =
61       "2\n"
62      +".3\n"
63      +"..4\n"
64      +"...5\n"
65      +"....6\n"
66      +".....7\n"
67      +"......8\n"
68      +".......9\n";
69  
70    private static File tempDir;
71    private static String input1, input2, output;
72  
73    @BeforeClass
74    public static void beforeClass() throws IOException {
75      tempDir = FileUtil.createLocalTempFile(
76        new File(FileUtils.getTempDirectory(), TestInputSamplerTool.class.getName() + "-tmp-"),
77        "", false);
78      tempDir.delete();
79      tempDir.mkdirs();
80      assertTrue(tempDir.exists());
81      assertTrue(tempDir.isDirectory());
82      // define files:
83      input1 = tempDir.getAbsolutePath() + "/input1";
84      input2 = tempDir.getAbsolutePath() + "/input2";
85      output = tempDir.getAbsolutePath() + "/output";
86      // create 2 input files:
87      IOUtils.copy(new StringReader(input1Str), new FileOutputStream(input1));
88      IOUtils.copy(new StringReader(input2Str), new FileOutputStream(input2));
89    }
90  
91    @AfterClass
92    public static void afterClass() throws IOException {
93      final File td = tempDir;
94      if (td != null && td.exists()) {
95        FileUtil.fullyDelete(tempDir);
96      }
97    }
98  
99    @Test
100   public void testIncorrectParameters() throws Exception {
101     Tool tool = new InputSampler<Object,Object>(new Configuration());
102 
103     int result = tool.run(new String[] { "-r" });
104     assertTrue(result != 0);
105 
106     result = tool.run(new String[] { "-r", "not-a-number" });
107     assertTrue(result != 0);
108 
109     // more than one reducer is required:
110     result = tool.run(new String[] { "-r", "1" });
111     assertTrue(result != 0);
112 
113     try {
114       result = tool.run(new String[] { "-inFormat", "java.lang.Object" });
115       fail("ClassCastException expected");
116     } catch (ClassCastException cce) {
117       // expected
118     }
119 
120     try {
121       result = tool.run(new String[] { "-keyClass", "java.lang.Object" });
122       fail("ClassCastException expected");
123     } catch (ClassCastException cce) {
124       // expected
125     }
126 
127     result = tool.run(new String[] { "-splitSample", "1", });
128     assertTrue(result != 0);
129 
130     result = tool.run(new String[] { "-splitRandom", "1.0", "2", "xxx" });
131     assertTrue(result != 0);
132 
133     result = tool.run(new String[] { "-splitInterval", "yyy", "5" });
134     assertTrue(result != 0);
135 
136     // not enough subsequent arguments:
137     result = tool.run(new String[] { "-r", "2", "-splitInterval", "11.0f", "0", "input" });
138     assertTrue(result != 0);
139   }
140 
141   @Test
142   public void testSplitSample() throws Exception {
143     Tool tool = new InputSampler<Object,Object>(new Configuration());
144     int result = tool.run(new String[] { "-r", Integer.toString(NUM_REDUCES),
145         "-splitSample", "10", "100",
146         input1, input2, output });
147     assertEquals(0, result);
148 
149     Object[] partitions = readPartitions(output);
150     assertArrayEquals(
151         new LongWritable[] { new LongWritable(2L), new LongWritable(7L), new LongWritable(20L),},
152         partitions);
153   }
154 
155   @Test
156   @SuppressWarnings("unchecked")
157   public void testSplitRamdom() throws Exception {
158     Tool tool = new InputSampler<Object,Object>(new Configuration());
159     int result = tool.run(new String[] { "-r", Integer.toString(NUM_REDUCES),
160         // Use 0.999 probability to reduce the flakiness of the test because
161         // the test will fail if the number of samples is less than (number of reduces + 1).
162         "-splitRandom", "0.999f", "20", "100",
163         input1, input2, output });
164     assertEquals(0, result);
165     Object[] partitions = readPartitions(output);
166     // must be 3 split points since NUM_REDUCES = 4:
167     assertEquals(3, partitions.length);
168     // check that the partition array is sorted:
169     Object[] sortedPartitions = Arrays.copyOf(partitions, partitions.length);
170     Arrays.sort(sortedPartitions, new LongWritable.Comparator());
171     assertArrayEquals(sortedPartitions, partitions);
172   }
173 
174   @Test
175   public void testSplitInterval() throws Exception {
176     Tool tool = new InputSampler<Object,Object>(new Configuration());
177     int result = tool.run(new String[] { "-r", Integer.toString(NUM_REDUCES),
178         "-splitInterval", "0.5f", "0",
179         input1, input2, output });
180     assertEquals(0, result);
181     Object[] partitions = readPartitions(output);
182     assertArrayEquals(new LongWritable[] { new LongWritable(7L), new LongWritable(9L),
183       new LongWritable(35L),}, partitions);
184   }
185 
186   private Object[] readPartitions(String filePath) throws Exception {
187     Configuration conf = new Configuration();
188     TotalOrderPartitioner.setPartitionFile(conf, new Path(filePath));
189     Object[] partitions = readPartitions(FileSystem.getLocal(conf), new Path(filePath),
190       LongWritable.class, conf);
191     return partitions;
192   }
193 
194   private Object[] readPartitions(FileSystem fs, Path p, Class<?> keyClass,
195       Configuration conf) throws IOException {
196     SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
197     ArrayList<Object> parts = new ArrayList<Object>();
198     Writable key = (Writable)ReflectionUtils.newInstance(keyClass, conf);
199     NullWritable value = NullWritable.get();
200     while (reader.next(key, value)) {
201       parts.add(key);
202       key = (Writable)ReflectionUtils.newInstance(keyClass, conf);
203     }
204     reader.close();
205     return parts.toArray((Object[])Array.newInstance(keyClass, parts.size()));
206   }
207 }