/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * Utility for {@link TableMap} and {@link TableReduce}.
 */
@Deprecated
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {

  /**
   * Use this before submitting a TableMap job. It will
   * appropriately set up the JobConf.
   *
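   * <p>A minimal usage sketch. <code>MyJob</code> and
   * <code>MyTableMap</code> are hypothetical, the output key/value classes
   * must match what the mapper emits, and the column list is
   * space-delimited:
   * <pre>
   * JobConf job = new JobConf(MyJob.class);
   * TableMapReduceUtil.initTableMapJob("mytable", "info:c1 info:c2",
   *   MyTableMap.class, ImmutableBytesWritable.class, Result.class, job);
   * </pre>
   *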
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   */
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job) {
    initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job, true);
  }

  /**
   * Use this before submitting a TableMap job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   * @param addDependencyJars  Upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   */
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job, boolean addDependencyJars) {
    job.setInputFormat(TableInputFormat.class);
    job.setMapOutputValueClass(outputValueClass);
    job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    FileInputFormat.addInputPaths(job, table);
    job.set(TableInputFormat.COLUMN_LIST, columns);
    if (addDependencyJars) {
      try {
        addDependencyJars(job);
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
    try {
      initCredentials(job);
    } catch (IOException ioe) {
      // just spit out the stack trace?  really?
      ioe.printStackTrace();
    }
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
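   * <p>A minimal usage sketch. <code>MyTableReduce</code> is hypothetical
   * and must emit {@link ImmutableBytesWritable} keys and {@link Put}
   * values:
   * <pre>
   * TableMapReduceUtil.initTableReduceJob("mytable", MyTableReduce.class, job);
   * </pre>
   *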
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job)
  throws IOException {
    initTableReduceJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
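   * <p>For example, to partition reduce output by the region it will land
   * in, pass the bundled {@link HRegionPartitioner} (the reducer class is
   * hypothetical):
   * <pre>
   * TableMapReduceUtil.initTableReduceJob("mytable", MyTableReduce.class,
   *   job, HRegionPartitioner.class);
   * </pre>
   *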
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   * the default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
  throws IOException {
    initTableReduceJob(table, reducer, job, partitioner, true);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   * the default partitioner.
   * @param addDependencyJars  Upload HBase jars and jars for any of the
   *          configured job classes via the distributed cache (tmpjars).
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
    boolean addDependencyJars) throws IOException {
    job.setOutputFormat(TableOutputFormat.class);
    job.setReducerClass(reducer);
    job.set(TableOutputFormat.OUTPUT_TABLE, table);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
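    // When partitioning by region, running more reduce tasks than the table
    // has regions is wasted work, so the branch below caps the reduce count
    // at the table's current region count.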
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }
    if (addDependencyJars) {
      addDependencyJars(job);
    }
    initCredentials(job);
  }

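  /**
   * When HBase security is enabled, obtains an authentication token for the
   * current user and adds it to the job; otherwise this is a no-op.
   *
   * @param job  The job configuration to receive the token.
   * @throws IOException When the token cannot be obtained.
   */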
  public static void initCredentials(JobConf job) throws IOException {
    if (User.isHBaseSecurityEnabled(job)) {
      try {
        User.getCurrent().obtainAuthTokenForJob(job);
      } catch (InterruptedException ie) {
        ie.printStackTrace();
        // restore the interrupt status rather than clearing it
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
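   * <p>Sketch of intended use before job submission (the table name is
   * illustrative):
   * <pre>
   * TableMapReduceUtil.limitNumReduceTasks("mytable", job);
   * </pre>
   *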
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

  /**
   * Ensures that the given number of map tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumMapTasks() > regions)
      job.setNumMapTasks(regions);
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of map tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(HBaseConfiguration.create(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumMapTasks(regions);
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
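   * <p>For example, to fetch and cache 500 rows per scanner round trip
   * (the value is illustrative; tune it against available heap):
   * <pre>
   * TableMapReduceUtil.setScannerCaching(job, 500);
   * </pre>
   *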
   * @param job  The current job configuration to adjust.
   * @param batchSize  The number of rows to return and cache with each
   * scanner iteration.
   */
  public static void setScannerCaching(JobConf job, int batchSize) {
    job.setInt("hbase.client.scanner.caching", batchSize);
  }

  /**
   * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
   */
  public static void addDependencyJars(JobConf job) throws IOException {
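    // Delegate to the newer mapreduce utility, shipping HBase's third-party
    // dependencies plus the job's own key/value, partitioner, input/output
    // format, and combiner classes via the distributed cache (tmpjars).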
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
      job,
      org.apache.zookeeper.ZooKeeper.class,
      com.google.common.base.Function.class,
      com.google.protobuf.Message.class,
      job.getMapOutputKeyClass(),
      job.getMapOutputValueClass(),
      job.getOutputKeyClass(),
      job.getOutputValueClass(),
      job.getPartitionerClass(),
      job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
      job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
      job.getCombinerClass());
  }
}