View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapred;
21  
22  import java.io.IOException;
23  
24  import org.apache.hadoop.hbase.HBaseConfiguration;
25  import org.apache.hadoop.hbase.client.HTable;
26  import org.apache.hadoop.hbase.client.Put;
27  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
28  import org.apache.hadoop.io.Writable;
29  import org.apache.hadoop.io.WritableComparable;
30  import org.apache.hadoop.mapred.FileInputFormat;
31  import org.apache.hadoop.mapred.JobConf;
32  
33  /**
34   * Utility for {@link TableMap} and {@link TableReduce}
35   */
36  @Deprecated
37  @SuppressWarnings("unchecked")
38  public class TableMapReduceUtil {
39  
40    /**
41     * Use this before submitting a TableMap job. It will
42     * appropriately set up the JobConf.
43     *
44     * @param table  The table name to read from.
45     * @param columns  The columns to scan.
46     * @param mapper  The mapper class to use.
47     * @param outputKeyClass  The class of the output key.
48     * @param outputValueClass  The class of the output value.
49     * @param job  The current job configuration to adjust.
50     */
51    public static void initTableMapJob(String table, String columns,
52      Class<? extends TableMap> mapper,
53      Class<? extends WritableComparable> outputKeyClass,
54      Class<? extends Writable> outputValueClass, JobConf job) {
55  
56      job.setInputFormat(TableInputFormat.class);
57      job.setMapOutputValueClass(outputValueClass);
58      job.setMapOutputKeyClass(outputKeyClass);
59      job.setMapperClass(mapper);
60      FileInputFormat.addInputPaths(job, table);
61      job.set(TableInputFormat.COLUMN_LIST, columns);
62    }
63  
64    /**
65     * Use this before submitting a TableReduce job. It will
66     * appropriately set up the JobConf.
67     *
68     * @param table  The output table.
69     * @param reducer  The reducer class to use.
70     * @param job  The current job configuration to adjust.
71     * @throws IOException When determining the region count fails.
72     */
73    public static void initTableReduceJob(String table,
74      Class<? extends TableReduce> reducer, JobConf job)
75    throws IOException {
76      initTableReduceJob(table, reducer, job, null);
77    }
78  
79    /**
80     * Use this before submitting a TableReduce job. It will
81     * appropriately set up the JobConf.
82     *
83     * @param table  The output table.
84     * @param reducer  The reducer class to use.
85     * @param job  The current job configuration to adjust.
86     * @param partitioner  Partitioner to use. Pass <code>null</code> to use
87     * default partitioner.
88     * @throws IOException When determining the region count fails.
89     */
90    public static void initTableReduceJob(String table,
91      Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
92    throws IOException {
93      job.setOutputFormat(TableOutputFormat.class);
94      job.setReducerClass(reducer);
95      job.set(TableOutputFormat.OUTPUT_TABLE, table);
96      job.setOutputKeyClass(ImmutableBytesWritable.class);
97      job.setOutputValueClass(Put.class);
98      if (partitioner == HRegionPartitioner.class) {
99        job.setPartitionerClass(HRegionPartitioner.class);
100       HTable outputTable = new HTable(new HBaseConfiguration(job), table);
101       int regions = outputTable.getRegionsInfo().size();
102       if (job.getNumReduceTasks() > regions) {
103         job.setNumReduceTasks(outputTable.getRegionsInfo().size());
104       }
105     } else if (partitioner != null) {
106       job.setPartitionerClass(partitioner);
107     }
108   }
109 
110   /**
111    * Ensures that the given number of reduce tasks for the given job
112    * configuration does not exceed the number of regions for the given table.
113    *
114    * @param table  The table to get the region count for.
115    * @param job  The current job configuration to adjust.
116    * @throws IOException When retrieving the table details fails.
117    */
118   public static void limitNumReduceTasks(String table, JobConf job)
119   throws IOException {
120     HTable outputTable = new HTable(new HBaseConfiguration(job), table);
121     int regions = outputTable.getRegionsInfo().size();
122     if (job.getNumReduceTasks() > regions)
123       job.setNumReduceTasks(regions);
124   }
125 
126   /**
127    * Ensures that the given number of map tasks for the given job
128    * configuration does not exceed the number of regions for the given table.
129    *
130    * @param table  The table to get the region count for.
131    * @param job  The current job configuration to adjust.
132    * @throws IOException When retrieving the table details fails.
133    */
134   public static void limitNumMapTasks(String table, JobConf job)
135   throws IOException {
136     HTable outputTable = new HTable(new HBaseConfiguration(job), table);
137     int regions = outputTable.getRegionsInfo().size();
138     if (job.getNumMapTasks() > regions)
139       job.setNumMapTasks(regions);
140   }
141 
142   /**
143    * Sets the number of reduce tasks for the given job configuration to the
144    * number of regions the given table has.
145    *
146    * @param table  The table to get the region count for.
147    * @param job  The current job configuration to adjust.
148    * @throws IOException When retrieving the table details fails.
149    */
150   public static void setNumReduceTasks(String table, JobConf job)
151   throws IOException {
152     HTable outputTable = new HTable(new HBaseConfiguration(job), table);
153     int regions = outputTable.getRegionsInfo().size();
154     job.setNumReduceTasks(regions);
155   }
156 
157   /**
158    * Sets the number of map tasks for the given job configuration to the
159    * number of regions the given table has.
160    *
161    * @param table  The table to get the region count for.
162    * @param job  The current job configuration to adjust.
163    * @throws IOException When retrieving the table details fails.
164    */
165   public static void setNumMapTasks(String table, JobConf job)
166   throws IOException {
167     HTable outputTable = new HTable(new HBaseConfiguration(job), table);
168     int regions = outputTable.getRegionsInfo().size();
169     job.setNumMapTasks(regions);
170   }
171 
172   /**
173    * Sets the number of rows to return and cache with each scanner iteration.
174    * Higher caching values will enable faster mapreduce jobs at the expense of
175    * requiring more heap to contain the cached rows.
176    *
177    * @param job The current job configuration to adjust.
178    * @param batchSize The number of rows to return in batch with each scanner
179    * iteration.
180    */
181   public static void setScannerCaching(JobConf job, int batchSize) {
182     job.setInt("hbase.client.scanner.caching", batchSize);
183   }
184 }