/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
/**
 * Utility for {@link TableMap} and {@link TableReduce}.
 */
@Deprecated
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {

  /**
   * Use this before submitting a TableMap job. It will
   * appropriately set up the JobConf.
   *
   * @param table The table name to read from.
   * @param columns The columns to scan.
   * @param mapper The mapper class to use.
   * @param outputKeyClass The class of the output key.
   * @param outputValueClass The class of the output value.
   * @param job The current job configuration to adjust.
   */
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job) {

    job.setInputFormat(TableInputFormat.class);
    job.setMapOutputValueClass(outputValueClass);
    job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    // The table name rides along as the job's input path; TableInputFormat
    // reads it back out when it is configured.
    FileInputFormat.addInputPaths(job, table);
    job.set(TableInputFormat.COLUMN_LIST, columns);
  }
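
  // A minimal driver sketch (illustrative only, not part of this class):
  // wiring a map-only scan job with the bundled IdentityTableMap, assuming
  // the Result-based TableMap of this API generation. The driver class
  // MyDriver, table name "mytable" and column family "info:" are
  // placeholders.
  //
  //   JobConf job = new JobConf(new HBaseConfiguration(), MyDriver.class);
  //   job.setJobName("scan mytable");
  //   TableMapReduceUtil.initTableMapJob("mytable", "info:",
  //     IdentityTableMap.class, ImmutableBytesWritable.class,
  //     Result.class, job);
  //   job.setNumReduceTasks(0); // map-only
  //   job.setOutputFormat(NullOutputFormat.class); // discard map output
  //   JobClient.runJob(job);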

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table The output table.
   * @param reducer The reducer class to use.
   * @param job The current job configuration to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job)
  throws IOException {
    initTableReduceJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table The output table.
   * @param reducer The reducer class to use.
   * @param job The current job configuration to adjust.
   * @param partitioner Partitioner to use. Pass <code>null</code> to use
   * the default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
  throws IOException {
    job.setOutputFormat(TableOutputFormat.class);
    job.setReducerClass(reducer);
    job.set(TableOutputFormat.OUTPUT_TABLE, table);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      // With the region partitioner, reducers beyond the region count would
      // sit idle, so cap the reduce count at the number of regions.
      HTable outputTable = new HTable(new HBaseConfiguration(job), table);
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }
  }
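
  // Illustrative sketch: configuring the write side of a job so output goes
  // to the placeholder table "target", partitioned by region. MyTableReduce
  // is hypothetical; any TableReduce implementation emitting
  // (ImmutableBytesWritable, Put) pairs fits.
  //
  //   TableMapReduceUtil.initTableReduceJob("target", MyTableReduce.class,
  //     job, HRegionPartitioner.class);
  //   // ...or with the default partitioner:
  //   TableMapReduceUtil.initTableReduceJob("target", MyTableReduce.class, job);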

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table The table to get the region count for.
   * @param job The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

  /**
   * Ensures that the given number of map tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table The table to get the region count for.
   * @param job The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable htable = new HTable(new HBaseConfiguration(job), table);
    int regions = htable.getRegionsInfo().size();
    if (job.getNumMapTasks() > regions)
      job.setNumMapTasks(regions);
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table The table to get the region count for.
   * @param job The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of map tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table The table to get the region count for.
   * @param job The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable htable = new HTable(new HBaseConfiguration(job), table);
    int regions = htable.getRegionsInfo().size();
    job.setNumMapTasks(regions);
  }
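
  // Illustrative sketch of the task-count helpers above; "target" is a
  // placeholder table name. The limit* methods only lower an excessive
  // count, while the set* methods pin the count to the region count exactly.
  //
  //   TableMapReduceUtil.limitNumReduceTasks("target", job); // cap at #regions
  //   TableMapReduceUtil.setNumMapTasks("target", job);      // one map per region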

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job The current job configuration to adjust.
   * @param batchSize The number of rows to fetch and cache per scanner
   * round-trip to a region server.
   */
  public static void setScannerCaching(JobConf job, int batchSize) {
    job.setInt("hbase.client.scanner.caching", batchSize);
  }
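
  // Illustrative sketch; the value 500 is an arbitrary example. Larger values
  // mean fewer round-trips to the region servers per map task, at the cost of
  // more client-side heap for the cached rows.
  //
  //   TableMapReduceUtil.setScannerCaching(job, 500);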
}