1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.mapreduce;
21
22 import java.io.IOException;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.conf.Configurable;
27 import org.apache.hadoop.conf.Configuration;
28 import org.apache.hadoop.hbase.KeyValue;
29 import org.apache.hadoop.hbase.client.HTable;
30 import org.apache.hadoop.hbase.client.Scan;
31 import org.apache.hadoop.hbase.util.Bytes;
32 import org.apache.hadoop.util.StringUtils;
33
34
35
36
37 public class TableInputFormat extends TableInputFormatBase
38 implements Configurable {
39
40 private final Log LOG = LogFactory.getLog(TableInputFormat.class);
41
42
43 public static final String INPUT_TABLE = "hbase.mapreduce.inputtable";
44
45
46
47 public static final String SCAN = "hbase.mapreduce.scan";
48
49 public static final String SCAN_ROW_START = "hbase.mapreduce.scan.row.start";
50
51 public static final String SCAN_ROW_STOP = "hbase.mapreduce.scan.row.stop";
52
53 public static final String SCAN_COLUMN_FAMILY = "hbase.mapreduce.scan.column.family";
54
55 public static final String SCAN_COLUMNS = "hbase.mapreduce.scan.columns";
56
57 public static final String SCAN_TIMESTAMP = "hbase.mapreduce.scan.timestamp";
58
59 public static final String SCAN_TIMERANGE_START = "hbase.mapreduce.scan.timerange.start";
60
61 public static final String SCAN_TIMERANGE_END = "hbase.mapreduce.scan.timerange.end";
62
63 public static final String SCAN_MAXVERSIONS = "hbase.mapreduce.scan.maxversions";
64
65 public static final String SCAN_CACHEBLOCKS = "hbase.mapreduce.scan.cacheblocks";
66
67 public static final String SCAN_CACHEDROWS = "hbase.mapreduce.scan.cachedrows";
68
69 public static final String SCAN_BATCHSIZE = "hbase.mapreduce.scan.batchsize";
70
71
72 private Configuration conf = null;
73
74
75
76
77
78
79
80 @Override
81 public Configuration getConf() {
82 return conf;
83 }
84
85
86
87
88
89
90
91
92
93 @Override
94 public void setConf(Configuration configuration) {
95 this.conf = configuration;
96 String tableName = conf.get(INPUT_TABLE);
97 try {
98 setHTable(new HTable(new Configuration(conf), tableName));
99 } catch (Exception e) {
100 LOG.error(StringUtils.stringifyException(e));
101 }
102
103 Scan scan = null;
104
105 if (conf.get(SCAN) != null) {
106 try {
107 scan = TableMapReduceUtil.convertStringToScan(conf.get(SCAN));
108 } catch (IOException e) {
109 LOG.error("An error occurred.", e);
110 }
111 } else {
112 try {
113 scan = new Scan();
114
115 if (conf.get(SCAN_ROW_START) != null) {
116 scan.setStartRow(Bytes.toBytes(conf.get(SCAN_ROW_START)));
117 }
118
119 if (conf.get(SCAN_ROW_STOP) != null) {
120 scan.setStopRow(Bytes.toBytes(conf.get(SCAN_ROW_STOP)));
121 }
122
123 if (conf.get(SCAN_COLUMNS) != null) {
124 addColumns(scan, conf.get(SCAN_COLUMNS));
125 }
126
127 if (conf.get(SCAN_COLUMN_FAMILY) != null) {
128 scan.addFamily(Bytes.toBytes(conf.get(SCAN_COLUMN_FAMILY)));
129 }
130
131 if (conf.get(SCAN_TIMESTAMP) != null) {
132 scan.setTimeStamp(Long.parseLong(conf.get(SCAN_TIMESTAMP)));
133 }
134
135 if (conf.get(SCAN_TIMERANGE_START) != null && conf.get(SCAN_TIMERANGE_END) != null) {
136 scan.setTimeRange(
137 Long.parseLong(conf.get(SCAN_TIMERANGE_START)),
138 Long.parseLong(conf.get(SCAN_TIMERANGE_END)));
139 }
140
141 if (conf.get(SCAN_MAXVERSIONS) != null) {
142 scan.setMaxVersions(Integer.parseInt(conf.get(SCAN_MAXVERSIONS)));
143 }
144
145 if (conf.get(SCAN_CACHEDROWS) != null) {
146 scan.setCaching(Integer.parseInt(conf.get(SCAN_CACHEDROWS)));
147 }
148
149 if (conf.get(SCAN_BATCHSIZE) != null) {
150 scan.setBatch(Integer.parseInt(conf.get(SCAN_BATCHSIZE)));
151 }
152
153
154 scan.setCacheBlocks((conf.getBoolean(SCAN_CACHEBLOCKS, false)));
155 } catch (Exception e) {
156 LOG.error(StringUtils.stringifyException(e));
157 }
158 }
159
160 setScan(scan);
161 }
162
163
164
165
166
167
168
169
170
171
172
173
174 private static void addColumn(Scan scan, byte[] familyAndQualifier) {
175 byte [][] fq = KeyValue.parseColumn(familyAndQualifier);
176 if (fq.length > 1 && fq[1] != null && fq[1].length > 0) {
177 scan.addColumn(fq[0], fq[1]);
178 } else {
179 scan.addFamily(fq[0]);
180 }
181 }
182
183
184
185
186
187
188
189
190 public static void addColumns(Scan scan, byte [][] columns) {
191 for (byte[] column : columns) {
192 addColumn(scan, column);
193 }
194 }
195
196
197
198
199
200
201
202
203
204
205 private static void addColumns(Scan scan, String columns) {
206 String[] cols = columns.split(" ");
207 for (String col : cols) {
208 addColumn(scan, Bytes.toBytes(col));
209 }
210 }
211
212 }