/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import com.google.protobuf.HBaseZeroCopyByteString;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
import org.apache.hadoop.hbase.client.IsolationLevel;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.io.Writable;

import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.UUID;

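/**
 * Hadoop MR API-agnostic implementation for mapreduce over table snapshots.
 * setInput() restores the snapshot into a temporary directory, and each record
 * reader then scans one restored region directly from the filesystem instead
 * of going through the region servers. A minimal usage sketch (the snapshot
 * name and restore path are illustrative):
 *
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
 * TableSnapshotInputFormatImpl.setInput(conf, "my_snapshot", new Path("/tmp/snapshot-restore"));
 * List&lt;InputSplit&gt; splits = TableSnapshotInputFormatImpl.getSplits(conf);
 * </pre>
 */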
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class TableSnapshotInputFormatImpl {

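  // Keys under which setInput() records the snapshot name and the directory
  // the snapshot is restored into, for getSplits() and the record readers.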
  private static final String SNAPSHOT_NAME_KEY = "hbase.TableSnapshotInputFormat.snapshot.name";
  private static final String TABLE_DIR_KEY = "hbase.TableSnapshotInputFormat.table.dir";

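  // See getBestLocations(): hosts whose block weight is at least this
  // fraction of the top host's weight are advertised as split locations.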
  private static final String LOCALITY_CUTOFF_MULTIPLIER =
      "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
  private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;

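  /**
   * Implementation class for InputSplit logic common between mapred and
   * mapreduce. Holds the encoded region name and the preferred locations, and
   * serializes itself through the TableSnapshotRegionSplit protobuf message.
   */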
  public static class InputSplit implements Writable {
    private String regionName;
    private String[] locations;

    // Required for Writable deserialization.
    public InputSplit() { }

    public InputSplit(String regionName, List<String> locations) {
      this.regionName = regionName;
      if (locations == null || locations.isEmpty()) {
        this.locations = new String[0];
      } else {
        this.locations = locations.toArray(new String[locations.size()]);
      }
    }

    public long getLength() {
      // Split sizes are not computed from the snapshot files; all splits
      // report the same (zero) length.
      return 0;
    }

    public String[] getLocations() {
      return locations;
    }

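    // Hadoop has no protobuf serialization plugged in here, so the message is
    // framed by hand: a length prefix followed by the raw protobuf bytes.
    // readFields() consumes the same framing.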
    @Override
    public void write(DataOutput out) throws IOException {
      MapReduceProtos.TableSnapshotRegionSplit.Builder builder =
          MapReduceProtos.TableSnapshotRegionSplit.newBuilder()
              .setRegion(HBaseProtos.RegionSpecifier.newBuilder()
                  .setType(HBaseProtos.RegionSpecifier.RegionSpecifierType.ENCODED_REGION_NAME)
                  .setValue(HBaseZeroCopyByteString.wrap(Bytes.toBytes(regionName))).build());

      for (String location : locations) {
        builder.addLocations(location);
      }

      MapReduceProtos.TableSnapshotRegionSplit split = builder.build();

      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      split.writeTo(baos);
      baos.close();
      byte[] buf = baos.toByteArray();
      out.writeInt(buf.length);
      out.write(buf);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
      int len = in.readInt();
      byte[] buf = new byte[len];
      in.readFully(buf);
      MapReduceProtos.TableSnapshotRegionSplit split =
          MapReduceProtos.TableSnapshotRegionSplit.PARSER.parseFrom(buf);
      this.regionName = Bytes.toString(split.getRegion().getValue().toByteArray());
      List<String> locationsList = split.getLocationsList();
      this.locations = locationsList.toArray(new String[locationsList.size()]);
    }
  }

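  /**
   * Implementation class for RecordReader logic common between mapred and
   * mapreduce. Reads the restored region files directly from the filesystem
   * through a ClientSideRegionScanner; no region server is contacted.
   */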
  public static class RecordReader {
    InputSplit split;
    private Scan scan;
    private Result result = null;
    private ImmutableBytesWritable row = null;
    private ClientSideRegionScanner scanner;

    public ClientSideRegionScanner getScanner() {
      return scanner;
    }

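    /**
     * Opens a ClientSideRegionScanner for the split's region over the
     * restored table directory, using either the serialized Scan from the job
     * configuration or one built from the mapred column list.
     */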
    public void initialize(InputSplit split, Configuration conf) throws IOException {
      this.split = split;
      String regionName = this.split.regionName;
      String snapshotName = getSnapshotName(conf);
      Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
      FileSystem fs = rootDir.getFileSystem(conf);

      // Directory the snapshot was restored into by setInput().
      Path tmpRootDir = new Path(conf.get(TABLE_DIR_KEY));

      Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);

      // Load the table descriptor and this split's region info from the snapshot.
      HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, snapshotDir);

      Path regionDir = new Path(snapshotDir, regionName);
      HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);

      // Prefer an explicitly serialized Scan; otherwise fall back to the
      // mapred-style column list and scan the listed families.
      if (conf.get(TableInputFormat.SCAN) != null) {
        scan = TableMapReduceUtil.convertStringToScan(conf.get(TableInputFormat.SCAN));
      } else if (conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST) != null) {
        String[] columns =
            conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST).split(" ");
        scan = new Scan();
        for (String col : columns) {
          scan.addFamily(Bytes.toBytes(col));
        }
      } else {
        throw new IllegalArgumentException("A Scan is not configured for this job");
      }

      // The restored region files are immutable, so READ_UNCOMMITTED is safe
      // and saves us from managing a read point.
      scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
      // Do not pollute the block cache with a one-shot scan.
      scan.setCacheBlocks(false);

      scanner = new ClientSideRegionScanner(conf, fs, tmpRootDir, htd, hri, scan, null);
    }

    public boolean nextKeyValue() throws IOException {
      result = scanner.next();
      if (result == null) {
        // We are done reading the region.
        return false;
      }

      if (this.row == null) {
        this.row = new ImmutableBytesWritable();
      }
      this.row.set(result.getRow());
      return true;
    }

    public ImmutableBytesWritable getCurrentKey() {
      return row;
    }

    public Result getCurrentValue() {
      return result;
    }

    public long getPos() {
      // Byte position within the split is not tracked.
      return 0;
    }

    public float getProgress() {
      // Progress estimation is not implemented.
      return 0;
    }

    public void close() {
      if (this.scanner != null) {
        this.scanner.close();
      }
    }
  }

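  /**
   * Creates one input split per snapshot region whose key range overlaps the
   * configured Scan. Each split advertises up to three hosts, chosen by
   * getBestLocations() from the region's HDFS block distribution.
   */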
  public static List<InputSplit> getSplits(Configuration conf) throws IOException {
    String snapshotName = getSnapshotName(conf);

    Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
    FileSystem fs = rootDir.getFileSystem(conf);

    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    // Reading the snapshot description fails fast if the snapshot is missing
    // or unreadable.
    HBaseProtos.SnapshotDescription snapshotDesc =
        SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);

    Set<String> snapshotRegionNames =
        SnapshotReferenceUtil.getSnapshotRegionNames(fs, snapshotDir);
    if (snapshotRegionNames == null) {
      throw new IllegalArgumentException("Snapshot seems empty");
    }

    // Load the table descriptor from the snapshot.
    HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, snapshotDir);

    // Same scan resolution as RecordReader.initialize(): an explicit Scan
    // wins, otherwise fall back to the mapred-style column list.
    Scan scan = null;
    if (conf.get(TableInputFormat.SCAN) != null) {
      scan = TableMapReduceUtil.convertStringToScan(conf.get(TableInputFormat.SCAN));
    } else if (conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST) != null) {
      String[] columns =
          conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST).split(" ");
      scan = new Scan();
      for (String col : columns) {
        scan.addFamily(Bytes.toBytes(col));
      }
    } else {
      throw new IllegalArgumentException("Unable to create scan");
    }
    Path tableDir = new Path(conf.get(TABLE_DIR_KEY));

    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (String regionName : snapshotRegionNames) {
      // Load the region descriptor.
      Path regionDir = new Path(snapshotDir, regionName);
      HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);

      if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(),
          hri.getStartKey(), hri.getEndKey())) {
        // Compute HDFS locations from the snapshot files (this also resolves
        // the locations of referred hfiles) and keep at most three hosts.
        List<String> hosts = getBestLocations(conf,
            HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));

        int len = Math.min(3, hosts.size());
        hosts = hosts.subList(0, len);
        splits.add(new InputSplit(regionName, hosts));
      }
    }

    return splits;
  }

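  /**
   * Picks the hosts to advertise for a split from the region's HDFS block
   * distribution. MR/Yarn schedulers treat all advertised locations as equal,
   * so instead of passing every host we only keep those whose block weight is
   * at least LOCALITY_CUTOFF_MULTIPLIER (default 0.8) times the weight of the
   * most local host. For example, with the default cutoff, a host with weight
   * 80 qualifies when the top host has weight 100.
   */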
  public static List<String> getBestLocations(
      Configuration conf, HDFSBlocksDistribution blockDistribution) {
    List<String> locations = new ArrayList<String>(3);

    HostAndWeight[] hostAndWeights = blockDistribution.getTopHostsWithWeights();

    if (hostAndWeights.length == 0) {
      return locations;
    }

    HostAndWeight topHost = hostAndWeights[0];
    locations.add(topHost.getHost());

    double cutoffMultiplier =
        conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER);

    double filterWeight = topHost.getWeight() * cutoffMultiplier;

    // Hosts are sorted by descending weight, so stop at the first one that
    // falls below the cutoff.
    for (int i = 1; i < hostAndWeights.length; i++) {
      if (hostAndWeights[i].getWeight() >= filterWeight) {
        locations.add(hostAndWeights[i].getHost());
      } else {
        break;
      }
    }

    return locations;
  }

  private static String getSnapshotName(Configuration conf) {
    String snapshotName = conf.get(SNAPSHOT_NAME_KEY);
    if (snapshotName == null) {
      throw new IllegalArgumentException("Snapshot name must be provided");
    }
    return snapshotName;
  }

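  /**
   * Configures the job for reading from the given snapshot: records the
   * snapshot name, restores the snapshot into a unique subdirectory of
   * restoreDir, and records that directory for getSplits() and the record
   * readers.
   *
   * The current user needs write access to restoreDir, and it should not be a
   * subdirectory of the HBase root dir; it can be deleted once the job is
   * done.
   *
   * @param conf the job configuration
   * @param snapshotName the name of the snapshot to read from
   * @param restoreDir a temporary directory to restore the snapshot into
   * @throws IOException if the snapshot cannot be restored
   */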
  public static void setInput(Configuration conf, String snapshotName, Path restoreDir)
      throws IOException {
    conf.set(SNAPSHOT_NAME_KEY, snapshotName);

    Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
    FileSystem fs = rootDir.getFileSystem(conf);

    // Restore into a unique subdirectory so concurrent jobs do not collide.
    restoreDir = new Path(restoreDir, UUID.randomUUID().toString());

    // Lay out a read-only restored copy of the snapshot for the scanners.
    RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);

    conf.set(TABLE_DIR_KEY, restoreDir.toString());
  }
}