1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import com.google.common.collect.Lists;
22
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.conf.Configuration;
26 import org.apache.hadoop.fs.FileSystem;
27 import org.apache.hadoop.fs.Path;
28 import org.apache.hadoop.hbase.CellUtil;
29 import org.apache.hadoop.hbase.HConstants;
30 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
31 import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
32 import org.apache.hadoop.hbase.HRegionInfo;
33 import org.apache.hadoop.hbase.HTableDescriptor;
34 import org.apache.hadoop.hbase.classification.InterfaceAudience;
35 import org.apache.hadoop.hbase.classification.InterfaceStability;
36 import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
37 import org.apache.hadoop.hbase.client.IsolationLevel;
38 import org.apache.hadoop.hbase.client.Result;
39 import org.apache.hadoop.hbase.client.Scan;
40 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
41 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
42 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
43 import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos;
44 import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos.TableSnapshotRegionSplit;
45 import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
46 import org.apache.hadoop.hbase.regionserver.HRegion;
47 import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
48 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
49 import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
50 import org.apache.hadoop.hbase.util.Bytes;
51 import org.apache.hadoop.hbase.util.FSUtils;
52 import org.apache.hadoop.io.Writable;
53
54 import java.io.ByteArrayOutputStream;
55 import java.io.DataInput;
56 import java.io.DataOutput;
57 import java.io.IOException;
58 import java.util.ArrayList;
59 import java.util.List;
60 import java.util.UUID;
61
62
63
64
65 @InterfaceAudience.Private
66 @InterfaceStability.Evolving
67 public class TableSnapshotInputFormatImpl {
68
69
70
71 public static final Log LOG = LogFactory.getLog(TableSnapshotInputFormatImpl.class);
72
73 private static final String SNAPSHOT_NAME_KEY = "hbase.TableSnapshotInputFormat.snapshot.name";
74
75 protected static final String RESTORE_DIR_KEY = "hbase.TableSnapshotInputFormat.restore.dir";
76
77
78 private static final String LOCALITY_CUTOFF_MULTIPLIER = "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
79 private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;
80
81
82
83
84 public static class InputSplit implements Writable {
85
86 private HTableDescriptor htd;
87 private HRegionInfo regionInfo;
88 private String[] locations;
89 private String scan;
90 private String restoreDir;
91
92
93 public InputSplit() { }
94
95 public InputSplit(HTableDescriptor htd, HRegionInfo regionInfo, List<String> locations,
96 Scan scan, Path restoreDir) {
97 this.htd = htd;
98 this.regionInfo = regionInfo;
99 if (locations == null || locations.isEmpty()) {
100 this.locations = new String[0];
101 } else {
102 this.locations = locations.toArray(new String[locations.size()]);
103 }
104 try {
105 this.scan = scan != null ? TableMapReduceUtil.convertScanToString(scan) : "";
106 } catch (IOException e) {
107 LOG.warn("Failed to convert Scan to String", e);
108 }
109
110 this.restoreDir = restoreDir.toString();
111 }
112
113 public HTableDescriptor getHtd() {
114 return htd;
115 }
116
117 public String getScan() {
118 return scan;
119 }
120
121 public String getRestoreDir() {
122 return restoreDir;
123 }
124
125 public long getLength() {
126
127 return 0;
128 }
129
130 public String[] getLocations() {
131 return locations;
132 }
133
134 public HTableDescriptor getTableDescriptor() {
135 return htd;
136 }
137
138 public HRegionInfo getRegionInfo() {
139 return regionInfo;
140 }
141
142
143
144 @Override
145 public void write(DataOutput out) throws IOException {
146 MapReduceProtos.TableSnapshotRegionSplit.Builder builder = MapReduceProtos.TableSnapshotRegionSplit.newBuilder()
147 .setTable(htd.convert())
148 .setRegion(HRegionInfo.convert(regionInfo));
149
150 for (String location : locations) {
151 builder.addLocations(location);
152 }
153
154 MapReduceProtos.TableSnapshotRegionSplit split = builder.build();
155
156 ByteArrayOutputStream baos = new ByteArrayOutputStream();
157 split.writeTo(baos);
158 baos.close();
159 byte[] buf = baos.toByteArray();
160 out.writeInt(buf.length);
161 out.write(buf);
162
163 Bytes.writeByteArray(out, Bytes.toBytes(scan));
164 Bytes.writeByteArray(out, Bytes.toBytes(restoreDir));
165
166 }
167
168 @Override
169 public void readFields(DataInput in) throws IOException {
170 int len = in.readInt();
171 byte[] buf = new byte[len];
172 in.readFully(buf);
173 TableSnapshotRegionSplit split = TableSnapshotRegionSplit.PARSER.parseFrom(buf);
174 this.htd = HTableDescriptor.convert(split.getTable());
175 this.regionInfo = HRegionInfo.convert(split.getRegion());
176 List<String> locationsList = split.getLocationsList();
177 this.locations = locationsList.toArray(new String[locationsList.size()]);
178
179 this.scan = Bytes.toString(Bytes.readByteArray(in));
180 this.restoreDir = Bytes.toString(Bytes.readByteArray(in));
181 }
182 }
183
184
185
186
187 public static class RecordReader {
188 InputSplit split;
189 private Scan scan;
190 private Result result = null;
191 private ImmutableBytesWritable row = null;
192 private ClientSideRegionScanner scanner;
193
194 public ClientSideRegionScanner getScanner() {
195 return scanner;
196 }
197
198 public void initialize(InputSplit split, Configuration conf) throws IOException {
199 this.scan = TableMapReduceUtil.convertStringToScan(split.getScan());
200 this.split = split;
201 HTableDescriptor htd = split.htd;
202 HRegionInfo hri = this.split.getRegionInfo();
203 FileSystem fs = FSUtils.getCurrentFileSystem(conf);
204
205
206
207
208 scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
209
210 scan.setCacheBlocks(false);
211
212 scanner =
213 new ClientSideRegionScanner(conf, fs, new Path(split.restoreDir), htd, hri, scan, null);
214 }
215
216 public boolean nextKeyValue() throws IOException {
217 result = scanner.next();
218 if (result == null) {
219
220 return false;
221 }
222
223 if (this.row == null) {
224 this.row = new ImmutableBytesWritable();
225 }
226 this.row.set(result.getRow());
227 return true;
228 }
229
230 public ImmutableBytesWritable getCurrentKey() {
231 return row;
232 }
233
234 public Result getCurrentValue() {
235 return result;
236 }
237
238 public long getPos() {
239 return 0;
240 }
241
242 public float getProgress() {
243 return 0;
244 }
245
246 public void close() {
247 if (this.scanner != null) {
248 this.scanner.close();
249 }
250 }
251 }
252
253 public static List<InputSplit> getSplits(Configuration conf) throws IOException {
254 String snapshotName = getSnapshotName(conf);
255
256 Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
257 FileSystem fs = rootDir.getFileSystem(conf);
258
259 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
260 HBaseProtos.SnapshotDescription snapshotDesc =
261 SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
262 SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
263
264 List<HRegionInfo> regionInfos = getRegionInfosFromManifest(manifest);
265
266
267 Scan scan = extractScanFromConf(conf);
268
269 Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY));
270
271 return getSplits(scan, manifest, regionInfos, restoreDir, conf);
272 }
273
274 public static List<HRegionInfo> getRegionInfosFromManifest(SnapshotManifest manifest) {
275 List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
276
277 if (regionManifests == null) {
278 throw new IllegalArgumentException("Snapshot seems empty");
279 }
280
281 List<HRegionInfo> regionInfos = Lists.newArrayListWithCapacity(regionManifests.size());
282
283 for (SnapshotRegionManifest regionManifest : regionManifests) {
284 regionInfos.add(HRegionInfo.convert(regionManifest.getRegionInfo()));
285 }
286 return regionInfos;
287 }
288
289 public static SnapshotManifest getSnapshotManifest(Configuration conf, String snapshotName,
290 Path rootDir, FileSystem fs) throws IOException {
291 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
292 SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
293 return SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
294 }
295
296 public static Scan extractScanFromConf(Configuration conf) throws IOException {
297 Scan scan = null;
298 if (conf.get(TableInputFormat.SCAN) != null) {
299 scan = TableMapReduceUtil.convertStringToScan(conf.get(TableInputFormat.SCAN));
300 } else if (conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST) != null) {
301 String[] columns =
302 conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST).split(" ");
303 scan = new Scan();
304 for (String col : columns) {
305 scan.addFamily(Bytes.toBytes(col));
306 }
307 } else {
308 throw new IllegalArgumentException("Unable to create scan");
309 }
310 return scan;
311 }
312
313 public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
314 List<HRegionInfo> regionManifests, Path restoreDir, Configuration conf) throws IOException {
315
316 HTableDescriptor htd = manifest.getTableDescriptor();
317
318 Path tableDir = FSUtils.getTableDir(restoreDir, htd.getTableName());
319
320 List<InputSplit> splits = new ArrayList<InputSplit>();
321 for (HRegionInfo hri : regionManifests) {
322
323
324 if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
325 hri.getEndKey())) {
326
327
328 List<String> hosts = getBestLocations(conf,
329 HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
330
331 int len = Math.min(3, hosts.size());
332 hosts = hosts.subList(0, len);
333 splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
334 }
335 }
336
337 return splits;
338
339 }
340
341
342
343
344
345
346
347
348
349
350
351
352 public static List<String> getBestLocations(
353 Configuration conf, HDFSBlocksDistribution blockDistribution) {
354 List<String> locations = new ArrayList<String>(3);
355
356 HostAndWeight[] hostAndWeights = blockDistribution.getTopHostsWithWeights();
357
358 if (hostAndWeights.length == 0) {
359 return locations;
360 }
361
362 HostAndWeight topHost = hostAndWeights[0];
363 locations.add(topHost.getHost());
364
365
366 double cutoffMultiplier
367 = conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER);
368
369 double filterWeight = topHost.getWeight() * cutoffMultiplier;
370
371 for (int i = 1; i < hostAndWeights.length; i++) {
372 if (hostAndWeights[i].getWeight() >= filterWeight) {
373 locations.add(hostAndWeights[i].getHost());
374 } else {
375 break;
376 }
377 }
378
379 return locations;
380 }
381
382 private static String getSnapshotName(Configuration conf) {
383 String snapshotName = conf.get(SNAPSHOT_NAME_KEY);
384 if (snapshotName == null) {
385 throw new IllegalArgumentException("Snapshot name must be provided");
386 }
387 return snapshotName;
388 }
389
390
391
392
393
394
395
396
397
398
399 public static void setInput(Configuration conf, String snapshotName, Path restoreDir)
400 throws IOException {
401 conf.set(SNAPSHOT_NAME_KEY, snapshotName);
402
403 Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
404 FileSystem fs = rootDir.getFileSystem(conf);
405
406 restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
407
408
409 RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
410
411 conf.set(RESTORE_DIR_KEY, restoreDir.toString());
412 }
413 }