/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.UUID;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
import org.apache.hadoop.hbase.client.IsolationLevel;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableSnapshotScanner;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.ByteString;
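/**
 * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. The job bypasses
 * the region servers and reads the region data files directly from the file system, using a
 * {@link ClientSideRegionScanner} over a restored copy of the snapshot.
 * <p>
 * The snapshot is first restored (via {@link RestoreSnapshotHelper#copySnapshotForScanner}) into
 * a caller-supplied restore directory by {@link #setInput(Job, String, Path)}, which also records
 * the snapshot name and restore directory in the job configuration. The job must additionally
 * carry a serialized {@link Scan} under {@link TableInputFormat#SCAN}. A minimal setup sketch
 * (the snapshot name and restore path below are placeholders):
 * <pre>
 *   Job job = new Job(conf);
 *   Scan scan = new Scan();
 *   job.setInputFormatClass(TableSnapshotInputFormat.class);
 *   job.getConfiguration().set(TableInputFormat.SCAN,
 *     TableMapReduceUtil.convertScanToString(scan));
 *   TableSnapshotInputFormat.setInput(job, "my_snapshot", new Path("/tmp/snapshot_restore"));
 * </pre>
 * One input split is created per snapshot region whose key range overlaps the scan, and split
 * locations are derived from the HDFS block distribution of the region's files so that tasks can
 * be scheduled close to the data.
 * <p>
 * Note that the job runs with the permissions of the submitting user, which therefore needs read
 * access to the snapshot files under the HBase root directory and write access to the restore
 * directory.
 */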
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class TableSnapshotInputFormat extends InputFormat<ImmutableBytesWritable, Result> {

  private static final Log LOG = LogFactory.getLog(TableSnapshotInputFormat.class);

  private static final String LOCALITY_CUTOFF_MULTIPLIER =
    "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
  private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;

  private static final String SNAPSHOT_NAME_KEY = "hbase.TableSnapshotInputFormat.snapshot.name";
  private static final String TABLE_DIR_KEY = "hbase.TableSnapshotInputFormat.table.dir";

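  /**
   * An input split that corresponds to a single region of the snapshotted table. The split is
   * serialized as a {@link MapReduceProtos.TableSnapshotRegionSplit} protobuf message and carries
   * the encoded region name plus the preferred host locations for the region's data blocks.
   */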
  public static class TableSnapshotRegionSplit extends InputSplit implements Writable {
    private String regionName;
    private String[] locations;

    // Writables need a no-arg constructor for deserialization.
    public TableSnapshotRegionSplit() { }

    TableSnapshotRegionSplit(String regionName, List<String> locations) {
      this.regionName = regionName;
      if (locations == null || locations.isEmpty()) {
        this.locations = new String[0];
      } else {
        this.locations = locations.toArray(new String[locations.size()]);
      }
    }

    @Override
    public long getLength() throws IOException, InterruptedException {
      // Split sizes are not tracked; region data sizes are not computed here.
      return 0;
    }

    @Override
    public String[] getLocations() throws IOException, InterruptedException {
      return locations;
    }

    @Override
    public void write(DataOutput out) throws IOException {
      // Serialize as a length-prefixed TableSnapshotRegionSplit protobuf message.
      MapReduceProtos.TableSnapshotRegionSplit.Builder builder =
        MapReduceProtos.TableSnapshotRegionSplit.newBuilder()
          .setRegion(RegionSpecifier.newBuilder()
            .setType(RegionSpecifierType.ENCODED_REGION_NAME)
            .setValue(ByteString.copyFrom(Bytes.toBytes(regionName))).build());

      for (String location : locations) {
        builder.addLocations(location);
      }

      MapReduceProtos.TableSnapshotRegionSplit split = builder.build();

      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      split.writeTo(baos);
      baos.close();
      byte[] buf = baos.toByteArray();
      out.writeInt(buf.length);
      out.write(buf);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
      int len = in.readInt();
      byte[] buf = new byte[len];
      in.readFully(buf);
      MapReduceProtos.TableSnapshotRegionSplit split =
        MapReduceProtos.TableSnapshotRegionSplit.PARSER.parseFrom(buf);
      this.regionName = Bytes.toString(split.getRegion().getValue().toByteArray());
      List<String> locationsList = split.getLocationsList();
      this.locations = locationsList.toArray(new String[locationsList.size()]);
    }
  }

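  /**
   * A {@link RecordReader} that iterates over a single snapshot region. It reads the region's
   * store files directly from the restored snapshot on the file system through a
   * {@link ClientSideRegionScanner}; no region server is contacted.
   */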
  @VisibleForTesting
  class TableSnapshotRegionRecordReader extends RecordReader<ImmutableBytesWritable, Result> {
    private TableSnapshotRegionSplit split;
    private Scan scan;
    private Result result = null;
    private ImmutableBytesWritable row = null;
    private ClientSideRegionScanner scanner;
    private TaskAttemptContext context;
    private Method getCounter;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
        InterruptedException {
      Configuration conf = context.getConfiguration();
      this.split = (TableSnapshotRegionSplit) split;
      String regionName = this.split.regionName;
      String snapshotName = getSnapshotName(conf);
      Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
      FileSystem fs = rootDir.getFileSystem(conf);

      // Directory into which the snapshot was restored by setInput().
      Path tmpRootDir = new Path(conf.get(TABLE_DIR_KEY));

      // Directory of the completed snapshot under the HBase root dir.
      Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);

      // Load the table descriptor stored with the snapshot.
      HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, snapshotDir);

      // Load the region info for the region this split refers to.
      Path regionDir = new Path(snapshotDir, regionName);
      HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);

      // The job must carry a serialized Scan.
      String scanStr = conf.get(TableInputFormat.SCAN);
      if (scanStr == null) {
        throw new IllegalArgumentException("A Scan is not configured for this job");
      }
      scan = TableMapReduceUtil.convertStringToScan(scanStr);
      // Snapshot files are immutable, so reading uncommitted data is safe here.
      scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);

      // Scan the restored copy of the region directly from the file system.
      scanner = new ClientSideRegionScanner(conf, fs, tmpRootDir, htd, hri, scan, null);
      if (context != null) {
        this.context = context;
        getCounter = TableRecordReaderImpl.retrieveGetCounterWithStringsParams(context);
      }
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      result = scanner.next();
      if (result == null) {
        // We are done reading this region.
        return false;
      }

      if (this.row == null) {
        this.row = new ImmutableBytesWritable();
      }
      this.row.set(result.getRow());

      // Propagate scan metrics into the task's counters, if available.
      ScanMetrics scanMetrics = scanner.getScanMetrics();
      if (scanMetrics != null && context != null) {
        TableRecordReaderImpl.updateCounters(scanMetrics, 0, getCounter, context);
      }

      return true;
    }

    @Override
    public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
      return row;
    }

    @Override
    public Result getCurrentValue() throws IOException, InterruptedException {
      return result;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      // Progress within the region is not estimated.
      return 0;
    }

    @Override
    public void close() throws IOException {
      if (this.scanner != null) {
        this.scanner.close();
      }
    }
  }

  @Override
  public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
      InputSplit split, TaskAttemptContext context) throws IOException {
    return new TableSnapshotRegionRecordReader();
  }

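  /**
   * Creates one split per snapshot region whose key range overlaps the configured scan. Split
   * locations are derived from the HDFS block distribution of the region's files under the
   * restored table directory.
   */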
  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    String snapshotName = getSnapshotName(conf);

    Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
    FileSystem fs = rootDir.getFileSystem(conf);

    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);

    Set<String> snapshotRegionNames
      = SnapshotReferenceUtil.getSnapshotRegionNames(fs, snapshotDir);
    if (snapshotRegionNames == null) {
      throw new IllegalArgumentException("Snapshot seems empty");
    }

    // Load the table descriptor stored with the snapshot.
    HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, snapshotDir);

    Scan scan = TableMapReduceUtil.convertStringToScan(conf.get(TableInputFormat.SCAN));
    Path tableDir = new Path(conf.get(TABLE_DIR_KEY));

    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (String regionName : snapshotRegionNames) {
      // Load the region info and create a split only if the region overlaps the scan's range.
      Path regionDir = new Path(snapshotDir, regionName);
      HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);

      if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(),
          hri.getStartKey(), hri.getEndKey())) {
        // Compute split locations from the HDFS block distribution of the restored region files,
        // passing at most 3 hosts per split.
        List<String> hosts = getBestLocations(conf,
          HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));

        int len = Math.min(3, hosts.size());
        hosts = hosts.subList(0, len);
        splits.add(new TableSnapshotRegionSplit(regionName, hosts));
      }
    }

    return splits;
  }

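  /**
   * Computes the list of hosts to advertise for a split, based on the HDFS block distribution of
   * the region's files. The top host is always included; additional hosts are included only while
   * their weight stays within {@code hbase.tablesnapshotinputformat.locality.cutoff.multiplier}
   * (default 0.8) of the top host's weight, since the MR/Yarn schedulers treat all advertised
   * locations as equally good.
   */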
  @VisibleForTesting
  List<String> getBestLocations(Configuration conf, HDFSBlocksDistribution blockDistribution) {
    List<String> locations = new ArrayList<String>(3);

    HostAndWeight[] hostAndWeights = blockDistribution.getTopHostsWithWeights();

    if (hostAndWeights.length == 0) {
      return locations;
    }

    HostAndWeight topHost = hostAndWeights[0];
    locations.add(topHost.getHost());

    // Only keep hosts whose weight is at least cutoffMultiplier times the top host's weight.
    double cutoffMultiplier
      = conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER);

    double filterWeight = topHost.getWeight() * cutoffMultiplier;

    for (int i = 1; i < hostAndWeights.length; i++) {
      if (hostAndWeights[i].getWeight() >= filterWeight) {
        locations.add(hostAndWeights[i].getHost());
      } else {
        // Hosts are ordered by descending weight, so the remaining ones are below the cutoff too.
        break;
      }
    }

    return locations;
  }

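  /**
   * Configures the job to use TableSnapshotInputFormat to read from the given snapshot.
   * @param job the job to configure
   * @param snapshotName the name of the snapshot to read from
   * @param restoreDir a temporary directory to restore the snapshot into. The current user should
   *   have write permissions to this directory, and it should not be a subdirectory of the HBase
   *   root directory. The snapshot is restored into a randomly named subdirectory, which can be
   *   deleted once the job has completed.
   * @throws IOException if the snapshot cannot be restored
   */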
  public static void setInput(Job job, String snapshotName, Path restoreDir) throws IOException {
    Configuration conf = job.getConfiguration();
    conf.set(SNAPSHOT_NAME_KEY, snapshotName);

    Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
    FileSystem fs = rootDir.getFileSystem(conf);

    restoreDir = new Path(restoreDir, UUID.randomUUID().toString());

    // Restore the snapshot into the unique restore directory so it can be scanned directly.
    RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);

    conf.set(TABLE_DIR_KEY, restoreDir.toString());
  }

  private static String getSnapshotName(Configuration conf) {
    String snapshotName = conf.get(SNAPSHOT_NAME_KEY);
    if (snapshotName == null) {
      throw new IllegalArgumentException("Snapshot name must be provided");
    }
    return snapshotName;
  }
}