/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.UUID;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
import org.apache.hadoop.hbase.client.IsolationLevel;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableSnapshotScanner;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.HBaseZeroCopyByteString;

/**
 * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. The job
 * bypasses the HBase servers and reads the region data (table descriptor, region info and
 * hfiles) directly from the filesystem, so it puts no load on a running cluster and also
 * works against an offline one. The snapshot is not restored or cloned into a live table;
 * instead {@link #setInput(Job, String, Path)} materializes the snapshot metadata into a
 * temporary directory, and each map task scans its region with a
 * {@link ClientSideRegionScanner}. Snapshot files exported with {@link ExportSnapshot}
 * can be consumed the same way. For non-MapReduce access to a snapshot, see
 * {@link TableSnapshotScanner}.
 * <p>
 * One input split is created per snapshot region whose key range overlaps the configured
 * {@link Scan}; split locations are derived from the HDFS block locations of the
 * region's files.
 * <p>
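 * A minimal usage sketch; {@code MyMapper} and the restore path below are placeholders,
 * and it assumes the {@code TableMapReduceUtil.initTableSnapshotMapperJob} helper (which
 * serializes the scan into the configuration, sets this input format, and calls
 * {@link #setInput(Job, String, Path)}) is available in this version:
 * <pre>
 *   Job job = new Job(conf);
 *   Scan scan = new Scan(); // regions not overlapping the scan produce no splits
 *   TableMapReduceUtil.initTableSnapshotMapperJob("mySnapshot", scan,
 *     MyMapper.class, ImmutableBytesWritable.class, Result.class, job, true,
 *     new Path("/tmp/snapshot_restore"));
 * </pre>
 * The restore directory can be deleted once the job finishes.
 */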
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class TableSnapshotInputFormat extends InputFormat<ImmutableBytesWritable, Result> {

  private static final Log LOG = LogFactory.getLog(TableSnapshotInputFormat.class);

  // Hosts whose block weight is at least (multiplier * weight of the best host) are
  // reported as split locations; see getBestLocations().
  private static final String LOCALITY_CUTOFF_MULTIPLIER =
    "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
  private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;

  private static final String SNAPSHOT_NAME_KEY = "hbase.TableSnapshotInputFormat.snapshot.name";
  private static final String TABLE_DIR_KEY = "hbase.TableSnapshotInputFormat.table.dir";

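  /**
   * An {@link InputSplit} covering a single snapshot region, carrying the encoded region
   * name and the hosts holding the most local data for that region.
   */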
  public static class TableSnapshotRegionSplit extends InputSplit implements Writable {
    private String regionName;
    private String[] locations;

    // Writable requires a no-arg constructor for deserialization.
    public TableSnapshotRegionSplit() { }

    TableSnapshotRegionSplit(String regionName, List<String> locations) {
      this.regionName = regionName;
      if (locations == null || locations.isEmpty()) {
        this.locations = new String[0];
      } else {
        this.locations = locations.toArray(new String[locations.size()]);
      }
    }

    @Override
    public long getLength() throws IOException, InterruptedException {
      // TODO: the length could be computed from the sizes of the snapshot files
      return 0;
    }

    @Override
    public String[] getLocations() throws IOException, InterruptedException {
      return locations;
    }

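    // Serialization: a 4-byte length prefix followed by a
    // MapReduceProtos.TableSnapshotRegionSplit protobuf message holding the encoded
    // region name (as a RegionSpecifier) and the location hosts.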
    @Override
    public void write(DataOutput out) throws IOException {
      MapReduceProtos.TableSnapshotRegionSplit.Builder builder =
        MapReduceProtos.TableSnapshotRegionSplit.newBuilder()
          .setRegion(RegionSpecifier.newBuilder()
            .setType(RegionSpecifierType.ENCODED_REGION_NAME)
            .setValue(HBaseZeroCopyByteString.wrap(Bytes.toBytes(regionName))).build());

      for (String location : locations) {
        builder.addLocations(location);
      }

      MapReduceProtos.TableSnapshotRegionSplit split = builder.build();

      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      split.writeTo(baos);
      baos.close();
      byte[] buf = baos.toByteArray();
      out.writeInt(buf.length);
      out.write(buf);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
      int len = in.readInt();
      byte[] buf = new byte[len];
      in.readFully(buf);
      MapReduceProtos.TableSnapshotRegionSplit split =
        MapReduceProtos.TableSnapshotRegionSplit.PARSER.parseFrom(buf);
      this.regionName = Bytes.toString(split.getRegion().getValue().toByteArray());
      List<String> locationsList = split.getLocationsList();
      this.locations = locationsList.toArray(new String[locationsList.size()]);
    }
  }

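  /**
   * A {@link RecordReader} that scans one snapshot region with a
   * {@link ClientSideRegionScanner}, reading the restored region files directly from the
   * filesystem rather than going through a region server.
   */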
  @VisibleForTesting
  static class TableSnapshotRegionRecordReader
      extends RecordReader<ImmutableBytesWritable, Result> {
    private TableSnapshotRegionSplit split;
    private Scan scan;
    private Result result = null;
    private ImmutableBytesWritable row = null;
    private ClientSideRegionScanner scanner;
    private TaskAttemptContext context;
    private Method getCounter;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
        InterruptedException {
      Configuration conf = context.getConfiguration();
      this.split = (TableSnapshotRegionSplit) split;
      String regionName = this.split.regionName;
      String snapshotName = getSnapshotName(conf);
      Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
      FileSystem fs = rootDir.getFileSystem(conf);

      // directory into which the snapshot was restored by setInput()
      Path tmpRootDir = new Path(conf.get(TABLE_DIR_KEY));

      Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);

      // load the table descriptor
      HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, snapshotDir);

      // load the region descriptor
      Path regionDir = new Path(snapshotDir, regionName);
      HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);

      // create the scan from the job configuration
      String scanStr = conf.get(TableInputFormat.SCAN);
      if (scanStr == null) {
        throw new IllegalArgumentException("A Scan is not configured for this job");
      }
      scan = TableMapReduceUtil.convertStringToScan(scanStr);

      // the snapshot region is immutable, so READ_UNCOMMITTED is safe here
      scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
      // do not pollute the block cache of this client-side process
      scan.setCacheBlocks(false);

      scanner = new ClientSideRegionScanner(conf, fs, tmpRootDir, htd, hri, scan, null);
      if (context != null) {
        this.context = context;
        getCounter = TableRecordReaderImpl.retrieveGetCounterWithStringsParams(context);
      }
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      result = scanner.next();
      if (result == null) {
        // we are done
        return false;
      }

      if (this.row == null) {
        this.row = new ImmutableBytesWritable();
      }
      this.row.set(result.getRow());

      ScanMetrics scanMetrics = scanner.getScanMetrics();
      if (scanMetrics != null && context != null) {
        TableRecordReaderImpl.updateCounters(scanMetrics, 0, getCounter, context);
      }

      return true;
    }

    @Override
    public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
      return row;
    }

    @Override
    public Result getCurrentValue() throws IOException, InterruptedException {
      return result;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      // TODO: progress could be estimated from the bytes read so far
      return 0;
    }

    @Override
    public void close() throws IOException {
      if (this.scanner != null) {
        this.scanner.close();
      }
    }
  }

  @Override
  public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
      InputSplit split, TaskAttemptContext context) throws IOException {
    return new TableSnapshotRegionRecordReader();
  }

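  /**
   * Creates one split per snapshot region whose key range overlaps the configured scan,
   * using the HDFS block distribution of the region's files to pick up to three preferred
   * hosts per split.
   */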
  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    String snapshotName = getSnapshotName(conf);

    Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
    FileSystem fs = rootDir.getFileSystem(conf);

    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    // fails fast if the snapshot does not exist or cannot be read
    SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);

    Set<String> snapshotRegionNames =
      SnapshotReferenceUtil.getSnapshotRegionNames(fs, snapshotDir);
    if (snapshotRegionNames == null) {
      throw new IllegalArgumentException("Snapshot seems empty");
    }

    // load the table descriptor
    HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, snapshotDir);

    Scan scan = TableMapReduceUtil.convertStringToScan(conf.get(TableInputFormat.SCAN));
    Path tableDir = new Path(conf.get(TABLE_DIR_KEY));

    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (String regionName : snapshotRegionNames) {
      // load the region descriptor
      Path regionDir = new Path(snapshotDir, regionName);
      HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);

      if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(),
          hri.getStartKey(), hri.getEndKey())) {
        // compute HDFS locations from the snapshot files (this also resolves the
        // locations of the referenced hfiles)
        List<String> hosts = getBestLocations(conf,
          HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));

        // cap at three locations, matching the typical HDFS replication factor
        int len = Math.min(3, hosts.size());
        hosts = hosts.subList(0, len);
        splits.add(new TableSnapshotRegionSplit(regionName, hosts));
      }
    }

    return splits;
  }

  /**
   * Computes the host locations to advertise for a split. Schedulers treat every location
   * of a split as equally good, so rather than returning every host that has any block of
   * the region, only hosts whose total block weight is at least
   * {@code hbase.tablesnapshotinputformat.locality.cutoff.multiplier} (default 0.8) times
   * the weight of the best host are returned. For example, with host weights {10, 7, 5}
   * and the default multiplier, the cutoff is 8, so only the first host qualifies.
   */
  @VisibleForTesting
  List<String> getBestLocations(Configuration conf, HDFSBlocksDistribution blockDistribution) {
    List<String> locations = new ArrayList<String>(3);

    HostAndWeight[] hostAndWeights = blockDistribution.getTopHostsWithWeights();

    if (hostAndWeights.length == 0) {
      return locations;
    }

    HostAndWeight topHost = hostAndWeights[0];
    locations.add(topHost.getHost());

    // hosts are sorted by weight in descending order, so we can stop at the first
    // host that falls below the cutoff
    double cutoffMultiplier =
      conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER);

    double filterWeight = topHost.getWeight() * cutoffMultiplier;

    for (int i = 1; i < hostAndWeights.length; i++) {
      if (hostAndWeights[i].getWeight() >= filterWeight) {
        locations.add(hostAndWeights[i].getHost());
      } else {
        break;
      }
    }

    return locations;
  }

  /**
   * Configures the job to use TableSnapshotInputFormat to read from the given snapshot.
   * @param job the job to configure
   * @param snapshotName the name of the snapshot to read from
   * @param restoreDir a temporary directory to restore the snapshot into. The current user
   *   should have write permission to this directory, and it should not be a subdirectory
   *   of the HBase root dir. It can be deleted after the job finishes.
   * @throws IOException if restoring the snapshot fails
   */
  public static void setInput(Job job, String snapshotName, Path restoreDir) throws IOException {
    Configuration conf = job.getConfiguration();
    conf.set(SNAPSHOT_NAME_KEY, snapshotName);

    Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
    FileSystem fs = rootDir.getFileSystem(conf);

    // restore into a unique subdirectory so that concurrent jobs do not collide
    restoreDir = new Path(restoreDir, UUID.randomUUID().toString());

    // copy the snapshot metadata and create reference files for the region hfiles
    RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);

    conf.set(TABLE_DIR_KEY, restoreDir.toString());
  }

  private static String getSnapshotName(Configuration conf) {
    String snapshotName = conf.get(SNAPSHOT_NAME_KEY);
    if (snapshotName == null) {
      throw new IllegalArgumentException("Snapshot name must be provided");
    }
    return snapshotName;
  }
}