/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapreduce;

import com.google.common.collect.Lists;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
import org.apache.hadoop.hbase.client.IsolationLevel;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos;
import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos.TableSnapshotRegionSplit;
import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.Writable;

import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

/**
 * API-agnostic implementation for mapreduce over table snapshots.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class TableSnapshotInputFormatImpl {
  // TODO: Snapshot files are owned in the filesystem by the hbase user. There is no
  // easy way to delegate access.

  public static final Log LOG = LogFactory.getLog(TableSnapshotInputFormatImpl.class);

  private static final String SNAPSHOT_NAME_KEY = "hbase.TableSnapshotInputFormat.snapshot.name";
  // key for specifying the root dir of the restored snapshot
  protected static final String RESTORE_DIR_KEY = "hbase.TableSnapshotInputFormat.restore.dir";

  /** See {@link #getBestLocations(Configuration, HDFSBlocksDistribution)} */
  private static final String LOCALITY_CUTOFF_MULTIPLIER =
      "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
  private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;

  /**
   * Implementation class for InputSplit logic common between mapred and mapreduce.
   */
  public static class InputSplit implements Writable {

    private HTableDescriptor htd;
    private HRegionInfo regionInfo;
    private String[] locations;
    private String scan;
    private String restoreDir;

    // constructor for mapreduce framework / Writable
    public InputSplit() { }

    public InputSplit(HTableDescriptor htd, HRegionInfo regionInfo, List<String> locations,
        Scan scan, Path restoreDir) {
      this.htd = htd;
      this.regionInfo = regionInfo;
      if (locations == null || locations.isEmpty()) {
        this.locations = new String[0];
      } else {
        this.locations = locations.toArray(new String[locations.size()]);
      }
      try {
        this.scan = scan != null ? TableMapReduceUtil.convertScanToString(scan) : "";
      } catch (IOException e) {
        LOG.warn("Failed to convert Scan to String", e);
        // fall back to an empty scan string so serialization in write() does not hit a null
        this.scan = "";
      }

      this.restoreDir = restoreDir.toString();
    }

    public HTableDescriptor getHtd() {
      return htd;
    }

    public String getScan() {
      return scan;
    }

    public String getRestoreDir() {
      return restoreDir;
    }

    public long getLength() {
      // TODO: We can obtain the file sizes of the snapshot here.
      return 0;
    }

    public String[] getLocations() {
      return locations;
    }

    public HTableDescriptor getTableDescriptor() {
      return htd;
    }

    public HRegionInfo getRegionInfo() {
      return regionInfo;
    }

    // TODO: We should have ProtobufSerialization in Hadoop, and directly use PB objects instead of
    // doing this wrapping with Writables.
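    // Wire format: a 4-byte length followed by the serialized TableSnapshotRegionSplit protobuf,
    // then the scan string and the restore directory written with Bytes.writeByteArray.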
    @Override
    public void write(DataOutput out) throws IOException {
      MapReduceProtos.TableSnapshotRegionSplit.Builder builder =
          MapReduceProtos.TableSnapshotRegionSplit.newBuilder()
              .setTable(htd.convert())
              .setRegion(HRegionInfo.convert(regionInfo));

      for (String location : locations) {
        builder.addLocations(location);
      }

      MapReduceProtos.TableSnapshotRegionSplit split = builder.build();

      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      split.writeTo(baos);
      baos.close();
      byte[] buf = baos.toByteArray();
      out.writeInt(buf.length);
      out.write(buf);

      Bytes.writeByteArray(out, Bytes.toBytes(scan));
      Bytes.writeByteArray(out, Bytes.toBytes(restoreDir));
    }

    @Override
    public void readFields(DataInput in) throws IOException {
      int len = in.readInt();
      byte[] buf = new byte[len];
      in.readFully(buf);
      TableSnapshotRegionSplit split = TableSnapshotRegionSplit.PARSER.parseFrom(buf);
      this.htd = HTableDescriptor.convert(split.getTable());
      this.regionInfo = HRegionInfo.convert(split.getRegion());
      List<String> locationsList = split.getLocationsList();
      this.locations = locationsList.toArray(new String[locationsList.size()]);

      this.scan = Bytes.toString(Bytes.readByteArray(in));
      this.restoreDir = Bytes.toString(Bytes.readByteArray(in));
    }
  }

  /**
   * Implementation class for RecordReader logic common between mapred and mapreduce.
   */
  public static class RecordReader {
    InputSplit split;
    private Scan scan;
    private Result result = null;
    private ImmutableBytesWritable row = null;
    private ClientSideRegionScanner scanner;

    public ClientSideRegionScanner getScanner() {
      return scanner;
    }

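    /**
     * Deserializes the {@link Scan} carried by the split and opens a
     * {@link ClientSideRegionScanner} over the restored region referenced by the split.
     */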
    public void initialize(InputSplit split, Configuration conf) throws IOException {
      this.scan = TableMapReduceUtil.convertStringToScan(split.getScan());
      this.split = split;
      HTableDescriptor htd = split.htd;
      HRegionInfo hri = this.split.getRegionInfo();
      FileSystem fs = FSUtils.getCurrentFileSystem(conf);

      // region is immutable, this should be fine,
      // otherwise we have to set the thread read point
      scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
      // disable caching of data blocks
      scan.setCacheBlocks(false);

      scanner =
          new ClientSideRegionScanner(conf, fs, new Path(split.restoreDir), htd, hri, scan, null);
    }

    public boolean nextKeyValue() throws IOException {
      result = scanner.next();
      if (result == null) {
        // we are done
        return false;
      }

      if (this.row == null) {
        this.row = new ImmutableBytesWritable();
      }
      this.row.set(result.getRow());
      return true;
    }

    public ImmutableBytesWritable getCurrentKey() {
      return row;
    }

    public Result getCurrentValue() {
      return result;
    }

    public long getPos() {
      return 0;
    }

    public float getProgress() {
      return 0; // TODO: use total bytes to estimate
    }

    public void close() {
      if (this.scanner != null) {
        this.scanner.close();
      }
    }
  }

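  /**
   * Computes the splits for the snapshot configured in the job: opens the snapshot manifest under
   * the HBase root directory and creates one split per region that overlaps the configured scan.
   */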
  public static List<InputSplit> getSplits(Configuration conf) throws IOException {
    String snapshotName = getSnapshotName(conf);

    Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
    FileSystem fs = rootDir.getFileSystem(conf);

    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    HBaseProtos.SnapshotDescription snapshotDesc =
      SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);

    List<HRegionInfo> regionInfos = getRegionInfosFromManifest(manifest);

    // TODO: mapred does not support scan as input API. Work around for now.
    Scan scan = extractScanFromConf(conf);
    // the temp dir where the snapshot is restored
    Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY));

    return getSplits(scan, manifest, regionInfos, restoreDir, conf);
  }

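  /**
   * Extracts the {@link HRegionInfo} of every region recorded in the snapshot manifest.
   */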
  public static List<HRegionInfo> getRegionInfosFromManifest(SnapshotManifest manifest) {
    List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();

    if (regionManifests == null) {
      throw new IllegalArgumentException("Snapshot seems empty");
    }

    List<HRegionInfo> regionInfos = Lists.newArrayListWithCapacity(regionManifests.size());

    for (SnapshotRegionManifest regionManifest : regionManifests) {
      regionInfos.add(HRegionInfo.convert(regionManifest.getRegionInfo()));
    }
    return regionInfos;
  }

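  /**
   * Opens the manifest of a completed snapshot stored under the given HBase root directory.
   */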
  public static SnapshotManifest getSnapshotManifest(Configuration conf, String snapshotName,
      Path rootDir, FileSystem fs) throws IOException {
    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
    return SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
  }

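  /**
   * Builds the {@link Scan} to run over the snapshot: either the serialized scan stored under the
   * mapreduce {@link TableInputFormat#SCAN} key, or a scan over the column families listed under
   * the mapred COLUMN_LIST key.
   */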
  public static Scan extractScanFromConf(Configuration conf) throws IOException {
    Scan scan = null;
    if (conf.get(TableInputFormat.SCAN) != null) {
      scan = TableMapReduceUtil.convertStringToScan(conf.get(TableInputFormat.SCAN));
    } else if (conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST) != null) {
      String[] columns =
        conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST).split(" ");
      scan = new Scan();
      for (String col : columns) {
        scan.addFamily(Bytes.toBytes(col));
      }
    } else {
      throw new IllegalArgumentException("Unable to create scan");
    }
    return scan;
  }

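  /**
   * Creates one {@link InputSplit} per region in the snapshot whose key range overlaps the scan,
   * with split locations derived from the HDFS block locations of the region's files.
   */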
  public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
      List<HRegionInfo> regionManifests, Path restoreDir, Configuration conf) throws IOException {
    // load table descriptor
    HTableDescriptor htd = manifest.getTableDescriptor();

    Path tableDir = FSUtils.getTableDir(restoreDir, htd.getTableName());

    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (HRegionInfo hri : regionManifests) {
      // load region descriptor

      if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
          hri.getEndKey())) {
        // compute HDFS locations from snapshot files (which will get the locations for
        // referred hfiles)
        List<String> hosts = getBestLocations(conf,
            HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));

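        // Pass at most three locations per split (an assumption here: this roughly matches the
        // typical HDFS replication factor); getBestLocations ordered them by descending locality.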
        int len = Math.min(3, hosts.size());
        hosts = hosts.subList(0, len);
        splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
      }
    }

    return splits;
  }

  /**
   * Computes the locations to be passed from the InputSplit. MR/YARN schedulers do not take
   * weights into account, so they treat every location passed from the input split as equal. We
   * do not want to blindly pass all the locations, because we create one split per region and the
   * region's blocks are distributed throughout the cluster unless favored node assignment is
   * used. In the expected stable case, only one location will hold most of the blocks locally;
   * with favored node assignment, three nodes will hold highly local blocks. Here we apply a
   * simple heuristic: we pass every host whose block locality is at least 80%
   * (hbase.tablesnapshotinputformat.locality.cutoff.multiplier) of that of the host with the best
   * locality.
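   * <p>
   * For example, with the default multiplier of 0.8, if the top host holds blocks with a total
   * weight of 100, every subsequent host with a weight of at least 80 is also reported; hosts are
   * examined in descending weight order and the scan stops at the first one below the cutoff.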
   */
  public static List<String> getBestLocations(
      Configuration conf, HDFSBlocksDistribution blockDistribution) {
    List<String> locations = new ArrayList<String>(3);

    HostAndWeight[] hostAndWeights = blockDistribution.getTopHostsWithWeights();

    if (hostAndWeights.length == 0) {
      return locations;
    }

    HostAndWeight topHost = hostAndWeights[0];
    locations.add(topHost.getHost());

    // Heuristic: filter all hosts which have at least cutoffMultiplier % of block locality
    double cutoffMultiplier =
        conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER);

    double filterWeight = topHost.getWeight() * cutoffMultiplier;

    for (int i = 1; i < hostAndWeights.length; i++) {
      if (hostAndWeights[i].getWeight() >= filterWeight) {
        locations.add(hostAndWeights[i].getHost());
      } else {
        break;
      }
    }

    return locations;
  }

  private static String getSnapshotName(Configuration conf) {
    String snapshotName = conf.get(SNAPSHOT_NAME_KEY);
    if (snapshotName == null) {
      throw new IllegalArgumentException("Snapshot name must be provided");
    }
    return snapshotName;
  }

  /**
   * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
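   * <p>
   * A minimal usage sketch (the snapshot name and restore path below are only illustrative):
   * <pre>
   * Configuration conf = ...; // the job's configuration
   * TableSnapshotInputFormatImpl.setInput(conf, "my_snapshot", new Path("/tmp/snapshot_restore"));
   * </pre>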
   * @param conf the job configuration to modify
   * @param snapshotName the name of the snapshot to read from
   * @param restoreDir a temporary directory to restore the snapshot into. The current user should
   *   have write permission to this directory, and it should not be a subdirectory of the HBase
   *   root directory. The restore directory can be deleted once the job has finished.
   * @throws IOException if an error occurs
   */
  public static void setInput(Configuration conf, String snapshotName, Path restoreDir)
      throws IOException {
    conf.set(SNAPSHOT_NAME_KEY, snapshotName);

    Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
    FileSystem fs = rootDir.getFileSystem(conf);

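    // Restore into a fresh, uniquely named subdirectory so repeated or concurrent jobs sharing the
    // same restoreDir do not collide.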
    restoreDir = new Path(restoreDir, UUID.randomUUID().toString());

    // TODO: restore from record readers to parallelize.
    RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);

    conf.set(RESTORE_DIR_KEY, restoreDir.toString());
  }
}