View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.util.regex.Matcher;
25  import java.util.regex.Pattern;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileStatus;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
35  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
36  import org.apache.hadoop.hbase.io.HFileLink;
37  import org.apache.hadoop.hbase.io.HalfStoreFileReader;
38  import org.apache.hadoop.hbase.io.Reference;
39  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
40  import org.apache.hadoop.hbase.util.FSUtils;
41  
42  /**
43   * Describe a StoreFile (hfile, reference, link)
44   */
45  @InterfaceAudience.Private
46  public class StoreFileInfo implements Comparable<StoreFileInfo> {
47    public static final Log LOG = LogFactory.getLog(StoreFileInfo.class);
48  
49    /**
50     * A non-capture group, for hfiles, so that this can be embedded.
51     * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
52     */
53    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?";
54  
55    /** Regex that will work for hfiles */
56    private static final Pattern HFILE_NAME_PATTERN =
57      Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
58  
59    /**
60     * Regex that will work for straight reference names (<hfile>.<parentEncRegion>)
61     * and hfilelink reference names (<table>=<region>-<hfile>.<parentEncRegion>)
62     * If reference, then the regex has more than just one group.
63     * Group 1, hfile/hfilelink pattern, is this file's id.
64     * Group 2 '(.+)' is the reference's parent region name.
65     */
66    private static final Pattern REF_NAME_PATTERN =
67      Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
68        HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
69  
70    // Configuration
71    private Configuration conf;
72  
73    // HDFS blocks distribution information
74    private HDFSBlocksDistribution hdfsBlocksDistribution = null;
75  
76    // If this storefile references another, this is the reference instance.
77    private final Reference reference;
78  
79    // If this storefile is a link to another, this is the link instance.
80    private final HFileLink link;
81  
82    // FileSystem information for the file.
83    private final FileStatus fileStatus;
84  
85    private RegionCoprocessorHost coprocessorHost;
86  
87    /**
88     * Create a Store File Info
89     * @param conf the {@link Configuration} to use
90     * @param fs The current file system to use.
91     * @param path The {@link Path} of the file
92     */
93    public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path path)
94        throws IOException {
95      this(conf, fs, fs.getFileStatus(path));
96    }
97  
98    /**
99     * Create a Store File Info
100    * @param conf the {@link Configuration} to use
101    * @param fs The current file system to use.
102    * @param fileStatus The {@link FileStatus} of the file
103    */
104   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
105       throws IOException {
106     this.conf = conf;
107     this.fileStatus = fileStatus;
108     Path p = fileStatus.getPath();
109     if (HFileLink.isHFileLink(p)) {
110       // HFileLink
111       this.reference = null;
112       this.link = new HFileLink(conf, p);
113       if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
114     } else if (isReference(p)) {
115       this.reference = Reference.read(fs, p);
116       Path referencePath = getReferredToFile(p);
117       if (HFileLink.isHFileLink(referencePath)) {
118         // HFileLink Reference
119         this.link = new HFileLink(conf, referencePath);
120       } else {
121         // Reference
122         this.link = null;
123       }
124       if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
125         " reference to " + referencePath);
126     } else if (isHFile(p)) {
127       // HFile
128       this.reference = null;
129       this.link = null;
130     } else {
131       throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
132     }
133   }
134 
135   /**
136    * Create a Store File Info from an HFileLink
137    * @param conf the {@link Configuration} to use
138    * @param fs The current file system to use.
139    * @param fileStatus The {@link FileStatus} of the file
140    */
141   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
142       final HFileLink link)
143       throws IOException {
144     this.conf = conf;
145     this.fileStatus = fileStatus;
146       // HFileLink
147     this.reference = null;
148     this.link = link;
149   }
150 
151   /**
152    * Sets the region coprocessor env.
153    * @param coprocessorHost
154    */
155   public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
156     this.coprocessorHost = coprocessorHost;
157   }
158 
159   /*
160    * @return the Reference object associated to this StoreFileInfo.
161    *         null if the StoreFile is not a reference.
162    */
163   public Reference getReference() {
164     return this.reference;
165   }
166 
167   /** @return True if the store file is a Reference */
168   public boolean isReference() {
169     return this.reference != null;
170   }
171 
172   /** @return True if the store file is a top Reference */
173   public boolean isTopReference() {
174     return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
175   }
176 
177   /** @return True if the store file is a link */
178   public boolean isLink() {
179     return this.link != null && this.reference == null;
180   }
181 
182   /** @return the HDFS block distribution */
183   public HDFSBlocksDistribution getHDFSBlockDistribution() {
184     return this.hdfsBlocksDistribution;
185   }
186 
187   /**
188    * Open a Reader for the StoreFile
189    * @param fs The current file system to use.
190    * @param cacheConf The cache configuration and block cache reference.
191    * @return The StoreFile.Reader for the file
192    */
193   public StoreFile.Reader open(final FileSystem fs,
194       final CacheConfig cacheConf) throws IOException {
195     FSDataInputStreamWrapper in;
196     FileStatus status;
197 
198     if (this.link != null) {
199       // HFileLink
200       in = new FSDataInputStreamWrapper(fs, this.link);
201       status = this.link.getFileStatus(fs);
202     } else if (this.reference != null) {
203       // HFile Reference
204       Path referencePath = getReferredToFile(this.getPath());
205       in = new FSDataInputStreamWrapper(fs, referencePath);
206       status = fs.getFileStatus(referencePath);
207     } else {
208       in = new FSDataInputStreamWrapper(fs, this.getPath());
209       status = fileStatus;
210     }
211     long length = status.getLen();
212     hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
213 
214     StoreFile.Reader reader = null;
215     if (this.coprocessorHost != null) {
216       reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length,
217         cacheConf, reference);
218     }
219     if (reader == null) {
220       if (this.reference != null) {
221         reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference,
222           conf);
223       } else {
224         reader = new StoreFile.Reader(fs, this.getPath(), in, length, cacheConf, conf);
225       }
226     }
227     if (this.coprocessorHost != null) {
228       reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length,
229         cacheConf, reference, reader);
230     }
231     return reader;
232   }
233 
234   /**
235    * Compute the HDFS Block Distribution for this StoreFile
236    */
237   public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
238       throws IOException {
239 
240     // guard agains the case where we get the FileStatus from link, but by the time we
241     // call compute the file is moved again
242     if (this.link != null) {
243       FileNotFoundException exToThrow = null;
244       for (int i = 0; i < this.link.getLocations().length; i++) {
245         try {
246           return computeHDFSBlocksDistributionInternal(fs);
247         } catch (FileNotFoundException ex) {
248           // try the other location
249           exToThrow = ex;
250         }
251       }
252       throw exToThrow;
253     } else {
254       return computeHDFSBlocksDistributionInternal(fs);
255     }
256   }
257 
258   private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
259       throws IOException {
260     FileStatus status = getReferencedFileStatus(fs);
261     if (this.reference != null) {
262       return computeRefFileHDFSBlockDistribution(fs, reference, status);
263     } else {
264       return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
265     }
266   }
267 
268   /**
269    * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
270    * @param fs The current file system to use.
271    * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
272    */
273   public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
274     FileStatus status;
275     if (this.reference != null) {
276       if (this.link != null) {
277         FileNotFoundException exToThrow = null;
278         for (int i = 0; i < this.link.getLocations().length; i++) {
279           // HFileLink Reference
280           try {
281             return link.getFileStatus(fs);
282           } catch (FileNotFoundException ex) {
283             // try the other location
284             exToThrow = ex;
285           }
286         }
287         throw exToThrow;
288       } else {
289         // HFile Reference
290         Path referencePath = getReferredToFile(this.getPath());
291         status = fs.getFileStatus(referencePath);
292       }
293     } else {
294       if (this.link != null) {
295         FileNotFoundException exToThrow = null;
296         for (int i = 0; i < this.link.getLocations().length; i++) {
297           // HFileLink
298           try {
299             return link.getFileStatus(fs);
300           } catch (FileNotFoundException ex) {
301             // try the other location
302             exToThrow = ex;
303           }
304         }
305         throw exToThrow;
306       } else {
307         status = this.fileStatus;
308       }
309     }
310     return status;
311   }
312 
313   /** @return The {@link Path} of the file */
314   public Path getPath() {
315     return this.fileStatus.getPath();
316   }
317 
318   /** @return The {@link FileStatus} of the file */
319   public FileStatus getFileStatus() {
320     return this.fileStatus;
321   }
322 
323   /** @return Get the modification time of the file. */
324   public long getModificationTime() {
325     return this.fileStatus.getModificationTime();
326   }
327 
328   @Override
329   public String toString() {
330     return this.getPath() +
331       (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
332   }
333 
334   /**
335    * @param path Path to check.
336    * @return True if the path has format of a HFile.
337    */
338   public static boolean isHFile(final Path path) {
339     return isHFile(path.getName());
340   }
341 
342   public static boolean isHFile(final String fileName) {
343     Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
344     return m.matches() && m.groupCount() > 0;
345   }
346 
347   /**
348    * @param path Path to check.
349    * @return True if the path has format of a HStoreFile reference.
350    */
351   public static boolean isReference(final Path path) {
352     return isReference(path.getName());
353   }
354 
355   /**
356    * @param name file name to check.
357    * @return True if the path has format of a HStoreFile reference.
358    */
359   public static boolean isReference(final String name) {
360     Matcher m = REF_NAME_PATTERN.matcher(name);
361     return m.matches() && m.groupCount() > 1;
362   }
363 
364   /*
365    * Return path to the file referred to by a Reference.  Presumes a directory
366    * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
367    * @param p Path to a Reference file.
368    * @return Calculated path to parent region file.
369    * @throws IllegalArgumentException when path regex fails to match.
370    */
371   public static Path getReferredToFile(final Path p) {
372     Matcher m = REF_NAME_PATTERN.matcher(p.getName());
373     if (m == null || !m.matches()) {
374       LOG.warn("Failed match of store file name " + p.toString());
375       throw new IllegalArgumentException("Failed match of store file name " +
376           p.toString());
377     }
378 
379     // Other region name is suffix on the passed Reference file name
380     String otherRegion = m.group(2);
381     // Tabledir is up two directories from where Reference was written.
382     Path tableDir = p.getParent().getParent().getParent();
383     String nameStrippedOfSuffix = m.group(1);
384     LOG.debug("reference '" + p + "' to region=" + otherRegion + " hfile=" + nameStrippedOfSuffix);
385 
386     // Build up new path with the referenced region in place of our current
387     // region in the reference path.  Also strip regionname suffix from name.
388     return new Path(new Path(new Path(tableDir, otherRegion),
389       p.getParent().getName()), nameStrippedOfSuffix);
390   }
391 
392   /**
393    * Validate the store file name.
394    * @param fileName name of the file to validate
395    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
396    */
397   public static boolean validateStoreFileName(final String fileName) {
398     if (HFileLink.isHFileLink(fileName) || isReference(fileName))
399       return(true);
400     return !fileName.contains("-");
401   }
402 
403   /**
404    * Return if the specified file is a valid store file or not.
405    * @param fileStatus The {@link FileStatus} of the file
406    * @return <tt>true</tt> if the file is valid
407    */
408   public static boolean isValid(final FileStatus fileStatus)
409       throws IOException {
410     final Path p = fileStatus.getPath();
411 
412     if (fileStatus.isDirectory())
413       return false;
414 
415     // Check for empty hfile. Should never be the case but can happen
416     // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
417     // NOTE: that the HFileLink is just a name, so it's an empty file.
418     if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
419       LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
420       return false;
421     }
422 
423     return validateStoreFileName(p.getName());
424   }
425 
426   /**
427    * helper function to compute HDFS blocks distribution of a given reference
428    * file.For reference file, we don't compute the exact value. We use some
429    * estimate instead given it might be good enough. we assume bottom part
430    * takes the first half of reference file, top part takes the second half
431    * of the reference file. This is just estimate, given
432    * midkey ofregion != midkey of HFile, also the number and size of keys vary.
433    * If this estimate isn't good enough, we can improve it later.
434    * @param fs  The FileSystem
435    * @param reference  The reference
436    * @param status  The reference FileStatus
437    * @return HDFS blocks distribution
438    */
439   private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
440       final FileSystem fs, final Reference reference, final FileStatus status)
441       throws IOException {
442     if (status == null) {
443       return null;
444     }
445 
446     long start = 0;
447     long length = 0;
448 
449     if (Reference.isTopFileRegion(reference.getFileRegion())) {
450       start = status.getLen()/2;
451       length = status.getLen() - status.getLen()/2;
452     } else {
453       start = 0;
454       length = status.getLen()/2;
455     }
456     return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
457   }
458 
459   @Override
460   public boolean equals(Object that) {
461     if (that == null) {
462       return false;
463     }
464 
465     if (that instanceof StoreFileInfo) {
466       return this.compareTo((StoreFileInfo)that) == 0;
467     }
468 
469     return false;
470   };
471 
472   @Override
473   public int compareTo(StoreFileInfo o) {
474     return this.fileStatus.compareTo(o.fileStatus);
475   }
476 
477   @Override
478   public int hashCode() {
479     return this.fileStatus.hashCode();
480   }
481 }