View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.regex.Matcher;
24  import java.util.regex.Pattern;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.classification.InterfaceAudience;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileStatus;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
34  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
35  import org.apache.hadoop.hbase.io.HFileLink;
36  import org.apache.hadoop.hbase.io.HalfStoreFileReader;
37  import org.apache.hadoop.hbase.io.Reference;
38  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
39  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
40  import org.apache.hadoop.hbase.util.FSUtils;
41  
42  /**
43   * Describe a StoreFile (hfile, reference, link)
44   */
45  @InterfaceAudience.Private
46  public class StoreFileInfo {
47    public static final Log LOG = LogFactory.getLog(StoreFileInfo.class);
48  
49    /**
50     * A non-capture group, for hfiles, so that this can be embedded.
51     * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
52     */
53    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?";
54  
55    /** Regex that will work for hfiles */
56    private static final Pattern HFILE_NAME_PATTERN =
57      Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
58  
59    /**
60     * Regex that will work for straight reference names (<hfile>.<parentEncRegion>)
61     * and hfilelink reference names (<table>=<region>-<hfile>.<parentEncRegion>)
62     * If reference, then the regex has more than just one group.
63     * Group 1, hfile/hfilelink pattern, is this file's id.
64     * Group 2 '(.+)' is the reference's parent region name.
65     */
66    private static final Pattern REF_NAME_PATTERN =
67      Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
68        HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
69  
70    // HDFS blocks distribution information
71    private HDFSBlocksDistribution hdfsBlocksDistribution = null;
72  
73    // If this storefile references another, this is the reference instance.
74    private final Reference reference;
75  
76    // If this storefile is a link to another, this is the link instance.
77    private final HFileLink link;
78  
79    // FileSystem information for the file.
80    private final FileStatus fileStatus;
81  
82    /**
83     * Create a Store File Info
84     * @param conf the {@link Configuration} to use
85     * @param fs The current file system to use.
86     * @param path The {@link Path} of the file
87     */
88    public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path path)
89        throws IOException {
90      this(conf, fs, fs.getFileStatus(path));
91    }
92  
93    /**
94     * Create a Store File Info
95     * @param conf the {@link Configuration} to use
96     * @param fs The current file system to use.
97     * @param fileStatus The {@link FileStatus} of the file
98     */
99    public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
100       throws IOException {
101     this.fileStatus = fileStatus;
102     Path p = fileStatus.getPath();
103     if (HFileLink.isHFileLink(p)) {
104       // HFileLink
105       this.reference = null;
106       this.link = new HFileLink(conf, p);
107       if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
108     } else if (isReference(p)) {
109       this.reference = Reference.read(fs, p);
110       Path referencePath = getReferredToFile(p);
111       if (HFileLink.isHFileLink(referencePath)) {
112         // HFileLink Reference
113         this.link = new HFileLink(conf, referencePath);
114       } else {
115         // Reference
116         this.link = null;
117       }
118       if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
119         " reference to " + referencePath);
120     } else if (isHFile(p)) {
121       // HFile
122       this.reference = null;
123       this.link = null;
124     } else {
125       throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
126     }
127   }
128 
129   /*
130    * @return the Reference object associated to this StoreFileInfo.
131    *         null if the StoreFile is not a reference.
132    */
133   Reference getReference() {
134     return this.reference;
135   }
136 
137   /** @return True if the store file is a Reference */
138   public boolean isReference() {
139     return this.reference != null;
140   }
141 
142   /** @return True if the store file is a top Reference */
143   public boolean isTopReference() {
144     return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
145   }
146 
147   /** @return True if the store file is a link */
148   public boolean isLink() {
149     return this.link != null && this.reference == null;
150   }
151 
152   /** @return the HDFS block distribution */
153   public HDFSBlocksDistribution getHDFSBlockDistribution() {
154     return this.hdfsBlocksDistribution;
155   }
156 
157   /**
158    * Open a Reader for the StoreFile
159    * @param fs The current file system to use.
160    * @param cacheConf The cache configuration and block cache reference.
161    * @return The StoreFile.Reader for the file
162    */
163   public StoreFile.Reader open(final FileSystem fs,
164       final CacheConfig cacheConf) throws IOException {
165     FSDataInputStreamWrapper in;
166     FileStatus status;
167 
168     if (this.link != null) {
169       // HFileLink
170       in = new FSDataInputStreamWrapper(fs, this.link);
171       status = this.link.getFileStatus(fs);
172     } else if (this.reference != null) {
173       // HFile Reference
174       Path referencePath = getReferredToFile(this.getPath());
175       in = new FSDataInputStreamWrapper(fs, referencePath);
176       status = fs.getFileStatus(referencePath);
177     } else {
178       in = new FSDataInputStreamWrapper(fs, this.getPath());
179       status = fileStatus;
180     }
181     long length = status.getLen();
182     if (this.reference != null) {
183       hdfsBlocksDistribution = computeRefFileHDFSBlockDistribution(fs, reference, status);
184       return new HalfStoreFileReader(
185           fs, this.getPath(), in, length, cacheConf, reference);
186     } else {
187       hdfsBlocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, length);
188       return new StoreFile.Reader(fs, this.getPath(), in, length, cacheConf);
189     }
190   }
191 
192   /**
193    * Compute the HDFS Block Distribution for this StoreFile
194    */
195   public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
196       throws IOException {
197     FileStatus status;
198     if (this.reference != null) {
199       if (this.link != null) {
200         // HFileLink Reference
201         status = link.getFileStatus(fs);
202       } else {
203         // HFile Reference
204         Path referencePath = getReferredToFile(this.getPath());
205         status = fs.getFileStatus(referencePath);
206       }
207       return computeRefFileHDFSBlockDistribution(fs, reference, status);
208     } else {
209       if (this.link != null) {
210         // HFileLink
211         status = link.getFileStatus(fs);
212       } else {
213         status = this.fileStatus;
214       }
215       return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
216     }
217   }
218 
219   /** @return The {@link Path} of the file */
220   public Path getPath() {
221     return this.fileStatus.getPath();
222   }
223 
224   /** @return The {@link FileStatus} of the file */
225   public FileStatus getFileStatus() {
226     return this.fileStatus;
227   }
228 
229   /** @return Get the modification time of the file. */
230   public long getModificationTime() {
231     return this.fileStatus.getModificationTime();
232   }
233 
234   @Override
235   public String toString() {
236     return this.getPath() +
237       (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
238   }
239 
240   /**
241    * @param path Path to check.
242    * @return True if the path has format of a HFile.
243    */
244   public static boolean isHFile(final Path path) {
245     return isHFile(path.getName());
246   }
247 
248   public static boolean isHFile(final String fileName) {
249     Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
250     return m.matches() && m.groupCount() > 0;
251   }
252 
253   /**
254    * @param path Path to check.
255    * @return True if the path has format of a HStoreFile reference.
256    */
257   public static boolean isReference(final Path path) {
258     return isReference(path.getName());
259   }
260 
261   /**
262    * @param name file name to check.
263    * @return True if the path has format of a HStoreFile reference.
264    */
265   public static boolean isReference(final String name) {
266     Matcher m = REF_NAME_PATTERN.matcher(name);
267     return m.matches() && m.groupCount() > 1;
268   }
269 
270   /*
271    * Return path to the file referred to by a Reference.  Presumes a directory
272    * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
273    * @param p Path to a Reference file.
274    * @return Calculated path to parent region file.
275    * @throws IllegalArgumentException when path regex fails to match.
276    */
277   public static Path getReferredToFile(final Path p) {
278     Matcher m = REF_NAME_PATTERN.matcher(p.getName());
279     if (m == null || !m.matches()) {
280       LOG.warn("Failed match of store file name " + p.toString());
281       throw new IllegalArgumentException("Failed match of store file name " +
282           p.toString());
283     }
284 
285     // Other region name is suffix on the passed Reference file name
286     String otherRegion = m.group(2);
287     // Tabledir is up two directories from where Reference was written.
288     Path tableDir = p.getParent().getParent().getParent();
289     String nameStrippedOfSuffix = m.group(1);
290     LOG.debug("reference '" + p + "' to region=" + otherRegion + " hfile=" + nameStrippedOfSuffix);
291 
292     // Build up new path with the referenced region in place of our current
293     // region in the reference path.  Also strip regionname suffix from name.
294     return new Path(new Path(new Path(tableDir, otherRegion),
295       p.getParent().getName()), nameStrippedOfSuffix);
296   }
297 
298   /**
299    * Validate the store file name.
300    * @param fileName name of the file to validate
301    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
302    */
303   public static boolean validateStoreFileName(final String fileName) {
304     if (HFileLink.isHFileLink(fileName) || isReference(fileName))
305       return(true);
306     return !fileName.contains("-");
307   }
308 
309   /**
310    * Return if the specified file is a valid store file or not.
311    * @param fileStatus The {@link FileStatus} of the file
312    * @return <tt>true</tt> if the file is valid
313    */
314   public static boolean isValid(final FileStatus fileStatus)
315       throws IOException {
316     final Path p = fileStatus.getPath();
317 
318     if (fileStatus.isDir())
319       return false;
320 
321     // Check for empty hfile. Should never be the case but can happen
322     // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
323     // NOTE: that the HFileLink is just a name, so it's an empty file.
324     if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
325       LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
326       return false;
327     }
328 
329     return validateStoreFileName(p.getName());
330   }
331 
332   /**
333    * helper function to compute HDFS blocks distribution of a given reference
334    * file.For reference file, we don't compute the exact value. We use some
335    * estimate instead given it might be good enough. we assume bottom part
336    * takes the first half of reference file, top part takes the second half
337    * of the reference file. This is just estimate, given
338    * midkey ofregion != midkey of HFile, also the number and size of keys vary.
339    * If this estimate isn't good enough, we can improve it later.
340    * @param fs  The FileSystem
341    * @param reference  The reference
342    * @param status  The reference FileStatus
343    * @return HDFS blocks distribution
344    */
345   private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
346       final FileSystem fs, final Reference reference, final FileStatus status)
347       throws IOException {
348     if (status == null) {
349       return null;
350     }
351 
352     long start = 0;
353     long length = 0;
354 
355     if (Reference.isTopFileRegion(reference.getFileRegion())) {
356       start = status.getLen()/2;
357       length = status.getLen() - status.getLen()/2;
358     } else {
359       start = 0;
360       length = status.getLen()/2;
361     }
362     return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
363   }
364 }