View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.regex.Matcher;
24  import java.util.regex.Pattern;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.classification.InterfaceAudience;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileStatus;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
34  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
35  import org.apache.hadoop.hbase.io.HFileLink;
36  import org.apache.hadoop.hbase.io.HalfStoreFileReader;
37  import org.apache.hadoop.hbase.io.Reference;
38  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
39  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
40  import org.apache.hadoop.hbase.util.FSUtils;
41  
42  /**
43   * Describe a StoreFile (hfile, reference, link)
44   */
45  @InterfaceAudience.Private
46  public class StoreFileInfo {
47    public static final Log LOG = LogFactory.getLog(StoreFileInfo.class);
48  
49    /**
50     * A non-capture group, for hfiles, so that this can be embedded.
51     * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
52     */
53    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?";
54  
55    /** Regex that will work for hfiles */
56    private static final Pattern HFILE_NAME_PATTERN =
57      Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
58  
59    /**
60     * Regex that will work for straight reference names (<hfile>.<parentEncRegion>)
61     * and hfilelink reference names (<table>=<region>-<hfile>.<parentEncRegion>)
62     * If reference, then the regex has more than just one group.
63     * Group 1, hfile/hfilelink pattern, is this file's id.
64     * Group 2 '(.+)' is the reference's parent region name.
65     */
66    private static final Pattern REF_NAME_PATTERN =
67      Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
68        HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
69  
70    // HDFS blocks distribution information
71    private HDFSBlocksDistribution hdfsBlocksDistribution = null;
72  
73    // If this storefile references another, this is the reference instance.
74    private final Reference reference;
75  
76    // If this storefile is a link to another, this is the link instance.
77    private final HFileLink link;
78  
79    // FileSystem information for the file.
80    private final FileStatus fileStatus;
81  
82    /**
83     * Create a Store File Info
84     * @param conf the {@link Configuration} to use
85     * @param fs The current file system to use.
86     * @param path The {@link Path} of the file
87     */
88    public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path path)
89        throws IOException {
90      this(conf, fs, fs.getFileStatus(path));
91    }
92  
93    /**
94     * Create a Store File Info
95     * @param conf the {@link Configuration} to use
96     * @param fs The current file system to use.
97     * @param fileStatus The {@link FileStatus} of the file
98     */
99    public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
100       throws IOException {
101     this.fileStatus = fileStatus;
102     Path p = fileStatus.getPath();
103     if (HFileLink.isHFileLink(p)) {
104       // HFileLink
105       this.reference = null;
106       this.link = new HFileLink(conf, p);
107       if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
108     } else if (isReference(p)) {
109       this.reference = Reference.read(fs, p);
110       Path referencePath = getReferredToFile(p);
111       if (HFileLink.isHFileLink(referencePath)) {
112         // HFileLink Reference
113         this.link = new HFileLink(conf, referencePath);
114       } else {
115         // Reference
116         this.link = null;
117       }
118       if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
119         " reference to " + referencePath);
120     } else if (isHFile(p)) {
121       // HFile
122       this.reference = null;
123       this.link = null;
124     } else {
125       throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
126     }
127   }
128 
129   /** @return True if the store file is a Reference */
130   public boolean isReference() {
131     return this.reference != null;
132   }
133 
134   /** @return True if the store file is a top Reference */
135   public boolean isTopReference() {
136     return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
137   }
138 
139   /** @return True if the store file is a link */
140   public boolean isLink() {
141     return this.link != null && this.reference == null;
142   }
143 
144   /** @return the HDFS block distribution */
145   public HDFSBlocksDistribution getHDFSBlockDistribution() {
146     return this.hdfsBlocksDistribution;
147   }
148 
149   /**
150    * Open a Reader for the StoreFile
151    * @param fs The current file system to use.
152    * @param cacheConf The cache configuration and block cache reference.
153    * @param dataBlockEncoding data block encoding algorithm.
154    * @return The StoreFile.Reader for the file
155    */
156   public StoreFile.Reader open(final FileSystem fs, final CacheConfig cacheConf,
157       final DataBlockEncoding dataBlockEncoding) throws IOException {
158     FSDataInputStreamWrapper in;
159     FileStatus status;
160 
161     if (this.link != null) {
162       // HFileLink
163       in = new FSDataInputStreamWrapper(fs, this.link);
164       status = this.link.getFileStatus(fs);
165     } else if (this.reference != null) {
166       // HFile Reference
167       Path referencePath = getReferredToFile(this.getPath());
168       in = new FSDataInputStreamWrapper(fs, referencePath);
169       status = fs.getFileStatus(referencePath);
170     } else {
171       in = new FSDataInputStreamWrapper(fs, this.getPath());
172       status = fileStatus;
173     }
174     long length = status.getLen();
175     if (this.reference != null) {
176       hdfsBlocksDistribution = computeRefFileHDFSBlockDistribution(fs, reference, status);
177       return new HalfStoreFileReader(
178           fs, this.getPath(), in, length, cacheConf, reference, dataBlockEncoding);
179     } else {
180       hdfsBlocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, length);
181       return new StoreFile.Reader(fs, this.getPath(), in, length, cacheConf, dataBlockEncoding);
182     }
183   }
184 
185   /**
186    * Compute the HDFS Block Distribution for this StoreFile
187    */
188   public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
189       throws IOException {
190     FileStatus status;
191     if (this.reference != null) {
192       if (this.link != null) {
193         // HFileLink Reference
194         status = link.getFileStatus(fs);
195       } else {
196         // HFile Reference
197         Path referencePath = getReferredToFile(this.getPath());
198         status = fs.getFileStatus(referencePath);
199       }
200       return computeRefFileHDFSBlockDistribution(fs, reference, status);
201     } else {
202       if (this.link != null) {
203         // HFileLink
204         status = link.getFileStatus(fs);
205       } else {
206         status = this.fileStatus;
207       }
208       return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
209     }
210   }
211 
212   /** @return The {@link Path} of the file */
213   public Path getPath() {
214     return this.fileStatus.getPath();
215   }
216 
217   /** @return The {@link FileStatus} of the file */
218   public FileStatus getFileStatus() {
219     return this.fileStatus;
220   }
221 
222   /** @return Get the modification time of the file. */
223   public long getModificationTime() {
224     return this.fileStatus.getModificationTime();
225   }
226 
227   @Override
228   public String toString() {
229     return this.getPath() +
230       (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
231   }
232 
233   /**
234    * @param path Path to check.
235    * @return True if the path has format of a HFile.
236    */
237   public static boolean isHFile(final Path path) {
238     return isHFile(path.getName());
239   }
240 
241   public static boolean isHFile(final String fileName) {
242     Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
243     return m.matches() && m.groupCount() > 0;
244   }
245 
246   /**
247    * @param path Path to check.
248    * @return True if the path has format of a HStoreFile reference.
249    */
250   public static boolean isReference(final Path path) {
251     return isReference(path.getName());
252   }
253 
254   /**
255    * @param name file name to check.
256    * @return True if the path has format of a HStoreFile reference.
257    */
258   public static boolean isReference(final String name) {
259     Matcher m = REF_NAME_PATTERN.matcher(name);
260     return m.matches() && m.groupCount() > 1;
261   }
262 
263   /*
264    * Return path to the file referred to by a Reference.  Presumes a directory
265    * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
266    * @param p Path to a Reference file.
267    * @return Calculated path to parent region file.
268    * @throws IllegalArgumentException when path regex fails to match.
269    */
270   public static Path getReferredToFile(final Path p) {
271     Matcher m = REF_NAME_PATTERN.matcher(p.getName());
272     if (m == null || !m.matches()) {
273       LOG.warn("Failed match of store file name " + p.toString());
274       throw new IllegalArgumentException("Failed match of store file name " +
275           p.toString());
276     }
277 
278     // Other region name is suffix on the passed Reference file name
279     String otherRegion = m.group(2);
280     // Tabledir is up two directories from where Reference was written.
281     Path tableDir = p.getParent().getParent().getParent();
282     String nameStrippedOfSuffix = m.group(1);
283     LOG.debug("reference '" + p + "' to region=" + otherRegion + " hfile=" + nameStrippedOfSuffix);
284 
285     // Build up new path with the referenced region in place of our current
286     // region in the reference path.  Also strip regionname suffix from name.
287     return new Path(new Path(new Path(tableDir, otherRegion),
288       p.getParent().getName()), nameStrippedOfSuffix);
289   }
290 
291   /**
292    * Validate the store file name.
293    * @param fileName name of the file to validate
294    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
295    */
296   public static boolean validateStoreFileName(final String fileName) {
297     if (HFileLink.isHFileLink(fileName) || isReference(fileName))
298       return(true);
299     return !fileName.contains("-");
300   }
301 
302   /**
303    * Return if the specified file is a valid store file or not.
304    * @param fileStatus The {@link FileStatus} of the file
305    * @return <tt>true</tt> if the file is valid
306    */
307   public static boolean isValid(final FileStatus fileStatus)
308       throws IOException {
309     final Path p = fileStatus.getPath();
310 
311     if (fileStatus.isDir())
312       return false;
313 
314     // Check for empty hfile. Should never be the case but can happen
315     // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
316     // NOTE: that the HFileLink is just a name, so it's an empty file.
317     if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
318       LOG.warn("Skipping " + p + " beccreateStoreDirause its empty. HBASE-646 DATA LOSS?");
319       return false;
320     }
321 
322     return validateStoreFileName(p.getName());
323   }
324 
325   /**
326    * helper function to compute HDFS blocks distribution of a given reference
327    * file.For reference file, we don't compute the exact value. We use some
328    * estimate instead given it might be good enough. we assume bottom part
329    * takes the first half of reference file, top part takes the second half
330    * of the reference file. This is just estimate, given
331    * midkey ofregion != midkey of HFile, also the number and size of keys vary.
332    * If this estimate isn't good enough, we can improve it later.
333    * @param fs  The FileSystem
334    * @param reference  The reference
335    * @param status  The reference FileStatus
336    * @return HDFS blocks distribution
337    */
338   private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
339       final FileSystem fs, final Reference reference, final FileStatus status)
340       throws IOException {
341     if (status == null) {
342       return null;
343     }
344 
345     long start = 0;
346     long length = 0;
347 
348     if (Reference.isTopFileRegion(reference.getFileRegion())) {
349       start = status.getLen()/2;
350       length = status.getLen() - status.getLen()/2;
351     } else {
352       start = 0;
353       length = status.getLen()/2;
354     }
355     return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
356   }
357 }