View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.regex.Matcher;
24  import java.util.regex.Pattern;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.classification.InterfaceAudience;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FSDataInputStream;
31  import org.apache.hadoop.fs.FileStatus;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
35  import org.apache.hadoop.hbase.fs.HFileSystem;
36  import org.apache.hadoop.hbase.io.HFileLink;
37  import org.apache.hadoop.hbase.io.Reference;
38  import org.apache.hadoop.hbase.io.HalfStoreFileReader;
39  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
40  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
41  import org.apache.hadoop.hbase.util.FSUtils;
42  
43  /**
44   * Describe a StoreFile (hfile, reference, link)
45   */
46  @InterfaceAudience.Private
47  public class StoreFileInfo {
48    public static final Log LOG = LogFactory.getLog(StoreFileInfo.class);
49  
50    /**
51     * A non-capture group, for hfiles, so that this can be embedded.
52     * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
53     */
54    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?";
55  
56    /** Regex that will work for hfiles */
57    private static final Pattern HFILE_NAME_PATTERN =
58      Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
59  
60    /**
61     * Regex that will work for straight reference names (<hfile>.<parentEncRegion>)
62     * and hfilelink reference names (<table>=<region>-<hfile>.<parentEncRegion>)
63     * If reference, then the regex has more than just one group.
64     * Group 1, hfile/hfilelink pattern, is this file's id.
65     * Group 2 '(.+)' is the reference's parent region name.
66     */
67    private static final Pattern REF_NAME_PATTERN =
68      Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
69        HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
70  
71    // HDFS blocks distribution information
72    private HDFSBlocksDistribution hdfsBlocksDistribution = null;
73  
74    // If this storefile references another, this is the reference instance.
75    private final Reference reference;
76  
77    // If this storefile is a link to another, this is the link instance.
78    private final HFileLink link;
79  
80    // FileSystem information for the file.
81    private final FileStatus fileStatus;
82  
83    /**
84     * Create a Store File Info
85     * @param conf the {@link Configuration} to use
86     * @param fs The current file system to use.
87     * @param path The {@link Path} of the file
88     */
89    public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path path)
90        throws IOException {
91      this(conf, fs, fs.getFileStatus(path));
92    }
93  
94    /**
95     * Create a Store File Info
96     * @param conf the {@link Configuration} to use
97     * @param fs The current file system to use.
98     * @param fileStatus The {@link FileStatus} of the file
99     */
100   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
101       throws IOException {
102     this.fileStatus = fileStatus;
103 
104     Path p = fileStatus.getPath();
105     if (HFileLink.isHFileLink(p)) {
106       // HFileLink
107       this.reference = null;
108       this.link = new HFileLink(conf, p);
109       LOG.debug("Store file " + p + " is a link");
110     } else if (isReference(p)) {
111       this.reference = Reference.read(fs, p);
112       Path referencePath = getReferredToFile(p);
113       if (HFileLink.isHFileLink(referencePath)) {
114         // HFileLink Reference
115         this.link = new HFileLink(conf, referencePath);
116       } else {
117         // Reference
118         this.link = null;
119       }
120       LOG.debug("Store file " + p + " is a " + reference.getFileRegion() +
121         " reference to " + referencePath);
122     } else if (isHFile(p)) {
123       // HFile
124       this.reference = null;
125       this.link = null;
126     } else {
127       throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
128     }
129   }
130 
131   /** @return True if the store file is a Reference */
132   public boolean isReference() {
133     return this.reference != null;
134   }
135 
136   /** @return True if the store file is a top Reference */
137   public boolean isTopReference() {
138     return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
139   }
140 
141   /** @return True if the store file is a link */
142   public boolean isLink() {
143     return this.link != null && this.reference == null;
144   }
145 
146   /** @return the HDFS block distribution */
147   public HDFSBlocksDistribution getHDFSBlockDistribution() {
148     return this.hdfsBlocksDistribution;
149   }
150 
151   /**
152    * Open a Reader for the StoreFile
153    * @param fs The current file system to use.
154    * @param cacheConf The cache configuration and block cache reference.
155    * @param dataBlockEncoding data block encoding algorithm.
156    * @return The StoreFile.Reader for the file
157    */
158   public StoreFile.Reader open(final FileSystem fs, final CacheConfig cacheConf,
159       final DataBlockEncoding dataBlockEncoding) throws IOException {
160     FSDataInputStream inNoChecksum = null;
161     FileSystem noChecksumFs = null;
162     FSDataInputStream in;
163     FileStatus status;
164 
165     if (fs instanceof HFileSystem) {
166       noChecksumFs = ((HFileSystem)fs).getNoChecksumFs();
167     }
168 
169     if (this.reference != null) {
170       if (this.link != null) {
171         // HFileLink Reference
172         in = this.link.open(fs);
173         inNoChecksum = (noChecksumFs != null) ? this.link.open(noChecksumFs) : in;
174         status = this.link.getFileStatus(fs);
175       } else {
176         // HFile Reference
177         Path referencePath = getReferredToFile(this.getPath());
178         in = fs.open(referencePath);
179         inNoChecksum = (noChecksumFs != null) ? noChecksumFs.open(referencePath) : in;
180         status = fs.getFileStatus(referencePath);
181       }
182 
183       hdfsBlocksDistribution = computeRefFileHDFSBlockDistribution(fs, reference, status);
184       return new HalfStoreFileReader(fs, this.getPath(), in, inNoChecksum, status.getLen(),
185           cacheConf, reference, dataBlockEncoding);
186     } else {
187       if (this.link != null) {
188         // HFileLink
189         in = this.link.open(fs);
190         inNoChecksum = (noChecksumFs != null) ? link.open(noChecksumFs) : in;
191         status = this.link.getFileStatus(fs);
192       } else {
193         // HFile
194         status = fileStatus;
195         in = fs.open(this.getPath());
196         inNoChecksum = (noChecksumFs != null) ? noChecksumFs.open(this.getPath()) : in;
197       }
198 
199       long length = status.getLen();
200       hdfsBlocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, length);
201       return new StoreFile.Reader(fs, this.getPath(), in, inNoChecksum, length,
202           cacheConf, dataBlockEncoding, true);
203     }
204   }
205 
206   /**
207    * Compute the HDFS Block Distribution for this StoreFile
208    */
209   public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
210       throws IOException {
211     FileStatus status;
212     if (this.reference != null) {
213       if (this.link != null) {
214         // HFileLink Reference
215         status = link.getFileStatus(fs);
216       } else {
217         // HFile Reference
218         Path referencePath = getReferredToFile(this.getPath());
219         status = fs.getFileStatus(referencePath);
220       }
221       return computeRefFileHDFSBlockDistribution(fs, reference, status);
222     } else {
223       if (this.link != null) {
224         // HFileLink
225         status = link.getFileStatus(fs);
226       } else {
227         status = this.fileStatus;
228       }
229       return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
230     }
231   }
232 
233   /** @return The {@link Path} of the file */
234   public Path getPath() {
235     return this.fileStatus.getPath();
236   }
237 
238   /** @return The {@link FileStatus} of the file */
239   public FileStatus getFileStatus() {
240     return this.fileStatus;
241   }
242 
243   /** @return Get the modification time of the file. */
244   public long getModificationTime() {
245     return this.fileStatus.getModificationTime();
246   }
247 
248   @Override
249   public String toString() {
250     return this.getPath() +
251       (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
252   }
253 
254   /**
255    * @param path Path to check.
256    * @return True if the path has format of a HFile.
257    */
258   public static boolean isHFile(final Path path) {
259     return isHFile(path.getName());
260   }
261 
262   public static boolean isHFile(final String fileName) {
263     Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
264     return m.matches() && m.groupCount() > 0;
265   }
266 
267   /**
268    * @param path Path to check.
269    * @return True if the path has format of a HStoreFile reference.
270    */
271   public static boolean isReference(final Path path) {
272     return isReference(path.getName());
273   }
274 
275   /**
276    * @param name file name to check.
277    * @return True if the path has format of a HStoreFile reference.
278    */
279   public static boolean isReference(final String name) {
280     Matcher m = REF_NAME_PATTERN.matcher(name);
281     return m.matches() && m.groupCount() > 1;
282   }
283 
284   /*
285    * Return path to the file referred to by a Reference.  Presumes a directory
286    * hierarchy of <code>${hbase.rootdir}/tablename/regionname/familyname</code>.
287    * @param p Path to a Reference file.
288    * @return Calculated path to parent region file.
289    * @throws IllegalArgumentException when path regex fails to match.
290    */
291   public static Path getReferredToFile(final Path p) {
292     Matcher m = REF_NAME_PATTERN.matcher(p.getName());
293     if (m == null || !m.matches()) {
294       LOG.warn("Failed match of store file name " + p.toString());
295       throw new IllegalArgumentException("Failed match of store file name " +
296           p.toString());
297     }
298 
299     // Other region name is suffix on the passed Reference file name
300     String otherRegion = m.group(2);
301     // Tabledir is up two directories from where Reference was written.
302     Path tableDir = p.getParent().getParent().getParent();
303     String nameStrippedOfSuffix = m.group(1);
304     LOG.debug("reference '" + p + "' to region=" + otherRegion + " hfile=" + nameStrippedOfSuffix);
305 
306     // Build up new path with the referenced region in place of our current
307     // region in the reference path.  Also strip regionname suffix from name.
308     return new Path(new Path(new Path(tableDir, otherRegion),
309       p.getParent().getName()), nameStrippedOfSuffix);
310   }
311 
312   /**
313    * Validate the store file name.
314    * @param fileName name of the file to validate
315    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
316    */
317   public static boolean validateStoreFileName(final String fileName) {
318     if (HFileLink.isHFileLink(fileName) || isReference(fileName))
319       return(true);
320     return !fileName.contains("-");
321   }
322 
323   /**
324    * Return if the specified file is a valid store file or not.
325    * @param fileStatus The {@link FileStatus} of the file
326    * @return <tt>true</tt> if the file is valid
327    */
328   public static boolean isValid(final FileStatus fileStatus)
329       throws IOException {
330     final Path p = fileStatus.getPath();
331 
332     if (fileStatus.isDir())
333       return false;
334 
335     // Check for empty hfile. Should never be the case but can happen
336     // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
337     // NOTE: that the HFileLink is just a name, so it's an empty file.
338     if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
339       LOG.warn("Skipping " + p + " beccreateStoreDirause its empty. HBASE-646 DATA LOSS?");
340       return false;
341     }
342 
343     return validateStoreFileName(p.getName());
344   }
345 
346   /**
347    * helper function to compute HDFS blocks distribution of a given reference
348    * file.For reference file, we don't compute the exact value. We use some
349    * estimate instead given it might be good enough. we assume bottom part
350    * takes the first half of reference file, top part takes the second half
351    * of the reference file. This is just estimate, given
352    * midkey ofregion != midkey of HFile, also the number and size of keys vary.
353    * If this estimate isn't good enough, we can improve it later.
354    * @param fs  The FileSystem
355    * @param reference  The reference
356    * @param status  The reference FileStatus
357    * @return HDFS blocks distribution
358    */
359   private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
360       final FileSystem fs, final Reference reference, final FileStatus status)
361       throws IOException {
362     if (status == null) {
363       return null;
364     }
365 
366     long start = 0;
367     long length = 0;
368 
369     if (Reference.isTopFileRegion(reference.getFileRegion())) {
370       start = status.getLen()/2;
371       length = status.getLen() - status.getLen()/2;
372     } else {
373       start = 0;
374       length = status.getLen()/2;
375     }
376     return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
377   }
378 }