View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.regex.Matcher;
24  import java.util.regex.Pattern;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileStatus;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
34  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
35  import org.apache.hadoop.hbase.io.HFileLink;
36  import org.apache.hadoop.hbase.io.HalfStoreFileReader;
37  import org.apache.hadoop.hbase.io.Reference;
38  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
39  import org.apache.hadoop.hbase.util.FSUtils;
40  
41  /**
42   * Describe a StoreFile (hfile, reference, link)
43   */
44  @InterfaceAudience.Private
45  public class StoreFileInfo {
46    public static final Log LOG = LogFactory.getLog(StoreFileInfo.class);
47  
48    /**
49     * A non-capture group, for hfiles, so that this can be embedded.
50     * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
51     */
52    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?";
53  
54    /** Regex that will work for hfiles */
55    private static final Pattern HFILE_NAME_PATTERN =
56      Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
57  
58    /**
59     * Regex that will work for straight reference names (<hfile>.<parentEncRegion>)
60     * and hfilelink reference names (<table>=<region>-<hfile>.<parentEncRegion>)
61     * If reference, then the regex has more than just one group.
62     * Group 1, hfile/hfilelink pattern, is this file's id.
63     * Group 2 '(.+)' is the reference's parent region name.
64     */
65    private static final Pattern REF_NAME_PATTERN =
66      Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
67        HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
68  
69    // Configuration
70    private Configuration conf;
71  
72    // HDFS blocks distribution information
73    private HDFSBlocksDistribution hdfsBlocksDistribution = null;
74  
75    // If this storefile references another, this is the reference instance.
76    private final Reference reference;
77  
78    // If this storefile is a link to another, this is the link instance.
79    private final HFileLink link;
80  
81    // FileSystem information for the file.
82    private final FileStatus fileStatus;
83  
84    private RegionCoprocessorHost coprocessorHost;
85  
86    // timestamp on when the file was created, is 0 and ignored for reference or link files
87    private long createdTimestamp;
88  
89    /**
90     * Create a Store File Info
91     * @param conf the {@link Configuration} to use
92     * @param fs The current file system to use.
93     * @param path The {@link Path} of the file
94     */
95    public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path path)
96        throws IOException {
97      this(conf, fs, fs.getFileStatus(path));
98    }
99  
100   /**
101    * Create a Store File Info
102    * @param conf the {@link Configuration} to use
103    * @param fs The current file system to use.
104    * @param fileStatus The {@link FileStatus} of the file
105    */
106   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
107       throws IOException {
108     this.conf = conf;
109     this.fileStatus = fileStatus;
110     Path p = fileStatus.getPath();
111     if (HFileLink.isHFileLink(p)) {
112       // HFileLink
113       this.reference = null;
114       this.link = new HFileLink(conf, p);
115       if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
116     } else if (isReference(p)) {
117       this.reference = Reference.read(fs, p);
118       Path referencePath = getReferredToFile(p);
119       if (HFileLink.isHFileLink(referencePath)) {
120         // HFileLink Reference
121         this.link = new HFileLink(conf, referencePath);
122       } else {
123         // Reference
124         this.link = null;
125       }
126       if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
127         " reference to " + referencePath);
128     } else if (isHFile(p)) {
129       // HFile
130       this.createdTimestamp = fileStatus.getModificationTime();
131       this.reference = null;
132       this.link = null;
133     } else {
134       throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
135     }
136   }
137 
138   /**
139    * Sets the region coprocessor env.
140    * @param coprocessorHost
141    */
142   public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
143     this.coprocessorHost = coprocessorHost;
144   }
145 
146   /*
147    * @return the Reference object associated to this StoreFileInfo.
148    *         null if the StoreFile is not a reference.
149    */
150   public Reference getReference() {
151     return this.reference;
152   }
153 
154   /** @return True if the store file is a Reference */
155   public boolean isReference() {
156     return this.reference != null;
157   }
158 
159   /** @return True if the store file is a top Reference */
160   public boolean isTopReference() {
161     return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
162   }
163 
164   /** @return True if the store file is a link */
165   public boolean isLink() {
166     return this.link != null && this.reference == null;
167   }
168 
169   /** @return the HDFS block distribution */
170   public HDFSBlocksDistribution getHDFSBlockDistribution() {
171     return this.hdfsBlocksDistribution;
172   }
173 
174   /**
175    * Open a Reader for the StoreFile
176    * @param fs The current file system to use.
177    * @param cacheConf The cache configuration and block cache reference.
178    * @return The StoreFile.Reader for the file
179    */
180   public StoreFile.Reader open(final FileSystem fs,
181       final CacheConfig cacheConf, final boolean canUseDropBehind) throws IOException {
182     FSDataInputStreamWrapper in;
183     FileStatus status;
184 
185     final boolean doDropBehind = canUseDropBehind && cacheConf.shouldDropBehindCompaction();
186     if (this.link != null) {
187       // HFileLink
188       in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind);
189       status = this.link.getFileStatus(fs);
190     } else if (this.reference != null) {
191       // HFile Reference
192       Path referencePath = getReferredToFile(this.getPath());
193       in = new FSDataInputStreamWrapper(fs, referencePath,
194           doDropBehind);
195       status = fs.getFileStatus(referencePath);
196     } else {
197       in = new FSDataInputStreamWrapper(fs, this.getPath(),
198           doDropBehind);
199       status = fileStatus;
200     }
201     long length = status.getLen();
202     if (this.reference != null) {
203       hdfsBlocksDistribution = computeRefFileHDFSBlockDistribution(fs, reference, status);
204     } else {
205       hdfsBlocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, length);
206     }
207     StoreFile.Reader reader = null;
208     if (this.coprocessorHost != null) {
209       reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length,
210         cacheConf, reference);
211     }
212     if (reader == null) {
213       if (this.reference != null) {
214         reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference,
215           conf);
216       } else {
217         reader = new StoreFile.Reader(fs, this.getPath(), in, length, cacheConf, conf);
218       }
219     }
220     if (this.coprocessorHost != null) {
221       reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length,
222         cacheConf, reference, reader);
223     }
224     return reader;
225   }
226 
227   /**
228    * Compute the HDFS Block Distribution for this StoreFile
229    */
230   public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
231       throws IOException {
232     FileStatus status = getReferencedFileStatus(fs);
233     if (this.reference != null) {
234       return computeRefFileHDFSBlockDistribution(fs, reference, status);
235     } else {
236       return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
237     }
238   }
239 
240   /**
241    * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
242    * @param fs The current file system to use.
243    * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
244    */
245   public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
246     FileStatus status;
247     if (this.reference != null) {
248       if (this.link != null) {
249         // HFileLink Reference
250         status = link.getFileStatus(fs);
251       } else {
252         // HFile Reference
253         Path referencePath = getReferredToFile(this.getPath());
254         status = fs.getFileStatus(referencePath);
255       }
256     } else {
257       if (this.link != null) {
258         // HFileLink
259         status = link.getFileStatus(fs);
260       } else {
261         status = this.fileStatus;
262       }
263     }
264     return status;
265   }
266 
267   /** @return The {@link Path} of the file */
268   public Path getPath() {
269     return this.fileStatus.getPath();
270   }
271 
272   /** @return The {@link FileStatus} of the file */
273   public FileStatus getFileStatus() {
274     return this.fileStatus;
275   }
276 
277   /** @return Get the modification time of the file. */
278   public long getModificationTime() {
279     return this.fileStatus.getModificationTime();
280   }
281 
282   @Override
283   public String toString() {
284     return this.getPath() +
285       (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
286   }
287 
288   /**
289    * @param path Path to check.
290    * @return True if the path has format of a HFile.
291    */
292   public static boolean isHFile(final Path path) {
293     return isHFile(path.getName());
294   }
295 
296   public static boolean isHFile(final String fileName) {
297     Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
298     return m.matches() && m.groupCount() > 0;
299   }
300 
301   /**
302    * @param path Path to check.
303    * @return True if the path has format of a HStoreFile reference.
304    */
305   public static boolean isReference(final Path path) {
306     return isReference(path.getName());
307   }
308 
309   /**
310    * @param name file name to check.
311    * @return True if the path has format of a HStoreFile reference.
312    */
313   public static boolean isReference(final String name) {
314     Matcher m = REF_NAME_PATTERN.matcher(name);
315     return m.matches() && m.groupCount() > 1;
316   }
317 
318   /**
319    * @return timestamp when this file was created (as returned by filesystem)
320    */
321   public long getCreatedTimestamp() {
322     return createdTimestamp;
323   }
324 
325   /*
326    * Return path to the file referred to by a Reference.  Presumes a directory
327    * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
328    * @param p Path to a Reference file.
329    * @return Calculated path to parent region file.
330    * @throws IllegalArgumentException when path regex fails to match.
331    */
332   public static Path getReferredToFile(final Path p) {
333     Matcher m = REF_NAME_PATTERN.matcher(p.getName());
334     if (m == null || !m.matches()) {
335       LOG.warn("Failed match of store file name " + p.toString());
336       throw new IllegalArgumentException("Failed match of store file name " +
337           p.toString());
338     }
339 
340     // Other region name is suffix on the passed Reference file name
341     String otherRegion = m.group(2);
342     // Tabledir is up two directories from where Reference was written.
343     Path tableDir = p.getParent().getParent().getParent();
344     String nameStrippedOfSuffix = m.group(1);
345     LOG.debug("reference '" + p + "' to region=" + otherRegion + " hfile=" + nameStrippedOfSuffix);
346 
347     // Build up new path with the referenced region in place of our current
348     // region in the reference path.  Also strip regionname suffix from name.
349     return new Path(new Path(new Path(tableDir, otherRegion),
350       p.getParent().getName()), nameStrippedOfSuffix);
351   }
352 
353   /**
354    * Validate the store file name.
355    * @param fileName name of the file to validate
356    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
357    */
358   public static boolean validateStoreFileName(final String fileName) {
359     if (HFileLink.isHFileLink(fileName) || isReference(fileName))
360       return(true);
361     return !fileName.contains("-");
362   }
363 
364   /**
365    * Return if the specified file is a valid store file or not.
366    * @param fileStatus The {@link FileStatus} of the file
367    * @return <tt>true</tt> if the file is valid
368    */
369   public static boolean isValid(final FileStatus fileStatus)
370       throws IOException {
371     final Path p = fileStatus.getPath();
372 
373     if (fileStatus.isDir())
374       return false;
375 
376     // Check for empty hfile. Should never be the case but can happen
377     // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
378     // NOTE: that the HFileLink is just a name, so it's an empty file.
379     if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
380       LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
381       return false;
382     }
383 
384     return validateStoreFileName(p.getName());
385   }
386 
387   /**
388    * helper function to compute HDFS blocks distribution of a given reference
389    * file.For reference file, we don't compute the exact value. We use some
390    * estimate instead given it might be good enough. we assume bottom part
391    * takes the first half of reference file, top part takes the second half
392    * of the reference file. This is just estimate, given
393    * midkey ofregion != midkey of HFile, also the number and size of keys vary.
394    * If this estimate isn't good enough, we can improve it later.
395    * @param fs  The FileSystem
396    * @param reference  The reference
397    * @param status  The reference FileStatus
398    * @return HDFS blocks distribution
399    */
400   private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
401       final FileSystem fs, final Reference reference, final FileStatus status)
402       throws IOException {
403     if (status == null) {
404       return null;
405     }
406 
407     long start = 0;
408     long length = 0;
409 
410     if (Reference.isTopFileRegion(reference.getFileRegion())) {
411       start = status.getLen()/2;
412       length = status.getLen() - status.getLen()/2;
413     } else {
414       start = 0;
415       length = status.getLen()/2;
416     }
417     return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
418   }
419 }