View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.util.ArrayList;
25  import java.util.Collection;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.UUID;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.classification.InterfaceAudience;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FSDataInputStream;
35  import org.apache.hadoop.fs.FSDataOutputStream;
36  import org.apache.hadoop.fs.FileStatus;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.FileUtil;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.fs.permission.FsPermission;
41  import org.apache.hadoop.hbase.HColumnDescriptor;
42  import org.apache.hadoop.hbase.HConstants;
43  import org.apache.hadoop.hbase.HRegionInfo;
44  import org.apache.hadoop.hbase.HTableDescriptor;
45  import org.apache.hadoop.hbase.KeyValue;
46  import org.apache.hadoop.hbase.backup.HFileArchiver;
47  import org.apache.hadoop.hbase.fs.HFileSystem;
48  import org.apache.hadoop.hbase.io.Reference;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.FSHDFSUtils;
51  import org.apache.hadoop.hbase.util.FSUtils;
52  import org.apache.hadoop.hbase.util.Threads;
53  
54  /**
55   * View to an on-disk Region.
56   * Provides the set of methods necessary to interact with the on-disk region data.
57   */
58  @InterfaceAudience.Private
59  public class HRegionFileSystem {
60    public static final Log LOG = LogFactory.getLog(HRegionFileSystem.class);
61  
62    /** Name of the region info file that resides just under the region directory. */
63    public final static String REGION_INFO_FILE = ".regioninfo";
64  
65    /** Temporary subdirectory of the region directory used for merges. */
66    public static final String REGION_MERGES_DIR = ".merges";
67  
68    /** Temporary subdirectory of the region directory used for splits. */
69    public static final String REGION_SPLITS_DIR = ".splits";
70  
71    /** Temporary subdirectory of the region directory used for compaction output. */
72    private static final String REGION_TEMP_DIR = ".tmp";
73  
74    private final HRegionInfo regionInfo;
75    private final Configuration conf;
76    private final Path tableDir;
77    private final FileSystem fs;
78  
79    /**
80     * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the
81     * client level.
82     */
83    private final int hdfsClientRetriesNumber;
84    private final int baseSleepBeforeRetries;
85    private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
86    private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
87  
88    /**
89     * Create a view to the on-disk region
90     * @param conf the {@link Configuration} to use
91     * @param fs {@link FileSystem} that contains the region
92     * @param tableDir {@link Path} to where the table is being stored
93     * @param regionInfo {@link HRegionInfo} for region
94     */
95    HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
96        final HRegionInfo regionInfo) {
97      this.fs = fs;
98      this.conf = conf;
99      this.tableDir = tableDir;
100     this.regionInfo = regionInfo;
101     this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
102       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
103     this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
104       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
105  }
106 
107   /** @return the underlying {@link FileSystem} */
108   public FileSystem getFileSystem() {
109     return this.fs;
110   }
111 
112   /** @return the {@link HRegionInfo} that describe this on-disk region view */
113   public HRegionInfo getRegionInfo() {
114     return this.regionInfo;
115   }
116 
117   /** @return {@link Path} to the region's root directory. */
118   public Path getTableDir() {
119     return this.tableDir;
120   }
121 
122   /** @return {@link Path} to the region directory. */
123   public Path getRegionDir() {
124     return new Path(this.tableDir, this.regionInfo.getEncodedName());
125   }
126 
127   // ===========================================================================
128   //  Temp Helpers
129   // ===========================================================================
130   /** @return {@link Path} to the region's temp directory, used for file creations */
131   Path getTempDir() {
132     return new Path(getRegionDir(), REGION_TEMP_DIR);
133   }
134 
135   /**
136    * Clean up any temp detritus that may have been left around from previous operation attempts.
137    */
138   void cleanupTempDir() throws IOException {
139     deleteDir(getTempDir());
140   }
141 
142   // ===========================================================================
143   //  Store/StoreFile Helpers
144   // ===========================================================================
145   /**
146    * Returns the directory path of the specified family
147    * @param familyName Column Family Name
148    * @return {@link Path} to the directory of the specified family
149    */
150   public Path getStoreDir(final String familyName) {
151     return new Path(this.getRegionDir(), familyName);
152   }
153 
154   /**
155    * Create the store directory for the specified family name
156    * @param familyName Column Family Name
157    * @return {@link Path} to the directory of the specified family
158    * @throws IOException if the directory creation fails.
159    */
160   Path createStoreDir(final String familyName) throws IOException {
161     Path storeDir = getStoreDir(familyName);
162     if(!fs.exists(storeDir) && !createDir(storeDir))
163       throw new IOException("Failed creating "+storeDir);
164     return storeDir;
165   }
166 
167   /**
168    * Returns the store files available for the family.
169    * This methods performs the filtering based on the valid store files.
170    * @param familyName Column Family Name
171    * @return a set of {@link StoreFileInfo} for the specified family.
172    */
173   public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException {
174     return getStoreFiles(Bytes.toString(familyName));
175   }
176 
177   public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
178     return getStoreFiles(familyName, true);
179   }
180 
181   /**
182    * Returns the store files available for the family.
183    * This methods performs the filtering based on the valid store files.
184    * @param familyName Column Family Name
185    * @return a set of {@link StoreFileInfo} for the specified family.
186    */
187   public Collection<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate)
188       throws IOException {
189     Path familyDir = getStoreDir(familyName);
190     FileStatus[] files = FSUtils.listStatus(this.fs, familyDir);
191     if (files == null) {
192       LOG.debug("No StoreFiles for: " + familyDir);
193       return null;
194     }
195 
196     ArrayList<StoreFileInfo> storeFiles = new ArrayList<StoreFileInfo>(files.length);
197     for (FileStatus status: files) {
198       if (validate && !StoreFileInfo.isValid(status)) {
199         LOG.warn("Invalid StoreFile: " + status.getPath());
200         continue;
201       }
202 
203       storeFiles.add(new StoreFileInfo(this.conf, this.fs, status));
204     }
205     return storeFiles;
206   }
207 
208   /**
209    * Return Qualified Path of the specified family/file
210    *
211    * @param familyName Column Family Name
212    * @param fileName File Name
213    * @return The qualified Path for the specified family/file
214    */
215   Path getStoreFilePath(final String familyName, final String fileName) {
216     Path familyDir = getStoreDir(familyName);
217     return new Path(familyDir, fileName).makeQualified(this.fs);
218   }
219 
220   /**
221    * Return the store file information of the specified family/file.
222    *
223    * @param familyName Column Family Name
224    * @param fileName File Name
225    * @return The {@link StoreFileInfo} for the specified family/file
226    */
227   StoreFileInfo getStoreFileInfo(final String familyName, final String fileName)
228       throws IOException {
229     Path familyDir = getStoreDir(familyName);
230     FileStatus status = fs.getFileStatus(new Path(familyDir, fileName));
231     return new StoreFileInfo(this.conf, this.fs, status);
232   }
233 
234   /**
235    * Returns true if the specified family has reference files
236    * @param familyName Column Family Name
237    * @return true if family contains reference files
238    * @throws IOException
239    */
240   public boolean hasReferences(final String familyName) throws IOException {
241     FileStatus[] files = FSUtils.listStatus(fs, getStoreDir(familyName),
242         new FSUtils.ReferenceFileFilter(fs));
243     return files != null && files.length > 0;
244   }
245 
246   /**
247    * Check whether region has Reference file
248    * @param htd table desciptor of the region
249    * @return true if region has reference file
250    * @throws IOException
251    */
252   public boolean hasReferences(final HTableDescriptor htd) throws IOException {
253     for (HColumnDescriptor family : htd.getFamilies()) {
254       if (hasReferences(family.getNameAsString())) {
255         return true;
256       }
257     }
258     return false;
259   }
260 
261   /**
262    * @return the set of families present on disk
263    * @throws IOException
264    */
265   public Collection<String> getFamilies() throws IOException {
266     FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
267     if (fds == null) return null;
268 
269     ArrayList<String> families = new ArrayList<String>(fds.length);
270     for (FileStatus status: fds) {
271       families.add(status.getPath().getName());
272     }
273 
274     return families;
275   }
276 
277   /**
278    * Remove the region family from disk, archiving the store files.
279    * @param familyName Column Family Name
280    * @throws IOException if an error occours during the archiving
281    */
282   public void deleteFamily(final String familyName) throws IOException {
283     // archive family store files
284     HFileArchiver.archiveFamily(fs, conf, regionInfo, tableDir, Bytes.toBytes(familyName));
285 
286     // delete the family folder
287     Path familyDir = getStoreDir(familyName);
288     if(fs.exists(familyDir) && !deleteDir(familyDir))
289       throw new IOException("Could not delete family " + familyName
290           + " from FileSystem for region " + regionInfo.getRegionNameAsString() + "("
291           + regionInfo.getEncodedName() + ")");
292   }
293 
294   /**
295    * Generate a unique file name, used by createTempName() and commitStoreFile()
296    * @param suffix extra information to append to the generated name
297    * @return Unique file name
298    */
299   private static String generateUniqueName(final String suffix) {
300     String name = UUID.randomUUID().toString().replaceAll("-", "");
301     if (suffix != null) name += suffix;
302     return name;
303   }
304 
305   /**
306    * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
307    * to get a safer file creation.
308    * <code>
309    * Path file = fs.createTempName();
310    * ...StoreFile.Writer(file)...
311    * fs.commitStoreFile("family", file);
312    * </code>
313    *
314    * @return Unique {@link Path} of the temporary file
315    */
316   public Path createTempName() {
317     return createTempName(null);
318   }
319 
320   /**
321    * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
322    * to get a safer file creation.
323    * <code>
324    * Path file = fs.createTempName();
325    * ...StoreFile.Writer(file)...
326    * fs.commitStoreFile("family", file);
327    * </code>
328    *
329    * @param suffix extra information to append to the generated name
330    * @return Unique {@link Path} of the temporary file
331    */
332   public Path createTempName(final String suffix) {
333     return new Path(getTempDir(), generateUniqueName(suffix));
334   }
335 
336   /**
337    * Move the file from a build/temp location to the main family store directory.
338    * @param familyName Family that will gain the file
339    * @param buildPath {@link Path} to the file to commit.
340    * @return The new {@link Path} of the committed file
341    * @throws IOException
342    */
343   public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
344     return commitStoreFile(familyName, buildPath, -1, false);
345   }
346 
347   /**
348    * Move the file from a build/temp location to the main family store directory.
349    * @param familyName Family that will gain the file
350    * @param buildPath {@link Path} to the file to commit.
351    * @param seqNum Sequence Number to append to the file name (less then 0 if no sequence number)
352    * @param generateNewName False if you want to keep the buildPath name
353    * @return The new {@link Path} of the committed file
354    * @throws IOException
355    */
356   private Path commitStoreFile(final String familyName, final Path buildPath,
357       final long seqNum, final boolean generateNewName) throws IOException {
358     Path storeDir = getStoreDir(familyName);
359     if(!fs.exists(storeDir) && !createDir(storeDir))
360       throw new IOException("Failed creating " + storeDir);
361 
362     String name = buildPath.getName();
363     if (generateNewName) {
364       name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
365     }
366     Path dstPath = new Path(storeDir, name);
367     if (!fs.exists(buildPath)) {
368       throw new FileNotFoundException(buildPath.toString());
369     }
370     LOG.debug("Committing store file " + buildPath + " as " + dstPath);
371     // buildPath exists, therefore not doing an exists() check.
372     if (!rename(buildPath, dstPath)) {
373       throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
374     }
375     return dstPath;
376   }
377 
378 
379   /**
380    * Moves multiple store files to the relative region's family store directory.
381    * @param storeFiles list of store files divided by family
382    * @throws IOException
383    */
384   void commitStoreFiles(final Map<byte[], List<StoreFile>> storeFiles) throws IOException {
385     for (Map.Entry<byte[], List<StoreFile>> es: storeFiles.entrySet()) {
386       String familyName = Bytes.toString(es.getKey());
387       for (StoreFile sf: es.getValue()) {
388         commitStoreFile(familyName, sf.getPath());
389       }
390     }
391   }
392 
393   /**
394    * Archives the specified store file from the specified family.
395    * @param familyName Family that contains the store files
396    * @param filePath {@link Path} to the store file to remove
397    * @throws IOException if the archiving fails
398    */
399   public void removeStoreFile(final String familyName, final Path filePath)
400       throws IOException {
401     HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfo,
402         this.tableDir, Bytes.toBytes(familyName), filePath);
403   }
404 
405   /**
406    * Closes and archives the specified store files from the specified family.
407    * @param familyName Family that contains the store files
408    * @param storeFiles set of store files to remove
409    * @throws IOException if the archiving fails
410    */
411   public void removeStoreFiles(final String familyName, final Collection<StoreFile> storeFiles)
412       throws IOException {
413     HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfo,
414         this.tableDir, Bytes.toBytes(familyName), storeFiles);
415   }
416 
417   /**
418    * Bulk load: Add a specified store file to the specified family.
419    * If the source file is on the same different file-system is moved from the
420    * source location to the destination location, otherwise is copied over.
421    *
422    * @param familyName Family that will gain the file
423    * @param srcPath {@link Path} to the file to import
424    * @param seqNum Bulk Load sequence number
425    * @return The destination {@link Path} of the bulk loaded file
426    * @throws IOException
427    */
428   Path bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
429       throws IOException {
430     // Copy the file if it's on another filesystem
431     FileSystem srcFs = srcPath.getFileSystem(conf);
432     FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;
433 
434     // We can't compare FileSystem instances as equals() includes UGI instance
435     // as part of the comparison and won't work when doing SecureBulkLoad
436     // TODO deal with viewFS
437     if (!FSHDFSUtils.isSameHdfs(conf, srcFs, desFs)) {
438       LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
439           "the destination store. Copying file over to destination filesystem.");
440       Path tmpPath = createTempName();
441       FileUtil.copy(srcFs, srcPath, fs, tmpPath, false, conf);
442       LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
443       srcPath = tmpPath;
444     }
445 
446     return commitStoreFile(familyName, srcPath, seqNum, true);
447   }
448 
449   // ===========================================================================
450   //  Splits Helpers
451   // ===========================================================================
452   /** @return {@link Path} to the temp directory used during split operations */
453   Path getSplitsDir() {
454     return new Path(getRegionDir(), REGION_SPLITS_DIR);
455   }
456 
457   Path getSplitsDir(final HRegionInfo hri) {
458     return new Path(getSplitsDir(), hri.getEncodedName());
459   }
460 
461   /**
462    * Clean up any split detritus that may have been left around from previous split attempts.
463    */
464   void cleanupSplitsDir() throws IOException {
465     deleteDir(getSplitsDir());
466   }
467 
468   /**
469    * Clean up any split detritus that may have been left around from previous
470    * split attempts.
471    * Call this method on initial region deploy.
472    * @throws IOException
473    */
474   void cleanupAnySplitDetritus() throws IOException {
475     Path splitdir = this.getSplitsDir();
476     if (!fs.exists(splitdir)) return;
477     // Look at the splitdir.  It could have the encoded names of the daughter
478     // regions we tried to make.  See if the daughter regions actually got made
479     // out under the tabledir.  If here under splitdir still, then the split did
480     // not complete.  Try and do cleanup.  This code WILL NOT catch the case
481     // where we successfully created daughter a but regionserver crashed during
482     // the creation of region b.  In this case, there'll be an orphan daughter
483     // dir in the filesystem.  TOOD: Fix.
484     FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs));
485     if (daughters != null) {
486       for (FileStatus daughter: daughters) {
487         Path daughterDir = new Path(getTableDir(), daughter.getPath().getName());
488         if (fs.exists(daughterDir) && !deleteDir(daughterDir)) {
489           throw new IOException("Failed delete of " + daughterDir);
490         }
491       }
492     }
493     cleanupSplitsDir();
494     LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
495   }
496 
497   /**
498    * Remove daughter region
499    * @param regionInfo daughter {@link HRegionInfo}
500    * @throws IOException
501    */
502   void cleanupDaughterRegion(final HRegionInfo regionInfo) throws IOException {
503     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
504     if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
505       throw new IOException("Failed delete of " + regionDir);
506     }
507   }
508 
509   /**
510    * Commit a daughter region, moving it from the split temporary directory
511    * to the proper location in the filesystem.
512    *
513    * @param regionInfo                 daughter {@link org.apache.hadoop.hbase.HRegionInfo}
514    * @throws IOException
515    */
516   Path commitDaughterRegion(final HRegionInfo regionInfo)
517       throws IOException {
518     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
519     Path daughterTmpDir = this.getSplitsDir(regionInfo);
520 
521     if (fs.exists(daughterTmpDir)) {
522 
523       // Write HRI to a file in case we need to recover hbase:meta
524       Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE);
525       byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
526       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
527 
528       // Move the daughter temp dir to the table dir
529       if (!rename(daughterTmpDir, regionDir)) {
530         throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir);
531       }
532     }
533 
534     return regionDir;
535   }
536 
537   /**
538    * Create the region splits directory.
539    */
540   void createSplitsDir() throws IOException {
541     Path splitdir = getSplitsDir();
542     if (fs.exists(splitdir)) {
543       LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
544       if (!deleteDir(splitdir)) {
545         throw new IOException("Failed deletion of " + splitdir
546             + " before creating them again.");
547       }
548     }
549     // splitDir doesn't exists now. No need to do an exists() call for it.
550     if (!createDir(splitdir)) {
551       throw new IOException("Failed create of " + splitdir);
552     }
553   }
554 
555   /**
556    * Write out a split reference. Package local so it doesnt leak out of
557    * regionserver.
558    * @param hri {@link HRegionInfo} of the destination
559    * @param familyName Column Family Name
560    * @param f File to split.
561    * @param splitRow Split Row
562    * @param top True if we are referring to the top half of the hfile.
563    * @param splitPolicy
564    * @return Path to created reference.
565    * @throws IOException
566    */
567   Path splitStoreFile(final HRegionInfo hri, final String familyName, final StoreFile f,
568       final byte[] splitRow, final boolean top, RegionSplitPolicy splitPolicy) throws IOException {
569 
570     if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) {
571       // Check whether the split row lies in the range of the store file
572       // If it is outside the range, return directly.
573       try {
574         if (top) {
575           //check if larger than last key.
576           KeyValue splitKey = KeyValue.createFirstOnRow(splitRow);
577           byte[] lastKey = f.createReader().getLastKey();
578           // If lastKey is null means storefile is empty.
579           if (lastKey == null) return null;
580           if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(),
581             splitKey.getKeyOffset(), splitKey.getKeyLength(), lastKey, 0, lastKey.length) > 0) {
582             return null;
583           }
584         } else {
585           //check if smaller than first key
586           KeyValue splitKey = KeyValue.createLastOnRow(splitRow);
587           byte[] firstKey = f.createReader().getFirstKey();
588           // If firstKey is null means storefile is empty.
589           if (firstKey == null) return null;
590           if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(),
591             splitKey.getKeyOffset(), splitKey.getKeyLength(), firstKey, 0, firstKey.length) < 0) {
592             return null;
593           }
594         }
595       } finally {
596         f.closeReader(true);
597       }
598     }
599 
600     Path splitDir = new Path(getSplitsDir(hri), familyName);
601     // A reference to the bottom half of the hsf store file.
602     Reference r =
603       top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
604     // Add the referred-to regions name as a dot separated suffix.
605     // See REF_NAME_REGEX regex above.  The referred-to regions name is
606     // up in the path of the passed in <code>f</code> -- parentdir is family,
607     // then the directory above is the region name.
608     String parentRegionName = regionInfo.getEncodedName();
609     // Write reference with same file id only with the other region name as
610     // suffix and into the new region location (under same family).
611     Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
612     return r.write(fs, p);
613   }
614 
615   // ===========================================================================
616   //  Merge Helpers
617   // ===========================================================================
618   /** @return {@link Path} to the temp directory used during merge operations */
619   Path getMergesDir() {
620     return new Path(getRegionDir(), REGION_MERGES_DIR);
621   }
622 
623   Path getMergesDir(final HRegionInfo hri) {
624     return new Path(getMergesDir(), hri.getEncodedName());
625   }
626 
627   /**
628    * Clean up any merge detritus that may have been left around from previous merge attempts.
629    */
630   void cleanupMergesDir() throws IOException {
631     deleteDir(getMergesDir());
632   }
633 
634   /**
635    * Remove merged region
636    * @param mergedRegion {@link HRegionInfo}
637    * @throws IOException
638    */
639   void cleanupMergedRegion(final HRegionInfo mergedRegion) throws IOException {
640     Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
641     if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
642       throw new IOException("Failed delete of " + regionDir);
643     }
644   }
645 
646   /**
647    * Create the region merges directory.
648    * @throws IOException If merges dir already exists or we fail to create it.
649    * @see HRegionFileSystem#cleanupMergesDir()
650    */
651   void createMergesDir() throws IOException {
652     Path mergesdir = getMergesDir();
653     if (fs.exists(mergesdir)) {
654       LOG.info("The " + mergesdir
655           + " directory exists.  Hence deleting it to recreate it");
656       if (!fs.delete(mergesdir, true)) {
657         throw new IOException("Failed deletion of " + mergesdir
658             + " before creating them again.");
659       }
660     }
661     if (!fs.mkdirs(mergesdir))
662       throw new IOException("Failed create of " + mergesdir);
663   }
664 
665   /**
666    * Write out a merge reference under the given merges directory. Package local
667    * so it doesnt leak out of regionserver.
668    * @param mergedRegion {@link HRegionInfo} of the merged region
669    * @param familyName Column Family Name
670    * @param f File to create reference.
671    * @param mergedDir
672    * @return Path to created reference.
673    * @throws IOException
674    */
675   Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName,
676       final StoreFile f, final Path mergedDir)
677       throws IOException {
678     Path referenceDir = new Path(new Path(mergedDir,
679         mergedRegion.getEncodedName()), familyName);
680     // A whole reference to the store file.
681     Reference r = Reference.createTopReference(regionInfo.getStartKey());
682     // Add the referred-to regions name as a dot separated suffix.
683     // See REF_NAME_REGEX regex above. The referred-to regions name is
684     // up in the path of the passed in <code>f</code> -- parentdir is family,
685     // then the directory above is the region name.
686     String mergingRegionName = regionInfo.getEncodedName();
687     // Write reference with same file id only with the other region name as
688     // suffix and into the new region location (under same family).
689     Path p = new Path(referenceDir, f.getPath().getName() + "."
690         + mergingRegionName);
691     return r.write(fs, p);
692   }
693 
694   /**
695    * Commit a merged region, moving it from the merges temporary directory to
696    * the proper location in the filesystem.
697    * @param mergedRegionInfo merged region {@link HRegionInfo}
698    * @throws IOException
699    */
700   void commitMergedRegion(final HRegionInfo mergedRegionInfo) throws IOException {
701     Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName());
702     Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
703     // Move the tmp dir in the expected location
704     if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
705       if (!fs.rename(mergedRegionTmpDir, regionDir)) {
706         throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
707             + regionDir);
708       }
709     }
710   }
711 
712   // ===========================================================================
713   //  Create/Open/Delete Helpers
714   // ===========================================================================
715   /**
716    * Log the current state of the region
717    * @param LOG log to output information
718    * @throws IOException if an unexpected exception occurs
719    */
720   void logFileSystemState(final Log LOG) throws IOException {
721     FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG);
722   }
723 
724   /**
725    * @param hri
726    * @return Content of the file we write out to the filesystem under a region
727    * @throws IOException
728    */
729   private static byte[] getRegionInfoFileContent(final HRegionInfo hri) throws IOException {
730     return hri.toDelimitedByteArray();
731   }
732 
733   /**
734    * Create a {@link HRegionInfo} from the serialized version on-disk.
735    * @param fs {@link FileSystem} that contains the Region Info file
736    * @param regionDir {@link Path} to the Region Directory that contains the Info file
737    * @return An {@link HRegionInfo} instance gotten from the Region Info file.
738    * @throws IOException if an error occurred during file open/read operation.
739    */
740   public static HRegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
741       throws IOException {
742     FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
743     try {
744       return HRegionInfo.parseFrom(in);
745     } finally {
746       in.close();
747     }
748   }
749 
750   /**
751    * Write the .regioninfo file on-disk.
752    */
753   private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
754       final Path regionInfoFile, final byte[] content) throws IOException {
755     // First check to get the permissions
756     FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
757     // Write the RegionInfo file content
758     FSDataOutputStream out = FSUtils.create(fs, regionInfoFile, perms, null);
759     try {
760       out.write(content);
761     } finally {
762       out.close();
763     }
764   }
765 
766   /**
767    * Write out an info file under the stored region directory. Useful recovering mangled regions.
768    * If the regionInfo already exists on-disk, then we fast exit.
769    */
770   void checkRegionInfoOnFilesystem() throws IOException {
771     // Compose the content of the file so we can compare to length in filesystem. If not same,
772     // rewrite it (it may have been written in the old format using Writables instead of pb). The
773     // pb version is much shorter -- we write now w/o the toString version -- so checking length
774     // only should be sufficient. I don't want to read the file every time to check if it pb
775     // serialized.
776     byte[] content = getRegionInfoFileContent(regionInfo);
777     try {
778       Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
779 
780       FileStatus status = fs.getFileStatus(regionInfoFile);
781       if (status != null && status.getLen() == content.length) {
782         // Then assume the content good and move on.
783         // NOTE: that the length is not sufficient to define the the content matches.
784         return;
785       }
786 
787       LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
788       if (!fs.delete(regionInfoFile, false)) {
789         throw new IOException("Unable to remove existing " + regionInfoFile);
790       }
791     } catch (FileNotFoundException e) {
792       LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfo.getEncodedName());
793     }
794 
795     // Write HRI to a file in case we need to recover hbase:meta
796     writeRegionInfoOnFilesystem(content, true);
797   }
798 
799   /**
800    * Write out an info file under the region directory. Useful recovering mangled regions.
801    * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
802    */
803   private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
804     byte[] content = getRegionInfoFileContent(regionInfo);
805     writeRegionInfoOnFilesystem(content, useTempDir);
806   }
807 
808   /**
809    * Write out an info file under the region directory. Useful recovering mangled regions.
810    * @param regionInfoContent serialized version of the {@link HRegionInfo}
811    * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
812    */
813   private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
814       final boolean useTempDir) throws IOException {
815     Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
816     if (useTempDir) {
817       // Create in tmpDir and then move into place in case we crash after
818       // create but before close. If we don't successfully close the file,
819       // subsequent region reopens will fail the below because create is
820       // registered in NN.
821 
822       // And then create the file
823       Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
824 
825       // If datanode crashes or if the RS goes down just before the close is called while trying to
826       // close the created regioninfo file in the .tmp directory then on next
827       // creation we will be getting AlreadyCreatedException.
828       // Hence delete and create the file if exists.
829       if (FSUtils.isExists(fs, tmpPath)) {
830         FSUtils.delete(fs, tmpPath, true);
831       }
832 
833       // Write HRI to a file in case we need to recover hbase:meta
834       writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
835 
836       // Move the created file to the original path
837       if (fs.exists(tmpPath) &&  !rename(tmpPath, regionInfoFile)) {
838         throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
839       }
840     } else {
841       // Write HRI to a file in case we need to recover hbase:meta
842       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
843     }
844   }
845 
846   /**
847    * Create a new Region on file-system.
848    * @param conf the {@link Configuration} to use
849    * @param fs {@link FileSystem} from which to add the region
850    * @param tableDir {@link Path} to where the table is being stored
851    * @param regionInfo {@link HRegionInfo} for region to be added
852    * @throws IOException if the region creation fails due to a FileSystem exception.
853    */
854   public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
855       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
856     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
857     Path regionDir = regionFs.getRegionDir();
858 
859     if (fs.exists(regionDir)) {
860       LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
861       throw new IOException("The specified region already exists on disk: " + regionDir);
862     }
863 
864     // Create the region directory
865     if (!createDirOnFileSystem(fs, conf, regionDir)) {
866       LOG.warn("Unable to create the region directory: " + regionDir);
867       throw new IOException("Unable to create region directory: " + regionDir);
868     }
869 
870     // Write HRI to a file in case we need to recover hbase:meta
871     regionFs.writeRegionInfoOnFilesystem(false);
872     return regionFs;
873   }
874 
875   /**
876    * Open Region from file-system.
877    * @param conf the {@link Configuration} to use
878    * @param fs {@link FileSystem} from which to add the region
879    * @param tableDir {@link Path} to where the table is being stored
880    * @param regionInfo {@link HRegionInfo} for region to be added
881    * @param readOnly True if you don't want to edit the region data
882    * @throws IOException if the region creation fails due to a FileSystem exception.
883    */
884   public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
885       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo, boolean readOnly)
886       throws IOException {
887     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
888     Path regionDir = regionFs.getRegionDir();
889 
890     if (!fs.exists(regionDir)) {
891       LOG.warn("Trying to open a region that do not exists on disk: " + regionDir);
892       throw new IOException("The specified region do not exists on disk: " + regionDir);
893     }
894 
895     if (!readOnly) {
896       // Cleanup temporary directories
897       regionFs.cleanupTempDir();
898       regionFs.cleanupSplitsDir();
899       regionFs.cleanupMergesDir();
900 
901       // if it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
902       regionFs.checkRegionInfoOnFilesystem();
903     }
904 
905     return regionFs;
906   }
907 
908   /**
909    * Remove the region from the table directory, archiving the region's hfiles.
910    * @param conf the {@link Configuration} to use
911    * @param fs {@link FileSystem} from which to remove the region
912    * @param tableDir {@link Path} to where the table is being stored
913    * @param regionInfo {@link HRegionInfo} for region to be deleted
914    * @throws IOException if the request cannot be completed
915    */
916   public static void deleteRegionFromFileSystem(final Configuration conf,
917       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
918     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
919     Path regionDir = regionFs.getRegionDir();
920 
921     if (!fs.exists(regionDir)) {
922       LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir);
923       return;
924     }
925 
926     if (LOG.isDebugEnabled()) {
927       LOG.debug("DELETING region " + regionDir);
928     }
929 
930     // Archive region
931     Path rootDir = FSUtils.getRootDir(conf);
932     HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
933 
934     // Delete empty region dir
935     if (!fs.delete(regionDir, true)) {
936       LOG.warn("Failed delete of " + regionDir);
937     }
938   }
939 
940   /**
941    * Creates a directory. Assumes the user has already checked for this directory existence.
942    * @param dir
943    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
944    *         whether the directory exists or not, and returns true if it exists.
945    * @throws IOException
946    */
947   boolean createDir(Path dir) throws IOException {
948     int i = 0;
949     IOException lastIOE = null;
950     do {
951       try {
952         return fs.mkdirs(dir);
953       } catch (IOException ioe) {
954         lastIOE = ioe;
955         if (fs.exists(dir)) return true; // directory is present
956         sleepBeforeRetry("Create Directory", i+1);
957       }
958     } while (++i <= hdfsClientRetriesNumber);
959     throw new IOException("Exception in createDir", lastIOE);
960   }
961 
962   /**
963    * Renames a directory. Assumes the user has already checked for this directory existence.
964    * @param srcpath
965    * @param dstPath
966    * @return true if rename is successful.
967    * @throws IOException
968    */
969   boolean rename(Path srcpath, Path dstPath) throws IOException {
970     IOException lastIOE = null;
971     int i = 0;
972     do {
973       try {
974         return fs.rename(srcpath, dstPath);
975       } catch (IOException ioe) {
976         lastIOE = ioe;
977         if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
978         // dir is not there, retry after some time.
979         sleepBeforeRetry("Rename Directory", i+1);
980       }
981     } while (++i <= hdfsClientRetriesNumber);
982     throw new IOException("Exception in rename", lastIOE);
983   }
984 
985   /**
986    * Deletes a directory. Assumes the user has already checked for this directory existence.
987    * @param dir
988    * @return true if the directory is deleted.
989    * @throws IOException
990    */
991   boolean deleteDir(Path dir) throws IOException {
992     IOException lastIOE = null;
993     int i = 0;
994     do {
995       try {
996         return fs.delete(dir, true);
997       } catch (IOException ioe) {
998         lastIOE = ioe;
999         if (!fs.exists(dir)) return true;
1000         // dir is there, retry deleting after some time.
1001         sleepBeforeRetry("Delete Directory", i+1);
1002       }
1003     } while (++i <= hdfsClientRetriesNumber);
1004     throw new IOException("Exception in DeleteDir", lastIOE);
1005   }
1006 
1007   /**
1008    * sleeping logic; handles the interrupt exception.
1009    */
1010   private void sleepBeforeRetry(String msg, int sleepMultiplier) {
1011     sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1012   }
1013 
1014   /**
1015    * Creates a directory for a filesystem and configuration object. Assumes the user has already
1016    * checked for this directory existence.
1017    * @param fs
1018    * @param conf
1019    * @param dir
1020    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1021    *         whether the directory exists or not, and returns true if it exists.
1022    * @throws IOException
1023    */
1024   private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1025       throws IOException {
1026     int i = 0;
1027     IOException lastIOE = null;
1028     int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1029       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1030     int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1031       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1032     do {
1033       try {
1034         return fs.mkdirs(dir);
1035       } catch (IOException ioe) {
1036         lastIOE = ioe;
1037         if (fs.exists(dir)) return true; // directory is present
1038         sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1039       }
1040     } while (++i <= hdfsClientRetriesNumber);
1041     throw new IOException("Exception in createDir", lastIOE);
1042   }
1043 
1044   /**
1045    * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1046    * for this to avoid re-looking for the integer values.
1047    */
1048   private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1049       int hdfsClientRetriesNumber) {
1050     if (sleepMultiplier > hdfsClientRetriesNumber) {
1051       LOG.debug(msg + ", retries exhausted");
1052       return;
1053     }
1054     LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1055     Threads.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
1056   }
1057 }