1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.util;
21  
22  import java.io.DataInputStream;
23  import java.io.EOFException;
24  import java.io.FileNotFoundException;
25  import java.io.IOException;
26  import java.lang.reflect.Method;
27  import java.net.URI;
28  import java.net.URISyntaxException;
29  import java.util.ArrayList;
30  import java.util.HashMap;
31  import java.util.List;
32  import java.util.Map;
33  import java.util.regex.Pattern;
34  
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.fs.BlockLocation;
39  import org.apache.hadoop.fs.FSDataInputStream;
40  import org.apache.hadoop.fs.FSDataOutputStream;
41  import org.apache.hadoop.fs.FileStatus;
42  import org.apache.hadoop.fs.FileSystem;
43  import org.apache.hadoop.fs.Path;
44  import org.apache.hadoop.fs.PathFilter;
45  import org.apache.hadoop.fs.permission.FsAction;
46  import org.apache.hadoop.fs.permission.FsPermission;
47  import org.apache.hadoop.hbase.HColumnDescriptor;
48  import org.apache.hadoop.hbase.HConstants;
49  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
50  import org.apache.hadoop.hbase.HRegionInfo;
51  import org.apache.hadoop.hbase.RemoteExceptionHandler;
52  import org.apache.hadoop.hbase.master.HMaster;
53  import org.apache.hadoop.hbase.regionserver.HRegion;
54  import org.apache.hadoop.hbase.security.User;
55  import org.apache.hadoop.hdfs.DistributedFileSystem;
56  import org.apache.hadoop.io.SequenceFile;
57  import org.apache.hadoop.security.AccessControlException;
58  import org.apache.hadoop.security.UserGroupInformation;
59  import org.apache.hadoop.util.ReflectionUtils;
60  import org.apache.hadoop.util.StringUtils;
61  
62  /**
63   * Utility methods for interacting with the underlying file system.
64   */
65  public abstract class FSUtils {
66    private static final Log LOG = LogFactory.getLog(FSUtils.class);
67  
68    /** Full access permissions (starting point for a umask) */
69    private static final String FULL_RWX_PERMISSIONS = "777";
70  
71    protected FSUtils() {
72      super();
73    }
74  
75    public static FSUtils getInstance(FileSystem fs, Configuration conf) {
76      String scheme = fs.getUri().getScheme();
77      if (scheme == null) {
78        LOG.warn("Could not find scheme for uri " +
79            fs.getUri() + ", defaulting to hdfs");
80        scheme = "hdfs";
81      }
82      Class<?> fsUtilsClass = conf.getClass("hbase.fsutil." +
83          scheme + ".impl", FSHDFSUtils.class); // Default to HDFS impl
84      FSUtils fsUtils = (FSUtils)ReflectionUtils.newInstance(fsUtilsClass, conf);
85      return fsUtils;
86    }
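
  // Usage sketch (editor's illustration, not part of the original class):
  // getInstance() resolves a scheme-specific FSUtils implementation from the
  // configuration, so an alternate implementation for the "hdfs" scheme can be
  // plugged in under the "hbase.fsutil.hdfs.impl" key:
  //
  //   Configuration conf = new Configuration();
  //   conf.setClass("hbase.fsutil.hdfs.impl", FSHDFSUtils.class, FSUtils.class);
  //   FileSystem fs = FileSystem.get(conf);
  //   FSUtils fsUtils = FSUtils.getInstance(fs, conf);
  //   // fsUtils.recoverFileLease(fs, somePath, conf);  // somePath is hypothetical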
87  
88    /**
89     * Delete if exists.
90     * @param fs filesystem object
91     * @param dir directory to delete
92     * @return True if deleted <code>dir</code>
93     * @throws IOException e
94     */
95    public static boolean deleteDirectory(final FileSystem fs, final Path dir)
96    throws IOException {
97      return fs.exists(dir) && fs.delete(dir, true);
98    }
99  
100   /**
101    * Check if directory exists.  If it does not, create it.
102    * @param fs filesystem object
103    * @param dir path to check
104    * @return Path
105    * @throws IOException e
106    */
107   public Path checkdir(final FileSystem fs, final Path dir) throws IOException {
108     if (!fs.exists(dir)) {
109       fs.mkdirs(dir);
110     }
111     return dir;
112   }
113 
114   /**
115    * Create the specified file on the filesystem. By default, this will:
116    * <ol>
117    * <li>overwrite the file if it exists</li>
118    * <li>apply the umask in the configuration (if it is enabled)</li>
119    * <li>use the fs configured buffer size (or 4096 if not set)</li>
120    * <li>use the default replication</li>
121    * <li>use the default block size</li>
122    * <li>not track progress</li>
123    * </ol>
124    *
125    * @param fs {@link FileSystem} on which to write the file
126    * @param path {@link Path} to the file to write
127    * @param perm permissions to set on the created file
128    * @return output stream to the created file
129    * @throws IOException if the file cannot be created
130    */
131   public static FSDataOutputStream create(FileSystem fs, Path path,
132       FsPermission perm) throws IOException {
133     return create(fs, path, perm, true);
134   }
135 
136   /**
137    * Create the specified file on the filesystem. By default, this will:
138    * <ol>
139    * <li>apply the umask in the configuration (if it is enabled)</li>
140    * <li>use the fs configured buffer size (or 4096 if
141    * not set)</li>
142    * <li>use the default replication</li>
143    * <li>use the default block size</li>
144    * <li>not track progress</li>
145    * </ol>
146    *
147    * @param fs {@link FileSystem} on which to write the file
148    * @param path {@link Path} to the file to write
149    * @param perm permissions to set on the created file
150    * @param overwrite Whether or not the created file should be overwritten.
151    * @return output stream to the created file
152    * @throws IOException if the file cannot be created
153    */
154   public static FSDataOutputStream create(FileSystem fs, Path path,
155       FsPermission perm, boolean overwrite) throws IOException {
156     LOG.debug("Creating file=" + path + " with permission=" + perm);
157 
158     return fs.create(path, perm, overwrite,
159         fs.getConf().getInt("io.file.buffer.size", 4096),
160         fs.getDefaultReplication(), fs.getDefaultBlockSize(), null);
161   }
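
  // Example (editor's sketch): creating a file with explicit permissions.  The
  // path and permission string below are illustrative only:
  //
  //   FileSystem fs = FileSystem.get(conf);
  //   Path p = new Path("/hbase/.tmp/example");
  //   FSDataOutputStream out = FSUtils.create(fs, p, new FsPermission("700"));
  //   try {
  //     out.writeUTF("example");
  //   } finally {
  //     out.close();
  //   }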
162 
163   /**
164    * Get the file permissions specified in the configuration, if they are
165    * enabled.
166    *
167    * @param fs filesystem that the file will be created on.
168    * @param conf configuration to read for determining if permissions are
169    *          enabled and which to use
170    * @param permissionConfKey property key in the configuration to use when
171    *          finding the permission
172    * @return the permission to use when creating a new file on the fs. If
173    *         special permissions are not specified in the configuration, then
174    *         the default permissions on the fs will be returned.
175    */
176   public static FsPermission getFilePermissions(final FileSystem fs,
177       final Configuration conf, final String permissionConfKey) {
178     boolean enablePermissions = conf.getBoolean(
179         HConstants.ENABLE_DATA_FILE_UMASK, false);
180 
181     if (enablePermissions) {
182       try {
183         FsPermission perm = new FsPermission(FULL_RWX_PERMISSIONS);
184         // make sure that we have a mask, if not, go default.
185         String mask = conf.get(permissionConfKey);
186         if (mask == null)
187           return FsPermission.getDefault();
188         // apply the umask
189         FsPermission umask = new FsPermission(mask);
190         return perm.applyUMask(umask);
191       } catch (IllegalArgumentException e) {
192         LOG.warn(
193             "Invalid umask configured: "
194                 + conf.get(permissionConfKey)
195                 + ", using default file permissions.", e);
196         return FsPermission.getDefault();
197       }
198     }
199     return FsPermission.getDefault();
200   }
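
  // Worked example (editor's note): with HConstants.ENABLE_DATA_FILE_UMASK set
  // to true and a umask of "022" stored under the given key, the returned
  // permission is 777 & ~022 = 755 (rwxr-xr-x).  A sketch, assuming the key
  // name "hbase.data.umask":
  //
  //   conf.setBoolean(HConstants.ENABLE_DATA_FILE_UMASK, true);
  //   conf.set("hbase.data.umask", "022");
  //   FsPermission perm = FSUtils.getFilePermissions(fs, conf, "hbase.data.umask");
  //   // perm is now rwxr-xr-x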
201 
202   /**
203    * Checks to see if the specified file system is available
204    *
205    * @param fs filesystem
206    * @throws IOException e
207    */
208   public static void checkFileSystemAvailable(final FileSystem fs)
209   throws IOException {
210     if (!(fs instanceof DistributedFileSystem)) {
211       return;
212     }
213     IOException exception = null;
214     DistributedFileSystem dfs = (DistributedFileSystem) fs;
215     try {
216       if (dfs.exists(new Path("/"))) {
217         return;
218       }
219     } catch (IOException e) {
220       exception = RemoteExceptionHandler.checkIOException(e);
221     }
222     try {
223       fs.close();
224     } catch (Exception e) {
225       LOG.error("file system close failed: ", e);
226     }
227     IOException io = new IOException("File system is not available");
228     io.initCause(exception);
229     throw io;
230   }
231 
232   /**
233    * We use reflection because the two-argument
234    * {@link DistributedFileSystem#setSafeMode(FSConstants.SafeModeAction, boolean)}
235    * is not in hadoop 1.1.
236    * @param dfs the DistributedFileSystem to check
237    * @return whether we're in safe mode
238    * @throws IOException e
239    */
240   private static boolean isInSafeMode(DistributedFileSystem dfs) throws IOException {
241     boolean inSafeMode = false;
242     try {
243       Method m = DistributedFileSystem.class.getMethod("setSafeMode", new Class<?> []{
244           org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.class, boolean.class});
245       inSafeMode = (Boolean) m.invoke(dfs,
246         org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_GET, true);
247     } catch (Exception e) {
248       if (e instanceof IOException) throw (IOException) e;
249       
250       // Check whether dfs is on safemode.
251       inSafeMode = dfs.setSafeMode(
252         org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_GET);      
253     }
254     return inSafeMode;    
255   }
256   
257   /**
258    * Check whether dfs is in safemode.
259    * @param conf
260    * @throws IOException
261    */
262   public static void checkDfsSafeMode(final Configuration conf)
263   throws IOException {
264     boolean isInSafeMode = false;
265     FileSystem fs = FileSystem.get(conf);
266     if (fs instanceof DistributedFileSystem) {
267       DistributedFileSystem dfs = (DistributedFileSystem)fs;
268       isInSafeMode = isInSafeMode(dfs);
269     }
270     if (isInSafeMode) {
271       throw new IOException("File system is in safe mode and cannot be written to now");
272     }
273   }
274 
275   /**
276    * Reads the current version of the file system from the version file
277    *
278    * @param fs filesystem object
279    * @param rootdir root hbase directory
280    * @return null if no version file exists, version string otherwise.
281    * @throws IOException e
282    */
283   public static String getVersion(FileSystem fs, Path rootdir)
284   throws IOException {
285     Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME);
286     String version = null;
287     if (fs.exists(versionFile)) {
288       FSDataInputStream s =
289         fs.open(versionFile);
290       try {
291         version = DataInputStream.readUTF(s);
292       } catch (EOFException eof) {
293         LOG.warn("Version file was empty, odd, will try to set it.");
294       } finally {
295         s.close();
296       }
297     }
298     return version;
299   }
300 
301   /**
302    * Verifies current version of file system
303    *
304    * @param fs file system
305    * @param rootdir root directory of HBase installation
306    * @param message if true, issues a message on System.out
307    *
308    * @throws IOException e
309    */
310   public static void checkVersion(FileSystem fs, Path rootdir,
311       boolean message) throws IOException {
312     checkVersion(fs, rootdir, message, 0,
313         HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
314   }
315 
316   /**
317    * Verifies current version of file system
318    *
319    * @param fs file system
320    * @param rootdir root directory of HBase installation
321    * @param message if true, issues a message on System.out
322    * @param wait wait interval
323    * @param retries number of times to retry
324    *
325    * @throws IOException e
326    */
327   public static void checkVersion(FileSystem fs, Path rootdir,
328       boolean message, int wait, int retries) throws IOException {
329     String version = getVersion(fs, rootdir);
330 
331     if (version == null) {
332       if (!rootRegionExists(fs, rootdir)) {
333         // rootDir is empty (no version file and no root region)
334         // just create new version file (HBASE-1195)
335         FSUtils.setVersion(fs, rootdir, wait, retries);
336         return;
337       }
338     } else if (version.compareTo(HConstants.FILE_SYSTEM_VERSION) == 0)
339         return;
340 
341     // Version is deprecated; require migration.
342     // Output on stdout so user sees it in terminal.
343     String msg = "File system needs to be upgraded."
344       + "  You have version " + version
345       + " and I want version " + HConstants.FILE_SYSTEM_VERSION
346       + ".  Run the '${HBASE_HOME}/bin/hbase migrate' script.";
347     if (message) {
348       System.out.println("WARNING! " + msg);
349     }
350     throw new FileSystemVersionException(msg);
351   }
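
  // Bootstrap sketch (editor's illustration): on a fresh deploy there is no
  // version file and no root region, so checkVersion() writes a new version
  // file rather than failing.  The 10s wait below is illustrative:
  //
  //   FSUtils.checkVersion(fs, rootdir, true, 10 * 1000,
  //       HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);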
352 
353   /**
354    * Sets version of file system
355    *
356    * @param fs filesystem object
357    * @param rootdir hbase root
358    * @throws IOException e
359    */
360   public static void setVersion(FileSystem fs, Path rootdir)
361   throws IOException {
362     setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0,
363         HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
364   }
365 
366   /**
367    * Sets version of file system
368    *
369    * @param fs filesystem object
370    * @param rootdir hbase root
371    * @param wait time to wait for retry
372    * @param retries number of times to retry before failing
373    * @throws IOException e
374    */
375   public static void setVersion(FileSystem fs, Path rootdir, int wait, int retries)
376   throws IOException {
377     setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait, retries);
378   }
379 
380 
381   /**
382    * Sets version of file system
383    *
384    * @param fs filesystem object
385    * @param rootdir hbase root directory
386    * @param version version to set
387    * @param wait time to wait for retry
388    * @param retries number of times to retry before throwing an IOException
389    * @throws IOException e
390    */
391   public static void setVersion(FileSystem fs, Path rootdir, String version,
392       int wait, int retries) throws IOException {
393     Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME);
394     while (true) {
395       try {
396         FSDataOutputStream s = fs.create(versionFile);
397         s.writeUTF(version);
398         LOG.debug("Created version file at " + rootdir.toString() +
399             " with version=" + version);
400         s.close();
401         return;
402       } catch (IOException e) {
403         if (retries > 0) {
404           LOG.warn("Unable to create version file at " + rootdir.toString() +
405               ", retrying: " + e.getMessage());
406           fs.delete(versionFile, false);
407           try {
408             if (wait > 0) {
409               Thread.sleep(wait);
410             }
411           } catch (InterruptedException ex) {
412             // ignore
413           }
414           retries--;
415         } else {
416           throw e;
417         }
418       }
419     }
420   }
421 
422   /**
423    * Checks that a cluster ID file exists in the HBase root directory
424    * @param fs the root directory FileSystem
425    * @param rootdir the HBase root directory in HDFS
426    * @param wait how long to wait between retries
427    * @return <code>true</code> if the file exists, otherwise <code>false</code>
428    * @throws IOException if checking the FileSystem fails
429    */
430   public static boolean checkClusterIdExists(FileSystem fs, Path rootdir,
431       int wait) throws IOException {
432     while (true) {
433       try {
434         Path filePath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME);
435         return fs.exists(filePath);
436       } catch (IOException ioe) {
437         if (wait > 0) {
438           LOG.warn("Unable to check cluster ID file in " + rootdir.toString() +
439               ", retrying in "+wait+"msec: "+StringUtils.stringifyException(ioe));
440           try {
441             Thread.sleep(wait);
442           } catch (InterruptedException ie) {
443             Thread.interrupted();
444             break;
445           }
446         } else {
447           throw ioe;
448         }
449       }
450     }
451     return false;
452   }
453 
454   /**
455    * Returns the value of the unique cluster ID stored for this HBase instance.
456    * @param fs the root directory FileSystem
457    * @param rootdir the path to the HBase root directory
458    * @return the unique cluster identifier
459    * @throws IOException if reading the cluster ID file fails
460    */
461   public static String getClusterId(FileSystem fs, Path rootdir)
462       throws IOException {
463     Path idPath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME);
464     String clusterId = null;
465     if (fs.exists(idPath)) {
466       FSDataInputStream in = fs.open(idPath);
467       try {
468         clusterId = in.readUTF();
469       } catch (EOFException eof) {
470         LOG.warn("Cluster ID file "+idPath.toString()+" was empty");
471       } finally {
472         in.close();
473       }
474     } else {
475       LOG.warn("Cluster ID file does not exist at " + idPath.toString());
476     }
477     return clusterId;
478   }
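
  // Usage sketch (editor's illustration): read the cluster ID, writing one
  // first if it has not been set.  Generating the ID with UUID here is
  // illustrative; normally the master assigns it at startup:
  //
  //   Path rootdir = FSUtils.getRootDir(conf);
  //   FileSystem fs = rootdir.getFileSystem(conf);
  //   if (!FSUtils.checkClusterIdExists(fs, rootdir, 1000)) {
  //     FSUtils.setClusterId(fs, rootdir, java.util.UUID.randomUUID().toString(), 1000);
  //   }
  //   String clusterId = FSUtils.getClusterId(fs, rootdir);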
479 
480   /**
481    * Writes a new unique identifier for this cluster to the "hbase.id" file
482    * in the HBase root directory
483    * @param fs the root directory FileSystem
484    * @param rootdir the path to the HBase root directory
485    * @param clusterId the unique identifier to store
486    * @param wait how long (in milliseconds) to wait between retries
487    * @throws IOException if writing to the FileSystem fails and no wait value was given
488    */
489   public static void setClusterId(FileSystem fs, Path rootdir, String clusterId,
490       int wait) throws IOException {
491     while (true) {
492       try {
493         Path filePath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME);
494         FSDataOutputStream s = fs.create(filePath);
495         s.writeUTF(clusterId);
496         s.close();
497         if (LOG.isDebugEnabled()) {
498           LOG.debug("Created cluster ID file at " + filePath.toString() +
499               " with ID: " + clusterId);
500         }
501         return;
502       } catch (IOException ioe) {
503         if (wait > 0) {
504           LOG.warn("Unable to create cluster ID file in " + rootdir.toString() +
505               ", retrying in "+wait+"msec: "+StringUtils.stringifyException(ioe));
506           try {
507             Thread.sleep(wait);
508           } catch (InterruptedException ie) {
509             Thread.interrupted();
510             break;
511           }
512         } else {
513           throw ioe;
514         }
515       }
516     }
517   }
518 
519   /**
520    * Verifies root directory path is a valid URI with a scheme
521    *
522    * @param root root directory path
523    * @return Passed <code>root</code> argument.
524    * @throws IOException if not a valid URI with a scheme
525    */
526   public static Path validateRootPath(Path root) throws IOException {
527     try {
528       URI rootURI = new URI(root.toString());
529       String scheme = rootURI.getScheme();
530       if (scheme == null) {
531         throw new IOException("Root directory does not have a scheme");
532       }
533       return root;
534     } catch (URISyntaxException e) {
535       IOException io = new IOException("Root directory path is not a valid " +
536         "URI -- check your " + HConstants.HBASE_DIR + " configuration");
537       io.initCause(e);
538       throw io;
539     }
540   }
541 
542   /**
543    * If DFS, check whether it is in safe mode and, if so, wait until it exits.
544    * @param conf configuration
545    * @param wait Sleep between retries
546    * @throws IOException e
547    */
548   public static void waitOnSafeMode(final Configuration conf,
549     final long wait)
550   throws IOException {
551     FileSystem fs = FileSystem.get(conf);
552     if (!(fs instanceof DistributedFileSystem)) return;
553     DistributedFileSystem dfs = (DistributedFileSystem)fs;
554     // Make sure dfs is not in safe mode
555     while (isInSafeMode(dfs)) {
556       LOG.info("Waiting for dfs to exit safe mode...");
557       try {
558         Thread.sleep(wait);
559       } catch (InterruptedException e) {
560         //continue
561       }
562     }
563   }
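
  // Typical startup sequence (editor's sketch): verify the filesystem is
  // reachable, then block until HDFS leaves safe mode before touching
  // hbase.rootdir.  The 10s retry interval is illustrative:
  //
  //   FileSystem fs = FileSystem.get(conf);
  //   FSUtils.checkFileSystemAvailable(fs);
  //   FSUtils.waitOnSafeMode(conf, 10 * 1000);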
564 
565   /**
566    * Return the 'path' component of a Path.  In Hadoop, Path is a URI.  This
567    * method returns the 'path' component of a Path's URI: e.g. If a Path is
568    * <code>hdfs://example.org:9000/hbase_trunk/TestTable/compaction.dir</code>,
569    * this method returns <code>/hbase_trunk/TestTable/compaction.dir</code>.
570    * This method is useful if you want to print out a Path without the
571    * qualifying FileSystem instance.
572    * @param p FileSystem Path whose 'path' component we are to return.
573    * @return the 'path' component of the Path's URI.
574    */
575   public static String getPath(Path p) {
576     return p.toUri().getPath();
577   }
578 
579   /**
580    * @param c configuration
581    * @return Path to hbase root directory: i.e. <code>hbase.rootdir</code> from
582    * configuration as a qualified Path.
583    * @throws IOException e
584    */
585   public static Path getRootDir(final Configuration c) throws IOException {
586     Path p = new Path(c.get(HConstants.HBASE_DIR));
587     FileSystem fs = p.getFileSystem(c);
588     return p.makeQualified(fs);
589   }
590 
591   public static void setRootDir(final Configuration c, final Path root) throws IOException {
592     c.set(HConstants.HBASE_DIR, root.toString());
593   }
594 
595   /**
596    * Checks if root region exists
597    *
598    * @param fs file system
599    * @param rootdir root directory of HBase installation
600    * @return true if exists
601    * @throws IOException e
602    */
603   public static boolean rootRegionExists(FileSystem fs, Path rootdir)
604   throws IOException {
605     Path rootRegionDir =
606       HRegion.getRegionDir(rootdir, HRegionInfo.ROOT_REGIONINFO);
607     return fs.exists(rootRegionDir);
608   }
609 
610   /**
611    * Compute HDFS blocks distribution of a given file, or a portion of the file
612    * @param fs file system
613    * @param status file status of the file
614    * @param start start position of the portion
615    * @param length length of the portion
616    * @return The HDFS blocks distribution
617    */
618   static public HDFSBlocksDistribution computeHDFSBlocksDistribution(
619     final FileSystem fs, FileStatus status, long start, long length)
620     throws IOException {
621     HDFSBlocksDistribution blocksDistribution = new HDFSBlocksDistribution();
622     BlockLocation [] blockLocations =
623       fs.getFileBlockLocations(status, start, length);
624     for(BlockLocation bl : blockLocations) {
625       String [] hosts = bl.getHosts();
626       long len = bl.getLength();
627       blocksDistribution.addHostsAndBlockWeight(hosts, len);
628     }
629 
630     return blocksDistribution;
631   }
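
  // Example (editor's sketch): measuring how local a store file is to a given
  // host.  Assumes HDFSBlocksDistribution#getBlockLocalityIndex(String); the
  // path and hostname are illustrative:
  //
  //   FileStatus status = fs.getFileStatus(storeFilePath);
  //   HDFSBlocksDistribution dist =
  //       FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
  //   float locality = dist.getBlockLocalityIndex("host1.example.com");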
632 
633 
634 
635   /**
636    * Runs through the hbase rootdir and checks all stores have only
637    * one file in them -- that is, they've been major compacted.  Looks
638    * at root and meta tables too.
639    * @param fs filesystem
640    * @param hbaseRootDir hbase root directory
641    * @return True if this hbase install is major compacted.
642    * @throws IOException e
643    */
644   public static boolean isMajorCompacted(final FileSystem fs,
645       final Path hbaseRootDir)
646   throws IOException {
647     // Presumes any directory under hbase.rootdir is a table.
648     FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, new DirFilter(fs));
649     for (FileStatus tableDir : tableDirs) {
650       // Skip the .log directory.  All others should be tables.  Inside a table,
651       // there are compaction.dir directories to skip.  Otherwise, all else
652       // should be regions.  Then in each region, should only be family
653       // directories.  Under each of these, should be one file only.
654       Path d = tableDir.getPath();
655       if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) {
656         continue;
657       }
658       FileStatus[] regionDirs = fs.listStatus(d, new DirFilter(fs));
659       for (FileStatus regionDir : regionDirs) {
660         Path dd = regionDir.getPath();
661         if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) {
662           continue;
663         }
664         // Else its a region name.  Now look in region for families.
665         FileStatus[] familyDirs = fs.listStatus(dd, new DirFilter(fs));
666         for (FileStatus familyDir : familyDirs) {
667           Path family = familyDir.getPath();
668           // Now in family make sure only one file.
669           FileStatus[] familyStatus = fs.listStatus(family);
670           if (familyStatus.length > 1) {
671             LOG.debug(family.toString() + " has " + familyStatus.length +
672                 " files.");
673             return false;
674           }
675         }
676       }
677     }
678     return true;
679   }
680 
681   // TODO move this method OUT of FSUtils. No dependencies on HMaster
682   /**
683    * Returns the total overall fragmentation percentage. Includes .META. and
684    * -ROOT- as well.
685    *
686    * @param master  The master defining the HBase root and file system.
687    * @return The overall fragmentation percentage, or -1 if unavailable.
688    * @throws IOException When scanning the directory fails.
689    */
690   public static int getTotalTableFragmentation(final HMaster master)
691   throws IOException {
692     Map<String, Integer> map = getTableFragmentation(master);
693     return map != null && map.size() > 0 ? map.get("-TOTAL-") : -1;
694   }
695 
696   /**
697    * Runs through the HBase rootdir and checks how many stores for each table
698    * have more than one file in them. Checks -ROOT- and .META. too. The total
699    * percentage across all tables is stored under the special key "-TOTAL-".
700    *
701    * @param master  The master defining the HBase root and file system.
702    * @return A map for each table and its percentage.
703    * @throws IOException When scanning the directory fails.
704    */
705   public static Map<String, Integer> getTableFragmentation(
706     final HMaster master)
707   throws IOException {
708     Path path = getRootDir(master.getConfiguration());
709     // since HMaster.getFileSystem() is package private
710     FileSystem fs = path.getFileSystem(master.getConfiguration());
711     return getTableFragmentation(fs, path);
712   }
713 
714   /**
715    * Runs through the HBase rootdir and checks how many stores for each table
716    * have more than one file in them. Checks -ROOT- and .META. too. The total
717    * percentage across all tables is stored under the special key "-TOTAL-".
718    *
719    * @param fs  The file system to use.
720    * @param hbaseRootDir  The root directory to scan.
721    * @return A map for each table and its percentage.
722    * @throws IOException When scanning the directory fails.
723    */
724   public static Map<String, Integer> getTableFragmentation(
725     final FileSystem fs, final Path hbaseRootDir)
726   throws IOException {
727     Map<String, Integer> frags = new HashMap<String, Integer>();
728     int cfCountTotal = 0;
729     int cfFragTotal = 0;
730     DirFilter df = new DirFilter(fs);
731     // presumes any directory under hbase.rootdir is a table
732     FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, df);
733     for (FileStatus tableDir : tableDirs) {
734       // Skip the .log directory.  All others should be tables.  Inside a table,
735       // there are compaction.dir directories to skip.  Otherwise, all else
736       // should be regions.  Then in each region, should only be family
737       // directories.  Under each of these, should be one file only.
738       Path d = tableDir.getPath();
739       if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) {
740         continue;
741       }
742       int cfCount = 0;
743       int cfFrag = 0;
744       FileStatus[] regionDirs = fs.listStatus(d, df);
745       for (FileStatus regionDir : regionDirs) {
746         Path dd = regionDir.getPath();
747         if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) {
748           continue;
749         }
750         // else its a region name, now look in region for families
751         FileStatus[] familyDirs = fs.listStatus(dd, df);
752         for (FileStatus familyDir : familyDirs) {
753           cfCount++;
754           cfCountTotal++;
755           Path family = familyDir.getPath();
756           // now in family make sure only one file
757           FileStatus[] familyStatus = fs.listStatus(family);
758           if (familyStatus.length > 1) {
759             cfFrag++;
760             cfFragTotal++;
761           }
762         }
763       }
764       // compute percentage per table and store in result list
765       frags.put(d.getName(), Math.round((float) cfFrag / cfCount * 100));
766     }
767     // set overall percentage for all tables
768     frags.put("-TOTAL-", Math.round((float) cfFragTotal / cfCountTotal * 100));
769     return frags;
770   }
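
  // Usage sketch (editor's illustration): print per-table fragmentation, where
  // a table's percentage is the share of its column families holding more than
  // one store file:
  //
  //   Map<String, Integer> frags = FSUtils.getTableFragmentation(fs, rootdir);
  //   for (Map.Entry<String, Integer> e : frags.entrySet()) {
  //     System.out.println(e.getKey() + ": " + e.getValue() + "%");
  //   }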
771 
772   /**
773    * Expects to find -ROOT- directory.
774    * @param fs filesystem
775    * @param hbaseRootDir hbase root directory
776    * @return True if this is a pre-0.20 layout.
777    * @throws IOException e
778    */
779   public static boolean isPre020FileLayout(final FileSystem fs,
780     final Path hbaseRootDir)
781   throws IOException {
782     Path mapfiles = new Path(new Path(new Path(new Path(hbaseRootDir, "-ROOT-"),
783       "70236052"), "info"), "mapfiles");
784     return fs.exists(mapfiles);
785   }
786 
787   /**
788    * Runs through the hbase rootdir and checks all stores have only
789    * one file in them -- that is, they've been major compacted.  Looks
790    * at root and meta tables too.  This version differs from
791    * {@link #isMajorCompacted(FileSystem, Path)} in that it expects a
792    * pre-0.20.0 hbase layout on the filesystem.  Used when migrating.
793    * @param fs filesystem
794    * @param hbaseRootDir hbase root directory
795    * @return True if this hbase install is major compacted.
796    * @throws IOException e
797    */
798   public static boolean isMajorCompactedPre020(final FileSystem fs,
799       final Path hbaseRootDir)
800   throws IOException {
801     // Presumes any directory under hbase.rootdir is a table.
802     FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, new DirFilter(fs));
803     for (FileStatus tableDir : tableDirs) {
804       // Inside a table, there are compaction.dir directories to skip.
805       // Otherwise, all else should be regions.  Then in each region, should
806       // only be family directories.  Under each of these, should be a mapfile
807       // and info directory and in these only one file.
808       Path d = tableDir.getPath();
809       if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) {
810         continue;
811       }
812       FileStatus[] regionDirs = fs.listStatus(d, new DirFilter(fs));
813       for (FileStatus regionDir : regionDirs) {
814         Path dd = regionDir.getPath();
815         if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) {
816           continue;
817         }
818         // Else its a region name.  Now look in region for families.
819         FileStatus[] familyDirs = fs.listStatus(dd, new DirFilter(fs));
820         for (FileStatus familyDir : familyDirs) {
821           Path family = familyDir.getPath();
822           FileStatus[] infoAndMapfile = fs.listStatus(family);
823           // Assert that only info and mapfile in family dir.
824           if (infoAndMapfile.length == 0) continue; // an empty family dir is fine
825           if (infoAndMapfile.length != 2) {
826             LOG.debug(family.toString() + " has more than just info and mapfile: " + infoAndMapfile.length);
827             return false;
828           }
829           // Make sure the two entries are named info and mapfiles.
830           for (int ll = 0; ll < 2; ll++) {
831             if (infoAndMapfile[ll].getPath().getName().equals("info") ||
832                 infoAndMapfile[ll].getPath().getName().equals("mapfiles"))
833               continue;
834             LOG.debug("Unexpected directory name: " +
835                 infoAndMapfile[ll].getPath());
836             return false;
837           }
838           // Now in family, there are 'mapfile' and 'info' subdirs.  Just
839           // look in the 'mapfile' subdir.
840           FileStatus[] familyStatus =
841               fs.listStatus(new Path(family, "mapfiles"));
842           if (familyStatus.length > 1) {
843             LOG.debug(family.toString() + " has " + familyStatus.length +
844                 " files.");
845             return false;
846           }
847         }
848       }
849     }
850     return true;
851   }
852 
853   /**
854    * A {@link PathFilter} that returns only regular files.
855    */
856   static class FileFilter implements PathFilter {
857     private final FileSystem fs;
858 
859     public FileFilter(final FileSystem fs) {
860       this.fs = fs;
861     }
862 
863     @Override
864     public boolean accept(Path p) {
865       try {
866         return fs.isFile(p);
867       } catch (IOException e) {
868         LOG.debug("unable to verify if path=" + p + " is a regular file", e);
869         return false;
870       }
871     }
872   }
873 
874   /**
875    * A {@link PathFilter} that returns directories.
876    */
877   public static class DirFilter implements PathFilter {
878     private final FileSystem fs;
879 
880     public DirFilter(final FileSystem fs) {
881       this.fs = fs;
882     }
883 
884     @Override
885     public boolean accept(Path p) {
886       boolean isValid = false;
887       try {
888         if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(p.getName())) {
889           isValid = false;
890         } else {
891           isValid = this.fs.getFileStatus(p).isDir();
892         }
893       } catch (IOException e) {
894         LOG.warn("Unable to verify if " + p + " is a valid directory", e);
895       }
896       return isValid;
897     }
898   }
899 
900   /**
901    * Heuristic to determine whether it is safe to open a file for append.
902    * Looks both for dfs.support.append and uses reflection to search
903    * for SequenceFile.Writer.syncFs() or FSDataOutputStream.hflush().
904    * @param conf configuration to check
905    * @return True if append is supported
906    */
907   public static boolean isAppendSupported(final Configuration conf) {
908     boolean append = conf.getBoolean("dfs.support.append", false);
909     if (append) {
910       try {
911         // TODO: The implementation that comes back when we do a createWriter
912         // may not be using SequenceFile so the below is not a definitive test.
913         // Will do for now (hdfs-200).
914         SequenceFile.Writer.class.getMethod("syncFs", new Class<?> []{});
915         append = true;
916       } catch (SecurityException e) {
917       } catch (NoSuchMethodException e) {
918         append = false;
919       }
920     }
921     if (!append) {
922       // Look for the 0.21, 0.22, new-style append evidence.
923       try {
924         FSDataOutputStream.class.getMethod("hflush", new Class<?> []{});
925         append = true;
926       } catch (NoSuchMethodException e) {
927         append = false;
928       }
929     }
930     return append;
931   }
932 
933   /**
934    * @param conf
935    * @return True if the filesystem's scheme is 'hdfs'.
936    * @throws IOException
937    */
938   public static boolean isHDFS(final Configuration conf) throws IOException {
939     FileSystem fs = FileSystem.get(conf);
940     String scheme = fs.getUri().getScheme();
941     return scheme.equalsIgnoreCase("hdfs");
942   }
943 
944   /**
945    * Recover file lease. Used when a file is suspected
946    * to have been left open by another process.
947    * @param fs FileSystem handle
948    * @param p Path of file to recover lease
949    * @param conf Configuration handle
950    * @throws IOException
951    */
952   public abstract void recoverFileLease(final FileSystem fs, final Path p,
953       Configuration conf) throws IOException;
954 
955   /**
956    * @param fs
957    * @param rootdir
958    * @return All the table directories under <code>rootdir</code>. Ignores non-table
959    * HBase folders such as .logs, .oldlogs, .corrupt, .META., and -ROOT-.
960    * @throws IOException
961    */
962   public static List<Path> getTableDirs(final FileSystem fs, final Path rootdir)
963   throws IOException {
964     // presumes any directory under hbase.rootdir is a table
965     FileStatus [] dirs = fs.listStatus(rootdir, new DirFilter(fs));
966     List<Path> tabledirs = new ArrayList<Path>(dirs.length);
967     for (FileStatus dir: dirs) {
968       Path p = dir.getPath();
969       String tableName = p.getName();
970       if (!HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName)) {
971         tabledirs.add(p);
972       }
973     }
974     return tabledirs;
975   }
976 
977   public static Path getTablePath(Path rootdir, byte [] tableName) {
978     return getTablePath(rootdir, Bytes.toString(tableName));
979   }
980 
981   public static Path getTablePath(Path rootdir, final String tableName) {
982     return new Path(rootdir, tableName);
983   }
984 
985   /**
986    * Filter for region directories: hex-named dirs only (and thus nothing starting with '.').
987    */
988   public static class RegionDirFilter implements PathFilter {
989     // This pattern will accept 0.90+ style hex region dirs and older numeric region dir names.
990     final public static Pattern regionDirPattern = Pattern.compile("^[0-9a-f]*$");
991     final FileSystem fs;
992 
993     public RegionDirFilter(FileSystem fs) {
994       this.fs = fs;
995     }
996 
997     @Override
998     public boolean accept(Path rd) {
999       if (!regionDirPattern.matcher(rd.getName()).matches()) {
1000         return false;
1001       }
1002 
1003       try {
1004         return fs.getFileStatus(rd).isDir();
1005       } catch (IOException ioe) {
1006         // Maybe the file was moved or the fs was disconnected.
1007         LOG.warn("Skipping file " + rd +" due to IOException", ioe);
1008         return false;
1009       }
1010     }
1011   }
1012 
1013   /**
1014    * Given a particular table dir, return all the regiondirs inside it, excluding files such as
1015    * .tableinfo
1016    * @param fs A file system for the Path
1017    * @param tableDir Path to a specific table directory <hbase.rootdir>/<tabledir>
1018    * @return List of paths to valid region directories in table dir.
1019    * @throws IOException
1020    */
1021   public static List<Path> getRegionDirs(final FileSystem fs, final Path tableDir) throws IOException {
1022     // assumes we are in a table dir.
1023     FileStatus[] rds = fs.listStatus(tableDir, new RegionDirFilter(fs));
1024     List<Path> regionDirs = new ArrayList<Path>(rds.length);
1025     for (FileStatus rdfs: rds) {
1026       Path rdPath = rdfs.getPath();
1027       regionDirs.add(rdPath);
1028     }
1029     return regionDirs;
1030   }
1031 
1032   /**
1033    * Filter for all dirs that are legal column family names.  This is generally used for colfam
1034    * dirs <hbase.rootdir>/<tabledir>/<regiondir>/<colfamdir>.
1035    */
1036   public static class FamilyDirFilter implements PathFilter {
1037     final FileSystem fs;
1038 
1039     public FamilyDirFilter(FileSystem fs) {
1040       this.fs = fs;
1041     }
1042 
1043     @Override
1044     public boolean accept(Path rd) {
1045       try {
1046         // throws IAE if invalid
1047         HColumnDescriptor.isLegalFamilyName(Bytes.toBytes(rd.getName()));
1048       } catch (IllegalArgumentException iae) {
1049         // path name is an invalid family name and thus is excluded.
1050         return false;
1051       }
1052 
1053       try {
1054         return fs.getFileStatus(rd).isDir();
1055       } catch (IOException ioe) {
1056         // Maybe the file was moved or the fs was disconnected.
1057         LOG.warn("Skipping file " + rd +" due to IOException", ioe);
1058         return false;
1059       }
1060     }
1061   }
1062 
1063   /**
1064    * Given a particular region dir, return all the familydirs inside it
1065    *
1066    * @param fs A file system for the Path
1067    * @param regionDir Path to a specific region directory
1068    * @return List of paths to valid family directories in region dir.
1069    * @throws IOException
1070    */
1071   public static List<Path> getFamilyDirs(final FileSystem fs, final Path regionDir) throws IOException {
1072     // assumes we are in a region dir.
1073     FileStatus[] fds = fs.listStatus(regionDir, new FamilyDirFilter(fs));
1074     List<Path> familyDirs = new ArrayList<Path>(fds.length);
1075     for (FileStatus fdfs: fds) {
1076       Path fdPath = fdfs.getPath();
1077       familyDirs.add(fdPath);
1078     }
1079     return familyDirs;
1080   }
1081 
1082   /**
1083    * Filter for HFiles that excludes reference files.
1084    */
1085   public static class HFileFilter implements PathFilter {
1086     // This pattern will accept 0.90+ style hex hfile names but reject reference files
1087     final public static Pattern hfilePattern = Pattern.compile("^([0-9a-f]+)$");
1088 
1089     final FileSystem fs;
1090 
1091     public HFileFilter(FileSystem fs) {
1092       this.fs = fs;
1093     }
1094 
1095     @Override
1096     public boolean accept(Path rd) {
1097       if (!hfilePattern.matcher(rd.getName()).matches()) {
1098         return false;
1099       }
1100 
1101       try {
1102         // only files
1103         return !fs.getFileStatus(rd).isDir();
1104       } catch (IOException ioe) {
1105         // Maybe the file was moved or the fs was disconnected.
1106         LOG.warn("Skipping file " + rd +" due to IOException", ioe);
1107         return false;
1108       }
1109     }
1110   }
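
  // Putting the filters together (editor's sketch): walk the layout
  // rootdir -> table -> region -> family -> hfile using the helpers above:
  //
  //   for (Path table : FSUtils.getTableDirs(fs, rootdir)) {
  //     for (Path region : FSUtils.getRegionDirs(fs, table)) {
  //       for (Path family : FSUtils.getFamilyDirs(fs, region)) {
  //         for (FileStatus hfile : fs.listStatus(family, new HFileFilter(fs))) {
  //           System.out.println(hfile.getPath());
  //         }
  //       }
  //     }
  //   }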
1111 
1112   /**
1113    * @param conf
1114    * @return Returns the filesystem of the hbase rootdir.
1115    * @throws IOException
1116    */
1117   public static FileSystem getCurrentFileSystem(Configuration conf)
1118   throws IOException {
1119     return getRootDir(conf).getFileSystem(conf);
1120   }
1121 
1122   /**
1123    * Runs through the HBase rootdir and creates a reverse lookup map for
1124    * table StoreFile names to the full Path.
1125    * <br>
1126    * Example...<br>
1127    * Key = 3944417774205889744  <br>
1128    * Value = hdfs://localhost:51169/user/userid/-ROOT-/70236052/info/3944417774205889744
1129    *
1130    * @param fs  The file system to use.
1131    * @param hbaseRootDir  The root directory to scan.
1132    * @return Map keyed by StoreFile name with a value of the full Path.
1133    * @throws IOException When scanning the directory fails.
1134    */
1135   public static Map<String, Path> getTableStoreFilePathMap(
1136     final FileSystem fs, final Path hbaseRootDir)
1137   throws IOException {
1138     Map<String, Path> map = new HashMap<String, Path>();
1139     
1140     // if this method looks similar to 'getTableFragmentation' that is because 
1141     // it was borrowed from it.
1142     
1143     DirFilter df = new DirFilter(fs);
1144     // presumes any directory under hbase.rootdir is a table
1145     FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, df);
1146     for (FileStatus tableDir : tableDirs) {
1147       // Skip the .log and other non-table directories.  All others should be tables.
1148       // Inside a table, there are compaction.dir directories to skip.  Otherwise, all else
1149       // should be regions. 
1150       Path d = tableDir.getPath();
1151       if (HConstants.HBASE_NON_TABLE_DIRS.contains(d.getName())) {
1152         continue;
1153       }
1154       FileStatus[] regionDirs = fs.listStatus(d, df);
1155       for (FileStatus regionDir : regionDirs) {
1156         Path dd = regionDir.getPath();
1157         if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) {
1158           continue;
1159         }
1160         // else its a region name, now look in region for families
1161         FileStatus[] familyDirs = fs.listStatus(dd, df);
1162         for (FileStatus familyDir : familyDirs) {
1163           Path family = familyDir.getPath();
1164           // now in family, iterate over the StoreFiles and
1165           // put in map
1166           FileStatus[] familyStatus = fs.listStatus(family);
1167           for (FileStatus sfStatus : familyStatus) {
1168             Path sf = sfStatus.getPath();
1169             map.put( sf.getName(), sf);
1170           }
1171 
1172         }
1173       }
1174     }
1175     return map;
1176   }
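
  // Example (editor's sketch): resolve a bare store file name, such as one
  // found in a reference or log message, back to its full path.  The key below
  // reuses the example from the javadoc above:
  //
  //   Map<String, Path> storeFiles = FSUtils.getTableStoreFilePathMap(fs, rootdir);
  //   Path full = storeFiles.get("3944417774205889744");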
1177 
1178   /**
1179    * Calls fs.listStatus() and treats FileNotFoundException as non-fatal
1180    * This accommodates differences between hadoop versions
1181    *
1182    * @param fs file system
1183    * @param dir directory
1184    * @param filter path filter
1185    * @return null if <code>dir</code> doesn't exist or is empty, otherwise FileStatus array
1186    */
1187   public static FileStatus [] listStatus(final FileSystem fs,
1188       final Path dir, final PathFilter filter) throws IOException {
1189     FileStatus [] status = null;
1190     try {
1191       status = filter == null ? fs.listStatus(dir) : fs.listStatus(dir, filter);
1192     } catch (FileNotFoundException fnfe) {
1193       // if directory doesn't exist, return null
1194       LOG.debug(dir + " doesn't exist");
1195     }
1196     if (status == null || status.length < 1) return null;
1197     return status;
1198   }
1199 
1200   /**
1201    * Calls fs.listStatus() and treats FileNotFoundException as non-fatal.
1202    * This accommodates differences between hadoop versions.
1203    *
1204    * @param fs file system
1205    * @param dir directory
1206    * @return null if <code>dir</code> doesn't exist or is empty, otherwise FileStatus array
1207    */
1208   public static FileStatus[] listStatus(final FileSystem fs, final Path dir) throws IOException {
1209     return listStatus(fs, dir, null);
1210   }
1211 
1212   /**
1213    * Calls fs.delete() and returns its result.
1214    *
1215    * @param fs file system
1216    * @param path path to delete
1217    * @param recursive whether to recursively delete directory contents
1218    * @return the value returned by fs.delete()
1219    * @throws IOException e
1220    */
1221   public static boolean delete(final FileSystem fs, final Path path, final boolean recursive)
1222       throws IOException {
1223     return fs.delete(path, recursive);
1224   }
1225 
1226   /**
1227    * Throw an exception if an action is not permitted by a user on a file.
1228    * 
1229    * @param user
1230    *          the user
1231    * @param file
1232    *          the file
1233    * @param action
1234    *          the action
1235    */
1236   public static void checkAccess(User user, FileStatus file,
1237       FsAction action) throws AccessControlException {
1238     // See HBASE-7814. UserGroupInformation from hadoop 0.20.x may not support getShortName().
1239     String username = user.getShortName();
1240     if (username.equals(file.getOwner())) {
1241       if (file.getPermission().getUserAction().implies(action)) {
1242         return;
1243       }
1244     } else if (contains(user.getGroupNames(), file.getGroup())) {
1245       if (file.getPermission().getGroupAction().implies(action)) {
1246         return;
1247       }
1248     } else if (file.getPermission().getOtherAction().implies(action)) {
1249       return;
1250     }
1251     throw new AccessControlException("Permission denied:" + " action=" + action
1252         + " path=" + file.getPath() + " user=" + username);
1253   }
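
  // Example (editor's sketch): POSIX-style check that the current user may
  // write a file before attempting to move it.  hfilePath is hypothetical:
  //
  //   User user = User.getCurrent();
  //   FileStatus status = fs.getFileStatus(hfilePath);
  //   FSUtils.checkAccess(user, status, FsAction.WRITE); // throws if denied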
1254 
1255   private static boolean contains(String[] groups, String user) {
1256     for (String group : groups) {
1257       if (group.equals(user)) {
1258         return true;
1259       }
1260     }
1261     return false;
1262   }
1263 
1264   /**
1265    * Calls fs.exists(). Checks if the specified path exists.
1266    *
1267    * @param fs file system
1268    * @param path path to check
1269    * @return the value returned by fs.exists()
1270    * @throws IOException e
1271    */
1272   public static boolean isExists(final FileSystem fs, final Path path) throws IOException {
1273     return fs.exists(path);
1274   }
1275 
1276   /**
1277    * Log the current state of the filesystem from a certain root directory
1278    * @param fs filesystem to investigate
1279    * @param root root file/directory to start logging from
1280    * @param LOG log to output information
1281    * @throws IOException if an unexpected exception occurs
1282    */
1283   public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG)
1284       throws IOException {
1285     LOG.debug("Current file system:");
1286     logFSTree(LOG, fs, root, "|-");
1287   }
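
  // Example (editor's sketch): dump the tree under hbase.rootdir at DEBUG
  // level, handy in tests:
  //
  //   FSUtils.logFileSystemState(FSUtils.getCurrentFileSystem(conf),
  //       FSUtils.getRootDir(conf), LOG);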
1288 
1289   /**
1290    * Recursive helper to log the state of the FS
1291    * @see #logFileSystemState(FileSystem, Path, Log)
1292    */
1293   private static void logFSTree(Log LOG, final FileSystem fs, final Path root, String prefix)
1294       throws IOException {
1295     FileStatus[] files = FSUtils.listStatus(fs, root, null);
1296     if (files == null) return;
1297 
1298     for (FileStatus file : files) {
1299       if (file.isDir()) {
1300         LOG.debug(prefix + file.getPath().getName() + "/");
1301         logFSTree(LOG, fs, file.getPath(), prefix + "---");
1302       } else {
1303         LOG.debug(prefix + file.getPath().getName());
1304       }
1305     }
1306   }
1307 }