/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.DataInputStream;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Method;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.HBaseFileSystem;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;

/**
 * Utility methods for interacting with the underlying file system.
 */
public abstract class FSUtils {
  private static final Log LOG = LogFactory.getLog(FSUtils.class);

  /** Full access permissions (starting point for a umask) */
  private static final String FULL_RWX_PERMISSIONS = "777";

  protected FSUtils() {
    super();
  }

  /**
   * Returns the FSUtils implementation matching the scheme of the given
   * filesystem, as configured by <code>hbase.fsutil.&lt;scheme&gt;.impl</code>.
   * @param fs filesystem whose scheme selects the implementation
   * @param conf configuration to consult
   * @return an FSUtils instance for the filesystem's scheme
   */
  public static FSUtils getInstance(FileSystem fs, Configuration conf) {
    String scheme = fs.getUri().getScheme();
    if (scheme == null) {
      LOG.warn("Could not find scheme for uri " +
          fs.getUri() + ", defaulting to hdfs");
      scheme = "hdfs";
    }
    Class<?> fsUtilsClass = conf.getClass("hbase.fsutil." +
        scheme + ".impl", FSHDFSUtils.class); // Default to HDFS impl
    FSUtils fsUtils = (FSUtils)ReflectionUtils.newInstance(fsUtilsClass, conf);
    return fsUtils;
  }
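
  // Illustrative sketch (not part of the original class): resolving the
  // FSUtils implementation for whatever filesystem backs the configuration.
  // A custom implementation could be plugged in under the key
  // "hbase.fsutil.<scheme>.impl"; by default HDFS resolves to FSHDFSUtils.
  private static FSUtils exampleGetInstance(final Configuration conf)
      throws IOException {
    FileSystem fs = FileSystem.get(conf); // filesystem named by fs.default.name
    return FSUtils.getInstance(fs, conf);
  }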

  /**
   * Delete the directory if it exists.
   * @param fs filesystem object
   * @param dir directory to delete
   * @return True if <code>dir</code> was deleted
   * @throws IOException e
   */
  public static boolean deleteDirectory(final FileSystem fs, final Path dir)
  throws IOException {
    return fs.exists(dir) && fs.delete(dir, true);
  }

  /**
   * Check if the directory exists.  If it does not, create it.
   * @param fs filesystem object
   * @param dir path to check
   * @return Path
   * @throws IOException e
   */
  public Path checkdir(final FileSystem fs, final Path dir) throws IOException {
    if (!fs.exists(dir)) {
      HBaseFileSystem.makeDirOnFileSystem(fs, dir);
    }
    return dir;
  }

  /**
   * Create the specified file on the filesystem. By default, this will:
   * <ol>
   * <li>overwrite the file if it exists</li>
   * <li>apply the umask in the configuration (if it is enabled)</li>
   * <li>use the fs configured buffer size (or the default if not set)</li>
   * <li>use the default replication</li>
   * <li>use the default block size</li>
   * <li>not track progress</li>
   * </ol>
   *
   * @param fs {@link FileSystem} on which to write the file
   * @param path {@link Path} to the file to write
   * @param perm permissions to set on the created file
   * @return output stream to the created file
   * @throws IOException if the file cannot be created
   */
  public static FSDataOutputStream create(FileSystem fs, Path path,
      FsPermission perm) throws IOException {
    return create(fs, path, perm, true);
  }

  /**
   * Create the specified file on the filesystem. By default, this will:
   * <ol>
   * <li>apply the umask in the configuration (if it is enabled)</li>
   * <li>use the fs configured buffer size (or the default if not set)</li>
   * <li>use the default replication</li>
   * <li>use the default block size</li>
   * <li>not track progress</li>
   * </ol>
   *
   * @param fs {@link FileSystem} on which to write the file
   * @param path {@link Path} to the file to write
   * @param perm permissions to set on the created file
   * @param overwrite Whether or not the created file should overwrite an
   *          existing one.
   * @return output stream to the created file
   * @throws IOException if the file cannot be created
   */
  public static FSDataOutputStream create(FileSystem fs, Path path, FsPermission perm,
      boolean overwrite) throws IOException {
    LOG.debug("Creating file=" + path + " with permission=" + perm);
    return HBaseFileSystem.createPathWithPermsOnFileSystem(fs, path, perm, overwrite);
  }

  /**
   * Get the file permissions specified in the configuration, if they are
   * enabled.
   *
   * @param fs filesystem that the file will be created on.
   * @param conf configuration to read for determining if permissions are
   *          enabled and which to use
   * @param permissionConfKey property key in the configuration to use when
   *          finding the permission
   * @return the permission to use when creating a new file on the fs. If
   *         special permissions are not specified in the configuration, then
   *         the default permissions on the fs will be returned.
   */
  public static FsPermission getFilePermissions(final FileSystem fs,
      final Configuration conf, final String permissionConfKey) {
    boolean enablePermissions = conf.getBoolean(
        HConstants.ENABLE_DATA_FILE_UMASK, false);

    if (enablePermissions) {
      try {
        FsPermission perm = new FsPermission(FULL_RWX_PERMISSIONS);
        // make sure that we have a mask, if not, go default.
        String mask = conf.get(permissionConfKey);
        if (mask == null)
          return FsPermission.getDefault();
        // apply the umask
        FsPermission umask = new FsPermission(mask);
        return perm.applyUMask(umask);
      } catch (IllegalArgumentException e) {
        LOG.warn(
            "Incorrect umask specified in configuration: "
                + conf.get(permissionConfKey)
                + ", using default file permissions.", e);
        return FsPermission.getDefault();
      }
    }
    return FsPermission.getDefault();
  }
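
  // Illustrative sketch (not part of the original class): pairing
  // getFilePermissions() with create(). The umask key name used here,
  // "hbase.data.umask", is an assumption for illustration; any key whose
  // value parses as an FsPermission works.
  private static FSDataOutputStream exampleCreateWithUmask(final FileSystem fs,
      final Configuration conf, final Path path) throws IOException {
    FsPermission perm = getFilePermissions(fs, conf, "hbase.data.umask");
    return create(fs, path, perm); // overwrites an existing file by default
  }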

  /**
   * Checks to see if the specified file system is available
   *
   * @param fs filesystem
   * @throws IOException e
   */
  public static void checkFileSystemAvailable(final FileSystem fs)
  throws IOException {
    if (!(fs instanceof DistributedFileSystem)) {
      return;
    }
    IOException exception = null;
    DistributedFileSystem dfs = (DistributedFileSystem) fs;
    try {
      if (dfs.exists(new Path("/"))) {
        return;
      }
    } catch (IOException e) {
      exception = RemoteExceptionHandler.checkIOException(e);
    }
    try {
      fs.close();
    } catch (Exception e) {
      LOG.error("file system close failed: ", e);
    }
    IOException io = new IOException("File system is not available");
    io.initCause(exception);
    throw io;
  }

  /**
   * We use reflection because {@link DistributedFileSystem#setSafeMode(
   * FSConstants.SafeModeAction action, boolean isChecked)} is not in hadoop 1.1
   *
   * @param dfs the filesystem to query
   * @return whether we're in safe mode
   * @throws IOException
   */
  private static boolean isInSafeMode(DistributedFileSystem dfs) throws IOException {
    boolean inSafeMode = false;
    try {
      Method m = DistributedFileSystem.class.getMethod("setSafeMode", new Class<?> []{
          org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.class, boolean.class});
      inSafeMode = (Boolean) m.invoke(dfs,
        org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_GET, true);
    } catch (Exception e) {
      if (e instanceof IOException) throw (IOException) e;

      // Fall back to the single-argument form to check whether dfs is in safe mode.
      inSafeMode = dfs.setSafeMode(
        org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_GET);
    }
    return inSafeMode;
  }

  /**
   * Check whether dfs is in safemode.
   * @param conf configuration used to obtain the filesystem
   * @throws IOException if dfs is in safemode
   */
  public static void checkDfsSafeMode(final Configuration conf)
  throws IOException {
    boolean isInSafeMode = false;
    FileSystem fs = FileSystem.get(conf);
    if (fs instanceof DistributedFileSystem) {
      DistributedFileSystem dfs = (DistributedFileSystem)fs;
      isInSafeMode = isInSafeMode(dfs);
    }
    if (isInSafeMode) {
      throw new IOException("File system is in safemode, it can't be written to now");
    }
  }

  /**
   * Returns the current version of the file system, as recorded in the
   * version file under <code>rootdir</code>.
   *
   * @param fs filesystem object
   * @param rootdir root hbase directory
   * @return null if no version file exists, version string otherwise.
   * @throws IOException e
   */
  public static String getVersion(FileSystem fs, Path rootdir)
  throws IOException {
    Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME);
    String version = null;
    if (fs.exists(versionFile)) {
      FSDataInputStream s = fs.open(versionFile);
      try {
        version = DataInputStream.readUTF(s);
      } catch (EOFException eof) {
        LOG.warn("Version file was empty, odd, will try to set it.");
      } finally {
        s.close();
      }
    }
    return version;
  }

  /**
   * Verifies the current version of the file system
   *
   * @param fs file system
   * @param rootdir root directory of HBase installation
   * @param message if true, issues a message on System.out
   *
   * @throws IOException e
   */
  public static void checkVersion(FileSystem fs, Path rootdir,
      boolean message) throws IOException {
    checkVersion(fs, rootdir, message, 0,
        HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
  }

  /**
   * Verifies the current version of the file system
   *
   * @param fs file system
   * @param rootdir root directory of HBase installation
   * @param message if true, issues a message on System.out
   * @param wait wait interval
   * @param retries number of times to retry
   *
   * @throws IOException e
   */
  public static void checkVersion(FileSystem fs, Path rootdir,
      boolean message, int wait, int retries) throws IOException {
    String version = getVersion(fs, rootdir);

    if (version == null) {
      if (!rootRegionExists(fs, rootdir)) {
        // rootDir is empty (no version file and no root region)
        // just create new version file (HBASE-1195)
        FSUtils.setVersion(fs, rootdir, wait, retries);
        return;
      }
    } else if (version.compareTo(HConstants.FILE_SYSTEM_VERSION) == 0)
        return;

    // version is deprecated; require migration
    // Output on stdout so user sees it in terminal.
    String msg = "HBase file layout needs to be upgraded."
      + "  You have version " + version
      + " and I want version " + HConstants.FILE_SYSTEM_VERSION
      + ".  Is your hbase.rootdir valid?  If so, you may need to run "
      + "'hbase hbck -fixVersionFile'.";
    if (message) {
      System.out.println("WARNING! " + msg);
    }
    throw new FileSystemVersionException(msg);
  }

  /**
   * Sets the version of the file system
   *
   * @param fs filesystem object
   * @param rootdir hbase root
   * @throws IOException e
   */
  public static void setVersion(FileSystem fs, Path rootdir)
  throws IOException {
    setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0,
        HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
  }

  /**
   * Sets the version of the file system
   *
   * @param fs filesystem object
   * @param rootdir hbase root
   * @param wait time to wait between retries
   * @param retries number of times to retry before failing
   * @throws IOException e
   */
  public static void setVersion(FileSystem fs, Path rootdir, int wait, int retries)
  throws IOException {
    setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait, retries);
  }


  /**
   * Sets the version of the file system
   *
   * @param fs filesystem object
   * @param rootdir hbase root directory
   * @param version version to set
   * @param wait time to wait between retries
   * @param retries number of times to retry before throwing an IOException
   * @throws IOException e
   */
  public static void setVersion(FileSystem fs, Path rootdir, String version,
      int wait, int retries) throws IOException {
    Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME);
    while (true) {
      try {
        FSDataOutputStream s = fs.create(versionFile);
        s.writeUTF(version);
        LOG.debug("Created version file at " + rootdir.toString() +
            " with version=" + version);
        s.close();
        return;
      } catch (IOException e) {
        if (retries > 0) {
          LOG.warn("Unable to create version file at " + rootdir.toString() +
              ", retrying: " + e.getMessage());
          fs.delete(versionFile, false);
          try {
            if (wait > 0) {
              Thread.sleep(wait);
            }
          } catch (InterruptedException ex) {
            // ignore
          }
          retries--;
        } else {
          throw e;
        }
      }
    }
  }

  /**
   * Checks that a cluster ID file exists in the HBase root directory
   * @param fs the root directory FileSystem
   * @param rootdir the HBase root directory in HDFS
   * @param wait how long to wait between retries
   * @return <code>true</code> if the file exists, otherwise <code>false</code>
   * @throws IOException if checking the FileSystem fails
   */
  public static boolean checkClusterIdExists(FileSystem fs, Path rootdir,
      int wait) throws IOException {
    while (true) {
      try {
        Path filePath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME);
        return fs.exists(filePath);
      } catch (IOException ioe) {
        if (wait > 0) {
          LOG.warn("Unable to check cluster ID file in " + rootdir.toString() +
              ", retrying in " + wait + "msec: " + StringUtils.stringifyException(ioe));
          try {
            Thread.sleep(wait);
          } catch (InterruptedException ie) {
            Thread.interrupted();
            break;
          }
        } else {
          throw ioe;
        }
      }
    }
    return false;
  }

  /**
   * Returns the value of the unique cluster ID stored for this HBase instance.
   * @param fs the root directory FileSystem
   * @param rootdir the path to the HBase root directory
   * @return the unique cluster identifier
   * @throws IOException if reading the cluster ID file fails
   */
  public static String getClusterId(FileSystem fs, Path rootdir)
      throws IOException {
    Path idPath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME);
    String clusterId = null;
    if (fs.exists(idPath)) {
      FSDataInputStream in = fs.open(idPath);
      try {
        clusterId = in.readUTF();
      } catch (EOFException eof) {
        LOG.warn("Cluster ID file " + idPath.toString() + " was empty");
      } finally {
        in.close();
      }
    } else {
      LOG.warn("Cluster ID file does not exist at " + idPath.toString());
    }
    return clusterId;
  }

  /**
   * Writes a new unique identifier for this cluster to the "hbase.id" file
   * in the HBase root directory
   * @param fs the root directory FileSystem
   * @param rootdir the path to the HBase root directory
   * @param clusterId the unique identifier to store
   * @param wait how long (in milliseconds) to wait between retries
   * @throws IOException if writing to the FileSystem fails and no wait value
   *           was specified
   */
  public static void setClusterId(FileSystem fs, Path rootdir, String clusterId,
      int wait) throws IOException {
    while (true) {
      try {
        Path filePath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME);
        FSDataOutputStream s = fs.create(filePath);
        s.writeUTF(clusterId);
        s.close();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Created cluster ID file at " + filePath.toString() +
              " with ID: " + clusterId);
        }
        return;
      } catch (IOException ioe) {
        if (wait > 0) {
          LOG.warn("Unable to create cluster ID file in " + rootdir.toString() +
              ", retrying in " + wait + "msec: " + StringUtils.stringifyException(ioe));
          try {
            Thread.sleep(wait);
          } catch (InterruptedException ie) {
            Thread.interrupted();
            break;
          }
        } else {
          throw ioe;
        }
      }
    }
  }

  /**
   * Verifies root directory path is a valid URI with a scheme
   *
   * @param root root directory path
   * @return Passed <code>root</code> argument.
   * @throws IOException if not a valid URI with a scheme
   */
  public static Path validateRootPath(Path root) throws IOException {
    try {
      URI rootURI = new URI(root.toString());
      String scheme = rootURI.getScheme();
      if (scheme == null) {
        throw new IOException("Root directory does not have a scheme");
      }
      return root;
    } catch (URISyntaxException e) {
      IOException io = new IOException("Root directory path is not a valid " +
        "URI -- check your " + HConstants.HBASE_DIR + " configuration");
      io.initCause(e);
      throw io;
    }
  }

  /**
   * If DFS, check safe mode and if so, wait until we clear it.
   * @param conf configuration
   * @param wait sleep time (in milliseconds) between retries
   * @throws IOException e
   */
  public static void waitOnSafeMode(final Configuration conf,
    final long wait)
  throws IOException {
    FileSystem fs = FileSystem.get(conf);
    if (!(fs instanceof DistributedFileSystem)) return;
    DistributedFileSystem dfs = (DistributedFileSystem)fs;
    // Make sure dfs is not in safe mode
    while (isInSafeMode(dfs)) {
      LOG.info("Waiting for dfs to exit safe mode...");
      try {
        Thread.sleep(wait);
      } catch (InterruptedException e) {
        // continue
      }
    }
  }
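
  // Illustrative sketch (not part of the original class): a typical startup
  // sequence that uses the checks above before touching the root directory.
  private static void exampleStartupChecks(final Configuration conf)
      throws IOException {
    FileSystem fs = FileSystem.get(conf);
    checkFileSystemAvailable(fs); // fail fast if the DFS is unreachable
    waitOnSafeMode(conf, 1000);   // poll every second until safe mode clears
    checkVersion(fs, getRootDir(conf), true); // warn on stdout if mismatched
  }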

  /**
   * Return the 'path' component of a Path.  In Hadoop, Path is a URI.  This
   * method returns the 'path' component of a Path's URI: e.g. If a Path is
   * <code>hdfs://example.org:9000/hbase_trunk/TestTable/compaction.dir</code>,
   * this method returns <code>/hbase_trunk/TestTable/compaction.dir</code>.
   * This method is useful if you want to print out a Path without the
   * qualifying Filesystem instance.
   * @param p Filesystem Path whose 'path' component we are to return.
   * @return Path portion of the Path's URI.
   */
  public static String getPath(Path p) {
    return p.toUri().getPath();
  }

  /**
   * @param c configuration
   * @return Path to hbase root directory: i.e. <code>hbase.rootdir</code> from
   * configuration as a qualified Path.
   * @throws IOException e
   */
  public static Path getRootDir(final Configuration c) throws IOException {
    Path p = new Path(c.get(HConstants.HBASE_DIR));
    FileSystem fs = p.getFileSystem(c);
    return p.makeQualified(fs);
  }

  /**
   * Sets <code>hbase.rootdir</code> in the given configuration.
   * @param c configuration to update
   * @param root new hbase root directory
   */
  public static void setRootDir(final Configuration c, final Path root) throws IOException {
    c.set(HConstants.HBASE_DIR, root.toString());
  }

  /**
   * Checks if the root region exists
   *
   * @param fs file system
   * @param rootdir root directory of HBase installation
   * @return true if exists
   * @throws IOException e
   */
  public static boolean rootRegionExists(FileSystem fs, Path rootdir)
  throws IOException {
    Path rootRegionDir =
      HRegion.getRegionDir(rootdir, HRegionInfo.ROOT_REGIONINFO);
    return fs.exists(rootRegionDir);
  }

  /**
   * Compute the HDFS blocks distribution of a given file, or a portion of the file
   * @param fs file system
   * @param status file status of the file
   * @param start start position of the portion
   * @param length length of the portion
   * @return The HDFS blocks distribution
   * @throws IOException if the block locations cannot be determined
   */
  public static HDFSBlocksDistribution computeHDFSBlocksDistribution(
    final FileSystem fs, FileStatus status, long start, long length)
    throws IOException {
    HDFSBlocksDistribution blocksDistribution = new HDFSBlocksDistribution();
    BlockLocation [] blockLocations =
      fs.getFileBlockLocations(status, start, length);
    for (BlockLocation bl : blockLocations) {
      String [] hosts = bl.getHosts();
      long len = bl.getLength();
      blocksDistribution.addHostsAndBlockWeight(hosts, len);
    }

    return blocksDistribution;
  }


  /**
   * Runs through the hbase rootdir and checks all stores have only
   * one file in them -- that is, they've been major compacted.  Looks
   * at root and meta tables too.
   * @param fs filesystem
   * @param hbaseRootDir hbase root directory
   * @return True if this hbase install is major compacted.
   * @throws IOException e
   */
  public static boolean isMajorCompacted(final FileSystem fs,
      final Path hbaseRootDir)
  throws IOException {
    // Presumes any directory under hbase.rootdir is a table.
    FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, new DirFilter(fs));
    for (FileStatus tableDir : tableDirs) {
      // Skip the .log directory.  All others should be tables.  Inside a table,
      // there are compaction.dir directories to skip.  Otherwise, all else
      // should be regions.  Then in each region, should only be family
      // directories.  Under each of these, should be one file only.
      Path d = tableDir.getPath();
      if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) {
        continue;
      }
      FileStatus[] regionDirs = fs.listStatus(d, new DirFilter(fs));
      for (FileStatus regionDir : regionDirs) {
        Path dd = regionDir.getPath();
        if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) {
          continue;
        }
        // Else it's a region name.  Now look in region for families.
        FileStatus[] familyDirs = fs.listStatus(dd, new DirFilter(fs));
        for (FileStatus familyDir : familyDirs) {
          Path family = familyDir.getPath();
          // Now in family make sure only one file.
          FileStatus[] familyStatus = fs.listStatus(family);
          if (familyStatus.length > 1) {
            LOG.debug(family.toString() + " has " + familyStatus.length +
                " files.");
            return false;
          }
        }
      }
    }
    return true;
  }

  // TODO move this method OUT of FSUtils. No dependencies to HMaster
  /**
   * Returns the total overall fragmentation percentage. Includes .META. and
   * -ROOT- as well.
   *
   * @param master  The master defining the HBase root and file system.
   * @return The overall fragmentation percentage, or -1 if it could not be
   *   determined.
   * @throws IOException When scanning the directory fails.
   */
  public static int getTotalTableFragmentation(final HMaster master)
  throws IOException {
    Map<String, Integer> map = getTableFragmentation(master);
    return map != null && map.size() > 0 ? map.get("-TOTAL-") : -1;
  }

  /**
   * Runs through the HBase rootdir and checks how many stores for each table
   * have more than one file in them. Checks -ROOT- and .META. too. The total
   * percentage across all tables is stored under the special key "-TOTAL-".
   *
   * @param master  The master defining the HBase root and file system.
   * @return A map for each table and its percentage.
   * @throws IOException When scanning the directory fails.
   */
  public static Map<String, Integer> getTableFragmentation(
    final HMaster master)
  throws IOException {
    Path path = getRootDir(master.getConfiguration());
    // since HMaster.getFileSystem() is package private
    FileSystem fs = path.getFileSystem(master.getConfiguration());
    return getTableFragmentation(fs, path);
  }

  /**
   * Runs through the HBase rootdir and checks how many stores for each table
   * have more than one file in them. Checks -ROOT- and .META. too. The total
   * percentage across all tables is stored under the special key "-TOTAL-".
   *
   * @param fs  The file system to use.
   * @param hbaseRootDir  The root directory to scan.
   * @return A map for each table and its percentage.
   * @throws IOException When scanning the directory fails.
   */
  public static Map<String, Integer> getTableFragmentation(
    final FileSystem fs, final Path hbaseRootDir)
  throws IOException {
    Map<String, Integer> frags = new HashMap<String, Integer>();
    int cfCountTotal = 0;
    int cfFragTotal = 0;
    DirFilter df = new DirFilter(fs);
    // presumes any directory under hbase.rootdir is a table
    FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, df);
    for (FileStatus tableDir : tableDirs) {
      // Skip the .log directory.  All others should be tables.  Inside a table,
      // there are compaction.dir directories to skip.  Otherwise, all else
      // should be regions.  Then in each region, should only be family
      // directories.  Under each of these, should be one file only.
      Path d = tableDir.getPath();
      if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) {
        continue;
      }
      int cfCount = 0;
      int cfFrag = 0;
      FileStatus[] regionDirs = fs.listStatus(d, df);
      for (FileStatus regionDir : regionDirs) {
        Path dd = regionDir.getPath();
        if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) {
          continue;
        }
        // else it's a region name, now look in region for families
        FileStatus[] familyDirs = fs.listStatus(dd, df);
        for (FileStatus familyDir : familyDirs) {
          cfCount++;
          cfCountTotal++;
          Path family = familyDir.getPath();
          // now in family make sure only one file
          FileStatus[] familyStatus = fs.listStatus(family);
          if (familyStatus.length > 1) {
            cfFrag++;
            cfFragTotal++;
          }
        }
      }
      // compute percentage per table and store in result list
      frags.put(d.getName(), Math.round((float) cfFrag / cfCount * 100));
    }
    // set overall percentage for all tables
    frags.put("-TOTAL-", Math.round((float) cfFragTotal / cfCountTotal * 100));
    return frags;
  }
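
  // Worked example of the percentage computed above: a table with five column
  // family directories, two of which hold more than one store file, reports
  // Math.round((float) 2 / 5 * 100) = 40 percent fragmentation.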

  /**
   * Expects to find the -ROOT- directory.
   * @param fs filesystem
   * @param hbaseRootDir hbase root directory
   * @return True if this is a pre-0.20 layout.
   * @throws IOException e
   */
  public static boolean isPre020FileLayout(final FileSystem fs,
    final Path hbaseRootDir)
  throws IOException {
    Path mapfiles = new Path(new Path(new Path(new Path(hbaseRootDir, "-ROOT-"),
      "70236052"), "info"), "mapfiles");
    return fs.exists(mapfiles);
  }

  /**
   * Runs through the hbase rootdir and checks all stores have only
   * one file in them -- that is, they've been major compacted.  Looks
   * at root and meta tables too.  This version differs from
   * {@link #isMajorCompacted(FileSystem, Path)} in that it expects a
   * pre-0.20.0 hbase layout on the filesystem.  Used when migrating.
   * @param fs filesystem
   * @param hbaseRootDir hbase root directory
   * @return True if this hbase install is major compacted.
   * @throws IOException e
   */
  public static boolean isMajorCompactedPre020(final FileSystem fs,
      final Path hbaseRootDir)
  throws IOException {
    // Presumes any directory under hbase.rootdir is a table.
    FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, new DirFilter(fs));
    for (FileStatus tableDir : tableDirs) {
      // Inside a table, there are compaction.dir directories to skip.
      // Otherwise, all else should be regions.  Then in each region, should
      // only be family directories.  Under each of these, should be a mapfile
      // and info directory and in these only one file.
      Path d = tableDir.getPath();
      if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) {
        continue;
      }
      FileStatus[] regionDirs = fs.listStatus(d, new DirFilter(fs));
      for (FileStatus regionDir : regionDirs) {
        Path dd = regionDir.getPath();
        if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) {
          continue;
        }
        // Else it's a region name.  Now look in region for families.
        FileStatus[] familyDirs = fs.listStatus(dd, new DirFilter(fs));
        for (FileStatus familyDir : familyDirs) {
          Path family = familyDir.getPath();
          FileStatus[] infoAndMapfile = fs.listStatus(family);
          // Assert that only info and mapfile in family dir.
          if (infoAndMapfile.length != 0 && infoAndMapfile.length != 2) {
            LOG.debug(family.toString() +
                " has more than just info and mapfile: " + infoAndMapfile.length);
            return false;
          }
          // Skip empty family directories; indexing into an empty array
          // below would otherwise throw.
          if (infoAndMapfile.length == 0) {
            continue;
          }
          // Make sure directory named info or mapfile.
          for (int ll = 0; ll < 2; ll++) {
            if (infoAndMapfile[ll].getPath().getName().equals("info") ||
                infoAndMapfile[ll].getPath().getName().equals("mapfiles"))
              continue;
            LOG.debug("Unexpected directory name: " +
                infoAndMapfile[ll].getPath());
            return false;
          }
          // Now in family, there are 'mapfile' and 'info' subdirs.  Just
          // look in the 'mapfile' subdir.
          FileStatus[] familyStatus =
              fs.listStatus(new Path(family, "mapfiles"));
          if (familyStatus.length > 1) {
            LOG.debug(family.toString() + " has " + familyStatus.length +
                " files.");
            return false;
          }
        }
      }
    }
    return true;
  }

  /**
   * A {@link PathFilter} that returns only regular files.
   */
  static class FileFilter implements PathFilter {
    private final FileSystem fs;

    public FileFilter(final FileSystem fs) {
      this.fs = fs;
    }

    @Override
    public boolean accept(Path p) {
      try {
        return fs.isFile(p);
      } catch (IOException e) {
        LOG.debug("unable to verify if path=" + p + " is a regular file", e);
        return false;
      }
    }
  }

  /**
   * A {@link PathFilter} that returns directories.
   */
  public static class DirFilter implements PathFilter {
    private final FileSystem fs;

    public DirFilter(final FileSystem fs) {
      this.fs = fs;
    }

    @Override
    public boolean accept(Path p) {
      boolean isValid = false;
      try {
        // Compare on the directory name; comparing the Path object itself
        // against a collection of Strings would never match.
        if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(p.getName())) {
          isValid = false;
        } else {
          isValid = this.fs.getFileStatus(p).isDir();
        }
      } catch (IOException e) {
        LOG.warn("Unable to verify if [" + p.toString() + "] is a directory", e);
      }
      return isValid;
    }
  }

  /**
   * Heuristic to determine whether it is safe to open a file for append.
   * Looks for dfs.support.append and uses reflection to search for
   * SequenceFile.Writer.syncFs() or FSDataOutputStream.hflush().
   * @param conf configuration to check for dfs.support.append
   * @return True if append is supported
   */
  public static boolean isAppendSupported(final Configuration conf) {
    boolean append = conf.getBoolean("dfs.support.append", false);
    if (append) {
      try {
        // TODO: The implementation that comes back when we do a createWriter
        // may not be using SequenceFile so the below is not a definitive test.
        // Will do for now (hdfs-200).
        SequenceFile.Writer.class.getMethod("syncFs", new Class<?> []{});
        append = true;
      } catch (SecurityException e) {
      } catch (NoSuchMethodException e) {
        append = false;
      }
    }
    if (!append) {
      // Look for the 0.21, 0.22, new-style append evidence.
      try {
        FSDataOutputStream.class.getMethod("hflush", new Class<?> []{});
        append = true;
      } catch (NoSuchMethodException e) {
        append = false;
      }
    }
    return append;
  }
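
  // Illustrative sketch (not part of the original class): a guard a caller
  // might run before relying on write-ahead-log durability.
  private static void exampleRequireAppend(final Configuration conf)
      throws IOException {
    if (isHDFS(conf) && !isAppendSupported(conf)) {
      throw new IOException("HDFS lacks append/sync support; " +
        "log durability cannot be guaranteed");
    }
  }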

  /**
   * @param conf configuration naming the filesystem to check
   * @return True if the filesystem's scheme is 'hdfs'.
   * @throws IOException
   */
  public static boolean isHDFS(final Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    String scheme = fs.getUri().getScheme();
    return scheme.equalsIgnoreCase("hdfs");
  }

  /**
   * Recover file lease. Used when a file might have been left open by
   * another process.
   * @param fs FileSystem handle
   * @param p Path of file to recover lease
   * @param conf Configuration handle
   * @throws IOException
   */
  public abstract void recoverFileLease(final FileSystem fs, final Path p,
      Configuration conf) throws IOException;

  /**
   * @param fs filesystem to use
   * @param rootdir hbase root directory
   * @return All the table directories under <code>rootdir</code>. Ignores
   * non-table directories such as .logs, .oldlogs, .corrupt, .META., and -ROOT-.
   * @throws IOException
   */
  public static List<Path> getTableDirs(final FileSystem fs, final Path rootdir)
  throws IOException {
    // presumes any directory under hbase.rootdir is a table
    FileStatus [] dirs = fs.listStatus(rootdir, new DirFilter(fs));
    List<Path> tabledirs = new ArrayList<Path>(dirs.length);
    for (FileStatus dir: dirs) {
      Path p = dir.getPath();
      String tableName = p.getName();
      if (!HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName)) {
        tabledirs.add(p);
      }
    }
    return tabledirs;
  }

  public static Path getTablePath(Path rootdir, byte [] tableName) {
    return getTablePath(rootdir, Bytes.toString(tableName));
  }

  public static Path getTablePath(Path rootdir, final String tableName) {
    return new Path(rootdir, tableName);
  }

  /**
   * Filter for region directories: accepts only names made up of hex
   * characters, which excludes dot-prefixed entries such as .tableinfo.
   */
  public static class RegionDirFilter implements PathFilter {
    // This pattern will accept 0.90+ style hex region dirs and older numeric region dir names.
    final public static Pattern regionDirPattern = Pattern.compile("^[0-9a-f]*$");
    final FileSystem fs;

    public RegionDirFilter(FileSystem fs) {
      this.fs = fs;
    }

    @Override
    public boolean accept(Path rd) {
      if (!regionDirPattern.matcher(rd.getName()).matches()) {
        return false;
      }

      try {
        return fs.getFileStatus(rd).isDir();
      } catch (IOException ioe) {
        // Maybe the file was moved or the fs was disconnected.
        LOG.warn("Skipping file " + rd + " due to IOException", ioe);
        return false;
      }
    }
  }

  /**
   * Given a particular table dir, return all the regiondirs inside it, excluding files such as
   * .tableinfo
   * @param fs A file system for the Path
   * @param tableDir Path to a specific table directory &lt;hbase.rootdir&gt;/&lt;tabledir&gt;
   * @return List of paths to valid region directories in table dir.
   * @throws IOException
   */
  public static List<Path> getRegionDirs(final FileSystem fs, final Path tableDir) throws IOException {
    // assumes we are in a table dir.
    FileStatus[] rds = fs.listStatus(tableDir, new RegionDirFilter(fs));
    List<Path> regionDirs = new ArrayList<Path>(rds.length);
    for (FileStatus rdfs: rds) {
      Path rdPath = rdfs.getPath();
      regionDirs.add(rdPath);
    }
    return regionDirs;
  }

  /**
   * Filter for all dirs that are legal column family names.  This is generally used for colfam
   * dirs &lt;hbase.rootdir&gt;/&lt;tabledir&gt;/&lt;regiondir&gt;/&lt;colfamdir&gt;.
   */
  public static class FamilyDirFilter implements PathFilter {
    final FileSystem fs;

    public FamilyDirFilter(FileSystem fs) {
      this.fs = fs;
    }

    @Override
    public boolean accept(Path rd) {
      try {
        // throws IAE if invalid
        HColumnDescriptor.isLegalFamilyName(Bytes.toBytes(rd.getName()));
      } catch (IllegalArgumentException iae) {
        // path name is an invalid family name and thus is excluded.
        return false;
      }

      try {
        return fs.getFileStatus(rd).isDir();
      } catch (IOException ioe) {
        // Maybe the file was moved or the fs was disconnected.
        LOG.warn("Skipping file " + rd + " due to IOException", ioe);
        return false;
      }
    }
  }

  /**
   * Given a particular region dir, return all the familydirs inside it
   *
   * @param fs A file system for the Path
   * @param regionDir Path to a specific region directory
   * @return List of paths to valid family directories in region dir.
   * @throws IOException
   */
  public static List<Path> getFamilyDirs(final FileSystem fs, final Path regionDir) throws IOException {
    // assumes we are in a region dir.
    FileStatus[] fds = fs.listStatus(regionDir, new FamilyDirFilter(fs));
    List<Path> familyDirs = new ArrayList<Path>(fds.length);
    for (FileStatus fdfs: fds) {
      Path fdPath = fdfs.getPath();
      familyDirs.add(fdPath);
    }
    return familyDirs;
  }
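
  // Illustrative sketch (not part of the original class): walking the layout
  // table -> region -> family with the three helpers above.
  private static void exampleWalkLayout(final FileSystem fs, final Path rootdir)
      throws IOException {
    for (Path table : getTableDirs(fs, rootdir)) {
      for (Path region : getRegionDirs(fs, table)) {
        for (Path family : getFamilyDirs(fs, region)) {
          LOG.debug("family dir: " + family);
        }
      }
    }
  }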

  /**
   * Filter for HFiles that excludes reference files.
   */
  public static class HFileFilter implements PathFilter {
    // This pattern will accept 0.90+ style hex hfile names but reject reference files
    final public static Pattern hfilePattern = Pattern.compile("^([0-9a-f]+)$");

    final FileSystem fs;

    public HFileFilter(FileSystem fs) {
      this.fs = fs;
    }

    @Override
    public boolean accept(Path rd) {
      if (!hfilePattern.matcher(rd.getName()).matches()) {
        return false;
      }

      try {
        // only files
        return !fs.getFileStatus(rd).isDir();
      } catch (IOException ioe) {
        // Maybe the file was moved or the fs was disconnected.
        LOG.warn("Skipping file " + rd + " due to IOException", ioe);
        return false;
      }
    }
  }
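
  // Illustrative sketch (not part of the original class): listing only the
  // hfiles (no reference files, no subdirectories) in a family directory.
  private static List<Path> exampleListHFiles(final FileSystem fs,
      final Path familyDir) throws IOException {
    List<Path> hfiles = new ArrayList<Path>();
    FileStatus[] files = listStatus(fs, familyDir, new HFileFilter(fs));
    if (files == null) return hfiles; // directory missing or empty
    for (FileStatus file : files) {
      hfiles.add(file.getPath());
    }
    return hfiles;
  }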

  /**
   * @param conf configuration pointing at the hbase rootdir
   * @return the filesystem of the hbase rootdir.
   * @throws IOException
   */
  public static FileSystem getCurrentFileSystem(Configuration conf)
  throws IOException {
    return getRootDir(conf).getFileSystem(conf);
  }

  /**
   * Runs through the HBase rootdir and creates a reverse lookup map for
   * table StoreFile names to the full Path.
   * <br>
   * Example...<br>
   * Key = 3944417774205889744  <br>
   * Value = hdfs://localhost:51169/user/userid/-ROOT-/70236052/info/3944417774205889744
   *
   * @param fs  The file system to use.
   * @param hbaseRootDir  The root directory to scan.
   * @return Map keyed by StoreFile name with a value of the full Path.
   * @throws IOException When scanning the directory fails.
   */
  public static Map<String, Path> getTableStoreFilePathMap(
    final FileSystem fs, final Path hbaseRootDir)
  throws IOException {
    Map<String, Path> map = new HashMap<String, Path>();

    // if this method looks similar to 'getTableFragmentation' that is because
    // it was borrowed from it.

    DirFilter df = new DirFilter(fs);
    // presumes any directory under hbase.rootdir is a table
    FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, df);
    for (FileStatus tableDir : tableDirs) {
      // Skip the .log and other non-table directories.  All others should be tables.
      // Inside a table, there are compaction.dir directories to skip.  Otherwise, all else
      // should be regions.
      Path d = tableDir.getPath();
      if (HConstants.HBASE_NON_TABLE_DIRS.contains(d.getName())) {
        continue;
      }
      FileStatus[] regionDirs = fs.listStatus(d, df);
      for (FileStatus regionDir : regionDirs) {
        Path dd = regionDir.getPath();
        if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) {
          continue;
        }
        // else it's a region name, now look in region for families
        FileStatus[] familyDirs = fs.listStatus(dd, df);
        for (FileStatus familyDir : familyDirs) {
          Path family = familyDir.getPath();
          // now in family, iterate over the StoreFiles and
          // put in map
          FileStatus[] familyStatus = fs.listStatus(family);
          for (FileStatus sfStatus : familyStatus) {
            Path sf = sfStatus.getPath();
            map.put(sf.getName(), sf);
          }
        }
      }
    }
    return map;
  }
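
  // Illustrative sketch (not part of the original class): resolving a bare
  // store file name, as it might appear in a log message, to its full path.
  private static Path exampleResolveStoreFile(final FileSystem fs,
      final Path rootdir, final String storeFileName) throws IOException {
    Map<String, Path> map = getTableStoreFilePathMap(fs, rootdir);
    return map.get(storeFileName); // null if no such store file exists
  }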

  /**
   * Calls fs.listStatus() and treats FileNotFoundException as non-fatal.
   * This accommodates differences between hadoop versions.
   *
   * @param fs file system
   * @param dir directory
   * @param filter path filter
   * @return null if <code>dir</code> doesn't exist or is empty, otherwise
   *   FileStatus array
   */
  public static FileStatus [] listStatus(final FileSystem fs,
      final Path dir, final PathFilter filter) throws IOException {
    FileStatus [] status = null;
    try {
      status = filter == null ? fs.listStatus(dir) : fs.listStatus(dir, filter);
    } catch (FileNotFoundException fnfe) {
      // if directory doesn't exist, return null
      LOG.debug(dir + " doesn't exist");
    }
    if (status == null || status.length < 1) return null;
    return status;
  }

  /**
   * Calls fs.listStatus() and treats FileNotFoundException as non-fatal.
   * This accommodates differences between hadoop versions.
   *
   * @param fs file system
   * @param dir directory
   * @return null if <code>dir</code> doesn't exist or is empty, otherwise
   *   FileStatus array
   */
  public static FileStatus[] listStatus(final FileSystem fs, final Path dir) throws IOException {
    return listStatus(fs, dir, null);
  }

  /**
   * Calls fs.delete() and returns the value returned by the fs.delete()
   *
   * @param fs file system to delete from
   * @param path path to delete
   * @param recursive whether to delete the path's contents recursively
   * @return true if the delete succeeded
   * @throws IOException
   */
  public static boolean delete(final FileSystem fs, final Path path, final boolean recursive)
      throws IOException {
    return fs.delete(path, recursive);
  }

  /**
   * Throw an exception if an action is not permitted by a user on a file.
   *
   * @param user
   *          the user
   * @param file
   *          the file
   * @param action
   *          the action
   * @throws AccessControlException if the action is not permitted
   */
  public static void checkAccess(User user, FileStatus file,
      FsAction action) throws AccessControlException {
    // See HBASE-7814. UserGroupInformation from hadoop 0.20.x may not support getShortName().
    String username = user.getShortName();
    if (username.equals(file.getOwner())) {
      if (file.getPermission().getUserAction().implies(action)) {
        return;
      }
    } else if (contains(user.getGroupNames(), file.getGroup())) {
      if (file.getPermission().getGroupAction().implies(action)) {
        return;
      }
    } else if (file.getPermission().getOtherAction().implies(action)) {
      return;
    }
    throw new AccessControlException("Permission denied:" + " action=" + action
        + " path=" + file.getPath() + " user=" + username);
  }
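
  // Illustrative sketch (not part of the original class): verifying that the
  // current user may read a file before opening it. User.getCurrent() is the
  // usual way to obtain the calling user in this code base.
  private static void exampleCheckRead(final FileSystem fs, final Path path)
      throws IOException {
    FileStatus status = fs.getFileStatus(path);
    checkAccess(User.getCurrent(), status, FsAction.READ);
  }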

  private static boolean contains(String[] groups, String user) {
    for (String group : groups) {
      if (group.equals(user)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Calls fs.exists(). Checks if the specified path exists
   *
   * @param fs file system to check
   * @param path path to check
   * @return true if the path exists
   * @throws IOException
   */
  public static boolean isExists(final FileSystem fs, final Path path) throws IOException {
    return fs.exists(path);
  }

  /**
   * Log the current state of the filesystem from a certain root directory
   * @param fs filesystem to investigate
   * @param root root file/directory to start logging from
   * @param LOG log to output information
   * @throws IOException if an unexpected exception occurs
   */
  public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG)
      throws IOException {
    LOG.debug("Current file system:");
    logFSTree(LOG, fs, root, "|-");
  }
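
  // Illustrative sketch (not part of the original class): dumping the rootdir
  // layout to this class's own log while debugging.
  private static void exampleLogRootDir(final Configuration conf)
      throws IOException {
    logFileSystemState(getCurrentFileSystem(conf), getRootDir(conf), LOG);
  }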

  /**
   * Recursive helper to log the state of the FS
   * @see #logFileSystemState(FileSystem, Path, Log)
   */
  private static void logFSTree(Log LOG, final FileSystem fs, final Path root, String prefix)
      throws IOException {
    FileStatus[] files = FSUtils.listStatus(fs, root, null);
    if (files == null) return;

    for (FileStatus file : files) {
      if (file.isDir()) {
        LOG.debug(prefix + file.getPath().getName() + "/");
        logFSTree(LOG, fs, file.getPath(), prefix + "---");
      } else {
        LOG.debug(prefix + file.getPath().getName());
      }
    }
  }
}