
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.util;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.DataInputStream;
23  import java.io.EOFException;
24  import java.io.FileNotFoundException;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.lang.reflect.InvocationTargetException;
28  import java.lang.reflect.Method;
29  import java.net.InetSocketAddress;
30  import java.net.URI;
31  import java.net.URISyntaxException;
32  import java.util.ArrayList;
33  import java.util.Collections;
34  import java.util.HashMap;
35  import java.util.LinkedList;
36  import java.util.List;
37  import java.util.Map;
38  import java.util.regex.Pattern;
39  
40  import org.apache.commons.logging.Log;
41  import org.apache.commons.logging.LogFactory;
42  import org.apache.hadoop.classification.InterfaceAudience;
43  import org.apache.hadoop.classification.InterfaceStability;
44  import org.apache.hadoop.conf.Configuration;
45  import org.apache.hadoop.fs.BlockLocation;
46  import org.apache.hadoop.fs.FSDataInputStream;
47  import org.apache.hadoop.fs.FSDataOutputStream;
48  import org.apache.hadoop.fs.FileStatus;
49  import org.apache.hadoop.fs.FileSystem;
50  import org.apache.hadoop.fs.Path;
51  import org.apache.hadoop.fs.PathFilter;
52  import org.apache.hadoop.fs.permission.FsAction;
53  import org.apache.hadoop.fs.permission.FsPermission;
54  import org.apache.hadoop.hbase.ClusterId;
55  import org.apache.hadoop.hbase.TableName;
56  import org.apache.hadoop.hbase.HColumnDescriptor;
57  import org.apache.hadoop.hbase.HConstants;
58  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
59  import org.apache.hadoop.hbase.HRegionInfo;
60  import org.apache.hadoop.hbase.RemoteExceptionHandler;
61  import org.apache.hadoop.hbase.exceptions.DeserializationException;
62  import org.apache.hadoop.hbase.fs.HFileSystem;
63  import org.apache.hadoop.hbase.master.HMaster;
64  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
65  import org.apache.hadoop.hbase.protobuf.generated.FSProtos;
66  import org.apache.hadoop.hbase.regionserver.HRegion;
67  import org.apache.hadoop.hdfs.DistributedFileSystem;
68  import org.apache.hadoop.hdfs.protocol.FSConstants;
69  import org.apache.hadoop.io.IOUtils;
70  import org.apache.hadoop.io.SequenceFile;
71  import org.apache.hadoop.security.AccessControlException;
72  import org.apache.hadoop.security.UserGroupInformation;
73  import org.apache.hadoop.util.Progressable;
74  import org.apache.hadoop.util.ReflectionUtils;
75  import org.apache.hadoop.util.StringUtils;
76  
77  import com.google.common.primitives.Ints;
78  import com.google.protobuf.InvalidProtocolBufferException;
79  
80  /**
81   * Utility methods for interacting with the underlying file system.
82   */
83  @InterfaceAudience.Public
84  @InterfaceStability.Evolving
85  public abstract class FSUtils {
86    private static final Log LOG = LogFactory.getLog(FSUtils.class);
87  
88    /** Full access permissions (starting point for a umask) */
89    private static final String FULL_RWX_PERMISSIONS = "777";
90  
91    /** Set to true on Windows platforms */
92    public static final boolean WINDOWS = System.getProperty("os.name").startsWith("Windows");
93  
94    protected FSUtils() {
95      super();
96    }
97  
98    /**
99     * Compares path components. Does not consider the scheme; i.e. if the schemes differ but
100    * <code>path</code> starts with <code>rootPath</code>, this returns true.
101    * @param rootPath the root path to look for
102    * @param path the path to check against <code>rootPath</code>
103    * @return True if <code>path</code> starts with <code>rootPath</code>
104    */
105   public static boolean isStartingWithPath(final Path rootPath, final String path) {
106     String uriRootPath = rootPath.toUri().getPath();
107     String tailUriPath = (new Path(path)).toUri().getPath();
108     return tailUriPath.startsWith(uriRootPath);
109   }
110 
111   /**
112    * Compares the path component of the Path URI; e.g. if hdfs://a/b/c and /a/b/c are passed, it
113    * compares the '/a/b/c' part. Does not consider the scheme; i.e. if the schemes differ but the
114    * path or subpath matches, the two are considered equal.
115    * @param pathToSearch Path we will be trying to match.
116    * @param pathTail tail to match against the end of <code>pathToSearch</code>
117    * @return True if <code>pathTail</code> is tail on the path of <code>pathToSearch</code>
118    */
119   public static boolean isMatchingTail(final Path pathToSearch, String pathTail) {
120     return isMatchingTail(pathToSearch, new Path(pathTail));
121   }
122 
123   /**
124    * Compares the path component of the Path URI; e.g. if hdfs://a/b/c and /a/b/c are passed, it
125    * compares the '/a/b/c' part. If 'hdfs://a/b/c' and 'b/c' are passed, it returns true.  Does not
126    * consider the scheme; i.e. if the schemes differ but the path or subpath matches, the two equate.
127    * @param pathToSearch Path we will be trying to match.
128    * @param pathTail tail to match against the end of <code>pathToSearch</code>
129    * @return True if <code>pathTail</code> is tail on the path of <code>pathToSearch</code>
130    */
131   public static boolean isMatchingTail(final Path pathToSearch, final Path pathTail) {
132     if (pathToSearch.depth() != pathTail.depth()) return false;
133     Path tailPath = pathTail;
134     String tailName;
135     Path toSearch = pathToSearch;
136     String toSearchName;
137     boolean result = false;
138     do {
139       tailName = tailPath.getName();
140       if (tailName == null || tailName.length() <= 0) {
141         result = true;
142         break;
143       }
144       toSearchName = toSearch.getName();
145       if (toSearchName == null || toSearchName.length() <= 0) break;
146       // Move up a parent on each path for next go around.  Path doesn't let us go off the end.
147       tailPath = tailPath.getParent();
148       toSearch = toSearch.getParent();
149     } while(tailName.equals(toSearchName));
150     return result;
151   }
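
  // Illustrative usage sketch, not part of the original class: shows the two comparisons above in
  // action. isStartingWithPath() ignores the scheme entirely, while isMatchingTail() additionally
  // requires both paths to have the same depth.
  private static void examplePathComparisons() {
    Path root = new Path("hdfs://namenode:8020/hbase");
    // true: '/hbase/data/t1' starts with the root's '/hbase' path component
    boolean starts = isStartingWithPath(root, "/hbase/data/t1");
    // true: same depth and every trailing name component matches
    boolean tail = isMatchingTail(new Path("hdfs://namenode:8020/a/b/c"), "/a/b/c");
    LOG.debug("starts=" + starts + ", tail=" + tail);
  }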
152 
153   public static FSUtils getInstance(FileSystem fs, Configuration conf) {
154     String scheme = fs.getUri().getScheme();
155     if (scheme == null) {
156       LOG.warn("Could not find scheme for uri " +
157           fs.getUri() + ", default to hdfs");
158       scheme = "hdfs";
159     }
160     Class<?> fsUtilsClass = conf.getClass("hbase.fsutil." +
161         scheme + ".impl", FSHDFSUtils.class); // Default to HDFS impl
162     FSUtils fsUtils = (FSUtils)ReflectionUtils.newInstance(fsUtilsClass, conf);
163     return fsUtils;
164   }
165 
166   /**
167    * Delete if exists.
168    * @param fs filesystem object
169    * @param dir directory to delete
170    * @return True if deleted <code>dir</code>
171    * @throws IOException e
172    */
173   public static boolean deleteDirectory(final FileSystem fs, final Path dir)
174   throws IOException {
175     return fs.exists(dir) && fs.delete(dir, true);
176   }
177 
178   /**
179    * Return the number of bytes that large input files should optimally
180    * be split into to minimize i/o time.
181    *
182    * Uses reflection to search for getDefaultBlockSize(Path f); if the method
183    * doesn't exist, falls back to using getDefaultBlockSize().
184    * @param fs filesystem object
185    * @param path path of the file
186    * @return the default block size for the path's filesystem
187    * @throws IOException e
188    */
189   public static long getDefaultBlockSize(final FileSystem fs, final Path path) throws IOException {
190     Method m = null;
191     Class<? extends FileSystem> cls = fs.getClass();
192     try {
193       m = cls.getMethod("getDefaultBlockSize", new Class<?>[] { Path.class });
194     } catch (NoSuchMethodException e) {
195       LOG.info("FileSystem doesn't support getDefaultBlockSize");
196     } catch (SecurityException e) {
197       LOG.info("Doesn't have access to getDefaultBlockSize on FileSystems", e);
198       m = null; // could happen on setAccessible()
199     }
200     if (m == null) {
201       return fs.getDefaultBlockSize();
202     } else {
203       try {
204         Object ret = m.invoke(fs, path);
205         return ((Long)ret).longValue();
206       } catch (Exception e) {
207         throw new IOException(e);
208       }
209     }
210   }
211 
212   /**
213    * Get the default replication.
214    *
215    * Uses reflection to search for getDefaultReplication(Path f); if the method
216    * doesn't exist, falls back to using getDefaultReplication().
217    *
218    * @param fs filesystem object
219    * @param path path of the file
220    * @return default replication for the path's filesystem
221    * @throws IOException e
222    */
223   public static short getDefaultReplication(final FileSystem fs, final Path path) throws IOException {
224     Method m = null;
225     Class<? extends FileSystem> cls = fs.getClass();
226     try {
227       m = cls.getMethod("getDefaultReplication", new Class<?>[] { Path.class });
228     } catch (NoSuchMethodException e) {
229       LOG.info("FileSystem doesn't support getDefaultReplication");
230     } catch (SecurityException e) {
231       LOG.info("Doesn't have access to getDefaultReplication on FileSystems", e);
232       m = null; // could happen on setAccessible()
233     }
234     if (m == null) {
235       return fs.getDefaultReplication();
236     } else {
237       try {
238         Object ret = m.invoke(fs, path);
239         return ((Number)ret).shortValue();
240       } catch (Exception e) {
241         throw new IOException(e);
242       }
243     }
244   }
245 
246   /**
247    * Returns the default buffer size to use during writes.
248    *
249    * The size of the buffer should probably be a multiple of hardware
250    * page size (4096 on Intel x86), and it determines how much data is
251    * buffered during read and write operations.
252    *
253    * @param fs filesystem object
254    * @return default buffer size to use during writes
255    */
256   public static int getDefaultBufferSize(final FileSystem fs) {
257     return fs.getConf().getInt("io.file.buffer.size", 4096);
258   }
259 
260   /**
261    * Create the specified file on the filesystem. By default, this will:
262    * <ol>
263    * <li>overwrite the file if it exists</li>
264    * <li>apply the umask in the configuration (if it is enabled)</li>
265    * <li>use the fs configured buffer size (or 4096 if not set)</li>
266    * <li>use the default replication</li>
267    * <li>use the default block size</li>
268    * <li>not track progress</li>
269    * </ol>
270    *
271    * @param fs {@link FileSystem} on which to write the file
272    * @param path {@link Path} to the file to write
273    * @param perm permissions
274    * @param favoredNodes nodes favored for block placement, if the underlying filesystem supports them
275    * @return output stream to the created file
276    * @throws IOException if the file cannot be created
277    */
278   public static FSDataOutputStream create(FileSystem fs, Path path,
279       FsPermission perm, InetSocketAddress[] favoredNodes) throws IOException {
280     if (fs instanceof HFileSystem) {
281       FileSystem backingFs = ((HFileSystem)fs).getBackingFs();
282       if (backingFs instanceof DistributedFileSystem) {
283         // Try to use the favoredNodes version via reflection to allow backwards-
284         // compatibility.
285         try {
286           return (FSDataOutputStream) (DistributedFileSystem.class
287               .getDeclaredMethod("create", Path.class, FsPermission.class,
288                   boolean.class, int.class, short.class, long.class,
289                   Progressable.class, InetSocketAddress[].class)
290                   .invoke(backingFs, path, FsPermission.getDefault(), true,
291                       getDefaultBufferSize(backingFs),
292                       getDefaultReplication(backingFs, path),
293                       getDefaultBlockSize(backingFs, path),
294                       null, favoredNodes));
295         } catch (InvocationTargetException ite) {
296           // Function was properly called, but threw its own exception.
297           throw new IOException(ite.getCause());
298         } catch (NoSuchMethodException e) {
299           LOG.debug("DFS Client does not support most favored nodes create; using default create");
300           if (LOG.isTraceEnabled()) LOG.trace("Ignoring; use default create", e);
301         } catch (IllegalArgumentException e) {
302           LOG.debug("Ignoring (most likely Reflection related exception) " + e);
303         } catch (SecurityException e) {
304           LOG.debug("Ignoring (most likely Reflection related exception) " + e);
305         } catch (IllegalAccessException e) {
306           LOG.debug("Ignoring (most likely Reflection related exception) " + e);
307         }
308       }
309     }
310     return create(fs, path, perm, true);
311   }
312 
313   /**
314    * Create the specified file on the filesystem. By default, this will:
315    * <ol>
316    * <li>apply the umask in the configuration (if it is enabled)</li>
317    * <li>use the fs configured buffer size (or 4096 if not set)</li>
318    * <li>use the default replication</li>
319    * <li>use the default block size</li>
320    * <li>not track progress</li>
321    * </ol>
322    *
323    * @param fs {@link FileSystem} on which to write the file
324    * @param path {@link Path} to the file to write
325    * @param perm permissions to set on the created file
326    * @param overwrite Whether or not an existing file at this path should be overwritten.
327    * @return output stream to the created file
328    * @throws IOException if the file cannot be created
329    */
330   public static FSDataOutputStream create(FileSystem fs, Path path,
331       FsPermission perm, boolean overwrite) throws IOException {
332     if (LOG.isTraceEnabled()) {
333       LOG.trace("Creating file=" + path + " with permission=" + perm + ", overwrite=" + overwrite);
334     }
335     return fs.create(path, perm, overwrite, getDefaultBufferSize(fs),
336         getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
337   }
338 
339   /**
340    * Get the file permissions specified in the configuration, if they are
341    * enabled.
342    *
343    * @param fs filesystem that the file will be created on.
344    * @param conf configuration to read for determining if permissions are
345    *          enabled and which to use
346    * @param permssionConfKey property key in the configuration to use when
347    *          finding the permission
348    * @return the permission to use when creating a new file on the fs. If
349    *         special permissions are not specified in the configuration, then
350    *         the default permissions on the fs will be returned.
351    */
352   public static FsPermission getFilePermissions(final FileSystem fs,
353       final Configuration conf, final String permssionConfKey) {
354     boolean enablePermissions = conf.getBoolean(
355         HConstants.ENABLE_DATA_FILE_UMASK, false);
356 
357     if (enablePermissions) {
358       try {
359         FsPermission perm = new FsPermission(FULL_RWX_PERMISSIONS);
360         // make sure that we have a mask, if not, go default.
361         String mask = conf.get(permssionConfKey);
362         if (mask == null)
363           return FsPermission.getDefault();
364         // apply the umask
365         FsPermission umask = new FsPermission(mask);
366         return perm.applyUMask(umask);
367       } catch (IllegalArgumentException e) {
368         LOG.warn(
369             "Incorrect umask specified: "
370                 + conf.get(permssionConfKey)
371                 + ", using default file permissions.", e);
372         return FsPermission.getDefault();
373       }
374     }
375     return FsPermission.getDefault();
376   }
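
  // Illustrative usage sketch, not part of the original class: create a file whose permissions
  // honor the configured umask. The "hbase.data.umask" key is assumed here to be the value paired
  // with hbase.data.umask.enable; adjust it to whatever key your setup actually uses.
  private static FSDataOutputStream exampleCreateWithUmask(FileSystem fs, Configuration conf,
      Path file) throws IOException {
    conf.setBoolean(HConstants.ENABLE_DATA_FILE_UMASK, true);   // turn umask handling on
    FsPermission perm = getFilePermissions(fs, conf, "hbase.data.umask");
    return create(fs, file, perm, true);                        // overwrite an existing file
  }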
377 
378   /**
379    * Checks to see if the specified file system is available
380    *
381    * @param fs filesystem
382    * @throws IOException e
383    */
384   public static void checkFileSystemAvailable(final FileSystem fs)
385   throws IOException {
386     if (!(fs instanceof DistributedFileSystem)) {
387       return;
388     }
389     IOException exception = null;
390     DistributedFileSystem dfs = (DistributedFileSystem) fs;
391     try {
392       if (dfs.exists(new Path("/"))) {
393         return;
394       }
395     } catch (IOException e) {
396       exception = RemoteExceptionHandler.checkIOException(e);
397     }
398     try {
399       fs.close();
400     } catch (Exception e) {
401       LOG.error("file system close failed: ", e);
402     }
403     IOException io = new IOException("File system is not available");
404     io.initCause(exception);
405     throw io;
406   }
407 
408   /**
409    * We use reflection because {@link DistributedFileSystem#setSafeMode(
410    * FSConstants.SafeModeAction action, boolean isChecked)} is not in hadoop 1.1
411    *
412    * @param dfs the DistributedFileSystem to check
413    * @return whether we're in safe mode
414    * @throws IOException
415    */
416   private static boolean isInSafeMode(DistributedFileSystem dfs) throws IOException {
417     boolean inSafeMode = false;
418     try {
419       Method m = DistributedFileSystem.class.getMethod("setSafeMode", new Class<?> []{
420           org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.class, boolean.class});
421       inSafeMode = (Boolean) m.invoke(dfs,
422         org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_GET, true);
423     } catch (Exception e) {
424       if (e instanceof IOException) throw (IOException) e;
425 
426       // Check whether dfs is on safemode.
427       inSafeMode = dfs.setSafeMode(
428         org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_GET);
429     }
430     return inSafeMode;
431   }
432 
433   /**
434    * Check whether dfs is in safemode.
435    * @param conf
436    * @throws IOException
437    */
438   public static void checkDfsSafeMode(final Configuration conf)
439   throws IOException {
440     boolean isInSafeMode = false;
441     FileSystem fs = FileSystem.get(conf);
442     if (fs instanceof DistributedFileSystem) {
443       DistributedFileSystem dfs = (DistributedFileSystem)fs;
444       isInSafeMode = isInSafeMode(dfs);
445     }
446     if (isInSafeMode) {
447       throw new IOException("File system is in safe mode and cannot be written to now");
448     }
449   }
450 
451   /**
452    * Returns the current version of the file system, as recorded in the hbase.version file
453    *
454    * @param fs filesystem object
455    * @param rootdir root hbase directory
456    * @return null if no version file exists, version string otherwise.
457    * @throws IOException e
458    * @throws org.apache.hadoop.hbase.exceptions.DeserializationException
459    */
460   public static String getVersion(FileSystem fs, Path rootdir)
461   throws IOException, DeserializationException {
462     Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME);
463     FileStatus[] status = null;
464     try {
465       // hadoop 2.0 throws FNFE if directory does not exist.
466       // hadoop 1.0 returns null if directory does not exist.
467       status = fs.listStatus(versionFile);
468     } catch (FileNotFoundException fnfe) {
469       return null;
470     }
471     if (status == null || status.length == 0) return null;
472     String version = null;
473     byte [] content = new byte [(int)status[0].getLen()];
474     FSDataInputStream s = fs.open(versionFile);
475     try {
476       IOUtils.readFully(s, content, 0, content.length);
477       if (ProtobufUtil.isPBMagicPrefix(content)) {
478         version = parseVersionFrom(content);
479       } else {
480         // Presume it is in pre-pb format.
481         InputStream is = new ByteArrayInputStream(content);
482         DataInputStream dis = new DataInputStream(is);
483         try {
484           version = dis.readUTF();
485         } finally {
486           dis.close();
487         }
488         // Update the format
489         LOG.info("Updating the hbase.version file format with version=" + version);
490         setVersion(fs, rootdir, version, 0, HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
491       }
492     } catch (EOFException eof) {
493       LOG.warn("Version file was empty, odd, will try to set it.");
494     } finally {
495       s.close();
496     }
497     return version;
498   }
499 
500   /**
501    * Parse the content of the ${HBASE_ROOTDIR}/hbase.version file.
502    * @param bytes The byte content of the hbase.version file.
503    * @return The version found in the file as a String.
504    * @throws DeserializationException
505    */
506   static String parseVersionFrom(final byte [] bytes)
507   throws DeserializationException {
508     ProtobufUtil.expectPBMagicPrefix(bytes);
509     int pblen = ProtobufUtil.lengthOfPBMagic();
510     FSProtos.HBaseVersionFileContent.Builder builder =
511       FSProtos.HBaseVersionFileContent.newBuilder();
512     FSProtos.HBaseVersionFileContent fileContent;
513     try {
514       fileContent = builder.mergeFrom(bytes, pblen, bytes.length - pblen).build();
515       return fileContent.getVersion();
516     } catch (InvalidProtocolBufferException e) {
517       // Convert to a DeserializationException
518       throw new DeserializationException(e);
519     }
520   }
521 
522   /**
523    * Create the content to write into the ${HBASE_ROOTDIR}/hbase.version file.
524    * @param version Version to persist
525    * @return Serialized protobuf with <code>version</code> content and a bit of pb magic for a prefix.
526    */
527   static byte [] toVersionByteArray(final String version) {
528     FSProtos.HBaseVersionFileContent.Builder builder =
529       FSProtos.HBaseVersionFileContent.newBuilder();
530     return ProtobufUtil.prependPBMagic(builder.setVersion(version).build().toByteArray());
531   }
532 
533   /**
534    * Verifies current version of file system
535    *
536    * @param fs file system
537    * @param rootdir root directory of HBase installation
538    * @param message if true, issues a message on System.out
539    *
540    * @throws IOException e
541    * @throws DeserializationException
542    */
543   public static void checkVersion(FileSystem fs, Path rootdir, boolean message)
544   throws IOException, DeserializationException {
545     checkVersion(fs, rootdir, message, 0, HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
546   }
547 
548   /**
549    * Verifies current version of file system
550    *
551    * @param fs file system
552    * @param rootdir root directory of HBase installation
553    * @param message if true, issues a message on System.out
554    * @param wait wait interval
555    * @param retries number of times to retry
556    *
557    * @throws IOException e
558    * @throws DeserializationException
559    */
560   public static void checkVersion(FileSystem fs, Path rootdir,
561       boolean message, int wait, int retries)
562   throws IOException, DeserializationException {
563     String version = getVersion(fs, rootdir);
564     if (version == null) {
565       if (!metaRegionExists(fs, rootdir)) {
566         // rootDir is empty (no version file and no root region)
567         // just create new version file (HBASE-1195)
568         setVersion(fs, rootdir, wait, retries);
569         return;
570       }
571     } else if (version.compareTo(HConstants.FILE_SYSTEM_VERSION) == 0) return;
572 
573     // version is deprecated, require migration
574     // Output on stdout so user sees it in terminal.
575     String msg = "HBase file layout needs to be upgraded."
576       + "  You have version " + version
577       + " and I want version " + HConstants.FILE_SYSTEM_VERSION
578       + ".  Is your hbase.rootdir valid?  If so, you may need to run "
579       + "'hbase hbck -fixVersionFile'.";
580     if (message) {
581       System.out.println("WARNING! " + msg);
582     }
583     throw new FileSystemVersionException(msg);
584   }
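
  // Illustrative usage sketch, not part of the original class: the typical bootstrap sequence.
  // A missing hbase.version file on an otherwise empty root dir is created; a stale version
  // results in a FileSystemVersionException.
  private static void exampleCheckLayoutVersion(Configuration conf)
      throws IOException, DeserializationException {
    Path rootdir = getRootDir(conf);
    FileSystem fs = rootdir.getFileSystem(conf);
    checkVersion(fs, rootdir, true);   // also print a warning on stdout if migration is needed
  }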
585 
586   /**
587    * Sets version of file system
588    *
589    * @param fs filesystem object
590    * @param rootdir hbase root
591    * @throws IOException e
592    */
593   public static void setVersion(FileSystem fs, Path rootdir)
594   throws IOException {
595     setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0,
596       HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
597   }
598 
599   /**
600    * Sets version of file system
601    *
602    * @param fs filesystem object
603    * @param rootdir hbase root
604    * @param wait time to wait for retry
605    * @param retries number of times to retry before failing
606    * @throws IOException e
607    */
608   public static void setVersion(FileSystem fs, Path rootdir, int wait, int retries)
609   throws IOException {
610     setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait, retries);
611   }
612 
613 
614   /**
615    * Sets version of file system
616    *
617    * @param fs filesystem object
618    * @param rootdir hbase root directory
619    * @param version version to set
620    * @param wait time to wait for retry
621    * @param retries number of times to retry before throwing an IOException
622    * @throws IOException e
623    */
624   public static void setVersion(FileSystem fs, Path rootdir, String version,
625       int wait, int retries) throws IOException {
626     Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME);
627     while (true) {
628       try {
629         FSDataOutputStream s = fs.create(versionFile);
630         s.write(toVersionByteArray(version));
631         s.close();
632         LOG.debug("Created version file at " + rootdir.toString() + " with version=" + version);
633         return;
634       } catch (IOException e) {
635         if (retries > 0) {
636           LOG.warn("Unable to create version file at " + rootdir.toString() + ", retrying", e);
637           fs.delete(versionFile, false);
638           try {
639             if (wait > 0) {
640               Thread.sleep(wait);
641             }
642           } catch (InterruptedException ex) {
643             // ignore
644           }
645           retries--;
646         } else {
647           throw e;
648         }
649       }
650     }
651   }
652 
653   /**
654    * Checks that a cluster ID file exists in the HBase root directory
655    * @param fs the root directory FileSystem
656    * @param rootdir the HBase root directory in HDFS
657    * @param wait how long to wait between retries
658    * @return <code>true</code> if the file exists, otherwise <code>false</code>
659    * @throws IOException if checking the FileSystem fails
660    */
661   public static boolean checkClusterIdExists(FileSystem fs, Path rootdir,
662       int wait) throws IOException {
663     while (true) {
664       try {
665         Path filePath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME);
666         return fs.exists(filePath);
667       } catch (IOException ioe) {
668         if (wait > 0) {
669           LOG.warn("Unable to check cluster ID file in " + rootdir.toString() +
670               ", retrying in "+wait+"msec: "+StringUtils.stringifyException(ioe));
671           try {
672             Thread.sleep(wait);
673           } catch (InterruptedException ie) {
674             Thread.interrupted();
675             break;
676           }
677         } else {
678           throw ioe;
679         }
680       }
681     }
682     return false;
683   }
684 
685   /**
686    * Returns the value of the unique cluster ID stored for this HBase instance.
687    * @param fs the root directory FileSystem
688    * @param rootdir the path to the HBase root directory
689    * @return the unique cluster identifier
690    * @throws IOException if reading the cluster ID file fails
691    */
692   public static ClusterId getClusterId(FileSystem fs, Path rootdir)
693   throws IOException {
694     Path idPath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME);
695     ClusterId clusterId = null;
696     FileStatus status = fs.exists(idPath)? fs.getFileStatus(idPath):  null;
697     if (status != null) {
698       int len = Ints.checkedCast(status.getLen());
699       byte [] content = new byte[len];
700       FSDataInputStream in = fs.open(idPath);
701       try {
702         in.readFully(content);
703       } catch (EOFException eof) {
704         LOG.warn("Cluster ID file " + idPath.toString() + " was empty");
705       } finally{
706         in.close();
707       }
708       try {
709         clusterId = ClusterId.parseFrom(content);
710       } catch (DeserializationException e) {
711         throw new IOException("content=" + Bytes.toString(content), e);
712       }
713       // If not pb'd, make it so.
714       if (!ProtobufUtil.isPBMagicPrefix(content)) {
715         String cid = new String();
716         in = fs.open(idPath);
717         try {
718           cid = in.readUTF();
719           clusterId = new ClusterId(cid);
720         } catch (EOFException eof) {
721           LOG.warn("Cluster ID file " + idPath.toString() + " was empty");
722         } finally {
723           in.close();
724         }
725         rewriteAsPb(fs, rootdir, idPath, clusterId);
726       }
727       return clusterId;
728     } else {
729       LOG.warn("Cluster ID file does not exist at " + idPath.toString());
730     }
731     return clusterId;
732   }
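
  // Illustrative usage sketch, not part of the original class: read the cluster ID the master
  // wrote under the root directory, waiting up to ~1s between existence-check retries.
  private static ClusterId exampleReadClusterId(FileSystem fs, Path rootdir) throws IOException {
    // getClusterId() itself returns null (after a warning) if the hbase.id file is missing.
    return checkClusterIdExists(fs, rootdir, 1000) ? getClusterId(fs, rootdir) : null;
  }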
733 
734   /**
735    * @param cid the cluster ID to write back out in pb format
736    * @throws IOException
737    */
738   private static void rewriteAsPb(final FileSystem fs, final Path rootdir, final Path p,
739       final ClusterId cid)
740   throws IOException {
741     // Rewrite the file as pb.  Move aside the old one first, write new
742     // then delete the moved-aside file.
743     Path movedAsideName = new Path(p + "." + System.currentTimeMillis());
744     if (!fs.rename(p, movedAsideName)) throw new IOException("Failed rename of " + p);
745     setClusterId(fs, rootdir, cid, 100);
746     if (!fs.delete(movedAsideName, false)) {
747       throw new IOException("Failed delete of " + movedAsideName);
748     }
749     LOG.debug("Rewrote the hbase.id file as pb");
750   }
751 
752   /**
753    * Writes a new unique identifier for this cluster to the "hbase.id" file
754    * in the HBase root directory
755    * @param fs the root directory FileSystem
756    * @param rootdir the path to the HBase root directory
757    * @param clusterId the unique identifier to store
758    * @param wait how long (in milliseconds) to wait between retries
759    * @throws IOException if writing to the FileSystem fails and no retry wait was specified
760    */
761   public static void setClusterId(FileSystem fs, Path rootdir, ClusterId clusterId,
762       int wait) throws IOException {
763     while (true) {
764       try {
765         Path filePath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME);
766         FSDataOutputStream s = fs.create(filePath);
767         try {
768           s.write(clusterId.toByteArray());
769         } finally {
770           s.close();
771         }
772         if (LOG.isDebugEnabled()) {
773           LOG.debug("Created cluster ID file at " + filePath.toString() + " with ID: " + clusterId);
774         }
775         return;
776       } catch (IOException ioe) {
777         if (wait > 0) {
778           LOG.warn("Unable to create cluster ID file in " + rootdir.toString() +
779               ", retrying in " + wait + "msec: " + StringUtils.stringifyException(ioe));
780           try {
781             Thread.sleep(wait);
782           } catch (InterruptedException ie) {
783             Thread.interrupted();
784             break;
785           }
786         } else {
787           throw ioe;
788         }
789       }
790     }
791   }
792 
793   /**
794    * Verifies root directory path is a valid URI with a scheme
795    *
796    * @param root root directory path
797    * @return Passed <code>root</code> argument.
798    * @throws IOException if not a valid URI with a scheme
799    */
800   public static Path validateRootPath(Path root) throws IOException {
801     try {
802       URI rootURI = new URI(root.toString());
803       String scheme = rootURI.getScheme();
804       if (scheme == null) {
805         throw new IOException("Root directory does not have a scheme");
806       }
807       return root;
808     } catch (URISyntaxException e) {
809       IOException io = new IOException("Root directory path is not a valid " +
810         "URI -- check your " + HConstants.HBASE_DIR + " configuration");
811       io.initCause(e);
812       throw io;
813     }
814   }
815 
816   /**
817    * Checks for the presence of the root path (using the provided conf object) in the given path. If
818    * it exists, this method removes it and returns the String representation of the remaining relative path.
819    * @param path path to strip the root path from
820    * @param conf configuration used to look up the HBase root directory
821    * @return String representation of the remaining relative path
822    * @throws IOException
823    */
824   public static String removeRootPath(Path path, final Configuration conf) throws IOException {
825     Path root = FSUtils.getRootDir(conf);
826     String pathStr = path.toString();
827     // If the path does not start with the root path, return it unchanged.
828     if (!pathStr.startsWith(root.toString())) return pathStr;
829     // Otherwise strip the root path, plus the trailing "/".
830     return pathStr.substring(root.toString().length() + 1);
831   }
832 
833   /**
834    * If DFS, check safe mode and if so, wait until we clear it.
835    * @param conf configuration
836    * @param wait Sleep between retries
837    * @throws IOException e
838    */
839   public static void waitOnSafeMode(final Configuration conf,
840     final long wait)
841   throws IOException {
842     FileSystem fs = FileSystem.get(conf);
843     if (!(fs instanceof DistributedFileSystem)) return;
844     DistributedFileSystem dfs = (DistributedFileSystem)fs;
845     // Make sure dfs is not in safe mode
846     while (isInSafeMode(dfs)) {
847       LOG.info("Waiting for dfs to exit safe mode...");
848       try {
849         Thread.sleep(wait);
850       } catch (InterruptedException e) {
851         //continue
852       }
853     }
854   }
855 
856   /**
857    * Return the 'path' component of a Path.  In Hadoop, Path is a URI.  This
858    * method returns the 'path' component of a Path's URI: e.g. If a Path is
859    * <code>hdfs://example.org:9000/hbase_trunk/TestTable/compaction.dir</code>,
860    * this method returns <code>/hbase_trunk/TestTable/compaction.dir</code>.
861    * This method is useful if you want to print out a Path without qualifying the
862    * Filesystem instance.
863    * @param p Filesystem Path whose 'path' component we are to return.
864    * @return the 'path' component of the Path's URI
865    */
866   public static String getPath(Path p) {
867     return p.toUri().getPath();
868   }
869 
870   /**
871    * @param c configuration
872    * @return Path to hbase root directory: i.e. <code>hbase.rootdir</code> from
873    * configuration as a qualified Path.
874    * @throws IOException e
875    */
876   public static Path getRootDir(final Configuration c) throws IOException {
877     Path p = new Path(c.get(HConstants.HBASE_DIR));
878     FileSystem fs = p.getFileSystem(c);
879     return p.makeQualified(fs);
880   }
881 
882   public static void setRootDir(final Configuration c, final Path root) throws IOException {
883     c.set(HConstants.HBASE_DIR, root.toString());
884   }
885 
886   public static void setFsDefault(final Configuration c, final Path root) throws IOException {
887     c.set("fs.defaultFS", root.toString());    // for hadoop 0.21+
888     c.set("fs.default.name", root.toString()); // for hadoop 0.20
889   }
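
  // Illustrative usage sketch, not part of the original class: point a Configuration at an HBase
  // root directory and resolve the FileSystem that backs it. The hdfs URI is a placeholder.
  private static FileSystem exampleResolveRootFs(Configuration conf) throws IOException {
    setRootDir(conf, new Path("hdfs://namenode:8020/hbase"));
    setFsDefault(conf, new Path("hdfs://namenode:8020/"));
    Path root = validateRootPath(getRootDir(conf));   // qualified hbase.rootdir; must have a scheme
    return root.getFileSystem(conf);
  }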
890 
891   /**
892    * Checks if meta region exists
893    *
894    * @param fs file system
895    * @param rootdir root directory of HBase installation
896    * @return true if exists
897    * @throws IOException e
898    */
899   @SuppressWarnings("deprecation")
900   public static boolean metaRegionExists(FileSystem fs, Path rootdir)
901   throws IOException {
902     Path metaRegionDir =
903       HRegion.getRegionDir(rootdir, HRegionInfo.FIRST_META_REGIONINFO);
904     return fs.exists(metaRegionDir);
905   }
906 
907   /**
908    * Compute HDFS blocks distribution of a given file, or a portion of the file
909    * @param fs file system
910    * @param status file status of the file
911    * @param start start position of the portion
912    * @param length length of the portion
913    * @return The HDFS blocks distribution
914    */
915   static public HDFSBlocksDistribution computeHDFSBlocksDistribution(
916     final FileSystem fs, FileStatus status, long start, long length)
917     throws IOException {
918     HDFSBlocksDistribution blocksDistribution = new HDFSBlocksDistribution();
919     BlockLocation [] blockLocations =
920       fs.getFileBlockLocations(status, start, length);
921     for(BlockLocation bl : blockLocations) {
922       String [] hosts = bl.getHosts();
923       long len = bl.getLength();
924       blocksDistribution.addHostsAndBlockWeight(hosts, len);
925     }
926 
927     return blocksDistribution;
928   }
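
  // Illustrative usage sketch, not part of the original class: compute how a store file's blocks
  // are spread across hosts, e.g. as input for locality-aware region assignment.
  private static HDFSBlocksDistribution exampleLocalityOf(FileSystem fs, Path storeFile)
      throws IOException {
    FileStatus status = fs.getFileStatus(storeFile);
    // Cover the whole file: from offset 0 through its full length.
    return computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
  }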
929 
930 
931 
932   /**
933    * Runs through the hbase rootdir and checks all stores have only
934    * one file in them -- that is, they've been major compacted.  Looks
935    * at root and meta tables too.
936    * @param fs filesystem
937    * @param hbaseRootDir hbase root directory
938    * @return True if this hbase install is major compacted.
939    * @throws IOException e
940    */
941   public static boolean isMajorCompacted(final FileSystem fs,
942       final Path hbaseRootDir)
943   throws IOException {
944     List<Path> tableDirs = getTableDirs(fs, hbaseRootDir);
945     for (Path d : tableDirs) {
946       FileStatus[] regionDirs = fs.listStatus(d, new DirFilter(fs));
947       for (FileStatus regionDir : regionDirs) {
948         Path dd = regionDir.getPath();
949         if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) {
950           continue;
951         }
952         // Else its a region name.  Now look in region for families.
953         FileStatus[] familyDirs = fs.listStatus(dd, new DirFilter(fs));
954         for (FileStatus familyDir : familyDirs) {
955           Path family = familyDir.getPath();
956           // Now in family make sure only one file.
957           FileStatus[] familyStatus = fs.listStatus(family);
958           if (familyStatus.length > 1) {
959             LOG.debug(family.toString() + " has " + familyStatus.length +
960                 " files.");
961             return false;
962           }
963         }
964       }
965     }
966     return true;
967   }
968 
969   // TODO move this method OUT of FSUtils. FSUtils should have no dependencies on HMaster.
970   /**
971    * Returns the total overall fragmentation percentage. Includes .META. and
972    * -ROOT- as well.
973    *
974    * @param master  The master defining the HBase root and file system.
975    * @return The total fragmentation percentage across all tables, or -1 if it cannot be computed.
976    * @throws IOException When scanning the directory fails.
977    */
978   public static int getTotalTableFragmentation(final HMaster master)
979   throws IOException {
980     Map<String, Integer> map = getTableFragmentation(master);
981     return map != null && map.size() > 0 ? map.get("-TOTAL-") : -1;
982   }
983 
984   /**
985    * Runs through the HBase rootdir and checks how many stores for each table
986    * have more than one file in them. Checks -ROOT- and .META. too. The total
987    * percentage across all tables is stored under the special key "-TOTAL-".
988    *
989    * @param master  The master defining the HBase root and file system.
990    * @return A map of table name to its fragmentation percentage.
991    *
992    * @throws IOException When scanning the directory fails.
993    */
994   public static Map<String, Integer> getTableFragmentation(
995     final HMaster master)
996   throws IOException {
997     Path path = getRootDir(master.getConfiguration());
998     // since HMaster.getFileSystem() is package private
999     FileSystem fs = path.getFileSystem(master.getConfiguration());
1000     return getTableFragmentation(fs, path);
1001   }
1002 
1003   /**
1004    * Runs through the HBase rootdir and checks how many stores for each table
1005    * have more than one file in them. Checks -ROOT- and .META. too. The total
1006    * percentage across all tables is stored under the special key "-TOTAL-".
1007    *
1008    * @param fs  The file system to use.
1009    * @param hbaseRootDir  The root directory to scan.
1010    * @return A map of table name to its fragmentation percentage.
1011    * @throws IOException When scanning the directory fails.
1012    */
1013   public static Map<String, Integer> getTableFragmentation(
1014     final FileSystem fs, final Path hbaseRootDir)
1015   throws IOException {
1016     Map<String, Integer> frags = new HashMap<String, Integer>();
1017     int cfCountTotal = 0;
1018     int cfFragTotal = 0;
1019     DirFilter df = new DirFilter(fs);
1020     List<Path> tableDirs = getTableDirs(fs, hbaseRootDir);
1021     for (Path d : tableDirs) {
1022       int cfCount = 0;
1023       int cfFrag = 0;
1024       FileStatus[] regionDirs = fs.listStatus(d, df);
1025       for (FileStatus regionDir : regionDirs) {
1026         Path dd = regionDir.getPath();
1027         if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) {
1028           continue;
1029         }
1030         // else its a region name, now look in region for families
1031         FileStatus[] familyDirs = fs.listStatus(dd, df);
1032         for (FileStatus familyDir : familyDirs) {
1033           cfCount++;
1034           cfCountTotal++;
1035           Path family = familyDir.getPath();
1036           // now in family make sure only one file
1037           FileStatus[] familyStatus = fs.listStatus(family);
1038           if (familyStatus.length > 1) {
1039             cfFrag++;
1040             cfFragTotal++;
1041           }
1042         }
1043       }
1044       // compute percentage per table and store in result list
1045       frags.put(FSUtils.getTableName(d).getNameAsString(),
1046           Math.round((float) cfFrag / cfCount * 100));
1047     }
1048     // set overall percentage for all tables
1049     frags.put("-TOTAL-", Math.round((float) cfFragTotal / cfCountTotal * 100));
1050     return frags;
1051   }
1052 
1053   /**
1054    * Expects to find -ROOT- directory.
1055    * @param fs filesystem
1056    * @param hbaseRootDir hbase root directory
1057    * @return True if this is a pre-0.20 layout.
1058    * @throws IOException e
1059    */
1060   public static boolean isPre020FileLayout(final FileSystem fs,
1061     final Path hbaseRootDir)
1062   throws IOException {
1063     Path mapfiles = new Path(new Path(new Path(new Path(hbaseRootDir, "-ROOT-"),
1064       "70236052"), "info"), "mapfiles");
1065     return fs.exists(mapfiles);
1066   }
1067 
1068   /**
1069    * Runs through the hbase rootdir and checks all stores have only
1070    * one file in them -- that is, they've been major compacted.  Looks
1071    * at root and meta tables too.  This version differs from
1072    * {@link #isMajorCompacted(FileSystem, Path)} in that it expects a
1073    * pre-0.20.0 hbase layout on the filesystem.  Used when migrating.
1074    * @param fs filesystem
1075    * @param hbaseRootDir hbase root directory
1076    * @return True if this hbase install is major compacted.
1077    * @throws IOException e
1078    */
1079   public static boolean isMajorCompactedPre020(final FileSystem fs,
1080       final Path hbaseRootDir)
1081   throws IOException {
1082     // Presumes any directory under hbase.rootdir is a table.
1083     List<Path> tableDirs = getTableDirs(fs, hbaseRootDir);
1084     for (Path d: tableDirs) {
1085       // Inside a table, there are compaction.dir directories to skip.
1086       // Otherwise, all else should be regions.  Then in each region, should
1087       // only be family directories.  Under each of these, should be a mapfile
1088       // and info directory and in these only one file.
1089       if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) {
1090         continue;
1091       }
1092       FileStatus[] regionDirs = fs.listStatus(d, new DirFilter(fs));
1093       for (FileStatus regionDir : regionDirs) {
1094         Path dd = regionDir.getPath();
1095         if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) {
1096           continue;
1097         }
1098         // Else its a region name.  Now look in region for families.
1099         FileStatus[] familyDirs = fs.listStatus(dd, new DirFilter(fs));
1100         for (FileStatus familyDir : familyDirs) {
1101           Path family = familyDir.getPath();
1102           FileStatus[] infoAndMapfile = fs.listStatus(family);
1103           // Assert that only info and mapfile in family dir.
1104           if (infoAndMapfile.length != 0 && infoAndMapfile.length != 2) {
1105             LOG.debug(family.toString() +
1106                 " has more than just info and mapfile: " + infoAndMapfile.length);
1107             return false;
1108           }
1109           // Make sure directory named info or mapfile.
1110           for (int ll = 0; ll < 2; ll++) {
1111             if (infoAndMapfile[ll].getPath().getName().equals("info") ||
1112                 infoAndMapfile[ll].getPath().getName().equals("mapfiles"))
1113               continue;
1114             LOG.debug("Unexpected directory name: " +
1115                 infoAndMapfile[ll].getPath());
1116             return false;
1117           }
1118           // Now in family, there are 'mapfile' and 'info' subdirs.  Just
1119           // look in the 'mapfile' subdir.
1120           FileStatus[] familyStatus =
1121               fs.listStatus(new Path(family, "mapfiles"));
1122           if (familyStatus.length > 1) {
1123             LOG.debug(family.toString() + " has " + familyStatus.length +
1124                 " files.");
1125             return false;
1126           }
1127         }
1128       }
1129     }
1130     return true;
1131   }
1132 
1133   /**
1134    * Returns the {@link org.apache.hadoop.fs.Path} object representing the table directory under
1135    * path rootdir
1136    *
1137    * @param rootdir qualified path of HBase root directory
1138    * @param tableName name of table
1139    * @return {@link org.apache.hadoop.fs.Path} for table
1140    */
1141   public static Path getTableDir(Path rootdir, final TableName tableName) {
1142     return new Path(getNamespaceDir(rootdir, tableName.getNamespaceAsString()),
1143         tableName.getQualifierAsString());
1144   }
1145 
1146   /**
1147    * Returns the {@link org.apache.hadoop.hbase.TableName} object representing
1148    * the table corresponding to the given
1149    * table directory path
1150    *
1151    * @param tablePath path of the table directory
1152    * @return {@link org.apache.hadoop.hbase.TableName} for the table
1153    */
1154   public static TableName getTableName(Path tablePath) {
1155     return TableName.valueOf(tablePath.getParent().getName(), tablePath.getName());
1156   }
1157 
1158   /**
1159    * Returns the {@link org.apache.hadoop.fs.Path} object representing
1160    * the namespace directory under path rootdir
1161    *
1162    * @param rootdir qualified path of HBase root directory
1163    * @param namespace namespace name
1164    * @return {@link org.apache.hadoop.fs.Path} for the namespace directory
1165    */
1166   public static Path getNamespaceDir(Path rootdir, final String namespace) {
1167     return new Path(rootdir, new Path(HConstants.BASE_NAMESPACE_DIR,
1168         new Path(namespace)));
1169   }
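
  // Illustrative usage sketch, not part of the original class: derive the directory that holds a
  // table's regions. For a table "t1" in the default namespace this resolves to
  // <rootdir>/<base namespace dir>/default/t1.
  private static Path exampleTableDir(Configuration conf) throws IOException {
    Path rootdir = getRootDir(conf);
    TableName tn = TableName.valueOf("t1");   // assumes the default namespace
    return getTableDir(rootdir, tn);
  }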
1170 
1171   /**
1172    * A {@link PathFilter} that returns only regular files.
1173    */
1174   static class FileFilter implements PathFilter {
1175     private final FileSystem fs;
1176 
1177     public FileFilter(final FileSystem fs) {
1178       this.fs = fs;
1179     }
1180 
1181     @Override
1182     public boolean accept(Path p) {
1183       try {
1184         return fs.isFile(p);
1185       } catch (IOException e) {
1186         LOG.debug("unable to verify if path=" + p + " is a regular file", e);
1187         return false;
1188       }
1189     }
1190   }
1191 
1192   /**
1193    * Directory filter that doesn't include any of the directories in the specified blacklist
1194    */
1195   public static class BlackListDirFilter implements PathFilter {
1196     private final FileSystem fs;
1197     private List<String> blacklist;
1198 
1199     /**
1200      * Create a filter on the given filesystem with the specified blacklist
1201      * @param fs filesystem to filter
1202      * @param directoryNameBlackList list of the names of the directories to filter. If
1203      *          <tt>null</tt>, all directories are returned
1204      */
1205     @SuppressWarnings("unchecked")
1206     public BlackListDirFilter(final FileSystem fs, final List<String> directoryNameBlackList) {
1207       this.fs = fs;
1208       blacklist =
1209         (List<String>) (directoryNameBlackList == null ? Collections.emptyList()
1210           : directoryNameBlackList);
1211     }
1212 
1213     @Override
1214     public boolean accept(Path p) {
1215       boolean isValid = false;
1216       try {
1217         if (blacklist.contains(p.getName().toString())) {
1218           isValid = false;
1219         } else {
1220           isValid = fs.getFileStatus(p).isDir();
1221         }
1222       } catch (IOException e) {
1223         LOG.warn("An error occurred while verifying if [" + p.toString()
1224             + "] is a valid directory. Returning 'not valid' and continuing.", e);
1225       }
1226       return isValid;
1227     }
1228   }
1229 
1230   /**
1231    * A {@link PathFilter} that only allows directories.
1232    */
1233   public static class DirFilter extends BlackListDirFilter {
1234 
1235     public DirFilter(FileSystem fs) {
1236       super(fs, null);
1237     }
1238   }
1239 
1240   /**
1241    * A {@link PathFilter} that returns usertable directories. To get all directories use the
1242    * {@link BlackListDirFilter} with a <tt>null</tt> blacklist
1243    */
1244   public static class UserTableDirFilter extends BlackListDirFilter {
1245 
1246     public UserTableDirFilter(FileSystem fs) {
1247       super(fs, HConstants.HBASE_NON_TABLE_DIRS);
1248     }
1249   }
1250 
1251   /**
1252    * Heuristic to determine whether it is safe or not to open a file for append.
1253    * Looks both for dfs.support.append and uses reflection to search
1254    * for SequenceFile.Writer.syncFs() or FSDataOutputStream.hflush().
1255    * @param conf configuration to check
1256    * @return True if append is supported
1257    */
1258   public static boolean isAppendSupported(final Configuration conf) {
1259     boolean append = conf.getBoolean("dfs.support.append", false);
1260     if (append) {
1261       try {
1262         // TODO: The implementation that comes back when we do a createWriter
1263         // may not be using SequenceFile so the below is not a definitive test.
1264         // Will do for now (hdfs-200).
1265         SequenceFile.Writer.class.getMethod("syncFs", new Class<?> []{});
1266         append = true;
1267       } catch (SecurityException e) {
1268       } catch (NoSuchMethodException e) {
1269         append = false;
1270       }
1271     }
1272     if (!append) {
1273       // Look for the 0.21, 0.22, new-style append evidence.
1274       try {
1275         FSDataOutputStream.class.getMethod("hflush", new Class<?> []{});
1276         append = true;
1277       } catch (NoSuchMethodException e) {
1278         append = false;
1279       }
1280     }
1281     return append;
1282   }
1283 
1284   /**
1285    * @param conf
1286    * @return True if this filesystem's scheme is 'hdfs'.
1287    * @throws IOException
1288    */
1289   public static boolean isHDFS(final Configuration conf) throws IOException {
1290     FileSystem fs = FileSystem.get(conf);
1291     String scheme = fs.getUri().getScheme();
1292     return scheme.equalsIgnoreCase("hdfs");
1293   }
1294 
1295   /**
1296    * Recover file lease. Used when a file might be suspected
1297    * to have been left open by another process.
1298    * @param fs FileSystem handle
1299    * @param p Path of file to recover lease
1300    * @param conf Configuration handle
1301    * @throws IOException
1302    */
1303   public abstract void recoverFileLease(final FileSystem fs, final Path p,
1304       Configuration conf, CancelableProgressable reporter) throws IOException;
1305 
1306   public static List<Path> getTableDirs(final FileSystem fs, final Path rootdir)
1307       throws IOException {
1308     List<Path> tableDirs = new LinkedList<Path>();
1309 
1310     for(FileStatus status :
1311         fs.globStatus(new Path(rootdir,
1312             new Path(HConstants.BASE_NAMESPACE_DIR, "*")))) {
1313       tableDirs.addAll(FSUtils.getLocalTableDirs(fs, status.getPath()));
1314     }
1315     return tableDirs;
1316   }
1317 
1318   /**
1319    * @param fs
1320    * @param rootdir
1321    * @return All the table directories under <code>rootdir</code>. Ignores non-table hbase folders such as
1322    * .logs, .oldlogs, and .corrupt.
1323    * @throws IOException
1324    */
1325   public static List<Path> getLocalTableDirs(final FileSystem fs, final Path rootdir)
1326       throws IOException {
1327     // presumes any directory under hbase.rootdir is a table
1328     FileStatus[] dirs = fs.listStatus(rootdir, new UserTableDirFilter(fs));
1329     List<Path> tabledirs = new ArrayList<Path>(dirs.length);
1330     for (FileStatus dir: dirs) {
1331       tabledirs.add(dir.getPath());
1332     }
1333     return tabledirs;
1334   }
1335 
1336   /**
1337    * Checks if the given path is the one with 'recovered.edits' dir.
1338    * @param path
1339    * @return True if the path contains the 'recovered.edits' directory component
1340    */
1341   public static boolean isRecoveredEdits(Path path) {
1342     return path.toString().contains(HConstants.RECOVERED_EDITS_DIR);
1343   }
1344 
1345   /**
1346    * Filter for region directories: accepts only directories whose names look like encoded region names.
1347    */
1348   public static class RegionDirFilter implements PathFilter {
1349     // This pattern will accept 0.90+ style hex region dirs and older numeric region dir names.
1350     final public static Pattern regionDirPattern = Pattern.compile("^[0-9a-f]*$");
1351     final FileSystem fs;
1352 
1353     public RegionDirFilter(FileSystem fs) {
1354       this.fs = fs;
1355     }
1356 
1357     @Override
1358     public boolean accept(Path rd) {
1359       if (!regionDirPattern.matcher(rd.getName()).matches()) {
1360         return false;
1361       }
1362 
1363       try {
1364         return fs.getFileStatus(rd).isDir();
1365       } catch (IOException ioe) {
1366         // Maybe the file was moved or the fs was disconnected.
1367         LOG.warn("Skipping file " + rd +" due to IOException", ioe);
1368         return false;
1369       }
1370     }
1371   }
1372 
1373   /**
1374    * Given a particular table dir, return all the regiondirs inside it, excluding files such as
1375    * .tableinfo
1376    * @param fs A file system for the Path
1377    * @param tableDir Path to a specific table directory <hbase.rootdir>/<tabledir>
1378    * @return List of paths to valid region directories in table dir.
1379    * @throws IOException
1380    */
1381   public static List<Path> getRegionDirs(final FileSystem fs, final Path tableDir) throws IOException {
1382     // assumes we are in a table dir.
1383     FileStatus[] rds = fs.listStatus(tableDir, new RegionDirFilter(fs));
1384     List<Path> regionDirs = new ArrayList<Path>(rds.length);
1385     for (FileStatus rdfs: rds) {
1386       Path rdPath = rdfs.getPath();
1387       regionDirs.add(rdPath);
1388     }
1389     return regionDirs;
1390   }
1391 
1392   /**
1393    * Filter for all dirs that are legal column family names.  This is generally used for colfam
1394    * dirs <hbase.rootdir>/<tabledir>/<regiondir>/<colfamdir>.
1395    */
1396   public static class FamilyDirFilter implements PathFilter {
1397     final FileSystem fs;
1398 
1399     public FamilyDirFilter(FileSystem fs) {
1400       this.fs = fs;
1401     }
1402 
1403     @Override
1404     public boolean accept(Path rd) {
1405       try {
1406         // throws IAE if invalid
1407         HColumnDescriptor.isLegalFamilyName(Bytes.toBytes(rd.getName()));
1408       } catch (IllegalArgumentException iae) {
1409         // path name is an invalid family name and thus is excluded.
1410         return false;
1411       }
1412 
1413       try {
1414         return fs.getFileStatus(rd).isDir();
1415       } catch (IOException ioe) {
1416         // Maybe the file was moved or the fs was disconnected.
1417         LOG.warn("Skipping file " + rd + " due to IOException", ioe);
1418         return false;
1419       }
1420     }
1421   }
1422 
1423   /**
1424    * Given a particular region dir, return all the familydirs inside it
1425    *
1426    * @param fs A file system for the Path
1427    * @param regionDir Path to a specific region directory
1428    * @return List of paths to valid family directories in region dir.
1429    * @throws IOException
1430    */
1431   public static List<Path> getFamilyDirs(final FileSystem fs, final Path regionDir) throws IOException {
1432     // assumes we are in a region dir.
1433     FileStatus[] fds = fs.listStatus(regionDir, new FamilyDirFilter(fs));
1434     List<Path> familyDirs = new ArrayList<Path>(fds.length);
1435     for (FileStatus fdfs: fds) {
1436       Path fdPath = fdfs.getPath();
1437       familyDirs.add(fdPath);
1438     }
1439     return familyDirs;
1440   }
1441 
1442   /**
1443    * Filter for HFiles that excludes reference files.
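        * <p>A minimal usage sketch; <code>familyDir</code> is assumed to point at an existing
        * column family directory:
        * <pre>
        * FileStatus[] hfiles = FSUtils.listStatus(fs, familyDir, new HFileFilter(fs));
        * if (hfiles != null) {
        *   for (FileStatus hfile : hfiles) {
        *     // hfile.getPath() is a plain hfile; reference files are filtered out
        *   }
        * }
        * </pre>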
1444    */
1445   public static class HFileFilter implements PathFilter {
1446     // This pattern will accept 0.90+ style hex hfile names but reject reference files
1447     final public static Pattern hfilePattern = Pattern.compile("^([0-9a-f]+)$");
1448 
1449     final FileSystem fs;
1450 
1451     public HFileFilter(FileSystem fs) {
1452       this.fs = fs;
1453     }
1454 
1455     @Override
1456     public boolean accept(Path rd) {
1457       if (!hfilePattern.matcher(rd.getName()).matches()) {
1458         return false;
1459       }
1460 
1461       try {
1462         // only files
1463         return !fs.getFileStatus(rd).isDir();
1464       } catch (IOException ioe) {
1465         // Maybe the file was moved or the fs was disconnected.
1466         LOG.warn("Skipping file " + rd + " due to IOException", ioe);
1467         return false;
1468       }
1469     }
1470   }
1471 
1472   /**
1473    * @param conf the configuration to use
1474    * @return the filesystem of the hbase rootdir
1475    * @throws IOException
1476    */
1477   public static FileSystem getCurrentFileSystem(Configuration conf)
1478   throws IOException {
1479     return getRootDir(conf).getFileSystem(conf);
1480   }
1481 
1482 
1483   /**
1484    * Runs through the HBase rootdir/tablename and creates a reverse lookup map for
1485    * table StoreFile names to the full Path.
1486    * <br>
1487    * Example...<br>
1488    * Key = 3944417774205889744  <br>
1489    * Value = hdfs://localhost:51169/user/userid/-ROOT-/70236052/info/3944417774205889744
1490    *
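        * <p>A minimal usage sketch; the table name below is illustrative:
        * <pre>
        * Map&lt;String, Path&gt; storeFiles = FSUtils.getTableStoreFilePathMap(
        *     null, fs, FSUtils.getRootDir(conf), TableName.valueOf("my_table"));
        * </pre>
        *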
1491    * @param map map to add values.  If null, this method will create and populate one to return
1492    * @param fs  The file system to use.
1493    * @param hbaseRootDir  The root directory to scan.
1494    * @param tableName name of the table to scan.
1495    * @return Map keyed by StoreFile name with a value of the full Path.
1496    * @throws IOException When scanning the directory fails.
1497    */
1498   public static Map<String, Path> getTableStoreFilePathMap(Map<String, Path> map,
1499   final FileSystem fs, final Path hbaseRootDir, TableName tableName)
1500   throws IOException {
1501     if (map == null) {
1502       map = new HashMap<String, Path>();
1503     }
1504 
1505     // only include the directory paths to tables
1506     Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
1507     // Inside a table, there are compaction.dir directories to skip.  Otherwise, all else
1508     // should be regions.
1509     PathFilter df = new BlackListDirFilter(fs, HConstants.HBASE_NON_TABLE_DIRS);
1510     FileStatus[] regionDirs = fs.listStatus(tableDir);
1511     for (FileStatus regionDir : regionDirs) {
1512       Path dd = regionDir.getPath();
1513       if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) {
1514         continue;
1515       }
1516       // else its a region name, now look in region for families
1517       FileStatus[] familyDirs = fs.listStatus(dd, df);
1518       for (FileStatus familyDir : familyDirs) {
1519         Path family = familyDir.getPath();
1520         // now in family, iterate over the StoreFiles and
1521         // put in map
1522         FileStatus[] familyStatus = fs.listStatus(family);
1523         for (FileStatus sfStatus : familyStatus) {
1524           Path sf = sfStatus.getPath();
1525           map.put(sf.getName(), sf);
1526         }
1527       }
1528     }
1529     return map;
1530   }
1531 
1532 
1533   /**
1534    * Runs through the HBase rootdir and creates a reverse lookup map for
1535    * table StoreFile names to the full Path.
1536    * <br>
1537    * Example...<br>
1538    * Key = 3944417774205889744  <br>
1539    * Value = hdfs://localhost:51169/user/userid/-ROOT-/70236052/info/3944417774205889744
1540    *
1541    * @param fs  The file system to use.
1542    * @param hbaseRootDir  The root directory to scan.
1543    * @return Map keyed by StoreFile name with a value of the full Path.
1544    * @throws IOException When scanning the directory fails.
1545    */
1546   public static Map<String, Path> getTableStoreFilePathMap(
1547     final FileSystem fs, final Path hbaseRootDir)
1548   throws IOException {
1549     Map<String, Path> map = new HashMap<String, Path>();
1550 
1551     // if this method looks similar to 'getTableFragmentation' that is because
1552     // it was borrowed from it.
1553 
1554     // only include the directory paths to tables
1555     for (Path tableDir : FSUtils.getTableDirs(fs, hbaseRootDir)) {
1556       getTableStoreFilePathMap(map, fs, hbaseRootDir,
1557           FSUtils.getTableName(tableDir));
1558     }
1559     return map;
1560   }
1561 
1562   /**
1563    * Calls fs.listStatus() and treats FileNotFoundException as non-fatal.
1564    * This accommodates differences between Hadoop versions: Hadoop 1 does not throw a
1565    * FileNotFoundException for a missing directory and returns an empty FileStatus[],
1566    * while Hadoop 2 throws FileNotFoundException.
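        * <p>A minimal usage sketch; <code>tableDir</code> is illustrative:
        * <pre>
        * FileStatus[] regions = FSUtils.listStatus(fs, tableDir, new RegionDirFilter(fs));
        * if (regions == null) {
        *   // directory is missing or empty; nothing to scan
        * }
        * </pre>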
1567    *
1568    * @param fs file system
1569    * @param dir directory
1570    * @param filter path filter
1571    * @return null if <code>dir</code> does not exist or is empty, otherwise FileStatus array
1572    */
1573   public static FileStatus [] listStatus(final FileSystem fs,
1574       final Path dir, final PathFilter filter) throws IOException {
1575     FileStatus [] status = null;
1576     try {
1577       status = filter == null ? fs.listStatus(dir) : fs.listStatus(dir, filter);
1578     } catch (FileNotFoundException fnfe) {
1579       // if directory doesn't exist, return null
1580       if (LOG.isTraceEnabled()) {
1581         LOG.trace(dir + " doesn't exist");
1582       }
1583     }
1584     if (status == null || status.length < 1) return null;
1585     return status;
1586   }
1587 
1588   /**
1589    * Calls fs.listStatus() and treats FileNotFoundException as non-fatal.
1590    * This accommodates differences between Hadoop versions.
1591    *
1592    * @param fs file system
1593    * @param dir directory
1594    * @return null if <code>dir</code> does not exist or is empty, otherwise FileStatus array
1595    */
1596   public static FileStatus[] listStatus(final FileSystem fs, final Path dir) throws IOException {
1597     return listStatus(fs, dir, null);
1598   }
1599 
1600   /**
1601    * Calls fs.delete() and returns the value it returns.
1602    *
1603    * @param fs file system
1604    * @param path path to delete
1605    * @param recursive whether to delete recursively
1606    * @return the value returned by fs.delete()
1607    * @throws IOException
1608    */
1609   public static boolean delete(final FileSystem fs, final Path path, final boolean recursive)
1610       throws IOException {
1611     return fs.delete(path, recursive);
1612   }
1613 
1614   /**
1615    * Calls fs.exists(). Checks if the specified path exists.
1616    *
1617    * @param fs file system
1618    * @param path path to check
1619    * @return the value returned by fs.exists()
1620    * @throws IOException
1621    */
1622   public static boolean isExists(final FileSystem fs, final Path path) throws IOException {
1623     return fs.exists(path);
1624   }
1625 
1626   /**
1627    * Throw an exception if an action is not permitted by a user on a file.
1628    *
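        * <p>A minimal usage sketch; <code>somePath</code> is a hypothetical file path:
        * <pre>
        * UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        * FSUtils.checkAccess(ugi, fs.getFileStatus(somePath), FsAction.WRITE);
        * </pre>
        *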
1629    * @param ugi
1630    *          the user
1631    * @param file
1632    *          the file
1633    * @param action
1634    *          the action
1635    */
1636   public static void checkAccess(UserGroupInformation ugi, FileStatus file,
1637       FsAction action) throws AccessControlException {
1638     if (ugi.getShortUserName().equals(file.getOwner())) {
1639       if (file.getPermission().getUserAction().implies(action)) {
1640         return;
1641       }
1642     } else if (contains(ugi.getGroupNames(), file.getGroup())) {
1643       if (file.getPermission().getGroupAction().implies(action)) {
1644         return;
1645       }
1646     } else if (file.getPermission().getOtherAction().implies(action)) {
1647       return;
1648     }
1649     throw new AccessControlException("Permission denied:" + " action=" + action
1650         + " path=" + file.getPath() + " user=" + ugi.getShortUserName());
1651   }
1652 
1653   private static boolean contains(String[] groups, String user) {
1654     for (String group : groups) {
1655       if (group.equals(user)) {
1656         return true;
1657       }
1658     }
1659     return false;
1660   }
1661 
1662   /**
1663    * Log the current state of the filesystem from a certain root directory
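        * <p>A minimal usage sketch, assuming a Configuration named <code>conf</code> is in scope:
        * <pre>
        * FSUtils.logFileSystemState(fs, FSUtils.getRootDir(conf), LOG);
        * </pre>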
1664    * @param fs filesystem to investigate
1665    * @param root root file/directory to start logging from
1666    * @param LOG log to output information to
1667    * @throws IOException if an unexpected exception occurs
1668    */
1669   public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG)
1670       throws IOException {
1671     LOG.debug("Current file system:");
1672     logFSTree(LOG, fs, root, "|-");
1673   }
1674 
1675   /**
1676    * Recursive helper to log the state of the FS
1677    *
1678    * @see #logFileSystemState(FileSystem, Path, Log)
1679    */
1680   private static void logFSTree(Log LOG, final FileSystem fs, final Path root, String prefix)
1681       throws IOException {
1682     FileStatus[] files = FSUtils.listStatus(fs, root, null);
1683     if (files == null) return;
1684 
1685     for (FileStatus file : files) {
1686       if (file.isDir()) {
1687         LOG.debug(prefix + file.getPath().getName() + "/");
1688         logFSTree(LOG, fs, file.getPath(), prefix + "---");
1689       } else {
1690         LOG.debug(prefix + file.getPath().getName());
1691       }
1692     }
1693   }
1694 
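       /**
        * Sets the modification time of <code>src</code> to the current time (so that time-to-live
        * based cleaners treat the file as recently touched) and then renames it to <code>dest</code>.
        *
        * @param fs file system
        * @param src source path
        * @param dest destination path
        * @return the value returned by fs.rename()
        * @throws IOException on failure
        */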
1695   public static boolean renameAndSetModifyTime(final FileSystem fs, final Path src, final Path dest)
1696       throws IOException {
1697     // set the modify time for TimeToLive Cleaner
1698     fs.setTimes(src, EnvironmentEdgeManager.currentTimeMillis(), -1);
1699     return fs.rename(src, dest);
1700   }
1701 }