View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io;
18  
19  import java.io.File;
20  import java.util.ArrayList;
21  import java.util.Collection;
22  import java.util.Iterator;
23  import java.util.Stack;
24  
25  /**
26   * General filename and filepath manipulation utilities.
27   * <p>
28   * When dealing with filenames you can hit problems when moving from a Windows
29   * based development machine to a Unix based production machine.
30   * This class aims to help avoid those problems.
31   * <p>
32   * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
33   * using JDK {@link java.io.File File} objects and the two argument constructor
34   * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
35   * <p>
36   * Most methods on this class are designed to work the same on both Unix and Windows.
37   * Those that don't include 'System', 'Unix' or 'Windows' in their name.
38   * <p>
39   * Most methods recognise both separators (forward and back), and both
40   * sets of prefixes. See the javadoc of each method for details.
41   * <p>
42   * This class defines six components within a filename
43   * (example C:\dev\project\file.txt):
44   * <ul>
45   * <li>the prefix - C:\</li>
46   * <li>the path - dev\project\</li>
47   * <li>the full path - C:\dev\project\</li>
48   * <li>the name - file.txt</li>
49   * <li>the base name - file</li>
50   * <li>the extension - txt</li>
51   * </ul>
52   * Note that this class works best if directory filenames end with a separator.
53   * If you omit the last separator, it is impossible to determine if the filename
54   * corresponds to a file or a directory. As a result, we have chosen to say
55   * it corresponds to a file.
56   * <p>
57   * This class only supports Unix and Windows style names.
58   * Prefixes are matched as follows:
59   * <pre>
60   * Windows:
61   * a\b\c.txt           --> ""          --> relative
62   * \a\b\c.txt          --> "\"         --> current drive absolute
63   * C:a\b\c.txt         --> "C:"        --> drive relative
64   * C:\a\b\c.txt        --> "C:\"       --> absolute
65   * \\server\a\b\c.txt  --> "\\server\" --> UNC
66   *
67   * Unix:
68   * a/b/c.txt           --> ""          --> relative
69   * /a/b/c.txt          --> "/"         --> absolute
70   * ~/a/b/c.txt         --> "~/"        --> current user
71   * ~                   --> "~/"        --> current user (slash added)
72   * ~user/a/b/c.txt     --> "~user/"    --> named user
73   * ~user               --> "~user/"    --> named user (slash added)
74   * </pre>
75   * Both prefix styles are matched always, irrespective of the machine that you are
76   * currently running on.
77   * <p>
78   * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
79   *
80   * @author <a href="mailto:burton@relativity.yi.org">Kevin A. Burton</A>
81   * @author <a href="mailto:sanders@apache.org">Scott Sanders</a>
82   * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
83   * @author <a href="mailto:Christoph.Reck@dlr.de">Christoph.Reck</a>
84   * @author <a href="mailto:peter@apache.org">Peter Donald</a>
85   * @author <a href="mailto:jefft@apache.org">Jeff Turner</a>
86   * @author Matthew Hawthorne
87   * @author Martin Cooper
88   * @author <a href="mailto:jeremias@apache.org">Jeremias Maerki</a>
89   * @author Stephen Colebourne
90   * @version $Id: FilenameUtils.java 490424 2006-12-27 01:20:43Z bayard $
91   * @since Commons IO 1.1
92   */
93  public class FilenameUtils {
94  
95      /**
96       * The extension separator character.
97       */
98      private static final char EXTENSION_SEPARATOR = '.';
99  
100     /**
101      * The Unix separator character.
102      */
103     private static final char UNIX_SEPARATOR = '/';
104 
105     /**
106      * The Windows separator character.
107      */
108     private static final char WINDOWS_SEPARATOR = '\\';
109 
110     /**
111      * The system separator character.
112      */
113     private static final char SYSTEM_SEPARATOR = File.separatorChar;
114 
115     /**
116      * The separator character that is the opposite of the system separator.
117      */
118     private static final char OTHER_SEPARATOR;
119     static {
120         if (isSystemWindows()) {
121             OTHER_SEPARATOR = UNIX_SEPARATOR;
122         } else {
123             OTHER_SEPARATOR = WINDOWS_SEPARATOR;
124         }
125     }
126 
127     /**
128      * Instances should NOT be constructed in standard programming.
129      */
130     public FilenameUtils() {
131         super();
132     }
133 
134     //-----------------------------------------------------------------------
135     /**
136      * Determines if Windows file system is in use.
137      * 
138      * @return true if the system is Windows
139      */
140     static boolean isSystemWindows() {
141         return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
142     }
143 
144     //-----------------------------------------------------------------------
145     /**
146      * Checks if the character is a separator.
147      * 
148      * @param ch  the character to check
149      * @return true if it is a separator character
150      */
151     private static boolean isSeparator(char ch) {
152         return (ch == UNIX_SEPARATOR) || (ch == WINDOWS_SEPARATOR);
153     }
154 
155     //-----------------------------------------------------------------------
156     /**
157      * Normalizes a path, removing double and single dot path steps.
158      * <p>
159      * This method normalizes a path to a standard format.
160      * The input may contain separators in either Unix or Windows format.
161      * The output will contain separators in the format of the system.
162      * <p>
163      * A trailing slash will be retained.
164      * A double slash will be merged to a single slash (but UNC names are handled).
165      * A single dot path segment will be removed.
166      * A double dot will cause that path segment and the one before to be removed.
167      * If the double dot has no parent path segment to work with, <code>null</code>
168      * is returned.
169      * <p>
170      * The output will be the same on both Unix and Windows except
171      * for the separator character.
172      * <pre>
173      * /foo//               -->   /foo/
174      * /foo/./              -->   /foo/
175      * /foo/../bar          -->   /bar
176      * /foo/../bar/         -->   /bar/
177      * /foo/../bar/../baz   -->   /baz
178      * //foo//./bar         -->   /foo/bar
179      * /../                 -->   null
180      * ../foo               -->   null
181      * foo/bar/..           -->   foo/
182      * foo/../../bar        -->   null
183      * foo/../bar           -->   bar
184      * //server/foo/../bar  -->   //server/bar
185      * //server/../bar      -->   null
186      * C:\foo\..\bar        -->   C:\bar
187      * C:\..\bar            -->   null
188      * ~/foo/../bar/        -->   ~/bar/
189      * ~/../bar             -->   null
190      * </pre>
191      * (Note the file separator returned will be correct for Windows/Unix)
192      *
193      * @param filename  the filename to normalize, null returns null
194      * @return the normalized filename, or null if invalid
195      */
196     public static String normalize(String filename) {
197         return doNormalize(filename, true);
198     }
199 
200     //-----------------------------------------------------------------------
201     /**
202      * Normalizes a path, removing double and single dot path steps,
203      * and removing any final directory separator.
204      * <p>
205      * This method normalizes a path to a standard format.
206      * The input may contain separators in either Unix or Windows format.
207      * The output will contain separators in the format of the system.
208      * <p>
209      * A trailing slash will be removed.
210      * A double slash will be merged to a single slash (but UNC names are handled).
211      * A single dot path segment will be removed.
212      * A double dot will cause that path segment and the one before to be removed.
213      * If the double dot has no parent path segment to work with, <code>null</code>
214      * is returned.
215      * <p>
216      * The output will be the same on both Unix and Windows except
217      * for the separator character.
218      * <pre>
219      * /foo//               -->   /foo
220      * /foo/./              -->   /foo
221      * /foo/../bar          -->   /bar
222      * /foo/../bar/         -->   /bar
223      * /foo/../bar/../baz   -->   /baz
224      * //foo//./bar         -->   /foo/bar
225      * /../                 -->   null
226      * ../foo               -->   null
227      * foo/bar/..           -->   foo
228      * foo/../../bar        -->   null
229      * foo/../bar           -->   bar
230      * //server/foo/../bar  -->   //server/bar
231      * //server/../bar      -->   null
232      * C:\foo\..\bar        -->   C:\bar
233      * C:\..\bar            -->   null
234      * ~/foo/../bar/        -->   ~/bar
235      * ~/../bar             -->   null
236      * </pre>
237      * (Note the file separator returned will be correct for Windows/Unix)
238      *
239      * @param filename  the filename to normalize, null returns null
240      * @return the normalized filename, or null if invalid
241      */
242     public static String normalizeNoEndSeparator(String filename) {
243         return doNormalize(filename, false);
244     }
245 
246     /**
247      * Internal method to perform the normalization.
248      *
249      * @param filename  the filename
250      * @param keepSeparator  true to keep the final separator
251      * @return the normalized filename
252      */
253     private static String doNormalize(String filename, boolean keepSeparator) {
254         if (filename == null) {
255             return null;
256         }
257         int size = filename.length();
258         if (size == 0) {
259             return filename;
260         }
261         int prefix = getPrefixLength(filename);
262         if (prefix < 0) {
263             return null;
264         }
265         
266         char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
267         filename.getChars(0, filename.length(), array, 0);
268         
269         // fix separators throughout
270         for (int i = 0; i < array.length; i++) {
271             if (array[i] == OTHER_SEPARATOR) {
272                 array[i] = SYSTEM_SEPARATOR;
273             }
274         }
275         
276         // add extra separator on the end to simplify code below
277         boolean lastIsDirectory = true;
278         if (array[size - 1] != SYSTEM_SEPARATOR) {
279             array[size++] = SYSTEM_SEPARATOR;
280             lastIsDirectory = false;
281         }
282         
283         // adjoining slashes
284         for (int i = prefix + 1; i < size; i++) {
285             if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == SYSTEM_SEPARATOR) {
286                 System.arraycopy(array, i, array, i - 1, size - i);
287                 size--;
288                 i--;
289             }
290         }
291         
292         // dot slash
293         for (int i = prefix + 1; i < size; i++) {
294             if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' &&
295                     (i == prefix + 1 || array[i - 2] == SYSTEM_SEPARATOR)) {
296                 if (i == size - 1) {
297                     lastIsDirectory = true;
298                 }
299                 System.arraycopy(array, i + 1, array, i - 1, size - i);
300                 size -=2;
301                 i--;
302             }
303         }
304         
305         // double dot slash
306         outer:
307         for (int i = prefix + 2; i < size; i++) {
308             if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' && array[i - 2] == '.' &&
309                     (i == prefix + 2 || array[i - 3] == SYSTEM_SEPARATOR)) {
310                 if (i == prefix + 2) {
311                     return null;
312                 }
313                 if (i == size - 1) {
314                     lastIsDirectory = true;
315                 }
316                 int j;
317                 for (j = i - 4 ; j >= prefix; j--) {
318                     if (array[j] == SYSTEM_SEPARATOR) {
319                         // remove b/../ from a/b/../c
320                         System.arraycopy(array, i + 1, array, j + 1, size - i);
321                         size -= (i - j);
322                         i = j + 1;
323                         continue outer;
324                     }
325                 }
326                 // remove a/../ from a/../c
327                 System.arraycopy(array, i + 1, array, prefix, size - i);
328                 size -= (i + 1 - prefix);
329                 i = prefix + 1;
330             }
331         }
332         
333         if (size <= 0) {  // should never be less than 0
334             return "";
335         }
336         if (size <= prefix) {  // should never be less than prefix
337             return new String(array, 0, size);
338         }
339         if (lastIsDirectory && keepSeparator) {
340             return new String(array, 0, size);  // keep trailing separator
341         }
342         return new String(array, 0, size - 1);  // lose trailing separator
343     }
344 
345     //-----------------------------------------------------------------------
346     /**
347      * Concatenates a filename to a base path using normal command line style rules.
348      * <p>
349      * The effect is equivalent to resultant directory after changing
350      * directory to the first argument, followed by changing directory to
351      * the second argument.
352      * <p>
353      * The first argument is the base path, the second is the path to concatenate.
354      * The returned path is always normalized via {@link #normalize(String)},
355      * thus <code>..</code> is handled.
356      * <p>
357      * If <code>pathToAdd</code> is absolute (has an absolute prefix), then
358      * it will be normalized and returned.
359      * Otherwise, the paths will be joined, normalized and returned.
360      * <p>
361      * The output will be the same on both Unix and Windows except
362      * for the separator character.
363      * <pre>
364      * /foo/ + bar          -->   /foo/bar
365      * /foo + bar           -->   /foo/bar
366      * /foo + /bar          -->   /bar
367      * /foo + C:/bar        -->   C:/bar
368      * /foo + C:bar         -->   C:bar (*)
369      * /foo/a/ + ../bar     -->   foo/bar
370      * /foo/ + ../../bar    -->   null
371      * /foo/ + /bar         -->   /bar
372      * /foo/.. + /bar       -->   /bar
373      * /foo + bar/c.txt     -->   /foo/bar/c.txt
374      * /foo/c.txt + bar     -->   /foo/c.txt/bar (!)
375      * </pre>
376      * (*) Note that the Windows relative drive prefix is unreliable when
377      * used with this method.
378      * (!) Note that the first parameter must be a path. If it ends with a name, then
379      * the name will be built into the concatenated path. If this might be a problem,
380      * use {@link #getFullPath(String)} on the base path argument.
381      *
382      * @param basePath  the base path to attach to, always treated as a path
383      * @param fullFilenameToAdd  the filename (or path) to attach to the base
384      * @return the concatenated path, or null if invalid
385      */
386     public static String concat(String basePath, String fullFilenameToAdd) {
387         int prefix = getPrefixLength(fullFilenameToAdd);
388         if (prefix < 0) {
389             return null;
390         }
391         if (prefix > 0) {
392             return normalize(fullFilenameToAdd);
393         }
394         if (basePath == null) {
395             return null;
396         }
397         int len = basePath.length();
398         if (len == 0) {
399             return normalize(fullFilenameToAdd);
400         }
401         char ch = basePath.charAt(len - 1);
402         if (isSeparator(ch)) {
403             return normalize(basePath + fullFilenameToAdd);
404         } else {
405             return normalize(basePath + '/' + fullFilenameToAdd);
406         }
407     }
408 
409     //-----------------------------------------------------------------------
410     /**
411      * Converts all separators to the Unix separator of forward slash.
412      * 
413      * @param path  the path to be changed, null ignored
414      * @return the updated path
415      */
416     public static String separatorsToUnix(String path) {
417         if (path == null || path.indexOf(WINDOWS_SEPARATOR) == -1) {
418             return path;
419         }
420         return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
421     }
422 
423     /**
424      * Converts all separators to the Windows separator of backslash.
425      * 
426      * @param path  the path to be changed, null ignored
427      * @return the updated path
428      */
429     public static String separatorsToWindows(String path) {
430         if (path == null || path.indexOf(UNIX_SEPARATOR) == -1) {
431             return path;
432         }
433         return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
434     }
435 
436     /**
437      * Converts all separators to the system separator.
438      * 
439      * @param path  the path to be changed, null ignored
440      * @return the updated path
441      */
442     public static String separatorsToSystem(String path) {
443         if (path == null) {
444             return null;
445         }
446         if (isSystemWindows()) {
447             return separatorsToWindows(path);
448         } else {
449             return separatorsToUnix(path);
450         }
451     }
452 
453     //-----------------------------------------------------------------------
454     /**
455      * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>.
456      * <p>
457      * This method will handle a file in either Unix or Windows format.
458      * <p>
459      * The prefix length includes the first slash in the full filename
460      * if applicable. Thus, it is possible that the length returned is greater
461      * than the length of the input string.
462      * <pre>
463      * Windows:
464      * a\b\c.txt           --> ""          --> relative
465      * \a\b\c.txt          --> "\"         --> current drive absolute
466      * C:a\b\c.txt         --> "C:"        --> drive relative
467      * C:\a\b\c.txt        --> "C:\"       --> absolute
468      * \\server\a\b\c.txt  --> "\\server\" --> UNC
469      *
470      * Unix:
471      * a/b/c.txt           --> ""          --> relative
472      * /a/b/c.txt          --> "/"         --> absolute
473      * ~/a/b/c.txt         --> "~/"        --> current user
474      * ~                   --> "~/"        --> current user (slash added)
475      * ~user/a/b/c.txt     --> "~user/"    --> named user
476      * ~user               --> "~user/"    --> named user (slash added)
477      * </pre>
478      * <p>
479      * The output will be the same irrespective of the machine that the code is running on.
480      * ie. both Unix and Windows prefixes are matched regardless.
481      *
482      * @param filename  the filename to find the prefix in, null returns -1
483      * @return the length of the prefix, -1 if invalid or null
484      */
485     public static int getPrefixLength(String filename) {
486         if (filename == null) {
487             return -1;
488         }
489         int len = filename.length();
490         if (len == 0) {
491             return 0;
492         }
493         char ch0 = filename.charAt(0);
494         if (ch0 == ':') {
495             return -1;
496         }
497         if (len == 1) {
498             if (ch0 == '~') {
499                 return 2;  // return a length greater than the input
500             }
501             return (isSeparator(ch0) ? 1 : 0);
502         } else {
503             if (ch0 == '~') {
504                 int posUnix = filename.indexOf(UNIX_SEPARATOR, 1);
505                 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1);
506                 if (posUnix == -1 && posWin == -1) {
507                     return len + 1;  // return a length greater than the input
508                 }
509                 posUnix = (posUnix == -1 ? posWin : posUnix);
510                 posWin = (posWin == -1 ? posUnix : posWin);
511                 return Math.min(posUnix, posWin) + 1;
512             }
513             char ch1 = filename.charAt(1);
514             if (ch1 == ':') {
515                 ch0 = Character.toUpperCase(ch0);
516                 if (ch0 >= 'A' && ch0 <= 'Z') {
517                     if (len == 2 || isSeparator(filename.charAt(2)) == false) {
518                         return 2;
519                     }
520                     return 3;
521                 }
522                 return -1;
523                 
524             } else if (isSeparator(ch0) && isSeparator(ch1)) {
525                 int posUnix = filename.indexOf(UNIX_SEPARATOR, 2);
526                 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2);
527                 if ((posUnix == -1 && posWin == -1) || posUnix == 2 || posWin == 2) {
528                     return -1;
529                 }
530                 posUnix = (posUnix == -1 ? posWin : posUnix);
531                 posWin = (posWin == -1 ? posUnix : posWin);
532                 return Math.min(posUnix, posWin) + 1;
533             } else {
534                 return (isSeparator(ch0) ? 1 : 0);
535             }
536         }
537     }
538 
539     /**
540      * Returns the index of the last directory separator character.
541      * <p>
542      * This method will handle a file in either Unix or Windows format.
543      * The position of the last forward or backslash is returned.
544      * <p>
545      * The output will be the same irrespective of the machine that the code is running on.
546      * 
547      * @param filename  the filename to find the last path separator in, null returns -1
548      * @return the index of the last separator character, or -1 if there
549      * is no such character
550      */
551     public static int indexOfLastSeparator(String filename) {
552         if (filename == null) {
553             return -1;
554         }
555         int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR);
556         int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR);
557         return Math.max(lastUnixPos, lastWindowsPos);
558     }
559 
560     /**
561      * Returns the index of the last extension separator character, which is a dot.
562      * <p>
563      * This method also checks that there is no directory separator after the last dot.
564      * To do this it uses {@link #indexOfLastSeparator(String)} which will
565      * handle a file in either Unix or Windows format.
566      * <p>
567      * The output will be the same irrespective of the machine that the code is running on.
568      * 
569      * @param filename  the filename to find the last path separator in, null returns -1
570      * @return the index of the last separator character, or -1 if there
571      * is no such character
572      */
573     public static int indexOfExtension(String filename) {
574         if (filename == null) {
575             return -1;
576         }
577         int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR);
578         int lastSeparator = indexOfLastSeparator(filename);
579         return (lastSeparator > extensionPos ? -1 : extensionPos);
580     }
581 
582     //-----------------------------------------------------------------------
583     /**
584      * Gets the prefix from a full filename, such as <code>C:/</code>
585      * or <code>~/</code>.
586      * <p>
587      * This method will handle a file in either Unix or Windows format.
588      * The prefix includes the first slash in the full filename where applicable.
589      * <pre>
590      * Windows:
591      * a\b\c.txt           --> ""          --> relative
592      * \a\b\c.txt          --> "\"         --> current drive absolute
593      * C:a\b\c.txt         --> "C:"        --> drive relative
594      * C:\a\b\c.txt        --> "C:\"       --> absolute
595      * \\server\a\b\c.txt  --> "\\server\" --> UNC
596      *
597      * Unix:
598      * a/b/c.txt           --> ""          --> relative
599      * /a/b/c.txt          --> "/"         --> absolute
600      * ~/a/b/c.txt         --> "~/"        --> current user
601      * ~                   --> "~/"        --> current user (slash added)
602      * ~user/a/b/c.txt     --> "~user/"    --> named user
603      * ~user               --> "~user/"    --> named user (slash added)
604      * </pre>
605      * <p>
606      * The output will be the same irrespective of the machine that the code is running on.
607      * ie. both Unix and Windows prefixes are matched regardless.
608      *
609      * @param filename  the filename to query, null returns null
610      * @return the prefix of the file, null if invalid
611      */
612     public static String getPrefix(String filename) {
613         if (filename == null) {
614             return null;
615         }
616         int len = getPrefixLength(filename);
617         if (len < 0) {
618             return null;
619         }
620         if (len > filename.length()) {
621             return filename + UNIX_SEPARATOR;  // we know this only happens for unix
622         }
623         return filename.substring(0, len);
624     }
625 
626     /**
627      * Gets the path from a full filename, which excludes the prefix.
628      * <p>
629      * This method will handle a file in either Unix or Windows format.
630      * The method is entirely text based, and returns the text before and
631      * including the last forward or backslash.
632      * <pre>
633      * C:\a\b\c.txt --> a\b\
634      * ~/a/b/c.txt  --> a/b/
635      * a.txt        --> ""
636      * a/b/c        --> a/b/
637      * a/b/c/       --> a/b/c/
638      * </pre>
639      * <p>
640      * The output will be the same irrespective of the machine that the code is running on.
641      * <p>
642      * This method drops the prefix from the result.
643      * See {@link #getFullPath(String)} for the method that retains the prefix.
644      *
645      * @param filename  the filename to query, null returns null
646      * @return the path of the file, an empty string if none exists, null if invalid
647      */
648     public static String getPath(String filename) {
649         return doGetPath(filename, 1);
650     }
651 
652     /**
653      * Gets the path from a full filename, which excludes the prefix, and
654      * also excluding the final directory separator.
655      * <p>
656      * This method will handle a file in either Unix or Windows format.
657      * The method is entirely text based, and returns the text before the
658      * last forward or backslash.
659      * <pre>
660      * C:\a\b\c.txt --> a\b
661      * ~/a/b/c.txt  --> a/b
662      * a.txt        --> ""
663      * a/b/c        --> a/b
664      * a/b/c/       --> a/b/c
665      * </pre>
666      * <p>
667      * The output will be the same irrespective of the machine that the code is running on.
668      * <p>
669      * This method drops the prefix from the result.
670      * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
671      *
672      * @param filename  the filename to query, null returns null
673      * @return the path of the file, an empty string if none exists, null if invalid
674      */
675     public static String getPathNoEndSeparator(String filename) {
676         return doGetPath(filename, 0);
677     }
678 
679     /**
680      * Does the work of getting the path.
681      * 
682      * @param filename  the filename
683      * @param separatorAdd  0 to omit the end separator, 1 to return it
684      * @return the path
685      */
686     private static String doGetPath(String filename, int separatorAdd) {
687         if (filename == null) {
688             return null;
689         }
690         int prefix = getPrefixLength(filename);
691         if (prefix < 0) {
692             return null;
693         }
694         int index = indexOfLastSeparator(filename);
695         if (prefix >= filename.length() || index < 0) {
696             return "";
697         }
698         return filename.substring(prefix, index + separatorAdd);
699     }
700 
701     /**
702      * Gets the full path from a full filename, which is the prefix + path.
703      * <p>
704      * This method will handle a file in either Unix or Windows format.
705      * The method is entirely text based, and returns the text before and
706      * including the last forward or backslash.
707      * <pre>
708      * C:\a\b\c.txt --> C:\a\b\
709      * ~/a/b/c.txt  --> ~/a/b/
710      * a.txt        --> ""
711      * a/b/c        --> a/b/
712      * a/b/c/       --> a/b/c/
713      * C:           --> C:
714      * C:\          --> C:\
715      * ~            --> ~/
716      * ~/           --> ~/
717      * ~user        --> ~user/
718      * ~user/       --> ~user/
719      * </pre>
720      * <p>
721      * The output will be the same irrespective of the machine that the code is running on.
722      *
723      * @param filename  the filename to query, null returns null
724      * @return the path of the file, an empty string if none exists, null if invalid
725      */
726     public static String getFullPath(String filename) {
727         return doGetFullPath(filename, true);
728     }
729 
730     /**
731      * Gets the full path from a full filename, which is the prefix + path,
732      * and also excluding the final directory separator.
733      * <p>
734      * This method will handle a file in either Unix or Windows format.
735      * The method is entirely text based, and returns the text before the
736      * last forward or backslash.
737      * <pre>
738      * C:\a\b\c.txt --> C:\a\b
739      * ~/a/b/c.txt  --> ~/a/b
740      * a.txt        --> ""
741      * a/b/c        --> a/b
742      * a/b/c/       --> a/b/c
743      * C:           --> C:
744      * C:\          --> C:\
745      * ~            --> ~
746      * ~/           --> ~
747      * ~user        --> ~user
748      * ~user/       --> ~user
749      * </pre>
750      * <p>
751      * The output will be the same irrespective of the machine that the code is running on.
752      *
753      * @param filename  the filename to query, null returns null
754      * @return the path of the file, an empty string if none exists, null if invalid
755      */
756     public static String getFullPathNoEndSeparator(String filename) {
757         return doGetFullPath(filename, false);
758     }
759 
760     /**
761      * Does the work of getting the path.
762      * 
763      * @param filename  the filename
764      * @param includeSeparator  true to include the end separator
765      * @return the path
766      */
767     private static String doGetFullPath(String filename, boolean includeSeparator) {
768         if (filename == null) {
769             return null;
770         }
771         int prefix = getPrefixLength(filename);
772         if (prefix < 0) {
773             return null;
774         }
775         if (prefix >= filename.length()) {
776             if (includeSeparator) {
777                 return getPrefix(filename);  // add end slash if necessary
778             } else {
779                 return filename;
780             }
781         }
782         int index = indexOfLastSeparator(filename);
783         if (index < 0) {
784             return filename.substring(0, prefix);
785         }
786         int end = index + (includeSeparator ?  1 : 0);
787         return filename.substring(0, end);
788     }
789 
790     /**
791      * Gets the name minus the path from a full filename.
792      * <p>
793      * This method will handle a file in either Unix or Windows format.
794      * The text after the last forward or backslash is returned.
795      * <pre>
796      * a/b/c.txt --> c.txt
797      * a.txt     --> a.txt
798      * a/b/c     --> c
799      * a/b/c/    --> ""
800      * </pre>
801      * <p>
802      * The output will be the same irrespective of the machine that the code is running on.
803      *
804      * @param filename  the filename to query, null returns null
805      * @return the name of the file without the path, or an empty string if none exists
806      */
807     public static String getName(String filename) {
808         if (filename == null) {
809             return null;
810         }
811         int index = indexOfLastSeparator(filename);
812         return filename.substring(index + 1);
813     }
814 
815     /**
816      * Gets the base name, minus the full path and extension, from a full filename.
817      * <p>
818      * This method will handle a file in either Unix or Windows format.
819      * The text after the last forward or backslash and before the last dot is returned.
820      * <pre>
821      * a/b/c.txt --> c
822      * a.txt     --> a
823      * a/b/c     --> c
824      * a/b/c/    --> ""
825      * </pre>
826      * <p>
827      * The output will be the same irrespective of the machine that the code is running on.
828      *
829      * @param filename  the filename to query, null returns null
830      * @return the name of the file without the path and extension, or an empty string if none exists
831      */
832     public static String getBaseName(String filename) {
833         return removeExtension(getName(filename));
834     }
835 
836     /**
837      * Gets the extension of a filename.
838      * <p>
839      * This method returns the textual part of the filename after the last dot.
840      * There must be no directory separator after the dot.
841      * <pre>
842      * foo.txt      --> "txt"
843      * a/b/c.jpg    --> "jpg"
844      * a/b.txt/c    --> ""
845      * a/b/c        --> ""
846      * </pre>
847      * <p>
848      * The output will be the same irrespective of the machine that the code is running on.
849      *
850      * @param filename the filename to retrieve the extension of, null returns null
851      * @return the extension of the file, an empty string if none exists, or null if the filename is null
852      */
853     public static String getExtension(String filename) {
854         if (filename == null) {
855             return null;
856         }
857         int index = indexOfExtension(filename);
858         if (index == -1) {
859             return "";
860         } else {
861             return filename.substring(index + 1);
862         }
863     }
864 
865     //-----------------------------------------------------------------------
866     /**
867      * Removes the extension from a filename.
868      * <p>
869      * This method returns the textual part of the filename before the last dot.
870      * There must be no directory separator after the dot.
871      * <pre>
872      * foo.txt    --> foo
873      * a\b\c.jpg  --> a\b\c
874      * a\b\c      --> a\b\c
875      * a.b\c      --> a.b\c
876      * </pre>
877      * <p>
878      * The output will be the same irrespective of the machine that the code is running on.
879      *
880      * @param filename  the filename to query, null returns null
881      * @return the filename minus the extension
882      */
883     public static String removeExtension(String filename) {
884         if (filename == null) {
885             return null;
886         }
887         int index = indexOfExtension(filename);
888         if (index == -1) {
889             return filename;
890         } else {
891             return filename.substring(0, index);
892         }
893     }
894 
895     //-----------------------------------------------------------------------
896     /**
897      * Checks whether two filenames are equal exactly.
898      * <p>
899      * No processing is performed on the filenames other than comparison,
900      * thus this is merely a null-safe case-sensitive equals.
901      *
902      * @param filename1  the first filename to query, may be null
903      * @param filename2  the second filename to query, may be null
904      * @return true if the filenames are equal, null equals null
905      * @see IOCase#SENSITIVE
906      */
907     public static boolean equals(String filename1, String filename2) {
908         return equals(filename1, filename2, false, IOCase.SENSITIVE);
909     }
910 
911     /**
912      * Checks whether two filenames are equal using the case rules of the system.
913      * <p>
914      * No processing is performed on the filenames other than comparison.
915      * The check is case-sensitive on Unix and case-insensitive on Windows.
916      *
917      * @param filename1  the first filename to query, may be null
918      * @param filename2  the second filename to query, may be null
919      * @return true if the filenames are equal, null equals null
920      * @see IOCase#SYSTEM
921      */
922     public static boolean equalsOnSystem(String filename1, String filename2) {
923         return equals(filename1, filename2, false, IOCase.SYSTEM);
924     }
925 
926     //-----------------------------------------------------------------------
927     /**
928      * Checks whether two filenames are equal after both have been normalized.
929      * <p>
930      * Both filenames are first passed to {@link #normalize(String)}.
931      * The check is then performed in a case-sensitive manner.
932      *
933      * @param filename1  the first filename to query, may be null
934      * @param filename2  the second filename to query, may be null
935      * @return true if the filenames are equal, null equals null
936      * @see IOCase#SENSITIVE
937      */
938     public static boolean equalsNormalized(String filename1, String filename2) {
939         return equals(filename1, filename2, true, IOCase.SENSITIVE);
940     }
941 
942     /**
943      * Checks whether two filenames are equal after both have been normalized
944      * and using the case rules of the system.
945      * <p>
946      * Both filenames are first passed to {@link #normalize(String)}.
947      * The check is then performed case-sensitive on Unix and
948      * case-insensitive on Windows.
949      *
950      * @param filename1  the first filename to query, may be null
951      * @param filename2  the second filename to query, may be null
952      * @return true if the filenames are equal, null equals null
953      * @see IOCase#SYSTEM
954      */
955     public static boolean equalsNormalizedOnSystem(String filename1, String filename2) {
956         return equals(filename1, filename2, true, IOCase.SYSTEM);
957     }
958 
959     /**
960      * Checks whether two filenames are equal, optionally normalizing and providing
961      * control over the case-sensitivity.
962      *
963      * @param filename1  the first filename to query, may be null
964      * @param filename2  the second filename to query, may be null
965      * @param normalized  whether to normalize the filenames
966      * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
967      * @return true if the filenames are equal, null equals null
968      * @since Commons IO 1.3
969      */
970     public static boolean equals(
971             String filename1, String filename2,
972             boolean normalized, IOCase caseSensitivity) {
973         
974         if (filename1 == null || filename2 == null) {
975             return filename1 == filename2;
976         }
977         if (normalized) {
978             filename1 = normalize(filename1);
979             filename2 = normalize(filename2);
980         }
981         if (caseSensitivity == null) {
982             caseSensitivity = IOCase.SENSITIVE;
983         }
984         return caseSensitivity.checkEquals(filename1, filename2);
985     }
986 
987     //-----------------------------------------------------------------------
988     /**
989      * Checks whether the extension of the filename is that specified.
990      * <p>
991      * This method obtains the extension as the textual part of the filename
992      * after the last dot. There must be no directory separator after the dot.
993      * The extension check is case-sensitive on all platforms.
994      *
995      * @param filename  the filename to query, null returns false
996      * @param extension  the extension to check for, null or empty checks for no extension
997      * @return true if the filename has the specified extension
998      */
999     public static boolean isExtension(String filename, String extension) {
1000         if (filename == null) {
1001             return false;
1002         }
1003         if (extension == null || extension.length() == 0) {
1004             return (indexOfExtension(filename) == -1);
1005         }
1006         String fileExt = getExtension(filename);
1007         return fileExt.equals(extension);
1008     }
1009 
1010     /**
1011      * Checks whether the extension of the filename is one of those specified.
1012      * <p>
1013      * This method obtains the extension as the textual part of the filename
1014      * after the last dot. There must be no directory separator after the dot.
1015      * The extension check is case-sensitive on all platforms.
1016      *
1017      * @param filename  the filename to query, null returns false
1018      * @param extensions  the extensions to check for, null checks for no extension
1019      * @return true if the filename is one of the extensions
1020      */
1021     public static boolean isExtension(String filename, String[] extensions) {
1022         if (filename == null) {
1023             return false;
1024         }
1025         if (extensions == null || extensions.length == 0) {
1026             return (indexOfExtension(filename) == -1);
1027         }
1028         String fileExt = getExtension(filename);
1029         for (int i = 0; i < extensions.length; i++) {
1030             if (fileExt.equals(extensions[i])) {
1031                 return true;
1032             }
1033         }
1034         return false;
1035     }
1036 
1037     /**
1038      * Checks whether the extension of the filename is one of those specified.
1039      * <p>
1040      * This method obtains the extension as the textual part of the filename
1041      * after the last dot. There must be no directory separator after the dot.
1042      * The extension check is case-sensitive on all platforms.
1043      *
1044      * @param filename  the filename to query, null returns false
1045      * @param extensions  the extensions to check for, null checks for no extension
1046      * @return true if the filename is one of the extensions
1047      */
1048     public static boolean isExtension(String filename, Collection extensions) {
1049         if (filename == null) {
1050             return false;
1051         }
1052         if (extensions == null || extensions.isEmpty()) {
1053             return (indexOfExtension(filename) == -1);
1054         }
1055         String fileExt = getExtension(filename);
1056         for (Iterator it = extensions.iterator(); it.hasNext();) {
1057             if (fileExt.equals(it.next())) {
1058                 return true;
1059             }
1060         }
1061         return false;
1062     }
1063 
1064     //-----------------------------------------------------------------------
1065     /**
1066      * Checks a filename to see if it matches the specified wildcard matcher,
1067      * always testing case-sensitive.
1068      * <p>
1069      * The wildcard matcher uses the characters '?' and '*' to represent a
1070      * single or multiple wildcard characters.
1071      * This is the same as often found on Dos/Unix command lines.
1072      * The check is case-sensitive always.
1073      * <pre>
1074      * wildcardMatch("c.txt", "*.txt")      --> true
1075      * wildcardMatch("c.txt", "*.jpg")      --> false
1076      * wildcardMatch("a/b/c.txt", "a/b/*")  --> true
1077      * wildcardMatch("c.txt", "*.???")      --> true
1078      * wildcardMatch("c.txt", "*.????")     --> false
1079      * </pre>
1080      * 
1081      * @param filename  the filename to match on
1082      * @param wildcardMatcher  the wildcard string to match against
1083      * @return true if the filename matches the wildcard string
1084      * @see IOCase#SENSITIVE
1085      */
1086     public static boolean wildcardMatch(String filename, String wildcardMatcher) {
1087         return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE);
1088     }
1089 
1090     /**
1091      * Checks a filename to see if it matches the specified wildcard matcher
1092      * using the case rules of the system.
1093      * <p>
1094      * The wildcard matcher uses the characters '?' and '*' to represent a
1095      * single or multiple wildcard characters.
1096      * This is the same as often found on Dos/Unix command lines.
1097      * The check is case-sensitive on Unix and case-insensitive on Windows.
1098      * <pre>
1099      * wildcardMatch("c.txt", "*.txt")      --> true
1100      * wildcardMatch("c.txt", "*.jpg")      --> false
1101      * wildcardMatch("a/b/c.txt", "a/b/*")  --> true
1102      * wildcardMatch("c.txt", "*.???")      --> true
1103      * wildcardMatch("c.txt", "*.????")     --> false
1104      * </pre>
1105      * 
1106      * @param filename  the filename to match on
1107      * @param wildcardMatcher  the wildcard string to match against
1108      * @return true if the filename matches the wildcard string
1109      * @see IOCase#SYSTEM
1110      */
1111     public static boolean wildcardMatchOnSystem(String filename, String wildcardMatcher) {
1112         return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM);
1113     }
1114 
1115     /**
1116      * Checks a filename to see if it matches the specified wildcard matcher
1117      * allowing control over case-sensitivity.
1118      * <p>
1119      * The wildcard matcher uses the characters '?' and '*' to represent a
1120      * single or multiple wildcard characters.
1121      * 
1122      * @param filename  the filename to match on
1123      * @param wildcardMatcher  the wildcard string to match against
1124      * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
1125      * @return true if the filename matches the wildcard string
1126      * @since Commons IO 1.3
1127      */
1128     public static boolean wildcardMatch(String filename, String wildcardMatcher, IOCase caseSensitivity) {
1129         if (filename == null && wildcardMatcher == null) {
1130             return true;
1131         }
1132         if (filename == null || wildcardMatcher == null) {
1133             return false;
1134         }
1135         if (caseSensitivity == null) {
1136             caseSensitivity = IOCase.SENSITIVE;
1137         }
1138         filename = caseSensitivity.convertCase(filename);
1139         wildcardMatcher = caseSensitivity.convertCase(wildcardMatcher);
1140         String[] wcs = splitOnTokens(wildcardMatcher);
1141         boolean anyChars = false;
1142         int textIdx = 0;
1143         int wcsIdx = 0;
1144         Stack backtrack = new Stack();
1145         
1146         // loop around a backtrack stack, to handle complex * matching
1147         do {
1148             if (backtrack.size() > 0) {
1149                 int[] array = (int[]) backtrack.pop();
1150                 wcsIdx = array[0];
1151                 textIdx = array[1];
1152                 anyChars = true;
1153             }
1154             
1155             // loop whilst tokens and text left to process
1156             while (wcsIdx < wcs.length) {
1157       
1158                 if (wcs[wcsIdx].equals("?")) {
1159                     // ? so move to next text char
1160                     textIdx++;
1161                     anyChars = false;
1162                     
1163                 } else if (wcs[wcsIdx].equals("*")) {
1164                     // set any chars status
1165                     anyChars = true;
1166                     if (wcsIdx == wcs.length - 1) {
1167                         textIdx = filename.length();
1168                     }
1169                     
1170                 } else {
1171                     // matching text token
1172                     if (anyChars) {
1173                         // any chars then try to locate text token
1174                         textIdx = filename.indexOf(wcs[wcsIdx], textIdx);
1175                         if (textIdx == -1) {
1176                             // token not found
1177                             break;
1178                         }
1179                         int repeat = filename.indexOf(wcs[wcsIdx], textIdx + 1);
1180                         if (repeat >= 0) {
1181                             backtrack.push(new int[] {wcsIdx, repeat});
1182                         }
1183                     } else {
1184                         // matching from current position
1185                         if (!filename.startsWith(wcs[wcsIdx], textIdx)) {
1186                             // couldnt match token
1187                             break;
1188                         }
1189                     }
1190       
1191                     // matched text token, move text index to end of matched token
1192                     textIdx += wcs[wcsIdx].length();
1193                     anyChars = false;
1194                 }
1195       
1196                 wcsIdx++;
1197             }
1198             
1199             // full match
1200             if (wcsIdx == wcs.length && textIdx == filename.length()) {
1201                 return true;
1202             }
1203             
1204         } while (backtrack.size() > 0);
1205   
1206         return false;
1207     }
1208 
1209     /**
1210      * Splits a string into a number of tokens.
1211      * 
1212      * @param text  the text to split
1213      * @return the tokens, never null
1214      */
1215     static String[] splitOnTokens(String text) {
1216         // used by wildcardMatch
1217         // package level so a unit test may run on this
1218         
1219         if (text.indexOf("?") == -1 && text.indexOf("*") == -1) {
1220             return new String[] { text };
1221         }
1222 
1223         char[] array = text.toCharArray();
1224         ArrayList list = new ArrayList();
1225         StringBuffer buffer = new StringBuffer();
1226         for (int i = 0; i < array.length; i++) {
1227             if (array[i] == '?' || array[i] == '*') {
1228                 if (buffer.length() != 0) {
1229                     list.add(buffer.toString());
1230                     buffer.setLength(0);
1231                 }
1232                 if (array[i] == '?') {
1233                     list.add("?");
1234                 } else if (list.size() == 0 ||
1235                         (i > 0 && list.get(list.size() - 1).equals("*") == false)) {
1236                     list.add("*");
1237                 }
1238             } else {
1239                 buffer.append(array[i]);
1240             }
1241         }
1242         if (buffer.length() != 0) {
1243             list.add(buffer.toString());
1244         }
1245 
1246         return (String[]) list.toArray( new String[ list.size() ] );
1247     }
1248 
1249 }