1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.io; 18 19 import java.io.File; 20 import java.util.ArrayList; 21 import java.util.Collection; 22 import java.util.Iterator; 23 import java.util.Stack; 24 25 /** 26 * General filename and filepath manipulation utilities. 27 * <p> 28 * When dealing with filenames you can hit problems when moving from a Windows 29 * based development machine to a Unix based production machine. 30 * This class aims to help avoid those problems. 31 * <p> 32 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by 33 * using JDK {@link java.io.File File} objects and the two argument constructor 34 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}. 35 * <p> 36 * Most methods on this class are designed to work the same on both Unix and Windows. 37 * Those that don't include 'System', 'Unix' or 'Windows' in their name. 38 * <p> 39 * Most methods recognise both separators (forward and back), and both 40 * sets of prefixes. See the javadoc of each method for details. 41 * <p> 42 * This class defines six components within a filename 43 * (example C:\dev\project\file.txt): 44 * <ul> 45 * <li>the prefix - C:\</li> 46 * <li>the path - dev\project\</li> 47 * <li>the full path - C:\dev\project\</li> 48 * <li>the name - file.txt</li> 49 * <li>the base name - file</li> 50 * <li>the extension - txt</li> 51 * </ul> 52 * Note that this class works best if directory filenames end with a separator. 53 * If you omit the last separator, it is impossible to determine if the filename 54 * corresponds to a file or a directory. As a result, we have chosen to say 55 * it corresponds to a file. 56 * <p> 57 * This class only supports Unix and Windows style names. 58 * Prefixes are matched as follows: 59 * <pre> 60 * Windows: 61 * a\b\c.txt --> "" --> relative 62 * \a\b\c.txt --> "\" --> current drive absolute 63 * C:a\b\c.txt --> "C:" --> drive relative 64 * C:\a\b\c.txt --> "C:\" --> absolute 65 * \\server\a\b\c.txt --> "\\server\" --> UNC 66 * 67 * Unix: 68 * a/b/c.txt --> "" --> relative 69 * /a/b/c.txt --> "/" --> absolute 70 * ~/a/b/c.txt --> "~/" --> current user 71 * ~ --> "~/" --> current user (slash added) 72 * ~user/a/b/c.txt --> "~user/" --> named user 73 * ~user --> "~user/" --> named user (slash added) 74 * </pre> 75 * Both prefix styles are matched always, irrespective of the machine that you are 76 * currently running on. 77 * <p> 78 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils. 79 * 80 * @author <a href="mailto:burton@relativity.yi.org">Kevin A. Burton</A> 81 * @author <a href="mailto:sanders@apache.org">Scott Sanders</a> 82 * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a> 83 * @author <a href="mailto:Christoph.Reck@dlr.de">Christoph.Reck</a> 84 * @author <a href="mailto:peter@apache.org">Peter Donald</a> 85 * @author <a href="mailto:jefft@apache.org">Jeff Turner</a> 86 * @author Matthew Hawthorne 87 * @author Martin Cooper 88 * @author <a href="mailto:jeremias@apache.org">Jeremias Maerki</a> 89 * @author Stephen Colebourne 90 * @version $Id: FilenameUtils.java 490424 2006-12-27 01:20:43Z bayard $ 91 * @since Commons IO 1.1 92 */ 93 public class FilenameUtils { 94 95 /** 96 * The extension separator character. 97 */ 98 private static final char EXTENSION_SEPARATOR = '.'; 99 100 /** 101 * The Unix separator character. 102 */ 103 private static final char UNIX_SEPARATOR = '/'; 104 105 /** 106 * The Windows separator character. 107 */ 108 private static final char WINDOWS_SEPARATOR = '\\'; 109 110 /** 111 * The system separator character. 112 */ 113 private static final char SYSTEM_SEPARATOR = File.separatorChar; 114 115 /** 116 * The separator character that is the opposite of the system separator. 117 */ 118 private static final char OTHER_SEPARATOR; 119 static { 120 if (isSystemWindows()) { 121 OTHER_SEPARATOR = UNIX_SEPARATOR; 122 } else { 123 OTHER_SEPARATOR = WINDOWS_SEPARATOR; 124 } 125 } 126 127 /** 128 * Instances should NOT be constructed in standard programming. 129 */ 130 public FilenameUtils() { 131 super(); 132 } 133 134 //----------------------------------------------------------------------- 135 /** 136 * Determines if Windows file system is in use. 137 * 138 * @return true if the system is Windows 139 */ 140 static boolean isSystemWindows() { 141 return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR; 142 } 143 144 //----------------------------------------------------------------------- 145 /** 146 * Checks if the character is a separator. 147 * 148 * @param ch the character to check 149 * @return true if it is a separator character 150 */ 151 private static boolean isSeparator(char ch) { 152 return (ch == UNIX_SEPARATOR) || (ch == WINDOWS_SEPARATOR); 153 } 154 155 //----------------------------------------------------------------------- 156 /** 157 * Normalizes a path, removing double and single dot path steps. 158 * <p> 159 * This method normalizes a path to a standard format. 160 * The input may contain separators in either Unix or Windows format. 161 * The output will contain separators in the format of the system. 162 * <p> 163 * A trailing slash will be retained. 164 * A double slash will be merged to a single slash (but UNC names are handled). 165 * A single dot path segment will be removed. 166 * A double dot will cause that path segment and the one before to be removed. 167 * If the double dot has no parent path segment to work with, <code>null</code> 168 * is returned. 169 * <p> 170 * The output will be the same on both Unix and Windows except 171 * for the separator character. 172 * <pre> 173 * /foo// --> /foo/ 174 * /foo/./ --> /foo/ 175 * /foo/../bar --> /bar 176 * /foo/../bar/ --> /bar/ 177 * /foo/../bar/../baz --> /baz 178 * //foo//./bar --> /foo/bar 179 * /../ --> null 180 * ../foo --> null 181 * foo/bar/.. --> foo/ 182 * foo/../../bar --> null 183 * foo/../bar --> bar 184 * //server/foo/../bar --> //server/bar 185 * //server/../bar --> null 186 * C:\foo\..\bar --> C:\bar 187 * C:\..\bar --> null 188 * ~/foo/../bar/ --> ~/bar/ 189 * ~/../bar --> null 190 * </pre> 191 * (Note the file separator returned will be correct for Windows/Unix) 192 * 193 * @param filename the filename to normalize, null returns null 194 * @return the normalized filename, or null if invalid 195 */ 196 public static String normalize(String filename) { 197 return doNormalize(filename, true); 198 } 199 200 //----------------------------------------------------------------------- 201 /** 202 * Normalizes a path, removing double and single dot path steps, 203 * and removing any final directory separator. 204 * <p> 205 * This method normalizes a path to a standard format. 206 * The input may contain separators in either Unix or Windows format. 207 * The output will contain separators in the format of the system. 208 * <p> 209 * A trailing slash will be removed. 210 * A double slash will be merged to a single slash (but UNC names are handled). 211 * A single dot path segment will be removed. 212 * A double dot will cause that path segment and the one before to be removed. 213 * If the double dot has no parent path segment to work with, <code>null</code> 214 * is returned. 215 * <p> 216 * The output will be the same on both Unix and Windows except 217 * for the separator character. 218 * <pre> 219 * /foo// --> /foo 220 * /foo/./ --> /foo 221 * /foo/../bar --> /bar 222 * /foo/../bar/ --> /bar 223 * /foo/../bar/../baz --> /baz 224 * //foo//./bar --> /foo/bar 225 * /../ --> null 226 * ../foo --> null 227 * foo/bar/.. --> foo 228 * foo/../../bar --> null 229 * foo/../bar --> bar 230 * //server/foo/../bar --> //server/bar 231 * //server/../bar --> null 232 * C:\foo\..\bar --> C:\bar 233 * C:\..\bar --> null 234 * ~/foo/../bar/ --> ~/bar 235 * ~/../bar --> null 236 * </pre> 237 * (Note the file separator returned will be correct for Windows/Unix) 238 * 239 * @param filename the filename to normalize, null returns null 240 * @return the normalized filename, or null if invalid 241 */ 242 public static String normalizeNoEndSeparator(String filename) { 243 return doNormalize(filename, false); 244 } 245 246 /** 247 * Internal method to perform the normalization. 248 * 249 * @param filename the filename 250 * @param keepSeparator true to keep the final separator 251 * @return the normalized filename 252 */ 253 private static String doNormalize(String filename, boolean keepSeparator) { 254 if (filename == null) { 255 return null; 256 } 257 int size = filename.length(); 258 if (size == 0) { 259 return filename; 260 } 261 int prefix = getPrefixLength(filename); 262 if (prefix < 0) { 263 return null; 264 } 265 266 char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy 267 filename.getChars(0, filename.length(), array, 0); 268 269 // fix separators throughout 270 for (int i = 0; i < array.length; i++) { 271 if (array[i] == OTHER_SEPARATOR) { 272 array[i] = SYSTEM_SEPARATOR; 273 } 274 } 275 276 // add extra separator on the end to simplify code below 277 boolean lastIsDirectory = true; 278 if (array[size - 1] != SYSTEM_SEPARATOR) { 279 array[size++] = SYSTEM_SEPARATOR; 280 lastIsDirectory = false; 281 } 282 283 // adjoining slashes 284 for (int i = prefix + 1; i < size; i++) { 285 if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == SYSTEM_SEPARATOR) { 286 System.arraycopy(array, i, array, i - 1, size - i); 287 size--; 288 i--; 289 } 290 } 291 292 // dot slash 293 for (int i = prefix + 1; i < size; i++) { 294 if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' && 295 (i == prefix + 1 || array[i - 2] == SYSTEM_SEPARATOR)) { 296 if (i == size - 1) { 297 lastIsDirectory = true; 298 } 299 System.arraycopy(array, i + 1, array, i - 1, size - i); 300 size -=2; 301 i--; 302 } 303 } 304 305 // double dot slash 306 outer: 307 for (int i = prefix + 2; i < size; i++) { 308 if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' && array[i - 2] == '.' && 309 (i == prefix + 2 || array[i - 3] == SYSTEM_SEPARATOR)) { 310 if (i == prefix + 2) { 311 return null; 312 } 313 if (i == size - 1) { 314 lastIsDirectory = true; 315 } 316 int j; 317 for (j = i - 4 ; j >= prefix; j--) { 318 if (array[j] == SYSTEM_SEPARATOR) { 319 // remove b/../ from a/b/../c 320 System.arraycopy(array, i + 1, array, j + 1, size - i); 321 size -= (i - j); 322 i = j + 1; 323 continue outer; 324 } 325 } 326 // remove a/../ from a/../c 327 System.arraycopy(array, i + 1, array, prefix, size - i); 328 size -= (i + 1 - prefix); 329 i = prefix + 1; 330 } 331 } 332 333 if (size <= 0) { // should never be less than 0 334 return ""; 335 } 336 if (size <= prefix) { // should never be less than prefix 337 return new String(array, 0, size); 338 } 339 if (lastIsDirectory && keepSeparator) { 340 return new String(array, 0, size); // keep trailing separator 341 } 342 return new String(array, 0, size - 1); // lose trailing separator 343 } 344 345 //----------------------------------------------------------------------- 346 /** 347 * Concatenates a filename to a base path using normal command line style rules. 348 * <p> 349 * The effect is equivalent to resultant directory after changing 350 * directory to the first argument, followed by changing directory to 351 * the second argument. 352 * <p> 353 * The first argument is the base path, the second is the path to concatenate. 354 * The returned path is always normalized via {@link #normalize(String)}, 355 * thus <code>..</code> is handled. 356 * <p> 357 * If <code>pathToAdd</code> is absolute (has an absolute prefix), then 358 * it will be normalized and returned. 359 * Otherwise, the paths will be joined, normalized and returned. 360 * <p> 361 * The output will be the same on both Unix and Windows except 362 * for the separator character. 363 * <pre> 364 * /foo/ + bar --> /foo/bar 365 * /foo + bar --> /foo/bar 366 * /foo + /bar --> /bar 367 * /foo + C:/bar --> C:/bar 368 * /foo + C:bar --> C:bar (*) 369 * /foo/a/ + ../bar --> foo/bar 370 * /foo/ + ../../bar --> null 371 * /foo/ + /bar --> /bar 372 * /foo/.. + /bar --> /bar 373 * /foo + bar/c.txt --> /foo/bar/c.txt 374 * /foo/c.txt + bar --> /foo/c.txt/bar (!) 375 * </pre> 376 * (*) Note that the Windows relative drive prefix is unreliable when 377 * used with this method. 378 * (!) Note that the first parameter must be a path. If it ends with a name, then 379 * the name will be built into the concatenated path. If this might be a problem, 380 * use {@link #getFullPath(String)} on the base path argument. 381 * 382 * @param basePath the base path to attach to, always treated as a path 383 * @param fullFilenameToAdd the filename (or path) to attach to the base 384 * @return the concatenated path, or null if invalid 385 */ 386 public static String concat(String basePath, String fullFilenameToAdd) { 387 int prefix = getPrefixLength(fullFilenameToAdd); 388 if (prefix < 0) { 389 return null; 390 } 391 if (prefix > 0) { 392 return normalize(fullFilenameToAdd); 393 } 394 if (basePath == null) { 395 return null; 396 } 397 int len = basePath.length(); 398 if (len == 0) { 399 return normalize(fullFilenameToAdd); 400 } 401 char ch = basePath.charAt(len - 1); 402 if (isSeparator(ch)) { 403 return normalize(basePath + fullFilenameToAdd); 404 } else { 405 return normalize(basePath + '/' + fullFilenameToAdd); 406 } 407 } 408 409 //----------------------------------------------------------------------- 410 /** 411 * Converts all separators to the Unix separator of forward slash. 412 * 413 * @param path the path to be changed, null ignored 414 * @return the updated path 415 */ 416 public static String separatorsToUnix(String path) { 417 if (path == null || path.indexOf(WINDOWS_SEPARATOR) == -1) { 418 return path; 419 } 420 return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR); 421 } 422 423 /** 424 * Converts all separators to the Windows separator of backslash. 425 * 426 * @param path the path to be changed, null ignored 427 * @return the updated path 428 */ 429 public static String separatorsToWindows(String path) { 430 if (path == null || path.indexOf(UNIX_SEPARATOR) == -1) { 431 return path; 432 } 433 return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR); 434 } 435 436 /** 437 * Converts all separators to the system separator. 438 * 439 * @param path the path to be changed, null ignored 440 * @return the updated path 441 */ 442 public static String separatorsToSystem(String path) { 443 if (path == null) { 444 return null; 445 } 446 if (isSystemWindows()) { 447 return separatorsToWindows(path); 448 } else { 449 return separatorsToUnix(path); 450 } 451 } 452 453 //----------------------------------------------------------------------- 454 /** 455 * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>. 456 * <p> 457 * This method will handle a file in either Unix or Windows format. 458 * <p> 459 * The prefix length includes the first slash in the full filename 460 * if applicable. Thus, it is possible that the length returned is greater 461 * than the length of the input string. 462 * <pre> 463 * Windows: 464 * a\b\c.txt --> "" --> relative 465 * \a\b\c.txt --> "\" --> current drive absolute 466 * C:a\b\c.txt --> "C:" --> drive relative 467 * C:\a\b\c.txt --> "C:\" --> absolute 468 * \\server\a\b\c.txt --> "\\server\" --> UNC 469 * 470 * Unix: 471 * a/b/c.txt --> "" --> relative 472 * /a/b/c.txt --> "/" --> absolute 473 * ~/a/b/c.txt --> "~/" --> current user 474 * ~ --> "~/" --> current user (slash added) 475 * ~user/a/b/c.txt --> "~user/" --> named user 476 * ~user --> "~user/" --> named user (slash added) 477 * </pre> 478 * <p> 479 * The output will be the same irrespective of the machine that the code is running on. 480 * ie. both Unix and Windows prefixes are matched regardless. 481 * 482 * @param filename the filename to find the prefix in, null returns -1 483 * @return the length of the prefix, -1 if invalid or null 484 */ 485 public static int getPrefixLength(String filename) { 486 if (filename == null) { 487 return -1; 488 } 489 int len = filename.length(); 490 if (len == 0) { 491 return 0; 492 } 493 char ch0 = filename.charAt(0); 494 if (ch0 == ':') { 495 return -1; 496 } 497 if (len == 1) { 498 if (ch0 == '~') { 499 return 2; // return a length greater than the input 500 } 501 return (isSeparator(ch0) ? 1 : 0); 502 } else { 503 if (ch0 == '~') { 504 int posUnix = filename.indexOf(UNIX_SEPARATOR, 1); 505 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1); 506 if (posUnix == -1 && posWin == -1) { 507 return len + 1; // return a length greater than the input 508 } 509 posUnix = (posUnix == -1 ? posWin : posUnix); 510 posWin = (posWin == -1 ? posUnix : posWin); 511 return Math.min(posUnix, posWin) + 1; 512 } 513 char ch1 = filename.charAt(1); 514 if (ch1 == ':') { 515 ch0 = Character.toUpperCase(ch0); 516 if (ch0 >= 'A' && ch0 <= 'Z') { 517 if (len == 2 || isSeparator(filename.charAt(2)) == false) { 518 return 2; 519 } 520 return 3; 521 } 522 return -1; 523 524 } else if (isSeparator(ch0) && isSeparator(ch1)) { 525 int posUnix = filename.indexOf(UNIX_SEPARATOR, 2); 526 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2); 527 if ((posUnix == -1 && posWin == -1) || posUnix == 2 || posWin == 2) { 528 return -1; 529 } 530 posUnix = (posUnix == -1 ? posWin : posUnix); 531 posWin = (posWin == -1 ? posUnix : posWin); 532 return Math.min(posUnix, posWin) + 1; 533 } else { 534 return (isSeparator(ch0) ? 1 : 0); 535 } 536 } 537 } 538 539 /** 540 * Returns the index of the last directory separator character. 541 * <p> 542 * This method will handle a file in either Unix or Windows format. 543 * The position of the last forward or backslash is returned. 544 * <p> 545 * The output will be the same irrespective of the machine that the code is running on. 546 * 547 * @param filename the filename to find the last path separator in, null returns -1 548 * @return the index of the last separator character, or -1 if there 549 * is no such character 550 */ 551 public static int indexOfLastSeparator(String filename) { 552 if (filename == null) { 553 return -1; 554 } 555 int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR); 556 int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR); 557 return Math.max(lastUnixPos, lastWindowsPos); 558 } 559 560 /** 561 * Returns the index of the last extension separator character, which is a dot. 562 * <p> 563 * This method also checks that there is no directory separator after the last dot. 564 * To do this it uses {@link #indexOfLastSeparator(String)} which will 565 * handle a file in either Unix or Windows format. 566 * <p> 567 * The output will be the same irrespective of the machine that the code is running on. 568 * 569 * @param filename the filename to find the last path separator in, null returns -1 570 * @return the index of the last separator character, or -1 if there 571 * is no such character 572 */ 573 public static int indexOfExtension(String filename) { 574 if (filename == null) { 575 return -1; 576 } 577 int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR); 578 int lastSeparator = indexOfLastSeparator(filename); 579 return (lastSeparator > extensionPos ? -1 : extensionPos); 580 } 581 582 //----------------------------------------------------------------------- 583 /** 584 * Gets the prefix from a full filename, such as <code>C:/</code> 585 * or <code>~/</code>. 586 * <p> 587 * This method will handle a file in either Unix or Windows format. 588 * The prefix includes the first slash in the full filename where applicable. 589 * <pre> 590 * Windows: 591 * a\b\c.txt --> "" --> relative 592 * \a\b\c.txt --> "\" --> current drive absolute 593 * C:a\b\c.txt --> "C:" --> drive relative 594 * C:\a\b\c.txt --> "C:\" --> absolute 595 * \\server\a\b\c.txt --> "\\server\" --> UNC 596 * 597 * Unix: 598 * a/b/c.txt --> "" --> relative 599 * /a/b/c.txt --> "/" --> absolute 600 * ~/a/b/c.txt --> "~/" --> current user 601 * ~ --> "~/" --> current user (slash added) 602 * ~user/a/b/c.txt --> "~user/" --> named user 603 * ~user --> "~user/" --> named user (slash added) 604 * </pre> 605 * <p> 606 * The output will be the same irrespective of the machine that the code is running on. 607 * ie. both Unix and Windows prefixes are matched regardless. 608 * 609 * @param filename the filename to query, null returns null 610 * @return the prefix of the file, null if invalid 611 */ 612 public static String getPrefix(String filename) { 613 if (filename == null) { 614 return null; 615 } 616 int len = getPrefixLength(filename); 617 if (len < 0) { 618 return null; 619 } 620 if (len > filename.length()) { 621 return filename + UNIX_SEPARATOR; // we know this only happens for unix 622 } 623 return filename.substring(0, len); 624 } 625 626 /** 627 * Gets the path from a full filename, which excludes the prefix. 628 * <p> 629 * This method will handle a file in either Unix or Windows format. 630 * The method is entirely text based, and returns the text before and 631 * including the last forward or backslash. 632 * <pre> 633 * C:\a\b\c.txt --> a\b\ 634 * ~/a/b/c.txt --> a/b/ 635 * a.txt --> "" 636 * a/b/c --> a/b/ 637 * a/b/c/ --> a/b/c/ 638 * </pre> 639 * <p> 640 * The output will be the same irrespective of the machine that the code is running on. 641 * <p> 642 * This method drops the prefix from the result. 643 * See {@link #getFullPath(String)} for the method that retains the prefix. 644 * 645 * @param filename the filename to query, null returns null 646 * @return the path of the file, an empty string if none exists, null if invalid 647 */ 648 public static String getPath(String filename) { 649 return doGetPath(filename, 1); 650 } 651 652 /** 653 * Gets the path from a full filename, which excludes the prefix, and 654 * also excluding the final directory separator. 655 * <p> 656 * This method will handle a file in either Unix or Windows format. 657 * The method is entirely text based, and returns the text before the 658 * last forward or backslash. 659 * <pre> 660 * C:\a\b\c.txt --> a\b 661 * ~/a/b/c.txt --> a/b 662 * a.txt --> "" 663 * a/b/c --> a/b 664 * a/b/c/ --> a/b/c 665 * </pre> 666 * <p> 667 * The output will be the same irrespective of the machine that the code is running on. 668 * <p> 669 * This method drops the prefix from the result. 670 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. 671 * 672 * @param filename the filename to query, null returns null 673 * @return the path of the file, an empty string if none exists, null if invalid 674 */ 675 public static String getPathNoEndSeparator(String filename) { 676 return doGetPath(filename, 0); 677 } 678 679 /** 680 * Does the work of getting the path. 681 * 682 * @param filename the filename 683 * @param separatorAdd 0 to omit the end separator, 1 to return it 684 * @return the path 685 */ 686 private static String doGetPath(String filename, int separatorAdd) { 687 if (filename == null) { 688 return null; 689 } 690 int prefix = getPrefixLength(filename); 691 if (prefix < 0) { 692 return null; 693 } 694 int index = indexOfLastSeparator(filename); 695 if (prefix >= filename.length() || index < 0) { 696 return ""; 697 } 698 return filename.substring(prefix, index + separatorAdd); 699 } 700 701 /** 702 * Gets the full path from a full filename, which is the prefix + path. 703 * <p> 704 * This method will handle a file in either Unix or Windows format. 705 * The method is entirely text based, and returns the text before and 706 * including the last forward or backslash. 707 * <pre> 708 * C:\a\b\c.txt --> C:\a\b\ 709 * ~/a/b/c.txt --> ~/a/b/ 710 * a.txt --> "" 711 * a/b/c --> a/b/ 712 * a/b/c/ --> a/b/c/ 713 * C: --> C: 714 * C:\ --> C:\ 715 * ~ --> ~/ 716 * ~/ --> ~/ 717 * ~user --> ~user/ 718 * ~user/ --> ~user/ 719 * </pre> 720 * <p> 721 * The output will be the same irrespective of the machine that the code is running on. 722 * 723 * @param filename the filename to query, null returns null 724 * @return the path of the file, an empty string if none exists, null if invalid 725 */ 726 public static String getFullPath(String filename) { 727 return doGetFullPath(filename, true); 728 } 729 730 /** 731 * Gets the full path from a full filename, which is the prefix + path, 732 * and also excluding the final directory separator. 733 * <p> 734 * This method will handle a file in either Unix or Windows format. 735 * The method is entirely text based, and returns the text before the 736 * last forward or backslash. 737 * <pre> 738 * C:\a\b\c.txt --> C:\a\b 739 * ~/a/b/c.txt --> ~/a/b 740 * a.txt --> "" 741 * a/b/c --> a/b 742 * a/b/c/ --> a/b/c 743 * C: --> C: 744 * C:\ --> C:\ 745 * ~ --> ~ 746 * ~/ --> ~ 747 * ~user --> ~user 748 * ~user/ --> ~user 749 * </pre> 750 * <p> 751 * The output will be the same irrespective of the machine that the code is running on. 752 * 753 * @param filename the filename to query, null returns null 754 * @return the path of the file, an empty string if none exists, null if invalid 755 */ 756 public static String getFullPathNoEndSeparator(String filename) { 757 return doGetFullPath(filename, false); 758 } 759 760 /** 761 * Does the work of getting the path. 762 * 763 * @param filename the filename 764 * @param includeSeparator true to include the end separator 765 * @return the path 766 */ 767 private static String doGetFullPath(String filename, boolean includeSeparator) { 768 if (filename == null) { 769 return null; 770 } 771 int prefix = getPrefixLength(filename); 772 if (prefix < 0) { 773 return null; 774 } 775 if (prefix >= filename.length()) { 776 if (includeSeparator) { 777 return getPrefix(filename); // add end slash if necessary 778 } else { 779 return filename; 780 } 781 } 782 int index = indexOfLastSeparator(filename); 783 if (index < 0) { 784 return filename.substring(0, prefix); 785 } 786 int end = index + (includeSeparator ? 1 : 0); 787 return filename.substring(0, end); 788 } 789 790 /** 791 * Gets the name minus the path from a full filename. 792 * <p> 793 * This method will handle a file in either Unix or Windows format. 794 * The text after the last forward or backslash is returned. 795 * <pre> 796 * a/b/c.txt --> c.txt 797 * a.txt --> a.txt 798 * a/b/c --> c 799 * a/b/c/ --> "" 800 * </pre> 801 * <p> 802 * The output will be the same irrespective of the machine that the code is running on. 803 * 804 * @param filename the filename to query, null returns null 805 * @return the name of the file without the path, or an empty string if none exists 806 */ 807 public static String getName(String filename) { 808 if (filename == null) { 809 return null; 810 } 811 int index = indexOfLastSeparator(filename); 812 return filename.substring(index + 1); 813 } 814 815 /** 816 * Gets the base name, minus the full path and extension, from a full filename. 817 * <p> 818 * This method will handle a file in either Unix or Windows format. 819 * The text after the last forward or backslash and before the last dot is returned. 820 * <pre> 821 * a/b/c.txt --> c 822 * a.txt --> a 823 * a/b/c --> c 824 * a/b/c/ --> "" 825 * </pre> 826 * <p> 827 * The output will be the same irrespective of the machine that the code is running on. 828 * 829 * @param filename the filename to query, null returns null 830 * @return the name of the file without the path, or an empty string if none exists 831 */ 832 public static String getBaseName(String filename) { 833 return removeExtension(getName(filename)); 834 } 835 836 /** 837 * Gets the extension of a filename. 838 * <p> 839 * This method returns the textual part of the filename after the last dot. 840 * There must be no directory separator after the dot. 841 * <pre> 842 * foo.txt --> "txt" 843 * a/b/c.jpg --> "jpg" 844 * a/b.txt/c --> "" 845 * a/b/c --> "" 846 * </pre> 847 * <p> 848 * The output will be the same irrespective of the machine that the code is running on. 849 * 850 * @param filename the filename to retrieve the extension of. 851 * @return the extension of the file or an empty string if none exists. 852 */ 853 public static String getExtension(String filename) { 854 if (filename == null) { 855 return null; 856 } 857 int index = indexOfExtension(filename); 858 if (index == -1) { 859 return ""; 860 } else { 861 return filename.substring(index + 1); 862 } 863 } 864 865 //----------------------------------------------------------------------- 866 /** 867 * Removes the extension from a filename. 868 * <p> 869 * This method returns the textual part of the filename before the last dot. 870 * There must be no directory separator after the dot. 871 * <pre> 872 * foo.txt --> foo 873 * a\b\c.jpg --> a\b\c 874 * a\b\c --> a\b\c 875 * a.b\c --> a.b\c 876 * </pre> 877 * <p> 878 * The output will be the same irrespective of the machine that the code is running on. 879 * 880 * @param filename the filename to query, null returns null 881 * @return the filename minus the extension 882 */ 883 public static String removeExtension(String filename) { 884 if (filename == null) { 885 return null; 886 } 887 int index = indexOfExtension(filename); 888 if (index == -1) { 889 return filename; 890 } else { 891 return filename.substring(0, index); 892 } 893 } 894 895 //----------------------------------------------------------------------- 896 /** 897 * Checks whether two filenames are equal exactly. 898 * <p> 899 * No processing is performed on the filenames other than comparison, 900 * thus this is merely a null-safe case-sensitive equals. 901 * 902 * @param filename1 the first filename to query, may be null 903 * @param filename2 the second filename to query, may be null 904 * @return true if the filenames are equal, null equals null 905 * @see IOCase#SENSITIVE 906 */ 907 public static boolean equals(String filename1, String filename2) { 908 return equals(filename1, filename2, false, IOCase.SENSITIVE); 909 } 910 911 /** 912 * Checks whether two filenames are equal using the case rules of the system. 913 * <p> 914 * No processing is performed on the filenames other than comparison. 915 * The check is case-sensitive on Unix and case-insensitive on Windows. 916 * 917 * @param filename1 the first filename to query, may be null 918 * @param filename2 the second filename to query, may be null 919 * @return true if the filenames are equal, null equals null 920 * @see IOCase#SYSTEM 921 */ 922 public static boolean equalsOnSystem(String filename1, String filename2) { 923 return equals(filename1, filename2, false, IOCase.SYSTEM); 924 } 925 926 //----------------------------------------------------------------------- 927 /** 928 * Checks whether two filenames are equal after both have been normalized. 929 * <p> 930 * Both filenames are first passed to {@link #normalize(String)}. 931 * The check is then performed in a case-sensitive manner. 932 * 933 * @param filename1 the first filename to query, may be null 934 * @param filename2 the second filename to query, may be null 935 * @return true if the filenames are equal, null equals null 936 * @see IOCase#SENSITIVE 937 */ 938 public static boolean equalsNormalized(String filename1, String filename2) { 939 return equals(filename1, filename2, true, IOCase.SENSITIVE); 940 } 941 942 /** 943 * Checks whether two filenames are equal after both have been normalized 944 * and using the case rules of the system. 945 * <p> 946 * Both filenames are first passed to {@link #normalize(String)}. 947 * The check is then performed case-sensitive on Unix and 948 * case-insensitive on Windows. 949 * 950 * @param filename1 the first filename to query, may be null 951 * @param filename2 the second filename to query, may be null 952 * @return true if the filenames are equal, null equals null 953 * @see IOCase#SYSTEM 954 */ 955 public static boolean equalsNormalizedOnSystem(String filename1, String filename2) { 956 return equals(filename1, filename2, true, IOCase.SYSTEM); 957 } 958 959 /** 960 * Checks whether two filenames are equal, optionally normalizing and providing 961 * control over the case-sensitivity. 962 * 963 * @param filename1 the first filename to query, may be null 964 * @param filename2 the second filename to query, may be null 965 * @param normalized whether to normalize the filenames 966 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 967 * @return true if the filenames are equal, null equals null 968 * @since Commons IO 1.3 969 */ 970 public static boolean equals( 971 String filename1, String filename2, 972 boolean normalized, IOCase caseSensitivity) { 973 974 if (filename1 == null || filename2 == null) { 975 return filename1 == filename2; 976 } 977 if (normalized) { 978 filename1 = normalize(filename1); 979 filename2 = normalize(filename2); 980 } 981 if (caseSensitivity == null) { 982 caseSensitivity = IOCase.SENSITIVE; 983 } 984 return caseSensitivity.checkEquals(filename1, filename2); 985 } 986 987 //----------------------------------------------------------------------- 988 /** 989 * Checks whether the extension of the filename is that specified. 990 * <p> 991 * This method obtains the extension as the textual part of the filename 992 * after the last dot. There must be no directory separator after the dot. 993 * The extension check is case-sensitive on all platforms. 994 * 995 * @param filename the filename to query, null returns false 996 * @param extension the extension to check for, null or empty checks for no extension 997 * @return true if the filename has the specified extension 998 */ 999 public static boolean isExtension(String filename, String extension) { 1000 if (filename == null) { 1001 return false; 1002 } 1003 if (extension == null || extension.length() == 0) { 1004 return (indexOfExtension(filename) == -1); 1005 } 1006 String fileExt = getExtension(filename); 1007 return fileExt.equals(extension); 1008 } 1009 1010 /** 1011 * Checks whether the extension of the filename is one of those specified. 1012 * <p> 1013 * This method obtains the extension as the textual part of the filename 1014 * after the last dot. There must be no directory separator after the dot. 1015 * The extension check is case-sensitive on all platforms. 1016 * 1017 * @param filename the filename to query, null returns false 1018 * @param extensions the extensions to check for, null checks for no extension 1019 * @return true if the filename is one of the extensions 1020 */ 1021 public static boolean isExtension(String filename, String[] extensions) { 1022 if (filename == null) { 1023 return false; 1024 } 1025 if (extensions == null || extensions.length == 0) { 1026 return (indexOfExtension(filename) == -1); 1027 } 1028 String fileExt = getExtension(filename); 1029 for (int i = 0; i < extensions.length; i++) { 1030 if (fileExt.equals(extensions[i])) { 1031 return true; 1032 } 1033 } 1034 return false; 1035 } 1036 1037 /** 1038 * Checks whether the extension of the filename is one of those specified. 1039 * <p> 1040 * This method obtains the extension as the textual part of the filename 1041 * after the last dot. There must be no directory separator after the dot. 1042 * The extension check is case-sensitive on all platforms. 1043 * 1044 * @param filename the filename to query, null returns false 1045 * @param extensions the extensions to check for, null checks for no extension 1046 * @return true if the filename is one of the extensions 1047 */ 1048 public static boolean isExtension(String filename, Collection extensions) { 1049 if (filename == null) { 1050 return false; 1051 } 1052 if (extensions == null || extensions.isEmpty()) { 1053 return (indexOfExtension(filename) == -1); 1054 } 1055 String fileExt = getExtension(filename); 1056 for (Iterator it = extensions.iterator(); it.hasNext();) { 1057 if (fileExt.equals(it.next())) { 1058 return true; 1059 } 1060 } 1061 return false; 1062 } 1063 1064 //----------------------------------------------------------------------- 1065 /** 1066 * Checks a filename to see if it matches the specified wildcard matcher, 1067 * always testing case-sensitive. 1068 * <p> 1069 * The wildcard matcher uses the characters '?' and '*' to represent a 1070 * single or multiple wildcard characters. 1071 * This is the same as often found on Dos/Unix command lines. 1072 * The check is case-sensitive always. 1073 * <pre> 1074 * wildcardMatch("c.txt", "*.txt") --> true 1075 * wildcardMatch("c.txt", "*.jpg") --> false 1076 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1077 * wildcardMatch("c.txt", "*.???") --> true 1078 * wildcardMatch("c.txt", "*.????") --> false 1079 * </pre> 1080 * 1081 * @param filename the filename to match on 1082 * @param wildcardMatcher the wildcard string to match against 1083 * @return true if the filename matches the wilcard string 1084 * @see IOCase#SENSITIVE 1085 */ 1086 public static boolean wildcardMatch(String filename, String wildcardMatcher) { 1087 return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE); 1088 } 1089 1090 /** 1091 * Checks a filename to see if it matches the specified wildcard matcher 1092 * using the case rules of the system. 1093 * <p> 1094 * The wildcard matcher uses the characters '?' and '*' to represent a 1095 * single or multiple wildcard characters. 1096 * This is the same as often found on Dos/Unix command lines. 1097 * The check is case-sensitive on Unix and case-insensitive on Windows. 1098 * <pre> 1099 * wildcardMatch("c.txt", "*.txt") --> true 1100 * wildcardMatch("c.txt", "*.jpg") --> false 1101 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1102 * wildcardMatch("c.txt", "*.???") --> true 1103 * wildcardMatch("c.txt", "*.????") --> false 1104 * </pre> 1105 * 1106 * @param filename the filename to match on 1107 * @param wildcardMatcher the wildcard string to match against 1108 * @return true if the filename matches the wilcard string 1109 * @see IOCase#SYSTEM 1110 */ 1111 public static boolean wildcardMatchOnSystem(String filename, String wildcardMatcher) { 1112 return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM); 1113 } 1114 1115 /** 1116 * Checks a filename to see if it matches the specified wildcard matcher 1117 * allowing control over case-sensitivity. 1118 * <p> 1119 * The wildcard matcher uses the characters '?' and '*' to represent a 1120 * single or multiple wildcard characters. 1121 * 1122 * @param filename the filename to match on 1123 * @param wildcardMatcher the wildcard string to match against 1124 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 1125 * @return true if the filename matches the wilcard string 1126 * @since Commons IO 1.3 1127 */ 1128 public static boolean wildcardMatch(String filename, String wildcardMatcher, IOCase caseSensitivity) { 1129 if (filename == null && wildcardMatcher == null) { 1130 return true; 1131 } 1132 if (filename == null || wildcardMatcher == null) { 1133 return false; 1134 } 1135 if (caseSensitivity == null) { 1136 caseSensitivity = IOCase.SENSITIVE; 1137 } 1138 filename = caseSensitivity.convertCase(filename); 1139 wildcardMatcher = caseSensitivity.convertCase(wildcardMatcher); 1140 String[] wcs = splitOnTokens(wildcardMatcher); 1141 boolean anyChars = false; 1142 int textIdx = 0; 1143 int wcsIdx = 0; 1144 Stack backtrack = new Stack(); 1145 1146 // loop around a backtrack stack, to handle complex * matching 1147 do { 1148 if (backtrack.size() > 0) { 1149 int[] array = (int[]) backtrack.pop(); 1150 wcsIdx = array[0]; 1151 textIdx = array[1]; 1152 anyChars = true; 1153 } 1154 1155 // loop whilst tokens and text left to process 1156 while (wcsIdx < wcs.length) { 1157 1158 if (wcs[wcsIdx].equals("?")) { 1159 // ? so move to next text char 1160 textIdx++; 1161 anyChars = false; 1162 1163 } else if (wcs[wcsIdx].equals("*")) { 1164 // set any chars status 1165 anyChars = true; 1166 if (wcsIdx == wcs.length - 1) { 1167 textIdx = filename.length(); 1168 } 1169 1170 } else { 1171 // matching text token 1172 if (anyChars) { 1173 // any chars then try to locate text token 1174 textIdx = filename.indexOf(wcs[wcsIdx], textIdx); 1175 if (textIdx == -1) { 1176 // token not found 1177 break; 1178 } 1179 int repeat = filename.indexOf(wcs[wcsIdx], textIdx + 1); 1180 if (repeat >= 0) { 1181 backtrack.push(new int[] {wcsIdx, repeat}); 1182 } 1183 } else { 1184 // matching from current position 1185 if (!filename.startsWith(wcs[wcsIdx], textIdx)) { 1186 // couldnt match token 1187 break; 1188 } 1189 } 1190 1191 // matched text token, move text index to end of matched token 1192 textIdx += wcs[wcsIdx].length(); 1193 anyChars = false; 1194 } 1195 1196 wcsIdx++; 1197 } 1198 1199 // full match 1200 if (wcsIdx == wcs.length && textIdx == filename.length()) { 1201 return true; 1202 } 1203 1204 } while (backtrack.size() > 0); 1205 1206 return false; 1207 } 1208 1209 /** 1210 * Splits a string into a number of tokens. 1211 * 1212 * @param text the text to split 1213 * @return the tokens, never null 1214 */ 1215 static String[] splitOnTokens(String text) { 1216 // used by wildcardMatch 1217 // package level so a unit test may run on this 1218 1219 if (text.indexOf("?") == -1 && text.indexOf("*") == -1) { 1220 return new String[] { text }; 1221 } 1222 1223 char[] array = text.toCharArray(); 1224 ArrayList list = new ArrayList(); 1225 StringBuffer buffer = new StringBuffer(); 1226 for (int i = 0; i < array.length; i++) { 1227 if (array[i] == '?' || array[i] == '*') { 1228 if (buffer.length() != 0) { 1229 list.add(buffer.toString()); 1230 buffer.setLength(0); 1231 } 1232 if (array[i] == '?') { 1233 list.add("?"); 1234 } else if (list.size() == 0 || 1235 (i > 0 && list.get(list.size() - 1).equals("*") == false)) { 1236 list.add("*"); 1237 } 1238 } else { 1239 buffer.append(array[i]); 1240 } 1241 } 1242 if (buffer.length() != 0) { 1243 list.add(buffer.toString()); 1244 } 1245 1246 return (String[]) list.toArray( new String[ list.size() ] ); 1247 } 1248 1249 }