/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
package org.apache.commons.compress.archivers.zip;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import java.util.zip.ZipException;

/**
 * Replacement for <code>java.util.zip.ZipFile</code>.
 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self
 * extracting archives.  Furthermore it returns instances of
 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 * instead of <code>java.util.zip.ZipEntry</code>.</p>
 *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway.  Like
 * <code>java.util.zip.ZipFile</code>, it uses RandomAccessFile under the
 * covers and supports compressed and uncompressed entries.</p>
 *
 * <p>The method signatures mimic the ones of
 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
 *
 * <ul>
 *   <li>There is no getName method.</li>
 *   <li>entries has been renamed to getEntries.</li>
 *   <li>getEntries and getEntry return
 *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 *   instances.</li>
 *   <li>close is allowed to throw IOException.</li>
 * </ul>
 *
 */
public class ZipFile {
    private static final int HASH_SIZE = 509;
    private static final int SHORT     =   2;
    private static final int WORD      =   4;
    static final int NIBLET_MASK = 0x0f;
    static final int BYTE_SHIFT = 8;
    private static final int POS_0 = 0;
    private static final int POS_1 = 1;
    private static final int POS_2 = 2;
    private static final int POS_3 = 3;

    /**
     * Maps ZipArchiveEntrys to OffsetEntrys, recording the offsets of
     * the local file headers and of the entry data.
     */
    private final Map entries = new LinkedHashMap(HASH_SIZE);

    /**
     * Maps String to ZipArchiveEntrys, name -> actual entry.
     */
    private final Map nameMap = new HashMap(HASH_SIZE);

    /**
     * Holds the position of an entry's local file header and of the
     * first byte of its data; -1 means "not known yet".
     */
    private static final class OffsetEntry {
        private long headerOffset = -1;
        private long dataOffset = -1;
    }

    /**
     * The encoding to use for filenames and the file comment.
     *
     * <p>For a list of possible values see <a
     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
     * Defaults to UTF-8.</p>
     */
    private final String encoding;

    /**
     * The zip encoding to use for filenames and the file comment.
     */
    private final ZipEncoding zipEncoding;

    /**
     * File name of actual source.
     */
    private final String archiveName;

    /**
     * The actual data source.
     */
    private final RandomAccessFile archive;

    /**
     * Whether to look for and use Unicode extra fields.
     */
    private final boolean useUnicodeExtraFields;

    /**
     * Whether the file is closed.
     */
    private boolean closed;

    /**
     * Opens the given file for reading, assuming "UTF8" for file names.
     *
     * @param f the archive.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(File f) throws IOException {
        this(f, ZipEncodingHelper.UTF8);
    }

    /**
     * Opens the given file for reading, assuming "UTF8".
     *
     * @param name name of the archive.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(String name) throws IOException {
        this(new File(name), ZipEncodingHelper.UTF8);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names, scanning unicode extra fields.
     *
     * @param name name of the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(String name, String encoding) throws IOException {
        this(new File(name), encoding, true);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names and scanning for unicode extra fields.
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(File f, String encoding) throws IOException {
        this(f, encoding, true);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names.
     *
     * <p>The central directory and all local file headers are read
     * eagerly; if that fails the underlying file is closed again
     * before the exception is propagated.</p>
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
        throws IOException {
        this.archiveName = f.getAbsolutePath();
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        archive = new RandomAccessFile(f, "r");
        boolean success = false;
        try {
            Map entriesWithoutUTF8Flag = populateFromCentralDirectory();
            resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
            success = true;
        } finally {
            if (!success) {
                try {
                    // mark as closed so finalize() doesn't complain
                    closed = true;
                    archive.close();
                } catch (IOException e2) { // NOPMD
                    // swallow, throw the original exception instead
                }
            }
        }
    }

    /**
     * The encoding to use for filenames and the file comment.
     *
     * @return null if using the platform's default character encoding.
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Closes the archive.
     * @throws IOException if an error occurs closing the archive.
     */
    public void close() throws IOException {
        // this flag is only written here (and by a failing
        // constructor) and read in finalize() which can never be run
        // in parallel.
        // no synchronization needed.
        closed = true;

        archive.close();
    }

    /**
     * close a zipfile quietly; throw no io fault, do nothing
     * on a null parameter
     * @param zipfile file to close, can be null
     */
    public static void closeQuietly(ZipFile zipfile) {
        if (zipfile != null) {
            try {
                zipfile.close();
            } catch (IOException e) { // NOPMD
                //ignore, that's why the method is called "quietly"
            }
        }
    }

    /**
     * Returns all entries.
     *
     * <p>Entries will be returned in the same order they appear
     * within the archive's central directory.</p>
     *
     * @return all entries as {@link ZipArchiveEntry} instances
     */
    public Enumeration getEntries() {
        return Collections.enumeration(entries.keySet());
    }

    /**
     * Returns all entries in physical order.
     *
     * <p>Entries will be returned in the same order their contents
     * appear within the archive.</p>
     *
     * @return all entries as {@link ZipArchiveEntry} instances
     *
     * @since Commons Compress 1.1
     */
    public Enumeration getEntriesInPhysicalOrder() {
        Object[] allEntries = entries.keySet().toArray();
        Arrays.sort(allEntries, OFFSET_COMPARATOR);
        return Collections.enumeration(Arrays.asList(allEntries));
    }

    /**
     * Returns a named entry - or <code>null</code> if no entry by
     * that name exists.
     * @param name name of the entry.
     * @return the ZipArchiveEntry corresponding to the given name - or
     * <code>null</code> if not present.
     */
    public ZipArchiveEntry getEntry(String name) {
        return (ZipArchiveEntry) nameMap.get(name);
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     *
     * @param ze the entry to check.
     * @return the result of {@link ZipUtil#canHandleEntryData} for
     * the entry.
     * @since Apache Commons Compress 1.1
     */
    public boolean canReadEntryData(ZipArchiveEntry ze) {
        return ZipUtil.canHandleEntryData(ze);
    }

    /**
     * Returns an InputStream for reading the contents of the given entry.
     *
     * @param ze the entry to get the stream for.
     * @return a stream to read the entry from, or <code>null</code>
     * if the entry is not one of the entries of this archive.
     * @throws IOException if unable to create an input stream from the zipenty
     * @throws ZipException if the zipentry uses an unsupported feature
     */
    public InputStream getInputStream(ZipArchiveEntry ze)
        throws IOException, ZipException {
        OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
        if (offsetEntry == null) {
            return null;
        }
        ZipUtil.checkRequestedFeatures(ze);
        long start = offsetEntry.dataOffset;
        BoundedInputStream bis =
            new BoundedInputStream(start, ze.getCompressedSize());
        switch (ze.getMethod()) {
            case ZipArchiveEntry.STORED:
                return bis;
            case ZipArchiveEntry.DEFLATED:
                bis.addDummy();
                final Inflater inflater = new Inflater(true);
                return new InflaterInputStream(bis, inflater) {
                    public void close() throws IOException {
                        // release the inflater even if closing the
                        // stream fails
                        try {
                            super.close();
                        } finally {
                            inflater.end();
                        }
                    }
                };
            default:
                throw new ZipException("Found unsupported compression method "
                                       + ze.getMethod());
        }
    }

    /**
     * Ensures that the close method of this zipfile is called when
     * there are no more references to it.
     * @see #close()
     */
    protected void finalize() throws Throwable {
        try {
            if (!closed) {
                System.err.println("Cleaning up unclosed ZipFile for archive "
                                   + archiveName);
                close();
            }
        } finally {
            super.finalize();
        }
    }

    /**
     * Length of the fixed-size part of a central file header that
     * follows its signature.
     */
    private static final int CFH_LEN =
        /* version made by                 */ SHORT
        /* version needed to extract       */ + SHORT
        /* general purpose bit flag        */ + SHORT
        /* compression method              */ + SHORT
        /* last mod file time              */ + SHORT
        /* last mod file date              */ + SHORT
        /* crc-32                          */ + WORD
        /* compressed size                 */ + WORD
        /* uncompressed size               */ + WORD
        /* filename length                 */ + SHORT
        /* extra field length              */ + SHORT
        /* file comment length             */ + SHORT
        /* disk number start               */ + SHORT
        /* internal file attributes        */ + SHORT
        /* external file attributes        */ + WORD
        /* relative offset of local header */ + WORD;

    /**
     * Reads the central directory of the given archive and populates
     * the internal tables with ZipArchiveEntry instances.
     *
     * <p>The ZipArchiveEntrys will know all data that can be obtained from
     * the central directory alone, but not the data that requires the
     * local file header or additional data to be read.</p>
     *
     * @return a Map&lt;ZipArchiveEntry, NameAndComment&gt; of
     * zipentries that didn't have the language encoding flag set when
     * read.
     */
    private Map populateFromCentralDirectory()
        throws IOException {
        HashMap noUTF8Flag = new HashMap();

        positionAtCentralDirectory();

        byte[] cfh = new byte[CFH_LEN];

        byte[] signatureBytes = new byte[WORD];
        archive.readFully(signatureBytes);
        long sig = ZipLong.getValue(signatureBytes);
        final long cfhSig = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
        if (sig != cfhSig && startsWithLocalFileHeader()) {
            throw new IOException("central directory is empty, can't expand"
                                  + " corrupt archive.");
        }
        while (sig == cfhSig) {
            archive.readFully(cfh);
            int off = 0;
            ZipArchiveEntry ze = new ZipArchiveEntry();

            int versionMadeBy = ZipShort.getValue(cfh, off);
            off += SHORT;
            ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);

            off += SHORT; // skip version info

            final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfh, off);
            final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
            final ZipEncoding entryEncoding =
                hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
            ze.setGeneralPurposeBit(gpFlag);

            off += SHORT;

            ze.setMethod(ZipShort.getValue(cfh, off));
            off += SHORT;

            // FIXME this is actually not very cpu cycles friendly as we are converting from
            // dos to java while the underlying Sun implementation will convert
            // from java to dos time for internal storage...
            long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfh, off));
            ze.setTime(time);
            off += WORD;

            ze.setCrc(ZipLong.getValue(cfh, off));
            off += WORD;

            ze.setCompressedSize(ZipLong.getValue(cfh, off));
            off += WORD;

            ze.setSize(ZipLong.getValue(cfh, off));
            off += WORD;

            int fileNameLen = ZipShort.getValue(cfh, off);
            off += SHORT;

            int extraLen = ZipShort.getValue(cfh, off);
            off += SHORT;

            int commentLen = ZipShort.getValue(cfh, off);
            off += SHORT;

            off += SHORT; // disk number

            ze.setInternalAttributes(ZipShort.getValue(cfh, off));
            off += SHORT;

            ze.setExternalAttributes(ZipLong.getValue(cfh, off));
            off += WORD;

            byte[] fileName = new byte[fileNameLen];
            archive.readFully(fileName);
            ze.setName(entryEncoding.decode(fileName), fileName);

            // LFH offset,
            OffsetEntry offset = new OffsetEntry();
            offset.headerOffset = ZipLong.getValue(cfh, off);
            // data offset will be filled later
            entries.put(ze, offset);

            nameMap.put(ze.getName(), ze);

            byte[] cdExtraData = new byte[extraLen];
            archive.readFully(cdExtraData);
            ze.setCentralDirectoryExtra(cdExtraData);

            byte[] comment = new byte[commentLen];
            archive.readFully(comment);
            ze.setComment(entryEncoding.decode(comment));

            // signature of the next record, terminates the loop if it
            // isn't another central file header
            archive.readFully(signatureBytes);
            sig = ZipLong.getValue(signatureBytes);

            if (!hasUTF8Flag && useUnicodeExtraFields) {
                noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
            }
        }
        return noUTF8Flag;
    }

    /**
     * Size of an "end of central directory" record that carries an
     * empty archive comment.
     */
    private static final int MIN_EOCD_SIZE =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD
        /* offset of start of central      */
        /* directory with respect to       */
        /* the starting disk number        */ + WORD
        /* zipfile comment length          */ + SHORT;

    /**
     * Size of an "end of central directory" record that carries an
     * archive comment of maximum length.
     */
    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
        /* maximum length of zipfile comment */ + 0xFFFF;

    /**
     * Offset, relative to the start of the "end of central directory"
     * record, of the field holding the central directory's offset.
     */
    private static final int CFD_LOCATOR_OFFSET =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD;

    /**
     * Searches for the "End of central dir record", parses
     * it and positions the stream at the first central directory
     * record.
     *
     * <p>The search runs backwards from the last position at which a
     * record with an empty comment could start and never looks at
     * positions before <code>stopSearching</code>.</p>
     *
     * @throws ZipException if no "End of central dir record"
     * signature can be found.
     * @throws IOException if reading the archive fails.
     */
    private void positionAtCentralDirectory()
        throws IOException {
        boolean found = false;
        long off = archive.length() - MIN_EOCD_SIZE;
        final long stopSearching =
            Math.max(0L, archive.length() - MAX_EOCD_SIZE);
        if (off >= 0) {
            final byte[] sig = ZipArchiveOutputStream.EOCD_SIG;
            // only seek to positions inside the search window, in
            // particular never to a negative one
            for (; off >= stopSearching; off--) {
                archive.seek(off);
                int curr = archive.read();
                if (curr == -1) {
                    break;
                }
                if (curr == sig[POS_0]) {
                    curr = archive.read();
                    if (curr == sig[POS_1]) {
                        curr = archive.read();
                        if (curr == sig[POS_2]) {
                            curr = archive.read();
                            if (curr == sig[POS_3]) {
                                found = true;
                                break;
                            }
                        }
                    }
                }
            }
        }
        if (!found) {
            throw new ZipException("archive is not a ZIP archive");
        }
        archive.seek(off + CFD_LOCATOR_OFFSET);
        byte[] cfdOffset = new byte[WORD];
        archive.readFully(cfdOffset);
        archive.seek(ZipLong.getValue(cfdOffset));
    }

    /**
     * Number of bytes in local file header up to the &quot;length of
     * filename&quot; entry.
     */
    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
        /* local file header signature     */ WORD
        /* version needed to extract       */ + SHORT
        /* general purpose bit flag        */ + SHORT
        /* compression method              */ + SHORT
        /* last mod file time              */ + SHORT
        /* last mod file date              */ + SHORT
        /* crc-32                          */ + WORD
        /* compressed size                 */ + WORD
        /* uncompressed size               */ + WORD;

    /**
     * Walks through all recorded entries and adds the data available
     * from the local file header.
     *
     * <p>Also records the offsets for the data to read from the
     * entries.</p>
     *
     * @param entriesWithoutUTF8Flag the entries whose name and
     * comment may have to be replaced by the content of extra fields,
     * mapped to their raw name and comment.
     * @throws IOException if reading the archive fails or the file
     * name of a local file header cannot be skipped.
     */
    private void resolveLocalFileHeaderData(Map entriesWithoutUTF8Flag)
        throws IOException {
        Enumeration e = getEntries();
        while (e.hasMoreElements()) {
            ZipArchiveEntry ze = (ZipArchiveEntry) e.nextElement();
            OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
            long offset = offsetEntry.headerOffset;
            archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
            byte[] b = new byte[SHORT];
            archive.readFully(b);
            int fileNameLen = ZipShort.getValue(b);
            archive.readFully(b);
            int extraFieldLen = ZipShort.getValue(b);
            int lenToSkip = fileNameLen;
            while (lenToSkip > 0) {
                int skipped = archive.skipBytes(lenToSkip);
                if (skipped <= 0) {
                    // nothing could be skipped: report it as an I/O
                    // problem like every other read failure
                    throw new IOException("failed to skip file name in"
                                          + " local file header");
                }
                lenToSkip -= skipped;
            }
            byte[] localExtraData = new byte[extraFieldLen];
            archive.readFully(localExtraData);
            ze.setExtra(localExtraData);
            // the data starts right behind the two length fields, the
            // file name and the extra field
            offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
                + SHORT + SHORT + fileNameLen + extraFieldLen;

            if (entriesWithoutUTF8Flag.containsKey(ze)) {
                String orig = ze.getName();
                NameAndComment nc = (NameAndComment) entriesWithoutUTF8Flag.get(ze);
                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
                                                         nc.comment);
                if (!orig.equals(ze.getName())) {
                    // keep the name lookup table in sync with the
                    // entry's new name
                    nameMap.remove(orig);
                    nameMap.put(ze.getName(), ze);
                }
            }
        }
    }

    /**
     * Checks whether the archive starts with a LFH.  If it doesn't,
     * it may be an empty archive.
     */
    private boolean startsWithLocalFileHeader() throws IOException {
        archive.seek(0);
        final byte[] start = new byte[WORD];
        archive.readFully(start);
        for (int i = 0; i < start.length; i++) {
            if (start[i] != ZipArchiveOutputStream.LFH_SIG[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * InputStream that delegates requests to the underlying
     * RandomAccessFile, making sure that only bytes from a certain
     * range can be read.
     */
    private class BoundedInputStream extends InputStream {
        private long remaining;
        private long loc;
        private boolean addDummyByte = false;

        BoundedInputStream(long start, long remaining) {
            this.remaining = remaining;
            loc = start;
        }

        public int read() throws IOException {
            if (remaining-- <= 0) {
                if (addDummyByte) {
                    addDummyByte = false;
                    return 0;
                }
                return -1;
            }
            synchronized (archive) {
                archive.seek(loc++);
                return archive.read();
            }
        }

        public int read(byte[] b, int off, int len) throws IOException {
            // a request for no bytes must not touch the buffer, not
            // even in order to deliver the dummy byte
            if (len <= 0) {
                return 0;
            }

            if (remaining <= 0) {
                if (addDummyByte) {
                    addDummyByte = false;
                    b[off] = 0;
                    return 1;
                }
                return -1;
            }

            if (len > remaining) {
                len = (int) remaining;
            }
            int ret = -1;
            synchronized (archive) {
                archive.seek(loc);
                ret = archive.read(b, off, len);
            }
            if (ret > 0) {
                loc += ret;
                remaining -= ret;
            }
            return ret;
        }

        /**
         * Inflater needs an extra dummy byte for nowrap - see
         * Inflater's javadocs.
         */
        void addDummy() {
            addDummyByte = true;
        }
    }

    /**
     * Raw (undecoded) name and comment of an entry as read from the
     * central directory.
     */
    private static final class NameAndComment {
        private final byte[] name;
        private final byte[] comment;
        private NameAndComment(byte[] name, byte[] comment) {
            this.name = name;
            this.comment = comment;
        }
    }

    /**
     * Compares two ZipArchiveEntries based on their offset within the archive.
     *
     * <p>Won't return any meaningful results if one of the entries
     * isn't part of the archive at all.</p>
     *
     * @since Commons Compress 1.1
     */
    private final Comparator OFFSET_COMPARATOR =
        new Comparator() {
            public int compare(Object o1, Object o2) {
                if (o1 == o2) {
                    return 0;
                }

                ZipArchiveEntry e1 = (ZipArchiveEntry) o1;
                ZipArchiveEntry e2 = (ZipArchiveEntry) o2;

                OffsetEntry off1 = (OffsetEntry) entries.get(e1);
                OffsetEntry off2 = (OffsetEntry) entries.get(e2);
                if (off1 == null) {
                    return 1;
                }
                if (off2 == null) {
                    return -1;
                }
                long val = (off1.headerOffset - off2.headerOffset);
                return val == 0 ? 0 : val < 0 ? -1 : +1;
            }
        };
}