001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018 package org.apache.commons.compress.archivers.zip; 019 020 import java.io.File; 021 import java.io.IOException; 022 import java.io.InputStream; 023 import java.io.RandomAccessFile; 024 import java.util.Arrays; 025 import java.util.Collections; 026 import java.util.Comparator; 027 import java.util.Enumeration; 028 import java.util.HashMap; 029 import java.util.LinkedHashMap; 030 import java.util.Map; 031 import java.util.zip.Inflater; 032 import java.util.zip.InflaterInputStream; 033 import java.util.zip.ZipException; 034 035 /** 036 * Replacement for <code>java.util.ZipFile</code>. 037 * 038 * <p>This class adds support for file name encodings other than UTF-8 039 * (which is required to work on ZIP files created by native zip tools 040 * and is able to skip a preamble like the one found in self 041 * extracting archives. Furthermore it returns instances of 042 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 043 * instead of <code>java.util.zip.ZipEntry</code>.</p> 044 * 045 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 046 * have to reimplement all methods anyway. Like 047 * <code>java.util.ZipFile</code>, it uses RandomAccessFile under the 048 * covers and supports compressed and uncompressed entries.</p> 049 * 050 * <p>The method signatures mimic the ones of 051 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: 052 * 053 * <ul> 054 * <li>There is no getName method.</li> 055 * <li>entries has been renamed to getEntries.</li> 056 * <li>getEntries and getEntry return 057 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 058 * instances.</li> 059 * <li>close is allowed to throw IOException.</li> 060 * </ul> 061 * 062 */ 063 public class ZipFile { 064 private static final int HASH_SIZE = 509; 065 private static final int SHORT = 2; 066 private static final int WORD = 4; 067 static final int NIBLET_MASK = 0x0f; 068 static final int BYTE_SHIFT = 8; 069 private static final int POS_0 = 0; 070 private static final int POS_1 = 1; 071 private static final int POS_2 = 2; 072 private static final int POS_3 = 3; 073 074 /** 075 * Maps ZipArchiveEntrys to Longs, recording the offsets of the local 076 * file headers. 077 */ 078 private final Map entries = new LinkedHashMap(HASH_SIZE); 079 080 /** 081 * Maps String to ZipArchiveEntrys, name -> actual entry. 082 */ 083 private final Map nameMap = new HashMap(HASH_SIZE); 084 085 private static final class OffsetEntry { 086 private long headerOffset = -1; 087 private long dataOffset = -1; 088 } 089 090 /** 091 * The encoding to use for filenames and the file comment. 092 * 093 * <p>For a list of possible values see <a 094 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 095 * Defaults to UTF-8.</p> 096 */ 097 private final String encoding; 098 099 /** 100 * The zip encoding to use for filenames and the file comment. 101 */ 102 private final ZipEncoding zipEncoding; 103 104 /** 105 * The actual data source. 106 */ 107 private final RandomAccessFile archive; 108 109 /** 110 * Whether to look for and use Unicode extra fields. 111 */ 112 private final boolean useUnicodeExtraFields; 113 114 /** 115 * Opens the given file for reading, assuming "UTF8" for file names. 116 * 117 * @param f the archive. 118 * 119 * @throws IOException if an error occurs while reading the file. 120 */ 121 public ZipFile(File f) throws IOException { 122 this(f, ZipEncodingHelper.UTF8); 123 } 124 125 /** 126 * Opens the given file for reading, assuming "UTF8". 127 * 128 * @param name name of the archive. 129 * 130 * @throws IOException if an error occurs while reading the file. 131 */ 132 public ZipFile(String name) throws IOException { 133 this(new File(name), ZipEncodingHelper.UTF8); 134 } 135 136 /** 137 * Opens the given file for reading, assuming the specified 138 * encoding for file names, scanning unicode extra fields. 139 * 140 * @param name name of the archive. 141 * @param encoding the encoding to use for file names, use null 142 * for the platform's default encoding 143 * 144 * @throws IOException if an error occurs while reading the file. 145 */ 146 public ZipFile(String name, String encoding) throws IOException { 147 this(new File(name), encoding, true); 148 } 149 150 /** 151 * Opens the given file for reading, assuming the specified 152 * encoding for file names and scanning for unicode extra fields. 153 * 154 * @param f the archive. 155 * @param encoding the encoding to use for file names, use null 156 * for the platform's default encoding 157 * 158 * @throws IOException if an error occurs while reading the file. 159 */ 160 public ZipFile(File f, String encoding) throws IOException { 161 this(f, encoding, true); 162 } 163 164 /** 165 * Opens the given file for reading, assuming the specified 166 * encoding for file names. 167 * 168 * @param f the archive. 169 * @param encoding the encoding to use for file names, use null 170 * for the platform's default encoding 171 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 172 * Extra Fields (if present) to set the file names. 173 * 174 * @throws IOException if an error occurs while reading the file. 175 */ 176 public ZipFile(File f, String encoding, boolean useUnicodeExtraFields) 177 throws IOException { 178 this.encoding = encoding; 179 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 180 this.useUnicodeExtraFields = useUnicodeExtraFields; 181 archive = new RandomAccessFile(f, "r"); 182 boolean success = false; 183 try { 184 Map entriesWithoutUTF8Flag = populateFromCentralDirectory(); 185 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 186 success = true; 187 } finally { 188 if (!success) { 189 try { 190 archive.close(); 191 } catch (IOException e2) { 192 // swallow, throw the original exception instead 193 } 194 } 195 } 196 } 197 198 /** 199 * The encoding to use for filenames and the file comment. 200 * 201 * @return null if using the platform's default character encoding. 202 */ 203 public String getEncoding() { 204 return encoding; 205 } 206 207 /** 208 * Closes the archive. 209 * @throws IOException if an error occurs closing the archive. 210 */ 211 public void close() throws IOException { 212 archive.close(); 213 } 214 215 /** 216 * close a zipfile quietly; throw no io fault, do nothing 217 * on a null parameter 218 * @param zipfile file to close, can be null 219 */ 220 public static void closeQuietly(ZipFile zipfile) { 221 if (zipfile != null) { 222 try { 223 zipfile.close(); 224 } catch (IOException e) { 225 //ignore 226 } 227 } 228 } 229 230 /** 231 * Returns all entries. 232 * 233 * <p>Entries will be returned in the same order they appear 234 * within the archive's central directory.</p> 235 * 236 * @return all entries as {@link ZipArchiveEntry} instances 237 */ 238 public Enumeration getEntries() { 239 return Collections.enumeration(entries.keySet()); 240 } 241 242 /** 243 * Returns all entries in physical order. 244 * 245 * <p>Entries will be returned in the same order their contents 246 * appear within the archive.</p> 247 * 248 * @return all entries as {@link ZipArchiveEntry} instances 249 * 250 * @since Commons Compress 1.1 251 */ 252 public Enumeration getEntriesInPhysicalOrder() { 253 Object[] allEntries = entries.keySet().toArray(); 254 Arrays.sort(allEntries, OFFSET_COMPARATOR); 255 return Collections.enumeration(Arrays.asList(allEntries)); 256 } 257 258 /** 259 * Returns a named entry - or <code>null</code> if no entry by 260 * that name exists. 261 * @param name name of the entry. 262 * @return the ZipArchiveEntry corresponding to the given name - or 263 * <code>null</code> if not present. 264 */ 265 public ZipArchiveEntry getEntry(String name) { 266 return (ZipArchiveEntry) nameMap.get(name); 267 } 268 269 /** 270 * Whether this class is able to read the given entry. 271 * 272 * <p>May return false if it is set up to use encryption or a 273 * compression method that hasn't been implemented yet.</p> 274 * @since Apache Commons Compress 1.1 275 */ 276 public boolean canReadEntryData(ZipArchiveEntry ze) { 277 return ZipUtil.canHandleEntryData(ze); 278 } 279 280 /** 281 * Returns an InputStream for reading the contents of the given entry. 282 * 283 * @param ze the entry to get the stream for. 284 * @return a stream to read the entry from. 285 * @throws IOException if unable to create an input stream from the zipenty 286 * @throws ZipException if the zipentry uses an unsupported feature 287 */ 288 public InputStream getInputStream(ZipArchiveEntry ze) 289 throws IOException, ZipException { 290 OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze); 291 if (offsetEntry == null) { 292 return null; 293 } 294 ZipUtil.checkRequestedFeatures(ze); 295 long start = offsetEntry.dataOffset; 296 BoundedInputStream bis = 297 new BoundedInputStream(start, ze.getCompressedSize()); 298 switch (ze.getMethod()) { 299 case ZipArchiveEntry.STORED: 300 return bis; 301 case ZipArchiveEntry.DEFLATED: 302 bis.addDummy(); 303 return new InflaterInputStream(bis, new Inflater(true)); 304 default: 305 throw new ZipException("Found unsupported compression method " 306 + ze.getMethod()); 307 } 308 } 309 310 private static final int CFH_LEN = 311 /* version made by */ SHORT 312 /* version needed to extract */ + SHORT 313 /* general purpose bit flag */ + SHORT 314 /* compression method */ + SHORT 315 /* last mod file time */ + SHORT 316 /* last mod file date */ + SHORT 317 /* crc-32 */ + WORD 318 /* compressed size */ + WORD 319 /* uncompressed size */ + WORD 320 /* filename length */ + SHORT 321 /* extra field length */ + SHORT 322 /* file comment length */ + SHORT 323 /* disk number start */ + SHORT 324 /* internal file attributes */ + SHORT 325 /* external file attributes */ + WORD 326 /* relative offset of local header */ + WORD; 327 328 /** 329 * Reads the central directory of the given archive and populates 330 * the internal tables with ZipArchiveEntry instances. 331 * 332 * <p>The ZipArchiveEntrys will know all data that can be obtained from 333 * the central directory alone, but not the data that requires the 334 * local file header or additional data to be read.</p> 335 * 336 * @return a Map<ZipArchiveEntry, NameAndComment>> of 337 * zipentries that didn't have the language encoding flag set when 338 * read. 339 */ 340 private Map populateFromCentralDirectory() 341 throws IOException { 342 HashMap noUTF8Flag = new HashMap(); 343 344 positionAtCentralDirectory(); 345 346 byte[] cfh = new byte[CFH_LEN]; 347 348 byte[] signatureBytes = new byte[WORD]; 349 archive.readFully(signatureBytes); 350 long sig = ZipLong.getValue(signatureBytes); 351 final long cfhSig = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 352 if (sig != cfhSig && startsWithLocalFileHeader()) { 353 throw new IOException("central directory is empty, can't expand" 354 + " corrupt archive."); 355 } 356 while (sig == cfhSig) { 357 archive.readFully(cfh); 358 int off = 0; 359 ZipArchiveEntry ze = new ZipArchiveEntry(); 360 361 int versionMadeBy = ZipShort.getValue(cfh, off); 362 off += SHORT; 363 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK); 364 365 off += SHORT; // skip version info 366 367 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfh, off); 368 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 369 final ZipEncoding entryEncoding = 370 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 371 ze.setGeneralPurposeBit(gpFlag); 372 373 off += SHORT; 374 375 ze.setMethod(ZipShort.getValue(cfh, off)); 376 off += SHORT; 377 378 // FIXME this is actually not very cpu cycles friendly as we are converting from 379 // dos to java while the underlying Sun implementation will convert 380 // from java to dos time for internal storage... 381 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfh, off)); 382 ze.setTime(time); 383 off += WORD; 384 385 ze.setCrc(ZipLong.getValue(cfh, off)); 386 off += WORD; 387 388 ze.setCompressedSize(ZipLong.getValue(cfh, off)); 389 off += WORD; 390 391 ze.setSize(ZipLong.getValue(cfh, off)); 392 off += WORD; 393 394 int fileNameLen = ZipShort.getValue(cfh, off); 395 off += SHORT; 396 397 int extraLen = ZipShort.getValue(cfh, off); 398 off += SHORT; 399 400 int commentLen = ZipShort.getValue(cfh, off); 401 off += SHORT; 402 403 off += SHORT; // disk number 404 405 ze.setInternalAttributes(ZipShort.getValue(cfh, off)); 406 off += SHORT; 407 408 ze.setExternalAttributes(ZipLong.getValue(cfh, off)); 409 off += WORD; 410 411 byte[] fileName = new byte[fileNameLen]; 412 archive.readFully(fileName); 413 ze.setName(entryEncoding.decode(fileName)); 414 415 // LFH offset, 416 OffsetEntry offset = new OffsetEntry(); 417 offset.headerOffset = ZipLong.getValue(cfh, off); 418 // data offset will be filled later 419 entries.put(ze, offset); 420 421 nameMap.put(ze.getName(), ze); 422 423 byte[] cdExtraData = new byte[extraLen]; 424 archive.readFully(cdExtraData); 425 ze.setCentralDirectoryExtra(cdExtraData); 426 427 byte[] comment = new byte[commentLen]; 428 archive.readFully(comment); 429 ze.setComment(entryEncoding.decode(comment)); 430 431 archive.readFully(signatureBytes); 432 sig = ZipLong.getValue(signatureBytes); 433 434 if (!hasUTF8Flag && useUnicodeExtraFields) { 435 noUTF8Flag.put(ze, new NameAndComment(fileName, comment)); 436 } 437 } 438 return noUTF8Flag; 439 } 440 441 private static final int MIN_EOCD_SIZE = 442 /* end of central dir signature */ WORD 443 /* number of this disk */ + SHORT 444 /* number of the disk with the */ 445 /* start of the central directory */ + SHORT 446 /* total number of entries in */ 447 /* the central dir on this disk */ + SHORT 448 /* total number of entries in */ 449 /* the central dir */ + SHORT 450 /* size of the central directory */ + WORD 451 /* offset of start of central */ 452 /* directory with respect to */ 453 /* the starting disk number */ + WORD 454 /* zipfile comment length */ + SHORT; 455 456 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 457 /* maximum length of zipfile comment */ + 0xFFFF; 458 459 private static final int CFD_LOCATOR_OFFSET = 460 /* end of central dir signature */ WORD 461 /* number of this disk */ + SHORT 462 /* number of the disk with the */ 463 /* start of the central directory */ + SHORT 464 /* total number of entries in */ 465 /* the central dir on this disk */ + SHORT 466 /* total number of entries in */ 467 /* the central dir */ + SHORT 468 /* size of the central directory */ + WORD; 469 470 /** 471 * Searches for the "End of central dir record", parses 472 * it and positions the stream at the first central directory 473 * record. 474 */ 475 private void positionAtCentralDirectory() 476 throws IOException { 477 boolean found = false; 478 long off = archive.length() - MIN_EOCD_SIZE; 479 long stopSearching = Math.max(0L, archive.length() - MAX_EOCD_SIZE); 480 if (off >= 0) { 481 archive.seek(off); 482 byte[] sig = ZipArchiveOutputStream.EOCD_SIG; 483 int curr = archive.read(); 484 while (off >= stopSearching && curr != -1) { 485 if (curr == sig[POS_0]) { 486 curr = archive.read(); 487 if (curr == sig[POS_1]) { 488 curr = archive.read(); 489 if (curr == sig[POS_2]) { 490 curr = archive.read(); 491 if (curr == sig[POS_3]) { 492 found = true; 493 break; 494 } 495 } 496 } 497 } 498 archive.seek(--off); 499 curr = archive.read(); 500 } 501 } 502 if (!found) { 503 throw new ZipException("archive is not a ZIP archive"); 504 } 505 archive.seek(off + CFD_LOCATOR_OFFSET); 506 byte[] cfdOffset = new byte[WORD]; 507 archive.readFully(cfdOffset); 508 archive.seek(ZipLong.getValue(cfdOffset)); 509 } 510 511 /** 512 * Number of bytes in local file header up to the "length of 513 * filename" entry. 514 */ 515 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 516 /* local file header signature */ WORD 517 /* version needed to extract */ + SHORT 518 /* general purpose bit flag */ + SHORT 519 /* compression method */ + SHORT 520 /* last mod file time */ + SHORT 521 /* last mod file date */ + SHORT 522 /* crc-32 */ + WORD 523 /* compressed size */ + WORD 524 /* uncompressed size */ + WORD; 525 526 /** 527 * Walks through all recorded entries and adds the data available 528 * from the local file header. 529 * 530 * <p>Also records the offsets for the data to read from the 531 * entries.</p> 532 */ 533 private void resolveLocalFileHeaderData(Map entriesWithoutUTF8Flag) 534 throws IOException { 535 Enumeration e = getEntries(); 536 while (e.hasMoreElements()) { 537 ZipArchiveEntry ze = (ZipArchiveEntry) e.nextElement(); 538 OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze); 539 long offset = offsetEntry.headerOffset; 540 archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 541 byte[] b = new byte[SHORT]; 542 archive.readFully(b); 543 int fileNameLen = ZipShort.getValue(b); 544 archive.readFully(b); 545 int extraFieldLen = ZipShort.getValue(b); 546 int lenToSkip = fileNameLen; 547 while (lenToSkip > 0) { 548 int skipped = archive.skipBytes(lenToSkip); 549 if (skipped <= 0) { 550 throw new RuntimeException("failed to skip file name in" 551 + " local file header"); 552 } 553 lenToSkip -= skipped; 554 } 555 byte[] localExtraData = new byte[extraFieldLen]; 556 archive.readFully(localExtraData); 557 ze.setExtra(localExtraData); 558 /*dataOffsets.put(ze, 559 new Long(offset + LFH_OFFSET_FOR_FILENAME_LENGTH 560 + SHORT + SHORT + fileNameLen + extraFieldLen)); 561 */ 562 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH 563 + SHORT + SHORT + fileNameLen + extraFieldLen; 564 565 if (entriesWithoutUTF8Flag.containsKey(ze)) { 566 String orig = ze.getName(); 567 NameAndComment nc = (NameAndComment) entriesWithoutUTF8Flag.get(ze); 568 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 569 nc.comment); 570 if (!orig.equals(ze.getName())) { 571 nameMap.remove(orig); 572 nameMap.put(ze.getName(), ze); 573 } 574 } 575 } 576 } 577 578 /** 579 * Checks whether the archive starts with a LFH. If it doesn't, 580 * it may be an empty archive. 581 */ 582 private boolean startsWithLocalFileHeader() throws IOException { 583 archive.seek(0); 584 final byte[] start = new byte[WORD]; 585 archive.readFully(start); 586 for (int i = 0; i < start.length; i++) { 587 if (start[i] != ZipArchiveOutputStream.LFH_SIG[i]) { 588 return false; 589 } 590 } 591 return true; 592 } 593 594 /** 595 * InputStream that delegates requests to the underlying 596 * RandomAccessFile, making sure that only bytes from a certain 597 * range can be read. 598 */ 599 private class BoundedInputStream extends InputStream { 600 private long remaining; 601 private long loc; 602 private boolean addDummyByte = false; 603 604 BoundedInputStream(long start, long remaining) { 605 this.remaining = remaining; 606 loc = start; 607 } 608 609 public int read() throws IOException { 610 if (remaining-- <= 0) { 611 if (addDummyByte) { 612 addDummyByte = false; 613 return 0; 614 } 615 return -1; 616 } 617 synchronized (archive) { 618 archive.seek(loc++); 619 return archive.read(); 620 } 621 } 622 623 public int read(byte[] b, int off, int len) throws IOException { 624 if (remaining <= 0) { 625 if (addDummyByte) { 626 addDummyByte = false; 627 b[off] = 0; 628 return 1; 629 } 630 return -1; 631 } 632 633 if (len <= 0) { 634 return 0; 635 } 636 637 if (len > remaining) { 638 len = (int) remaining; 639 } 640 int ret = -1; 641 synchronized (archive) { 642 archive.seek(loc); 643 ret = archive.read(b, off, len); 644 } 645 if (ret > 0) { 646 loc += ret; 647 remaining -= ret; 648 } 649 return ret; 650 } 651 652 /** 653 * Inflater needs an extra dummy byte for nowrap - see 654 * Inflater's javadocs. 655 */ 656 void addDummy() { 657 addDummyByte = true; 658 } 659 } 660 661 private static final class NameAndComment { 662 private final byte[] name; 663 private final byte[] comment; 664 private NameAndComment(byte[] name, byte[] comment) { 665 this.name = name; 666 this.comment = comment; 667 } 668 } 669 670 /** 671 * Compares two ZipArchiveEntries based on their offset within the archive. 672 * 673 * <p>Won't return any meaningful results if one of the entries 674 * isn't part of the archive at all.</p> 675 * 676 * @since Commons Compress 1.1 677 */ 678 private final Comparator OFFSET_COMPARATOR = 679 new Comparator() { 680 public int compare(Object o1, Object o2) { 681 if (o1 == o2) 682 return 0; 683 684 ZipArchiveEntry e1 = (ZipArchiveEntry) o1; 685 ZipArchiveEntry e2 = (ZipArchiveEntry) o2; 686 687 OffsetEntry off1 = (OffsetEntry) entries.get(e1); 688 OffsetEntry off2 = (OffsetEntry) entries.get(e2); 689 if (off1 == null) { 690 return 1; 691 } 692 if (off2 == null) { 693 return -1; 694 } 695 long val = (off1.headerOffset - off2.headerOffset); 696 return val == 0 ? 0 : val < 0 ? -1 : +1; 697 } 698 }; 699 }