001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.ByteArrayInputStream; 022import java.io.ByteArrayOutputStream; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.PushbackInputStream; 027import java.nio.ByteBuffer; 028import java.util.zip.CRC32; 029import java.util.zip.DataFormatException; 030import java.util.zip.Inflater; 031import java.util.zip.ZipEntry; 032import java.util.zip.ZipException; 033 034import org.apache.commons.compress.archivers.ArchiveEntry; 035import org.apache.commons.compress.archivers.ArchiveInputStream; 036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 037import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 038import org.apache.commons.compress.utils.ArchiveUtils; 039import org.apache.commons.compress.utils.IOUtils; 040import org.apache.commons.compress.utils.InputStreamStatistics; 041 042import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 043import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 
import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;

/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>As of Apache Commons Compress it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files
 * as {@link ZipArchiveInputStream} is limited by not being able to
 * read the central directory header before returning entries. In
 * particular {@link ZipArchiveInputStream}</p>
 *
 * <ul>
 *
 * <li>may return entries that are not part of the central directory
 * at all and shouldn't be considered part of the archive.</li>
 *
 * <li>may return several entries with the same name.</li>
 *
 * <li>will not return internal or external attributes.</li>
 *
 * <li>may return incomplete extra field data.</li>
 *
 * <li>may return unknown sizes and CRC values for entries until the
 * next entry has been reached if the archive uses the data
 * descriptor feature.</li>
 *
 * </ul>
 *
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {

    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it. This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Count decompressed bytes for current entry */
    private long uncompressedCount = 0;

    /** Length of a "local file header" record - fixed part only, file name and extra field follow. */
    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    /** Length of a "central directory file header" record - fixed part only. */
    private static final int CFH_LEN = 46;
    /*
      central file header signature   WORD
      version made by                 SHORT
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
      file comment length             SHORT
      disk number start               SHORT
      internal file attributes        SHORT
      external file attributes        WORD
      relative offset of local header WORD
    */

    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] lfhBuf = new byte[LFH_LEN];
    private final byte[] skipBuf = new byte[1024];
    private final byte[] shortBuf = new byte[SHORT];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] twoDwordBuf = new byte[2 * DWORD];

    // number of entries handed out so far, used to estimate the central directory size
    private int entriesRead = 0;

    /**
     * Create an instance using UTF-8 encoding
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(final InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(final InputStream inputStream,
                                 final String encoding,
                                 final boolean useUnicodeExtraFields,
                                 final boolean allowStoredEntriesWithDataDescriptor) {
        this.encoding = encoding;
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        // pushback capacity matches the read buffer so a whole buffer can be unread
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        buf.limit(0);
    }

    /**
     * Reads the next entry's local file header and positions the
     * stream at the start of the entry data.
     *
     * @return the next entry or {@code null} once the central
     * directory (or end of stream) has been reached
     * @throws IOException if the header cannot be read or holds an
     * unexpected record signature
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        uncompressedCount = 0;

        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }

        long currentHeaderOffset = getBytesRead();
        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(lfhBuf);
            } else {
                readFully(lfhBuf);
            }
        } catch (final EOFException e) {
            return null;
        }

        final ZipLong sig = new ZipLong(lfhBuf);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            hitCentralDirectory = true;
            skipRemainderOfArchive();
            return null;
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
        }

        int off = WORD;
        current = new CurrentEntry();

        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
        off += SHORT;

        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
        current.entry.setTime(time);
        off += WORD;

        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            // CRC and sizes are only meaningful in the LFH when no
            // data descriptor follows the entry data
            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
            off += WORD;

            cSize = new ZipLong(lfhBuf, off);
            off += WORD;

            size = new ZipLong(lfhBuf, off);
            off += WORD;
        } else {
            off += 3 * WORD;
        }

        final int fileNameLen = ZipShort.getValue(lfhBuf, off);

        off += SHORT;

        final int extraLen = ZipShort.getValue(lfhBuf, off);
        off += SHORT; // NOSONAR - assignment as documentation

        final byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);
        if (hasUTF8Flag) {
            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }

        final byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        current.entry.setLocalHeaderOffset(currentHeaderOffset);
        current.entry.setDataOffset(getBytesRead());
        current.entry.setStreamContiguous(true);

        // For the less common methods a dedicated decompressing stream is
        // set up right away; STORED and DEFLATED are handled inline in read()
        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
                switch (m) {
                case UNSHRINKING:
                    current.in = new UnshrinkingInputStream(bis);
                    break;
                case IMPLODING:
                    current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        bis);
                    break;
                case BZIP2:
                    current.in = new BZip2CompressorInputStream(bis);
                    break;
                case ENHANCED_DEFLATED:
                    current.in = new Deflate64CompressorInputStream(bis);
                    break;
                default:
                    // we should never get here as all supported methods have been covered
                    // will cause an error when read is invoked, don't throw an exception here so people can
                    // skip unsupported entries
                    break;
                }
            }
        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
            current.in = new Deflate64CompressorInputStream(in);
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     */
    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
        readFully(lfh);
        final ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            // a DD signature at offset 0 marks a multi-segment split archive
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end. Just skip over the marker.
            final byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     */
    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
        final Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     * @since 1.1
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        if (ae instanceof ZipArchiveEntry) {
            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
            return ZipUtil.canHandleEntryData(ze)
                && supportsDataDescriptorFor(ze)
                && supportsCompressedSizeFor(ze);
        }
        return false;
    }

    @Override
    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }

        if (current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        ZipUtil.checkRequestedFeatures(current.entry);
        if (!supportsDataDescriptorFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
                    current.entry);
        }
        if (!supportsCompressedSizeFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
                    current.entry);
        }

        // dispatch on the compression method; the less common methods
        // read through the stream set up in getNextZipEntry
        int read;
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            read = readStored(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            read = readDeflated(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            read = current.in.read(buffer, offset, length);
        } else {
            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
                    current.entry);
        }

        if (read >= 0) {
            // keep a running CRC so the entry can be validated later
            current.crc.update(buffer, offset, read);
            uncompressedCount += read;
        }

        return read;
    }

    /**
     * @since 1.17
     */
    @Override
    public long getCompressedCount() {
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            return current.bytesRead;
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            return getBytesInflated();
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
            return ((UnshrinkingInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
            return ((ExplodingInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) {
            return ((Deflate64CompressorInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            return ((BZip2CompressorInputStream) current.in).getCompressedCount();
        } else {
            return -1;
        }
    }

    /**
     * @since 1.17
     */
    @Override
    public long getUncompressedCount() {
        return uncompressedCount;
    }

    /**
     * Implementation of read for STORED entries.
     */
    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {

        if (current.hasDataDescriptor) {
            // sizes are unknown up front - the whole entry has to be
            // cached first (see readStoredEntry)
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        // for STORED entries compressed size == uncompressed size
        final long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        if (buf.position() >= buf.limit()) {
            // buffer exhausted - refill from the wrapped stream
            buf.position(0);
            final int l = in.read(buf.array());
            if (l == -1) {
                return -1;
            }
            buf.limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
     */
    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
        final int read = readFromInflater(buffer, offset, length);
        if (read <= 0) {
            if (inf.finished()) {
                return -1;
            } else if (inf.needsDictionary()) {
                throw new ZipException("This archive needs a preset dictionary"
                                       + " which is not supported by Commons"
                                       + " Compress.");
            } else if (read == -1) {
                // underlying stream hit EOF before the deflate stream ended
                throw new IOException("Truncated ZIP file");
            }
        }
        return read;
    }

    /**
     * Potentially reads more bytes to fill the inflater's buffer and
     * reads from it.
     */
    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
        int read = 0;
        do {
            if (inf.needsInput()) {
                final int l = fill();
                if (l > 0) {
                    current.bytesReadFromStream += buf.limit();
                } else if (l == -1) {
                    return -1;
                } else {
                    break;
                }
            }
            try {
                read = inf.inflate(buffer, offset, length);
            } catch (final DataFormatException e) {
                throw (IOException) new ZipException(e.getMessage()).initCause(e);
            }
        } while (read == 0 && inf.needsInput());
        return read;
    }

    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            try {
                in.close();
            } finally {
                // release the inflater's native resources
                inf.end();
            }
        }
    }

    /**
     * Skips over and discards value bytes of data from this input
     * stream.
     *
     * <p>This implementation may end up skipping over some smaller
     * number of bytes, possibly 0, if and only if it reaches the end
     * of the underlying stream.</p>
     *
     * <p>The actual number of bytes skipped is returned.</p>
     *
     * @param value the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException - if an I/O error occurs.
     * @throws IllegalArgumentException - if value is negative.
     */
    @Override
    public long skip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    return skipped;
                }
                skipped += x;
            }
            return skipped;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Checks if the signature matches what is expected for a zip file.
     * Does not currently handle self-extracting zips which may have arbitrary
     * leading content.
     *
     * @param signature the bytes to check
     * @param length the number of bytes to check
     * @return true, if this stream is a zip archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
            return false;
        }

        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
    }

    /** Compares the leading bytes of signature against expected. */
    private static boolean checksig(final byte[] signature, final byte[] expected) {
        for (int i = 0; i < expected.length; i++) {
            if (signature[i] != expected[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (currentEntryHasOutstandingBytes()) {
            drainCurrentEntryData();
        } else {
            // this is guaranteed to exhaust the stream
            skip(Long.MAX_VALUE); //NOSONAR

            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            final int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
                current.bytesReadFromStream -= diff;
            }

            // Drain remainder of entry if not all data bytes were required
            if (currentEntryHasOutstandingBytes()) {
                drainCurrentEntryData();
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        // reset per-entry state for the next entry
        inf.reset();
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * If the compressed size of the current entry is included in the entry header
     * and there are any outstanding bytes in the underlying stream, then
     * this returns true.
     *
     * @return true, if current entry is determined to have outstanding bytes, false otherwise
     */
    private boolean currentEntryHasOutstandingBytes() {
        return current.bytesReadFromStream <= current.entry.getCompressedSize()
            && !current.hasDataDescriptor;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read, yet.
     */
    private void drainCurrentEntryData() throws IOException {
        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
        while (remaining > 0) {
            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
            if (n < 0) {
                throw new EOFException("Truncated ZIP entry: "
                                       + ArchiveUtils.sanitize(current.entry.getName()));
            }
            count(n);
            remaining -= n;
        }
    }

    /**
     * Get the number of bytes Inflater has actually processed.
     *
     * <p>for Java &lt; Java7 the getBytes* methods in
     * Inflater/Deflater seem to return unsigned ints rather than
     * longs that start over with 0 at 2^32.</p>
     *
     * <p>The stream knows how many bytes it has read, but not how
     * many the Inflater actually consumed - it should be between the
     * total number of bytes read for the entry and the total number
     * minus the last read operation. Here we just try to make the
     * value close enough to the bytes we've read by assuming the
     * number of bytes consumed must be smaller than (or equal to) the
     * number of bytes read but not smaller by more than 2^32.</p>
     */
    private long getBytesInflated() {
        long inB = inf.getBytesRead();
        if (current.bytesReadFromStream >= TWO_EXP_32) {
            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
                inB += TWO_EXP_32;
            }
        }
        return inB;
    }

    /**
     * Refills the read buffer from the wrapped stream and hands it to
     * the inflater.
     *
     * @return the number of bytes read, or -1 on end of stream
     */
    private int fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        final int length = in.read(buf.array());
        if (length > 0) {
            buf.limit(length);
            count(buf.limit());
            inf.setInput(buf.array(), 0, buf.limit());
        }
        return length;
    }

    /**
     * Fills b completely from the wrapped stream, throwing
     * EOFException on a short read.
     */
    private void readFully(final byte[] b) throws IOException {
        final int count = IOUtils.readFully(in, b);
        count(count);
        if (count < b.length) {
            throw new EOFException();
        }
    }

    /**
     * Reads the data descriptor that follows the current entry's data
     * and copies CRC and sizes into the entry.
     */
    private void readDataDescriptor() throws IOException {
        readFully(wordBuf);
        ZipLong val = new ZipLong(wordBuf);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(wordBuf);
            val = new ZipLong(wordBuf);
        }
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each. Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(twoDwordBuf);
        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            pushback(twoDwordBuf, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
        } else {
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if allowStoredEntriesWithDataDescriptor is true,
     * the entry doesn't require any data descriptor or the method is
     * DEFLATED or ENHANCED_DEFLATED.
838 */ 839 private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) { 840 return !entry.getGeneralPurposeBit().usesDataDescriptor() 841 842 || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED) 843 || entry.getMethod() == ZipEntry.DEFLATED 844 || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode(); 845 } 846 847 /** 848 * Whether the compressed size for the entry is either known or 849 * not required by the compression method being used. 850 */ 851 private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) { 852 return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN 853 || entry.getMethod() == ZipEntry.DEFLATED 854 || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() 855 || (entry.getGeneralPurposeBit().usesDataDescriptor() 856 && allowStoredEntriesWithDataDescriptor 857 && entry.getMethod() == ZipEntry.STORED); 858 } 859 860 /** 861 * Caches a stored entry that uses the data descriptor. 862 * 863 * <ul> 864 * <li>Reads a stored entry until the signature of a local file 865 * header, central directory header or data descriptor has been 866 * found.</li> 867 * <li>Stores all entry data in lastStoredEntry.</p> 868 * <li>Rewinds the stream to position at the data 869 * descriptor.</li> 870 * <li>reads the data descriptor</li> 871 * </ul> 872 * 873 * <p>After calling this method the entry should know its size, 874 * the entry's data is cached and the stream is positioned at the 875 * next local file or central directory header.</p> 876 */ 877 private void readStoredEntry() throws IOException { 878 final ByteArrayOutputStream bos = new ByteArrayOutputStream(); 879 int off = 0; 880 boolean done = false; 881 882 // length of DD without signature 883 final int ddLen = current.usesZip64 ? 
WORD + 2 * DWORD : 3 * WORD; 884 885 while (!done) { 886 final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off); 887 if (r <= 0) { 888 // read the whole archive without ever finding a 889 // central directory 890 throw new IOException("Truncated ZIP file"); 891 } 892 if (r + off < 4) { 893 // buffer too small to check for a signature, loop 894 off += r; 895 continue; 896 } 897 898 done = bufferContainsSignature(bos, off, r, ddLen); 899 if (!done) { 900 off = cacheBytesRead(bos, off, r, ddLen); 901 } 902 } 903 904 final byte[] b = bos.toByteArray(); 905 lastStoredEntry = new ByteArrayInputStream(b); 906 } 907 908 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); 909 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); 910 private static final byte[] DD = ZipLong.DD_SIG.getBytes(); 911 912 /** 913 * Checks whether the current buffer contains the signature of a 914 * "data descriptor", "local file header" or 915 * "central directory entry". 916 * 917 * <p>If it contains such a signature, reads the data descriptor 918 * and positions the stream right after the data descriptor.</p> 919 */ 920 private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen) 921 throws IOException { 922 923 boolean done = false; 924 int readTooMuch = 0; 925 for (int i = 0; !done && i < offset + lastRead - 4; i++) { 926 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) { 927 if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3]) 928 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) { 929 // found a LFH or CFH: 930 readTooMuch = offset + lastRead - i - expectedDDLen; 931 done = true; 932 } 933 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) { 934 // found DD: 935 readTooMuch = offset + lastRead - i; 936 done = true; 937 } 938 if (done) { 939 // * push back bytes read in excess as well as the data 940 // descriptor 
941 // * copy the remaining bytes to cache 942 // * read data descriptor 943 pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch); 944 bos.write(buf.array(), 0, i); 945 readDataDescriptor(); 946 } 947 } 948 } 949 return done; 950 } 951 952 /** 953 * If the last read bytes could hold a data descriptor and an 954 * incomplete signature then save the last bytes to the front of 955 * the buffer and cache everything in front of the potential data 956 * descriptor into the given ByteArrayOutputStream. 957 * 958 * <p>Data descriptor plus incomplete signature (3 bytes in the 959 * worst case) can be 20 bytes max.</p> 960 */ 961 private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) { 962 final int cacheable = offset + lastRead - expecteDDLen - 3; 963 if (cacheable > 0) { 964 bos.write(buf.array(), 0, cacheable); 965 System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3); 966 offset = expecteDDLen + 3; 967 } else { 968 offset += lastRead; 969 } 970 return offset; 971 } 972 973 private void pushback(final byte[] buf, final int offset, final int length) throws IOException { 974 ((PushbackInputStream) in).unread(buf, offset, length); 975 pushedBackBytes(length); 976 } 977 978 // End of Central Directory Record 979 // end of central dir signature WORD 980 // number of this disk SHORT 981 // number of the disk with the 982 // start of the central directory SHORT 983 // total number of entries in the 984 // central directory on this disk SHORT 985 // total number of entries in 986 // the central directory SHORT 987 // size of the central directory WORD 988 // offset of start of central 989 // directory with respect to 990 // the starting disk number WORD 991 // .ZIP file comment length SHORT 992 // .ZIP file comment up to 64KB 993 // 994 995 /** 996 * Reads the stream until it find the "End of central directory 997 * record" and consumes it as well. 
998 */ 999 private void skipRemainderOfArchive() throws IOException { 1000 // skip over central directory. One LFH has been read too much 1001 // already. The calculation discounts file names and extra 1002 // data so it will be too short. 1003 realSkip((long) entriesRead * CFH_LEN - LFH_LEN); 1004 findEocdRecord(); 1005 realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */); 1006 readFully(shortBuf); 1007 // file comment 1008 realSkip(ZipShort.getValue(shortBuf)); 1009 } 1010 1011 /** 1012 * Reads forward until the signature of the "End of central 1013 * directory" record is found. 1014 */ 1015 private void findEocdRecord() throws IOException { 1016 int currentByte = -1; 1017 boolean skipReadCall = false; 1018 while (skipReadCall || (currentByte = readOneByte()) > -1) { 1019 skipReadCall = false; 1020 if (!isFirstByteOfEocdSig(currentByte)) { 1021 continue; 1022 } 1023 currentByte = readOneByte(); 1024 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) { 1025 if (currentByte == -1) { 1026 break; 1027 } 1028 skipReadCall = isFirstByteOfEocdSig(currentByte); 1029 continue; 1030 } 1031 currentByte = readOneByte(); 1032 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) { 1033 if (currentByte == -1) { 1034 break; 1035 } 1036 skipReadCall = isFirstByteOfEocdSig(currentByte); 1037 continue; 1038 } 1039 currentByte = readOneByte(); 1040 if (currentByte == -1 1041 || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) { 1042 break; 1043 } 1044 skipReadCall = isFirstByteOfEocdSig(currentByte); 1045 } 1046 } 1047 1048 /** 1049 * Skips bytes by reading from the underlying stream rather than 1050 * the (potentially inflating) archive stream - which {@link 1051 * #skip} would do. 1052 * 1053 * Also updates bytes-read counter. 
1054 */ 1055 private void realSkip(final long value) throws IOException { 1056 if (value >= 0) { 1057 long skipped = 0; 1058 while (skipped < value) { 1059 final long rem = value - skipped; 1060 final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length)); 1061 if (x == -1) { 1062 return; 1063 } 1064 count(x); 1065 skipped += x; 1066 } 1067 return; 1068 } 1069 throw new IllegalArgumentException(); 1070 } 1071 1072 /** 1073 * Reads bytes by reading from the underlying stream rather than 1074 * the (potentially inflating) archive stream - which {@link #read} would do. 1075 * 1076 * Also updates bytes-read counter. 1077 */ 1078 private int readOneByte() throws IOException { 1079 final int b = in.read(); 1080 if (b != -1) { 1081 count(1); 1082 } 1083 return b; 1084 } 1085 1086 private boolean isFirstByteOfEocdSig(final int b) { 1087 return b == ZipArchiveOutputStream.EOCD_SIG[0]; 1088 } 1089 1090 /** 1091 * Structure collecting information for the entry that is 1092 * currently being read. 1093 */ 1094 private static final class CurrentEntry { 1095 1096 /** 1097 * Current ZIP entry. 1098 */ 1099 private final ZipArchiveEntry entry = new ZipArchiveEntry(); 1100 1101 /** 1102 * Does the entry use a data descriptor? 1103 */ 1104 private boolean hasDataDescriptor; 1105 1106 /** 1107 * Does the entry have a ZIP64 extended information extra field. 1108 */ 1109 private boolean usesZip64; 1110 1111 /** 1112 * Number of bytes of entry content read by the client if the 1113 * entry is STORED. 1114 */ 1115 private long bytesRead; 1116 1117 /** 1118 * Number of bytes of entry content read from the stream. 1119 * 1120 * <p>This may be more than the actual entry's length as some 1121 * stuff gets buffered up and needs to be pushed back when the 1122 * end of the entry has been reached.</p> 1123 */ 1124 private long bytesReadFromStream; 1125 1126 /** 1127 * The checksum calculated as the current entry is read. 
1128 */ 1129 private final CRC32 crc = new CRC32(); 1130 1131 /** 1132 * The input stream decompressing the data for shrunk and imploded entries. 1133 */ 1134 private InputStream in; 1135 } 1136 1137 /** 1138 * Bounded input stream adapted from commons-io 1139 */ 1140 private class BoundedInputStream extends InputStream { 1141 1142 /** the wrapped input stream */ 1143 private final InputStream in; 1144 1145 /** the max length to provide */ 1146 private final long max; 1147 1148 /** the number of bytes already returned */ 1149 private long pos = 0; 1150 1151 /** 1152 * Creates a new <code>BoundedInputStream</code> that wraps the given input 1153 * stream and limits it to a certain size. 1154 * 1155 * @param in The wrapped input stream 1156 * @param size The maximum number of bytes to return 1157 */ 1158 public BoundedInputStream(final InputStream in, final long size) { 1159 this.max = size; 1160 this.in = in; 1161 } 1162 1163 @Override 1164 public int read() throws IOException { 1165 if (max >= 0 && pos >= max) { 1166 return -1; 1167 } 1168 final int result = in.read(); 1169 pos++; 1170 count(1); 1171 current.bytesReadFromStream++; 1172 return result; 1173 } 1174 1175 @Override 1176 public int read(final byte[] b) throws IOException { 1177 return this.read(b, 0, b.length); 1178 } 1179 1180 @Override 1181 public int read(final byte[] b, final int off, final int len) throws IOException { 1182 if (max >= 0 && pos >= max) { 1183 return -1; 1184 } 1185 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; 1186 final int bytesRead = in.read(b, off, (int) maxRead); 1187 1188 if (bytesRead == -1) { 1189 return -1; 1190 } 1191 1192 pos += bytesRead; 1193 count(bytesRead); 1194 current.bytesReadFromStream += bytesRead; 1195 return bytesRead; 1196 } 1197 1198 @Override 1199 public long skip(final long n) throws IOException { 1200 final long toSkip = max >= 0 ? 
Math.min(n, max - pos) : n; 1201 final long skippedBytes = IOUtils.skip(in, toSkip); 1202 pos += skippedBytes; 1203 return skippedBytes; 1204 } 1205 1206 @Override 1207 public int available() throws IOException { 1208 if (max >= 0 && pos >= max) { 1209 return 0; 1210 } 1211 return in.available(); 1212 } 1213 } 1214}