001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.ByteArrayInputStream; 022import java.io.ByteArrayOutputStream; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.PushbackInputStream; 027import java.math.BigInteger; 028import java.nio.ByteBuffer; 029import java.util.Arrays; 030import java.util.zip.CRC32; 031import java.util.zip.DataFormatException; 032import java.util.zip.Inflater; 033import java.util.zip.ZipEntry; 034import java.util.zip.ZipException; 035 036import org.apache.commons.compress.archivers.ArchiveEntry; 037import org.apache.commons.compress.archivers.ArchiveInputStream; 038import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 039import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 040import org.apache.commons.compress.utils.ArchiveUtils; 041import org.apache.commons.compress.utils.IOUtils; 042import org.apache.commons.compress.utils.InputStreamStatistics; 043 044import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 045import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 046import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 047import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 048 049/** 050 * Implements an input stream that can read Zip archives. 051 * 052 * <p>As of Apache Commons Compress it transparently supports Zip64 053 * extensions and thus individual entries and archives larger than 4 054 * GB or with more than 65536 entries.</p> 055 * 056 * <p>The {@link ZipFile} class is preferred when reading from files 057 * as {@link ZipArchiveInputStream} is limited by not being able to 058 * read the central directory header before returning entries. In 059 * particular {@link ZipArchiveInputStream}</p> 060 * 061 * <ul> 062 * 063 * <li>may return entries that are not part of the central directory 064 * at all and shouldn't be considered part of the archive.</li> 065 * 066 * <li>may return several entries with the same name.</li> 067 * 068 * <li>will not return internal or external attributes.</li> 069 * 070 * <li>may return incomplete extra field data.</li> 071 * 072 * <li>may return unknown sizes and CRC values for entries until the 073 * next entry has been reached if the archive uses the data 074 * descriptor feature.</li> 075 * 076 * </ul> 077 * 078 * @see ZipFile 079 * @NotThreadSafe 080 */ 081public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics { 082 083 /** The zip encoding to use for file names and the file comment. */ 084 private final ZipEncoding zipEncoding; 085 086 // the provided encoding (for unit tests) 087 final String encoding; 088 089 /** Whether to look for and use Unicode extra fields. */ 090 private final boolean useUnicodeExtraFields; 091 092 /** Wrapped stream, will always be a PushbackInputStream. */ 093 private final InputStream in; 094 095 /** Inflater used for all deflated entries. */ 096 private final Inflater inf = new Inflater(true); 097 098 /** Buffer used to read from the wrapped stream. */ 099 private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE); 100 101 /** The entry that is currently being read. */ 102 private CurrentEntry current = null; 103 104 /** Whether the stream has been closed. */ 105 private boolean closed = false; 106 107 /** Whether the stream has reached the central directory - and thus found all entries. */ 108 private boolean hitCentralDirectory = false; 109 110 /** 111 * When reading a stored entry that uses the data descriptor this 112 * stream has to read the full entry and caches it. This is the 113 * cache. 114 */ 115 private ByteArrayInputStream lastStoredEntry = null; 116 117 /** Whether the stream will try to read STORED entries that use a data descriptor. */ 118 private boolean allowStoredEntriesWithDataDescriptor = false; 119 120 /** Count decompressed bytes for current entry */ 121 private long uncompressedCount = 0; 122 123 /** Whether the stream will try to skip the zip split signature(08074B50) at the beginning **/ 124 private final boolean skipSplitSig; 125 126 private static final int LFH_LEN = 30; 127 /* 128 local file header signature WORD 129 version needed to extract SHORT 130 general purpose bit flag SHORT 131 compression method SHORT 132 last mod file time SHORT 133 last mod file date SHORT 134 crc-32 WORD 135 compressed size WORD 136 uncompressed size WORD 137 file name length SHORT 138 extra field length SHORT 139 */ 140 141 private static final int CFH_LEN = 46; 142 /* 143 central file header signature WORD 144 version made by SHORT 145 version needed to extract SHORT 146 general purpose bit flag SHORT 147 compression method SHORT 148 last mod file time SHORT 149 last mod file date SHORT 150 crc-32 WORD 151 compressed size WORD 152 uncompressed size WORD 153 file name length SHORT 154 extra field length SHORT 155 file comment length SHORT 156 disk number start SHORT 157 internal file attributes SHORT 158 external file attributes WORD 159 relative offset of local header WORD 160 */ 161 162 private static final long TWO_EXP_32 = ZIP64_MAGIC + 1; 163 164 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 165 private final byte[] lfhBuf = new byte[LFH_LEN]; 166 private final byte[] skipBuf = new byte[1024]; 167 private final byte[] shortBuf = new byte[SHORT]; 168 private final byte[] wordBuf = new byte[WORD]; 169 private final byte[] twoDwordBuf = new byte[2 * DWORD]; 170 171 private int entriesRead = 0; 172 173 /** 174 * Create an instance using UTF-8 encoding 175 * @param inputStream the stream to wrap 176 */ 177 public ZipArchiveInputStream(final InputStream inputStream) { 178 this(inputStream, ZipEncodingHelper.UTF8); 179 } 180 181 /** 182 * Create an instance using the specified encoding 183 * @param inputStream the stream to wrap 184 * @param encoding the encoding to use for file names, use null 185 * for the platform's default encoding 186 * @since 1.5 187 */ 188 public ZipArchiveInputStream(final InputStream inputStream, final String encoding) { 189 this(inputStream, encoding, true); 190 } 191 192 /** 193 * Create an instance using the specified encoding 194 * @param inputStream the stream to wrap 195 * @param encoding the encoding to use for file names, use null 196 * for the platform's default encoding 197 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 198 * Extra Fields (if present) to set the file names. 199 */ 200 public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) { 201 this(inputStream, encoding, useUnicodeExtraFields, false); 202 } 203 204 /** 205 * Create an instance using the specified encoding 206 * @param inputStream the stream to wrap 207 * @param encoding the encoding to use for file names, use null 208 * for the platform's default encoding 209 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 210 * Extra Fields (if present) to set the file names. 211 * @param allowStoredEntriesWithDataDescriptor whether the stream 212 * will try to read STORED entries that use a data descriptor 213 * @since 1.1 214 */ 215 public ZipArchiveInputStream(final InputStream inputStream, 216 final String encoding, 217 final boolean useUnicodeExtraFields, 218 final boolean allowStoredEntriesWithDataDescriptor) { 219 this(inputStream, encoding, useUnicodeExtraFields, allowStoredEntriesWithDataDescriptor, false); 220 } 221 222 /** 223 * Create an instance using the specified encoding 224 * @param inputStream the stream to wrap 225 * @param encoding the encoding to use for file names, use null 226 * for the platform's default encoding 227 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 228 * Extra Fields (if present) to set the file names. 229 * @param allowStoredEntriesWithDataDescriptor whether the stream 230 * will try to read STORED entries that use a data descriptor 231 * @param skipSplitSig Whether the stream will try to skip the zip 232 * split signature(08074B50) at the beginning. You will need to 233 * set this to true if you want to read a split archive. 234 * @since 1.20 235 */ 236 public ZipArchiveInputStream(final InputStream inputStream, 237 final String encoding, 238 final boolean useUnicodeExtraFields, 239 final boolean allowStoredEntriesWithDataDescriptor, 240 final boolean skipSplitSig) { 241 this.encoding = encoding; 242 zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 243 this.useUnicodeExtraFields = useUnicodeExtraFields; 244 in = new PushbackInputStream(inputStream, buf.capacity()); 245 this.allowStoredEntriesWithDataDescriptor = 246 allowStoredEntriesWithDataDescriptor; 247 this.skipSplitSig = skipSplitSig; 248 // haven't read anything so far 249 buf.limit(0); 250 } 251 252 public ZipArchiveEntry getNextZipEntry() throws IOException { 253 uncompressedCount = 0; 254 255 boolean firstEntry = true; 256 if (closed || hitCentralDirectory) { 257 return null; 258 } 259 if (current != null) { 260 closeEntry(); 261 firstEntry = false; 262 } 263 264 long currentHeaderOffset = getBytesRead(); 265 try { 266 if (firstEntry) { 267 // split archives have a special signature before the 268 // first local file header - look for it and fail with 269 // the appropriate error message if this is a split 270 // archive. 271 readFirstLocalFileHeader(lfhBuf); 272 } else { 273 readFully(lfhBuf); 274 } 275 } catch (final EOFException e) { //NOSONAR 276 return null; 277 } 278 279 final ZipLong sig = new ZipLong(lfhBuf); 280 if (!sig.equals(ZipLong.LFH_SIG)) { 281 if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG) || isApkSigningBlock(lfhBuf)) { 282 hitCentralDirectory = true; 283 skipRemainderOfArchive(); 284 return null; 285 } 286 throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue())); 287 } 288 289 int off = WORD; 290 current = new CurrentEntry(); 291 292 final int versionMadeBy = ZipShort.getValue(lfhBuf, off); 293 off += SHORT; 294 current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK); 295 296 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off); 297 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 298 final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 299 current.hasDataDescriptor = gpFlag.usesDataDescriptor(); 300 current.entry.setGeneralPurposeBit(gpFlag); 301 302 off += SHORT; 303 304 current.entry.setMethod(ZipShort.getValue(lfhBuf, off)); 305 off += SHORT; 306 307 final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off)); 308 current.entry.setTime(time); 309 off += WORD; 310 311 ZipLong size = null, cSize = null; 312 if (!current.hasDataDescriptor) { 313 current.entry.setCrc(ZipLong.getValue(lfhBuf, off)); 314 off += WORD; 315 316 cSize = new ZipLong(lfhBuf, off); 317 off += WORD; 318 319 size = new ZipLong(lfhBuf, off); 320 off += WORD; 321 } else { 322 off += 3 * WORD; 323 } 324 325 final int fileNameLen = ZipShort.getValue(lfhBuf, off); 326 327 off += SHORT; 328 329 final int extraLen = ZipShort.getValue(lfhBuf, off); 330 off += SHORT; // NOSONAR - assignment as documentation 331 332 final byte[] fileName = new byte[fileNameLen]; 333 readFully(fileName); 334 current.entry.setName(entryEncoding.decode(fileName), fileName); 335 if (hasUTF8Flag) { 336 current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG); 337 } 338 339 final byte[] extraData = new byte[extraLen]; 340 readFully(extraData); 341 current.entry.setExtra(extraData); 342 343 if (!hasUTF8Flag && useUnicodeExtraFields) { 344 ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null); 345 } 346 347 processZip64Extra(size, cSize); 348 349 current.entry.setLocalHeaderOffset(currentHeaderOffset); 350 current.entry.setDataOffset(getBytesRead()); 351 current.entry.setStreamContiguous(true); 352 353 ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod()); 354 if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) { 355 if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) { 356 InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize()); 357 switch (m) { 358 case UNSHRINKING: 359 current.in = new UnshrinkingInputStream(bis); 360 break; 361 case IMPLODING: 362 current.in = new ExplodingInputStream( 363 current.entry.getGeneralPurposeBit().getSlidingDictionarySize(), 364 current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), 365 bis); 366 break; 367 case BZIP2: 368 current.in = new BZip2CompressorInputStream(bis); 369 break; 370 case ENHANCED_DEFLATED: 371 current.in = new Deflate64CompressorInputStream(bis); 372 break; 373 default: 374 // we should never get here as all supported methods have been covered 375 // will cause an error when read is invoked, don't throw an exception here so people can 376 // skip unsupported entries 377 break; 378 } 379 } 380 } else if (m == ZipMethod.ENHANCED_DEFLATED) { 381 current.in = new Deflate64CompressorInputStream(in); 382 } 383 384 entriesRead++; 385 return current.entry; 386 } 387 388 /** 389 * Fills the given array with the first local file header and 390 * deals with splitting/spanning markers that may prefix the first 391 * LFH. 392 */ 393 private void readFirstLocalFileHeader(final byte[] lfh) throws IOException { 394 readFully(lfh); 395 final ZipLong sig = new ZipLong(lfh); 396 397 if (!skipSplitSig && sig.equals(ZipLong.DD_SIG)) { 398 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING); 399 } 400 401 // the split zip signature(08074B50) should only be skipped when the skipSplitSig is set 402 if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER) || sig.equals(ZipLong.DD_SIG)) { 403 // Just skip over the marker. 404 final byte[] missedLfhBytes = new byte[4]; 405 readFully(missedLfhBytes); 406 System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4); 407 System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4); 408 } 409 } 410 411 /** 412 * Records whether a Zip64 extra is present and sets the size 413 * information from it if sizes are 0xFFFFFFFF and the entry 414 * doesn't use a data descriptor. 415 */ 416 private void processZip64Extra(final ZipLong size, final ZipLong cSize) { 417 final Zip64ExtendedInformationExtraField z64 = 418 (Zip64ExtendedInformationExtraField) 419 current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 420 current.usesZip64 = z64 != null; 421 if (!current.hasDataDescriptor) { 422 if (z64 != null // same as current.usesZip64 but avoids NPE warning 423 && (ZipLong.ZIP64_MAGIC.equals(cSize) || ZipLong.ZIP64_MAGIC.equals(size)) ) { 424 current.entry.setCompressedSize(z64.getCompressedSize().getLongValue()); 425 current.entry.setSize(z64.getSize().getLongValue()); 426 } else if (cSize != null && size != null) { 427 current.entry.setCompressedSize(cSize.getValue()); 428 current.entry.setSize(size.getValue()); 429 } 430 } 431 } 432 433 @Override 434 public ArchiveEntry getNextEntry() throws IOException { 435 return getNextZipEntry(); 436 } 437 438 /** 439 * Whether this class is able to read the given entry. 440 * 441 * <p>May return false if it is set up to use encryption or a 442 * compression method that hasn't been implemented yet.</p> 443 * @since 1.1 444 */ 445 @Override 446 public boolean canReadEntryData(final ArchiveEntry ae) { 447 if (ae instanceof ZipArchiveEntry) { 448 final ZipArchiveEntry ze = (ZipArchiveEntry) ae; 449 return ZipUtil.canHandleEntryData(ze) 450 && supportsDataDescriptorFor(ze) 451 && supportsCompressedSizeFor(ze); 452 } 453 return false; 454 } 455 456 @Override 457 public int read(final byte[] buffer, final int offset, final int length) throws IOException { 458 if (length == 0) { 459 return 0; 460 } 461 if (closed) { 462 throw new IOException("The stream is closed"); 463 } 464 465 if (current == null) { 466 return -1; 467 } 468 469 // avoid int overflow, check null buffer 470 if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) { 471 throw new ArrayIndexOutOfBoundsException(); 472 } 473 474 ZipUtil.checkRequestedFeatures(current.entry); 475 if (!supportsDataDescriptorFor(current.entry)) { 476 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR, 477 current.entry); 478 } 479 if (!supportsCompressedSizeFor(current.entry)) { 480 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE, 481 current.entry); 482 } 483 484 int read; 485 if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { 486 read = readStored(buffer, offset, length); 487 } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) { 488 read = readDeflated(buffer, offset, length); 489 } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode() 490 || current.entry.getMethod() == ZipMethod.IMPLODING.getCode() 491 || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() 492 || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { 493 read = current.in.read(buffer, offset, length); 494 } else { 495 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), 496 current.entry); 497 } 498 499 if (read >= 0) { 500 current.crc.update(buffer, offset, read); 501 uncompressedCount += read; 502 } 503 504 return read; 505 } 506 507 /** 508 * @since 1.17 509 */ 510 @Override 511 public long getCompressedCount() { 512 if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { 513 return current.bytesRead; 514 } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) { 515 return getBytesInflated(); 516 } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) { 517 return ((UnshrinkingInputStream) current.in).getCompressedCount(); 518 } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) { 519 return ((ExplodingInputStream) current.in).getCompressedCount(); 520 } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) { 521 return ((Deflate64CompressorInputStream) current.in).getCompressedCount(); 522 } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { 523 return ((BZip2CompressorInputStream) current.in).getCompressedCount(); 524 } else { 525 return -1; 526 } 527 } 528 529 /** 530 * @since 1.17 531 */ 532 @Override 533 public long getUncompressedCount() { 534 return uncompressedCount; 535 } 536 537 /** 538 * Implementation of read for STORED entries. 539 */ 540 private int readStored(final byte[] buffer, final int offset, final int length) throws IOException { 541 542 if (current.hasDataDescriptor) { 543 if (lastStoredEntry == null) { 544 readStoredEntry(); 545 } 546 return lastStoredEntry.read(buffer, offset, length); 547 } 548 549 final long csize = current.entry.getSize(); 550 if (current.bytesRead >= csize) { 551 return -1; 552 } 553 554 if (buf.position() >= buf.limit()) { 555 buf.position(0); 556 final int l = in.read(buf.array()); 557 if (l == -1) { 558 buf.limit(0); 559 throw new IOException("Truncated ZIP file"); 560 } 561 buf.limit(l); 562 563 count(l); 564 current.bytesReadFromStream += l; 565 } 566 567 int toRead = Math.min(buf.remaining(), length); 568 if ((csize - current.bytesRead) < toRead) { 569 // if it is smaller than toRead then it fits into an int 570 toRead = (int) (csize - current.bytesRead); 571 } 572 buf.get(buffer, offset, toRead); 573 current.bytesRead += toRead; 574 return toRead; 575 } 576 577 /** 578 * Implementation of read for DEFLATED entries. 579 */ 580 private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException { 581 final int read = readFromInflater(buffer, offset, length); 582 if (read <= 0) { 583 if (inf.finished()) { 584 return -1; 585 } else if (inf.needsDictionary()) { 586 throw new ZipException("This archive needs a preset dictionary" 587 + " which is not supported by Commons" 588 + " Compress."); 589 } else if (read == -1) { 590 throw new IOException("Truncated ZIP file"); 591 } 592 } 593 return read; 594 } 595 596 /** 597 * Potentially reads more bytes to fill the inflater's buffer and 598 * reads from it. 599 */ 600 private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException { 601 int read = 0; 602 do { 603 if (inf.needsInput()) { 604 final int l = fill(); 605 if (l > 0) { 606 current.bytesReadFromStream += buf.limit(); 607 } else if (l == -1) { 608 return -1; 609 } else { 610 break; 611 } 612 } 613 try { 614 read = inf.inflate(buffer, offset, length); 615 } catch (final DataFormatException e) { 616 throw (IOException) new ZipException(e.getMessage()).initCause(e); 617 } 618 } while (read == 0 && inf.needsInput()); 619 return read; 620 } 621 622 @Override 623 public void close() throws IOException { 624 if (!closed) { 625 closed = true; 626 try { 627 in.close(); 628 } finally { 629 inf.end(); 630 } 631 } 632 } 633 634 /** 635 * Skips over and discards value bytes of data from this input 636 * stream. 637 * 638 * <p>This implementation may end up skipping over some smaller 639 * number of bytes, possibly 0, if and only if it reaches the end 640 * of the underlying stream.</p> 641 * 642 * <p>The actual number of bytes skipped is returned.</p> 643 * 644 * @param value the number of bytes to be skipped. 645 * @return the actual number of bytes skipped. 646 * @throws IOException - if an I/O error occurs. 647 * @throws IllegalArgumentException - if value is negative. 648 */ 649 @Override 650 public long skip(final long value) throws IOException { 651 if (value >= 0) { 652 long skipped = 0; 653 while (skipped < value) { 654 final long rem = value - skipped; 655 final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length)); 656 if (x == -1) { 657 return skipped; 658 } 659 skipped += x; 660 } 661 return skipped; 662 } 663 throw new IllegalArgumentException(); 664 } 665 666 /** 667 * Checks if the signature matches what is expected for a zip file. 668 * Does not currently handle self-extracting zips which may have arbitrary 669 * leading content. 670 * 671 * @param signature the bytes to check 672 * @param length the number of bytes to check 673 * @return true, if this stream is a zip archive stream, false otherwise 674 */ 675 public static boolean matches(final byte[] signature, final int length) { 676 if (length < ZipArchiveOutputStream.LFH_SIG.length) { 677 return false; 678 } 679 680 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file 681 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip 682 || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip 683 || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes()); 684 } 685 686 private static boolean checksig(final byte[] signature, final byte[] expected) { 687 for (int i = 0; i < expected.length; i++) { 688 if (signature[i] != expected[i]) { 689 return false; 690 } 691 } 692 return true; 693 } 694 695 /** 696 * Closes the current ZIP archive entry and positions the underlying 697 * stream to the beginning of the next entry. All per-entry variables 698 * and data structures are cleared. 699 * <p> 700 * If the compressed size of this entry is included in the entry header, 701 * then any outstanding bytes are simply skipped from the underlying 702 * stream without uncompressing them. This allows an entry to be safely 703 * closed even if the compression method is unsupported. 704 * <p> 705 * In case we don't know the compressed size of this entry or have 706 * already buffered too much data from the underlying stream to support 707 * uncompression, then the uncompression process is completed and the 708 * end position of the stream is adjusted based on the result of that 709 * process. 710 * 711 * @throws IOException if an error occurs 712 */ 713 private void closeEntry() throws IOException { 714 if (closed) { 715 throw new IOException("The stream is closed"); 716 } 717 if (current == null) { 718 return; 719 } 720 721 // Ensure all entry bytes are read 722 if (currentEntryHasOutstandingBytes()) { 723 drainCurrentEntryData(); 724 } else { 725 // this is guaranteed to exhaust the stream 726 skip(Long.MAX_VALUE); //NOSONAR 727 728 final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED 729 ? getBytesInflated() : current.bytesRead; 730 731 // this is at most a single read() operation and can't 732 // exceed the range of int 733 final int diff = (int) (current.bytesReadFromStream - inB); 734 735 // Pushback any required bytes 736 if (diff > 0) { 737 pushback(buf.array(), buf.limit() - diff, diff); 738 current.bytesReadFromStream -= diff; 739 } 740 741 // Drain remainder of entry if not all data bytes were required 742 if (currentEntryHasOutstandingBytes()) { 743 drainCurrentEntryData(); 744 } 745 } 746 747 if (lastStoredEntry == null && current.hasDataDescriptor) { 748 readDataDescriptor(); 749 } 750 751 inf.reset(); 752 buf.clear().flip(); 753 current = null; 754 lastStoredEntry = null; 755 } 756 757 /** 758 * If the compressed size of the current entry is included in the entry header 759 * and there are any outstanding bytes in the underlying stream, then 760 * this returns true. 761 * 762 * @return true, if current entry is determined to have outstanding bytes, false otherwise 763 */ 764 private boolean currentEntryHasOutstandingBytes() { 765 return current.bytesReadFromStream <= current.entry.getCompressedSize() 766 && !current.hasDataDescriptor; 767 } 768 769 /** 770 * Read all data of the current entry from the underlying stream 771 * that hasn't been read, yet. 772 */ 773 private void drainCurrentEntryData() throws IOException { 774 long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream; 775 while (remaining > 0) { 776 final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining)); 777 if (n < 0) { 778 throw new EOFException("Truncated ZIP entry: " 779 + ArchiveUtils.sanitize(current.entry.getName())); 780 } 781 count(n); 782 remaining -= n; 783 } 784 } 785 786 /** 787 * Get the number of bytes Inflater has actually processed. 788 * 789 * <p>for Java < Java7 the getBytes* methods in 790 * Inflater/Deflater seem to return unsigned ints rather than 791 * longs that start over with 0 at 2^32.</p> 792 * 793 * <p>The stream knows how many bytes it has read, but not how 794 * many the Inflater actually consumed - it should be between the 795 * total number of bytes read for the entry and the total number 796 * minus the last read operation. Here we just try to make the 797 * value close enough to the bytes we've read by assuming the 798 * number of bytes consumed must be smaller than (or equal to) the 799 * number of bytes read but not smaller by more than 2^32.</p> 800 */ 801 private long getBytesInflated() { 802 long inB = inf.getBytesRead(); 803 if (current.bytesReadFromStream >= TWO_EXP_32) { 804 while (inB + TWO_EXP_32 <= current.bytesReadFromStream) { 805 inB += TWO_EXP_32; 806 } 807 } 808 return inB; 809 } 810 811 private int fill() throws IOException { 812 if (closed) { 813 throw new IOException("The stream is closed"); 814 } 815 final int length = in.read(buf.array()); 816 if (length > 0) { 817 buf.limit(length); 818 count(buf.limit()); 819 inf.setInput(buf.array(), 0, buf.limit()); 820 } 821 return length; 822 } 823 824 private void readFully(final byte[] b) throws IOException { 825 readFully(b, 0); 826 } 827 828 private void readFully(final byte[] b, final int off) throws IOException { 829 final int len = b.length - off; 830 final int count = IOUtils.readFully(in, b, off, len); 831 count(count); 832 if (count < len) { 833 throw new EOFException(); 834 } 835 } 836 837 private void readDataDescriptor() throws IOException { 838 readFully(wordBuf); 839 ZipLong val = new ZipLong(wordBuf); 840 if (ZipLong.DD_SIG.equals(val)) { 841 // data descriptor with signature, skip sig 842 readFully(wordBuf); 843 val = new ZipLong(wordBuf); 844 } 845 current.entry.setCrc(val.getValue()); 846 847 // if there is a ZIP64 extra field, sizes are eight bytes 848 // each, otherwise four bytes each. Unfortunately some 849 // implementations - namely Java7 - use eight bytes without 850 // using a ZIP64 extra field - 851 // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588 852 853 // just read 16 bytes and check whether bytes nine to twelve 854 // look like one of the signatures of what could follow a data 855 // descriptor (ignoring archive decryption headers for now). 856 // If so, push back eight bytes and assume sizes are four 857 // bytes, otherwise sizes are eight bytes each. 858 readFully(twoDwordBuf); 859 final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD); 860 if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) { 861 pushback(twoDwordBuf, DWORD, DWORD); 862 current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf)); 863 current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD)); 864 } else { 865 current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf)); 866 current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD)); 867 } 868 } 869 870 /** 871 * Whether this entry requires a data descriptor this library can work with. 872 * 873 * @return true if allowStoredEntriesWithDataDescriptor is true, 874 * the entry doesn't require any data descriptor or the method is 875 * DEFLATED or ENHANCED_DEFLATED. 876 */ 877 private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) { 878 return !entry.getGeneralPurposeBit().usesDataDescriptor() 879 880 || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED) 881 || entry.getMethod() == ZipEntry.DEFLATED 882 || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode(); 883 } 884 885 /** 886 * Whether the compressed size for the entry is either known or 887 * not required by the compression method being used. 888 */ 889 private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) { 890 return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN 891 || entry.getMethod() == ZipEntry.DEFLATED 892 || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() 893 || (entry.getGeneralPurposeBit().usesDataDescriptor() 894 && allowStoredEntriesWithDataDescriptor 895 && entry.getMethod() == ZipEntry.STORED); 896 } 897 898 private static final String USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER = 899 " while reading a stored entry using data descriptor. Either the archive is broken" 900 + " or it can not be read using ZipArchiveInputStream and you must use ZipFile." 901 + " A common cause for this is a ZIP archive containing a ZIP archive." 902 + " See http://commons.apache.org/proper/commons-compress/zip.html#ZipArchiveInputStream_vs_ZipFile"; 903 904 /** 905 * Caches a stored entry that uses the data descriptor. 906 * 907 * <ul> 908 * <li>Reads a stored entry until the signature of a local file 909 * header, central directory header or data descriptor has been 910 * found.</li> 911 * <li>Stores all entry data in lastStoredEntry.</p> 912 * <li>Rewinds the stream to position at the data 913 * descriptor.</li> 914 * <li>reads the data descriptor</li> 915 * </ul> 916 * 917 * <p>After calling this method the entry should know its size, 918 * the entry's data is cached and the stream is positioned at the 919 * next local file or central directory header.</p> 920 */ 921 private void readStoredEntry() throws IOException { 922 final ByteArrayOutputStream bos = new ByteArrayOutputStream(); 923 int off = 0; 924 boolean done = false; 925 926 // length of DD without signature 927 final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD; 928 929 while (!done) { 930 final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off); 931 if (r <= 0) { 932 // read the whole archive without ever finding a 933 // central directory 934 throw new IOException("Truncated ZIP file"); 935 } 936 if (r + off < 4) { 937 // buffer too small to check for a signature, loop 938 off += r; 939 continue; 940 } 941 942 done = bufferContainsSignature(bos, off, r, ddLen); 943 if (!done) { 944 off = cacheBytesRead(bos, off, r, ddLen); 945 } 946 } 947 if (current.entry.getCompressedSize() != current.entry.getSize()) { 948 throw new ZipException("compressed and uncompressed size don't match" 949 + USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER); 950 } 951 final byte[] b = bos.toByteArray(); 952 if (b.length != current.entry.getSize()) { 953 throw new ZipException("actual and claimed size don't match" 954 + USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER); 955 } 956 lastStoredEntry = new ByteArrayInputStream(b); 957 } 958 959 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); 960 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); 961 private static final byte[] DD = ZipLong.DD_SIG.getBytes(); 962 963 /** 964 * Checks whether the current buffer contains the signature of a 965 * "data descriptor", "local file header" or 966 * "central directory entry". 967 * 968 * <p>If it contains such a signature, reads the data descriptor 969 * and positions the stream right after the data descriptor.</p> 970 */ 971 private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen) 972 throws IOException { 973 974 boolean done = false; 975 for (int i = 0; !done && i < offset + lastRead - 4; i++) { 976 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) { 977 int expectDDPos = i; 978 if (i >= expectedDDLen && 979 (buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3]) 980 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) { 981 // found a LFH or CFH: 982 expectDDPos = i - expectedDDLen; 983 done = true; 984 } 985 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) { 986 // found DD: 987 done = true; 988 } 989 if (done) { 990 // * push back bytes read in excess as well as the data 991 // descriptor 992 // * copy the remaining bytes to cache 993 // * read data descriptor 994 pushback(buf.array(), expectDDPos, offset + lastRead - expectDDPos); 995 bos.write(buf.array(), 0, expectDDPos); 996 readDataDescriptor(); 997 } 998 } 999 } 1000 return done; 1001 } 1002 1003 /** 1004 * If the last read bytes could hold a data descriptor and an 1005 * incomplete signature then save the last bytes to the front of 1006 * the buffer and cache everything in front of the potential data 1007 * descriptor into the given ByteArrayOutputStream. 1008 * 1009 * <p>Data descriptor plus incomplete signature (3 bytes in the 1010 * worst case) can be 20 bytes max.</p> 1011 */ 1012 private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) { 1013 final int cacheable = offset + lastRead - expecteDDLen - 3; 1014 if (cacheable > 0) { 1015 bos.write(buf.array(), 0, cacheable); 1016 System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3); 1017 offset = expecteDDLen + 3; 1018 } else { 1019 offset += lastRead; 1020 } 1021 return offset; 1022 } 1023 1024 private void pushback(final byte[] buf, final int offset, final int length) throws IOException { 1025 ((PushbackInputStream) in).unread(buf, offset, length); 1026 pushedBackBytes(length); 1027 } 1028 1029 // End of Central Directory Record 1030 // end of central dir signature WORD 1031 // number of this disk SHORT 1032 // number of the disk with the 1033 // start of the central directory SHORT 1034 // total number of entries in the 1035 // central directory on this disk SHORT 1036 // total number of entries in 1037 // the central directory SHORT 1038 // size of the central directory WORD 1039 // offset of start of central 1040 // directory with respect to 1041 // the starting disk number WORD 1042 // .ZIP file comment length SHORT 1043 // .ZIP file comment up to 64KB 1044 // 1045 1046 /** 1047 * Reads the stream until it find the "End of central directory 1048 * record" and consumes it as well. 1049 */ 1050 private void skipRemainderOfArchive() throws IOException { 1051 // skip over central directory. One LFH has been read too much 1052 // already. The calculation discounts file names and extra 1053 // data so it will be too short. 1054 realSkip((long) entriesRead * CFH_LEN - LFH_LEN); 1055 findEocdRecord(); 1056 realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */); 1057 readFully(shortBuf); 1058 // file comment 1059 realSkip(ZipShort.getValue(shortBuf)); 1060 } 1061 1062 /** 1063 * Reads forward until the signature of the "End of central 1064 * directory" record is found. 1065 */ 1066 private void findEocdRecord() throws IOException { 1067 int currentByte = -1; 1068 boolean skipReadCall = false; 1069 while (skipReadCall || (currentByte = readOneByte()) > -1) { 1070 skipReadCall = false; 1071 if (!isFirstByteOfEocdSig(currentByte)) { 1072 continue; 1073 } 1074 currentByte = readOneByte(); 1075 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) { 1076 if (currentByte == -1) { 1077 break; 1078 } 1079 skipReadCall = isFirstByteOfEocdSig(currentByte); 1080 continue; 1081 } 1082 currentByte = readOneByte(); 1083 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) { 1084 if (currentByte == -1) { 1085 break; 1086 } 1087 skipReadCall = isFirstByteOfEocdSig(currentByte); 1088 continue; 1089 } 1090 currentByte = readOneByte(); 1091 if (currentByte == -1 1092 || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) { 1093 break; 1094 } 1095 skipReadCall = isFirstByteOfEocdSig(currentByte); 1096 } 1097 } 1098 1099 /** 1100 * Skips bytes by reading from the underlying stream rather than 1101 * the (potentially inflating) archive stream - which {@link 1102 * #skip} would do. 1103 * 1104 * Also updates bytes-read counter. 1105 */ 1106 private void realSkip(final long value) throws IOException { 1107 if (value >= 0) { 1108 long skipped = 0; 1109 while (skipped < value) { 1110 final long rem = value - skipped; 1111 final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length)); 1112 if (x == -1) { 1113 return; 1114 } 1115 count(x); 1116 skipped += x; 1117 } 1118 return; 1119 } 1120 throw new IllegalArgumentException(); 1121 } 1122 1123 /** 1124 * Reads bytes by reading from the underlying stream rather than 1125 * the (potentially inflating) archive stream - which {@link #read} would do. 1126 * 1127 * Also updates bytes-read counter. 1128 */ 1129 private int readOneByte() throws IOException { 1130 final int b = in.read(); 1131 if (b != -1) { 1132 count(1); 1133 } 1134 return b; 1135 } 1136 1137 private boolean isFirstByteOfEocdSig(final int b) { 1138 return b == ZipArchiveOutputStream.EOCD_SIG[0]; 1139 } 1140 1141 private static final byte[] APK_SIGNING_BLOCK_MAGIC = new byte[] { 1142 'A', 'P', 'K', ' ', 'S', 'i', 'g', ' ', 'B', 'l', 'o', 'c', 'k', ' ', '4', '2', 1143 }; 1144 private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE); 1145 1146 /** 1147 * Checks whether this might be an APK Signing Block. 1148 * 1149 * <p>Unfortunately the APK signing block does not start with some kind of signature, it rather ends with one. It 1150 * starts with a length, so what we do is parse the suspect length, skip ahead far enough, look for the signature 1151 * and if we've found it, return true.</p> 1152 * 1153 * @param suspectLocalFileHeader the bytes read from the underlying stream in the expectation that they would hold 1154 * the local file header of the next entry. 1155 * 1156 * @return true if this looks like a APK signing block 1157 * 1158 * @see <a href="https://source.android.com/security/apksigning/v2">https://source.android.com/security/apksigning/v2</a> 1159 */ 1160 private boolean isApkSigningBlock(byte[] suspectLocalFileHeader) throws IOException { 1161 // length of block excluding the size field itself 1162 BigInteger len = ZipEightByteInteger.getValue(suspectLocalFileHeader); 1163 // LFH has already been read and all but the first eight bytes contain (part of) the APK signing block, 1164 // also subtract 16 bytes in order to position us at the magic string 1165 BigInteger toSkip = len.add(BigInteger.valueOf(DWORD - suspectLocalFileHeader.length 1166 - (long) APK_SIGNING_BLOCK_MAGIC.length)); 1167 byte[] magic = new byte[APK_SIGNING_BLOCK_MAGIC.length]; 1168 1169 try { 1170 if (toSkip.signum() < 0) { 1171 // suspectLocalFileHeader contains the start of suspect magic string 1172 int off = suspectLocalFileHeader.length + toSkip.intValue(); 1173 // length was shorter than magic length 1174 if (off < DWORD) { 1175 return false; 1176 } 1177 int bytesInBuffer = Math.abs(toSkip.intValue()); 1178 System.arraycopy(suspectLocalFileHeader, off, magic, 0, Math.min(bytesInBuffer, magic.length)); 1179 if (bytesInBuffer < magic.length) { 1180 readFully(magic, bytesInBuffer); 1181 } 1182 } else { 1183 while (toSkip.compareTo(LONG_MAX) > 0) { 1184 realSkip(Long.MAX_VALUE); 1185 toSkip = toSkip.add(LONG_MAX.negate()); 1186 } 1187 realSkip(toSkip.longValue()); 1188 readFully(magic); 1189 } 1190 } catch (EOFException ex) { //NOSONAR 1191 // length was invalid 1192 return false; 1193 } 1194 return Arrays.equals(magic, APK_SIGNING_BLOCK_MAGIC); 1195 } 1196 1197 /** 1198 * Structure collecting information for the entry that is 1199 * currently being read. 1200 */ 1201 private static final class CurrentEntry { 1202 1203 /** 1204 * Current ZIP entry. 1205 */ 1206 private final ZipArchiveEntry entry = new ZipArchiveEntry(); 1207 1208 /** 1209 * Does the entry use a data descriptor? 1210 */ 1211 private boolean hasDataDescriptor; 1212 1213 /** 1214 * Does the entry have a ZIP64 extended information extra field. 1215 */ 1216 private boolean usesZip64; 1217 1218 /** 1219 * Number of bytes of entry content read by the client if the 1220 * entry is STORED. 1221 */ 1222 private long bytesRead; 1223 1224 /** 1225 * Number of bytes of entry content read from the stream. 1226 * 1227 * <p>This may be more than the actual entry's length as some 1228 * stuff gets buffered up and needs to be pushed back when the 1229 * end of the entry has been reached.</p> 1230 */ 1231 private long bytesReadFromStream; 1232 1233 /** 1234 * The checksum calculated as the current entry is read. 1235 */ 1236 private final CRC32 crc = new CRC32(); 1237 1238 /** 1239 * The input stream decompressing the data for shrunk and imploded entries. 1240 */ 1241 private InputStream in; 1242 } 1243 1244 /** 1245 * Bounded input stream adapted from commons-io 1246 */ 1247 private class BoundedInputStream extends InputStream { 1248 1249 /** the wrapped input stream */ 1250 private final InputStream in; 1251 1252 /** the max length to provide */ 1253 private final long max; 1254 1255 /** the number of bytes already returned */ 1256 private long pos = 0; 1257 1258 /** 1259 * Creates a new <code>BoundedInputStream</code> that wraps the given input 1260 * stream and limits it to a certain size. 1261 * 1262 * @param in The wrapped input stream 1263 * @param size The maximum number of bytes to return 1264 */ 1265 public BoundedInputStream(final InputStream in, final long size) { 1266 this.max = size; 1267 this.in = in; 1268 } 1269 1270 @Override 1271 public int read() throws IOException { 1272 if (max >= 0 && pos >= max) { 1273 return -1; 1274 } 1275 final int result = in.read(); 1276 pos++; 1277 count(1); 1278 current.bytesReadFromStream++; 1279 return result; 1280 } 1281 1282 @Override 1283 public int read(final byte[] b) throws IOException { 1284 return this.read(b, 0, b.length); 1285 } 1286 1287 @Override 1288 public int read(final byte[] b, final int off, final int len) throws IOException { 1289 if (len == 0) { 1290 return 0; 1291 } 1292 if (max >= 0 && pos >= max) { 1293 return -1; 1294 } 1295 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; 1296 final int bytesRead = in.read(b, off, (int) maxRead); 1297 1298 if (bytesRead == -1) { 1299 return -1; 1300 } 1301 1302 pos += bytesRead; 1303 count(bytesRead); 1304 current.bytesReadFromStream += bytesRead; 1305 return bytesRead; 1306 } 1307 1308 @Override 1309 public long skip(final long n) throws IOException { 1310 final long toSkip = max >= 0 ? Math.min(n, max - pos) : n; 1311 final long skippedBytes = IOUtils.skip(in, toSkip); 1312 pos += skippedBytes; 1313 return skippedBytes; 1314 } 1315 1316 @Override 1317 public int available() throws IOException { 1318 if (max >= 0 && pos >= max) { 1319 return 0; 1320 } 1321 return in.available(); 1322 } 1323 } 1324}