001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.math.BigInteger;
028import java.nio.ByteBuffer;
029import java.util.Arrays;
030import java.util.zip.CRC32;
031import java.util.zip.DataFormatException;
032import java.util.zip.Inflater;
033import java.util.zip.ZipEntry;
034import java.util.zip.ZipException;
035
036import org.apache.commons.compress.archivers.ArchiveEntry;
037import org.apache.commons.compress.archivers.ArchiveInputStream;
038import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
039import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
040import org.apache.commons.compress.utils.ArchiveUtils;
041import org.apache.commons.compress.utils.IOUtils;
042import org.apache.commons.compress.utils.InputStreamStatistics;
043
044import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
045import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
046import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
047import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
048
049/**
050 * Implements an input stream that can read Zip archives.
051 *
052 * <p>As of Apache Commons Compress it transparently supports Zip64
053 * extensions and thus individual entries and archives larger than 4
054 * GB or with more than 65536 entries.</p>
055 *
056 * <p>The {@link ZipFile} class is preferred when reading from files
057 * as {@link ZipArchiveInputStream} is limited by not being able to
058 * read the central directory header before returning entries.  In
059 * particular {@link ZipArchiveInputStream}</p>
060 *
061 * <ul>
062 *
063 *  <li>may return entries that are not part of the central directory
064 *  at all and shouldn't be considered part of the archive.</li>
065 *
066 *  <li>may return several entries with the same name.</li>
067 *
068 *  <li>will not return internal or external attributes.</li>
069 *
070 *  <li>may return incomplete extra field data.</li>
071 *
072 *  <li>may return unknown sizes and CRC values for entries until the
073 *  next entry has been reached if the archive uses the data
074 *  descriptor feature.</li>
075 *
076 * </ul>
077 *
078 * @see ZipFile
079 * @NotThreadSafe
080 */
081public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {
082
083    /** The zip encoding to use for file names and the file comment. */
084    private final ZipEncoding zipEncoding;
085
086    // the provided encoding (for unit tests)
    // Stored exactly as handed to the constructor; the field is
    // package-private rather than private so tests can read it.
087    final String encoding;
088
089    /** Whether to look for and use Unicode extra fields. */
090    private final boolean useUnicodeExtraFields;
091
092    /**
     * Wrapped stream, will always be a PushbackInputStream.  The
     * constructor sizes its pushback buffer to the capacity of
     * {@link #buf}.
     */
093    private final InputStream in;
094
095    /**
     * Inflater used for all deflated entries.  It is created with
     * {@code nowrap = true} and is reset when an entry is closed
     * rather than being re-created per entry.
     */
096    private final Inflater inf = new Inflater(true);
097
098    /**
     * Buffer used to read from the wrapped stream.  The constructor
     * sets its limit to 0 to mark it as empty.
     */
099    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);
100
101    /** The entry that is currently being read, {@code null} if there is none. */
102    private CurrentEntry current = null;
103
104    /** Whether the stream has been closed. */
105    private boolean closed = false;
106
107    /** Whether the stream has reached the central directory - and thus found all entries. */
108    private boolean hitCentralDirectory = false;
109
110    /**
111     * When reading a stored entry that uses the data descriptor this
112     * stream has to read the full entry and caches it.  This is the
113     * cache.
     *
     * <p>It is cleared again when the entry is closed.</p>
114     */
115    private ByteArrayInputStream lastStoredEntry = null;
116
117    /** Whether the stream will try to read STORED entries that use a data descriptor. */
118    private boolean allowStoredEntriesWithDataDescriptor = false;
119
120    /**
     * Count decompressed bytes for current entry.  Incremented by
     * every successful {@code read} call.
     */
121    private long uncompressedCount = 0;
122
123    /** Whether the stream will try to skip the zip split signature (08074B50) at the beginning. */
124    private final boolean skipSplitSig;
125
126    private static final int LFH_LEN = 30;
127    /*
      Size in bytes of the fixed-length part of a local file header,
      i.e. the amount getNextZipEntry() reads into lfhBuf before it
      reads the variable-length file name and extra field.  Layout:

128      local file header signature     WORD
129      version needed to extract       SHORT
130      general purpose bit flag        SHORT
131      compression method              SHORT
132      last mod file time              SHORT
133      last mod file date              SHORT
134      crc-32                          WORD
135      compressed size                 WORD
136      uncompressed size               WORD
137      file name length                SHORT
138      extra field length              SHORT
139    */
140
141    private static final int CFH_LEN = 46;
142    /*
        Size in bytes of the fixed-length part of a central directory
        file header.  Layout:

143        central file header signature   WORD
144        version made by                 SHORT
145        version needed to extract       SHORT
146        general purpose bit flag        SHORT
147        compression method              SHORT
148        last mod file time              SHORT
149        last mod file date              SHORT
150        crc-32                          WORD
151        compressed size                 WORD
152        uncompressed size               WORD
153        file name length                SHORT
154        extra field length              SHORT
155        file comment length             SHORT
156        disk number start               SHORT
157        internal file attributes        SHORT
158        external file attributes        WORD
159        relative offset of local header WORD
160    */
161
    // Step used by getBytesInflated() to compensate for an Inflater byte
    // counter that wraps around; presumably 2^32, i.e. ZIP64_MAGIC is
    // expected to be 0xFFFFFFFF - confirm against ZipConstants.
162    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;
163
164    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    // lfhBuf receives the fixed part of each local file header
165    private final byte[] lfhBuf = new byte[LFH_LEN];
    // scratch target for the bytes skip() reads and discards
166    private final byte[] skipBuf = new byte[1024];
    // the remaining three buffers are used by code outside this excerpt
167    private final byte[] shortBuf = new byte[SHORT];
168    private final byte[] wordBuf = new byte[WORD];
169    private final byte[] twoDwordBuf = new byte[2 * DWORD];
170
    // number of entries getNextZipEntry() has returned so far
171    private int entriesRead = 0;
172
173    /**
174     * Create an instance using UTF-8 encoding
175     * @param inputStream the stream to wrap
176     */
177    public ZipArchiveInputStream(final InputStream inputStream) {
178        this(inputStream, ZipEncodingHelper.UTF8);
179    }
180
181    /**
182     * Create an instance using the specified encoding
183     * @param inputStream the stream to wrap
184     * @param encoding the encoding to use for file names, use null
185     * for the platform's default encoding
186     * @since 1.5
187     */
188    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
189        this(inputStream, encoding, true);
190    }
191
192    /**
193     * Create an instance using the specified encoding
194     * @param inputStream the stream to wrap
195     * @param encoding the encoding to use for file names, use null
196     * for the platform's default encoding
197     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
198     * Extra Fields (if present) to set the file names.
199     */
200    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
201        this(inputStream, encoding, useUnicodeExtraFields, false);
202    }
203
204    /**
205     * Create an instance using the specified encoding
206     * @param inputStream the stream to wrap
207     * @param encoding the encoding to use for file names, use null
208     * for the platform's default encoding
209     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
210     * Extra Fields (if present) to set the file names.
211     * @param allowStoredEntriesWithDataDescriptor whether the stream
212     * will try to read STORED entries that use a data descriptor
213     * @since 1.1
214     */
215    public ZipArchiveInputStream(final InputStream inputStream,
216                                 final String encoding,
217                                 final boolean useUnicodeExtraFields,
218                                 final boolean allowStoredEntriesWithDataDescriptor) {
219        this(inputStream, encoding, useUnicodeExtraFields, allowStoredEntriesWithDataDescriptor, false);
220    }
221
222    /**
223     * Create an instance using the specified encoding
224     * @param inputStream the stream to wrap
225     * @param encoding the encoding to use for file names, use null
226     * for the platform's default encoding
227     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
228     * Extra Fields (if present) to set the file names.
229     * @param allowStoredEntriesWithDataDescriptor whether the stream
230     * will try to read STORED entries that use a data descriptor
231     * @param skipSplitSig Whether the stream will try to skip the zip
232     * split signature(08074B50) at the beginning. You will need to
233     * set this to true if you want to read a split archive.
234     * @since 1.20
235     */
236    public ZipArchiveInputStream(final InputStream inputStream,
237                                 final String encoding,
238                                 final boolean useUnicodeExtraFields,
239                                 final boolean allowStoredEntriesWithDataDescriptor,
240                                 final boolean skipSplitSig) {
241        this.encoding = encoding;
242        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
243        this.useUnicodeExtraFields = useUnicodeExtraFields;
244        in = new PushbackInputStream(inputStream, buf.capacity());
245        this.allowStoredEntriesWithDataDescriptor =
246            allowStoredEntriesWithDataDescriptor;
247        this.skipSplitSig = skipSplitSig;
248        // haven't read anything so far
249        buf.limit(0);
250    }
251
252    public ZipArchiveEntry getNextZipEntry() throws IOException {
253        uncompressedCount = 0;
254
255        boolean firstEntry = true;
256        if (closed || hitCentralDirectory) {
257            return null;
258        }
259        if (current != null) {
260            closeEntry();
261            firstEntry = false;
262        }
263
264        long currentHeaderOffset = getBytesRead();
265        try {
266            if (firstEntry) {
267                // split archives have a special signature before the
268                // first local file header - look for it and fail with
269                // the appropriate error message if this is a split
270                // archive.
271                readFirstLocalFileHeader(lfhBuf);
272            } else {
273                readFully(lfhBuf);
274            }
275        } catch (final EOFException e) { //NOSONAR
276            return null;
277        }
278
279        final ZipLong sig = new ZipLong(lfhBuf);
280        if (!sig.equals(ZipLong.LFH_SIG)) {
281            if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG) || isApkSigningBlock(lfhBuf)) {
282                hitCentralDirectory = true;
283                skipRemainderOfArchive();
284                return null;
285            }
286            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
287        }
288
289        int off = WORD;
290        current = new CurrentEntry();
291
292        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
293        off += SHORT;
294        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
295
296        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
297        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
298        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
299        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
300        current.entry.setGeneralPurposeBit(gpFlag);
301
302        off += SHORT;
303
304        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
305        off += SHORT;
306
307        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
308        current.entry.setTime(time);
309        off += WORD;
310
311        ZipLong size = null, cSize = null;
312        if (!current.hasDataDescriptor) {
313            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
314            off += WORD;
315
316            cSize = new ZipLong(lfhBuf, off);
317            off += WORD;
318
319            size = new ZipLong(lfhBuf, off);
320            off += WORD;
321        } else {
322            off += 3 * WORD;
323        }
324
325        final int fileNameLen = ZipShort.getValue(lfhBuf, off);
326
327        off += SHORT;
328
329        final int extraLen = ZipShort.getValue(lfhBuf, off);
330        off += SHORT; // NOSONAR - assignment as documentation
331
332        final byte[] fileName = new byte[fileNameLen];
333        readFully(fileName);
334        current.entry.setName(entryEncoding.decode(fileName), fileName);
335        if (hasUTF8Flag) {
336            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
337        }
338
339        final byte[] extraData = new byte[extraLen];
340        readFully(extraData);
341        current.entry.setExtra(extraData);
342
343        if (!hasUTF8Flag && useUnicodeExtraFields) {
344            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
345        }
346
347        processZip64Extra(size, cSize);
348
349        current.entry.setLocalHeaderOffset(currentHeaderOffset);
350        current.entry.setDataOffset(getBytesRead());
351        current.entry.setStreamContiguous(true);
352
353        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
354        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
355            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
356                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
357                switch (m) {
358                case UNSHRINKING:
359                    current.in = new UnshrinkingInputStream(bis);
360                    break;
361                case IMPLODING:
362                    current.in = new ExplodingInputStream(
363                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
364                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
365                        bis);
366                    break;
367                case BZIP2:
368                    current.in = new BZip2CompressorInputStream(bis);
369                    break;
370                case ENHANCED_DEFLATED:
371                    current.in = new Deflate64CompressorInputStream(bis);
372                    break;
373                default:
374                    // we should never get here as all supported methods have been covered
375                    // will cause an error when read is invoked, don't throw an exception here so people can
376                    // skip unsupported entries
377                    break;
378                }
379            }
380        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
381            current.in = new Deflate64CompressorInputStream(in);
382        }
383
384        entriesRead++;
385        return current.entry;
386    }
387
388    /**
389     * Fills the given array with the first local file header and
390     * deals with splitting/spanning markers that may prefix the first
391     * LFH.
392     */
393    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
394        readFully(lfh);
395        final ZipLong sig = new ZipLong(lfh);
396
397        if (!skipSplitSig && sig.equals(ZipLong.DD_SIG)) {
398            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
399        }
400
401        // the split zip signature(08074B50) should only be skipped when the skipSplitSig is set
402        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER) || sig.equals(ZipLong.DD_SIG)) {
403            // Just skip over the marker.
404            final byte[] missedLfhBytes = new byte[4];
405            readFully(missedLfhBytes);
406            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
407            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
408        }
409    }
410
411    /**
412     * Records whether a Zip64 extra is present and sets the size
413     * information from it if sizes are 0xFFFFFFFF and the entry
414     * doesn't use a data descriptor.
415     */
416    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
417        final Zip64ExtendedInformationExtraField z64 =
418            (Zip64ExtendedInformationExtraField)
419            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
420        current.usesZip64 = z64 != null;
421        if (!current.hasDataDescriptor) {
422            if (z64 != null // same as current.usesZip64 but avoids NPE warning
423                    && (ZipLong.ZIP64_MAGIC.equals(cSize) || ZipLong.ZIP64_MAGIC.equals(size)) ) {
424                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
425                current.entry.setSize(z64.getSize().getLongValue());
426            } else if (cSize != null && size != null) {
427                current.entry.setCompressedSize(cSize.getValue());
428                current.entry.setSize(size.getValue());
429            }
430        }
431    }
432
433    @Override
434    public ArchiveEntry getNextEntry() throws IOException {
435        return getNextZipEntry();
436    }
437
438    /**
439     * Whether this class is able to read the given entry.
440     *
441     * <p>May return false if it is set up to use encryption or a
442     * compression method that hasn't been implemented yet.</p>
443     * @since 1.1
444     */
445    @Override
446    public boolean canReadEntryData(final ArchiveEntry ae) {
447        if (ae instanceof ZipArchiveEntry) {
448            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
449            return ZipUtil.canHandleEntryData(ze)
450                && supportsDataDescriptorFor(ze)
451                && supportsCompressedSizeFor(ze);
452        }
453        return false;
454    }
455
456    @Override
457    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
458        if (length == 0) {
459            return 0;
460        }
461        if (closed) {
462            throw new IOException("The stream is closed");
463        }
464
465        if (current == null) {
466            return -1;
467        }
468
469        // avoid int overflow, check null buffer
470        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
471            throw new ArrayIndexOutOfBoundsException();
472        }
473
474        ZipUtil.checkRequestedFeatures(current.entry);
475        if (!supportsDataDescriptorFor(current.entry)) {
476            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
477                    current.entry);
478        }
479        if (!supportsCompressedSizeFor(current.entry)) {
480            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
481                    current.entry);
482        }
483
484        int read;
485        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
486            read = readStored(buffer, offset, length);
487        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
488            read = readDeflated(buffer, offset, length);
489        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
490                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
491                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
492                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
493            read = current.in.read(buffer, offset, length);
494        } else {
495            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
496                    current.entry);
497        }
498
499        if (read >= 0) {
500            current.crc.update(buffer, offset, read);
501            uncompressedCount += read;
502        }
503
504        return read;
505    }
506
507    /**
508     * @since 1.17
509     */
510    @Override
511    public long getCompressedCount() {
512        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
513            return current.bytesRead;
514        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
515            return getBytesInflated();
516        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
517            return ((UnshrinkingInputStream) current.in).getCompressedCount();
518        } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
519            return ((ExplodingInputStream) current.in).getCompressedCount();
520        } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) {
521            return ((Deflate64CompressorInputStream) current.in).getCompressedCount();
522        } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
523            return ((BZip2CompressorInputStream) current.in).getCompressedCount();
524        } else {
525            return -1;
526        }
527    }
528
529    /**
530     * @since 1.17
531     */
532    @Override
533    public long getUncompressedCount() {
534        return uncompressedCount;
535    }
536
537    /**
538     * Implementation of read for STORED entries.
539     */
540    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {
541
542        if (current.hasDataDescriptor) {
543            if (lastStoredEntry == null) {
544                readStoredEntry();
545            }
546            return lastStoredEntry.read(buffer, offset, length);
547        }
548
549        final long csize = current.entry.getSize();
550        if (current.bytesRead >= csize) {
551            return -1;
552        }
553
554        if (buf.position() >= buf.limit()) {
555            buf.position(0);
556            final int l = in.read(buf.array());
557            if (l == -1) {
558                buf.limit(0);
559                throw new IOException("Truncated ZIP file");
560            }
561            buf.limit(l);
562
563            count(l);
564            current.bytesReadFromStream += l;
565        }
566
567        int toRead = Math.min(buf.remaining(), length);
568        if ((csize - current.bytesRead) < toRead) {
569            // if it is smaller than toRead then it fits into an int
570            toRead = (int) (csize - current.bytesRead);
571        }
572        buf.get(buffer, offset, toRead);
573        current.bytesRead += toRead;
574        return toRead;
575    }
576
577    /**
578     * Implementation of read for DEFLATED entries.
579     */
580    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
581        final int read = readFromInflater(buffer, offset, length);
582        if (read <= 0) {
583            if (inf.finished()) {
584                return -1;
585            } else if (inf.needsDictionary()) {
586                throw new ZipException("This archive needs a preset dictionary"
587                                       + " which is not supported by Commons"
588                                       + " Compress.");
589            } else if (read == -1) {
590                throw new IOException("Truncated ZIP file");
591            }
592        }
593        return read;
594    }
595
596    /**
597     * Potentially reads more bytes to fill the inflater's buffer and
598     * reads from it.
599     */
600    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
601        int read = 0;
602        do {
603            if (inf.needsInput()) {
604                final int l = fill();
605                if (l > 0) {
606                    current.bytesReadFromStream += buf.limit();
607                } else if (l == -1) {
608                    return -1;
609                } else {
610                    break;
611                }
612            }
613            try {
614                read = inf.inflate(buffer, offset, length);
615            } catch (final DataFormatException e) {
616                throw (IOException) new ZipException(e.getMessage()).initCause(e);
617            }
618        } while (read == 0 && inf.needsInput());
619        return read;
620    }
621
622    @Override
623    public void close() throws IOException {
624        if (!closed) {
625            closed = true;
626            try {
627                in.close();
628            } finally {
629                inf.end();
630            }
631        }
632    }
633
634    /**
635     * Skips over and discards value bytes of data from this input
636     * stream.
637     *
638     * <p>This implementation may end up skipping over some smaller
639     * number of bytes, possibly 0, if and only if it reaches the end
640     * of the underlying stream.</p>
641     *
642     * <p>The actual number of bytes skipped is returned.</p>
643     *
644     * @param value the number of bytes to be skipped.
645     * @return the actual number of bytes skipped.
646     * @throws IOException - if an I/O error occurs.
647     * @throws IllegalArgumentException - if value is negative.
648     */
649    @Override
650    public long skip(final long value) throws IOException {
651        if (value >= 0) {
652            long skipped = 0;
653            while (skipped < value) {
654                final long rem = value - skipped;
655                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
656                if (x == -1) {
657                    return skipped;
658                }
659                skipped += x;
660            }
661            return skipped;
662        }
663        throw new IllegalArgumentException();
664    }
665
666    /**
667     * Checks if the signature matches what is expected for a zip file.
668     * Does not currently handle self-extracting zips which may have arbitrary
669     * leading content.
670     *
671     * @param signature the bytes to check
672     * @param length    the number of bytes to check
673     * @return true, if this stream is a zip archive stream, false otherwise
674     */
675    public static boolean matches(final byte[] signature, final int length) {
676        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
677            return false;
678        }
679
680        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
681            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
682            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
683            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
684    }
685
686    private static boolean checksig(final byte[] signature, final byte[] expected) {
687        for (int i = 0; i < expected.length; i++) {
688            if (signature[i] != expected[i]) {
689                return false;
690            }
691        }
692        return true;
693    }
694
695    /**
696     * Closes the current ZIP archive entry and positions the underlying
697     * stream to the beginning of the next entry. All per-entry variables
698     * and data structures are cleared.
699     * <p>
700     * If the compressed size of this entry is included in the entry header,
701     * then any outstanding bytes are simply skipped from the underlying
702     * stream without uncompressing them. This allows an entry to be safely
703     * closed even if the compression method is unsupported.
704     * <p>
705     * In case we don't know the compressed size of this entry or have
706     * already buffered too much data from the underlying stream to support
707     * uncompression, then the uncompression process is completed and the
708     * end position of the stream is adjusted based on the result of that
709     * process.
710     *
711     * @throws IOException if an error occurs
712     */
713    private void closeEntry() throws IOException {
714        if (closed) {
715            throw new IOException("The stream is closed");
716        }
717        if (current == null) {
718            return;
719        }
720
721        // Ensure all entry bytes are read
722        if (currentEntryHasOutstandingBytes()) {
723            drainCurrentEntryData();
724        } else {
725            // this is guaranteed to exhaust the stream
726            skip(Long.MAX_VALUE); //NOSONAR
727
728            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
729                       ? getBytesInflated() : current.bytesRead;
730
731            // this is at most a single read() operation and can't
732            // exceed the range of int
733            final int diff = (int) (current.bytesReadFromStream - inB);
734
735            // Pushback any required bytes
736            if (diff > 0) {
737                pushback(buf.array(), buf.limit() - diff, diff);
738                current.bytesReadFromStream -= diff;
739            }
740
741            // Drain remainder of entry if not all data bytes were required
742            if (currentEntryHasOutstandingBytes()) {
743                drainCurrentEntryData();
744            }
745        }
746
747        if (lastStoredEntry == null && current.hasDataDescriptor) {
748            readDataDescriptor();
749        }
750
751        inf.reset();
752        buf.clear().flip();
753        current = null;
754        lastStoredEntry = null;
755    }
756
757    /**
758     * If the compressed size of the current entry is included in the entry header
759     * and there are any outstanding bytes in the underlying stream, then
760     * this returns true.
761     *
762     * @return true, if current entry is determined to have outstanding bytes, false otherwise
763     */
764    private boolean currentEntryHasOutstandingBytes() {
765        return current.bytesReadFromStream <= current.entry.getCompressedSize()
766                && !current.hasDataDescriptor;
767    }
768
769    /**
770     * Read all data of the current entry from the underlying stream
771     * that hasn't been read, yet.
772     */
773    private void drainCurrentEntryData() throws IOException {
774        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
775        while (remaining > 0) {
776            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
777            if (n < 0) {
778                throw new EOFException("Truncated ZIP entry: "
779                                       + ArchiveUtils.sanitize(current.entry.getName()));
780            }
781            count(n);
782            remaining -= n;
783        }
784    }
785
786    /**
787     * Get the number of bytes Inflater has actually processed.
788     *
789     * <p>for Java &lt; Java7 the getBytes* methods in
790     * Inflater/Deflater seem to return unsigned ints rather than
791     * longs that start over with 0 at 2^32.</p>
792     *
793     * <p>The stream knows how many bytes it has read, but not how
794     * many the Inflater actually consumed - it should be between the
795     * total number of bytes read for the entry and the total number
796     * minus the last read operation.  Here we just try to make the
797     * value close enough to the bytes we've read by assuming the
798     * number of bytes consumed must be smaller than (or equal to) the
799     * number of bytes read but not smaller by more than 2^32.</p>
800     */
801    private long getBytesInflated() {
802        long inB = inf.getBytesRead();
803        if (current.bytesReadFromStream >= TWO_EXP_32) {
804            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
805                inB += TWO_EXP_32;
806            }
807        }
808        return inB;
809    }
810
811    private int fill() throws IOException {
812        if (closed) {
813            throw new IOException("The stream is closed");
814        }
815        final int length = in.read(buf.array());
816        if (length > 0) {
817            buf.limit(length);
818            count(buf.limit());
819            inf.setInput(buf.array(), 0, buf.limit());
820        }
821        return length;
822    }
823
824    private void readFully(final byte[] b) throws IOException {
825        readFully(b, 0);
826    }
827
828    private void readFully(final byte[] b, final int off) throws IOException {
829        final int len = b.length - off;
830        final int count = IOUtils.readFully(in, b, off, len);
831        count(count);
832        if (count < len) {
833            throw new EOFException();
834        }
835    }
836
837    private void readDataDescriptor() throws IOException {
838        readFully(wordBuf);
839        ZipLong val = new ZipLong(wordBuf);
840        if (ZipLong.DD_SIG.equals(val)) {
841            // data descriptor with signature, skip sig
842            readFully(wordBuf);
843            val = new ZipLong(wordBuf);
844        }
845        current.entry.setCrc(val.getValue());
846
847        // if there is a ZIP64 extra field, sizes are eight bytes
848        // each, otherwise four bytes each.  Unfortunately some
849        // implementations - namely Java7 - use eight bytes without
850        // using a ZIP64 extra field -
851        // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
852
853        // just read 16 bytes and check whether bytes nine to twelve
854        // look like one of the signatures of what could follow a data
855        // descriptor (ignoring archive decryption headers for now).
856        // If so, push back eight bytes and assume sizes are four
857        // bytes, otherwise sizes are eight bytes each.
858        readFully(twoDwordBuf);
859        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
860        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
861            pushback(twoDwordBuf, DWORD, DWORD);
862            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
863            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
864        } else {
865            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
866            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
867        }
868    }
869
870    /**
871     * Whether this entry requires a data descriptor this library can work with.
872     *
873     * @return true if allowStoredEntriesWithDataDescriptor is true,
874     * the entry doesn't require any data descriptor or the method is
875     * DEFLATED or ENHANCED_DEFLATED.
876     */
877    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
878        return !entry.getGeneralPurposeBit().usesDataDescriptor()
879
880                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
881                || entry.getMethod() == ZipEntry.DEFLATED
882                || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
883    }
884
885    /**
886     * Whether the compressed size for the entry is either known or
887     * not required by the compression method being used.
888     */
889    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
890        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
891            || entry.getMethod() == ZipEntry.DEFLATED
892            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
893            || (entry.getGeneralPurposeBit().usesDataDescriptor()
894                && allowStoredEntriesWithDataDescriptor
895                && entry.getMethod() == ZipEntry.STORED);
896    }
897
898    private static final String USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER = // suffix appended to the ZipException messages thrown by readStoredEntry
899        " while reading a stored entry using data descriptor. Either the archive is broken"
900        + " or it can not be read using ZipArchiveInputStream and you must use ZipFile."
901        + " A common cause for this is a ZIP archive containing a ZIP archive."
902        + " See http://commons.apache.org/proper/commons-compress/zip.html#ZipArchiveInputStream_vs_ZipFile";
903
904    /**
905     * Caches a stored entry that uses the data descriptor.
906     *
907     * <ul>
908     *   <li>Reads a stored entry until the signature of a local file
909     *     header, central directory header or data descriptor has been
910     *     found.</li>
911     *   <li>Stores all entry data in lastStoredEntry.</li>
912     *   <li>Rewinds the stream to position at the data
913     *     descriptor.</li>
914     *   <li>reads the data descriptor</li>
915     * </ul>
916     *
917     * <p>After calling this method the entry should know its size,
918     * the entry's data is cached and the stream is positioned at the
919     * next local file or central directory header.</p>
920     */
921    private void readStoredEntry() throws IOException {
922        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
923        int off = 0;
924        boolean done = false;
925
926        // length of DD without signature
927        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
928
929        while (!done) {
930            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
931            if (r <= 0) {
932                // read the whole archive without ever finding a
933                // central directory
934                throw new IOException("Truncated ZIP file");
935            }
936            if (r + off < 4) {
937                // buffer too small to check for a signature, loop
938                off += r;
939                continue;
940            }
941
942            done = bufferContainsSignature(bos, off, r, ddLen);
943            if (!done) {
944                off = cacheBytesRead(bos, off, r, ddLen);
945            }
946        }
947        if (current.entry.getCompressedSize() != current.entry.getSize()) {
948            throw new ZipException("compressed and uncompressed size don't match"
949                                   + USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER);
950        }
951        final byte[] b = bos.toByteArray();
952        if (b.length != current.entry.getSize()) {
953            throw new ZipException("actual and claimed size don't match"
954                                   + USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER);
955        }
956        lastStoredEntry = new ByteArrayInputStream(b);
957    }
958
959    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); // bytes of the local file header signature
960    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); // bytes of the central file header signature
961    private static final byte[] DD = ZipLong.DD_SIG.getBytes(); // bytes of the data descriptor signature
962
963    /**
964     * Checks whether the current buffer contains the signature of a
965     * &quot;data descriptor&quot;, &quot;local file header&quot; or
966     * &quot;central directory entry&quot;.
967     *
968     * <p>If it contains such a signature, reads the data descriptor
969     * and positions the stream right after the data descriptor.</p>
970     */
971    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
972            throws IOException {
973
974        boolean done = false;
975        for (int i = 0; !done && i < offset + lastRead - 4; i++) {
976            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
977                int expectDDPos = i;
978                if (i >= expectedDDLen &&
979                    (buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
980                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
981                    // found a LFH or CFH:
982                    expectDDPos = i - expectedDDLen;
983                    done = true;
984                }
985                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
986                    // found DD:
987                    done = true;
988                }
989                if (done) {
990                    // * push back bytes read in excess as well as the data
991                    //   descriptor
992                    // * copy the remaining bytes to cache
993                    // * read data descriptor
994                    pushback(buf.array(), expectDDPos, offset + lastRead - expectDDPos);
995                    bos.write(buf.array(), 0, expectDDPos);
996                    readDataDescriptor();
997                }
998            }
999        }
1000        return done;
1001    }
1002
1003    /**
1004     * If the last read bytes could hold a data descriptor and an
1005     * incomplete signature then save the last bytes to the front of
1006     * the buffer and cache everything in front of the potential data
1007     * descriptor into the given ByteArrayOutputStream.
1008     *
1009     * <p>The retained tail is the data descriptor length plus an incomplete
1010     * signature (3 bytes in the worst case), i.e. {@code expecteDDLen + 3} bytes.</p>
1011     */
1012    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
1013        final int cacheable = offset + lastRead - expecteDDLen - 3;
1014        if (cacheable > 0) {
1015            bos.write(buf.array(), 0, cacheable);
1016            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
1017            offset = expecteDDLen + 3;
1018        } else {
1019            offset += lastRead;
1020        }
1021        return offset;
1022    }
1023
1024    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
1025        ((PushbackInputStream) in).unread(buf, offset, length);
1026        pushedBackBytes(length);
1027    }
1028
1029    // End of Central Directory Record
1030    //   end of central dir signature    WORD
1031    //   number of this disk             SHORT
1032    //   number of the disk with the
1033    //   start of the central directory  SHORT
1034    //   total number of entries in the
1035    //   central directory on this disk  SHORT
1036    //   total number of entries in
1037    //   the central directory           SHORT
1038    //   size of the central directory   WORD
1039    //   offset of start of central
1040    //   directory with respect to
1041    //   the starting disk number        WORD
1042    //   .ZIP file comment length        SHORT
1043    //   .ZIP file comment               up to 64KB
1044    //
1045
1046    /**
1047     * Reads the stream until it finds the "End of central directory
1048     * record" and consumes it as well.
1049     */
1050    private void skipRemainderOfArchive() throws IOException {
1051        // skip over central directory. One LFH has been read too much
1052        // already.  The calculation discounts file names and extra
1053        // data so it will be too short.
1054        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
1055        findEocdRecord();
1056        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
1057        readFully(shortBuf);
1058        // file comment
1059        realSkip(ZipShort.getValue(shortBuf));
1060    }
1061
1062    /**
1063     * Reads forward until the signature of the &quot;End of central
1064     * directory&quot; record is found.
1065     */
1066    private void findEocdRecord() throws IOException {
1067        int currentByte = -1;
1068        boolean skipReadCall = false;
1069        while (skipReadCall || (currentByte = readOneByte()) > -1) {
1070            skipReadCall = false;
1071            if (!isFirstByteOfEocdSig(currentByte)) {
1072                continue;
1073            }
1074            currentByte = readOneByte();
1075            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
1076                if (currentByte == -1) {
1077                    break;
1078                }
1079                skipReadCall = isFirstByteOfEocdSig(currentByte);
1080                continue;
1081            }
1082            currentByte = readOneByte();
1083            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
1084                if (currentByte == -1) {
1085                    break;
1086                }
1087                skipReadCall = isFirstByteOfEocdSig(currentByte);
1088                continue;
1089            }
1090            currentByte = readOneByte();
1091            if (currentByte == -1
1092                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
1093                break;
1094            }
1095            skipReadCall = isFirstByteOfEocdSig(currentByte);
1096        }
1097    }
1098
1099    /**
1100     * Skips bytes by reading from the underlying stream rather than
1101     * the (potentially inflating) archive stream - which {@link
1102     * #skip} would do.
1103     *
1104     * Also updates bytes-read counter.
1105     */
1106    private void realSkip(final long value) throws IOException {
1107        if (value >= 0) {
1108            long skipped = 0;
1109            while (skipped < value) {
1110                final long rem = value - skipped;
1111                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
1112                if (x == -1) {
1113                    return;
1114                }
1115                count(x);
1116                skipped += x;
1117            }
1118            return;
1119        }
1120        throw new IllegalArgumentException();
1121    }
1122
1123    /**
1124     * Reads bytes by reading from the underlying stream rather than
1125     * the (potentially inflating) archive stream - which {@link #read} would do.
1126     *
1127     * Also updates bytes-read counter.
1128     */
1129    private int readOneByte() throws IOException {
1130        final int b = in.read();
1131        if (b != -1) {
1132            count(1);
1133        }
1134        return b;
1135    }
1136
1137    private boolean isFirstByteOfEocdSig(final int b) {
1138        return b == ZipArchiveOutputStream.EOCD_SIG[0];
1139    }
1140
1141    private static final byte[] APK_SIGNING_BLOCK_MAGIC = new byte[] { // the characters "APK Sig Block 42"
1142        'A', 'P', 'K', ' ', 'S', 'i', 'g', ' ', 'B', 'l', 'o', 'c', 'k', ' ', '4', '2',
1143    };
1144    private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE); // used by isApkSigningBlock to split skips exceeding the range of long
1145
1146    /**
1147     * Checks whether this might be an APK Signing Block.
1148     *
1149     * <p>Unfortunately the APK signing block does not start with some kind of signature, it rather ends with one. It
1150     * starts with a length, so what we do is parse the suspect length, skip ahead far enough, look for the signature
1151     * and if we've found it, return true.</p>
1152     *
1153     * @param suspectLocalFileHeader the bytes read from the underlying stream in the expectation that they would hold
1154     * the local file header of the next entry.
1155     *
1156     * @return true if this looks like an APK signing block
1157     *
1158     * @see <a href="https://source.android.com/security/apksigning/v2">https://source.android.com/security/apksigning/v2</a>
1159     */
1160    private boolean isApkSigningBlock(byte[] suspectLocalFileHeader) throws IOException {
1161        // length of block excluding the size field itself
1162        BigInteger len = ZipEightByteInteger.getValue(suspectLocalFileHeader);
1163        // LFH has already been read and all but the first eight bytes contain (part of) the APK signing block,
1164        // also subtract 16 bytes in order to position us at the magic string
1165        BigInteger toSkip = len.add(BigInteger.valueOf(DWORD - suspectLocalFileHeader.length
1166            - (long) APK_SIGNING_BLOCK_MAGIC.length));
1167        byte[] magic = new byte[APK_SIGNING_BLOCK_MAGIC.length];
1168
1169        try {
1170            if (toSkip.signum() < 0) {
1171                // suspectLocalFileHeader contains the start of suspect magic string
1172                int off = suspectLocalFileHeader.length + toSkip.intValue();
1173                // length was shorter than magic length
1174                if (off < DWORD) {
1175                    return false;
1176                }
1177                int bytesInBuffer = Math.abs(toSkip.intValue());
1178                System.arraycopy(suspectLocalFileHeader, off, magic, 0, Math.min(bytesInBuffer, magic.length));
1179                if (bytesInBuffer < magic.length) {
1180                    readFully(magic, bytesInBuffer);
1181                }
1182            } else {
1183                while (toSkip.compareTo(LONG_MAX) > 0) {
1184                    realSkip(Long.MAX_VALUE);
1185                    toSkip = toSkip.add(LONG_MAX.negate());
1186                }
1187                realSkip(toSkip.longValue());
1188                readFully(magic);
1189            }
1190        } catch (EOFException ex) { //NOSONAR
1191            // length was invalid
1192            return false;
1193        }
1194        return Arrays.equals(magic, APK_SIGNING_BLOCK_MAGIC);
1195    }
1196
1197    /**
1198     * Structure collecting information for the entry that is
1199     * currently being read.
1200     */
1201    private static final class CurrentEntry {
1202
1203        /**
1204         * Current ZIP entry.
1205         */
1206        private final ZipArchiveEntry entry = new ZipArchiveEntry();
1207
1208        /**
1209         * Does the entry use a data descriptor?
1210         */
1211        private boolean hasDataDescriptor;
1212
1213        /**
1214         * Does the entry have a ZIP64 extended information extra field.
1215         */
1216        private boolean usesZip64;
1217
1218        /**
1219         * Number of bytes of entry content read by the client if the
1220         * entry is STORED.
1221         */
1222        private long bytesRead;
1223
1224        /**
1225         * Number of bytes of entry content read from the stream.
1226         *
1227         * <p>This may be more than the actual entry's length as some
1228         * stuff gets buffered up and needs to be pushed back when the
1229         * end of the entry has been reached.</p>
1230         */
1231        private long bytesReadFromStream;
1232
1233        /**
1234         * The checksum calculated as the current entry is read.
1235         */
1236        private final CRC32 crc = new CRC32();
1237
1238        /**
1239         * The input stream decompressing the data for shrunk and imploded entries.
1240         */
1241        private InputStream in;
1242    }
1243
1244    /**
1245     * Bounded input stream adapted from commons-io
1246     */
1247    private class BoundedInputStream extends InputStream {
1248
1249        /** the wrapped input stream */
1250        private final InputStream in;
1251
1252        /** the max length to provide */
1253        private final long max;
1254
1255        /** the number of bytes already returned */
1256        private long pos = 0;
1257
1258        /**
1259         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1260         * stream and limits it to a certain size.
1261         *
1262         * @param in The wrapped input stream
1263         * @param size The maximum number of bytes to return
1264         */
1265        public BoundedInputStream(final InputStream in, final long size) {
1266            this.max = size;
1267            this.in = in;
1268        }
1269
1270        @Override
1271        public int read() throws IOException {
1272            if (max >= 0 && pos >= max) {
1273                return -1;
1274            }
1275            final int result = in.read();
1276            pos++;
1277            count(1);
1278            current.bytesReadFromStream++;
1279            return result;
1280        }
1281
1282        @Override
1283        public int read(final byte[] b) throws IOException {
1284            return this.read(b, 0, b.length);
1285        }
1286
1287        @Override
1288        public int read(final byte[] b, final int off, final int len) throws IOException {
1289            if (len == 0) {
1290                return 0;
1291            }
1292            if (max >= 0 && pos >= max) {
1293                return -1;
1294            }
1295            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1296            final int bytesRead = in.read(b, off, (int) maxRead);
1297
1298            if (bytesRead == -1) {
1299                return -1;
1300            }
1301
1302            pos += bytesRead;
1303            count(bytesRead);
1304            current.bytesReadFromStream += bytesRead;
1305            return bytesRead;
1306        }
1307
1308        @Override
1309        public long skip(final long n) throws IOException {
1310            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1311            final long skippedBytes = IOUtils.skip(in, toSkip);
1312            pos += skippedBytes;
1313            return skippedBytes;
1314        }
1315
1316        @Override
1317        public int available() throws IOException {
1318            if (max >= 0 && pos >= max) {
1319                return 0;
1320            }
1321            return in.available();
1322        }
1323    }
1324}