/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.zip;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.ByteBuffer;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;

import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;

/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>As of Apache Commons Compress 1.3 it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files
 * as {@link ZipArchiveInputStream} is limited by not being able to
 * read the central directory header before returning entries.  In
 * particular {@link ZipArchiveInputStream}</p>
 *
 * <ul>
 *
 *  <li>may return entries that are not part of the central directory
 *  at all and shouldn't be considered part of the archive.</li>
 *
 *  <li>may return several entries with the same name.</li>
 *
 *  <li>will not return internal or external attributes.</li>
 *
 *  <li>may return incomplete extra field data.</li>
 *
 *  <li>may return unknown sizes and CRC values for entries until the
 *  next entry has been reached if the archive uses the data
 *  descriptor feature.</li>
 *
 * </ul>
 *
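 * <p>A minimal usage sketch (error handling and the actual processing
 * of the entry data are left out, and {@code inputStream} stands for
 * whatever stream the archive is read from):</p>
 *
 * <pre>
 * try (ZipArchiveInputStream zis = new ZipArchiveInputStream(inputStream)) {
 *     ZipArchiveEntry entry;
 *     while ((entry = zis.getNextZipEntry()) != null) {
 *         if (!zis.canReadEntryData(entry)) {
 *             // encrypted or compressed with a method this stream cannot handle
 *             continue;
 *         }
 *         // consume the entry's data from zis before asking for the next entry
 *     }
 * }
 * </pre>
 *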
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {

    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it.  This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Count decompressed bytes for current entry */
    private long uncompressedCount = 0;

    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    private static final int CFH_LEN = 46;
    /*
        central file header signature   WORD
        version made by                 SHORT
        version needed to extract       SHORT
        general purpose bit flag        SHORT
        compression method              SHORT
        last mod file time              SHORT
        last mod file date              SHORT
        crc-32                          WORD
        compressed size                 WORD
        uncompressed size               WORD
        file name length                SHORT
        extra field length              SHORT
        file comment length             SHORT
        disk number start               SHORT
        internal file attributes        SHORT
        external file attributes        WORD
        relative offset of local header WORD
    */

    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] lfhBuf = new byte[LFH_LEN];
    private final byte[] skipBuf = new byte[1024];
    private final byte[] shortBuf = new byte[SHORT];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] twoDwordBuf = new byte[2 * DWORD];

    private int entriesRead = 0;

    /**
     * Create an instance using UTF-8 encoding
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(final InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(final InputStream inputStream,
                                 final String encoding,
                                 final boolean useUnicodeExtraFields,
                                 final boolean allowStoredEntriesWithDataDescriptor) {
        this.encoding = encoding;
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        buf.limit(0);
    }

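    /**
     * Reads the local file header of the next entry and positions the
     * stream at the start of the entry's data.
     *
     * @return the next entry or {@code null} once the central
     * directory or the end of the stream has been reached
     * @throws IOException if reading fails or a header with an
     * unexpected signature is encountered
     */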
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        uncompressedCount = 0;

        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }

        long currentHeaderOffset = getBytesRead();
        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(lfhBuf);
            } else {
                readFully(lfhBuf);
            }
        } catch (final EOFException e) {
            return null;
        }

        final ZipLong sig = new ZipLong(lfhBuf);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            hitCentralDirectory = true;
            skipRemainderOfArchive();
            return null;
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
        }

        int off = WORD;
        current = new CurrentEntry();

        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
        off += SHORT;

        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
        current.entry.setTime(time);
        off += WORD;

        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
            off += WORD;

            cSize = new ZipLong(lfhBuf, off);
            off += WORD;

            size = new ZipLong(lfhBuf, off);
            off += WORD;
        } else {
            off += 3 * WORD;
        }

        final int fileNameLen = ZipShort.getValue(lfhBuf, off);

        off += SHORT;

        final int extraLen = ZipShort.getValue(lfhBuf, off);
        off += SHORT; // NOSONAR - assignment as documentation

        final byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);
        if (hasUTF8Flag) {
            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }

        final byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        current.entry.setLocalHeaderOffset(currentHeaderOffset);
        current.entry.setDataOffset(getBytesRead());
        current.entry.setStreamContiguous(true);

        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
                switch (m) {
                case UNSHRINKING:
                    current.in = new UnshrinkingInputStream(bis);
                    break;
                case IMPLODING:
                    current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        bis);
                    break;
                case BZIP2:
                    current.in = new BZip2CompressorInputStream(bis);
                    break;
                case ENHANCED_DEFLATED:
                    current.in = new Deflate64CompressorInputStream(bis);
                    break;
                default:
                    // we should never get here as all supported methods have been covered
                    // will cause an error when read is invoked, don't throw an exception here so people can
                    // skip unsupported entries
                    break;
                }
            }
        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
            current.in = new Deflate64CompressorInputStream(in);
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     */
    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
        readFully(lfh);
        final ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end.  Just skip over the marker.
            final byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     */
    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
        final Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     * @since 1.1
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        if (ae instanceof ZipArchiveEntry) {
            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
            return ZipUtil.canHandleEntryData(ze)
                && supportsDataDescriptorFor(ze)
                && supportsCompressedSizeFor(ze);
        }
        return false;
    }

    @Override
    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }

        if (current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        ZipUtil.checkRequestedFeatures(current.entry);
        if (!supportsDataDescriptorFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
                    current.entry);
        }
        if (!supportsCompressedSizeFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
                    current.entry);
        }

        int read;
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            read = readStored(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            read = readDeflated(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            read = current.in.read(buffer, offset, length);
        } else {
            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
                    current.entry);
        }

        if (read >= 0) {
            current.crc.update(buffer, offset, read);
            uncompressedCount += read;
        }

        return read;
    }

    /**
     * @since 1.17
     */
    @Override
    public long getCompressedCount() {
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            return current.bytesRead;
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            return getBytesInflated();
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
            return ((UnshrinkingInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
            return ((ExplodingInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) {
            return ((Deflate64CompressorInputStream) current.in).getCompressedCount();
        } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
            return ((BZip2CompressorInputStream) current.in).getCompressedCount();
        } else {
            return -1;
        }
    }

    /**
     * @since 1.17
     */
    @Override
    public long getUncompressedCount() {
        return uncompressedCount;
    }

    /**
     * Implementation of read for STORED entries.
     */
    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {

        if (current.hasDataDescriptor) {
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        final long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        if (buf.position() >= buf.limit()) {
            buf.position(0);
            final int l = in.read(buf.array());
            if (l == -1) {
                return -1;
            }
            buf.limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
     */
    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
        final int read = readFromInflater(buffer, offset, length);
        if (read <= 0) {
            if (inf.finished()) {
                return -1;
            } else if (inf.needsDictionary()) {
                throw new ZipException("This archive needs a preset dictionary"
                                       + " which is not supported by Commons"
                                       + " Compress.");
            } else if (read == -1) {
                throw new IOException("Truncated ZIP file");
            }
        }
        return read;
    }

    /**
     * Potentially reads more bytes to fill the inflater's buffer and
     * reads from it.
     */
    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
        int read = 0;
        do {
            if (inf.needsInput()) {
                final int l = fill();
                if (l > 0) {
                    current.bytesReadFromStream += buf.limit();
                } else if (l == -1) {
                    return -1;
                } else {
                    break;
                }
            }
            try {
                read = inf.inflate(buffer, offset, length);
            } catch (final DataFormatException e) {
                throw (IOException) new ZipException(e.getMessage()).initCause(e);
            }
        } while (read == 0 && inf.needsInput());
        return read;
    }

    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            try {
                in.close();
            } finally {
                inf.end();
            }
        }
    }

    /**
     * Skips over and discards value bytes of data from this input
     * stream.
     *
     * <p>This implementation may end up skipping over some smaller
     * number of bytes, possibly 0, if and only if it reaches the end
     * of the underlying stream.</p>
     *
     * <p>The actual number of bytes skipped is returned.</p>
     *
     * @param value the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException - if an I/O error occurs.
     * @throws IllegalArgumentException - if value is negative.
     */
    @Override
    public long skip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    return skipped;
                }
                skipped += x;
            }
            return skipped;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Checks if the signature matches what is expected for a zip file.
     * Does not currently handle self-extracting zips which may have arbitrary
     * leading content.
     *
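     * <p>A sketch of how this might be used; {@code inputStream}
     * stands for whatever stream is being probed and reading the
     * leading bytes is left to the caller (a single read may return
     * fewer than four bytes, in which case this method simply
     * returns false):</p>
     *
     * <pre>
     * byte[] start = new byte[4];
     * int n = inputStream.read(start);
     * boolean looksLikeZip = ZipArchiveInputStream.matches(start, n);
     * </pre>
     *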
     * @param signature the bytes to check
     * @param length    the number of bytes to check
     * @return true, if this stream is a zip archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
            return false;
        }

        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
    }

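    /** Checks whether the first {@code expected.length} bytes of {@code signature} match {@code expected}. */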
    private static boolean checksig(final byte[] signature, final byte[] expected) {
        for (int i = 0; i < expected.length; i++) {
            if (signature[i] != expected[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (currentEntryHasOutstandingBytes()) {
            drainCurrentEntryData();
        } else {
            // this is guaranteed to exhaust the stream
            skip(Long.MAX_VALUE); //NOSONAR

            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                       ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            final int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
                current.bytesReadFromStream -= diff;
            }

            // Drain remainder of entry if not all data bytes were required
            if (currentEntryHasOutstandingBytes()) {
                drainCurrentEntryData();
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        inf.reset();
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * If the compressed size of the current entry is included in the entry header
     * and there are any outstanding bytes in the underlying stream, then
     * this returns true.
     *
     * @return true, if current entry is determined to have outstanding bytes, false otherwise
     */
    private boolean currentEntryHasOutstandingBytes() {
        return current.bytesReadFromStream <= current.entry.getCompressedSize()
                && !current.hasDataDescriptor;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read, yet.
     */
    private void drainCurrentEntryData() throws IOException {
        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
        while (remaining > 0) {
            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
            if (n < 0) {
                throw new EOFException("Truncated ZIP entry: "
                                       + ArchiveUtils.sanitize(current.entry.getName()));
            }
            count(n);
            remaining -= n;
        }
    }

    /**
     * Gets the number of bytes the Inflater has actually processed.
     *
     * <p>For Java versions before Java 7 the getBytes* methods in
     * Inflater/Deflater seem to return unsigned ints rather than
     * longs, i.e. the reported value starts over at 0 once 2^32
     * bytes have been processed.</p>
     *
     * <p>The stream knows how many bytes it has read, but not how
     * many the Inflater actually consumed - it should be between the
     * total number of bytes read for the entry and the total number
     * minus the last read operation.  Here we just try to make the
     * value close enough to the bytes we've read by assuming the
     * number of bytes consumed must be smaller than (or equal to) the
     * number of bytes read but not smaller by more than 2^32.</p>
     */
    private long getBytesInflated() {
        long inB = inf.getBytesRead();
        if (current.bytesReadFromStream >= TWO_EXP_32) {
            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
                inB += TWO_EXP_32;
            }
        }
        return inB;
    }

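    /**
     * Fills the inflater's input buffer with bytes read from the
     * wrapped stream.
     *
     * @return the number of bytes read into the buffer or -1 if the
     * end of the wrapped stream has been reached
     */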
    private int fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        final int length = in.read(buf.array());
        if (length > 0) {
            buf.limit(length);
            count(buf.limit());
            inf.setInput(buf.array(), 0, buf.limit());
        }
        return length;
    }

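    /** Reads {@code b.length} bytes from the wrapped stream, throwing an {@link EOFException} if the stream ends before the buffer is full. */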
    private void readFully(final byte[] b) throws IOException {
        final int count = IOUtils.readFully(in, b);
        count(count);
        if (count < b.length) {
            throw new EOFException();
        }
    }

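    /**
     * Reads the data descriptor that follows the current entry's data
     * and records its CRC as well as its compressed and uncompressed
     * sizes on the entry.
     */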
    private void readDataDescriptor() throws IOException {
        readFully(wordBuf);
        ZipLong val = new ZipLong(wordBuf);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(wordBuf);
            val = new ZipLong(wordBuf);
        }
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each.  Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(twoDwordBuf);
        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            pushback(twoDwordBuf, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
        } else {
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if allowStoredEntriesWithDataDescriptor is true,
     * the entry doesn't require any data descriptor or the method is
     * DEFLATED or ENHANCED_DEFLATED.
     */
    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
        return !entry.getGeneralPurposeBit().usesDataDescriptor()
                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
                || entry.getMethod() == ZipEntry.DEFLATED
                || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
    }

    /**
     * Whether the compressed size for the entry is either known or
     * not required by the compression method being used.
     */
    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
            || entry.getMethod() == ZipEntry.DEFLATED
            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
            || (entry.getGeneralPurposeBit().usesDataDescriptor()
                && allowStoredEntriesWithDataDescriptor
                && entry.getMethod() == ZipEntry.STORED);
    }

    /**
     * Caches a stored entry that uses the data descriptor.
     *
     * <ul>
     *   <li>Reads a stored entry until the signature of a local file
     *     header, central directory header or data descriptor has been
     *     found.</li>
     *   <li>Stores all entry data in lastStoredEntry.</li>
     *   <li>Rewinds the stream to position at the data
     *     descriptor.</li>
     *   <li>reads the data descriptor</li>
     * </ul>
     *
     * <p>After calling this method the entry should know its size,
     * the entry's data is cached and the stream is positioned at the
     * next local file or central directory header.</p>
     */
    private void readStoredEntry() throws IOException {
        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int off = 0;
        boolean done = false;

        // length of DD without signature
        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;

        while (!done) {
            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
            if (r <= 0) {
                // read the whole archive without ever finding a
                // central directory
                throw new IOException("Truncated ZIP file");
            }
            if (r + off < 4) {
                // buffer too small to check for a signature, loop
                off += r;
                continue;
            }

            done = bufferContainsSignature(bos, off, r, ddLen);
            if (!done) {
                off = cacheBytesRead(bos, off, r, ddLen);
            }
        }

        final byte[] b = bos.toByteArray();
        lastStoredEntry = new ByteArrayInputStream(b);
    }


    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();

    /**
     * Checks whether the current buffer contains the signature of a
     * &quot;data descriptor&quot;, &quot;local file header&quot; or
     * &quot;central directory entry&quot;.
     *
     * <p>If it contains such a signature, reads the data descriptor
     * and positions the stream right after the data descriptor.</p>
     */
    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
            throws IOException {

        boolean done = false;
        int readTooMuch = 0;
        for (int i = 0; !done && i < offset + lastRead - 4; i++) {
            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
                    || (buf.array()[i + 2] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
                    // found a LFH or CFH:
                    readTooMuch = offset + lastRead - i - expectedDDLen;
                    done = true;
                }
                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
                    // found DD:
                    readTooMuch = offset + lastRead - i;
                    done = true;
                }
                if (done) {
                    // * push back bytes read in excess as well as the data
                    //   descriptor
                    // * copy the remaining bytes to cache
                    // * read data descriptor
                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
                    bos.write(buf.array(), 0, i);
                    readDataDescriptor();
                }
            }
        }
        return done;
    }


    /**
     * If the last read bytes could hold a data descriptor and an
     * incomplete signature then save the last bytes to the front of
     * the buffer and cache everything in front of the potential data
     * descriptor into the given ByteArrayOutputStream.
     *
     * <p>Data descriptor plus incomplete signature (3 bytes in the
     * worst case) can be 20 bytes max.</p>
     */
    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expectedDDLen) {
        final int cacheable = offset + lastRead - expectedDDLen - 3;
        if (cacheable > 0) {
            bos.write(buf.array(), 0, cacheable);
            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expectedDDLen + 3);
            offset = expectedDDLen + 3;
        } else {
            offset += lastRead;
        }
        return offset;
    }

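    /** Pushes the given bytes back into the wrapped {@link PushbackInputStream} and corrects the bytes-read counter. */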
    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
        ((PushbackInputStream) in).unread(buf, offset, length);
        pushedBackBytes(length);
    }

    // End of Central Directory Record
    //   end of central dir signature    WORD
    //   number of this disk             SHORT
    //   number of the disk with the
    //   start of the central directory  SHORT
    //   total number of entries in the
    //   central directory on this disk  SHORT
    //   total number of entries in
    //   the central directory           SHORT
    //   size of the central directory   WORD
    //   offset of start of central
    //   directory with respect to
    //   the starting disk number        WORD
    //   .ZIP file comment length        SHORT
    //   .ZIP file comment               up to 64KB
    //

    /**
     * Reads the stream until it finds the "End of central directory
     * record" and consumes it as well.
     */
    private void skipRemainderOfArchive() throws IOException {
        // skip over central directory. One LFH has been read too much
        // already.  The calculation discounts file names and extra
        // data so it will be too short.
        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
        findEocdRecord();
        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
        readFully(shortBuf);
        // file comment
        realSkip(ZipShort.getValue(shortBuf));
    }

    /**
     * Reads forward until the signature of the &quot;End of central
     * directory&quot; record is found.
     */
    private void findEocdRecord() throws IOException {
        int currentByte = -1;
        boolean skipReadCall = false;
        while (skipReadCall || (currentByte = readOneByte()) > -1) {
            skipReadCall = false;
            if (!isFirstByteOfEocdSig(currentByte)) {
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte == -1
                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
                break;
            }
            skipReadCall = isFirstByteOfEocdSig(currentByte);
        }
    }

    /**
     * Skips bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link
     * #skip} would do.
     *
     * Also updates bytes-read counter.
     */
    private void realSkip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    return;
                }
                count(x);
                skipped += x;
            }
            return;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Reads bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link #read} would do.
     *
     * Also updates bytes-read counter.
     */
    private int readOneByte() throws IOException {
        final int b = in.read();
        if (b != -1) {
            count(1);
        }
        return b;
    }

    private boolean isFirstByteOfEocdSig(final int b) {
        return b == ZipArchiveOutputStream.EOCD_SIG[0];
    }

    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }

    /**
     * Bounded input stream adapted from commons-io
     */
    private class BoundedInputStream extends InputStream {

        /** the wrapped input stream */
        private final InputStream in;

        /** the max length to provide */
        private final long max;

        /** the number of bytes already returned */
        private long pos = 0;

        /**
         * Creates a new <code>BoundedInputStream</code> that wraps the given input
         * stream and limits it to a certain size.
         *
         * @param in The wrapped input stream
         * @param size The maximum number of bytes to return
         */
        public BoundedInputStream(final InputStream in, final long size) {
            this.max = size;
            this.in = in;
        }

        @Override
        public int read() throws IOException {
            if (max >= 0 && pos >= max) {
                return -1;
            }
            final int result = in.read();
            pos++;
            count(1);
            current.bytesReadFromStream++;
            return result;
        }

        @Override
        public int read(final byte[] b) throws IOException {
            return this.read(b, 0, b.length);
        }

        @Override
        public int read(final byte[] b, final int off, final int len) throws IOException {
            if (max >= 0 && pos >= max) {
                return -1;
            }
            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
            final int bytesRead = in.read(b, off, (int) maxRead);

            if (bytesRead == -1) {
                return -1;
            }

            pos += bytesRead;
            count(bytesRead);
            current.bytesReadFromStream += bytesRead;
            return bytesRead;
        }

        @Override
        public long skip(final long n) throws IOException {
            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
            final long skippedBytes = IOUtils.skip(in, toSkip);
            pos += skippedBytes;
            return skippedBytes;
        }

        @Override
        public int available() throws IOException {
            if (max >= 0 && pos >= max) {
                return 0;
            }
            return in.available();
        }
    }
}