001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.archivers.zip;
020    
021    import java.io.ByteArrayInputStream;
022    import java.io.ByteArrayOutputStream;
023    import java.io.EOFException;
024    import java.io.IOException;
025    import java.io.InputStream;
026    import java.io.PushbackInputStream;
027    import java.util.zip.CRC32;
028    import java.util.zip.DataFormatException;
029    import java.util.zip.Inflater;
030    import java.util.zip.ZipException;
031    
032    import org.apache.commons.compress.archivers.ArchiveEntry;
033    import org.apache.commons.compress.archivers.ArchiveInputStream;
034    
035    /**
036     * Implements an input stream that can read Zip archives.
037     * <p>
038     * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the DEFLATE algorithm is used, as the size information
039     * is not available from the header.
040     * <p>
041     * The {@link ZipFile} class is preferred when reading from files.
042     *  
043     * @see ZipFile
044     * @NotThreadSafe
045     */
046    public class ZipArchiveInputStream extends ArchiveInputStream {
047    
048        private static final int SHORT = 2;
049        private static final int WORD = 4;
050    
051        /**
052         * The zip encoding to use for filenames and the file comment.
053         */
054        private final ZipEncoding zipEncoding;
055    
056        /**
057         * Whether to look for and use Unicode extra fields.
058         */
059        private final boolean useUnicodeExtraFields;
060    
061        private final InputStream in;
062    
063        private final Inflater inf = new Inflater(true);
064        private final CRC32 crc = new CRC32();
065    
066        private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE];
067    
068        private ZipArchiveEntry current = null;
069        private boolean closed = false;
070        private boolean hitCentralDirectory = false;
071        private int offsetInBuffer = 0;
072        private long readBytesOfEntry = 0, bytesReadFromStream = 0;
073        private int lengthOfLastRead = 0;
074        private boolean hasDataDescriptor = false;
075        private ByteArrayInputStream lastStoredEntry = null;
076    
077        private boolean allowStoredEntriesWithDataDescriptor = false;
078    
079        private static final int LFH_LEN = 30;
080        /*
081          local file header signature     4 bytes  (0x04034b50)
082          version needed to extract       2 bytes
083          general purpose bit flag        2 bytes
084          compression method              2 bytes
085          last mod file time              2 bytes
086          last mod file date              2 bytes
087          crc-32                          4 bytes
088          compressed size                 4 bytes
089          uncompressed size               4 bytes
090          file name length                2 bytes
091          extra field length              2 bytes
092        */
093    
094        public ZipArchiveInputStream(InputStream inputStream) {
095            this(inputStream, ZipEncodingHelper.UTF8, true);
096        }
097    
098        /**
099         * @param encoding the encoding to use for file names, use null
100         * for the platform's default encoding
101         * @param useUnicodeExtraFields whether to use InfoZIP Unicode
102         * Extra Fields (if present) to set the file names.
103         */
104        public ZipArchiveInputStream(InputStream inputStream,
105                                     String encoding,
106                                     boolean useUnicodeExtraFields) {
107            this(inputStream, encoding, useUnicodeExtraFields, false);
108        }
109    
110        /**
111         * @param encoding the encoding to use for file names, use null
112         * for the platform's default encoding
113         * @param useUnicodeExtraFields whether to use InfoZIP Unicode
114         * Extra Fields (if present) to set the file names.
115         * @param allowStoredEntriesWithDataDescriptor whether the stream
116         * will try to read STORED entries that use a data descriptor
117         * @since Apache Commons Compress 1.1
118         */
119        public ZipArchiveInputStream(InputStream inputStream,
120                                     String encoding,
121                                     boolean useUnicodeExtraFields,
122                                     boolean allowStoredEntriesWithDataDescriptor) {
123            zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
124            this.useUnicodeExtraFields = useUnicodeExtraFields;
125            in = new PushbackInputStream(inputStream, buf.length);
126            this.allowStoredEntriesWithDataDescriptor =
127                allowStoredEntriesWithDataDescriptor;
128        }
129    
130        public ZipArchiveEntry getNextZipEntry() throws IOException {
131            if (closed || hitCentralDirectory) {
132                return null;
133            }
134            if (current != null) {
135                closeEntry();
136            }
137            byte[] lfh = new byte[LFH_LEN];
138            try {
139                readFully(lfh);
140            } catch (EOFException e) {
141                return null;
142            }
143            ZipLong sig = new ZipLong(lfh);
144            if (sig.equals(ZipLong.CFH_SIG)) {
145                hitCentralDirectory = true;
146                return null;
147            }
148            if (!sig.equals(ZipLong.LFH_SIG)) {
149                return null;
150            }
151    
152            int off = WORD;
153            current = new ZipArchiveEntry();
154    
155            int versionMadeBy = ZipShort.getValue(lfh, off);
156            off += SHORT;
157            current.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT)
158                                & ZipFile.NIBLET_MASK);
159    
160            final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfh, off);
161            final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
162            final ZipEncoding entryEncoding =
163                hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
164            hasDataDescriptor = gpFlag.usesDataDescriptor();
165            current.setGeneralPurposeBit(gpFlag);
166    
167            off += SHORT;
168    
169            current.setMethod(ZipShort.getValue(lfh, off));
170            off += SHORT;
171    
172            long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfh, off));
173            current.setTime(time);
174            off += WORD;
175    
176            if (!hasDataDescriptor) {
177                current.setCrc(ZipLong.getValue(lfh, off));
178                off += WORD;
179    
180                current.setCompressedSize(ZipLong.getValue(lfh, off));
181                off += WORD;
182    
183                current.setSize(ZipLong.getValue(lfh, off));
184                off += WORD;
185            } else {
186                off += 3 * WORD;
187            }
188    
189            int fileNameLen = ZipShort.getValue(lfh, off);
190    
191            off += SHORT;
192    
193            int extraLen = ZipShort.getValue(lfh, off);
194            off += SHORT;
195    
196            byte[] fileName = new byte[fileNameLen];
197            readFully(fileName);
198            current.setName(entryEncoding.decode(fileName), fileName);
199    
200            byte[] extraData = new byte[extraLen];
201            readFully(extraData);
202            current.setExtra(extraData);
203    
204            if (!hasUTF8Flag && useUnicodeExtraFields) {
205                ZipUtil.setNameAndCommentFromExtraFields(current, fileName, null);
206            }
207            return current;
208        }
209    
210        /** {@inheritDoc} */
211        public ArchiveEntry getNextEntry() throws IOException {
212            return getNextZipEntry();
213        }
214    
215        /**
216         * Whether this class is able to read the given entry.
217         *
218         * <p>May return false if it is set up to use encryption or a
219         * compression method that hasn't been implemented yet.</p>
220         * @since Apache Commons Compress 1.1
221         */
222        public boolean canReadEntryData(ArchiveEntry ae) {
223            if (ae instanceof ZipArchiveEntry) {
224                ZipArchiveEntry ze = (ZipArchiveEntry) ae;
225                return ZipUtil.canHandleEntryData(ze)
226                    && supportsDataDescriptorFor(ze);
227    
228            }
229            return false;
230        }
231    
232        public int read(byte[] buffer, int start, int length) throws IOException {
233            if (closed) {
234                throw new IOException("The stream is closed");
235            }
236            if (inf.finished() || current == null) {
237                return -1;
238            }
239    
240            // avoid int overflow, check null buffer
241            if (start <= buffer.length && length >= 0 && start >= 0
242                && buffer.length - start >= length) {
243                ZipUtil.checkRequestedFeatures(current);
244                if (!supportsDataDescriptorFor(current)) {
245                    throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException
246                                                             .Feature
247                                                             .DATA_DESCRIPTOR,
248                                                             current);
249                }
250    
251                if (current.getMethod() == ZipArchiveOutputStream.STORED) {
252                    if (hasDataDescriptor) {
253                        if (lastStoredEntry == null) {
254                            readStoredEntry();
255                        }
256                        return lastStoredEntry.read(buffer, start, length);
257                    }
258    
259                    long csize = current.getSize();
260                    if (readBytesOfEntry >= csize) {
261                        return -1;
262                    }
263                    if (offsetInBuffer >= lengthOfLastRead) {
264                        offsetInBuffer = 0;
265                        if ((lengthOfLastRead = in.read(buf)) == -1) {
266                            return -1;
267                        }
268                        count(lengthOfLastRead);
269                        bytesReadFromStream += lengthOfLastRead;
270                    }
271                    int toRead = length > lengthOfLastRead
272                        ? lengthOfLastRead - offsetInBuffer
273                        : length;
274                    if ((csize - readBytesOfEntry) < toRead) {
275                        // if it is smaller than toRead then it fits into an int
276                        toRead = (int) (csize - readBytesOfEntry);
277                    }
278                    System.arraycopy(buf, offsetInBuffer, buffer, start, toRead);
279                    offsetInBuffer += toRead;
280                    readBytesOfEntry += toRead;
281                    crc.update(buffer, start, toRead);
282                    return toRead;
283                }
284    
285                if (inf.needsInput()) {
286                    fill();
287                    if (lengthOfLastRead > 0) {
288                        bytesReadFromStream += lengthOfLastRead;
289                    }
290                }
291                int read = 0;
292                try {
293                    read = inf.inflate(buffer, start, length);
294                } catch (DataFormatException e) {
295                    throw new ZipException(e.getMessage());
296                }
297                if (read == 0) {
298                    if (inf.finished()) {
299                        return -1;
300                    } else if (lengthOfLastRead == -1) {
301                        throw new IOException("Truncated ZIP file");
302                    }
303                }
304                crc.update(buffer, start, read);
305                return read;
306            }
307            throw new ArrayIndexOutOfBoundsException();
308        }
309    
310        public void close() throws IOException {
311            if (!closed) {
312                closed = true;
313                in.close();
314            }
315        }
316    
317        /**
318         * Skips over and discards value bytes of data from this input
319         * stream.
320         *
321         * <p>This implementation may end up skipping over some smaller
322         * number of bytes, possibly 0, if an only if it reaches the end
323         * of the underlying stream.</p>
324         *
325         * <p>The actual number of bytes skipped is returned.</p>
326         *
327         * @param value the number of bytes to be skipped.
328         * @return the actual number of bytes skipped.
329         * @throws IOException - if an I/O error occurs.
330         * @throws IllegalArgumentException - if value is negative.
331         */
332        public long skip(long value) throws IOException {
333            if (value >= 0) {
334                long skipped = 0;
335                byte[] b = new byte[1024];
336                while (skipped < value) {
337                    long rem = value - skipped;
338                    int x = read(b, 0, (int) (b.length > rem ? rem : b.length));
339                    if (x == -1) {
340                        return skipped;
341                    }
342                    skipped += x;
343                }
344                return skipped;
345            }
346            throw new IllegalArgumentException();
347        }
348    
349        /**
350         * Checks if the signature matches what is expected for a zip file.
351         * Does not currently handle self-extracting zips which may have arbitrary
352         * leading content.
353         * 
354         * @param signature
355         *            the bytes to check
356         * @param length
357         *            the number of bytes to check
358         * @return true, if this stream is a zip archive stream, false otherwise
359         */
360        public static boolean matches(byte[] signature, int length) {
361            if (length < ZipArchiveOutputStream.LFH_SIG.length) {
362                return false;
363            }
364    
365            return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
366                || checksig(signature, ZipArchiveOutputStream.EOCD_SIG); // empty zip
367        }
368    
369        private static boolean checksig(byte[] signature, byte[] expected){
370            for (int i = 0; i < expected.length; i++) {
371                if (signature[i] != expected[i]) {
372                    return false;
373                }
374            }
375            return true;        
376        }
377    
378        /**
379         * Closes the current ZIP archive entry and positions the underlying
380         * stream to the beginning of the next entry. All per-entry variables
381         * and data structures are cleared.
382         * <p>
383         * If the compressed size of this entry is included in the entry header,
384         * then any outstanding bytes are simply skipped from the underlying
385         * stream without uncompressing them. This allows an entry to be safely
386         * closed even if the compression method is unsupported.
387         * <p>
388         * In case we don't know the compressed size of this entry or have
389         * already buffered too much data from the underlying stream to support
390         * uncompression, then the uncompression process is completed and the
391         * end position of the stream is adjusted based on the result of that
392         * process.
393         *
394         * @throws IOException if an error occurs
395         */
396        private void closeEntry() throws IOException {
397            if (closed) {
398                throw new IOException("The stream is closed");
399            }
400            if (current == null) {
401                return;
402            }
403    
404            // Ensure all entry bytes are read
405            if (bytesReadFromStream <= current.getCompressedSize()
406                    && !hasDataDescriptor) {
407                long remaining = current.getCompressedSize() - bytesReadFromStream;
408                while (remaining > 0) {
409                    long n = in.read(buf, 0, (int) Math.min(buf.length, remaining));
410                    if (n < 0) {
411                        throw new EOFException(
412                                "Truncated ZIP entry: " + current.getName());
413                    } else {
414                        count(n);
415                        remaining -= n;
416                    }
417                }
418            } else {
419                skip(Long.MAX_VALUE);
420    
421                long inB;
422                if (current.getMethod() == ZipArchiveOutputStream.DEFLATED) {
423                    inB = ZipUtil.adjustToLong(inf.getTotalIn());
424                } else {
425                    inB = readBytesOfEntry;
426                }
427    
428                // this is at most a single read() operation and can't
429                // exceed the range of int
430                int diff = (int) (bytesReadFromStream - inB);
431    
432                // Pushback any required bytes
433                if (diff > 0) {
434                    ((PushbackInputStream) in).unread(
435                            buf,  lengthOfLastRead - diff, diff);
436                    pushedBackBytes(diff);
437                }
438            }
439    
440            if (lastStoredEntry == null && hasDataDescriptor) {
441                readDataDescriptor();
442            }
443    
444            inf.reset();
445            readBytesOfEntry = bytesReadFromStream = 0L;
446            offsetInBuffer = lengthOfLastRead = 0;
447            crc.reset();
448            current = null;
449            lastStoredEntry = null;
450        }
451    
452        private void fill() throws IOException {
453            if (closed) {
454                throw new IOException("The stream is closed");
455            }
456            if ((lengthOfLastRead = in.read(buf)) > 0) {
457                count(lengthOfLastRead);
458                inf.setInput(buf, 0, lengthOfLastRead);
459            }
460        }
461    
462        private void readFully(byte[] b) throws IOException {
463            int count = 0, x = 0;
464            while (count != b.length) {
465                count += x = in.read(b, count, b.length - count);
466                if (x == -1) {
467                    throw new EOFException();
468                }
469                count(x);
470            }
471        }
472    
473        private void readDataDescriptor() throws IOException {
474            byte[] b = new byte[WORD];
475            readFully(b);
476            ZipLong val = new ZipLong(b);
477            if (ZipLong.DD_SIG.equals(val)) {
478                // data descriptor with signature, skip sig
479                readFully(b);
480                val = new ZipLong(b);
481            }
482            current.setCrc(val.getValue());
483            readFully(b);
484            current.setCompressedSize(new ZipLong(b).getValue());
485            readFully(b);
486            current.setSize(new ZipLong(b).getValue());
487        }
488    
489        /**
490         * Whether this entry requires a data descriptor this library can work with.
491         *
492         * @return true if allowStoredEntriesWithDataDescriptor is true,
493         * the entry doesn't require any data descriptor or the method is
494         * DEFLATED.
495         */
496        private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) {
497            return allowStoredEntriesWithDataDescriptor ||
498                !entry.getGeneralPurposeBit().usesDataDescriptor()
499                || entry.getMethod() == ZipArchiveEntry.DEFLATED;
500        }
501    
502        /**
503         * Caches a stored entry that uses the data descriptor.
504         *
505         * <ul>
506         *   <li>Reads a stored entry until the signature of a local file
507         *     header, central directory header or data descriptor has been
508         *     found.</li>
509         *   <li>Stores all entry data in lastStoredEntry.</p>
510         *   <li>Rewinds the stream to position at the data
511         *     descriptor.</li>
512         *   <li>reads the data descriptor</li>
513         * </ul>
514         *
515         * <p>After calling this method the entry should know its size,
516         * the entry's data is cached and the stream is positioned at the
517         * next local file or central directory header.</p>
518         */
519        private void readStoredEntry() throws IOException {
520            ByteArrayOutputStream bos = new ByteArrayOutputStream();
521            byte[] LFH = ZipLong.LFH_SIG.getBytes();
522            byte[] CFH = ZipLong.CFH_SIG.getBytes();
523            byte[] DD = ZipLong.DD_SIG.getBytes();
524            int off = 0;
525            boolean done = false;
526    
527            while (!done) {
528                int r = in.read(buf, off, ZipArchiveOutputStream.BUFFER_SIZE - off);
529                if (r <= 0) {
530                    // read the whole archive without ever finding a
531                    // central directory
532                    throw new IOException("Truncated ZIP file");
533                }
534                if (r + off < 4) {
535                    // buf is too small to check for a signature, loop
536                    off += r;
537                    continue;
538                }
539    
540                int readTooMuch = 0;
541                for (int i = 0; !done && i < r - 4; i++) {
542                    if (buf[i] == LFH[0] && buf[i + 1] == LFH[1]) {
543                        if ((buf[i + 2] == LFH[2] && buf[i + 3] == LFH[3])
544                            || (buf[i] == CFH[2] && buf[i + 3] == CFH[3])) {
545                            // found a LFH or CFH:
546                            readTooMuch = off + r - i - 12 /* dd without signature */;
547                            done = true;
548                        }
549                        else if (buf[i + 2] == DD[2] && buf[i + 3] == DD[3]) {
550                            // found DD:
551                            readTooMuch = off + r - i;
552                            done = true;
553                        }
554                        if (done) {
555                            // * push back bytes read in excess as well as the data
556                            //   descriptor
557                            // * copy the remaining bytes to cache
558                            // * read data descriptor
559                            ((PushbackInputStream) in).unread(buf, off + r - readTooMuch, readTooMuch);
560                            bos.write(buf, 0, i);
561                            readDataDescriptor();
562                        }
563                    }
564                }
565                if (!done) {
566                    // worst case we've read a data descriptor without a
567                    // signature (12 bytes) plus the first three bytes of
568                    // a LFH or CFH signature
569                    // save the last 15 bytes in the buffer, cache
570                    // anything in front of that, read on
571                    if (off + r > 15) {
572                        bos.write(buf, 0, off + r - 15);
573                        System.arraycopy(buf, off + r - 15, buf, 0, 15);
574                        off = 15;
575                    } else {
576                        off += r;
577                    }
578                }
579            }
580    
581            byte[] b = bos.toByteArray();
582            lastStoredEntry = new ByteArrayInputStream(b);
583        }
584    }