001    /*
002     *  Licensed to the Apache Software Foundation (ASF) under one or more
003     *  contributor license agreements.  See the NOTICE file distributed with
004     *  this work for additional information regarding copyright ownership.
005     *  The ASF licenses this file to You under the Apache License, Version 2.0
006     *  (the "License"); you may not use this file except in compliance with
007     *  the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     *  Unless required by applicable law or agreed to in writing, software
012     *  distributed under the License is distributed on an "AS IS" BASIS,
013     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     *  See the License for the specific language governing permissions and
015     *  limitations under the License.
016     *
017     */
018    package org.apache.commons.compress.archivers.zip;
019    
020    import java.io.File;
021    import java.io.IOException;
022    import java.io.InputStream;
023    import java.io.RandomAccessFile;
024    import java.util.Arrays;
025    import java.util.Collections;
026    import java.util.Comparator;
027    import java.util.Enumeration;
028    import java.util.HashMap;
029    import java.util.LinkedHashMap;
030    import java.util.Map;
031    import java.util.zip.Inflater;
032    import java.util.zip.InflaterInputStream;
033    import java.util.zip.ZipException;
034    
035    /**
036     * Replacement for <code>java.util.ZipFile</code>.
037     *
038     * <p>This class adds support for file name encodings other than UTF-8
039     * (which is required to work on ZIP files created by native zip tools
040     * and is able to skip a preamble like the one found in self
041     * extracting archives.  Furthermore it returns instances of
042     * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
043     * instead of <code>java.util.zip.ZipEntry</code>.</p>
044     *
045     * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
046     * have to reimplement all methods anyway.  Like
047     * <code>java.util.ZipFile</code>, it uses RandomAccessFile under the
048     * covers and supports compressed and uncompressed entries.</p>
049     *
050     * <p>The method signatures mimic the ones of
051     * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
052     *
053     * <ul>
054     *   <li>There is no getName method.</li>
055     *   <li>entries has been renamed to getEntries.</li>
056     *   <li>getEntries and getEntry return
057     *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
058     *   instances.</li>
059     *   <li>close is allowed to throw IOException.</li>
060     * </ul>
061     *
062     */
063    public class ZipFile {
064        private static final int HASH_SIZE = 509;
065        private static final int SHORT     =   2;
066        private static final int WORD      =   4;
067        static final int NIBLET_MASK = 0x0f;
068        static final int BYTE_SHIFT = 8;
069        private static final int POS_0 = 0;
070        private static final int POS_1 = 1;
071        private static final int POS_2 = 2;
072        private static final int POS_3 = 3;
073    
074        /**
075         * Maps ZipArchiveEntrys to Longs, recording the offsets of the local
076         * file headers.
077         */
078        private final Map entries = new LinkedHashMap(HASH_SIZE);
079    
080        /**
081         * Maps String to ZipArchiveEntrys, name -> actual entry.
082         */
083        private final Map nameMap = new HashMap(HASH_SIZE);
084    
085        private static final class OffsetEntry {
086            private long headerOffset = -1;
087            private long dataOffset = -1;
088        }
089    
090        /**
091         * The encoding to use for filenames and the file comment.
092         *
093         * <p>For a list of possible values see <a
094         * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
095         * Defaults to UTF-8.</p>
096         */
097        private final String encoding;
098    
099        /**
100         * The zip encoding to use for filenames and the file comment.
101         */
102        private final ZipEncoding zipEncoding;
103    
104        /**
105         * The actual data source.
106         */
107        private final RandomAccessFile archive;
108    
109        /**
110         * Whether to look for and use Unicode extra fields.
111         */
112        private final boolean useUnicodeExtraFields;
113    
114        /**
115         * Opens the given file for reading, assuming "UTF8" for file names.
116         *
117         * @param f the archive.
118         *
119         * @throws IOException if an error occurs while reading the file.
120         */
121        public ZipFile(File f) throws IOException {
122            this(f, ZipEncodingHelper.UTF8);
123        }
124    
125        /**
126         * Opens the given file for reading, assuming "UTF8".
127         *
128         * @param name name of the archive.
129         *
130         * @throws IOException if an error occurs while reading the file.
131         */
132        public ZipFile(String name) throws IOException {
133            this(new File(name), ZipEncodingHelper.UTF8);
134        }
135    
136        /**
137         * Opens the given file for reading, assuming the specified
138         * encoding for file names, scanning unicode extra fields.
139         *
140         * @param name name of the archive.
141         * @param encoding the encoding to use for file names, use null
142         * for the platform's default encoding
143         *
144         * @throws IOException if an error occurs while reading the file.
145         */
146        public ZipFile(String name, String encoding) throws IOException {
147            this(new File(name), encoding, true);
148        }
149    
150        /**
151         * Opens the given file for reading, assuming the specified
152         * encoding for file names and scanning for unicode extra fields.
153         *
154         * @param f the archive.
155         * @param encoding the encoding to use for file names, use null
156         * for the platform's default encoding
157         *
158         * @throws IOException if an error occurs while reading the file.
159         */
160        public ZipFile(File f, String encoding) throws IOException {
161            this(f, encoding, true);
162        }
163    
164        /**
165         * Opens the given file for reading, assuming the specified
166         * encoding for file names.
167         *
168         * @param f the archive.
169         * @param encoding the encoding to use for file names, use null
170         * for the platform's default encoding
171         * @param useUnicodeExtraFields whether to use InfoZIP Unicode
172         * Extra Fields (if present) to set the file names.
173         *
174         * @throws IOException if an error occurs while reading the file.
175         */
176        public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
177            throws IOException {
178            this.encoding = encoding;
179            this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
180            this.useUnicodeExtraFields = useUnicodeExtraFields;
181            archive = new RandomAccessFile(f, "r");
182            boolean success = false;
183            try {
184                Map entriesWithoutUTF8Flag = populateFromCentralDirectory();
185                resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
186                success = true;
187            } finally {
188                if (!success) {
189                    try {
190                        archive.close();
191                    } catch (IOException e2) {
192                        // swallow, throw the original exception instead
193                    }
194                }
195            }
196        }
197    
198        /**
199         * The encoding to use for filenames and the file comment.
200         *
201         * @return null if using the platform's default character encoding.
202         */
203        public String getEncoding() {
204            return encoding;
205        }
206    
207        /**
208         * Closes the archive.
209         * @throws IOException if an error occurs closing the archive.
210         */
211        public void close() throws IOException {
212            archive.close();
213        }
214    
215        /**
216         * close a zipfile quietly; throw no io fault, do nothing
217         * on a null parameter
218         * @param zipfile file to close, can be null
219         */
220        public static void closeQuietly(ZipFile zipfile) {
221            if (zipfile != null) {
222                try {
223                    zipfile.close();
224                } catch (IOException e) {
225                    //ignore
226                }
227            }
228        }
229    
230        /**
231         * Returns all entries.
232         *
233         * <p>Entries will be returned in the same order they appear
234         * within the archive's central directory.</p>
235         *
236         * @return all entries as {@link ZipArchiveEntry} instances
237         */
238        public Enumeration getEntries() {
239            return Collections.enumeration(entries.keySet());
240        }
241    
242        /**
243         * Returns all entries in physical order.
244         *
245         * <p>Entries will be returned in the same order their contents
246         * appear within the archive.</p>
247         *
248         * @return all entries as {@link ZipArchiveEntry} instances
249         *
250         * @since Commons Compress 1.1
251         */
252        public Enumeration getEntriesInPhysicalOrder() {
253            Object[] allEntries = entries.keySet().toArray();
254            Arrays.sort(allEntries, OFFSET_COMPARATOR);
255            return Collections.enumeration(Arrays.asList(allEntries));
256        }
257    
258        /**
259         * Returns a named entry - or <code>null</code> if no entry by
260         * that name exists.
261         * @param name name of the entry.
262         * @return the ZipArchiveEntry corresponding to the given name - or
263         * <code>null</code> if not present.
264         */
265        public ZipArchiveEntry getEntry(String name) {
266            return (ZipArchiveEntry) nameMap.get(name);
267        }
268    
269        /**
270         * Whether this class is able to read the given entry.
271         *
272         * <p>May return false if it is set up to use encryption or a
273         * compression method that hasn't been implemented yet.</p>
274         * @since Apache Commons Compress 1.1
275         */
276        public boolean canReadEntryData(ZipArchiveEntry ze) {
277            return ZipUtil.canHandleEntryData(ze);
278        }
279    
280        /**
281         * Returns an InputStream for reading the contents of the given entry.
282         *
283         * @param ze the entry to get the stream for.
284         * @return a stream to read the entry from.
285         * @throws IOException if unable to create an input stream from the zipenty
286         * @throws ZipException if the zipentry uses an unsupported feature
287         */
288        public InputStream getInputStream(ZipArchiveEntry ze)
289            throws IOException, ZipException {
290            OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
291            if (offsetEntry == null) {
292                return null;
293            }
294            ZipUtil.checkRequestedFeatures(ze);
295            long start = offsetEntry.dataOffset;
296            BoundedInputStream bis =
297                new BoundedInputStream(start, ze.getCompressedSize());
298            switch (ze.getMethod()) {
299                case ZipArchiveEntry.STORED:
300                    return bis;
301                case ZipArchiveEntry.DEFLATED:
302                    bis.addDummy();
303                    return new InflaterInputStream(bis, new Inflater(true));
304                default:
305                    throw new ZipException("Found unsupported compression method "
306                                           + ze.getMethod());
307            }
308        }
309    
310        private static final int CFH_LEN =
311            /* version made by                 */ SHORT
312            /* version needed to extract       */ + SHORT
313            /* general purpose bit flag        */ + SHORT
314            /* compression method              */ + SHORT
315            /* last mod file time              */ + SHORT
316            /* last mod file date              */ + SHORT
317            /* crc-32                          */ + WORD
318            /* compressed size                 */ + WORD
319            /* uncompressed size               */ + WORD
320            /* filename length                 */ + SHORT
321            /* extra field length              */ + SHORT
322            /* file comment length             */ + SHORT
323            /* disk number start               */ + SHORT
324            /* internal file attributes        */ + SHORT
325            /* external file attributes        */ + WORD
326            /* relative offset of local header */ + WORD;
327    
328        /**
329         * Reads the central directory of the given archive and populates
330         * the internal tables with ZipArchiveEntry instances.
331         *
332         * <p>The ZipArchiveEntrys will know all data that can be obtained from
333         * the central directory alone, but not the data that requires the
334         * local file header or additional data to be read.</p>
335         *
336         * @return a Map&lt;ZipArchiveEntry, NameAndComment>&gt; of
337         * zipentries that didn't have the language encoding flag set when
338         * read.
339         */
340        private Map populateFromCentralDirectory()
341            throws IOException {
342            HashMap noUTF8Flag = new HashMap();
343    
344            positionAtCentralDirectory();
345    
346            byte[] cfh = new byte[CFH_LEN];
347    
348            byte[] signatureBytes = new byte[WORD];
349            archive.readFully(signatureBytes);
350            long sig = ZipLong.getValue(signatureBytes);
351            final long cfhSig = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
352            if (sig != cfhSig && startsWithLocalFileHeader()) {
353                throw new IOException("central directory is empty, can't expand"
354                                      + " corrupt archive.");
355            }
356            while (sig == cfhSig) {
357                archive.readFully(cfh);
358                int off = 0;
359                ZipArchiveEntry ze = new ZipArchiveEntry();
360    
361                int versionMadeBy = ZipShort.getValue(cfh, off);
362                off += SHORT;
363                ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
364    
365                off += SHORT; // skip version info
366    
367                final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfh, off);
368                final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
369                final ZipEncoding entryEncoding =
370                    hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
371                ze.setGeneralPurposeBit(gpFlag);
372    
373                off += SHORT;
374    
375                ze.setMethod(ZipShort.getValue(cfh, off));
376                off += SHORT;
377    
378                // FIXME this is actually not very cpu cycles friendly as we are converting from
379                // dos to java while the underlying Sun implementation will convert
380                // from java to dos time for internal storage...
381                long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfh, off));
382                ze.setTime(time);
383                off += WORD;
384    
385                ze.setCrc(ZipLong.getValue(cfh, off));
386                off += WORD;
387    
388                ze.setCompressedSize(ZipLong.getValue(cfh, off));
389                off += WORD;
390    
391                ze.setSize(ZipLong.getValue(cfh, off));
392                off += WORD;
393    
394                int fileNameLen = ZipShort.getValue(cfh, off);
395                off += SHORT;
396    
397                int extraLen = ZipShort.getValue(cfh, off);
398                off += SHORT;
399    
400                int commentLen = ZipShort.getValue(cfh, off);
401                off += SHORT;
402    
403                off += SHORT; // disk number
404    
405                ze.setInternalAttributes(ZipShort.getValue(cfh, off));
406                off += SHORT;
407    
408                ze.setExternalAttributes(ZipLong.getValue(cfh, off));
409                off += WORD;
410    
411                byte[] fileName = new byte[fileNameLen];
412                archive.readFully(fileName);
413                ze.setName(entryEncoding.decode(fileName));
414    
415                // LFH offset,
416                OffsetEntry offset = new OffsetEntry();
417                offset.headerOffset = ZipLong.getValue(cfh, off);
418                // data offset will be filled later
419                entries.put(ze, offset);
420    
421                nameMap.put(ze.getName(), ze);
422    
423                byte[] cdExtraData = new byte[extraLen];
424                archive.readFully(cdExtraData);
425                ze.setCentralDirectoryExtra(cdExtraData);
426    
427                byte[] comment = new byte[commentLen];
428                archive.readFully(comment);
429                ze.setComment(entryEncoding.decode(comment));
430    
431                archive.readFully(signatureBytes);
432                sig = ZipLong.getValue(signatureBytes);
433    
434                if (!hasUTF8Flag && useUnicodeExtraFields) {
435                    noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
436                }
437            }
438            return noUTF8Flag;
439        }
440    
441        private static final int MIN_EOCD_SIZE =
442            /* end of central dir signature    */ WORD
443            /* number of this disk             */ + SHORT
444            /* number of the disk with the     */
445            /* start of the central directory  */ + SHORT
446            /* total number of entries in      */
447            /* the central dir on this disk    */ + SHORT
448            /* total number of entries in      */
449            /* the central dir                 */ + SHORT
450            /* size of the central directory   */ + WORD
451            /* offset of start of central      */
452            /* directory with respect to       */
453            /* the starting disk number        */ + WORD
454            /* zipfile comment length          */ + SHORT;
455    
456        private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
457            /* maximum length of zipfile comment */ + 0xFFFF;
458    
459        private static final int CFD_LOCATOR_OFFSET =
460            /* end of central dir signature    */ WORD
461            /* number of this disk             */ + SHORT
462            /* number of the disk with the     */
463            /* start of the central directory  */ + SHORT
464            /* total number of entries in      */
465            /* the central dir on this disk    */ + SHORT
466            /* total number of entries in      */
467            /* the central dir                 */ + SHORT
468            /* size of the central directory   */ + WORD;
469    
470        /**
471         * Searches for the &quot;End of central dir record&quot;, parses
472         * it and positions the stream at the first central directory
473         * record.
474         */
475        private void positionAtCentralDirectory()
476            throws IOException {
477            boolean found = false;
478            long off = archive.length() - MIN_EOCD_SIZE;
479            long stopSearching = Math.max(0L, archive.length() - MAX_EOCD_SIZE);
480            if (off >= 0) {
481                archive.seek(off);
482                byte[] sig = ZipArchiveOutputStream.EOCD_SIG;
483                int curr = archive.read();
484                while (off >= stopSearching && curr != -1) {
485                    if (curr == sig[POS_0]) {
486                        curr = archive.read();
487                        if (curr == sig[POS_1]) {
488                            curr = archive.read();
489                            if (curr == sig[POS_2]) {
490                                curr = archive.read();
491                                if (curr == sig[POS_3]) {
492                                    found = true;
493                                    break;
494                                }
495                            }
496                        }
497                    }
498                    archive.seek(--off);
499                    curr = archive.read();
500                }
501            }
502            if (!found) {
503                throw new ZipException("archive is not a ZIP archive");
504            }
505            archive.seek(off + CFD_LOCATOR_OFFSET);
506            byte[] cfdOffset = new byte[WORD];
507            archive.readFully(cfdOffset);
508            archive.seek(ZipLong.getValue(cfdOffset));
509        }
510    
511        /**
512         * Number of bytes in local file header up to the &quot;length of
513         * filename&quot; entry.
514         */
515        private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
516            /* local file header signature     */ WORD
517            /* version needed to extract       */ + SHORT
518            /* general purpose bit flag        */ + SHORT
519            /* compression method              */ + SHORT
520            /* last mod file time              */ + SHORT
521            /* last mod file date              */ + SHORT
522            /* crc-32                          */ + WORD
523            /* compressed size                 */ + WORD
524            /* uncompressed size               */ + WORD;
525    
526        /**
527         * Walks through all recorded entries and adds the data available
528         * from the local file header.
529         *
530         * <p>Also records the offsets for the data to read from the
531         * entries.</p>
532         */
533        private void resolveLocalFileHeaderData(Map entriesWithoutUTF8Flag)
534            throws IOException {
535            Enumeration e = getEntries();
536            while (e.hasMoreElements()) {
537                ZipArchiveEntry ze = (ZipArchiveEntry) e.nextElement();
538                OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
539                long offset = offsetEntry.headerOffset;
540                archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
541                byte[] b = new byte[SHORT];
542                archive.readFully(b);
543                int fileNameLen = ZipShort.getValue(b);
544                archive.readFully(b);
545                int extraFieldLen = ZipShort.getValue(b);
546                int lenToSkip = fileNameLen;
547                while (lenToSkip > 0) {
548                    int skipped = archive.skipBytes(lenToSkip);
549                    if (skipped <= 0) {
550                        throw new RuntimeException("failed to skip file name in"
551                                                   + " local file header");
552                    }
553                    lenToSkip -= skipped;
554                }
555                byte[] localExtraData = new byte[extraFieldLen];
556                archive.readFully(localExtraData);
557                ze.setExtra(localExtraData);
558                /*dataOffsets.put(ze,
559                                new Long(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
560                                         + SHORT + SHORT + fileNameLen + extraFieldLen));
561                */
562                offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
563                    + SHORT + SHORT + fileNameLen + extraFieldLen;
564    
565                if (entriesWithoutUTF8Flag.containsKey(ze)) {
566                    String orig = ze.getName();
567                    NameAndComment nc = (NameAndComment) entriesWithoutUTF8Flag.get(ze);
568                    ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
569                                                             nc.comment);
570                    if (!orig.equals(ze.getName())) {
571                        nameMap.remove(orig);
572                        nameMap.put(ze.getName(), ze);
573                    }
574                }
575            }
576        }
577    
578        /**
579         * Checks whether the archive starts with a LFH.  If it doesn't,
580         * it may be an empty archive.
581         */
582        private boolean startsWithLocalFileHeader() throws IOException {
583            archive.seek(0);
584            final byte[] start = new byte[WORD];
585            archive.readFully(start);
586            for (int i = 0; i < start.length; i++) {
587                if (start[i] != ZipArchiveOutputStream.LFH_SIG[i]) {
588                    return false;
589                }
590            }
591            return true;
592        }
593    
594        /**
595         * InputStream that delegates requests to the underlying
596         * RandomAccessFile, making sure that only bytes from a certain
597         * range can be read.
598         */
599        private class BoundedInputStream extends InputStream {
600            private long remaining;
601            private long loc;
602            private boolean addDummyByte = false;
603    
604            BoundedInputStream(long start, long remaining) {
605                this.remaining = remaining;
606                loc = start;
607            }
608    
609            public int read() throws IOException {
610                if (remaining-- <= 0) {
611                    if (addDummyByte) {
612                        addDummyByte = false;
613                        return 0;
614                    }
615                    return -1;
616                }
617                synchronized (archive) {
618                    archive.seek(loc++);
619                    return archive.read();
620                }
621            }
622    
623            public int read(byte[] b, int off, int len) throws IOException {
624                if (remaining <= 0) {
625                    if (addDummyByte) {
626                        addDummyByte = false;
627                        b[off] = 0;
628                        return 1;
629                    }
630                    return -1;
631                }
632    
633                if (len <= 0) {
634                    return 0;
635                }
636    
637                if (len > remaining) {
638                    len = (int) remaining;
639                }
640                int ret = -1;
641                synchronized (archive) {
642                    archive.seek(loc);
643                    ret = archive.read(b, off, len);
644                }
645                if (ret > 0) {
646                    loc += ret;
647                    remaining -= ret;
648                }
649                return ret;
650            }
651    
652            /**
653             * Inflater needs an extra dummy byte for nowrap - see
654             * Inflater's javadocs.
655             */
656            void addDummy() {
657                addDummyByte = true;
658            }
659        }
660    
661        private static final class NameAndComment {
662            private final byte[] name;
663            private final byte[] comment;
664            private NameAndComment(byte[] name, byte[] comment) {
665                this.name = name;
666                this.comment = comment;
667            }
668        }
669    
670        /**
671         * Compares two ZipArchiveEntries based on their offset within the archive.
672         *
673         * <p>Won't return any meaningful results if one of the entries
674         * isn't part of the archive at all.</p>
675         *
676         * @since Commons Compress 1.1
677         */
678        private final Comparator OFFSET_COMPARATOR =
679            new Comparator() {
680                public int compare(Object o1, Object o2) {
681                    if (o1 == o2)
682                        return 0;
683    
684                    ZipArchiveEntry e1 = (ZipArchiveEntry) o1;
685                    ZipArchiveEntry e2 = (ZipArchiveEntry) o2;
686    
687                    OffsetEntry off1 = (OffsetEntry) entries.get(e1);
688                    OffsetEntry off2 = (OffsetEntry) entries.get(e2);
689                    if (off1 == null) {
690                        return 1;
691                    }
692                    if (off2 == null) {
693                        return -1;
694                    }
695                    long val = (off1.headerOffset - off2.headerOffset);
696                    return val == 0 ? 0 : val < 0 ? -1 : +1;
697                }
698            };
699    }