001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.ar;
020
021import java.io.EOFException;
022import java.io.IOException;
023import java.io.InputStream;
024import java.util.Arrays;
025
026import org.apache.commons.compress.archivers.ArchiveEntry;
027import org.apache.commons.compress.archivers.ArchiveInputStream;
028import org.apache.commons.compress.utils.ArchiveUtils;
029import org.apache.commons.compress.utils.IOUtils;
030
031/**
032 * Implements the "ar" archive format as an input stream.
033 *
034 * @NotThreadSafe
035 *
036 */
037public class ArArchiveInputStream extends ArchiveInputStream {
038
039    private final InputStream input;
040    private long offset = 0;
041    private boolean closed;
042
043    /*
044     * If getNextEnxtry has been called, the entry metadata is stored in
045     * currentEntry.
046     */
047    private ArArchiveEntry currentEntry = null;
048
049    // Storage area for extra long names (GNU ar)
050    private byte[] namebuffer = null;
051
052    /*
053     * The offset where the current entry started. -1 if no entry has been
054     * called
055     */
056    private long entryOffset = -1;
057
058    // offsets and length of meta data parts
059    private static final int NAME_OFFSET = 0;
060    private static final int NAME_LEN = 16;
061    private static final int LAST_MODIFIED_OFFSET = NAME_LEN;
062    private static final int LAST_MODIFIED_LEN = 12;
063    private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN;
064    private static final int USER_ID_LEN = 6;
065    private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN;
066    private static final int GROUP_ID_LEN = 6;
067    private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN;
068    private static final int FILE_MODE_LEN = 8;
069    private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN;
070    private static final int LENGTH_LEN = 10;
071
072    // cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
073    private final byte[] metaData =
074        new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN];
075
076    /**
077     * Constructs an Ar input stream with the referenced stream
078     *
079     * @param pInput
080     *            the ar input stream
081     */
082    public ArArchiveInputStream(final InputStream pInput) {
083        input = pInput;
084        closed = false;
085    }
086
087    /**
088     * Returns the next AR entry in this stream.
089     *
090     * @return the next AR entry.
091     * @throws IOException
092     *             if the entry could not be read
093     */
094    public ArArchiveEntry getNextArEntry() throws IOException {
095        if (currentEntry != null) {
096            final long entryEnd = entryOffset + currentEntry.getLength();
097            long skipped = IOUtils.skip(input, entryEnd - offset);
098            trackReadBytes(skipped);
099            currentEntry = null;
100        }
101
102        if (offset == 0) {
103            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
104            final byte[] realized = new byte[expected.length];
105            final int read = IOUtils.readFully(input, realized);
106            trackReadBytes(read);
107            if (read != expected.length) {
108                throw new IOException("Failed to read header. Occured at byte: " + getBytesRead());
109            }
110            if (!Arrays.equals(expected, realized)) {
111                throw new IOException("Invalid header " + ArchiveUtils.toAsciiString(realized));
112            }
113        }
114
115        if (offset % 2 != 0) {
116            if (input.read() < 0) {
117                // hit eof
118                return null;
119            }
120            trackReadBytes(1);
121        }
122
123        {
124            final int read = IOUtils.readFully(input, metaData);
125            trackReadBytes(read);
126            if (read == 0) {
127                return null;
128            }
129            if (read < metaData.length) {
130                throw new IOException("Truncated ar archive");
131            }
132        }
133
134        {
135            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
136            final byte[] realized = new byte[expected.length];
137            final int read = IOUtils.readFully(input, realized);
138            trackReadBytes(read);
139            if (read != expected.length) {
140                throw new IOException("Failed to read entry trailer. Occured at byte: " + getBytesRead());
141            }
142            if (!Arrays.equals(expected, realized)) {
143                throw new IOException("Invalid entry trailer. not read the content? Occured at byte: " + getBytesRead());
144            }
145        }
146
147        entryOffset = offset;
148
149//        GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename.
150
151        // entry name is stored as ASCII string
152        String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim();
153        if (isGNUStringTable(temp)) { // GNU extended filenames entry
154            currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN);
155            return getNextArEntry();
156        }
157
158        long len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN);
159        if (temp.endsWith("/")) { // GNU terminator
160            temp = temp.substring(0, temp.length() - 1);
161        } else if (isGNULongName(temp)) {
162            final int off = Integer.parseInt(temp.substring(1));// get the offset
163            temp = getExtendedName(off); // convert to the long name
164        } else if (isBSDLongName(temp)) {
165            temp = getBSDLongName(temp);
166            // entry length contained the length of the file name in
167            // addition to the real length of the entry.
168            // assume file name was ASCII, there is no "standard" otherwise
169            final int nameLen = temp.length();
170            len -= nameLen;
171            entryOffset += nameLen;
172        }
173
174        currentEntry = new ArArchiveEntry(temp, len,
175                                          asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true),
176                                          asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true),
177                                          asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8),
178                                          asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN));
179        return currentEntry;
180    }
181
182    /**
183     * Get an extended name from the GNU extended name buffer.
184     *
185     * @param offset pointer to entry within the buffer
186     * @return the extended file name; without trailing "/" if present.
187     * @throws IOException if name not found or buffer not set up
188     */
189    private String getExtendedName(final int offset) throws IOException {
190        if (namebuffer == null) {
191            throw new IOException("Cannot process GNU long filename as no // record was found");
192        }
193        for (int i = offset; i < namebuffer.length; i++) {
194            if (namebuffer[i] == '\012' || namebuffer[i] == 0) {
195                if (namebuffer[i - 1] == '/') {
196                    i--; // drop trailing /
197                }
198                return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset);
199            }
200        }
201        throw new IOException("Failed to read entry: " + offset);
202    }
203
204    private long asLong(final byte[] byteArray, int offset, int len) {
205        return Long.parseLong(ArchiveUtils.toAsciiString(byteArray, offset, len).trim());
206    }
207
208    private int asInt(final byte[] byteArray, int offset, int len) {
209        return asInt(byteArray, offset, len, 10, false);
210    }
211
212    private int asInt(final byte[] byteArray, int offset, int len, final boolean treatBlankAsZero) {
213        return asInt(byteArray, offset, len, 10, treatBlankAsZero);
214    }
215
216    private int asInt(final byte[] byteArray, int offset, int len, final int base) {
217        return asInt(byteArray, offset, len, base, false);
218    }
219
220    private int asInt(final byte[] byteArray, int offset, int len, final int base, final boolean treatBlankAsZero) {
221        final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim();
222        if (string.length() == 0 && treatBlankAsZero) {
223            return 0;
224        }
225        return Integer.parseInt(string, base);
226    }
227
228    /*
229     * (non-Javadoc)
230     *
231     * @see
232     * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
233     */
234    @Override
235    public ArchiveEntry getNextEntry() throws IOException {
236        return getNextArEntry();
237    }
238
239    /*
240     * (non-Javadoc)
241     *
242     * @see java.io.InputStream#close()
243     */
244    @Override
245    public void close() throws IOException {
246        if (!closed) {
247            closed = true;
248            input.close();
249        }
250        currentEntry = null;
251    }
252
253    /*
254     * (non-Javadoc)
255     *
256     * @see java.io.InputStream#read(byte[], int, int)
257     */
258    @Override
259    public int read(final byte[] b, final int off, final int len) throws IOException {
260        if (len == 0) {
261            return 0;
262        }
263        if (currentEntry == null) {
264            throw new IllegalStateException("No current ar entry");
265        }
266        final long entryEnd = entryOffset + currentEntry.getLength();
267        if (len < 0 || offset >= entryEnd) {
268            return -1;
269        }
270        final int toRead = (int) Math.min(len, entryEnd - offset);
271        final int ret = this.input.read(b, off, toRead);
272        trackReadBytes(ret);
273        return ret;
274    }
275
276    /**
277     * Checks if the signature matches ASCII "!&lt;arch&gt;" followed by a single LF
278     * control character
279     *
280     * @param signature
281     *            the bytes to check
282     * @param length
283     *            the number of bytes to check
284     * @return true, if this stream is an Ar archive stream, false otherwise
285     */
286    public static boolean matches(final byte[] signature, final int length) {
287        // 3c21 7261 6863 0a3e
288
289        return length >= 8 && signature[0] == 0x21 &&
290                signature[1] == 0x3c && signature[2] == 0x61 &&
291                signature[3] == 0x72 && signature[4] == 0x63 &&
292                signature[5] == 0x68 && signature[6] == 0x3e &&
293                signature[7] == 0x0a;
294    }
295
296    static final String BSD_LONGNAME_PREFIX = "#1/";
297    private static final int BSD_LONGNAME_PREFIX_LEN =
298        BSD_LONGNAME_PREFIX.length();
299    private static final String BSD_LONGNAME_PATTERN =
300        "^" + BSD_LONGNAME_PREFIX + "\\d+";
301
302    /**
303     * Does the name look like it is a long name (or a name containing
304     * spaces) as encoded by BSD ar?
305     *
306     * <p>From the FreeBSD ar(5) man page:</p>
307     * <pre>
308     * BSD   In the BSD variant, names that are shorter than 16
309     *       characters and without embedded spaces are stored
310     *       directly in this field.  If a name has an embedded
311     *       space, or if it is longer than 16 characters, then
312     *       the string "#1/" followed by the decimal represen-
313     *       tation of the length of the file name is placed in
314     *       this field. The actual file name is stored immedi-
315     *       ately after the archive header.  The content of the
316     *       archive member follows the file name.  The ar_size
317     *       field of the header (see below) will then hold the
318     *       sum of the size of the file name and the size of
319     *       the member.
320     * </pre>
321     *
322     * @since 1.3
323     */
324    private static boolean isBSDLongName(final String name) {
325        return name != null && name.matches(BSD_LONGNAME_PATTERN);
326    }
327
328    /**
329     * Reads the real name from the current stream assuming the very
330     * first bytes to be read are the real file name.
331     *
332     * @see #isBSDLongName
333     *
334     * @since 1.3
335     */
336    private String getBSDLongName(final String bsdLongName) throws IOException {
337        final int nameLen =
338            Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN));
339        final byte[] name = new byte[nameLen];
340        final int read = IOUtils.readFully(input, name);
341        trackReadBytes(read);
342        if (read != nameLen) {
343            throw new EOFException();
344        }
345        return ArchiveUtils.toAsciiString(name);
346    }
347
348    private static final String GNU_STRING_TABLE_NAME = "//";
349
350    /**
351     * Is this the name of the "Archive String Table" as used by
352     * SVR4/GNU to store long file names?
353     *
354     * <p>GNU ar stores multiple extended file names in the data section
355     * of a file with the name "//", this record is referred to by
356     * future headers.</p>
357     *
358     * <p>A header references an extended file name by storing a "/"
359     * followed by a decimal offset to the start of the file name in
360     * the extended file name data section.</p>
361     *
362     * <p>The format of the "//" file itself is simply a list of the
363     * long file names, each separated by one or more LF
364     * characters. Note that the decimal offsets are number of
365     * characters, not line or string number within the "//" file.</p>
366     */
367    private static boolean isGNUStringTable(final String name) {
368        return GNU_STRING_TABLE_NAME.equals(name);
369    }
370
371    private void trackReadBytes(final long read) {
372        count(read);
373        if (read > 0) {
374            offset += read;
375        }
376    }
377
378    /**
379     * Reads the GNU archive String Table.
380     *
381     * @see #isGNUStringTable
382     */
383    private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException {
384        final int bufflen = asInt(length, offset, len); // Assume length will fit in an int
385        namebuffer = new byte[bufflen];
386        final int read = IOUtils.readFully(input, namebuffer, 0, bufflen);
387        trackReadBytes(read);
388        if (read != bufflen){
389            throw new IOException("Failed to read complete // record: expected="
390                                  + bufflen + " read=" + read);
391        }
392        return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
393    }
394
395    private static final String GNU_LONGNAME_PATTERN = "^/\\d+";
396
397    /**
398     * Does the name look like it is a long name (or a name containing
399     * spaces) as encoded by SVR4/GNU ar?
400     *
401     * @see #isGNUStringTable
402     */
403    private boolean isGNULongName(final String name) {
404        return name != null && name.matches(GNU_LONGNAME_PATTERN);
405    }
406}