001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.archivers.ar;
020    
021    import java.io.IOException;
022    import java.io.InputStream;
023    
024    import org.apache.commons.compress.archivers.ArchiveEntry;
025    import org.apache.commons.compress.archivers.ArchiveInputStream;
026    import org.apache.commons.compress.utils.ArchiveUtils;
027    
028    /**
029     * Implements the "ar" archive format as an input stream.
030     * 
031     * @NotThreadSafe
032     * 
033     */
034    public class ArArchiveInputStream extends ArchiveInputStream {
035    
036        private final InputStream input;
037        private long offset = 0;
038        private boolean closed;
039    
040        /*
041         * If getNextEnxtry has been called, the entry metadata is stored in
042         * currentEntry.
043         */
044        private ArArchiveEntry currentEntry = null;
045    
046        // Storage area for extra long names (GNU ar)
047        private byte[] namebuffer = null;
048    
049        /*
050         * The offset where the current entry started. -1 if no entry has been
051         * called
052         */
053        private long entryOffset = -1;
054    
055        /**
056         * Constructs an Ar input stream with the referenced stream
057         * 
058         * @param pInput
059         *            the ar input stream
060         */
061        public ArArchiveInputStream(final InputStream pInput) {
062            input = pInput;
063            closed = false;
064        }
065    
066        /**
067         * Returns the next AR entry in this stream.
068         * 
069         * @return the next AR entry.
070         * @throws IOException
071         *             if the entry could not be read
072         */
073        public ArArchiveEntry getNextArEntry() throws IOException {
074            if (currentEntry != null) {
075                final long entryEnd = entryOffset + currentEntry.getLength();
076                while (offset < entryEnd) {
077                    int x = read();
078                    if (x == -1) {
079                        // hit EOF before previous entry was complete
080                        // TODO: throw an exception instead?
081                        return null;
082                    }
083                }
084                currentEntry = null;
085            }
086    
087            if (offset == 0) {
088                final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
089                final byte[] realized = new byte[expected.length];
090                final int read = read(realized);
091                if (read != expected.length) {
092                    throw new IOException("failed to read header. Occured at byte: " + getBytesRead());
093                }
094                for (int i = 0; i < expected.length; i++) {
095                    if (expected[i] != realized[i]) {
096                        throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized));
097                    }
098                }
099            }
100    
101            if (offset % 2 != 0 && read() < 0) {
102                // hit eof
103                return null;
104            }
105    
106            if (input.available() == 0) {
107                return null;
108            }
109    
110            final byte[] name = new byte[16];
111            final byte[] lastmodified = new byte[12];
112            final byte[] userid = new byte[6];
113            final byte[] groupid = new byte[6];
114            final byte[] filemode = new byte[8];
115            final byte[] length = new byte[10];
116    
117            read(name);
118            read(lastmodified);
119            read(userid);
120            read(groupid);
121            read(filemode);
122            read(length);
123    
124            {
125                final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
126                final byte[] realized = new byte[expected.length];
127                final int read = read(realized);
128                if (read != expected.length) {
129                    throw new IOException("failed to read entry trailer. Occured at byte: " + getBytesRead());
130                }
131                for (int i = 0; i < expected.length; i++) {
132                    if (expected[i] != realized[i]) {
133                        throw new IOException("invalid entry trailer. not read the content? Occured at byte: " + getBytesRead());
134                    }
135                }
136            }
137    
138            entryOffset = offset;
139    
140    //        GNU ar stores multiple extended filenames in the data section of a file with the name "//", this record is referred to by future headers. A header references an extended filename by storing a "/" followed by a decimal offset to the start of the filename in the extended filename data section. The format of this "//" file itself is simply a list of the long filenames, each separated by one or more LF characters. Note that the decimal offsets are number of characters, not line or string number within the "//" file.
141    //
142    //        GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename.
143    
144            // entry name is stored as ASCII string
145            String temp = ArchiveUtils.toAsciiString(name).trim();
146    
147            if (temp.equals("//")){ // GNU extended filenames entry
148                int bufflen = asInt(length); // Assume length will fit in an int
149                namebuffer = new byte[bufflen];
150                int read = read(namebuffer, 0, bufflen);
151                if (read != bufflen){
152                    throw new IOException("Failed to read complete // record: expected="+bufflen+" read="+read);
153                }
154                currentEntry = new ArArchiveEntry(temp, bufflen);
155                return getNextArEntry();
156            } else if (temp.endsWith("/")) { // GNU terminator
157                temp = temp.substring(0, temp.length() - 1);
158            } else if (temp.matches("^/\\d+")) {// GNU long filename ref.
159                int offset = Integer.parseInt(temp.substring(1));// get the offset
160                temp = getExtendedName(offset); // convert to the long name
161            }
162            currentEntry = new ArArchiveEntry(temp, asLong(length), asInt(userid, true),
163                                              asInt(groupid, true), asInt(filemode, 8),
164                                              asLong(lastmodified));
165            return currentEntry;
166        }
167    
168        /**
169         * Get an extended name from the GNU extended name buffer.
170         * 
171         * @param offset pointer to entry within the buffer
172         * @return the extended file name; without trailing "/" if present.
173         * @throws IOException if name not found or buffer not set up
174         */
175        private String getExtendedName(int offset) throws IOException{
176            if (namebuffer == null) {
177                throw new IOException("Cannot process GNU long filename as no // record was found");
178            }
179            for(int i=offset; i < namebuffer.length; i++){
180                if (namebuffer[i]=='\012'){
181                    if (namebuffer[i-1]=='/') {
182                        i--; // drop trailing /
183                    }
184                    return ArchiveUtils.toAsciiString(namebuffer, offset, i-offset);
185                }
186            }
187            throw new IOException("Failed to read entry: "+offset);
188        }
189        private long asLong(byte[] input) {
190            return Long.parseLong(new String(input).trim());
191        }
192    
193        private int asInt(byte[] input) {
194            return asInt(input, 10, false);
195        }
196    
197        private int asInt(byte[] input, boolean treatBlankAsZero) {
198            return asInt(input, 10, treatBlankAsZero);
199        }
200    
201        private int asInt(byte[] input, int base) {
202            return asInt(input, base, false);
203        }
204    
205        private int asInt(byte[] input, int base, boolean treatBlankAsZero) {
206            String string = new String(input).trim();
207            if (string.length() == 0 && treatBlankAsZero) {
208                return 0;
209            }
210            return Integer.parseInt(string, base);
211        }
212    
213        /*
214         * (non-Javadoc)
215         * 
216         * @see
217         * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
218         */
219        public ArchiveEntry getNextEntry() throws IOException {
220            return getNextArEntry();
221        }
222    
223        /*
224         * (non-Javadoc)
225         * 
226         * @see java.io.InputStream#close()
227         */
228        public void close() throws IOException {
229            if (!closed) {
230                closed = true;
231                input.close();
232            }
233            currentEntry = null;
234        }
235    
236        /*
237         * (non-Javadoc)
238         * 
239         * @see java.io.InputStream#read(byte[], int, int)
240         */
241        public int read(byte[] b, final int off, final int len) throws IOException {
242            int toRead = len;
243            if (currentEntry != null) {
244                final long entryEnd = entryOffset + currentEntry.getLength();
245                if (len > 0 && entryEnd > offset) {
246                    toRead = (int) Math.min(len, entryEnd - offset);
247                } else {
248                    return -1;
249                }
250            }
251            final int ret = this.input.read(b, off, toRead);
252            count(ret);
253            offset += (ret > 0 ? ret : 0);
254            return ret;
255        }
256    
257        /**
258         * Checks if the signature matches ASCII "!<arch>" followed by a single LF
259         * control character
260         * 
261         * @param signature
262         *            the bytes to check
263         * @param length
264         *            the number of bytes to check
265         * @return true, if this stream is an Ar archive stream, false otherwise
266         */
267        public static boolean matches(byte[] signature, int length) {
268            // 3c21 7261 6863 0a3e
269    
270            if (length < 8) {
271                return false;
272            }
273            if (signature[0] != 0x21) {
274                return false;
275            }
276            if (signature[1] != 0x3c) {
277                return false;
278            }
279            if (signature[2] != 0x61) {
280                return false;
281            }
282            if (signature[3] != 0x72) {
283                return false;
284            }
285            if (signature[4] != 0x63) {
286                return false;
287            }
288            if (signature[5] != 0x68) {
289                return false;
290            }
291            if (signature[6] != 0x3e) {
292                return false;
293            }
294            if (signature[7] != 0x0a) {
295                return false;
296            }
297    
298            return true;
299        }
300    
301    }