001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.archivers.ar;
020    
021    import java.io.IOException;
022    import java.io.InputStream;
023    
024    import org.apache.commons.compress.archivers.ArchiveEntry;
025    import org.apache.commons.compress.archivers.ArchiveInputStream;
026    import org.apache.commons.compress.utils.ArchiveUtils;
027    
028    /**
029     * Implements the "ar" archive format as an input stream.
030     * 
031     * @NotThreadSafe
032     * 
033     */
034    public class ArArchiveInputStream extends ArchiveInputStream {
035    
036        private final InputStream input;
037        private long offset = 0;
038        private boolean closed;
039        
040        /*
041         * If getNextEnxtry has been called, the entry metadata is stored in
042         * currentEntry.
043         */
044        private ArArchiveEntry currentEntry = null;
045        
046        // Storage area for extra long names (GNU ar)
047        private byte[] namebuffer = null;
048        
049        /*
050         * The offset where the current entry started. -1 if no entry has been
051         * called
052         */
053        private long entryOffset = -1;
054    
055        /**
056         * Constructs an Ar input stream with the referenced stream
057         * 
058         * @param pInput
059         *            the ar input stream
060         */
061        public ArArchiveInputStream(final InputStream pInput) {
062            input = pInput;
063            closed = false;
064        }
065    
066        /**
067         * Returns the next AR entry in this stream.
068         * 
069         * @return the next AR entry.
070         * @throws IOException
071         *             if the entry could not be read
072         */
073        public ArArchiveEntry getNextArEntry() throws IOException {
074            if (currentEntry != null) {
075                final long entryEnd = entryOffset + currentEntry.getLength();
076                while (offset < entryEnd) {
077                    int x = read();
078                    if (x == -1) {
079                        // hit EOF before previous entry was complete
080                        // TODO: throw an exception instead?
081                        return null;
082                    }
083                }
084                currentEntry = null;
085            }
086    
087            if (offset == 0) {
088                final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
089                final byte[] realized = new byte[expected.length];
090                final int read = read(realized);
091                if (read != expected.length) {
092                    throw new IOException("failed to read header. Occured at byte: " + getBytesRead());
093                }
094                for (int i = 0; i < expected.length; i++) {
095                    if (expected[i] != realized[i]) {
096                        throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized));
097                    }
098                }
099            }
100    
101            if (offset % 2 != 0) {
102                if (read() < 0) {
103                    // hit eof
104                    return null;
105                }
106            }
107    
108            if (input.available() == 0) {
109                return null;
110            }
111    
112            final byte[] name = new byte[16];
113            final byte[] lastmodified = new byte[12];
114            final byte[] userid = new byte[6];
115            final byte[] groupid = new byte[6];
116            final byte[] filemode = new byte[8];
117            final byte[] length = new byte[10];
118    
119            read(name);
120            read(lastmodified);
121            read(userid);
122            read(groupid);
123            read(filemode);
124            read(length);
125    
126            {
127                final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
128                final byte[] realized = new byte[expected.length];
129                final int read = read(realized);
130                if (read != expected.length) {
131                    throw new IOException("failed to read entry trailer. Occured at byte: " + getBytesRead());
132                }
133                for (int i = 0; i < expected.length; i++) {
134                    if (expected[i] != realized[i]) {
135                        throw new IOException("invalid entry trailer. not read the content? Occured at byte: " + getBytesRead());
136                    }
137                }
138            }
139    
140            entryOffset = offset;
141    
142    //        GNU ar stores multiple extended filenames in the data section of a file with the name "//", this record is referred to by future headers. A header references an extended filename by storing a "/" followed by a decimal offset to the start of the filename in the extended filename data section. The format of this "//" file itself is simply a list of the long filenames, each separated by one or more LF characters. Note that the decimal offsets are number of characters, not line or string number within the "//" file.
143    //
144    //        GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename.
145    
146            // entry name is stored as ASCII string
147            String temp = ArchiveUtils.toAsciiString(name).trim();
148            
149            if (temp.equals("//")){ // GNU extended filenames entry
150                int bufflen = asInt(length); // Assume length will fit in an int
151                namebuffer = new byte[bufflen];
152                int read = read(namebuffer, 0, bufflen);
153                if (read != bufflen){
154                    throw new IOException("Failed to read complete // record: expected="+bufflen+" read="+read);
155                }
156                currentEntry = new ArArchiveEntry(temp, bufflen);
157                return getNextArEntry();
158            } else if (temp.endsWith("/")) { // GNU terminator
159                temp = temp.substring(0, temp.length() - 1);
160            } else if (temp.matches("^/\\d+")) {// GNU long filename ref.
161                int offset = Integer.parseInt(temp.substring(1));// get the offset
162                temp = getExtendedName(offset); // convert to the long name
163            }
164            currentEntry = new ArArchiveEntry(temp, asLong(length), asInt(userid),
165                                              asInt(groupid), asInt(filemode, 8),
166                                              asLong(lastmodified));
167            return currentEntry;
168        }
169    
170        /**
171         * Get an extended name from the GNU extended name buffer.
172         * 
173         * @param offset pointer to entry within the buffer
174         * @return the extended file name; without trailing "/" if present.
175         * @throws IOException if name not found or buffer not set up
176         */
177        private String getExtendedName(int offset) throws IOException{
178            if (namebuffer == null) {
179                throw new IOException("Cannot process GNU long filename as no // record was found");
180            }
181            for(int i=offset; i < namebuffer.length; i++){
182                if (namebuffer[i]=='\012'){
183                    if (namebuffer[i-1]=='/') {
184                        i--; // drop trailing /
185                    }
186                    return ArchiveUtils.toAsciiString(namebuffer, offset, i-offset);
187                }
188            }
189            throw new IOException("Failed to read entry: "+offset);
190        }
191        private long asLong(byte[] input) {
192            return Long.parseLong(new String(input).trim());
193        }
194    
195        private int asInt(byte[] input) {
196            return asInt(input, 10);
197        }
198    
199        private int asInt(byte[] input, int base) {
200            return Integer.parseInt(new String(input).trim(), base);
201        }
202    
203        /*
204         * (non-Javadoc)
205         * 
206         * @see
207         * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
208         */
209        public ArchiveEntry getNextEntry() throws IOException {
210            return getNextArEntry();
211        }
212    
213        /*
214         * (non-Javadoc)
215         * 
216         * @see java.io.InputStream#close()
217         */
218        public void close() throws IOException {
219            if (!closed) {
220                closed = true;
221                input.close();
222            }
223            currentEntry = null;
224        }
225    
226        /*
227         * (non-Javadoc)
228         * 
229         * @see java.io.InputStream#read(byte[], int, int)
230         */
231        public int read(byte[] b, final int off, final int len) throws IOException {
232            int toRead = len;
233            if (currentEntry != null) {
234                final long entryEnd = entryOffset + currentEntry.getLength();
235                if (len > 0 && entryEnd > offset) {
236                    toRead = (int) Math.min(len, entryEnd - offset);
237                } else {
238                    return -1;
239                }
240            }
241            final int ret = this.input.read(b, off, toRead);
242            count(ret);
243            offset += (ret > 0 ? ret : 0);
244            return ret;
245        }
246    
247        /**
248         * Checks if the signature matches ASCII "!<arch>" followed by a single LF
249         * control character
250         * 
251         * @param signature
252         *            the bytes to check
253         * @param length
254         *            the number of bytes to check
255         * @return true, if this stream is an Ar archive stream, false otherwise
256         */
257        public static boolean matches(byte[] signature, int length) {
258            // 3c21 7261 6863 0a3e
259    
260            if (length < 8) {
261                return false;
262            }
263            if (signature[0] != 0x21) {
264                return false;
265            }
266            if (signature[1] != 0x3c) {
267                return false;
268            }
269            if (signature[2] != 0x61) {
270                return false;
271            }
272            if (signature[3] != 0x72) {
273                return false;
274            }
275            if (signature[4] != 0x63) {
276                return false;
277            }
278            if (signature[5] != 0x68) {
279                return false;
280            }
281            if (signature[6] != 0x3e) {
282                return false;
283            }
284            if (signature[7] != 0x0a) {
285                return false;
286            }
287    
288            return true;
289        }
290    
291    }