001    /*
002     *  Licensed to the Apache Software Foundation (ASF) under one or more
003     *  contributor license agreements.  See the NOTICE file distributed with
004     *  this work for additional information regarding copyright ownership.
005     *  The ASF licenses this file to You under the Apache License, Version 2.0
006     *  (the "License"); you may not use this file except in compliance with
007     *  the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     *  Unless required by applicable law or agreed to in writing, software
012     *  distributed under the License is distributed on an "AS IS" BASIS,
013     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     *  See the License for the specific language governing permissions and
015     *  limitations under the License.
016     *
017     */
018    
019    /*
020     * This package is based on the work done by Timothy Gerard Endres
021     * (time@ice.com) to whom the Ant project is very grateful for his great code.
022     */
023    
024    package org.apache.commons.compress.archivers.tar;
025    
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
038    
039    /**
040     * The TarInputStream reads a UNIX tar archive as an InputStream.
041     * methods are provided to position at each successive entry in
042     * the archive, and the read each entry as a normal input stream
043     * using read().
044     * @NotThreadSafe
045     */
046    public class TarArchiveInputStream extends ArchiveInputStream {
047        private static final int SMALL_BUFFER_SIZE = 256;
048        private static final int BUFFER_SIZE = 8 * 1024;
049    
050        private boolean hasHitEOF;
051        private long entrySize;
052        private long entryOffset;
053        private byte[] readBuf;
054        protected final TarBuffer buffer;
055        private TarArchiveEntry currEntry;
056    
057        /**
058         * Constructor for TarInputStream.
059         * @param is the input stream to use
060         */
061        public TarArchiveInputStream(InputStream is) {
062            this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
063        }
064    
065        /**
066         * Constructor for TarInputStream.
067         * @param is the input stream to use
068         * @param blockSize the block size to use
069         */
070        public TarArchiveInputStream(InputStream is, int blockSize) {
071            this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
072        }
073    
074        /**
075         * Constructor for TarInputStream.
076         * @param is the input stream to use
077         * @param blockSize the block size to use
078         * @param recordSize the record size to use
079         */
080        public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
081            this.buffer = new TarBuffer(is, blockSize, recordSize);
082            this.readBuf = null;
083            this.hasHitEOF = false;
084        }
085    
086        /**
087         * Closes this stream. Calls the TarBuffer's close() method.
088         * @throws IOException on error
089         */
090        public void close() throws IOException {
091            buffer.close();
092        }
093    
094        /**
095         * Get the record size being used by this stream's TarBuffer.
096         *
097         * @return The TarBuffer record size.
098         */
099        public int getRecordSize() {
100            return buffer.getRecordSize();
101        }
102    
103        /**
104         * Get the available data that can be read from the current
105         * entry in the archive. This does not indicate how much data
106         * is left in the entire archive, only in the current entry.
107         * This value is determined from the entry's size header field
108         * and the amount of data already read from the current entry.
109         * Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE
110         * bytes are left in the current entry in the archive.
111         *
112         * @return The number of available bytes for the current entry.
113         * @throws IOException for signature
114         */
115        public int available() throws IOException {
116            if (entrySize - entryOffset > Integer.MAX_VALUE) {
117                return Integer.MAX_VALUE;
118            }
119            return (int) (entrySize - entryOffset);
120        }
121    
122        /**
123         * Skip bytes in the input buffer. This skips bytes in the
124         * current entry's data, not the entire archive, and will
125         * stop at the end of the current entry's data if the number
126         * to skip extends beyond that point.
127         *
128         * @param numToSkip The number of bytes to skip.
129         * @return the number actually skipped
130         * @throws IOException on error
131         */
132        public long skip(long numToSkip) throws IOException {
133            // REVIEW
134            // This is horribly inefficient, but it ensures that we
135            // properly skip over bytes via the TarBuffer...
136            //
137            byte[] skipBuf = new byte[BUFFER_SIZE];
138            long skip = numToSkip;
139            while (skip > 0) {
140                int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
141                int numRead = read(skipBuf, 0, realSkip);
142                if (numRead == -1) {
143                    break;
144                }
145                skip -= numRead;
146            }
147            return (numToSkip - skip);
148        }
149    
150        /**
151         * Since we do not support marking just yet, we do nothing.
152         */
153        public synchronized void reset() {
154        }
155    
156        /**
157         * Get the next entry in this tar archive. This will skip
158         * over any remaining data in the current entry, if there
159         * is one, and place the input stream at the header of the
160         * next entry, and read the header and instantiate a new
161         * TarEntry from the header bytes and return that entry.
162         * If there are no more entries in the archive, null will
163         * be returned to indicate that the end of the archive has
164         * been reached.
165         *
166         * @return The next TarEntry in the archive, or null.
167         * @throws IOException on error
168         */
169        public TarArchiveEntry getNextTarEntry() throws IOException {
170            if (hasHitEOF) {
171                return null;
172            }
173    
174            if (currEntry != null) {
175                long numToSkip = entrySize - entryOffset;
176    
177                while (numToSkip > 0) {
178                    long skipped = skip(numToSkip);
179                    if (skipped <= 0) {
180                        throw new RuntimeException("failed to skip current tar entry");
181                    }
182                    numToSkip -= skipped;
183                }
184    
185                readBuf = null;
186            }
187    
188            byte[] headerBuf = buffer.readRecord();
189    
190            if (headerBuf == null) {
191                hasHitEOF = true;
192            } else if (buffer.isEOFRecord(headerBuf)) {
193                hasHitEOF = true;
194            }
195    
196            if (hasHitEOF) {
197                currEntry = null;
198            } else {
199                currEntry = new TarArchiveEntry(headerBuf);
200                entryOffset = 0;
201                entrySize = currEntry.getSize();
202            }
203    
204            if (currEntry != null && currEntry.isGNULongNameEntry()) {
205                // read in the name
206                StringBuffer longName = new StringBuffer();
207                byte[] buf = new byte[SMALL_BUFFER_SIZE];
208                int length = 0;
209                while ((length = read(buf)) >= 0) {
210                    longName.append(new String(buf, 0, length));
211                }
212                getNextEntry();
213                if (currEntry == null) {
214                    // Bugzilla: 40334
215                    // Malformed tar file - long entry name not followed by entry
216                    return null;
217                }
218                // remove trailing null terminator
219                if (longName.length() > 0
220                    && longName.charAt(longName.length() - 1) == 0) {
221                    longName.deleteCharAt(longName.length() - 1);
222                }
223                currEntry.setName(longName.toString());
224            }
225    
226            if (currEntry != null && currEntry.isPaxHeader()){ // Process Pax headers
227                paxHeaders();
228            }
229    
230            return currEntry;
231        }
232    
233        private void paxHeaders() throws IOException{
234            BufferedReader br = new BufferedReader(new InputStreamReader(this, "UTF-8"));
235            Map headers = new HashMap();
236            // Format is "length keyword=value\n";
237            while(true){ // get length
238                int ch;
239                int len=0;
240                int read=0;
241                while((ch = br.read()) != -1){
242                    read++;
243                    if (ch == ' '){ // End of length string
244                        // Get keyword
245                        StringBuffer sb = new StringBuffer();
246                        while((ch = br.read()) != -1){
247                            read++;
248                            if (ch == '='){ // end of keyword
249                                String keyword = sb.toString();
250                                // Get rest of entry
251                                char[] cbuf = new char[len-read];
252                                int got = br.read(cbuf);
253                                if (got != len-read){
254                                    throw new IOException("Failed to read Paxheader. Expected "+(len-read)+" chars, read "+got);
255                                }
256                                String value = new String(cbuf, 0 , len-read-1); // Drop trailing NL
257                                headers.put(keyword, value);
258                                break;
259                            }
260                            sb.append((char)ch);
261                        }
262                        break; // Processed single header
263                    }
264                    len *= 10;
265                    len += ch - '0';
266                }
267                if (ch == -1){ // EOF
268                    break;
269                }
270            }
271            getNextEntry(); // Get the actual file entry
272            /*
273             * The following headers are defined for Pax.
274             * atime, ctime, mtime, charset: cannot use these without changing TarArchiveEntry fields
275             * comment
276             * gid, gname
277             * linkpath
278             * size
279             * uid,uname
280             */
281            Iterator hdrs = headers.entrySet().iterator();
282            while(hdrs.hasNext()){
283                Entry ent = (Entry) hdrs.next();
284                String key = (String) ent.getKey();
285                String val = (String) ent.getValue();
286                if ("path".equals(key)){
287                    currEntry.setName(val);
288                } else if ("linkpath".equals(key)){
289                    currEntry.setLinkName(val);
290                } else if ("gid".equals(key)){
291                    currEntry.setGroupId(Integer.parseInt(val));
292                } else if ("gname".equals(key)){
293                    currEntry.setGroupName(val);
294                } else if ("uid".equals(key)){
295                    currEntry.setUserId(Integer.parseInt(val));
296                } else if ("uname".equals(key)){
297                    currEntry.setUserName(val);
298                } else if ("size".equals(key)){
299                    currEntry.setSize(Long.parseLong(val));
300                }
301            }
302        }
303    
304        public ArchiveEntry getNextEntry() throws IOException {
305            return getNextTarEntry();
306        }
307    
308        /**
309         * Reads bytes from the current tar archive entry.
310         *
311         * This method is aware of the boundaries of the current
312         * entry in the archive and will deal with them as if they
313         * were this stream's start and EOF.
314         *
315         * @param buf The buffer into which to place bytes read.
316         * @param offset The offset at which to place bytes read.
317         * @param numToRead The number of bytes to read.
318         * @return The number of bytes read, or -1 at EOF.
319         * @throws IOException on error
320         */
321        public int read(byte[] buf, int offset, int numToRead) throws IOException {
322            int totalRead = 0;
323    
324            if (entryOffset >= entrySize) {
325                return -1;
326            }
327    
328            if ((numToRead + entryOffset) > entrySize) {
329                numToRead = (int) (entrySize - entryOffset);
330            }
331    
332            if (readBuf != null) {
333                int sz = (numToRead > readBuf.length) ? readBuf.length
334                    : numToRead;
335    
336                System.arraycopy(readBuf, 0, buf, offset, sz);
337    
338                if (sz >= readBuf.length) {
339                    readBuf = null;
340                } else {
341                    int newLen = readBuf.length - sz;
342                    byte[] newBuf = new byte[newLen];
343    
344                    System.arraycopy(readBuf, sz, newBuf, 0, newLen);
345    
346                    readBuf = newBuf;
347                }
348    
349                totalRead += sz;
350                numToRead -= sz;
351                offset += sz;
352            }
353    
354            while (numToRead > 0) {
355                byte[] rec = buffer.readRecord();
356    
357                if (rec == null) {
358                    // Unexpected EOF!
359                    throw new IOException("unexpected EOF with " + numToRead
360                                          + " bytes unread. Occured at byte: " + getBytesRead());
361                }
362                count(rec.length);
363                int sz = numToRead;
364                int recLen = rec.length;
365    
366                if (recLen > sz) {
367                    System.arraycopy(rec, 0, buf, offset, sz);
368    
369                    readBuf = new byte[recLen - sz];
370    
371                    System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
372                } else {
373                    sz = recLen;
374    
375                    System.arraycopy(rec, 0, buf, offset, recLen);
376                }
377    
378                totalRead += sz;
379                numToRead -= sz;
380                offset += sz;
381            }
382    
383            entryOffset += totalRead;
384    
385            return totalRead;
386        }
387    
388        protected final TarArchiveEntry getCurrentEntry() {
389            return currEntry;
390        }
391    
392        protected final void setCurrentEntry(TarArchiveEntry e) {
393            currEntry = e;
394        }
395    
396        protected final boolean isAtEOF() {
397            return hasHitEOF;
398        }
399    
400        protected final void setAtEOF(boolean b) {
401            hasHitEOF = b;
402        }
403    
404        /**
405         * Checks if the signature matches what is expected for a tar file.
406         * 
407         * @param signature
408         *            the bytes to check
409         * @param length
410         *            the number of bytes to check
411         * @return true, if this stream is a tar archive stream, false otherwise
412         */
413        public static boolean matches(byte[] signature, int length) {
414            if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) {
415                return false;
416            }
417    
418            if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
419                    signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
420                &&
421                ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
422                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
423                    ){
424                return true;
425            }
426            if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
427                    signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
428                &&
429                (
430                 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
431                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
432                ||
433                ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
434                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
435                )
436                    ){
437                return true;
438            }
439            // COMPRESS-107 - recognise Ant tar files
440            if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
441                    signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
442                &&
443                ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
444                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
445                    ){
446                return true;
447            }
448            return false;
449        }
450    
451    }