/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */

package org.apache.commons.compress.archivers.tar;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;

/**
 * The TarArchiveInputStream reads a UNIX tar archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read(). See the usage sketch below the constructors.
 * @NotThreadSafe
 */
public class TarArchiveInputStream extends ArchiveInputStream {
    private static final int SMALL_BUFFER_SIZE = 256;
    private static final int BUFFER_SIZE = 8 * 1024;

    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private byte[] readBuf;
    protected final TarBuffer buffer;
    private TarArchiveEntry currEntry;

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     */
    public TarArchiveInputStream(InputStream is) {
        this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     */
    public TarArchiveInputStream(InputStream is, int blockSize) {
        this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     */
    public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
        this.buffer = new TarBuffer(is, blockSize, recordSize);
        this.readBuf = null;
        this.hasHitEOF = false;
    }
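
    /*
     * Usage sketch (illustrative only, not part of the published API;
     * "archive.tar" is a placeholder path and the cast to int assumes each
     * entry is small enough to fit into a single in-memory array):
     *
     *   TarArchiveInputStream in = new TarArchiveInputStream(
     *           new java.io.FileInputStream("archive.tar"));
     *   TarArchiveEntry entry;
     *   while ((entry = in.getNextTarEntry()) != null) {
     *       byte[] content = new byte[(int) entry.getSize()];
     *       int offset = 0;
     *       int n;
     *       while (offset < content.length
     *               && (n = in.read(content, offset, content.length - offset)) != -1) {
     *           offset += n;
     *       }
     *       // process entry.getName() and content here
     *   }
     *   in.close();
     */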

    /**
     * Closes this stream. Calls the TarBuffer's close() method.
     * @throws IOException on error
     */
    public void close() throws IOException {
        buffer.close();
    }

    /**
     * Get the record size being used by this stream's TarBuffer.
     *
     * @return The TarBuffer record size.
     */
    public int getRecordSize() {
        return buffer.getRecordSize();
    }

    /**
     * Get the available data that can be read from the current
     * entry in the archive. This does not indicate how much data
     * is left in the entire archive, only in the current entry.
     * This value is determined from the entry's size header field
     * and the amount of data already read from the current entry.
     * Integer.MAX_VALUE is returned if more than Integer.MAX_VALUE
     * bytes are left in the current entry in the archive.
     *
     * @return The number of available bytes for the current entry.
     * @throws IOException declared to match the InputStream signature; not
     * thrown by this implementation
     */
    public int available() throws IOException {
        if (entrySize - entryOffset > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        return (int) (entrySize - entryOffset);
    }

    /**
     * Skip bytes in the input buffer. This skips bytes in the
     * current entry's data, not the entire archive, and will
     * stop at the end of the current entry's data if the number
     * to skip extends beyond that point.
     *
     * @param numToSkip The number of bytes to skip.
     * @return the number actually skipped
     * @throws IOException on error
     */
    public long skip(long numToSkip) throws IOException {
        // REVIEW
        // This is horribly inefficient, but it ensures that we
        // properly skip over bytes via the TarBuffer...
        //
        byte[] skipBuf = new byte[BUFFER_SIZE];
        long skip = numToSkip;
        while (skip > 0) {
            int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
            int numRead = read(skipBuf, 0, realSkip);
            if (numRead == -1) {
                break;
            }
            skip -= numRead;
        }
        return (numToSkip - skip);
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     */
    public synchronized void reset() {
    }

    /**
     * Get the next entry in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, position the input stream at the header of the
     * next entry, read the header, and instantiate and return
     * a new TarArchiveEntry built from the header bytes.
     * If there are no more entries in the archive, null will
     * be returned to indicate that the end of the archive has
     * been reached.
     *
     * @return The next TarArchiveEntry in the archive, or null.
     * @throws IOException on error
     */
    public TarArchiveEntry getNextTarEntry() throws IOException {
        if (hasHitEOF) {
            return null;
        }

        if (currEntry != null) {
            long numToSkip = entrySize - entryOffset;

            while (numToSkip > 0) {
                long skipped = skip(numToSkip);
                if (skipped <= 0) {
                    throw new RuntimeException("failed to skip current tar entry");
                }
                numToSkip -= skipped;
            }

            readBuf = null;
        }

        byte[] headerBuf = buffer.readRecord();

        if (headerBuf == null) {
            hasHitEOF = true;
        } else if (buffer.isEOFRecord(headerBuf)) {
            hasHitEOF = true;
        }

        if (hasHitEOF) {
            currEntry = null;
        } else {
            currEntry = new TarArchiveEntry(headerBuf);
            entryOffset = 0;
            entrySize = currEntry.getSize();
        }

        if (currEntry != null && currEntry.isGNULongNameEntry()) {
            // read in the name
            StringBuffer longName = new StringBuffer();
            byte[] buf = new byte[SMALL_BUFFER_SIZE];
            int length = 0;
            while ((length = read(buf)) >= 0) {
                longName.append(new String(buf, 0, length));
            }
            getNextEntry();
            if (currEntry == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by entry
                return null;
            }
            // remove trailing null terminator
            if (longName.length() > 0
                && longName.charAt(longName.length() - 1) == 0) {
                longName.deleteCharAt(longName.length() - 1);
            }
            currEntry.setName(longName.toString());
        }

        if (currEntry != null && currEntry.isPaxHeader()) { // Process Pax headers
            paxHeaders();
        }

        return currEntry;
    }
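
    /*
     * Illustration of the extended-header record format parsed by
     * paxHeaders() below (example values only). Each record has the form
     * "length keyword=value\n", where length counts every byte of the
     * record including the length digits themselves, the space and the
     * trailing newline, e.g.:
     *
     *   30 mtime=1321711775.972059463\n
     *
     * Here "30" is the total record length: 2 (length digits) + 1 (space)
     * + 26 ("mtime=1321711775.972059463") + 1 (newline).
     */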
    private void paxHeaders() throws IOException {
        BufferedReader br = new BufferedReader(new InputStreamReader(this, "UTF-8"));
        Map headers = new HashMap();
        // Format is "length keyword=value\n";
        while (true) { // get length
            int ch;
            int len = 0;
            int read = 0;
            while ((ch = br.read()) != -1) {
                read++;
                if (ch == ' ') { // End of length string
                    // Get keyword
                    StringBuffer sb = new StringBuffer();
                    while ((ch = br.read()) != -1) {
                        read++;
                        if (ch == '=') { // end of keyword
                            String keyword = sb.toString();
                            // Get rest of entry
                            char[] cbuf = new char[len - read];
                            int got = br.read(cbuf);
                            if (got != len - read) {
                                throw new IOException("Failed to read Pax header. Expected "
                                        + (len - read) + " chars, read " + got);
                            }
                            String value = new String(cbuf, 0, len - read - 1); // Drop trailing NL
                            headers.put(keyword, value);
                            break;
                        }
                        sb.append((char) ch);
                    }
                    break; // Processed single header
                }
                len *= 10;
                len += ch - '0';
            }
            if (ch == -1) { // EOF
                break;
            }
        }
        getNextEntry(); // Get the actual file entry
        /*
         * The following headers are defined for Pax.
         * atime, ctime, mtime, charset: cannot use these without changing TarArchiveEntry fields
         * comment
         * gid, gname
         * linkpath
         * size
         * uid, uname
         */
        Iterator hdrs = headers.entrySet().iterator();
        while (hdrs.hasNext()) {
            Entry ent = (Entry) hdrs.next();
            String key = (String) ent.getKey();
            String val = (String) ent.getValue();
            if ("path".equals(key)) {
                currEntry.setName(val);
            } else if ("linkpath".equals(key)) {
                currEntry.setLinkName(val);
            } else if ("gid".equals(key)) {
                currEntry.setGroupId(Integer.parseInt(val));
            } else if ("gname".equals(key)) {
                currEntry.setGroupName(val);
            } else if ("uid".equals(key)) {
                currEntry.setUserId(Integer.parseInt(val));
            } else if ("uname".equals(key)) {
                currEntry.setUserName(val);
            } else if ("size".equals(key)) {
                currEntry.setSize(Long.parseLong(val));
            }
        }
    }

    public ArchiveEntry getNextEntry() throws IOException {
        return getNextTarEntry();
    }

    /**
     * Reads bytes from the current tar archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    public int read(byte[] buf, int offset, int numToRead) throws IOException {
        int totalRead = 0;

        if (entryOffset >= entrySize) {
            return -1;
        }

        if ((numToRead + entryOffset) > entrySize) {
            numToRead = (int) (entrySize - entryOffset);
        }

        if (readBuf != null) {
            int sz = (numToRead > readBuf.length) ? readBuf.length
                    : numToRead;

            System.arraycopy(readBuf, 0, buf, offset, sz);

            if (sz >= readBuf.length) {
                readBuf = null;
            } else {
                int newLen = readBuf.length - sz;
                byte[] newBuf = new byte[newLen];

                System.arraycopy(readBuf, sz, newBuf, 0, newLen);

                readBuf = newBuf;
            }

            totalRead += sz;
            numToRead -= sz;
            offset += sz;
        }

        while (numToRead > 0) {
            byte[] rec = buffer.readRecord();

            if (rec == null) {
                // Unexpected EOF!
                throw new IOException("unexpected EOF with " + numToRead
                        + " bytes unread. Occurred at byte: " + getBytesRead());
            }
            count(rec.length);
            int sz = numToRead;
            int recLen = rec.length;

            if (recLen > sz) {
                System.arraycopy(rec, 0, buf, offset, sz);

                readBuf = new byte[recLen - sz];

                System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
            } else {
                sz = recLen;

                System.arraycopy(rec, 0, buf, offset, recLen);
            }

            totalRead += sz;
            numToRead -= sz;
            offset += sz;
        }

        entryOffset += totalRead;

        return totalRead;
    }

    protected final TarArchiveEntry getCurrentEntry() {
        return currEntry;
    }

    protected final void setCurrentEntry(TarArchiveEntry e) {
        currEntry = e;
    }

    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    protected final void setAtEOF(boolean b) {
        hasHitEOF = b;
    }

    /**
     * Checks if the signature matches what is expected for a tar file.
     *
     * @param signature
     *            the bytes to check
     * @param length
     *            the number of bytes to check
     * @return true, if this stream is a tar archive stream, false otherwise
     */
    public static boolean matches(byte[] signature, int length) {
        if (length < TarConstants.VERSION_OFFSET + TarConstants.VERSIONLEN) {
            return false;
        }

        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            ) {
            return true;
        }
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            (
             ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
             ||
             ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            )
            ) {
            return true;
        }
        // COMPRESS-107 - recognise Ant tar files
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            ) {
            return true;
        }
        return false;
    }

}
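
/*
 * Format-detection sketch using the static matches(byte[], int) method above
 * (illustrative only; "unknown.bin" is a placeholder path, the stream is
 * assumed to support mark/reset, a single read() call may return fewer bytes
 * than requested, and 512 bytes - one default tar record - is assumed to be
 * enough of a prefix to inspect):
 *
 *   java.io.InputStream in = new java.io.BufferedInputStream(
 *           new java.io.FileInputStream("unknown.bin"));
 *   byte[] signature = new byte[512];
 *   in.mark(signature.length);
 *   int read = in.read(signature);
 *   in.reset();
 *   boolean isTar = TarArchiveInputStream.matches(signature, read);
 */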