/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */

package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;

/**
 * The TarInputStream reads a UNIX tar archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 * @NotThreadSafe
 */
public class TarArchiveInputStream extends ArchiveInputStream {
    private static final int SMALL_BUFFER_SIZE = 256;
    private static final int BUFFER_SIZE = 8 * 1024;

    /** Charset used to decode keywords and values of Pax extended headers. */
    private static final String PAX_CHARSET = "UTF-8";

    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private byte[] readBuf;
    protected final TarBuffer buffer;
    private TarArchiveEntry currEntry;

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     */
    public TarArchiveInputStream(InputStream is) {
        this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     */
    public TarArchiveInputStream(InputStream is, int blockSize) {
        this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     */
    public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
        this.buffer = new TarBuffer(is, blockSize, recordSize);
        this.readBuf = null;
        this.hasHitEOF = false;
    }

    /**
     * Closes this stream. Calls the TarBuffer's close() method.
     * @throws IOException on error
     */
    @Override
    public void close() throws IOException {
        buffer.close();
    }

    /**
     * Get the record size being used by this stream's TarBuffer.
     *
     * @return The TarBuffer record size.
     */
    public int getRecordSize() {
        return buffer.getRecordSize();
    }

    /**
     * Get the available data that can be read from the current
     * entry in the archive. This does not indicate how much data
     * is left in the entire archive, only in the current entry.
     * This value is determined from the entry's size header field
     * and the amount of data already read from the current entry.
     * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
     * bytes are left in the current entry in the archive.
     *
     * @return The number of available bytes for the current entry.
     * @throws IOException declared for signature compatibility only;
     *         this implementation performs no I/O
     */
    @Override
    public int available() throws IOException {
        if (entrySize - entryOffset > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        return (int) (entrySize - entryOffset);
    }

    /**
     * Skip bytes in the input buffer. This skips bytes in the
     * current entry's data, not the entire archive, and will
     * stop at the end of the current entry's data if the number
     * to skip extends beyond that point.
     *
     * @param numToSkip The number of bytes to skip.
     * @return the number actually skipped
     * @throws IOException on error
     */
    @Override
    public long skip(long numToSkip) throws IOException {
        // REVIEW
        // This is horribly inefficient, but it ensures that we
        // properly skip over bytes via the TarBuffer...
        //
        byte[] skipBuf = new byte[BUFFER_SIZE];
        long skip = numToSkip;
        while (skip > 0) {
            int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
            int numRead = read(skipBuf, 0, realSkip);
            if (numRead == -1) {
                break;
            }
            skip -= numRead;
        }
        return (numToSkip - skip);
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     */
    @Override
    public synchronized void reset() {
    }

    /**
     * Get the next entry in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, and place the input stream at the header of the
     * next entry, and read the header and instantiate a new
     * TarEntry from the header bytes and return that entry.
     * If there are no more entries in the archive, null will
     * be returned to indicate that the end of the archive has
     * been reached.
     *
     * <p>GNU long name entries and Pax extended headers are consumed
     * here and applied to the entry that follows them; that following
     * entry is the one returned.</p>
     *
     * @return The next TarEntry in the archive, or null.
     * @throws IOException on error
     */
    public TarArchiveEntry getNextTarEntry() throws IOException {
        if (hasHitEOF) {
            return null;
        }

        if (currEntry != null) {
            long numToSkip = entrySize - entryOffset;

            while (numToSkip > 0) {
                long skipped = skip(numToSkip);
                if (skipped <= 0) {
                    throw new RuntimeException("failed to skip current tar entry");
                }
                numToSkip -= skipped;
            }

            // Whatever is left of the last record is padding, not entry data.
            readBuf = null;
        }

        byte[] headerBuf = getRecord();

        if (hasHitEOF) {
            currEntry = null;
            return null;
        }

        currEntry = new TarArchiveEntry(headerBuf);
        entryOffset = 0;
        entrySize = currEntry.getSize();

        if (currEntry.isGNULongNameEntry()) {
            String longName = readGNULongName();
            getNextEntry();
            if (currEntry == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by entry
                return null;
            }
            currEntry.setName(longName);
            // The nested getNextEntry() call has already done all further
            // processing (including sparse headers) for this entry; doing it
            // again here would consume records belonging to the entry data.
            return currEntry;
        }

        if (currEntry.isPaxHeader()) { // Process Pax headers
            paxHeaders();
            // Same as above: the entry (possibly null at end of archive)
            // was fully processed by the nested getNextEntry() call.
            return currEntry;
        }

        if (currEntry.isGNUSparse()) { // Process sparse files
            readGNUSparse();
        }

        return currEntry;
    }

    /**
     * Reads the data of the current GNU long name entry and converts it
     * to a String with any single trailing NUL terminator removed.
     *
     * <p>The bytes are collected first and decoded in one step so that a
     * multi-byte character cannot be torn apart at a read boundary. The
     * platform default charset is used for decoding.</p>
     *
     * @return the long name stored in the current entry's data
     * @throws IOException on error
     */
    private String readGNULongName() throws IOException {
        byte[] raw = readRemainingEntryData();
        String name = new String(raw, 0, raw.length);
        // remove trailing null terminator
        int last = name.length() - 1;
        if (last >= 0 && name.charAt(last) == 0) {
            name = name.substring(0, last);
        }
        return name;
    }

    /**
     * Reads everything that is left of the current entry's data.
     *
     * @return the bytes read, never null
     * @throws IOException on error
     */
    private byte[] readRemainingEntryData() throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] chunk = new byte[SMALL_BUFFER_SIZE];
        int length;
        while ((length = read(chunk, 0, chunk.length)) >= 0) {
            collected.write(chunk, 0, length);
        }
        return collected.toByteArray();
    }

    /**
     * Get the next record in this tar archive.
     * If the end of the archive has been reached, null is returned
     * and the EOF flag of this stream is set.
     *
     * @return The next header in the archive, or null.
     * @throws IOException on error
     */
    private byte[] getRecord() throws IOException {
        if (hasHitEOF) {
            return null;
        }

        byte[] headerBuf = buffer.readRecord();

        if (headerBuf == null || buffer.isEOFRecord(headerBuf)) {
            hasHitEOF = true;
            return null;
        }

        return headerBuf;
    }

    /**
     * Processes a Pax extended header: parses the records held in the
     * current entry's data, advances to the entry they describe and
     * applies the recognised headers to it.
     *
     * <p>If no entry follows the Pax header the current entry is left
     * as null and nothing is applied.</p>
     *
     * @throws IOException on error or if the header is malformed
     */
    private void paxHeaders() throws IOException {
        Map<String, String> headers = parsePaxHeaders(readRemainingEntryData());

        getNextEntry(); // Get the actual file entry
        if (currEntry == null) {
            // Malformed tar file - Pax header not followed by entry
            return;
        }

        applyPaxHeaders(headers);

        // A "size" header replaces the size taken from the entry's own
        // header, so the limit used by read()/skip()/available() has to
        // follow it.
        entrySize = currEntry.getSize();
    }

    /**
     * Parses Pax extended header records.
     *
     * <p>Each record has the form "length keyword=value\n" where length
     * is the decimal size of the whole record in bytes, including the
     * length field itself and the trailing newline. Keywords and values
     * are decoded as UTF-8. Data that ends inside a length field or a
     * keyword is ignored.</p>
     *
     * @param data the raw bytes of the Pax header entry
     * @return the keyword/value pairs found, in no particular order
     * @throws IOException if a record's length field is not a number or
     *         does not fit the data
     */
    private static Map<String, String> parsePaxHeaders(byte[] data)
        throws IOException {
        Map<String, String> headers = new HashMap<String, String>();
        int pos = 0;
        while (pos < data.length) {
            int recordStart = pos;

            // get length
            long recordLen = 0;
            while (pos < data.length && data[pos] != ' ') {
                int digit = data[pos] - '0';
                if (digit < 0 || digit > 9) {
                    throw new IOException("Failed to read Paxheader. "
                                          + "Encountered a non-number while"
                                          + " reading length");
                }
                recordLen = recordLen * 10 + digit;
                if (recordLen > data.length) {
                    // also keeps recordLen from overflowing
                    throw new IOException("Failed to read Paxheader. "
                                          + "Record length exceeds the "
                                          + data.length
                                          + " bytes of header data");
                }
                pos++;
            }
            if (pos >= data.length) { // EOF
                break;
            }
            pos++; // skip the blank that ends the length string

            // Get keyword
            int keywordStart = pos;
            while (pos < data.length && data[pos] != '=') {
                pos++;
            }
            if (pos >= data.length) { // EOF
                break;
            }
            String keyword = new String(data, keywordStart,
                                        pos - keywordStart, PAX_CHARSET);
            pos++; // skip the '='

            // Get rest of entry: value plus trailing NL
            long restLen = recordLen - (pos - recordStart);
            int remaining = data.length - pos;
            if (restLen < 1) {
                throw new IOException("Failed to read Paxheader. "
                                      + "Record length " + recordLen
                                      + " is too small for keyword "
                                      + keyword);
            }
            if (restLen > remaining) {
                throw new IOException("Failed to read "
                                      + "Paxheader. Expected "
                                      + restLen
                                      + " bytes, read "
                                      + remaining);
            }
            // Drop trailing NL
            String value = new String(data, pos, (int) restLen - 1,
                                      PAX_CHARSET);
            headers.put(keyword, value);
            pos += (int) restLen;
        }
        return headers;
    }

    /**
     * Applies the recognised Pax headers to the current entry.
     *
     * <p>The following headers are defined for Pax:
     * atime, ctime, mtime, charset (cannot use these without changing
     * TarArchiveEntry fields), comment, gid, gname, linkpath, path, size,
     * uid, uname. Only path, linkpath, gid, gname, uid, uname and size
     * are applied; everything else is ignored.</p>
     *
     * @param headers the parsed keyword/value pairs
     * @throws IOException if a numeric header does not hold a number
     */
    private void applyPaxHeaders(Map<String, String> headers)
        throws IOException {
        for (Entry<String, String> ent : headers.entrySet()) {
            String key = ent.getKey();
            String val = ent.getValue();
            try {
                if ("path".equals(key)) {
                    currEntry.setName(val);
                } else if ("linkpath".equals(key)) {
                    currEntry.setLinkName(val);
                } else if ("gid".equals(key)) {
                    currEntry.setGroupId(Integer.parseInt(val));
                } else if ("gname".equals(key)) {
                    currEntry.setGroupName(val);
                } else if ("uid".equals(key)) {
                    currEntry.setUserId(Integer.parseInt(val));
                } else if ("uname".equals(key)) {
                    currEntry.setUserName(val);
                } else if ("size".equals(key)) {
                    currEntry.setSize(Long.parseLong(val));
                }
            } catch (NumberFormatException nfe) {
                // Report bad archive content the same way as any other
                // malformed input instead of leaking an unchecked exception.
                IOException ioe = new IOException("Failed to read Paxheader. "
                                                  + "Invalid value for "
                                                  + key + ": " + val);
                ioe.initCause(nfe);
                throw ioe;
            }
        }
    }

    /**
     * Reads past any additional sparse header records following the
     * current entry. If the archive ends while doing so the current
     * entry is set to null.
     *
     * @throws IOException on error
     *
     * @todo Sparse files get not yet really processed.
     */
    private void readGNUSparse() throws IOException {
        // The sparse chunk information itself is not collected yet; the
        // extension records are only consumed so that the stream is
        // positioned at the entry's data.
        if (currEntry.isExtended()) {
            TarArchiveSparseEntry entry;
            do {
                byte[] headerBuf = getRecord();
                if (hasHitEOF) {
                    currEntry = null;
                    break;
                }
                entry = new TarArchiveSparseEntry(headerBuf);
            } while (entry.isExtended());
        }
    }

    /**
     * Get the next entry in this tar archive.
     *
     * @return the result of {@link #getNextTarEntry()}
     * @throws IOException on error
     */
    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextTarEntry();
    }

    /**
     * Reads bytes from the current tar archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(byte[] buf, int offset, int numToRead) throws IOException {
        int totalRead = 0;

        if (entryOffset >= entrySize) {
            return -1;
        }

        // Never hand out bytes beyond the end of the current entry.
        if ((numToRead + entryOffset) > entrySize) {
            numToRead = (int) (entrySize - entryOffset);
        }

        // First serve what is left over from the previously read record.
        if (readBuf != null) {
            int sz = Math.min(numToRead, readBuf.length);

            System.arraycopy(readBuf, 0, buf, offset, sz);

            if (sz >= readBuf.length) {
                readBuf = null;
            } else {
                int newLen = readBuf.length - sz;
                byte[] newBuf = new byte[newLen];

                System.arraycopy(readBuf, sz, newBuf, 0, newLen);

                readBuf = newBuf;
            }

            totalRead += sz;
            numToRead -= sz;
            offset += sz;
        }

        while (numToRead > 0) {
            byte[] rec = buffer.readRecord();

            if (rec == null) {
                // Unexpected EOF!
                throw new IOException("unexpected EOF with " + numToRead
                                      + " bytes unread. Occured at byte: " + getBytesRead());
            }
            count(rec.length);
            int sz = numToRead;
            int recLen = rec.length;

            if (recLen > sz) {
                System.arraycopy(rec, 0, buf, offset, sz);

                // Keep the unread tail of the record for the next call.
                readBuf = new byte[recLen - sz];

                System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
            } else {
                sz = recLen;

                System.arraycopy(rec, 0, buf, offset, recLen);
            }

            totalRead += sz;
            numToRead -= sz;
            offset += sz;
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if the current entry is a sparse file.</p>
     *
     * @param ae the entry to test
     * @return true if the entry is a TarArchiveEntry that is not a
     *         GNU sparse entry, false otherwise
     */
    @Override
    public boolean canReadEntryData(ArchiveEntry ae) {
        if (ae instanceof TarArchiveEntry) {
            TarArchiveEntry te = (TarArchiveEntry) ae;
            return !te.isGNUSparse();
        }
        return false;
    }

    /**
     * @return the entry this stream is currently positioned at, may be null
     */
    protected final TarArchiveEntry getCurrentEntry() {
        return currEntry;
    }

    /**
     * @param e the entry to use as the current entry
     */
    protected final void setCurrentEntry(TarArchiveEntry e) {
        currEntry = e;
    }

    /**
     * @return whether the end of the archive has been flagged
     */
    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    /**
     * @param b the new value of the end-of-archive flag
     */
    protected final void setAtEOF(boolean b) {
        hasHitEOF = b;
    }

    /**
     * Checks if the signature matches what is expected for a tar file.
     *
     * @param signature
     *            the bytes to check
     * @param length
     *            the number of bytes to check
     * @return true, if this stream is a tar archive stream, false otherwise
     */
    public static boolean matches(byte[] signature, int length) {
        if (length < TarConstants.VERSION_OFFSET + TarConstants.VERSIONLEN) {
            return false;
        }

        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
                ) {
            return true;
        }
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            (
             ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            ||
            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            )
                ) {
            return true;
        }
        // COMPRESS-107 - recognise Ant tar files
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
                ) {
            return true;
        }
        return false;
    }

}