001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018 019 /* 020 * This package is based on the work done by Timothy Gerard Endres 021 * (time@ice.com) to whom the Ant project is very grateful for his great code. 022 */ 023 024 package org.apache.commons.compress.archivers.tar; 025 026 import java.io.BufferedReader; 027 import java.io.IOException; 028 import java.io.InputStream; 029 import java.io.InputStreamReader; 030 import java.util.HashMap; 031 import java.util.Iterator; 032 import java.util.Map; 033 import java.util.Map.Entry; 034 035 import org.apache.commons.compress.archivers.ArchiveEntry; 036 import org.apache.commons.compress.archivers.ArchiveInputStream; 037 import org.apache.commons.compress.utils.ArchiveUtils; 038 039 /** 040 * The TarInputStream reads a UNIX tar archive as an InputStream. 041 * methods are provided to position at each successive entry in 042 * the archive, and the read each entry as a normal input stream 043 * using read(). 044 * @NotThreadSafe 045 */ 046 public class TarArchiveInputStream extends ArchiveInputStream { 047 private static final int SMALL_BUFFER_SIZE = 256; 048 private static final int BUFFER_SIZE = 8 * 1024; 049 050 private boolean hasHitEOF; 051 private long entrySize; 052 private long entryOffset; 053 private byte[] readBuf; 054 protected final TarBuffer buffer; 055 private TarArchiveEntry currEntry; 056 057 /** 058 * Constructor for TarInputStream. 059 * @param is the input stream to use 060 */ 061 public TarArchiveInputStream(InputStream is) { 062 this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); 063 } 064 065 /** 066 * Constructor for TarInputStream. 067 * @param is the input stream to use 068 * @param blockSize the block size to use 069 */ 070 public TarArchiveInputStream(InputStream is, int blockSize) { 071 this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE); 072 } 073 074 /** 075 * Constructor for TarInputStream. 076 * @param is the input stream to use 077 * @param blockSize the block size to use 078 * @param recordSize the record size to use 079 */ 080 public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) { 081 this.buffer = new TarBuffer(is, blockSize, recordSize); 082 this.readBuf = null; 083 this.hasHitEOF = false; 084 } 085 086 /** 087 * Closes this stream. Calls the TarBuffer's close() method. 088 * @throws IOException on error 089 */ 090 public void close() throws IOException { 091 buffer.close(); 092 } 093 094 /** 095 * Get the record size being used by this stream's TarBuffer. 096 * 097 * @return The TarBuffer record size. 098 */ 099 public int getRecordSize() { 100 return buffer.getRecordSize(); 101 } 102 103 /** 104 * Get the available data that can be read from the current 105 * entry in the archive. This does not indicate how much data 106 * is left in the entire archive, only in the current entry. 107 * This value is determined from the entry's size header field 108 * and the amount of data already read from the current entry. 109 * Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE 110 * bytes are left in the current entry in the archive. 111 * 112 * @return The number of available bytes for the current entry. 113 * @throws IOException for signature 114 */ 115 public int available() throws IOException { 116 if (entrySize - entryOffset > Integer.MAX_VALUE) { 117 return Integer.MAX_VALUE; 118 } 119 return (int) (entrySize - entryOffset); 120 } 121 122 /** 123 * Skip bytes in the input buffer. This skips bytes in the 124 * current entry's data, not the entire archive, and will 125 * stop at the end of the current entry's data if the number 126 * to skip extends beyond that point. 127 * 128 * @param numToSkip The number of bytes to skip. 129 * @return the number actually skipped 130 * @throws IOException on error 131 */ 132 public long skip(long numToSkip) throws IOException { 133 // REVIEW 134 // This is horribly inefficient, but it ensures that we 135 // properly skip over bytes via the TarBuffer... 136 // 137 byte[] skipBuf = new byte[BUFFER_SIZE]; 138 long skip = numToSkip; 139 while (skip > 0) { 140 int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip); 141 int numRead = read(skipBuf, 0, realSkip); 142 if (numRead == -1) { 143 break; 144 } 145 skip -= numRead; 146 } 147 return (numToSkip - skip); 148 } 149 150 /** 151 * Since we do not support marking just yet, we do nothing. 152 */ 153 public synchronized void reset() { 154 } 155 156 /** 157 * Get the next entry in this tar archive. This will skip 158 * over any remaining data in the current entry, if there 159 * is one, and place the input stream at the header of the 160 * next entry, and read the header and instantiate a new 161 * TarEntry from the header bytes and return that entry. 162 * If there are no more entries in the archive, null will 163 * be returned to indicate that the end of the archive has 164 * been reached. 165 * 166 * @return The next TarEntry in the archive, or null. 167 * @throws IOException on error 168 */ 169 public TarArchiveEntry getNextTarEntry() throws IOException { 170 if (hasHitEOF) { 171 return null; 172 } 173 174 if (currEntry != null) { 175 long numToSkip = entrySize - entryOffset; 176 177 while (numToSkip > 0) { 178 long skipped = skip(numToSkip); 179 if (skipped <= 0) { 180 throw new RuntimeException("failed to skip current tar entry"); 181 } 182 numToSkip -= skipped; 183 } 184 185 readBuf = null; 186 } 187 188 byte[] headerBuf = getRecord(); 189 190 if (hasHitEOF) { 191 currEntry = null; 192 return null; 193 } 194 195 currEntry = new TarArchiveEntry(headerBuf); 196 entryOffset = 0; 197 entrySize = currEntry.getSize(); 198 199 if (currEntry.isGNULongNameEntry()) { 200 // read in the name 201 StringBuffer longName = new StringBuffer(); 202 byte[] buf = new byte[SMALL_BUFFER_SIZE]; 203 int length = 0; 204 while ((length = read(buf)) >= 0) { 205 longName.append(new String(buf, 0, length)); 206 } 207 getNextEntry(); 208 if (currEntry == null) { 209 // Bugzilla: 40334 210 // Malformed tar file - long entry name not followed by entry 211 return null; 212 } 213 // remove trailing null terminator 214 if (longName.length() > 0 215 && longName.charAt(longName.length() - 1) == 0) { 216 longName.deleteCharAt(longName.length() - 1); 217 } 218 currEntry.setName(longName.toString()); 219 } 220 221 if (currEntry.isPaxHeader()){ // Process Pax headers 222 paxHeaders(); 223 } 224 225 if (currEntry.isGNUSparse()){ // Process sparse files 226 readGNUSparse(); 227 } 228 229 return currEntry; 230 } 231 232 /** 233 * Get the next record in this tar archive. This will skip 234 * over any remaining data in the current entry, if there 235 * is one, and place the input stream at the header of the 236 * next entry. 237 * If there are no more entries in the archive, null will 238 * be returned to indicate that the end of the archive has 239 * been reached. 240 * 241 * @return The next header in the archive, or null. 242 * @throws IOException on error 243 */ 244 private byte[] getRecord() throws IOException { 245 if (hasHitEOF) { 246 return null; 247 } 248 249 byte[] headerBuf = buffer.readRecord(); 250 251 if (headerBuf == null) { 252 hasHitEOF = true; 253 } else if (buffer.isEOFRecord(headerBuf)) { 254 hasHitEOF = true; 255 } 256 257 return hasHitEOF ? null : headerBuf; 258 } 259 260 private void paxHeaders() throws IOException{ 261 BufferedReader br = new BufferedReader(new InputStreamReader(this, "UTF-8")); 262 Map headers = new HashMap(); 263 // Format is "length keyword=value\n"; 264 while(true){ // get length 265 int ch; 266 int len=0; 267 int read=0; 268 while((ch = br.read()) != -1){ 269 read++; 270 if (ch == ' '){ // End of length string 271 // Get keyword 272 StringBuffer sb = new StringBuffer(); 273 while((ch = br.read()) != -1){ 274 read++; 275 if (ch == '='){ // end of keyword 276 String keyword = sb.toString(); 277 // Get rest of entry 278 char[] cbuf = new char[len-read]; 279 int got = br.read(cbuf); 280 if (got != len-read){ 281 throw new IOException("Failed to read Paxheader. Expected "+(len-read)+" chars, read "+got); 282 } 283 String value = new String(cbuf, 0 , len-read-1); // Drop trailing NL 284 headers.put(keyword, value); 285 break; 286 } 287 sb.append((char)ch); 288 } 289 break; // Processed single header 290 } 291 len *= 10; 292 len += ch - '0'; 293 } 294 if (ch == -1){ // EOF 295 break; 296 } 297 } 298 getNextEntry(); // Get the actual file entry 299 /* 300 * The following headers are defined for Pax. 301 * atime, ctime, mtime, charset: cannot use these without changing TarArchiveEntry fields 302 * comment 303 * gid, gname 304 * linkpath 305 * size 306 * uid,uname 307 */ 308 Iterator hdrs = headers.entrySet().iterator(); 309 while(hdrs.hasNext()){ 310 Entry ent = (Entry) hdrs.next(); 311 String key = (String) ent.getKey(); 312 String val = (String) ent.getValue(); 313 if ("path".equals(key)){ 314 currEntry.setName(val); 315 } else if ("linkpath".equals(key)){ 316 currEntry.setLinkName(val); 317 } else if ("gid".equals(key)){ 318 currEntry.setGroupId(Integer.parseInt(val)); 319 } else if ("gname".equals(key)){ 320 currEntry.setGroupName(val); 321 } else if ("uid".equals(key)){ 322 currEntry.setUserId(Integer.parseInt(val)); 323 } else if ("uname".equals(key)){ 324 currEntry.setUserName(val); 325 } else if ("size".equals(key)){ 326 currEntry.setSize(Long.parseLong(val)); 327 } 328 } 329 } 330 331 /** 332 * Adds the sparse chunks from the current entry to the sparse chunks, 333 * including any additional sparse entries following the current entry. 334 * 335 * @throws IOException on error 336 * 337 * @todo Sparse files get not yet really processed. 338 */ 339 private void readGNUSparse() throws IOException { 340 /* we do not really process sparse files yet 341 sparses = new ArrayList(); 342 sparses.addAll(currEntry.getSparses()); 343 */ 344 if (currEntry.isExtended()) { 345 TarArchiveSparseEntry entry; 346 do { 347 byte[] headerBuf = getRecord(); 348 if (hasHitEOF) { 349 currEntry = null; 350 break; 351 } 352 entry = new TarArchiveSparseEntry(headerBuf); 353 /* we do not really process sparse files yet 354 sparses.addAll(entry.getSparses()); 355 */ 356 } while (entry.isExtended()); 357 } 358 } 359 360 public ArchiveEntry getNextEntry() throws IOException { 361 return getNextTarEntry(); 362 } 363 364 /** 365 * Reads bytes from the current tar archive entry. 366 * 367 * This method is aware of the boundaries of the current 368 * entry in the archive and will deal with them as if they 369 * were this stream's start and EOF. 370 * 371 * @param buf The buffer into which to place bytes read. 372 * @param offset The offset at which to place bytes read. 373 * @param numToRead The number of bytes to read. 374 * @return The number of bytes read, or -1 at EOF. 375 * @throws IOException on error 376 */ 377 public int read(byte[] buf, int offset, int numToRead) throws IOException { 378 int totalRead = 0; 379 380 if (entryOffset >= entrySize) { 381 return -1; 382 } 383 384 if ((numToRead + entryOffset) > entrySize) { 385 numToRead = (int) (entrySize - entryOffset); 386 } 387 388 if (readBuf != null) { 389 int sz = (numToRead > readBuf.length) ? readBuf.length 390 : numToRead; 391 392 System.arraycopy(readBuf, 0, buf, offset, sz); 393 394 if (sz >= readBuf.length) { 395 readBuf = null; 396 } else { 397 int newLen = readBuf.length - sz; 398 byte[] newBuf = new byte[newLen]; 399 400 System.arraycopy(readBuf, sz, newBuf, 0, newLen); 401 402 readBuf = newBuf; 403 } 404 405 totalRead += sz; 406 numToRead -= sz; 407 offset += sz; 408 } 409 410 while (numToRead > 0) { 411 byte[] rec = buffer.readRecord(); 412 413 if (rec == null) { 414 // Unexpected EOF! 415 throw new IOException("unexpected EOF with " + numToRead 416 + " bytes unread. Occured at byte: " + getBytesRead()); 417 } 418 count(rec.length); 419 int sz = numToRead; 420 int recLen = rec.length; 421 422 if (recLen > sz) { 423 System.arraycopy(rec, 0, buf, offset, sz); 424 425 readBuf = new byte[recLen - sz]; 426 427 System.arraycopy(rec, sz, readBuf, 0, recLen - sz); 428 } else { 429 sz = recLen; 430 431 System.arraycopy(rec, 0, buf, offset, recLen); 432 } 433 434 totalRead += sz; 435 numToRead -= sz; 436 offset += sz; 437 } 438 439 entryOffset += totalRead; 440 441 return totalRead; 442 } 443 444 /** 445 * Whether this class is able to read the given entry. 446 * 447 * <p>May return false if the current entry is a sparse file.</p> 448 */ 449 public boolean canReadEntryData(ArchiveEntry ae) { 450 if (ae instanceof TarArchiveEntry) { 451 TarArchiveEntry te = (TarArchiveEntry) ae; 452 return !te.isGNUSparse(); 453 } 454 return false; 455 } 456 457 protected final TarArchiveEntry getCurrentEntry() { 458 return currEntry; 459 } 460 461 protected final void setCurrentEntry(TarArchiveEntry e) { 462 currEntry = e; 463 } 464 465 protected final boolean isAtEOF() { 466 return hasHitEOF; 467 } 468 469 protected final void setAtEOF(boolean b) { 470 hasHitEOF = b; 471 } 472 473 /** 474 * Checks if the signature matches what is expected for a tar file. 475 * 476 * @param signature 477 * the bytes to check 478 * @param length 479 * the number of bytes to check 480 * @return true, if this stream is a tar archive stream, false otherwise 481 */ 482 public static boolean matches(byte[] signature, int length) { 483 if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) { 484 return false; 485 } 486 487 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, 488 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) 489 && 490 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX, 491 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) 492 ){ 493 return true; 494 } 495 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU, 496 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) 497 && 498 ( 499 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE, 500 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) 501 || 502 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO, 503 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) 504 ) 505 ){ 506 return true; 507 } 508 // COMPRESS-107 - recognise Ant tar files 509 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT, 510 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) 511 && 512 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT, 513 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) 514 ){ 515 return true; 516 } 517 return false; 518 } 519 520 }