001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 package org.apache.commons.compress.archivers.zip; 020 021 import java.io.ByteArrayInputStream; 022 import java.io.ByteArrayOutputStream; 023 import java.io.EOFException; 024 import java.io.IOException; 025 import java.io.InputStream; 026 import java.io.PushbackInputStream; 027 import java.util.zip.CRC32; 028 import java.util.zip.DataFormatException; 029 import java.util.zip.Inflater; 030 import java.util.zip.ZipException; 031 032 import org.apache.commons.compress.archivers.ArchiveEntry; 033 import org.apache.commons.compress.archivers.ArchiveInputStream; 034 035 /** 036 * Implements an input stream that can read Zip archives. 037 * <p> 038 * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the DEFLATE algorithm is used, as the size information 039 * is not available from the header. 040 * <p> 041 * The {@link ZipFile} class is preferred when reading from files. 042 * 043 * @see ZipFile 044 * @NotThreadSafe 045 */ 046 public class ZipArchiveInputStream extends ArchiveInputStream { 047 048 private static final int SHORT = 2; 049 private static final int WORD = 4; 050 051 /** 052 * The zip encoding to use for filenames and the file comment. 053 */ 054 private final ZipEncoding zipEncoding; 055 056 /** 057 * Whether to look for and use Unicode extra fields. 058 */ 059 private final boolean useUnicodeExtraFields; 060 061 private final InputStream in; 062 063 private final Inflater inf = new Inflater(true); 064 private final CRC32 crc = new CRC32(); 065 066 private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE]; 067 068 private ZipArchiveEntry current = null; 069 private boolean closed = false; 070 private boolean hitCentralDirectory = false; 071 private int offsetInBuffer = 0; 072 private long readBytesOfEntry = 0, bytesReadFromStream = 0; 073 private int lengthOfLastRead = 0; 074 private boolean hasDataDescriptor = false; 075 private ByteArrayInputStream lastStoredEntry = null; 076 077 private boolean allowStoredEntriesWithDataDescriptor = false; 078 079 private static final int LFH_LEN = 30; 080 /* 081 local file header signature 4 bytes (0x04034b50) 082 version needed to extract 2 bytes 083 general purpose bit flag 2 bytes 084 compression method 2 bytes 085 last mod file time 2 bytes 086 last mod file date 2 bytes 087 crc-32 4 bytes 088 compressed size 4 bytes 089 uncompressed size 4 bytes 090 file name length 2 bytes 091 extra field length 2 bytes 092 */ 093 094 public ZipArchiveInputStream(InputStream inputStream) { 095 this(inputStream, ZipEncodingHelper.UTF8, true); 096 } 097 098 /** 099 * @param encoding the encoding to use for file names, use null 100 * for the platform's default encoding 101 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 102 * Extra Fields (if present) to set the file names. 103 */ 104 public ZipArchiveInputStream(InputStream inputStream, 105 String encoding, 106 boolean useUnicodeExtraFields) { 107 this(inputStream, encoding, useUnicodeExtraFields, false); 108 } 109 110 /** 111 * @param encoding the encoding to use for file names, use null 112 * for the platform's default encoding 113 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 114 * Extra Fields (if present) to set the file names. 115 * @param allowStoredEntriesWithDataDescriptor whether the stream 116 * will try to read STORED entries that use a data descriptor 117 * @since Apache Commons Compress 1.1 118 */ 119 public ZipArchiveInputStream(InputStream inputStream, 120 String encoding, 121 boolean useUnicodeExtraFields, 122 boolean allowStoredEntriesWithDataDescriptor) { 123 zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 124 this.useUnicodeExtraFields = useUnicodeExtraFields; 125 in = new PushbackInputStream(inputStream, buf.length); 126 this.allowStoredEntriesWithDataDescriptor = 127 allowStoredEntriesWithDataDescriptor; 128 } 129 130 public ZipArchiveEntry getNextZipEntry() throws IOException { 131 if (closed || hitCentralDirectory) { 132 return null; 133 } 134 if (current != null) { 135 closeEntry(); 136 } 137 byte[] lfh = new byte[LFH_LEN]; 138 try { 139 readFully(lfh); 140 } catch (EOFException e) { 141 return null; 142 } 143 ZipLong sig = new ZipLong(lfh); 144 if (sig.equals(ZipLong.CFH_SIG)) { 145 hitCentralDirectory = true; 146 return null; 147 } 148 if (!sig.equals(ZipLong.LFH_SIG)) { 149 return null; 150 } 151 152 int off = WORD; 153 current = new ZipArchiveEntry(); 154 155 int versionMadeBy = ZipShort.getValue(lfh, off); 156 off += SHORT; 157 current.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) 158 & ZipFile.NIBLET_MASK); 159 160 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfh, off); 161 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 162 final ZipEncoding entryEncoding = 163 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 164 hasDataDescriptor = gpFlag.usesDataDescriptor(); 165 current.setGeneralPurposeBit(gpFlag); 166 167 off += SHORT; 168 169 current.setMethod(ZipShort.getValue(lfh, off)); 170 off += SHORT; 171 172 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfh, off)); 173 current.setTime(time); 174 off += WORD; 175 176 if (!hasDataDescriptor) { 177 current.setCrc(ZipLong.getValue(lfh, off)); 178 off += WORD; 179 180 current.setCompressedSize(ZipLong.getValue(lfh, off)); 181 off += WORD; 182 183 current.setSize(ZipLong.getValue(lfh, off)); 184 off += WORD; 185 } else { 186 off += 3 * WORD; 187 } 188 189 int fileNameLen = ZipShort.getValue(lfh, off); 190 191 off += SHORT; 192 193 int extraLen = ZipShort.getValue(lfh, off); 194 off += SHORT; 195 196 byte[] fileName = new byte[fileNameLen]; 197 readFully(fileName); 198 current.setName(entryEncoding.decode(fileName), fileName); 199 200 byte[] extraData = new byte[extraLen]; 201 readFully(extraData); 202 current.setExtra(extraData); 203 204 if (!hasUTF8Flag && useUnicodeExtraFields) { 205 ZipUtil.setNameAndCommentFromExtraFields(current, fileName, null); 206 } 207 return current; 208 } 209 210 /** {@inheritDoc} */ 211 public ArchiveEntry getNextEntry() throws IOException { 212 return getNextZipEntry(); 213 } 214 215 /** 216 * Whether this class is able to read the given entry. 217 * 218 * <p>May return false if it is set up to use encryption or a 219 * compression method that hasn't been implemented yet.</p> 220 * @since Apache Commons Compress 1.1 221 */ 222 public boolean canReadEntryData(ArchiveEntry ae) { 223 if (ae instanceof ZipArchiveEntry) { 224 ZipArchiveEntry ze = (ZipArchiveEntry) ae; 225 return ZipUtil.canHandleEntryData(ze) 226 && supportsDataDescriptorFor(ze); 227 228 } 229 return false; 230 } 231 232 public int read(byte[] buffer, int start, int length) throws IOException { 233 if (closed) { 234 throw new IOException("The stream is closed"); 235 } 236 if (inf.finished() || current == null) { 237 return -1; 238 } 239 240 // avoid int overflow, check null buffer 241 if (start <= buffer.length && length >= 0 && start >= 0 242 && buffer.length - start >= length) { 243 ZipUtil.checkRequestedFeatures(current); 244 if (!supportsDataDescriptorFor(current)) { 245 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException 246 .Feature 247 .DATA_DESCRIPTOR, 248 current); 249 } 250 251 if (current.getMethod() == ZipArchiveOutputStream.STORED) { 252 if (hasDataDescriptor) { 253 if (lastStoredEntry == null) { 254 readStoredEntry(); 255 } 256 return lastStoredEntry.read(buffer, start, length); 257 } 258 259 long csize = current.getSize(); 260 if (readBytesOfEntry >= csize) { 261 return -1; 262 } 263 if (offsetInBuffer >= lengthOfLastRead) { 264 offsetInBuffer = 0; 265 if ((lengthOfLastRead = in.read(buf)) == -1) { 266 return -1; 267 } 268 count(lengthOfLastRead); 269 bytesReadFromStream += lengthOfLastRead; 270 } 271 int toRead = length > lengthOfLastRead 272 ? lengthOfLastRead - offsetInBuffer 273 : length; 274 if ((csize - readBytesOfEntry) < toRead) { 275 // if it is smaller than toRead then it fits into an int 276 toRead = (int) (csize - readBytesOfEntry); 277 } 278 System.arraycopy(buf, offsetInBuffer, buffer, start, toRead); 279 offsetInBuffer += toRead; 280 readBytesOfEntry += toRead; 281 crc.update(buffer, start, toRead); 282 return toRead; 283 } 284 285 if (inf.needsInput()) { 286 fill(); 287 if (lengthOfLastRead > 0) { 288 bytesReadFromStream += lengthOfLastRead; 289 } 290 } 291 int read = 0; 292 try { 293 read = inf.inflate(buffer, start, length); 294 } catch (DataFormatException e) { 295 throw new ZipException(e.getMessage()); 296 } 297 if (read == 0) { 298 if (inf.finished()) { 299 return -1; 300 } else if (lengthOfLastRead == -1) { 301 throw new IOException("Truncated ZIP file"); 302 } 303 } 304 crc.update(buffer, start, read); 305 return read; 306 } 307 throw new ArrayIndexOutOfBoundsException(); 308 } 309 310 public void close() throws IOException { 311 if (!closed) { 312 closed = true; 313 in.close(); 314 } 315 } 316 317 /** 318 * Skips over and discards value bytes of data from this input 319 * stream. 320 * 321 * <p>This implementation may end up skipping over some smaller 322 * number of bytes, possibly 0, if an only if it reaches the end 323 * of the underlying stream.</p> 324 * 325 * <p>The actual number of bytes skipped is returned.</p> 326 * 327 * @param value the number of bytes to be skipped. 328 * @return the actual number of bytes skipped. 329 * @throws IOException - if an I/O error occurs. 330 * @throws IllegalArgumentException - if value is negative. 331 */ 332 public long skip(long value) throws IOException { 333 if (value >= 0) { 334 long skipped = 0; 335 byte[] b = new byte[1024]; 336 while (skipped < value) { 337 long rem = value - skipped; 338 int x = read(b, 0, (int) (b.length > rem ? rem : b.length)); 339 if (x == -1) { 340 return skipped; 341 } 342 skipped += x; 343 } 344 return skipped; 345 } 346 throw new IllegalArgumentException(); 347 } 348 349 /** 350 * Checks if the signature matches what is expected for a zip file. 351 * Does not currently handle self-extracting zips which may have arbitrary 352 * leading content. 353 * 354 * @param signature 355 * the bytes to check 356 * @param length 357 * the number of bytes to check 358 * @return true, if this stream is a zip archive stream, false otherwise 359 */ 360 public static boolean matches(byte[] signature, int length) { 361 if (length < ZipArchiveOutputStream.LFH_SIG.length) { 362 return false; 363 } 364 365 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file 366 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG); // empty zip 367 } 368 369 private static boolean checksig(byte[] signature, byte[] expected){ 370 for (int i = 0; i < expected.length; i++) { 371 if (signature[i] != expected[i]) { 372 return false; 373 } 374 } 375 return true; 376 } 377 378 /** 379 * Closes the current ZIP archive entry and positions the underlying 380 * stream to the beginning of the next entry. All per-entry variables 381 * and data structures are cleared. 382 * <p> 383 * If the compressed size of this entry is included in the entry header, 384 * then any outstanding bytes are simply skipped from the underlying 385 * stream without uncompressing them. This allows an entry to be safely 386 * closed even if the compression method is unsupported. 387 * <p> 388 * In case we don't know the compressed size of this entry or have 389 * already buffered too much data from the underlying stream to support 390 * uncompression, then the uncompression process is completed and the 391 * end position of the stream is adjusted based on the result of that 392 * process. 393 * 394 * @throws IOException if an error occurs 395 */ 396 private void closeEntry() throws IOException { 397 if (closed) { 398 throw new IOException("The stream is closed"); 399 } 400 if (current == null) { 401 return; 402 } 403 404 // Ensure all entry bytes are read 405 if (bytesReadFromStream <= current.getCompressedSize() 406 && !hasDataDescriptor) { 407 long remaining = current.getCompressedSize() - bytesReadFromStream; 408 while (remaining > 0) { 409 long n = in.read(buf, 0, (int) Math.min(buf.length, remaining)); 410 if (n < 0) { 411 throw new EOFException( 412 "Truncated ZIP entry: " + current.getName()); 413 } else { 414 count(n); 415 remaining -= n; 416 } 417 } 418 } else { 419 skip(Long.MAX_VALUE); 420 421 long inB; 422 if (current.getMethod() == ZipArchiveOutputStream.DEFLATED) { 423 inB = ZipUtil.adjustToLong(inf.getTotalIn()); 424 } else { 425 inB = readBytesOfEntry; 426 } 427 428 // this is at most a single read() operation and can't 429 // exceed the range of int 430 int diff = (int) (bytesReadFromStream - inB); 431 432 // Pushback any required bytes 433 if (diff > 0) { 434 ((PushbackInputStream) in).unread( 435 buf, lengthOfLastRead - diff, diff); 436 pushedBackBytes(diff); 437 } 438 } 439 440 if (lastStoredEntry == null && hasDataDescriptor) { 441 readDataDescriptor(); 442 } 443 444 inf.reset(); 445 readBytesOfEntry = bytesReadFromStream = 0L; 446 offsetInBuffer = lengthOfLastRead = 0; 447 crc.reset(); 448 current = null; 449 lastStoredEntry = null; 450 } 451 452 private void fill() throws IOException { 453 if (closed) { 454 throw new IOException("The stream is closed"); 455 } 456 if ((lengthOfLastRead = in.read(buf)) > 0) { 457 count(lengthOfLastRead); 458 inf.setInput(buf, 0, lengthOfLastRead); 459 } 460 } 461 462 private void readFully(byte[] b) throws IOException { 463 int count = 0, x = 0; 464 while (count != b.length) { 465 count += x = in.read(b, count, b.length - count); 466 if (x == -1) { 467 throw new EOFException(); 468 } 469 count(x); 470 } 471 } 472 473 private void readDataDescriptor() throws IOException { 474 byte[] b = new byte[WORD]; 475 readFully(b); 476 ZipLong val = new ZipLong(b); 477 if (ZipLong.DD_SIG.equals(val)) { 478 // data descriptor with signature, skip sig 479 readFully(b); 480 val = new ZipLong(b); 481 } 482 current.setCrc(val.getValue()); 483 readFully(b); 484 current.setCompressedSize(new ZipLong(b).getValue()); 485 readFully(b); 486 current.setSize(new ZipLong(b).getValue()); 487 } 488 489 /** 490 * Whether this entry requires a data descriptor this library can work with. 491 * 492 * @return true if allowStoredEntriesWithDataDescriptor is true, 493 * the entry doesn't require any data descriptor or the method is 494 * DEFLATED. 495 */ 496 private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) { 497 return allowStoredEntriesWithDataDescriptor || 498 !entry.getGeneralPurposeBit().usesDataDescriptor() 499 || entry.getMethod() == ZipArchiveEntry.DEFLATED; 500 } 501 502 /** 503 * Caches a stored entry that uses the data descriptor. 504 * 505 * <ul> 506 * <li>Reads a stored entry until the signature of a local file 507 * header, central directory header or data descriptor has been 508 * found.</li> 509 * <li>Stores all entry data in lastStoredEntry.</p> 510 * <li>Rewinds the stream to position at the data 511 * descriptor.</li> 512 * <li>reads the data descriptor</li> 513 * </ul> 514 * 515 * <p>After calling this method the entry should know its size, 516 * the entry's data is cached and the stream is positioned at the 517 * next local file or central directory header.</p> 518 */ 519 private void readStoredEntry() throws IOException { 520 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 521 byte[] LFH = ZipLong.LFH_SIG.getBytes(); 522 byte[] CFH = ZipLong.CFH_SIG.getBytes(); 523 byte[] DD = ZipLong.DD_SIG.getBytes(); 524 int off = 0; 525 boolean done = false; 526 527 while (!done) { 528 int r = in.read(buf, off, ZipArchiveOutputStream.BUFFER_SIZE - off); 529 if (r <= 0) { 530 // read the whole archive without ever finding a 531 // central directory 532 throw new IOException("Truncated ZIP file"); 533 } 534 if (r + off < 4) { 535 // buf is too small to check for a signature, loop 536 off += r; 537 continue; 538 } 539 540 int readTooMuch = 0; 541 for (int i = 0; !done && i < r - 4; i++) { 542 if (buf[i] == LFH[0] && buf[i + 1] == LFH[1]) { 543 if ((buf[i + 2] == LFH[2] && buf[i + 3] == LFH[3]) 544 || (buf[i] == CFH[2] && buf[i + 3] == CFH[3])) { 545 // found a LFH or CFH: 546 readTooMuch = off + r - i - 12 /* dd without signature */; 547 done = true; 548 } 549 else if (buf[i + 2] == DD[2] && buf[i + 3] == DD[3]) { 550 // found DD: 551 readTooMuch = off + r - i; 552 done = true; 553 } 554 if (done) { 555 // * push back bytes read in excess as well as the data 556 // descriptor 557 // * copy the remaining bytes to cache 558 // * read data descriptor 559 ((PushbackInputStream) in).unread(buf, off + r - readTooMuch, readTooMuch); 560 bos.write(buf, 0, i); 561 readDataDescriptor(); 562 } 563 } 564 } 565 if (!done) { 566 // worst case we've read a data descriptor without a 567 // signature (12 bytes) plus the first three bytes of 568 // a LFH or CFH signature 569 // save the last 15 bytes in the buffer, cache 570 // anything in front of that, read on 571 if (off + r > 15) { 572 bos.write(buf, 0, off + r - 15); 573 System.arraycopy(buf, off + r - 15, buf, 0, 15); 574 off = 15; 575 } else { 576 off += r; 577 } 578 } 579 } 580 581 byte[] b = bos.toByteArray(); 582 lastStoredEntry = new ByteArrayInputStream(b); 583 } 584 }