001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.lz4; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.util.Arrays; 024 025import org.apache.commons.compress.compressors.CompressorInputStream; 026import org.apache.commons.compress.utils.BoundedInputStream; 027import org.apache.commons.compress.utils.ByteUtils; 028import org.apache.commons.compress.utils.ChecksumCalculatingInputStream; 029import org.apache.commons.compress.utils.CountingInputStream; 030import org.apache.commons.compress.utils.IOUtils; 031import org.apache.commons.compress.utils.InputStreamStatistics; 032 033/** 034 * CompressorInputStream for the LZ4 frame format. 035 * 036 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p> 037 * 038 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a> 039 * @since 1.14 040 * @NotThreadSafe 041 */ 042public class FramedLZ4CompressorInputStream extends CompressorInputStream 043 implements InputStreamStatistics { 044 045 // used by FramedLZ4CompressorOutputStream as well 046 static final byte[] LZ4_SIGNATURE = new byte[] { //NOSONAR 047 4, 0x22, 0x4d, 0x18 048 }; 049 private static final byte[] SKIPPABLE_FRAME_TRAILER = new byte[] { 050 0x2a, 0x4d, 0x18 051 }; 052 private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50; 053 054 static final int VERSION_MASK = 0xC0; 055 static final int SUPPORTED_VERSION = 0x40; 056 static final int BLOCK_INDEPENDENCE_MASK = 0x20; 057 static final int BLOCK_CHECKSUM_MASK = 0x10; 058 static final int CONTENT_SIZE_MASK = 0x08; 059 static final int CONTENT_CHECKSUM_MASK = 0x04; 060 static final int BLOCK_MAX_SIZE_MASK = 0x70; 061 static final int UNCOMPRESSED_FLAG_MASK = 0x80000000; 062 063 // used in no-arg read method 064 private final byte[] oneByte = new byte[1]; 065 066 private final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() { 067 @Override 068 public int getAsByte() throws IOException { 069 return readOneByte(); 070 } 071 }; 072 073 private final CountingInputStream inputStream; 074 private final boolean decompressConcatenated; 075 076 private boolean expectBlockChecksum; 077 private boolean expectBlockDependency; 078 private boolean expectContentSize; 079 private boolean expectContentChecksum; 080 081 private InputStream currentBlock; 082 private boolean endReached, inUncompressed; 083 084 // used for frame header checksum and content checksum, if present 085 private final XXHash32 contentHash = new XXHash32(); 086 087 // used for block checksum, if present 088 private final XXHash32 blockHash = new XXHash32(); 089 090 // only created if the frame doesn't set the block independence flag 091 private byte[] blockDependencyBuffer; 092 093 /** 094 * Creates a new input stream that decompresses streams compressed 095 * using the LZ4 frame format and stops after decompressing the 096 * first frame. 097 * @param in the InputStream from which to read the compressed data 098 * @throws IOException if reading fails 099 */ 100 public FramedLZ4CompressorInputStream(InputStream in) throws IOException { 101 this(in, false); 102 } 103 104 /** 105 * Creates a new input stream that decompresses streams compressed 106 * using the LZ4 frame format. 107 * @param in the InputStream from which to read the compressed data 108 * @param decompressConcatenated if true, decompress until the end 109 * of the input; if false, stop after the first LZ4 frame 110 * and leave the input position to point to the next byte 111 * after the frame stream 112 * @throws IOException if reading fails 113 */ 114 public FramedLZ4CompressorInputStream(InputStream in, boolean decompressConcatenated) throws IOException { 115 this.inputStream = new CountingInputStream(in); 116 this.decompressConcatenated = decompressConcatenated; 117 init(true); 118 } 119 120 /** {@inheritDoc} */ 121 @Override 122 public int read() throws IOException { 123 return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF; 124 } 125 126 /** {@inheritDoc} */ 127 @Override 128 public void close() throws IOException { 129 try { 130 if (currentBlock != null) { 131 currentBlock.close(); 132 currentBlock = null; 133 } 134 } finally { 135 inputStream.close(); 136 } 137 } 138 139 /** {@inheritDoc} */ 140 @Override 141 public int read(final byte[] b, final int off, final int len) throws IOException { 142 if (len == 0) { 143 return 0; 144 } 145 if (endReached) { 146 return -1; 147 } 148 int r = readOnce(b, off, len); 149 if (r == -1) { 150 nextBlock(); 151 if (!endReached) { 152 r = readOnce(b, off, len); 153 } 154 } 155 if (r != -1) { 156 if (expectBlockDependency) { 157 appendToBlockDependencyBuffer(b, off, r); 158 } 159 if (expectContentChecksum) { 160 contentHash.update(b, off, r); 161 } 162 } 163 return r; 164 } 165 166 /** 167 * @since 1.17 168 */ 169 @Override 170 public long getCompressedCount() { 171 return inputStream.getBytesRead(); 172 } 173 174 private void init(boolean firstFrame) throws IOException { 175 if (readSignature(firstFrame)) { 176 readFrameDescriptor(); 177 nextBlock(); 178 } 179 } 180 181 private boolean readSignature(boolean firstFrame) throws IOException { 182 String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage"; 183 final byte[] b = new byte[4]; 184 int read = IOUtils.readFully(inputStream, b); 185 count(read); 186 if (0 == read && !firstFrame) { 187 // good LZ4 frame and nothing after it 188 endReached = true; 189 return false; 190 } 191 if (4 != read) { 192 throw new IOException(garbageMessage); 193 } 194 195 read = skipSkippableFrame(b); 196 if (0 == read && !firstFrame) { 197 // good LZ4 frame with only some skippable frames after it 198 endReached = true; 199 return false; 200 } 201 if (4 != read || !matches(b, 4)) { 202 throw new IOException(garbageMessage); 203 } 204 return true; 205 } 206 207 private void readFrameDescriptor() throws IOException { 208 int flags = readOneByte(); 209 if (flags == -1) { 210 throw new IOException("Premature end of stream while reading frame flags"); 211 } 212 contentHash.update(flags); 213 if ((flags & VERSION_MASK) != SUPPORTED_VERSION) { 214 throw new IOException("Unsupported version " + (flags >> 6)); 215 } 216 expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0; 217 if (expectBlockDependency) { 218 if (blockDependencyBuffer == null) { 219 blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]; 220 } 221 } else { 222 blockDependencyBuffer = null; 223 } 224 expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0; 225 expectContentSize = (flags & CONTENT_SIZE_MASK) != 0; 226 expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0; 227 int bdByte = readOneByte(); 228 if (bdByte == -1) { // max size is irrelevant for this implementation 229 throw new IOException("Premature end of stream while reading frame BD byte"); 230 } 231 contentHash.update(bdByte); 232 if (expectContentSize) { // for now we don't care, contains the uncompressed size 233 byte[] contentSize = new byte[8]; 234 int skipped = IOUtils.readFully(inputStream, contentSize); 235 count(skipped); 236 if (8 != skipped) { 237 throw new IOException("Premature end of stream while reading content size"); 238 } 239 contentHash.update(contentSize, 0, contentSize.length); 240 } 241 int headerHash = readOneByte(); 242 if (headerHash == -1) { // partial hash of header. 243 throw new IOException("Premature end of stream while reading frame header checksum"); 244 } 245 int expectedHash = (int) ((contentHash.getValue() >> 8) & 0xff); 246 contentHash.reset(); 247 if (headerHash != expectedHash) { 248 throw new IOException("Frame header checksum mismatch"); 249 } 250 } 251 252 private void nextBlock() throws IOException { 253 maybeFinishCurrentBlock(); 254 long len = ByteUtils.fromLittleEndian(supplier, 4); 255 boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0; 256 int realLen = (int) (len & (~UNCOMPRESSED_FLAG_MASK)); 257 if (realLen < 0) { 258 throw new IOException("Found illegal block with negative size"); 259 } 260 if (realLen == 0) { 261 verifyContentChecksum(); 262 if (!decompressConcatenated) { 263 endReached = true; 264 } else { 265 init(false); 266 } 267 return; 268 } 269 InputStream capped = new BoundedInputStream(inputStream, realLen); 270 if (expectBlockChecksum) { 271 capped = new ChecksumCalculatingInputStream(blockHash, capped); 272 } 273 if (uncompressed) { 274 inUncompressed = true; 275 currentBlock = capped; 276 } else { 277 inUncompressed = false; 278 BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped); 279 if (expectBlockDependency) { 280 s.prefill(blockDependencyBuffer); 281 } 282 currentBlock = s; 283 } 284 } 285 286 private void maybeFinishCurrentBlock() throws IOException { 287 if (currentBlock != null) { 288 currentBlock.close(); 289 currentBlock = null; 290 if (expectBlockChecksum) { 291 verifyChecksum(blockHash, "block"); 292 blockHash.reset(); 293 } 294 } 295 } 296 297 private void verifyContentChecksum() throws IOException { 298 if (expectContentChecksum) { 299 verifyChecksum(contentHash, "content"); 300 } 301 contentHash.reset(); 302 } 303 304 private void verifyChecksum(XXHash32 hash, String kind) throws IOException { 305 byte[] checksum = new byte[4]; 306 int read = IOUtils.readFully(inputStream, checksum); 307 count(read); 308 if (4 != read) { 309 throw new IOException("Premature end of stream while reading " + kind + " checksum"); 310 } 311 long expectedHash = hash.getValue(); 312 if (expectedHash != ByteUtils.fromLittleEndian(checksum)) { 313 throw new IOException(kind + " checksum mismatch."); 314 } 315 } 316 317 private int readOneByte() throws IOException { 318 final int b = inputStream.read(); 319 if (b != -1) { 320 count(1); 321 return b & 0xFF; 322 } 323 return -1; 324 } 325 326 private int readOnce(byte[] b, int off, int len) throws IOException { 327 if (inUncompressed) { 328 int cnt = currentBlock.read(b, off, len); 329 count(cnt); 330 return cnt; 331 } 332 BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock; 333 long before = l.getBytesRead(); 334 int cnt = currentBlock.read(b, off, len); 335 count(l.getBytesRead() - before); 336 return cnt; 337 } 338 339 private static boolean isSkippableFrameSignature(byte[] b) { 340 if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) { 341 return false; 342 } 343 for (int i = 1; i < 4; i++) { 344 if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) { 345 return false; 346 } 347 } 348 return true; 349 } 350 351 /** 352 * Skips over the contents of a skippable frame as well as 353 * skippable frames following it. 354 * 355 * <p>It then tries to read four more bytes which are supposed to 356 * hold an LZ4 signature and returns the number of bytes read 357 * while storing the bytes in the given array.</p> 358 */ 359 private int skipSkippableFrame(byte[] b) throws IOException { 360 int read = 4; 361 while (read == 4 && isSkippableFrameSignature(b)) { 362 final long len = ByteUtils.fromLittleEndian(supplier, 4); 363 if (len < 0) { 364 throw new IOException("Found illegal skippable frame with negative size"); 365 } 366 long skipped = IOUtils.skip(inputStream, len); 367 count(skipped); 368 if (len != skipped) { 369 throw new IOException("Premature end of stream while skipping frame"); 370 } 371 read = IOUtils.readFully(inputStream, b); 372 count(read); 373 } 374 return read; 375 } 376 377 private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) { 378 len = Math.min(len, blockDependencyBuffer.length); 379 if (len > 0) { 380 int keep = blockDependencyBuffer.length - len; 381 if (keep > 0) { 382 // move last keep bytes towards the start of the buffer 383 System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep); 384 } 385 // append new data 386 System.arraycopy(b, off, blockDependencyBuffer, keep, len); 387 } 388 } 389 390 /** 391 * Checks if the signature matches what is expected for a .lz4 file. 392 * 393 * <p>.lz4 files start with a four byte signature.</p> 394 * 395 * @param signature the bytes to check 396 * @param length the number of bytes to check 397 * @return true if this is a .sz stream, false otherwise 398 */ 399 public static boolean matches(final byte[] signature, final int length) { 400 401 if (length < LZ4_SIGNATURE.length) { 402 return false; 403 } 404 405 byte[] shortenedSig = signature; 406 if (signature.length > LZ4_SIGNATURE.length) { 407 shortenedSig = new byte[LZ4_SIGNATURE.length]; 408 System.arraycopy(signature, 0, shortenedSig, 0, LZ4_SIGNATURE.length); 409 } 410 411 return Arrays.equals(shortenedSig, LZ4_SIGNATURE); 412 } 413}