001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.ar; 020 021import java.io.EOFException; 022import java.io.IOException; 023import java.io.InputStream; 024import java.util.Arrays; 025 026import org.apache.commons.compress.archivers.ArchiveEntry; 027import org.apache.commons.compress.archivers.ArchiveInputStream; 028import org.apache.commons.compress.utils.ArchiveUtils; 029import org.apache.commons.compress.utils.IOUtils; 030 031/** 032 * Implements the "ar" archive format as an input stream. 033 * 034 * @NotThreadSafe 035 * 036 */ 037public class ArArchiveInputStream extends ArchiveInputStream { 038 039 private final InputStream input; 040 private long offset = 0; 041 private boolean closed; 042 043 /* 044 * If getNextEnxtry has been called, the entry metadata is stored in 045 * currentEntry. 046 */ 047 private ArArchiveEntry currentEntry = null; 048 049 // Storage area for extra long names (GNU ar) 050 private byte[] namebuffer = null; 051 052 /* 053 * The offset where the current entry started. -1 if no entry has been 054 * called 055 */ 056 private long entryOffset = -1; 057 058 // offsets and length of meta data parts 059 private static final int NAME_OFFSET = 0; 060 private static final int NAME_LEN = 16; 061 private static final int LAST_MODIFIED_OFFSET = NAME_LEN; 062 private static final int LAST_MODIFIED_LEN = 12; 063 private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN; 064 private static final int USER_ID_LEN = 6; 065 private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN; 066 private static final int GROUP_ID_LEN = 6; 067 private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN; 068 private static final int FILE_MODE_LEN = 8; 069 private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN; 070 private static final int LENGTH_LEN = 10; 071 072 // cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 073 private final byte[] metaData = 074 new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN]; 075 076 /** 077 * Constructs an Ar input stream with the referenced stream 078 * 079 * @param pInput 080 * the ar input stream 081 */ 082 public ArArchiveInputStream(final InputStream pInput) { 083 input = pInput; 084 closed = false; 085 } 086 087 /** 088 * Returns the next AR entry in this stream. 089 * 090 * @return the next AR entry. 091 * @throws IOException 092 * if the entry could not be read 093 */ 094 public ArArchiveEntry getNextArEntry() throws IOException { 095 if (currentEntry != null) { 096 final long entryEnd = entryOffset + currentEntry.getLength(); 097 long skipped = IOUtils.skip(input, entryEnd - offset); 098 trackReadBytes(skipped); 099 currentEntry = null; 100 } 101 102 if (offset == 0) { 103 final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER); 104 final byte[] realized = new byte[expected.length]; 105 final int read = IOUtils.readFully(input, realized); 106 trackReadBytes(read); 107 if (read != expected.length) { 108 throw new IOException("Failed to read header. Occured at byte: " + getBytesRead()); 109 } 110 if (!Arrays.equals(expected, realized)) { 111 throw new IOException("Invalid header " + ArchiveUtils.toAsciiString(realized)); 112 } 113 } 114 115 if (offset % 2 != 0) { 116 if (input.read() < 0) { 117 // hit eof 118 return null; 119 } 120 trackReadBytes(1); 121 } 122 123 { 124 final int read = IOUtils.readFully(input, metaData); 125 trackReadBytes(read); 126 if (read == 0) { 127 return null; 128 } 129 if (read < metaData.length) { 130 throw new IOException("Truncated ar archive"); 131 } 132 } 133 134 { 135 final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER); 136 final byte[] realized = new byte[expected.length]; 137 final int read = IOUtils.readFully(input, realized); 138 trackReadBytes(read); 139 if (read != expected.length) { 140 throw new IOException("Failed to read entry trailer. Occured at byte: " + getBytesRead()); 141 } 142 if (!Arrays.equals(expected, realized)) { 143 throw new IOException("Invalid entry trailer. not read the content? Occured at byte: " + getBytesRead()); 144 } 145 } 146 147 entryOffset = offset; 148 149// GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename. 150 151 // entry name is stored as ASCII string 152 String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim(); 153 if (isGNUStringTable(temp)) { // GNU extended filenames entry 154 currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN); 155 return getNextArEntry(); 156 } 157 158 long len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN); 159 if (temp.endsWith("/")) { // GNU terminator 160 temp = temp.substring(0, temp.length() - 1); 161 } else if (isGNULongName(temp)) { 162 final int off = Integer.parseInt(temp.substring(1));// get the offset 163 temp = getExtendedName(off); // convert to the long name 164 } else if (isBSDLongName(temp)) { 165 temp = getBSDLongName(temp); 166 // entry length contained the length of the file name in 167 // addition to the real length of the entry. 168 // assume file name was ASCII, there is no "standard" otherwise 169 final int nameLen = temp.length(); 170 len -= nameLen; 171 entryOffset += nameLen; 172 } 173 174 currentEntry = new ArArchiveEntry(temp, len, 175 asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true), 176 asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true), 177 asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8), 178 asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN)); 179 return currentEntry; 180 } 181 182 /** 183 * Get an extended name from the GNU extended name buffer. 184 * 185 * @param offset pointer to entry within the buffer 186 * @return the extended file name; without trailing "/" if present. 187 * @throws IOException if name not found or buffer not set up 188 */ 189 private String getExtendedName(final int offset) throws IOException { 190 if (namebuffer == null) { 191 throw new IOException("Cannot process GNU long filename as no // record was found"); 192 } 193 for (int i = offset; i < namebuffer.length; i++) { 194 if (namebuffer[i] == '\012' || namebuffer[i] == 0) { 195 if (namebuffer[i - 1] == '/') { 196 i--; // drop trailing / 197 } 198 return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset); 199 } 200 } 201 throw new IOException("Failed to read entry: " + offset); 202 } 203 204 private long asLong(final byte[] byteArray, int offset, int len) { 205 return Long.parseLong(ArchiveUtils.toAsciiString(byteArray, offset, len).trim()); 206 } 207 208 private int asInt(final byte[] byteArray, int offset, int len) { 209 return asInt(byteArray, offset, len, 10, false); 210 } 211 212 private int asInt(final byte[] byteArray, int offset, int len, final boolean treatBlankAsZero) { 213 return asInt(byteArray, offset, len, 10, treatBlankAsZero); 214 } 215 216 private int asInt(final byte[] byteArray, int offset, int len, final int base) { 217 return asInt(byteArray, offset, len, base, false); 218 } 219 220 private int asInt(final byte[] byteArray, int offset, int len, final int base, final boolean treatBlankAsZero) { 221 final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim(); 222 if (string.length() == 0 && treatBlankAsZero) { 223 return 0; 224 } 225 return Integer.parseInt(string, base); 226 } 227 228 /* 229 * (non-Javadoc) 230 * 231 * @see 232 * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry() 233 */ 234 @Override 235 public ArchiveEntry getNextEntry() throws IOException { 236 return getNextArEntry(); 237 } 238 239 /* 240 * (non-Javadoc) 241 * 242 * @see java.io.InputStream#close() 243 */ 244 @Override 245 public void close() throws IOException { 246 if (!closed) { 247 closed = true; 248 input.close(); 249 } 250 currentEntry = null; 251 } 252 253 /* 254 * (non-Javadoc) 255 * 256 * @see java.io.InputStream#read(byte[], int, int) 257 */ 258 @Override 259 public int read(final byte[] b, final int off, final int len) throws IOException { 260 if (len == 0) { 261 return 0; 262 } 263 if (currentEntry == null) { 264 throw new IllegalStateException("No current ar entry"); 265 } 266 final long entryEnd = entryOffset + currentEntry.getLength(); 267 if (len < 0 || offset >= entryEnd) { 268 return -1; 269 } 270 final int toRead = (int) Math.min(len, entryEnd - offset); 271 final int ret = this.input.read(b, off, toRead); 272 trackReadBytes(ret); 273 return ret; 274 } 275 276 /** 277 * Checks if the signature matches ASCII "!<arch>" followed by a single LF 278 * control character 279 * 280 * @param signature 281 * the bytes to check 282 * @param length 283 * the number of bytes to check 284 * @return true, if this stream is an Ar archive stream, false otherwise 285 */ 286 public static boolean matches(final byte[] signature, final int length) { 287 // 3c21 7261 6863 0a3e 288 289 return length >= 8 && signature[0] == 0x21 && 290 signature[1] == 0x3c && signature[2] == 0x61 && 291 signature[3] == 0x72 && signature[4] == 0x63 && 292 signature[5] == 0x68 && signature[6] == 0x3e && 293 signature[7] == 0x0a; 294 } 295 296 static final String BSD_LONGNAME_PREFIX = "#1/"; 297 private static final int BSD_LONGNAME_PREFIX_LEN = 298 BSD_LONGNAME_PREFIX.length(); 299 private static final String BSD_LONGNAME_PATTERN = 300 "^" + BSD_LONGNAME_PREFIX + "\\d+"; 301 302 /** 303 * Does the name look like it is a long name (or a name containing 304 * spaces) as encoded by BSD ar? 305 * 306 * <p>From the FreeBSD ar(5) man page:</p> 307 * <pre> 308 * BSD In the BSD variant, names that are shorter than 16 309 * characters and without embedded spaces are stored 310 * directly in this field. If a name has an embedded 311 * space, or if it is longer than 16 characters, then 312 * the string "#1/" followed by the decimal represen- 313 * tation of the length of the file name is placed in 314 * this field. The actual file name is stored immedi- 315 * ately after the archive header. The content of the 316 * archive member follows the file name. The ar_size 317 * field of the header (see below) will then hold the 318 * sum of the size of the file name and the size of 319 * the member. 320 * </pre> 321 * 322 * @since 1.3 323 */ 324 private static boolean isBSDLongName(final String name) { 325 return name != null && name.matches(BSD_LONGNAME_PATTERN); 326 } 327 328 /** 329 * Reads the real name from the current stream assuming the very 330 * first bytes to be read are the real file name. 331 * 332 * @see #isBSDLongName 333 * 334 * @since 1.3 335 */ 336 private String getBSDLongName(final String bsdLongName) throws IOException { 337 final int nameLen = 338 Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN)); 339 final byte[] name = new byte[nameLen]; 340 final int read = IOUtils.readFully(input, name); 341 trackReadBytes(read); 342 if (read != nameLen) { 343 throw new EOFException(); 344 } 345 return ArchiveUtils.toAsciiString(name); 346 } 347 348 private static final String GNU_STRING_TABLE_NAME = "//"; 349 350 /** 351 * Is this the name of the "Archive String Table" as used by 352 * SVR4/GNU to store long file names? 353 * 354 * <p>GNU ar stores multiple extended file names in the data section 355 * of a file with the name "//", this record is referred to by 356 * future headers.</p> 357 * 358 * <p>A header references an extended file name by storing a "/" 359 * followed by a decimal offset to the start of the file name in 360 * the extended file name data section.</p> 361 * 362 * <p>The format of the "//" file itself is simply a list of the 363 * long file names, each separated by one or more LF 364 * characters. Note that the decimal offsets are number of 365 * characters, not line or string number within the "//" file.</p> 366 */ 367 private static boolean isGNUStringTable(final String name) { 368 return GNU_STRING_TABLE_NAME.equals(name); 369 } 370 371 private void trackReadBytes(final long read) { 372 count(read); 373 if (read > 0) { 374 offset += read; 375 } 376 } 377 378 /** 379 * Reads the GNU archive String Table. 380 * 381 * @see #isGNUStringTable 382 */ 383 private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException { 384 final int bufflen = asInt(length, offset, len); // Assume length will fit in an int 385 namebuffer = new byte[bufflen]; 386 final int read = IOUtils.readFully(input, namebuffer, 0, bufflen); 387 trackReadBytes(read); 388 if (read != bufflen){ 389 throw new IOException("Failed to read complete // record: expected=" 390 + bufflen + " read=" + read); 391 } 392 return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen); 393 } 394 395 private static final String GNU_LONGNAME_PATTERN = "^/\\d+"; 396 397 /** 398 * Does the name look like it is a long name (or a name containing 399 * spaces) as encoded by SVR4/GNU ar? 400 * 401 * @see #isGNUStringTable 402 */ 403 private boolean isGNULongName(final String name) { 404 return name != null && name.matches(GNU_LONGNAME_PATTERN); 405 } 406}