001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.imaging.formats.jpeg.iptc; 019 020import static org.apache.commons.imaging.common.BinaryFunctions.read2Bytes; 021import static org.apache.commons.imaging.common.BinaryFunctions.read4Bytes; 022import static org.apache.commons.imaging.common.BinaryFunctions.readByte; 023import static org.apache.commons.imaging.common.BinaryFunctions.readBytes; 024import static org.apache.commons.imaging.common.BinaryFunctions.slice; 025import static org.apache.commons.imaging.common.BinaryFunctions.startsWith; 026 027import java.io.ByteArrayInputStream; 028import java.io.ByteArrayOutputStream; 029import java.io.IOException; 030import java.io.InputStream; 031import java.nio.ByteOrder; 032import java.nio.charset.StandardCharsets; 033import java.util.ArrayList; 034import java.util.Collections; 035import java.util.Comparator; 036import java.util.List; 037import java.util.Map; 038import java.util.logging.Level; 039import java.util.logging.Logger; 040 041import org.apache.commons.imaging.ImageReadException; 042import org.apache.commons.imaging.ImageWriteException; 043import org.apache.commons.imaging.ImagingConstants; 044import org.apache.commons.imaging.common.BinaryFileParser; 045import org.apache.commons.imaging.common.BinaryOutputStream; 046import org.apache.commons.imaging.common.ByteConversions; 047import org.apache.commons.imaging.formats.jpeg.JpegConstants; 048import org.apache.commons.imaging.internal.Debug; 049 050public class IptcParser extends BinaryFileParser { 051 052 private static final Logger LOGGER = Logger.getLogger(IptcParser.class.getName()); 053 054 private static final ByteOrder APP13_BYTE_ORDER = ByteOrder.BIG_ENDIAN; 055 056 public IptcParser() { 057 setByteOrder(ByteOrder.BIG_ENDIAN); 058 } 059 060 public boolean isPhotoshopJpegSegment(final byte[] segmentData) { 061 if (!startsWith(segmentData, 062 JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING)) { 063 return false; 064 } 065 066 final int index = JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size(); 067 return (index + 4) <= segmentData.length 068 && ByteConversions.toInt(segmentData, index, APP13_BYTE_ORDER) == JpegConstants.CONST_8BIM; 069 } 070 071 /* 072 * In practice, App13 segments are only used for Photoshop/IPTC metadata. 073 * However, we should not treat App13 signatures without Photoshop's 074 * signature as Photoshop/IPTC segments. 075 * 076 * A Photoshop/IPTC App13 segment begins with the Photoshop Identification 077 * string. 078 * 079 * There follows 0-N blocks (Photoshop calls them "Image Resource Blocks"). 080 * 081 * Each block has the following structure: 082 * 083 * 1. 4-byte type. This is always "8BIM" for blocks in a Photoshop App13 084 * segment. 2. 2-byte id. IPTC data is stored in blocks with id 0x0404, aka. 085 * IPTC_NAA_RECORD_IMAGE_RESOURCE_ID 3. Block name as a Pascal String. This 086 * is padded to have an even length. 4. 4-byte size (in bytes). 5. Block 087 * data. This is also padded to have an even length. 088 * 089 * The block data consists of a 0-N records. A record has the following 090 * structure: 091 * 092 * 1. 2-byte prefix. The value is always 0x1C02 2. 1-byte record type. The 093 * record types are documented by the IPTC. See IptcConstants. 3. 2-byte 094 * record size (in bytes). 4. Record data, "record size" bytes long. 095 * 096 * Record data (unlike block data) is NOT padded to have an even length. 097 * 098 * Record data, for IPTC record, should always be ISO-8859-1. But according 099 * to SANSELAN-33, this isn't always the case. 100 * 101 * The exception is the first record in the block, which must always be a 102 * record version record, whose value is a two-byte number; the value is 103 * 0x02. 104 * 105 * Some IPTC blocks are missing this first "record version" record, so we 106 * don't require it. 107 */ 108 public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final Map<String, Object> params) 109 throws ImageReadException, IOException { 110 final boolean strict = params != null && Boolean.TRUE.equals(params.get(ImagingConstants.PARAM_KEY_STRICT)); 111 112 return parsePhotoshopSegment(bytes, strict); 113 } 114 115 public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final boolean strict) throws ImageReadException, 116 IOException { 117 final List<IptcRecord> records = new ArrayList<>(); 118 119 final List<IptcBlock> blocks = parseAllBlocks(bytes, strict); 120 121 for (final IptcBlock block : blocks) { 122 // Ignore everything but IPTC data. 123 if (!block.isIPTCBlock()) { 124 continue; 125 } 126 127 records.addAll(parseIPTCBlock(block.blockData)); 128 } 129 130 return new PhotoshopApp13Data(records, blocks); 131 } 132 133 protected List<IptcRecord> parseIPTCBlock(final byte[] bytes) 134 throws IOException { 135 final List<IptcRecord> elements = new ArrayList<>(); 136 137 int index = 0; 138 // Integer recordVersion = null; 139 while (index + 1 < bytes.length) { 140 final int tagMarker = 0xff & bytes[index++]; 141 Debug.debug("tagMarker: " + tagMarker + " (0x" + Integer.toHexString(tagMarker) + ")"); 142 143 if (tagMarker != IptcConstants.IPTC_RECORD_TAG_MARKER) { 144 if (LOGGER.isLoggable(Level.FINE)) { 145 LOGGER.fine("Unexpected record tag marker in IPTC data."); 146 } 147 return elements; 148 } 149 150 final int recordNumber = 0xff & bytes[index++]; 151 Debug.debug("recordNumber: " + recordNumber + " (0x" + Integer.toHexString(recordNumber) + ")"); 152 153 // int recordPrefix = convertByteArrayToShort("recordPrefix", index, 154 // bytes); 155 // if (verbose) 156 // Debug.debug("recordPrefix", recordPrefix + " (0x" 157 // + Integer.toHexString(recordPrefix) + ")"); 158 // index += 2; 159 // 160 // if (recordPrefix != IPTC_RECORD_PREFIX) 161 // { 162 // if (verbose) 163 // System.out 164 // .println("Unexpected record prefix in IPTC data!"); 165 // return elements; 166 // } 167 168 // throw new ImageReadException( 169 // "Unexpected record prefix in IPTC data."); 170 171 final int recordType = 0xff & bytes[index]; 172 Debug.debug("recordType: " + recordType + " (0x" + Integer.toHexString(recordType) + ")"); 173 index++; 174 175 final int recordSize = ByteConversions.toUInt16(bytes, index, getByteOrder()); 176 index += 2; 177 178 final boolean extendedDataset = recordSize > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE; 179 final int dataFieldCountLength = recordSize & 0x7fff; 180 if (extendedDataset) { 181 Debug.debug("extendedDataset. dataFieldCountLength: " + dataFieldCountLength); 182 } 183 if (extendedDataset) { 184 // ignore extended dataset and everything after. 185 return elements; 186 } 187 188 final byte[] recordData = slice(bytes, index, recordSize); 189 index += recordSize; 190 191 // Debug.debug("recordSize", recordSize + " (0x" 192 // + Integer.toHexString(recordSize) + ")"); 193 194 if (recordNumber != IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER) { 195 continue; 196 } 197 198 if (recordType == 0) { 199 if (LOGGER.isLoggable(Level.FINE)) { 200 LOGGER.fine("ignore record version record! " + elements.size()); 201 } 202 // ignore "record version" record; 203 continue; 204 } 205 // if (recordVersion == null) 206 // { 207 // // The first record in a JPEG/Photoshop IPTC block must be 208 // // the record version. 209 // if (recordType != 0) 210 // throw new ImageReadException("Missing record version: " 211 // + recordType); 212 // recordVersion = new Integer(convertByteArrayToShort( 213 // "recordNumber", recordData)); 214 // 215 // if (recordSize != 2) 216 // throw new ImageReadException( 217 // "Invalid record version record size: " + recordSize); 218 // 219 // // JPEG/Photoshop IPTC metadata is always in Record version 220 // // 2 221 // if (recordVersion.intValue() != 2) 222 // throw new ImageReadException( 223 // "Invalid IPTC record version: " + recordVersion); 224 // 225 // // Debug.debug("recordVersion", recordVersion); 226 // continue; 227 // } 228 229 final String value = new String(recordData, StandardCharsets.ISO_8859_1); 230 231 final IptcType iptcType = IptcTypeLookup.getIptcType(recordType); 232 233 // Debug.debug("iptcType", iptcType); 234 // debugByteArray("iptcData", iptcData); 235 // Debug.debug(); 236 237 // if (recordType == IPTC_TYPE_CREDIT.type 238 // || recordType == IPTC_TYPE_OBJECT_NAME.type) 239 // { 240 // this.debugByteArray("recordData", recordData); 241 // Debug.debug("index", IPTC_TYPE_CREDIT.name); 242 // } 243 244 final IptcRecord element = new IptcRecord(iptcType, value); 245 elements.add(element); 246 } 247 248 return elements; 249 } 250 251 protected List<IptcBlock> parseAllBlocks(final byte[] bytes, 252 final boolean strict) throws ImageReadException, IOException { 253 final List<IptcBlock> blocks = new ArrayList<>(); 254 255 try (InputStream bis = new ByteArrayInputStream(bytes)) { 256 257 // Note that these are unsigned quantities. Name is always an even 258 // number of bytes (including the 1st byte, which is the size.) 259 260 final byte[] idString = readBytes("", bis, 261 JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size(), 262 "App13 Segment missing identification string"); 263 if (!JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.equals(idString)) { 264 throw new ImageReadException("Not a Photoshop App13 Segment"); 265 } 266 267 // int index = PHOTOSHOP_IDENTIFICATION_STRING.length; 268 269 while (true) { 270 final int imageResourceBlockSignature; 271 try { 272 imageResourceBlockSignature = read4Bytes("", bis, 273 "Image Resource Block missing identification string", APP13_BYTE_ORDER); 274 } catch (final IOException ioEx) { 275 break; 276 } 277 if (imageResourceBlockSignature != JpegConstants.CONST_8BIM) { 278 throw new ImageReadException( 279 "Invalid Image Resource Block Signature"); 280 } 281 282 final int blockType = read2Bytes("", bis, "Image Resource Block missing type", APP13_BYTE_ORDER); 283 Debug.debug("blockType: " + blockType + " (0x" + Integer.toHexString(blockType) + ")"); 284 285 final int blockNameLength = readByte("Name length", bis, "Image Resource Block missing name length"); 286 if (blockNameLength > 0) { 287 Debug.debug("blockNameLength: " + blockNameLength + " (0x" 288 + Integer.toHexString(blockNameLength) + ")"); 289 } 290 byte[] blockNameBytes; 291 if (blockNameLength == 0) { 292 readByte("Block name bytes", bis, "Image Resource Block has invalid name"); 293 blockNameBytes = new byte[0]; 294 } else { 295 try { 296 blockNameBytes = readBytes("", bis, blockNameLength, 297 "Invalid Image Resource Block name"); 298 } catch (final IOException ioEx) { 299 if (strict) { 300 throw ioEx; 301 } 302 break; 303 } 304 305 if (blockNameLength % 2 == 0) { 306 readByte("Padding byte", bis, "Image Resource Block missing padding byte"); 307 } 308 } 309 310 final int blockSize = read4Bytes("", bis, "Image Resource Block missing size", APP13_BYTE_ORDER); 311 Debug.debug("blockSize: " + blockSize + " (0x" + Integer.toHexString(blockSize) + ")"); 312 313 /* 314 * doesn't catch cases where blocksize is invalid but is still less 315 * than bytes.length but will at least prevent OutOfMemory errors 316 */ 317 if (blockSize > bytes.length) { 318 throw new ImageReadException("Invalid Block Size : " + blockSize + " > " + bytes.length); 319 } 320 321 final byte[] blockData; 322 try { 323 blockData = readBytes("", bis, blockSize, "Invalid Image Resource Block data"); 324 } catch (final IOException ioEx) { 325 if (strict) { 326 throw ioEx; 327 } 328 break; 329 } 330 331 blocks.add(new IptcBlock(blockType, blockNameBytes, blockData)); 332 333 if ((blockSize % 2) != 0) { 334 readByte("Padding byte", bis, "Image Resource Block missing padding byte"); 335 } 336 } 337 338 return blocks; 339 } 340 } 341 342 // private void writeIPTCRecord(BinaryOutputStream bos, ) 343 344 public byte[] writePhotoshopApp13Segment(final PhotoshopApp13Data data) 345 throws IOException, ImageWriteException { 346 final ByteArrayOutputStream os = new ByteArrayOutputStream(); 347 final BinaryOutputStream bos = new BinaryOutputStream(os); 348 349 JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.writeTo(bos); 350 351 final List<IptcBlock> blocks = data.getRawBlocks(); 352 for (final IptcBlock block : blocks) { 353 bos.write4Bytes(JpegConstants.CONST_8BIM); 354 355 if (block.blockType < 0 || block.blockType > 0xffff) { 356 throw new ImageWriteException("Invalid IPTC block type."); 357 } 358 bos.write2Bytes(block.blockType); 359 360 if (block.blockNameBytes.length > 255) { 361 throw new ImageWriteException("IPTC block name is too long: " 362 + block.blockNameBytes.length); 363 } 364 bos.write(block.blockNameBytes.length); 365 bos.write(block.blockNameBytes); 366 if (block.blockNameBytes.length % 2 == 0) { 367 bos.write(0); // pad to even size, including length byte. 368 } 369 370 if (block.blockData.length > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE) { 371 throw new ImageWriteException("IPTC block data is too long: " 372 + block.blockData.length); 373 } 374 bos.write4Bytes(block.blockData.length); 375 bos.write(block.blockData); 376 if (block.blockData.length % 2 == 1) { 377 bos.write(0); // pad to even size 378 } 379 380 } 381 382 bos.flush(); 383 return os.toByteArray(); 384 } 385 386 public byte[] writeIPTCBlock(List<IptcRecord> elements) 387 throws ImageWriteException, IOException { 388 byte[] blockData; 389 final ByteArrayOutputStream baos = new ByteArrayOutputStream(); 390 try (BinaryOutputStream bos = new BinaryOutputStream(baos, getByteOrder())) { 391 392 // first, right record version record 393 bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER); 394 bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER); 395 bos.write(IptcTypes.RECORD_VERSION.type); // record version record 396 // type. 397 bos.write2Bytes(2); // record version record size 398 bos.write2Bytes(2); // record version value 399 400 // make a copy of the list. 401 elements = new ArrayList<>(elements); 402 403 // sort the list. Records must be in numerical order. 404 final Comparator<IptcRecord> comparator = new Comparator<IptcRecord>() { 405 @Override 406 public int compare(final IptcRecord e1, final IptcRecord e2) { 407 return e2.iptcType.getType() - e1.iptcType.getType(); 408 } 409 }; 410 Collections.sort(elements, comparator); 411 // TODO: make sure order right 412 413 // write the list. 414 for (final IptcRecord element : elements) { 415 if (element.iptcType == IptcTypes.RECORD_VERSION) { 416 continue; // ignore 417 } 418 419 bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER); 420 bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER); 421 if (element.iptcType.getType() < 0 422 || element.iptcType.getType() > 0xff) { 423 throw new ImageWriteException("Invalid record type: " 424 + element.iptcType.getType()); 425 } 426 bos.write(element.iptcType.getType()); 427 428 final byte[] recordData = element.getValue().getBytes(StandardCharsets.ISO_8859_1); 429 if (!new String(recordData, StandardCharsets.ISO_8859_1).equals(element.getValue())) { 430 throw new ImageWriteException( 431 "Invalid record value, not ISO-8859-1"); 432 } 433 434 bos.write2Bytes(recordData.length); 435 bos.write(recordData); 436 } 437 } 438 439 blockData = baos.toByteArray(); 440 441 return blockData; 442 } 443 444}