001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.imaging.formats.jpeg.iptc;
019
020import static org.apache.commons.imaging.common.BinaryFunctions.read2Bytes;
021import static org.apache.commons.imaging.common.BinaryFunctions.read4Bytes;
022import static org.apache.commons.imaging.common.BinaryFunctions.readByte;
023import static org.apache.commons.imaging.common.BinaryFunctions.readBytes;
024import static org.apache.commons.imaging.common.BinaryFunctions.slice;
025import static org.apache.commons.imaging.common.BinaryFunctions.startsWith;
026
027import java.io.ByteArrayInputStream;
028import java.io.ByteArrayOutputStream;
029import java.io.IOException;
030import java.io.InputStream;
031import java.nio.ByteOrder;
032import java.nio.charset.StandardCharsets;
033import java.util.ArrayList;
034import java.util.Collections;
035import java.util.Comparator;
036import java.util.List;
037import java.util.Map;
038import java.util.logging.Level;
039import java.util.logging.Logger;
040
041import org.apache.commons.imaging.ImageReadException;
042import org.apache.commons.imaging.ImageWriteException;
043import org.apache.commons.imaging.ImagingConstants;
044import org.apache.commons.imaging.common.BinaryFileParser;
045import org.apache.commons.imaging.common.BinaryOutputStream;
046import org.apache.commons.imaging.common.ByteConversions;
047import org.apache.commons.imaging.formats.jpeg.JpegConstants;
048import org.apache.commons.imaging.internal.Debug;
049
050public class IptcParser extends BinaryFileParser {
051
052    private static final Logger LOGGER = Logger.getLogger(IptcParser.class.getName());
053
054    private static final ByteOrder APP13_BYTE_ORDER = ByteOrder.BIG_ENDIAN;
055
056    public IptcParser() {
057        setByteOrder(ByteOrder.BIG_ENDIAN);
058    }
059
060    public boolean isPhotoshopJpegSegment(final byte[] segmentData) {
061        if (!startsWith(segmentData,
062                JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING)) {
063            return false;
064        }
065
066        final int index = JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size();
067        return (index + 4) <= segmentData.length
068                && ByteConversions.toInt(segmentData, index, APP13_BYTE_ORDER) == JpegConstants.CONST_8BIM;
069    }
070
071    /*
072     * In practice, App13 segments are only used for Photoshop/IPTC metadata.
073     * However, we should not treat App13 signatures without Photoshop's
074     * signature as Photoshop/IPTC segments.
075     *
076     * A Photoshop/IPTC App13 segment begins with the Photoshop Identification
077     * string.
078     *
079     * There follows 0-N blocks (Photoshop calls them "Image Resource Blocks").
080     *
081     * Each block has the following structure:
082     *
083     * 1. 4-byte type. This is always "8BIM" for blocks in a Photoshop App13
084     * segment. 2. 2-byte id. IPTC data is stored in blocks with id 0x0404, aka.
085     * IPTC_NAA_RECORD_IMAGE_RESOURCE_ID 3. Block name as a Pascal String. This
086     * is padded to have an even length. 4. 4-byte size (in bytes). 5. Block
087     * data. This is also padded to have an even length.
088     *
089     * The block data consists of a 0-N records. A record has the following
090     * structure:
091     *
092     * 1. 2-byte prefix. The value is always 0x1C02 2. 1-byte record type. The
093     * record types are documented by the IPTC. See IptcConstants. 3. 2-byte
094     * record size (in bytes). 4. Record data, "record size" bytes long.
095     *
096     * Record data (unlike block data) is NOT padded to have an even length.
097     *
098     * Record data, for IPTC record, should always be ISO-8859-1. But according
099     * to SANSELAN-33, this isn't always the case.
100     *
101     * The exception is the first record in the block, which must always be a
102     * record version record, whose value is a two-byte number; the value is
103     * 0x02.
104     *
105     * Some IPTC blocks are missing this first "record version" record, so we
106     * don't require it.
107     */
108    public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final Map<String, Object> params)
109            throws ImageReadException, IOException {
110        final boolean strict =  params != null && Boolean.TRUE.equals(params.get(ImagingConstants.PARAM_KEY_STRICT));
111
112        return parsePhotoshopSegment(bytes, strict);
113    }
114
115    public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final boolean strict) throws ImageReadException,
116            IOException {
117        final List<IptcRecord> records = new ArrayList<>();
118
119        final List<IptcBlock> blocks = parseAllBlocks(bytes, strict);
120
121        for (final IptcBlock block : blocks) {
122            // Ignore everything but IPTC data.
123            if (!block.isIPTCBlock()) {
124                continue;
125            }
126
127            records.addAll(parseIPTCBlock(block.blockData));
128        }
129
130        return new PhotoshopApp13Data(records, blocks);
131    }
132
133    protected List<IptcRecord> parseIPTCBlock(final byte[] bytes)
134            throws IOException {
135        final List<IptcRecord> elements = new ArrayList<>();
136
137        int index = 0;
138        // Integer recordVersion = null;
139        while (index + 1 < bytes.length) {
140            final int tagMarker = 0xff & bytes[index++];
141            Debug.debug("tagMarker: " + tagMarker + " (0x" + Integer.toHexString(tagMarker) + ")");
142
143            if (tagMarker != IptcConstants.IPTC_RECORD_TAG_MARKER) {
144                if (LOGGER.isLoggable(Level.FINE)) {
145                    LOGGER.fine("Unexpected record tag marker in IPTC data.");
146                }
147                return elements;
148            }
149
150            final int recordNumber = 0xff & bytes[index++];
151            Debug.debug("recordNumber: " + recordNumber + " (0x" + Integer.toHexString(recordNumber) + ")");
152
153            // int recordPrefix = convertByteArrayToShort("recordPrefix", index,
154            // bytes);
155            // if (verbose)
156            // Debug.debug("recordPrefix", recordPrefix + " (0x"
157            // + Integer.toHexString(recordPrefix) + ")");
158            // index += 2;
159            //
160            // if (recordPrefix != IPTC_RECORD_PREFIX)
161            // {
162            // if (verbose)
163            // System.out
164            // .println("Unexpected record prefix in IPTC data!");
165            // return elements;
166            // }
167
168            // throw new ImageReadException(
169            // "Unexpected record prefix in IPTC data.");
170
171            final int recordType = 0xff & bytes[index];
172            Debug.debug("recordType: " + recordType + " (0x" + Integer.toHexString(recordType) + ")");
173            index++;
174
175            final int recordSize = ByteConversions.toUInt16(bytes, index, getByteOrder());
176            index += 2;
177
178            final boolean extendedDataset = recordSize > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE;
179            final int dataFieldCountLength = recordSize & 0x7fff;
180            if (extendedDataset) {
181                Debug.debug("extendedDataset. dataFieldCountLength: " + dataFieldCountLength);
182            }
183            if (extendedDataset) {
184                // ignore extended dataset and everything after.
185                return elements;
186            }
187
188            final byte[] recordData = slice(bytes, index, recordSize);
189            index += recordSize;
190
191            // Debug.debug("recordSize", recordSize + " (0x"
192            // + Integer.toHexString(recordSize) + ")");
193
194            if (recordNumber != IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER) {
195                continue;
196            }
197
198            if (recordType == 0) {
199                if (LOGGER.isLoggable(Level.FINE)) {
200                    LOGGER.fine("ignore record version record! " + elements.size());
201                }
202                // ignore "record version" record;
203                continue;
204            }
205            // if (recordVersion == null)
206            // {
207            // // The first record in a JPEG/Photoshop IPTC block must be
208            // // the record version.
209            // if (recordType != 0)
210            // throw new ImageReadException("Missing record version: "
211            // + recordType);
212            // recordVersion = new Integer(convertByteArrayToShort(
213            // "recordNumber", recordData));
214            //
215            // if (recordSize != 2)
216            // throw new ImageReadException(
217            // "Invalid record version record size: " + recordSize);
218            //
219            // // JPEG/Photoshop IPTC metadata is always in Record version
220            // // 2
221            // if (recordVersion.intValue() != 2)
222            // throw new ImageReadException(
223            // "Invalid IPTC record version: " + recordVersion);
224            //
225            // // Debug.debug("recordVersion", recordVersion);
226            // continue;
227            // }
228
229            final String value = new String(recordData, StandardCharsets.ISO_8859_1);
230
231            final IptcType iptcType = IptcTypeLookup.getIptcType(recordType);
232
233            // Debug.debug("iptcType", iptcType);
234            // debugByteArray("iptcData", iptcData);
235            // Debug.debug();
236
237            // if (recordType == IPTC_TYPE_CREDIT.type
238            // || recordType == IPTC_TYPE_OBJECT_NAME.type)
239            // {
240            // this.debugByteArray("recordData", recordData);
241            // Debug.debug("index", IPTC_TYPE_CREDIT.name);
242            // }
243
244            final IptcRecord element = new IptcRecord(iptcType, value);
245            elements.add(element);
246        }
247
248        return elements;
249    }
250
251    protected List<IptcBlock> parseAllBlocks(final byte[] bytes,
252            final boolean strict) throws ImageReadException, IOException {
253        final List<IptcBlock> blocks = new ArrayList<>();
254
255        try (InputStream bis = new ByteArrayInputStream(bytes)) {
256
257            // Note that these are unsigned quantities. Name is always an even
258            // number of bytes (including the 1st byte, which is the size.)
259
260            final byte[] idString = readBytes("", bis,
261                    JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size(),
262                    "App13 Segment missing identification string");
263            if (!JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.equals(idString)) {
264                throw new ImageReadException("Not a Photoshop App13 Segment");
265            }
266
267            // int index = PHOTOSHOP_IDENTIFICATION_STRING.length;
268
269            while (true) {
270                final int imageResourceBlockSignature;
271                try {
272                    imageResourceBlockSignature = read4Bytes("", bis,
273                            "Image Resource Block missing identification string", APP13_BYTE_ORDER);
274                } catch (final IOException ioEx) {
275                    break;
276                }
277                if (imageResourceBlockSignature != JpegConstants.CONST_8BIM) {
278                    throw new ImageReadException(
279                            "Invalid Image Resource Block Signature");
280                }
281
282                final int blockType = read2Bytes("", bis, "Image Resource Block missing type", APP13_BYTE_ORDER);
283                Debug.debug("blockType: " + blockType + " (0x" + Integer.toHexString(blockType) + ")");
284
285                final int blockNameLength = readByte("Name length", bis, "Image Resource Block missing name length");
286                if (blockNameLength > 0) {
287                    Debug.debug("blockNameLength: " + blockNameLength + " (0x"
288                            + Integer.toHexString(blockNameLength) + ")");
289                }
290                byte[] blockNameBytes;
291                if (blockNameLength == 0) {
292                    readByte("Block name bytes", bis, "Image Resource Block has invalid name");
293                    blockNameBytes = new byte[0];
294                } else {
295                    try {
296                        blockNameBytes = readBytes("", bis, blockNameLength,
297                                "Invalid Image Resource Block name");
298                    } catch (final IOException ioEx) {
299                        if (strict) {
300                            throw ioEx;
301                        }
302                        break;
303                    }
304
305                    if (blockNameLength % 2 == 0) {
306                        readByte("Padding byte", bis, "Image Resource Block missing padding byte");
307                    }
308                }
309
310                final int blockSize = read4Bytes("", bis, "Image Resource Block missing size", APP13_BYTE_ORDER);
311                Debug.debug("blockSize: " + blockSize + " (0x" + Integer.toHexString(blockSize) + ")");
312
313                /*
314                 * doesn't catch cases where blocksize is invalid but is still less
315                 * than bytes.length but will at least prevent OutOfMemory errors
316                 */
317                if (blockSize > bytes.length) {
318                    throw new ImageReadException("Invalid Block Size : " + blockSize + " > " + bytes.length);
319                }
320
321                final byte[] blockData;
322                try {
323                    blockData = readBytes("", bis, blockSize, "Invalid Image Resource Block data");
324                } catch (final IOException ioEx) {
325                    if (strict) {
326                        throw ioEx;
327                    }
328                    break;
329                }
330
331                blocks.add(new IptcBlock(blockType, blockNameBytes, blockData));
332
333                if ((blockSize % 2) != 0) {
334                    readByte("Padding byte", bis, "Image Resource Block missing padding byte");
335                }
336            }
337
338            return blocks;
339        }
340    }
341
342    // private void writeIPTCRecord(BinaryOutputStream bos, )
343
344    public byte[] writePhotoshopApp13Segment(final PhotoshopApp13Data data)
345            throws IOException, ImageWriteException {
346        final ByteArrayOutputStream os = new ByteArrayOutputStream();
347        final BinaryOutputStream bos = new BinaryOutputStream(os);
348
349        JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.writeTo(bos);
350
351        final List<IptcBlock> blocks = data.getRawBlocks();
352        for (final IptcBlock block : blocks) {
353            bos.write4Bytes(JpegConstants.CONST_8BIM);
354
355            if (block.blockType < 0 || block.blockType > 0xffff) {
356                throw new ImageWriteException("Invalid IPTC block type.");
357            }
358            bos.write2Bytes(block.blockType);
359
360            if (block.blockNameBytes.length > 255) {
361                throw new ImageWriteException("IPTC block name is too long: "
362                        + block.blockNameBytes.length);
363            }
364            bos.write(block.blockNameBytes.length);
365            bos.write(block.blockNameBytes);
366            if (block.blockNameBytes.length % 2 == 0) {
367                bos.write(0); // pad to even size, including length byte.
368            }
369
370            if (block.blockData.length > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE) {
371                throw new ImageWriteException("IPTC block data is too long: "
372                        + block.blockData.length);
373            }
374            bos.write4Bytes(block.blockData.length);
375            bos.write(block.blockData);
376            if (block.blockData.length % 2 == 1) {
377                bos.write(0); // pad to even size
378            }
379
380        }
381
382        bos.flush();
383        return os.toByteArray();
384    }
385
386    public byte[] writeIPTCBlock(List<IptcRecord> elements)
387            throws ImageWriteException, IOException {
388        byte[] blockData;
389        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
390        try (BinaryOutputStream bos = new BinaryOutputStream(baos, getByteOrder())) {
391
392            // first, right record version record
393            bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
394            bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
395            bos.write(IptcTypes.RECORD_VERSION.type); // record version record
396                                                      // type.
397            bos.write2Bytes(2); // record version record size
398            bos.write2Bytes(2); // record version value
399
400            // make a copy of the list.
401            elements = new ArrayList<>(elements);
402
403            // sort the list. Records must be in numerical order.
404            final Comparator<IptcRecord> comparator = new Comparator<IptcRecord>() {
405                @Override
406                public int compare(final IptcRecord e1, final IptcRecord e2) {
407                    return e2.iptcType.getType() - e1.iptcType.getType();
408                }
409            };
410            Collections.sort(elements, comparator);
411            // TODO: make sure order right
412
413            // write the list.
414            for (final IptcRecord element : elements) {
415                if (element.iptcType == IptcTypes.RECORD_VERSION) {
416                    continue; // ignore
417                }
418
419                bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
420                bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
421                if (element.iptcType.getType() < 0
422                        || element.iptcType.getType() > 0xff) {
423                    throw new ImageWriteException("Invalid record type: "
424                            + element.iptcType.getType());
425                }
426                bos.write(element.iptcType.getType());
427
428                final byte[] recordData = element.getValue().getBytes(StandardCharsets.ISO_8859_1);
429                if (!new String(recordData, StandardCharsets.ISO_8859_1).equals(element.getValue())) {
430                    throw new ImageWriteException(
431                            "Invalid record value, not ISO-8859-1");
432                }
433
434                bos.write2Bytes(recordData.length);
435                bos.write(recordData);
436            }
437        }
438
439        blockData = baos.toByteArray();
440
441        return blockData;
442    }
443
444}