001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.Serializable;
020import java.nio.charset.StandardCharsets;
021import java.util.Locale;
022import java.util.Objects;
023
024/**
025 * Byte Order Mark (BOM) representation - see {@link org.apache.commons.io.input.BOMInputStream}.
026 * <h2>Deprecating Serialization</h2>
027 * <p>
028 * <em>Serialization is deprecated and will be removed in 3.0.</em>
029 * </p>
030 *
031 * @see org.apache.commons.io.input.BOMInputStream
032 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
033 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
034 *      (Non-Normative)</a>
035 * @since 2.0
036 */
037public class ByteOrderMark implements Serializable {
038
039    private static final long serialVersionUID = 1L;
040
041    /** UTF-8 BOM. */
042    public static final ByteOrderMark UTF_8 = new ByteOrderMark(StandardCharsets.UTF_8.name(), 0xEF, 0xBB, 0xBF);
043
044    /** UTF-16BE BOM (Big-Endian). */
045    public static final ByteOrderMark UTF_16BE = new ByteOrderMark(StandardCharsets.UTF_16BE.name(), 0xFE, 0xFF);
046
047    /** UTF-16LE BOM (Little-Endian). */
048    public static final ByteOrderMark UTF_16LE = new ByteOrderMark(StandardCharsets.UTF_16LE.name(), 0xFF, 0xFE);
049
050    /**
051     * UTF-32BE BOM (Big-Endian).
052     *
053     * @since 2.2
054     */
055    public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);
056
057    /**
058     * UTF-32LE BOM (Little-Endian).
059     *
060     * @since 2.2
061     */
062    public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);
063
064    /**
065     * Unicode BOM character; external form depends on the encoding.
066     *
067     * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
068     * @since 2.5
069     */
070    public static final char UTF_BOM = '\uFEFF';
071
072    private final String charsetName;
073    private final int[] bytes;
074
075    /**
076     * Constructs a new instance.
077     *
078     * @param charsetName The name of the charset the BOM represents
079     * @param bytes The BOM's bytes
080     * @throws IllegalArgumentException if the charsetName is zero length
081     * @throws IllegalArgumentException if the bytes are zero length
082     */
083    public ByteOrderMark(final String charsetName, final int... bytes) {
084        Objects.requireNonNull(charsetName, "charsetName");
085        Objects.requireNonNull(bytes, "bytes");
086        if (charsetName.isEmpty()) {
087            throw new IllegalArgumentException("No charsetName specified");
088        }
089        if (bytes.length == 0) {
090            throw new IllegalArgumentException("No bytes specified");
091        }
092        this.charsetName = charsetName;
093        this.bytes = bytes.clone();
094    }
095
096    /**
097     * Indicates if this instance's bytes equals another.
098     *
099     * @param obj The object to compare to
100     * @return true if the bom's bytes are equal, otherwise
101     * false
102     */
103    @Override
104    public boolean equals(final Object obj) {
105        if (!(obj instanceof ByteOrderMark)) {
106            return false;
107        }
108        final ByteOrderMark bom = (ByteOrderMark) obj;
109        if (bytes.length != bom.length()) {
110            return false;
111        }
112        for (int i = 0; i < bytes.length; i++) {
113            if (bytes[i] != bom.get(i)) {
114                return false;
115            }
116        }
117        return true;
118    }
119
120    /**
121     * Gets the byte at the specified position.
122     *
123     * @param pos The position
124     * @return The specified byte
125     */
126    public int get(final int pos) {
127        return bytes[pos];
128    }
129
130    /**
131     * Gets a copy of the BOM's bytes.
132     *
133     * @return a copy of the BOM's bytes
134     */
135    public byte[] getBytes() {
136        final byte[] copy = IOUtils.byteArray(bytes.length);
137        for (int i = 0; i < bytes.length; i++) {
138            copy[i] = (byte) bytes[i];
139        }
140        return copy;
141    }
142
143    /**
144     * Gets the name of the {@link java.nio.charset.Charset} the BOM represents.
145     *
146     * @return the character set name
147     */
148    public String getCharsetName() {
149        return charsetName;
150    }
151
152    /**
153     * Computes the hashcode for this BOM.
154     *
155     * @return the hashcode for this BOM.
156     * @see Object#hashCode()
157     */
158    @Override
159    public int hashCode() {
160        int hashCode = getClass().hashCode();
161        for (final int b : bytes) {
162            hashCode += b;
163        }
164        return hashCode;
165    }
166
167    /**
168     * Gets the length of the BOM's bytes.
169     *
170     * @return the length of the BOM's bytes
171     */
172    public int length() {
173        return bytes.length;
174    }
175
176    /**
177     * Converts this instance to a String representation of the BOM.
178     *
179     * @return the length of the BOM's bytes
180     */
181    @Override
182    public String toString() {
183        final StringBuilder builder = new StringBuilder();
184        builder.append(getClass().getSimpleName());
185        builder.append('[');
186        builder.append(charsetName);
187        builder.append(": ");
188        for (int i = 0; i < bytes.length; i++) {
189            if (i > 0) {
190                builder.append(",");
191            }
192            builder.append("0x");
193            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT));
194        }
195        builder.append(']');
196        return builder.toString();
197    }
198
199}