001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.Serializable;
020
/**
 * Byte Order Mark (BOM) representation - see {@link org.apache.commons.io.input.BOMInputStream}.
 * <p>
 * A BOM pairs a charset name with the sequence of byte values that identifies that
 * charset at the start of a stream. The byte sequence is copied on construction and
 * only ever handed out as a copy, so the state held by this class cannot be modified
 * after construction.
 * <p>
 * Equality and hash code are based on the byte sequence only; the charset name is
 * not taken into account.
 *
 * @see org.apache.commons.io.input.BOMInputStream
 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
 *      (Non-Normative)</a>
 * @version $Id: ByteOrderMark.java 1347571 2012-06-07 11:13:53Z sebb $
 * @since 2.0
 */
public class ByteOrderMark implements Serializable {

    private static final long serialVersionUID = 1L;

    /** UTF-8 BOM */
    public static final ByteOrderMark UTF_8    = new ByteOrderMark("UTF-8",    0xEF, 0xBB, 0xBF);

    /** UTF-16BE BOM (Big-Endian) */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);

    /** UTF-16LE BOM (Little-Endian) */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian)
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian)
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);

    /** Name of the charset this BOM identifies; never null or empty. */
    private final String charsetName;

    /** The BOM's byte values, one per element; never null or empty. */
    private final int[] bytes;

    /**
     * Construct a new BOM.
     *
     * @param charsetName The name of the charset the BOM represents
     * @param bytes The BOM's bytes
     * @throws IllegalArgumentException if the charsetName is null or
     * zero length, or if the bytes are null or zero length
     */
    public ByteOrderMark(String charsetName, int... bytes) {
        if (charsetName == null || charsetName.length() == 0) {
            throw new IllegalArgumentException("No charsetName specified");
        }
        if (bytes == null || bytes.length == 0) {
            throw new IllegalArgumentException("No bytes specified");
        }
        this.charsetName = charsetName;
        // Defensive copy: later changes to the caller's array must not affect this BOM.
        this.bytes = bytes.clone();
    }

    /**
     * Return the name of the {@link java.nio.charset.Charset} the BOM represents.
     *
     * @return the character set name
     */
    public String getCharsetName() {
        return charsetName;
    }

    /**
     * Return the length of the BOM's bytes.
     *
     * @return the length of the BOM's bytes
     */
    public int length() {
        return bytes.length;
    }

    /**
     * The byte at the specified position.
     *
     * @param pos The position
     * @return The specified byte
     * @throws ArrayIndexOutOfBoundsException if the position is negative or
     * not less than {@link #length()}
     */
    public int get(int pos) {
        return bytes[pos];
    }

    /**
     * Return a copy of the BOM's bytes.
     *
     * @return a copy of the BOM's bytes
     */
    public byte[] getBytes() {
        byte[] copy = new byte[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            // Narrowing cast: values such as 0xEF become the corresponding signed byte.
            copy[i] = (byte)bytes[i];
        }
        return copy;
    }

    /**
     * Indicates if this BOM's bytes equals another.
     * <p>
     * Only the byte sequences are compared; the charset name is ignored.
     *
     * @param obj The object to compare to
     * @return true if the bom's bytes are equal, otherwise
     * false
     */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof ByteOrderMark)) {
            return false;
        }
        ByteOrderMark bom = (ByteOrderMark)obj;
        if (bytes.length != bom.length()) {
            return false;
        }
        for (int i = 0; i < bytes.length; i++) {
            if (bytes[i] != bom.get(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Return the hashcode for this BOM.
     * <p>
     * The value is computed from the byte sequence only, so that any two
     * instances considered equal by {@link #equals(Object)} - including
     * instances of subclasses - produce the same hash code.
     *
     * @return the hashcode for this BOM.
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        // Do not mix in getClass(): equals() accepts any ByteOrderMark subclass,
        // so the runtime class must not influence the hash.
        int hashCode = 1;
        for (int b : bytes) {
            // Positional weighting keeps reversed sequences (e.g. FE FF / FF FE) apart.
            hashCode = 31 * hashCode + b;
        }
        return hashCode;
    }

    /**
     * Provide a String representation of the BOM.
     *
     * @return a String of the form {@code SimpleClassName[charsetName: 0xNN,0xNN,...]}
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        builder.append(getClass().getSimpleName());
        builder.append('[');
        builder.append(charsetName);
        builder.append(": ");
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                builder.append(",");
            }
            builder.append("0x");
            // Only the low 8 bits of each stored value are rendered.
            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase());
        }
        builder.append(']');
        return builder.toString();
    }

}