001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import org.apache.commons.codec.BinaryDecoder;
021import org.apache.commons.codec.BinaryEncoder;
022import org.apache.commons.codec.DecoderException;
023import org.apache.commons.codec.EncoderException;
024
/**
 * Translates between byte arrays and strings of "0"s and "1"s.
 * 
 * TODO: may want to add more bit vector functions like and/or/xor/nand 
 * TODO: also might be good to generate boolean[] from byte[], et cetera.
 * 
 * @author Apache Software Foundation
 * @since 1.3
 * @version $Id: BinaryCodec.java 798433 2009-07-28 07:53:10Z ggregory $
 */
035public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
036    /*
037     * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
038     * it.
039     */
040    /** Empty char array. */
041    private static final char[] EMPTY_CHAR_ARRAY = new char[0];
042
043    /** Empty byte array. */
044    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
045
046    /** Mask for bit 0 of a byte. */
047    private static final int BIT_0 = 1;
048
049    /** Mask for bit 1 of a byte. */
050    private static final int BIT_1 = 0x02;
051
052    /** Mask for bit 2 of a byte. */
053    private static final int BIT_2 = 0x04;
054
055    /** Mask for bit 3 of a byte. */
056    private static final int BIT_3 = 0x08;
057
058    /** Mask for bit 4 of a byte. */
059    private static final int BIT_4 = 0x10;
060
061    /** Mask for bit 5 of a byte. */
062    private static final int BIT_5 = 0x20;
063
064    /** Mask for bit 6 of a byte. */
065    private static final int BIT_6 = 0x40;
066
067    /** Mask for bit 7 of a byte. */
068    private static final int BIT_7 = 0x80;
069
070    private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
071
072    /**
073     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
074     * 
075     * @param raw
076     *                  the raw binary data to convert
077     * @return 0 and 1 ASCII character bytes one for each bit of the argument
078     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
079     */
080    public byte[] encode(byte[] raw) {
081        return toAsciiBytes(raw);
082    }
083
084    /**
085     * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
086     * 
087     * @param raw
088     *                  the raw binary data to convert
089     * @return 0 and 1 ASCII character chars one for each bit of the argument
090     * @throws EncoderException
091     *                  if the argument is not a byte[]
092     * @see org.apache.commons.codec.Encoder#encode(Object)
093     */
094    public Object encode(Object raw) throws EncoderException {
095        if (!(raw instanceof byte[])) {
096            throw new EncoderException("argument not a byte array");
097        }
098        return toAsciiChars((byte[]) raw);
099    }
100
101    /**
102     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
103     * 
104     * @param ascii
105     *                  each byte represents an ASCII '0' or '1'
106     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
107     * @throws DecoderException
108     *                  if argument is not a byte[], char[] or String
109     * @see org.apache.commons.codec.Decoder#decode(Object)
110     */
111    public Object decode(Object ascii) throws DecoderException {
112        if (ascii == null) {
113            return EMPTY_BYTE_ARRAY;
114        }
115        if (ascii instanceof byte[]) {
116            return fromAscii((byte[]) ascii);
117        }
118        if (ascii instanceof char[]) {
119            return fromAscii((char[]) ascii);
120        }
121        if (ascii instanceof String) {
122            return fromAscii(((String) ascii).toCharArray());
123        }
124        throw new DecoderException("argument not a byte array");
125    }
126
127    /**
128     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
129     * 
130     * @param ascii
131     *                  each byte represents an ASCII '0' or '1'
132     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
133     * @see org.apache.commons.codec.Decoder#decode(Object)
134     */
135    public byte[] decode(byte[] ascii) {
136        return fromAscii(ascii);
137    }
138
139    /**
140     * Decodes a String where each char of the String represents an ASCII '0' or '1'.
141     * 
142     * @param ascii
143     *                  String of '0' and '1' characters
144     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
145     * @see org.apache.commons.codec.Decoder#decode(Object)
146     */
147    public byte[] toByteArray(String ascii) {
148        if (ascii == null) {
149            return EMPTY_BYTE_ARRAY;
150        }
151        return fromAscii(ascii.toCharArray());
152    }
153
154    // ------------------------------------------------------------------------
155    //
156    // static codec operations
157    //
158    // ------------------------------------------------------------------------
159    /**
160     * Decodes a char array where each char represents an ASCII '0' or '1'.
161     * 
162     * @param ascii
163     *                  each char represents an ASCII '0' or '1'
164     * @return the raw encoded binary where each bit corresponds to a char in the char array argument
165     */
166    public static byte[] fromAscii(char[] ascii) {
167        if (ascii == null || ascii.length == 0) {
168            return EMPTY_BYTE_ARRAY;
169        }
170        // get length/8 times bytes with 3 bit shifts to the right of the length
171        byte[] l_raw = new byte[ascii.length >> 3];
172        /*
173         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
174         * loop.
175         */
176        for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
177            for (int bits = 0; bits < BITS.length; ++bits) {
178                if (ascii[jj - bits] == '1') {
179                    l_raw[ii] |= BITS[bits];
180                }
181            }
182        }
183        return l_raw;
184    }
185
186    /**
187     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
188     * 
189     * @param ascii
190     *                  each byte represents an ASCII '0' or '1'
191     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
192     */
193    public static byte[] fromAscii(byte[] ascii) {
194        if (isEmpty(ascii)) {
195            return EMPTY_BYTE_ARRAY;
196        }
197        // get length/8 times bytes with 3 bit shifts to the right of the length
198        byte[] l_raw = new byte[ascii.length >> 3];
199        /*
200         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
201         * loop.
202         */
203        for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
204            for (int bits = 0; bits < BITS.length; ++bits) {
205                if (ascii[jj - bits] == '1') {
206                    l_raw[ii] |= BITS[bits];
207                }
208            }
209        }
210        return l_raw;
211    }
212
213    /**
214     * Returns <code>true</code> if the given array is <code>null</code> or empty (size 0.)
215     * 
216     * @param array
217     *            the source array
218     * @return <code>true</code> if the given array is <code>null</code> or empty (size 0.)
219     */
220    private static boolean isEmpty(byte[] array) {
221        return array == null || array.length == 0;
222    }
223
224    /**
225     * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
226     * char.
227     * 
228     * @param raw
229     *                  the raw binary data to convert
230     * @return an array of 0 and 1 character bytes for each bit of the argument
231     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
232     */
233    public static byte[] toAsciiBytes(byte[] raw) {
234        if (isEmpty(raw)) {
235            return EMPTY_BYTE_ARRAY;
236        }
237        // get 8 times the bytes with 3 bit shifts to the left of the length
238        byte[] l_ascii = new byte[raw.length << 3];
239        /*
240         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
241         * loop.
242         */
243        for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
244            for (int bits = 0; bits < BITS.length; ++bits) {
245                if ((raw[ii] & BITS[bits]) == 0) {
246                    l_ascii[jj - bits] = '0';
247                } else {
248                    l_ascii[jj - bits] = '1';
249                }
250            }
251        }
252        return l_ascii;
253    }
254
255    /**
256     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
257     * 
258     * @param raw
259     *                  the raw binary data to convert
260     * @return an array of 0 and 1 characters for each bit of the argument
261     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
262     */
263    public static char[] toAsciiChars(byte[] raw) {
264        if (isEmpty(raw)) {
265            return EMPTY_CHAR_ARRAY;
266        }
267        // get 8 times the bytes with 3 bit shifts to the left of the length
268        char[] l_ascii = new char[raw.length << 3];
269        /*
270         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
271         * loop.
272         */
273        for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
274            for (int bits = 0; bits < BITS.length; ++bits) {
275                if ((raw[ii] & BITS[bits]) == 0) {
276                    l_ascii[jj - bits] = '0';
277                } else {
278                    l_ascii[jj - bits] = '1';
279                }
280            }
281        }
282        return l_ascii;
283    }
284
285    /**
286     * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
287     * 
288     * @param raw
289     *                  the raw binary data to convert
290     * @return a String of 0 and 1 characters representing the binary data
291     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
292     */
293    public static String toAsciiString(byte[] raw) {
294        return new String(toAsciiChars(raw));
295    }
296}