001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.math.BigInteger;
021
022import org.apache.commons.codec.BinaryDecoder;
023import org.apache.commons.codec.BinaryEncoder;
024import org.apache.commons.codec.DecoderException;
025import org.apache.commons.codec.EncoderException;
026
027/**
028 * Provides Base64 encoding and decoding as defined by RFC 2045.
029 * 
030 * <p>
031 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
032 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
033 * </p>
034 * <p>
035 * The class can be parameterized in the following manner with various constructors:
036 * <ul>
037 * <li>URL-safe mode: Default off.</li>
038 * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being multiples
039 * of 4 in the encoded data.</li>
040 * <li>Line separator: Default is CRLF ("\r\n")</li>
041 * </ul>
042 * </p>
043 * <p>
044 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode
045 * character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc).
046 * </p>
047 * 
048 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
049 * @author Apache Software Foundation
050 * @since 1.0
051 * @version $Id: Base64.java 801706 2009-08-06 16:27:06Z niallp $
052 */
053public class Base64 implements BinaryEncoder, BinaryDecoder {
054    private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
055
056    private static final int DEFAULT_BUFFER_SIZE = 8192;
057
058    /**
059     * Chunk size per RFC 2045 section 6.8.
060     * 
061     * <p>
062     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
063     * equal signs.
064     * </p>
065     * 
066     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
067     */
068    static final int CHUNK_SIZE = 76;
069
070    /**
071     * Chunk separator per RFC 2045 section 2.1.
072     *
073     * <p>
074     * N.B. The next major release may break compatibility and make this field private.
075     * </p>
076     * 
077     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
078     */
079    static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};
080
081    /**
082     * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet"
083     * equivalents as specified in Table 1 of RFC 2045.
084     * 
085     * Thanks to "commons" project in ws.apache.org for this code.
086     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
087     */
088    private static final byte[] STANDARD_ENCODE_TABLE = {
089            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
090            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
091            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
092            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
093            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
094    };
095
096    /**
097     * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and /
098     * changed to - and _ to make the encoded Base64 results more URL-SAFE.
099     * This table is only used when the Base64's mode is set to URL-SAFE.
100     */    
101    private static final byte[] URL_SAFE_ENCODE_TABLE = {
102            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
103            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
104            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
105            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
106            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
107    };
108
109    /**
110     * Byte used to pad output.
111     */
112    private static final byte PAD = '=';
113
114    /**
115     * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified in
116     * Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64
117     * alphabet but fall within the bounds of the array are translated to -1.
118     * 
119     * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both
120     * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
121     * 
122     * Thanks to "commons" project in ws.apache.org for this code.
123     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
124     */
125    private static final byte[] DECODE_TABLE = {
126            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
127            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
128            -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, 52, 53, 54,
129            55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4,
130            5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
131            24, 25, -1, -1, -1, -1, 63, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34,
132            35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
133    };
134
135    /** Mask used to extract 6 bits, used when encoding */
136    private static final int MASK_6BITS = 0x3f;
137
138    /** Mask used to extract 8 bits, used in decoding base64 bytes */
139    private static final int MASK_8BITS = 0xff;
140
141    // The static final fields above are used for the original static byte[] methods on Base64.
142    // The private member fields below are used with the new streaming approach, which requires
143    // some state be preserved between calls of encode() and decode().
144
145    /**
146     * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able
147     * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
148     * between the two modes.
149     */
150    private final byte[] encodeTable;
151
152    /**
153     * Line length for encoding. Not used when decoding. A value of zero or less implies no chunking of the base64
154     * encoded data.
155     */
156    private final int lineLength;
157
158    /**
159     * Line separator for encoding. Not used when decoding. Only used if lineLength > 0.
160     */
161    private final byte[] lineSeparator;
162
163    /**
164     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
165     * <code>decodeSize = 3 + lineSeparator.length;</code>
166     */
167    private final int decodeSize;
168
169    /**
170     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
171     * <code>encodeSize = 4 + lineSeparator.length;</code>
172     */
173    private final int encodeSize;
174
175    /**
176     * Buffer for streaming.
177     */
178    private byte[] buffer;
179
180    /**
181     * Position where next character should be written in the buffer.
182     */
183    private int pos;
184
185    /**
186     * Position where next character should be read from the buffer.
187     */
188    private int readPos;
189
190    /**
191     * Variable tracks how many characters have been written to the current line. Only used when encoding. We use it to
192     * make sure each encoded line never goes beyond lineLength (if lineLength > 0).
193     */
194    private int currentLinePos;
195
196    /**
197     * Writes to the buffer only occur after every 3 reads when encoding, and every 4 reads when decoding. This variable
198     * helps track that.
199     */
200    private int modulus;
201
202    /**
203     * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this Base64 object becomes useless,
204     * and must be thrown away.
205     */
206    private boolean eof;
207
208    /**
209     * Place holder for the 3 bytes we're dealing with for our base64 logic. Bitwise operations store and extract the
210     * base64 encoding or decoding from this variable.
211     */
212    private int x;
213
214    /**
215     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
216     * <p>
217     * When encoding the line length is 76, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE.
218     * </p>
219     * 
220     * <p>
221     * When decoding all variants are supported.
222     * </p>
223     */
224    public Base64() {
225        this(false);
226    }
227
228    /**
229     * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
230     * <p>
231     * When encoding the line length is 76, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE.
232     * </p>
233     * 
234     * <p>
235     * When decoding all variants are supported.
236     * </p>
237     * 
238     * @param urlSafe
239     *            if <code>true</code>, URL-safe encoding is used. In most cases this should be set to
240     *            <code>false</code>.
241     * @since 1.4
242     */
243    public Base64(boolean urlSafe) {
244        this(CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
245    }
246
247    /**
248     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
249     * <p>
250     * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is
251     * STANDARD_ENCODE_TABLE.
252     * </p>
253     * <p>
254     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
255     * </p>
256     * <p>
257     * When decoding all variants are supported.
258     * </p>
259     * 
260     * @param lineLength
261     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 4).
262     *            If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding.
263     * @since 1.4
264     */
265    public Base64(int lineLength) {
266        this(lineLength, CHUNK_SEPARATOR);
267    }
268
269    /**
270     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
271     * <p>
272     * When encoding the line length and line separator are given in the constructor, and the encoding table is
273     * STANDARD_ENCODE_TABLE.
274     * </p>
275     * <p>
276     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
277     * </p>
278     * <p>
279     * When decoding all variants are supported.
280     * </p>
281     * 
282     * @param lineLength
283     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 4).
284     *            If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding.
285     * @param lineSeparator
286     *            Each line of encoded data will end with this sequence of bytes.
287     * @throws IllegalArgumentException
288     *             Thrown when the provided lineSeparator included some base64 characters.
289     * @since 1.4
290     */
291    public Base64(int lineLength, byte[] lineSeparator) {
292        this(lineLength, lineSeparator, false);
293    }
294
295    /**
296     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
297     * <p>
298     * When encoding the line length and line separator are given in the constructor, and the encoding table is
299     * STANDARD_ENCODE_TABLE.
300     * </p>
301     * <p>
302     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
303     * </p>
304     * <p>
305     * When decoding all variants are supported.
306     * </p>
307     * 
308     * @param lineLength
309     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 4).
310     *            If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding.
311     * @param lineSeparator
312     *            Each line of encoded data will end with this sequence of bytes.
313     * @param urlSafe
314     *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
315     *            operations. Decoding seamlessly handles both modes.
316     * @throws IllegalArgumentException
317     *             The provided lineSeparator included some base64 characters. That's not going to work!
318     * @since 1.4
319     */
320    public Base64(int lineLength, byte[] lineSeparator, boolean urlSafe) {
321        if (lineSeparator == null) {
322            lineLength = 0;  // disable chunk-separating
323            lineSeparator = CHUNK_SEPARATOR;  // this just gets ignored
324        }
325        this.lineLength = lineLength > 0 ? (lineLength / 4) * 4 : 0;
326        this.lineSeparator = new byte[lineSeparator.length];
327        System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length);
328        if (lineLength > 0) {
329            this.encodeSize = 4 + lineSeparator.length;
330        } else {
331            this.encodeSize = 4;
332        }
333        this.decodeSize = this.encodeSize - 1;
334        if (containsBase64Byte(lineSeparator)) {
335            String sep = StringUtils.newStringUtf8(lineSeparator);
336            throw new IllegalArgumentException("lineSeperator must not contain base64 characters: [" + sep + "]");
337        }
338        this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
339    }
340
341    /**
342     * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
343     * 
344     * @return true if we're in URL-SAFE mode, false otherwise.
345     * @since 1.4
346     */
347    public boolean isUrlSafe() {
348        return this.encodeTable == URL_SAFE_ENCODE_TABLE;
349    }
350
351    /**
352     * Returns true if this Base64 object has buffered data for reading.
353     * 
354     * @return true if there is Base64 object still available for reading.
355     */
356    boolean hasData() {
357        return this.buffer != null;
358    }
359
360    /**
361     * Returns the amount of buffered data available for reading.
362     * 
363     * @return The amount of buffered data available for reading.
364     */
365    int avail() {
366        return buffer != null ? pos - readPos : 0;
367    }
368
369    /** Doubles our buffer. */
370    private void resizeBuffer() {
371        if (buffer == null) {
372            buffer = new byte[DEFAULT_BUFFER_SIZE];
373            pos = 0;
374            readPos = 0;
375        } else {
376            byte[] b = new byte[buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
377            System.arraycopy(buffer, 0, b, 0, buffer.length);
378            buffer = b;
379        }
380    }
381
382    /**
383     * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
384     * bytes. Returns how many bytes were actually extracted.
385     * 
386     * @param b
387     *            byte[] array to extract the buffered data into.
388     * @param bPos
389     *            position in byte[] array to start extraction at.
390     * @param bAvail
391     *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
392     * @return The number of bytes successfully extracted into the provided byte[] array.
393     */
394    int readResults(byte[] b, int bPos, int bAvail) {
395        if (buffer != null) {
396            int len = Math.min(avail(), bAvail);
397            if (buffer != b) {
398                System.arraycopy(buffer, readPos, b, bPos, len);
399                readPos += len;
400                if (readPos >= pos) {
401                    buffer = null;
402                }
403            } else {
404                // Re-using the original consumer's output array is only
405                // allowed for one round.
406                buffer = null;
407            }
408            return len;
409        }
410        return eof ? -1 : 0;
411    }
412
413    /**
414     * Sets the streaming buffer. This is a small optimization where we try to buffer directly to the consumer's output
415     * array for one round (if the consumer calls this method first) instead of starting our own buffer.
416     * 
417     * @param out
418     *            byte[] array to buffer directly to.
419     * @param outPos
420     *            Position to start buffering into.
421     * @param outAvail
422     *            Amount of bytes available for direct buffering.
423     */
424    void setInitialBuffer(byte[] out, int outPos, int outAvail) {
425        // We can re-use consumer's original output array under
426        // special circumstances, saving on some System.arraycopy().
427        if (out != null && out.length == outAvail) {
428            buffer = out;
429            pos = outPos;
430            readPos = outPos;
431        }
432    }
433
434    /**
435     * <p>
436     * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
437     * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last
438     * remaining bytes (if not multiple of 3).
439     * </p>
440     * <p>
441     * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
442     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
443     * </p>
444     * 
445     * @param in
446     *            byte[] array of binary data to base64 encode.
447     * @param inPos
448     *            Position to start reading data from.
449     * @param inAvail
450     *            Amount of bytes available from input for encoding.
451     */
452    void encode(byte[] in, int inPos, int inAvail) {
453        if (eof) {
454            return;
455        }
456        // inAvail < 0 is how we're informed of EOF in the underlying data we're
457        // encoding.
458        if (inAvail < 0) {
459            eof = true;
460            if (buffer == null || buffer.length - pos < encodeSize) {
461                resizeBuffer();
462            }
463            switch (modulus) {
464                case 1 :
465                    buffer[pos++] = encodeTable[(x >> 2) & MASK_6BITS];
466                    buffer[pos++] = encodeTable[(x << 4) & MASK_6BITS];
467                    // URL-SAFE skips the padding to further reduce size.
468                    if (encodeTable == STANDARD_ENCODE_TABLE) {
469                        buffer[pos++] = PAD;
470                        buffer[pos++] = PAD;
471                    }
472                    break;
473
474                case 2 :
475                    buffer[pos++] = encodeTable[(x >> 10) & MASK_6BITS];
476                    buffer[pos++] = encodeTable[(x >> 4) & MASK_6BITS];
477                    buffer[pos++] = encodeTable[(x << 2) & MASK_6BITS];
478                    // URL-SAFE skips the padding to further reduce size.
479                    if (encodeTable == STANDARD_ENCODE_TABLE) {
480                        buffer[pos++] = PAD;
481                    }
482                    break;
483            }
484            if (lineLength > 0 && pos > 0) {
485                System.arraycopy(lineSeparator, 0, buffer, pos, lineSeparator.length);
486                pos += lineSeparator.length;
487            }
488        } else {
489            for (int i = 0; i < inAvail; i++) {
490                if (buffer == null || buffer.length - pos < encodeSize) {
491                    resizeBuffer();
492                }
493                modulus = (++modulus) % 3;
494                int b = in[inPos++];
495                if (b < 0) {
496                    b += 256;
497                }
498                x = (x << 8) + b;
499                if (0 == modulus) {
500                    buffer[pos++] = encodeTable[(x >> 18) & MASK_6BITS];
501                    buffer[pos++] = encodeTable[(x >> 12) & MASK_6BITS];
502                    buffer[pos++] = encodeTable[(x >> 6) & MASK_6BITS];
503                    buffer[pos++] = encodeTable[x & MASK_6BITS];
504                    currentLinePos += 4;
505                    if (lineLength > 0 && lineLength <= currentLinePos) {
506                        System.arraycopy(lineSeparator, 0, buffer, pos, lineSeparator.length);
507                        pos += lineSeparator.length;
508                        currentLinePos = 0;
509                    }
510                }
511            }
512        }
513    }
514
515    /**
516     * <p>
517     * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
518     * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
519     * call is not necessary when decoding, but it doesn't hurt, either.
520     * </p>
521     * <p>
522     * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
523     * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
524     * garbage-out philosophy: it will not check the provided data for validity.
525     * </p>
526     * <p>
527     * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
528     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
529     * </p>
530     * 
531     * @param in
532     *            byte[] array of ascii data to base64 decode.
533     * @param inPos
534     *            Position to start reading data from.
535     * @param inAvail
536     *            Amount of bytes available from input for encoding.
537     */
538    void decode(byte[] in, int inPos, int inAvail) {
539        if (eof) {
540            return;
541        }
542        if (inAvail < 0) {
543            eof = true;
544        }
545        for (int i = 0; i < inAvail; i++) {
546            if (buffer == null || buffer.length - pos < decodeSize) {
547                resizeBuffer();
548            }
549            byte b = in[inPos++];
550            if (b == PAD) {
551                // We're done.
552                eof = true;
553                break;
554            } else {
555                if (b >= 0 && b < DECODE_TABLE.length) {
556                    int result = DECODE_TABLE[b];
557                    if (result >= 0) {
558                        modulus = (++modulus) % 4;
559                        x = (x << 6) + result;
560                        if (modulus == 0) {
561                            buffer[pos++] = (byte) ((x >> 16) & MASK_8BITS);
562                            buffer[pos++] = (byte) ((x >> 8) & MASK_8BITS);
563                            buffer[pos++] = (byte) (x & MASK_8BITS);
564                        }
565                    }
566                }
567            }
568        }
569
570        // Two forms of EOF as far as base64 decoder is concerned: actual
571        // EOF (-1) and first time '=' character is encountered in stream.
572        // This approach makes the '=' padding characters completely optional.
573        if (eof && modulus != 0) {
574            x = x << 6;
575            switch (modulus) {
576                case 2 :
577                    x = x << 6;
578                    buffer[pos++] = (byte) ((x >> 16) & MASK_8BITS);
579                    break;
580                case 3 :
581                    buffer[pos++] = (byte) ((x >> 16) & MASK_8BITS);
582                    buffer[pos++] = (byte) ((x >> 8) & MASK_8BITS);
583                    break;
584            }
585        }
586    }
587
588    /**
589     * Returns whether or not the <code>octet</code> is in the base 64 alphabet.
590     * 
591     * @param octet
592     *            The value to test
593     * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise.
594     * @since 1.4
595     */
596    public static boolean isBase64(byte octet) {
597        return octet == PAD || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1);
598    }
599
600    /**
601     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
602     * method treats whitespace as valid.
603     * 
604     * @param arrayOctet
605     *            byte array to test
606     * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
607     *         false, otherwise
608     */
609    public static boolean isArrayByteBase64(byte[] arrayOctet) {
610        for (int i = 0; i < arrayOctet.length; i++) {
611            if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) {
612                return false;
613            }
614        }
615        return true;
616    }
617
618    /**
619     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet.
620     * 
621     * @param arrayOctet
622     *            byte array to test
623     * @return <code>true</code> if any byte is a valid character in the Base64 alphabet; false herwise
624     */
625    private static boolean containsBase64Byte(byte[] arrayOctet) {
626        for (int i = 0; i < arrayOctet.length; i++) {
627            if (isBase64(arrayOctet[i])) {
628                return true;
629            }
630        }
631        return false;
632    }
633
634    /**
635     * Encodes binary data using the base64 algorithm but does not chunk the output.
636     * 
637     * @param binaryData
638     *            binary data to encode
639     * @return byte[] containing Base64 characters in their UTF-8 representation.
640     */
641    public static byte[] encodeBase64(byte[] binaryData) {
642        return encodeBase64(binaryData, false);
643    }
644
645    /**
646     * Encodes binary data using the base64 algorithm into 76 character blocks separated by CRLF.
647     *
648     * @param binaryData
649     *            binary data to encode
650     * @return String containing Base64 characters.
651     * @since 1.4
652     */    
653    public static String encodeBase64String(byte[] binaryData) {
654        return StringUtils.newStringUtf8(encodeBase64(binaryData, true));
655    }
656    
657    /**
658     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
659     * url-safe variation emits - and _ instead of + and / characters.
660     * 
661     * @param binaryData
662     *            binary data to encode
663     * @return byte[] containing Base64 characters in their UTF-8 representation.
664     * @since 1.4
665     */
666    public static byte[] encodeBase64URLSafe(byte[] binaryData) {
667        return encodeBase64(binaryData, false, true);
668    }
669
670    /**
671     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
672     * url-safe variation emits - and _ instead of + and / characters.
673     *
674     * @param binaryData
675     *            binary data to encode
676     * @return String containing Base64 characters
677     * @since 1.4
678     */    
679    public static String encodeBase64URLSafeString(byte[] binaryData) {
680        return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true));
681    }    
682
683    /**
684     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
685     * 
686     * @param binaryData
687     *            binary data to encode
688     * @return Base64 characters chunked in 76 character blocks
689     */
690    public static byte[] encodeBase64Chunked(byte[] binaryData) {
691        return encodeBase64(binaryData, true);
692    }
693
694    /**
695     * Decodes an Object using the base64 algorithm. This method is provided in order to satisfy the requirements of the
696     * Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
697     * 
698     * @param pObject
699     *            Object to decode
700     * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String supplied.
701     * @throws DecoderException
702     *             if the parameter supplied is not of type byte[]
703     */
704    public Object decode(Object pObject) throws DecoderException {        
705        if (pObject instanceof byte[]) {
706            return decode((byte[]) pObject);
707        } else if (pObject instanceof String) {
708            return decode((String) pObject);
709        } else {
710            throw new DecoderException("Parameter supplied to Base64 decode is not a byte[] or a String");
711        }
712    }
713
714    /**
715     * Decodes a String containing containing characters in the Base64 alphabet.
716     *
717     * @param pArray
718     *            A String containing Base64 character data
719     * @return a byte array containing binary data
720     * @since 1.4
721     */
722    public byte[] decode(String pArray) {
723        return decode(StringUtils.getBytesUtf8(pArray));
724    }
725
726    /**
727     * Decodes a byte[] containing containing characters in the Base64 alphabet.
728     * 
729     * @param pArray
730     *            A byte array containing Base64 character data
731     * @return a byte array containing binary data
732     */
733    public byte[] decode(byte[] pArray) {
734        reset();
735        if (pArray == null || pArray.length == 0) {
736            return pArray;
737        }
738        long len = (pArray.length * 3) / 4;
739        byte[] buf = new byte[(int) len];
740        setInitialBuffer(buf, 0, buf.length);
741        decode(pArray, 0, pArray.length);
742        decode(pArray, 0, -1); // Notify decoder of EOF.
743
744        // Would be nice to just return buf (like we sometimes do in the encode
745        // logic), but we have no idea what the line-length was (could even be
746        // variable).  So we cannot determine ahead of time exactly how big an
747        // array is necessary.  Hence the need to construct a 2nd byte array to
748        // hold the final result:
749
750        byte[] result = new byte[pos];
751        readResults(result, 0, result.length);
752        return result;
753    }
754
755    /**
756     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
757     * 
758     * @param binaryData
759     *            Array containing binary data to encode.
760     * @param isChunked
761     *            if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
762     * @return Base64-encoded data.
763     * @throws IllegalArgumentException
764     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
765     */
766    public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
767        return encodeBase64(binaryData, isChunked, false);
768    }
769
770    /**
771     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
772     * 
773     * @param binaryData
774     *            Array containing binary data to encode.
775     * @param isChunked
776     *            if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
777     * @param urlSafe
778     *            if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters.
779     * @return Base64-encoded data.
780     * @throws IllegalArgumentException
781     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
782     * @since 1.4
783     */
784    public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe) {
785        return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
786    }
787
788    /**
789     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
790     * 
791     * @param binaryData
792     *            Array containing binary data to encode.
793     * @param isChunked
794     *            if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
795     * @param urlSafe
796     *            if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters.
797     * @param maxResultSize
798     *            The maximum result size to accept.
799     * @return Base64-encoded data.
800     * @throws IllegalArgumentException
801     *             Thrown when the input array needs an output array bigger than maxResultSize
802     * @since 1.4
803     */
804    public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe, int maxResultSize) {
805        if (binaryData == null || binaryData.length == 0) {
806            return binaryData;
807        }
808
809        long len = getEncodeLength(binaryData, CHUNK_SIZE, CHUNK_SEPARATOR);
810        if (len > maxResultSize) {
811            throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
812                len +
813                ") than the specified maxium size of " +
814                maxResultSize);
815        }
816                
817        Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
818        return b64.encode(binaryData);
819    }
820
821    /**
822     * Decodes a Base64 String into octets
823     *
824     * @param base64String
825     *            String containing Base64 data
826     * @return Array containing decoded data.
827     * @since 1.4
828     */
829    public static byte[] decodeBase64(String base64String) {
830        return new Base64().decode(base64String);
831    }
832
833    /**
834     * Decodes Base64 data into octets
835     * 
836     * @param base64Data
837     *            Byte array containing Base64 data
838     * @return Array containing decoded data.
839     */
840    public static byte[] decodeBase64(byte[] base64Data) {
841        return new Base64().decode(base64Data);
842    }
843
844    /**
845     * Discards any whitespace from a base-64 encoded block.
846     * 
847     * @param data
848     *            The base-64 encoded data to discard the whitespace from.
849     * @return The data, less whitespace (see RFC 2045).
850     * @deprecated This method is no longer needed
851     */
852    static byte[] discardWhitespace(byte[] data) {
853        byte groomedData[] = new byte[data.length];
854        int bytesCopied = 0;
855        for (int i = 0; i < data.length; i++) {
856            switch (data[i]) {
857                case ' ' :
858                case '\n' :
859                case '\r' :
860                case '\t' :
861                    break;
862                default :
863                    groomedData[bytesCopied++] = data[i];
864            }
865        }
866        byte packedData[] = new byte[bytesCopied];
867        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
868        return packedData;
869    }
870
871    /**
872     * Checks if a byte value is whitespace or not.
873     * 
874     * @param byteToCheck
875     *            the byte to check
876     * @return true if byte is whitespace, false otherwise
877     */
878    private static boolean isWhiteSpace(byte byteToCheck) {
879        switch (byteToCheck) {
880            case ' ' :
881            case '\n' :
882            case '\r' :
883            case '\t' :
884                return true;
885            default :
886                return false;
887        }
888    }
889
890    // Implementation of the Encoder Interface
891
892    /**
893     * Encodes an Object using the base64 algorithm. This method is provided in order to satisfy the requirements of the
894     * Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
895     * 
896     * @param pObject
897     *            Object to encode
898     * @return An object (of type byte[]) containing the base64 encoded data which corresponds to the byte[] supplied.
899     * @throws EncoderException
900     *             if the parameter supplied is not of type byte[]
901     */
902    public Object encode(Object pObject) throws EncoderException {
903        if (!(pObject instanceof byte[])) {
904            throw new EncoderException("Parameter supplied to Base64 encode is not a byte[]");
905        }
906        return encode((byte[]) pObject);
907    }
908
909    /**
910     * Encodes a byte[] containing binary data, into a String containing characters in the Base64 alphabet.
911     *
912     * @param pArray
913     *            a byte array containing binary data
914     * @return A String containing only Base64 character data
915     * @since 1.4
916     */    
917    public String encodeToString(byte[] pArray) {
918        return StringUtils.newStringUtf8(encode(pArray));
919    }
920
921    /**
922     * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet.
923     * 
924     * @param pArray
925     *            a byte array containing binary data
926     * @return A byte array containing only Base64 character data
927     */
928    public byte[] encode(byte[] pArray) {
929        reset();        
930        if (pArray == null || pArray.length == 0) {
931            return pArray;
932        }
933        long len = getEncodeLength(pArray, lineLength, lineSeparator);
934        byte[] buf = new byte[(int) len];
935        setInitialBuffer(buf, 0, buf.length);
936        encode(pArray, 0, pArray.length);
937        encode(pArray, 0, -1); // Notify encoder of EOF.
938        // Encoder might have resized, even though it was unnecessary.
939        if (buffer != buf) {
940            readResults(buf, 0, buf.length);
941        }
942        // In URL-SAFE mode we skip the padding characters, so sometimes our
943        // final length is a bit smaller.
944        if (isUrlSafe() && pos < buf.length) {
945            byte[] smallerBuf = new byte[pos];
946            System.arraycopy(buf, 0, smallerBuf, 0, pos);
947            buf = smallerBuf;
948        }
949        return buf;        
950    }
951
952    /**
953     * Pre-calculates the amount of space needed to base64-encode the supplied array.
954     *
955     * @param pArray byte[] array which will later be encoded
956     * @param chunkSize line-length of the output (<= 0 means no chunking) between each
957     *        chunkSeparator (e.g. CRLF).
958     * @param chunkSeparator the sequence of bytes used to separate chunks of output (e.g. CRLF).
959     *
960     * @return amount of space needed to encoded the supplied array.  Returns
961     *         a long since a max-len array will require Integer.MAX_VALUE + 33%.
962     */
963    private static long getEncodeLength(byte[] pArray, int chunkSize, byte[] chunkSeparator) {
964        // base64 always encodes to multiples of 4.
965        chunkSize = (chunkSize / 4) * 4;
966
967        long len = (pArray.length * 4) / 3;
968        long mod = len % 4;
969        if (mod != 0) {
970            len += 4 - mod;
971        }
972        if (chunkSize > 0) {
973            boolean lenChunksPerfectly = len % chunkSize == 0;
974            len += (len / chunkSize) * chunkSeparator.length;
975            if (!lenChunksPerfectly) {
976                len += chunkSeparator.length;
977            }
978        }
979        return len;
980    }
981
982    // Implementation of integer encoding used for crypto
983    /**
984     * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature
985     * 
986     * @param pArray
987     *            a byte array containing base64 character data
988     * @return A BigInteger
989     * @since 1.4
990     */
991    public static BigInteger decodeInteger(byte[] pArray) {
992        return new BigInteger(1, decodeBase64(pArray));
993    }
994
995    /**
996     * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature
997     * 
998     * @param bigInt
999     *            a BigInteger
1000     * @return A byte array containing base64 character data
1001     * @throws NullPointerException
1002     *             if null is passed in
1003     * @since 1.4
1004     */
1005    public static byte[] encodeInteger(BigInteger bigInt) {
1006        if (bigInt == null) {
1007            throw new NullPointerException("encodeInteger called with null parameter");
1008        }
1009        return encodeBase64(toIntegerBytes(bigInt), false);
1010    }
1011
1012    /**
1013     * Returns a byte-array representation of a <code>BigInteger</code> without sign bit.
1014     * 
1015     * @param bigInt
1016     *            <code>BigInteger</code> to be converted
1017     * @return a byte array representation of the BigInteger parameter
1018     */
1019    static byte[] toIntegerBytes(BigInteger bigInt) {
1020        int bitlen = bigInt.bitLength();
1021        // round bitlen
1022        bitlen = ((bitlen + 7) >> 3) << 3;
1023        byte[] bigBytes = bigInt.toByteArray();
1024
1025        if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) {
1026            return bigBytes;
1027        }
1028        // set up params for copying everything but sign bit
1029        int startSrc = 0;
1030        int len = bigBytes.length;
1031
1032        // if bigInt is exactly byte-aligned, just skip signbit in copy
1033        if ((bigInt.bitLength() % 8) == 0) {
1034            startSrc = 1;
1035            len--;
1036        }
1037        int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
1038        byte[] resizedBytes = new byte[bitlen / 8];
1039        System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
1040        return resizedBytes;
1041    }
1042
1043    /**
1044     * Resets this Base64 object to its initial newly constructed state.
1045     */
1046    private void reset() {
1047        buffer = null;
1048        pos = 0;
1049        readPos = 0;
1050        currentLinePos = 0;
1051        modulus = 0;
1052        eof = false;
1053    }
1054
1055}