001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.net;
019
020import java.io.ByteArrayOutputStream;
021import java.io.UnsupportedEncodingException;
022import java.util.BitSet;
023
024import org.apache.commons.codec.BinaryDecoder;
025import org.apache.commons.codec.BinaryEncoder;
026import org.apache.commons.codec.DecoderException;
027import org.apache.commons.codec.EncoderException;
028import org.apache.commons.codec.CharEncoding;
029import org.apache.commons.codec.StringDecoder;
030import org.apache.commons.codec.StringEncoder;
031import org.apache.commons.codec.binary.StringUtils;
032
033/**
034 * <p>
035 * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>.
036 * </p>
037 * <p>
038 * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
039 * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
040 * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
041 * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
042 * to ensure the integrity of the data should the message pass through a character-translating and/or line-wrapping
043 * gateway.
044 * </p>
045 * 
046 * <p>
047 * Note:
048 * </p>
049 * <p>
050 * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
051 * does not lend itself well to the byte[]-oriented codec framework. The codec should be completed once the streamable
052 * codec framework is ready. The motivation behind providing the codec in a partial form is that it can already come in
053 * handy for applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance the Q codec.
054 * </p>
055 * 
056 * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
057 *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
058 * 
059 * @author Apache Software Foundation
060 * @since 1.3
061 * @version $Id: QuotedPrintableCodec.java 798333 2009-07-27 23:41:58Z ggregory $
062 */
063public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
064    /**
065     * The default charset used for string decoding and encoding.
066     */
067    private final String charset;
068
069    /**
070     * BitSet of printable characters as defined in RFC 1521.
071     */
072    private static final BitSet PRINTABLE_CHARS = new BitSet(256);
073
074    private static final byte ESCAPE_CHAR = '=';
075
076    private static final byte TAB = 9;
077
078    private static final byte SPACE = 32;
079    // Static initializer for printable chars collection
080    static {
081        // alpha characters
082        for (int i = 33; i <= 60; i++) {
083            PRINTABLE_CHARS.set(i);
084        }
085        for (int i = 62; i <= 126; i++) {
086            PRINTABLE_CHARS.set(i);
087        }
088        PRINTABLE_CHARS.set(TAB);
089        PRINTABLE_CHARS.set(SPACE);
090    }
091
092    /**
093     * Default constructor.
094     */
095    public QuotedPrintableCodec() {
096        this(CharEncoding.UTF_8);
097    }
098
099    /**
100     * Constructor which allows for the selection of a default charset
101     * 
102     * @param charset
103     *                  the default string charset to use.
104     */
105    public QuotedPrintableCodec(String charset) {
106        super();
107        this.charset = charset;
108    }
109
110    /**
111     * Encodes byte into its quoted-printable representation.
112     * 
113     * @param b
114     *                  byte to encode
115     * @param buffer
116     *                  the buffer to write to
117     */
118    private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
119        buffer.write(ESCAPE_CHAR);
120        char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
121        char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
122        buffer.write(hex1);
123        buffer.write(hex2);
124    }
125
126    /**
127     * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
128     * 
129     * <p>
130     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
131     * RFC 1521 and is suitable for encoding binary data and unformatted text.
132     * </p>
133     * 
134     * @param printable
135     *                  bitset of characters deemed quoted-printable
136     * @param bytes
137     *                  array of bytes to be encoded
138     * @return array of bytes containing quoted-printable data
139     */
140    public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
141        if (bytes == null) {
142            return null;
143        }
144        if (printable == null) {
145            printable = PRINTABLE_CHARS;
146        }
147        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
148        for (int i = 0; i < bytes.length; i++) {
149            int b = bytes[i];
150            if (b < 0) {
151                b = 256 + b;
152            }
153            if (printable.get(b)) {
154                buffer.write(b);
155            } else {
156                encodeQuotedPrintable(b, buffer);
157            }
158        }
159        return buffer.toByteArray();
160    }
161
162    /**
163     * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
164     * back to their original representation.
165     * 
166     * <p>
167     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
168     * RFC 1521.
169     * </p>
170     * 
171     * @param bytes
172     *                  array of quoted-printable characters
173     * @return array of original bytes
174     * @throws DecoderException
175     *                  Thrown if quoted-printable decoding is unsuccessful
176     */
177    public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
178        if (bytes == null) {
179            return null;
180        }
181        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
182        for (int i = 0; i < bytes.length; i++) {
183            int b = bytes[i];
184            if (b == ESCAPE_CHAR) {
185                try {
186                    int u = Utils.digit16(bytes[++i]);
187                    int l = Utils.digit16(bytes[++i]);
188                    buffer.write((char) ((u << 4) + l));
189                } catch (ArrayIndexOutOfBoundsException e) {
190                    throw new DecoderException("Invalid quoted-printable encoding", e);
191                }
192            } else {
193                buffer.write(b);
194            }
195        }
196        return buffer.toByteArray();
197    }
198
199    /**
200     * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
201     * 
202     * <p>
203     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
204     * RFC 1521 and is suitable for encoding binary data and unformatted text.
205     * </p>
206     * 
207     * @param bytes
208     *                  array of bytes to be encoded
209     * @return array of bytes containing quoted-printable data
210     */
211    public byte[] encode(byte[] bytes) {
212        return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
213    }
214
215    /**
216     * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
217     * back to their original representation.
218     * 
219     * <p>
220     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
221     * RFC 1521.
222     * </p>
223     * 
224     * @param bytes
225     *                  array of quoted-printable characters
226     * @return array of original bytes
227     * @throws DecoderException
228     *                  Thrown if quoted-printable decoding is unsuccessful
229     */
230    public byte[] decode(byte[] bytes) throws DecoderException {
231        return decodeQuotedPrintable(bytes);
232    }
233
234    /**
235     * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
236     * 
237     * <p>
238     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
239     * RFC 1521 and is suitable for encoding binary data.
240     * </p>
241     * 
242     * @param pString
243     *                  string to convert to quoted-printable form
244     * @return quoted-printable string
245     * 
246     * @throws EncoderException
247     *                  Thrown if quoted-printable encoding is unsuccessful
248     * 
249     * @see #getDefaultCharset()
250     */
251    public String encode(String pString) throws EncoderException {
252        if (pString == null) {
253            return null;
254        }
255        try {
256            return encode(pString, getDefaultCharset());
257        } catch (UnsupportedEncodingException e) {
258            throw new EncoderException(e.getMessage(), e);
259        }
260    }
261
262    /**
263     * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
264     * are converted back to their original representation.
265     * 
266     * @param pString
267     *                  quoted-printable string to convert into its original form
268     * @param charset
269     *                  the original string charset
270     * @return original string
271     * @throws DecoderException
272     *                  Thrown if quoted-printable decoding is unsuccessful
273     * @throws UnsupportedEncodingException
274     *                  Thrown if charset is not supported
275     */
276    public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
277        if (pString == null) {
278            return null;
279        }
280        return new String(decode(StringUtils.getBytesUsAscii(pString)), charset);
281    }
282
283    /**
284     * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
285     * converted back to their original representation.
286     * 
287     * @param pString
288     *                  quoted-printable string to convert into its original form
289     * @return original string
290     * @throws DecoderException
291     *                  Thrown if quoted-printable decoding is unsuccessful.
292     *                  Thrown if charset is not supported.
293     * @see #getDefaultCharset()
294     */
295    public String decode(String pString) throws DecoderException {
296        if (pString == null) {
297            return null;
298        }
299        try {
300            return decode(pString, getDefaultCharset());
301        } catch (UnsupportedEncodingException e) {
302            throw new DecoderException(e.getMessage(), e);
303        }
304    }
305
306    /**
307     * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
308     * 
309     * @param pObject
310     *                  string to convert to a quoted-printable form
311     * @return quoted-printable object
312     * @throws EncoderException
313     *                  Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
314     *                  unsuccessful
315     */
316    public Object encode(Object pObject) throws EncoderException {
317        if (pObject == null) {
318            return null;
319        } else if (pObject instanceof byte[]) {
320            return encode((byte[]) pObject);
321        } else if (pObject instanceof String) {
322            return encode((String) pObject);
323        } else {
324            throw new EncoderException("Objects of type " + 
325                  pObject.getClass().getName() + 
326                  " cannot be quoted-printable encoded");
327        }
328    }
329
330    /**
331     * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
332     * representation.
333     * 
334     * @param pObject
335     *                  quoted-printable object to convert into its original form
336     * @return original object
337     * @throws DecoderException
338     *                  Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure condition is
339     *                  encountered during the decode process.
340     */
341    public Object decode(Object pObject) throws DecoderException {
342        if (pObject == null) {
343            return null;
344        } else if (pObject instanceof byte[]) {
345            return decode((byte[]) pObject);
346        } else if (pObject instanceof String) {
347            return decode((String) pObject);
348        } else {
349            throw new DecoderException("Objects of type " + 
350                  pObject.getClass().getName() + 
351                  " cannot be quoted-printable decoded");
352        }
353    }
354
355    /**
356     * Returns the default charset used for string decoding and encoding.
357     * 
358     * @return the default string charset.
359     */
360    public String getDefaultCharset() {
361        return this.charset;
362    }
363
364    /**
365     * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
366     * 
367     * <p>
368     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
369     * RFC 1521 and is suitable for encoding binary data and unformatted text.
370     * </p>
371     * 
372     * @param pString
373     *                  string to convert to quoted-printable form
374     * @param charset
375     *                  the charset for pString
376     * @return quoted-printable string
377     * 
378     * @throws UnsupportedEncodingException
379     *                  Thrown if the charset is not supported
380     */
381    public String encode(String pString, String charset) throws UnsupportedEncodingException {
382        if (pString == null) {
383            return null;
384        }
385        return StringUtils.newStringUsAscii(encode(pString.getBytes(charset)));
386    }
387}