001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.net;
019
020import java.io.ByteArrayOutputStream;
021import java.io.UnsupportedEncodingException;
022import java.util.BitSet;
023
024import org.apache.commons.codec.BinaryDecoder;
025import org.apache.commons.codec.BinaryEncoder;
026import org.apache.commons.codec.DecoderException;
027import org.apache.commons.codec.EncoderException;
028import org.apache.commons.codec.CharEncoding;
029import org.apache.commons.codec.StringDecoder;
030import org.apache.commons.codec.StringEncoder;
031import org.apache.commons.codec.binary.StringUtils;
032
033/**
034 * <p>Implements the 'www-form-urlencoded' encoding scheme, 
035 * also misleadingly known as URL encoding.</p>
036 *  
037 * <p>For more detailed information please refer to 
038 * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">
039 * Chapter 17.13.4 'Form content types'</a> of the 
040 * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification<a></p>
041 * 
042 * <p> 
043 * This codec is meant to be a replacement for standard Java classes
044 * {@link java.net.URLEncoder} and {@link java.net.URLDecoder} 
045 * on older Java platforms, as these classes in Java versions below 
046 * 1.4 rely on the platform's default charset encoding.
047 * </p>
048 * 
049 * @author Apache Software Foundation
050 * @since 1.2
051 * @version $Id: URLCodec.java 798416 2009-07-28 06:35:58Z ggregory $
052 */
053public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
054    
055    /**
056     * Radix used in encoding and decoding.
057     */
058    static final int RADIX = 16;
059    
060    /**
061     * The default charset used for string decoding and encoding. Consider this field final. The next major release may
062     * break compatibility and make this field be final.
063     */
064    protected String charset;
065    
066    /**
067     * Consider this field final. The next major release may break compatibility and make this field be final.
068     */
069    protected static byte ESCAPE_CHAR = '%';
070    /**
071     * BitSet of www-form-url safe characters.
072     */
073    protected static final BitSet WWW_FORM_URL = new BitSet(256);
074    
075    // Static initializer for www_form_url
076    static {
077        // alpha characters
078        for (int i = 'a'; i <= 'z'; i++) {
079            WWW_FORM_URL.set(i);
080        }
081        for (int i = 'A'; i <= 'Z'; i++) {
082            WWW_FORM_URL.set(i);
083        }
084        // numeric characters
085        for (int i = '0'; i <= '9'; i++) {
086            WWW_FORM_URL.set(i);
087        }
088        // special chars
089        WWW_FORM_URL.set('-');
090        WWW_FORM_URL.set('_');
091        WWW_FORM_URL.set('.');
092        WWW_FORM_URL.set('*');
093        // blank to be replaced with +
094        WWW_FORM_URL.set(' ');
095    }
096
097
098    /**
099     * Default constructor.
100     */
101    public URLCodec() {
102        this(CharEncoding.UTF_8);
103    }
104
105    /**
106     * Constructor which allows for the selection of a default charset
107     * 
108     * @param charset the default string charset to use.
109     */
110    public URLCodec(String charset) {
111        super();
112        this.charset = charset;
113    }
114
115    /**
116     * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
117     * 
118     * @param urlsafe
119     *            bitset of characters deemed URL safe
120     * @param bytes
121     *            array of bytes to convert to URL safe characters
122     * @return array of bytes containing URL safe characters
123     */
124    public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes) {
125        if (bytes == null) {
126            return null;
127        }
128        if (urlsafe == null) {
129            urlsafe = WWW_FORM_URL;
130        }
131
132        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
133        for (int i = 0; i < bytes.length; i++) {
134            int b = bytes[i];
135            if (b < 0) {
136                b = 256 + b;
137            }
138            if (urlsafe.get(b)) {
139                if (b == ' ') {
140                    b = '+';
141                }
142                buffer.write(b);
143            } else {
144                buffer.write(ESCAPE_CHAR);
145                char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
146                char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
147                buffer.write(hex1);
148                buffer.write(hex2);
149            }
150        }
151        return buffer.toByteArray();
152    }
153
154    /**
155     * Decodes an array of URL safe 7-bit characters into an array of 
156     * original bytes. Escaped characters are converted back to their 
157     * original representation.
158     *
159     * @param bytes array of URL safe characters
160     * @return array of original bytes 
161     * @throws DecoderException Thrown if URL decoding is unsuccessful
162     */
163    public static final byte[] decodeUrl(byte[] bytes) throws DecoderException {
164        if (bytes == null) {
165            return null;
166        }
167        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
168        for (int i = 0; i < bytes.length; i++) {
169            int b = bytes[i];
170            if (b == '+') {
171                buffer.write(' ');
172            } else if (b == ESCAPE_CHAR) {
173                try {
174                    int u = Utils.digit16(bytes[++i]);
175                    int l = Utils.digit16(bytes[++i]);
176                    buffer.write((char) ((u << 4) + l));
177                } catch (ArrayIndexOutOfBoundsException e) {
178                    throw new DecoderException("Invalid URL encoding: ", e);
179                }
180            } else {
181                buffer.write(b);
182            }
183        }
184        return buffer.toByteArray();
185    }
186
187    /**
188     * Encodes an array of bytes into an array of URL safe 7-bit 
189     * characters. Unsafe characters are escaped.
190     *
191     * @param bytes array of bytes to convert to URL safe characters
192     * @return array of bytes containing URL safe characters
193     */
194    public byte[] encode(byte[] bytes) {
195        return encodeUrl(WWW_FORM_URL, bytes);
196    }
197
198
199    /**
200     * Decodes an array of URL safe 7-bit characters into an array of 
201     * original bytes. Escaped characters are converted back to their 
202     * original representation.
203     *
204     * @param bytes array of URL safe characters
205     * @return array of original bytes 
206     * @throws DecoderException Thrown if URL decoding is unsuccessful
207     */
208    public byte[] decode(byte[] bytes) throws DecoderException {
209        return decodeUrl(bytes);
210    }
211
212    /**
213     * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped.
214     * 
215     * @param pString
216     *            string to convert to a URL safe form
217     * @param charset
218     *            the charset for pString
219     * @return URL safe string
220     * @throws UnsupportedEncodingException
221     *             Thrown if charset is not supported
222     */
223    public String encode(String pString, String charset) throws UnsupportedEncodingException {
224        if (pString == null) {
225            return null;
226        }
227        return StringUtils.newStringUsAscii(encode(pString.getBytes(charset)));
228    }
229
230    /**
231     * Encodes a string into its URL safe form using the default string 
232     * charset. Unsafe characters are escaped.
233     *
234     * @param pString string to convert to a URL safe form
235     * @return URL safe string
236     * @throws EncoderException Thrown if URL encoding is unsuccessful
237     * 
238     * @see #getDefaultCharset()
239     */
240    public String encode(String pString) throws EncoderException {
241        if (pString == null) {
242            return null;
243        }
244        try {
245            return encode(pString, getDefaultCharset());
246        } catch (UnsupportedEncodingException e) {
247            throw new EncoderException(e.getMessage(), e);
248        }
249    }
250
251
252    /**
253     * Decodes a URL safe string into its original form using the 
254     * specified encoding. Escaped characters are converted back 
255     * to their original representation.
256     *
257     * @param pString URL safe string to convert into its original form
258     * @param charset the original string charset
259     * @return original string 
260     * @throws DecoderException Thrown if URL decoding is unsuccessful
261     * @throws UnsupportedEncodingException Thrown if charset is not
262     *                                      supported 
263     */
264    public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
265        if (pString == null) {
266            return null;
267        }
268        return new String(decode(StringUtils.getBytesUsAscii(pString)), charset);
269    }
270
271    /**
272     * Decodes a URL safe string into its original form using the default
273     * string charset. Escaped characters are converted back to their 
274     * original representation.
275     *
276     * @param pString URL safe string to convert into its original form
277     * @return original string 
278     * @throws DecoderException Thrown if URL decoding is unsuccessful
279     * 
280     * @see #getDefaultCharset()
281     */
282    public String decode(String pString) throws DecoderException {
283        if (pString == null) {
284            return null;
285        }
286        try {
287            return decode(pString, getDefaultCharset());
288        } catch (UnsupportedEncodingException e) {
289            throw new DecoderException(e.getMessage(), e);
290        }
291    }
292
293    /**
294     * Encodes an object into its URL safe form. Unsafe characters are 
295     * escaped.
296     *
297     * @param pObject string to convert to a URL safe form
298     * @return URL safe object
299     * @throws EncoderException Thrown if URL encoding is not 
300     *                          applicable to objects of this type or
301     *                          if encoding is unsuccessful
302     */
303    public Object encode(Object pObject) throws EncoderException {
304        if (pObject == null) {
305            return null;
306        } else if (pObject instanceof byte[]) {
307            return encode((byte[])pObject);
308        } else if (pObject instanceof String) {
309            return encode((String)pObject);
310        } else {
311            throw new EncoderException("Objects of type " +
312                pObject.getClass().getName() + " cannot be URL encoded"); 
313              
314        }
315    }
316
317    /**
318     * Decodes a URL safe object into its original form. Escaped characters are converted back to their original
319     * representation.
320     * 
321     * @param pObject
322     *                  URL safe object to convert into its original form
323     * @return original object
324     * @throws DecoderException
325     *                  Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure condition is
326     *                  encountered during the decode process.
327     */
328    public Object decode(Object pObject) throws DecoderException {
329        if (pObject == null) {
330            return null;
331        } else if (pObject instanceof byte[]) {
332            return decode((byte[]) pObject);
333        } else if (pObject instanceof String) {
334            return decode((String) pObject);
335        } else {
336            throw new DecoderException("Objects of type " + pObject.getClass().getName() + " cannot be URL decoded");
337
338        }
339    }
340
341    /**
342     * The <code>String</code> encoding used for decoding and encoding.
343     * 
344     * @return Returns the encoding.
345     * 
346     * @deprecated use #getDefaultCharset()
347     */
348    public String getEncoding() {
349        return this.charset;
350    }
351
352    /**
353     * The default charset used for string decoding and encoding.
354     *
355     * @return the default string charset.
356     */
357    public String getDefaultCharset() {
358        return this.charset;
359    }
360
361}