001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.io.UnsupportedEncodingException;
021
022import org.apache.commons.codec.CharEncoding;
023
024/**
025 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are specified in <a
026 * href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
027 * 
028 * @see CharEncoding
029 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
030 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
031 * @version $Id: StringUtils.java 801391 2009-08-05 19:55:54Z ggregory $
032 * @since 1.4
033 */
034public class StringUtils {
035
036    /**
037     * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
038     * byte array.
039     * 
040     * @param string
041     *            the String to encode
042     * @return encoded bytes
043     * @throws IllegalStateException
044     *             Thrown when the charset is missing, which should be never according the the Java specification.
045     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
046     * @see #getBytesUnchecked(String, String)
047     */
048    public static byte[] getBytesIso8859_1(String string) {
049        return StringUtils.getBytesUnchecked(string, CharEncoding.ISO_8859_1);
050    }
051
052    /**
053     * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
054     * array.
055     * 
056     * @param string
057     *            the String to encode
058     * @return encoded bytes
059     * @throws IllegalStateException
060     *             Thrown when the charset is missing, which should be never according the the Java specification.
061     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
062     * @see #getBytesUnchecked(String, String)
063     */
064    public static byte[] getBytesUsAscii(String string) {
065        return StringUtils.getBytesUnchecked(string, CharEncoding.US_ASCII);
066    }
067
068    /**
069     * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
070     * array.
071     * 
072     * @param string
073     *            the String to encode
074     * @return encoded bytes
075     * @throws IllegalStateException
076     *             Thrown when the charset is missing, which should be never according the the Java specification.
077     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
078     * @see #getBytesUnchecked(String, String)
079     */
080    public static byte[] getBytesUtf16(String string) {
081        return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16);
082    }
083
084    /**
085     * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
086     * array.
087     * 
088     * @param string
089     *            the String to encode
090     * @return encoded bytes
091     * @throws IllegalStateException
092     *             Thrown when the charset is missing, which should be never according the the Java specification.
093     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
094     * @see #getBytesUnchecked(String, String)
095     */
096    public static byte[] getBytesUtf16Be(String string) {
097        return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16BE);
098    }
099
100    /**
101     * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
102     * array.
103     * 
104     * @param string
105     *            the String to encode
106     * @return encoded bytes
107     * @throws IllegalStateException
108     *             Thrown when the charset is missing, which should be never according the the Java specification.
109     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
110     * @see #getBytesUnchecked(String, String)
111     */
112    public static byte[] getBytesUtf16Le(String string) {
113        return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16LE);
114    }
115
116    /**
117     * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
118     * array.
119     * 
120     * @param string
121     *            the String to encode
122     * @return encoded bytes
123     * @throws IllegalStateException
124     *             Thrown when the charset is missing, which should be never according the the Java specification.
125     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
126     * @see #getBytesUnchecked(String, String)
127     */
128    public static byte[] getBytesUtf8(String string) {
129        return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_8);
130    }
131
132    /**
133     * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
134     * array.
135     * <p>
136     * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
137     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
138     * </p>
139     * 
140     * @param string
141     *            the String to encode
142     * @param charsetName
143     *            The name of a required {@link java.nio.charset.Charset}
144     * @return encoded bytes
145     * @throws IllegalStateException
146     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
147     *             required charset name.
148     * @see CharEncoding
149     * @see String#getBytes(String)
150     */
151    public static byte[] getBytesUnchecked(String string, String charsetName) {
152        if (string == null) {
153            return null;
154        }
155        try {
156            return string.getBytes(charsetName);
157        } catch (UnsupportedEncodingException e) {
158            throw StringUtils.newIllegalStateException(charsetName, e);
159        }
160    }
161
162    private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
163        return new IllegalStateException(charsetName + ": " + e);
164    }
165
166    /**
167     * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
168     * <p>
169     * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
170     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
171     * </p>
172     * 
173     * @param bytes
174     *            The bytes to be decoded into characters
175     * @param charsetName
176     *            The name of a required {@link java.nio.charset.Charset}
177     * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
178     * @throws IllegalStateException
179     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
180     *             required charset name.
181     * @see CharEncoding
182     * @see String#String(byte[], String)
183     */
184    public static String newString(byte[] bytes, String charsetName) {
185        if (bytes == null) {
186            return null;
187        }
188        try {
189            return new String(bytes, charsetName);
190        } catch (UnsupportedEncodingException e) {
191            throw StringUtils.newIllegalStateException(charsetName, e);
192        }
193    }
194
195    /**
196     * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
197     * 
198     * @param bytes
199     *            The bytes to be decoded into characters
200     * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
201     * @throws IllegalStateException
202     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
203     *             charset is required.
204     */
205    public static String newStringIso8859_1(byte[] bytes) {
206        return StringUtils.newString(bytes, CharEncoding.ISO_8859_1);
207    }
208
209    /**
210     * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
211     * 
212     * @param bytes
213     *            The bytes to be decoded into characters
214     * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
215     * @throws IllegalStateException
216     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
217     *             charset is required.
218     */
219    public static String newStringUsAscii(byte[] bytes) {
220        return StringUtils.newString(bytes, CharEncoding.US_ASCII);
221    }
222
223    /**
224     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
225     * 
226     * @param bytes
227     *            The bytes to be decoded into characters
228     * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
229     * @throws IllegalStateException
230     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
231     *             charset is required.
232     */
233    public static String newStringUtf16(byte[] bytes) {
234        return StringUtils.newString(bytes, CharEncoding.UTF_16);
235    }
236
237    /**
238     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
239     * 
240     * @param bytes
241     *            The bytes to be decoded into characters
242     * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
243     * @throws IllegalStateException
244     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
245     *             charset is required.
246     */
247    public static String newStringUtf16Be(byte[] bytes) {
248        return StringUtils.newString(bytes, CharEncoding.UTF_16BE);
249    }
250
251    /**
252     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
253     * 
254     * @param bytes
255     *            The bytes to be decoded into characters
256     * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
257     * @throws IllegalStateException
258     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
259     *             charset is required.
260     */
261    public static String newStringUtf16Le(byte[] bytes) {
262        return StringUtils.newString(bytes, CharEncoding.UTF_16LE);
263    }
264
265    /**
266     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
267     * 
268     * @param bytes
269     *            The bytes to be decoded into characters
270     * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
271     * @throws IllegalStateException
272     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
273     *             charset is required.
274     */
275    public static String newStringUtf8(byte[] bytes) {
276        return StringUtils.newString(bytes, CharEncoding.UTF_8);
277    }
278
279}