001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import java.io.IOException;
020import java.io.InputStream;
021import java.io.Reader;
022import java.nio.ByteBuffer;
023import java.nio.CharBuffer;
024import java.nio.charset.Charset;
025import java.nio.charset.CharsetEncoder;
026import java.nio.charset.CoderResult;
027import java.nio.charset.CodingErrorAction;
028
029/**
030 * {@link InputStream} implementation that reads a character stream from a {@link Reader}
031 * and transforms it to a byte stream using a specified charset encoding. The stream
032 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
033 * encodings supported by the JRE are handled correctly. In particular for charsets such as
034 * UTF-16, the implementation ensures that one and only one byte order marker
035 * is produced.
036 * <p>
037 * Since in general it is not possible to predict the number of characters to be read from the
038 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
039 * the {@link Reader} are buffered. There is therefore no well defined correlation
040 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
041 * This also implies that in general there is no need to wrap the underlying {@link Reader}
042 * in a {@link java.io.BufferedReader}.
043 * <p>
044 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
045 * in the following example, reading from <tt>in2</tt> would return the same byte
046 * sequence as reading from <tt>in</tt> (provided that the initial byte sequence is legal
047 * with respect to the charset encoding):
048 * <pre>
049 * InputStream in = ...
050 * Charset cs = ...
051 * InputStreamReader reader = new InputStreamReader(in, cs);
052 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
053 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
054 * except that the control flow is reversed: both classes transform a character stream
055 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
056 * while {@link ReaderInputStream} pulls it from the underlying stream.
057 * <p>
058 * Note that while there are use cases where there is no alternative to using
059 * this class, very often the need to use this class is an indication of a flaw
060 * in the design of the code. This class is typically used in situations where an existing
061 * API only accepts an {@link InputStream}, but where the most natural way to produce the data
062 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
063 * where this problem may appear is when implementing the {@link javax.activation.DataSource}
064 * interface from the Java Activation Framework.
065 * <p>
066 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
067 * read operation will block or not, it is not possible to provide a meaningful
068 * implementation of the {@link InputStream#available()} method. A call to this method
069 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
070 * <p>
071 * Instances of {@link ReaderInputStream} are not thread safe.
072 * 
073 * @see org.apache.commons.io.output.WriterOutputStream
074 * 
075 * @since 2.0
076 */
public class ReaderInputStream extends InputStream {
    private static final int DEFAULT_BUFFER_SIZE = 1024;

    /**
     * Smallest usable input buffer: the encoder needs to see both halves of a
     * surrogate pair at once in order to make progress.
     */
    private static final int MIN_BUFFER_SIZE = 2;

    private final Reader reader;
    private final CharsetEncoder encoder;

    /**
     * CharBuffer used as input for the encoder. It should be reasonably
     * large as we read data from the underlying Reader into this buffer.
     * Outside of {@link #fillBuffer()} it is always kept in "read" (flipped) mode.
     */
    private final CharBuffer encoderIn;

    /**
     * ByteBuffer used as output for the encoder. This buffer can be small
     * as it is only used to transfer data from the encoder to the
     * buffer provided by the caller.
     * Outside of {@link #fillBuffer()} it is always kept in "read" (flipped) mode.
     */
    private final ByteBuffer encoderOut;

    /** Result of the most recent encode/flush call, or <code>null</code> before the first one. */
    private CoderResult lastCoderResult;

    /** Set once the underlying reader has reported end of stream. */
    private boolean endOfInput;

    /**
     * Set once the encoder has been completely flushed. After that point
     * {@link CharsetEncoder#encode(CharBuffer, ByteBuffer, boolean)} must not be
     * called again (it would throw {@link IllegalStateException}).
     */
    private boolean encoderFlushed;

    /**
     * Construct a new {@link ReaderInputStream}.
     * <p>
     * If the encoder is configured to report malformed or unmappable input,
     * the read methods throw a {@link java.nio.charset.CharacterCodingException}
     * when such input is encountered.
     * 
     * @param reader the target {@link Reader}
     * @param encoder the charset encoder
     * @since 2.1
     */
    public ReaderInputStream(Reader reader, CharsetEncoder encoder) {
        this(reader, encoder, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     * <p>
     * If the encoder is configured to report malformed or unmappable input,
     * the read methods throw a {@link java.nio.charset.CharacterCodingException}
     * when such input is encountered.
     * 
     * @param reader the target {@link Reader}
     * @param encoder the charset encoder
     * @param bufferSize the size of the input buffer in number of characters;
     *        values below 2 are rounded up to 2 so that a surrogate pair
     *        always fits into the buffer
     * @throws IllegalArgumentException if <code>bufferSize</code> is negative
     * @since 2.1
     */
    public ReaderInputStream(Reader reader, CharsetEncoder encoder, int bufferSize) {
        if (bufferSize < 0) {
            throw new IllegalArgumentException("Buffer size must not be negative: " + bufferSize);
        }
        this.reader = reader;
        this.encoder = encoder;
        // A buffer that cannot hold a full surrogate pair would leave the encoder
        // permanently waiting for more input and the read methods spinning forever.
        this.encoderIn = CharBuffer.allocate(Math.max(bufferSize, MIN_BUFFER_SIZE));
        this.encoderIn.flip();
        this.encoderOut = ByteBuffer.allocate(128);
        this.encoderOut.flip();
    }

    /**
     * Construct a new {@link ReaderInputStream}. Malformed input and unmappable
     * characters are replaced by the charset's replacement byte sequence.
     * 
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     * @param bufferSize the size of the input buffer in number of characters
     */
    public ReaderInputStream(Reader reader, Charset charset, int bufferSize) {
        this(reader,
             charset.newEncoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE),
             bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * 1024 characters.
     * 
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     */
    public ReaderInputStream(Reader reader, Charset charset) {
        this(reader, charset, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     * 
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the input buffer in number of characters
     */
    public ReaderInputStream(Reader reader, String charsetName, int bufferSize) {
        this(reader, Charset.forName(charsetName), bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * 1024 characters.
     * 
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     */
    public ReaderInputStream(Reader reader, String charsetName) {
        this(reader, charsetName, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream} that uses the default character encoding
     * with a default input buffer size of 1024 characters.
     * 
     * @param reader the target {@link Reader}
     */
    public ReaderInputStream(Reader reader) {
        this(reader, Charset.defaultCharset());
    }

    /**
     * Fills the internal char buffer from the reader and encodes as much of it
     * as fits into the internal byte buffer. Once the reader is exhausted and
     * all characters have been encoded, the encoder is flushed so that stateful
     * charsets can emit their trailing bytes.
     * 
     * @throws java.nio.charset.CharacterCodingException
     *             If the encoder reports malformed or unmappable input and no
     *             previously encoded bytes are left to be delivered
     * @throws IOException
     *             If an I/O error occurs
     */
    private void fillBuffer() throws IOException {
        if (encoderFlushed) {
            // Everything has been encoded and flushed; there is nothing left to produce.
            return;
        }
        if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
            encoderIn.compact();
            try {
                int position = encoderIn.position();
                // We don't use Reader#read(CharBuffer) here because it is more efficient
                // to write directly to the underlying char array (the default implementation
                // copies data to a temporary char array).
                int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
                if (c == -1) {
                    endOfInput = true;
                } else {
                    encoderIn.position(position + c);
                }
            } finally {
                // Return to "read" mode even if the reader failed, so that the
                // characters already buffered are not lost or misinterpreted.
                encoderIn.flip();
            }
        }
        encoderOut.compact();
        try {
            lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
            if (endOfInput && lastCoderResult.isUnderflow()) {
                // All input consumed: let the encoder write any pending final bytes.
                // If the output buffer overflows here, the flush is retried on the next call.
                lastCoderResult = encoder.flush(encoderOut);
                encoderFlushed = lastCoderResult.isUnderflow();
            }
        } finally {
            encoderOut.flip();
        }
        // Bytes encoded before the offending character are delivered first; the
        // error is raised on the call that cannot produce any output at all.
        // Without this the callers would loop forever on an error result.
        if (lastCoderResult.isError() && !encoderOut.hasRemaining()) {
            lastCoderResult.throwException();
        }
    }

    /**
     * Read the specified number of bytes into an array.
     * 
     * @param b the byte array to read into
     * @param off the offset to start reading bytes into
     * @param len the number of bytes to read
     * @return the number of bytes read or <code>-1</code>
     *         if the end of the stream has been reached
     * @throws NullPointerException if <code>b</code> is <code>null</code>
     * @throws IndexOutOfBoundsException if <code>off</code> or <code>len</code> is negative,
     *         or if <code>len</code> exceeds the space available after <code>off</code>
     * @throws IOException if an I/O error occurs or the encoder reports an error
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException("Byte array must not be null");
        }
        // "len > b.length - off" rather than "off + len > b.length": the sum may overflow.
        if (len < 0 || off < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException("Array Size=" + b.length +
                    ", offset=" + off + ", length=" + len);
        }
        int read = 0;
        if (len == 0) {
            return 0; // Always return 0 if len == 0
        }
        while (len > 0) {
            if (encoderOut.hasRemaining()) {
                int c = Math.min(encoderOut.remaining(), len);
                encoderOut.get(b, off, c);
                off += c;
                len -= c;
                read += c;
            } else {
                fillBuffer();
                if (endOfInput && !encoderOut.hasRemaining()) {
                    break;
                }
            }
        }
        return read == 0 && endOfInput ? -1 : read;
    }

    /**
     * Read the specified number of bytes into an array.
     * 
     * @param b the byte array to read into
     * @return the number of bytes read or <code>-1</code>
     *         if the end of the stream has been reached
     * @throws IOException if an I/O error occurs or the encoder reports an error
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Read a single byte.
     *
     * @return either the byte read or <code>-1</code> if the end of the stream
     *         has been reached
     * @throws IOException if an I/O error occurs or the encoder reports an error
     */
    @Override
    public int read() throws IOException {
        for (;;) {
            if (encoderOut.hasRemaining()) {
                return encoderOut.get() & 0xFF;
            } else {
                fillBuffer();
                if (endOfInput && !encoderOut.hasRemaining()) {
                    return -1;
                }
            }
        }
    }

    /**
     * Close the stream. This method will cause the underlying {@link Reader}
     * to be closed.
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }
}