001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.IOException;
022import java.io.RandomAccessFile;
023import java.io.UnsupportedEncodingException;
024import java.nio.charset.Charset;
025import java.nio.charset.CharsetEncoder;
026import java.nio.charset.UnsupportedCharsetException;
027
028import org.apache.commons.io.Charsets;
029
030/**
031 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
032 * the last line). Useful for e.g. searching in log files.
033 *
034 * @since 2.2
035 */
036public class ReversedLinesFileReader implements Closeable {
037
038    private final int blockSize;
039    private final Charset encoding;
040
041    private final RandomAccessFile randomAccessFile;
042
043    private final long totalByteLength;
044    private final long totalBlockCount;
045
046    private final byte[][] newLineSequences;
047    private final int avoidNewlineSplitBufferSize;
048    private final int byteDecrement;
049
050    private FilePart currentFilePart;
051
052    private boolean trailingNewlineOfFileSkipped = false;
053
054    /**
055     * Creates a ReversedLinesFileReader with default block size of 4KB and the
056     * platform's default encoding.
057     *
058     * @param file
059     *            the file to be read
060     * @throws IOException  if an I/O error occurs
061     */
062    public ReversedLinesFileReader(final File file) throws IOException {
063        this(file, 4096, Charset.defaultCharset().toString());
064    }
065
066    /**
067     * Creates a ReversedLinesFileReader with the given block size and encoding.
068     *
069     * @param file
070     *            the file to be read
071     * @param blockSize
072     *            size of the internal buffer (for ideal performance this should
073     *            match with the block size of the underlying file system).
074     * @param encoding
075     *            the encoding of the file
076     * @throws IOException  if an I/O error occurs
077     * @since 2.3
078     */
079    public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException {
080        this.blockSize = blockSize;
081        this.encoding = encoding;
082
083        randomAccessFile = new RandomAccessFile(file, "r");
084        totalByteLength = randomAccessFile.length();
085        int lastBlockLength = (int) (totalByteLength % blockSize);
086        if (lastBlockLength > 0) {
087            totalBlockCount = totalByteLength / blockSize + 1;
088        } else {
089            totalBlockCount = totalByteLength / blockSize;
090            if (totalByteLength > 0) {
091                lastBlockLength = blockSize;
092            }
093        }
094        currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
095
096        // --- check & prepare encoding ---
097        Charset charset = Charsets.toCharset(encoding);
098        CharsetEncoder charsetEncoder = charset.newEncoder();
099        float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
100        if(maxBytesPerChar==1f) {
101            // all one byte encodings are no problem
102            byteDecrement = 1;
103        } else if(charset == Charset.forName("UTF-8")) {
104            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
105            // http://en.wikipedia.org/wiki/UTF-8
106            byteDecrement = 1;
107        } else if(charset == Charset.forName("Shift_JIS")) {
108            // Same as for UTF-8
109            // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
110            byteDecrement = 1;
111        } else if(charset == Charset.forName("UTF-16BE") || charset == Charset.forName("UTF-16LE")) {
112            // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
113            // however byte order has to be specified
114            byteDecrement = 2;
115        } else if(charset == Charset.forName("UTF-16")) {
116            throw new UnsupportedEncodingException(
117                    "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
118        } else {
119            throw new UnsupportedEncodingException(
120                    "Encoding "+encoding+" is not supported yet (feel free to submit a patch)");
121        }
122        // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
123        newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) };
124
125        avoidNewlineSplitBufferSize = newLineSequences[0].length;
126    }
127
128    /**
129     * Creates a ReversedLinesFileReader with the given block size and encoding.
130     *
131     * @param file
132     *            the file to be read
133     * @param blockSize
134     *            size of the internal buffer (for ideal performance this should
135     *            match with the block size of the underlying file system).
136     * @param encoding
137     *            the encoding of the file
138     * @throws IOException  if an I/O error occurs
139     * @throws UnsupportedCharsetException
140     *             thrown instead of {@link UnsupportedEncodingException} in version 2.2 if the encoding is not
141     *             supported.
142     */
143    public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
144        this(file, blockSize, Charsets.toCharset(encoding));
145    }
146
147    /**
148     * Returns the lines of the file from bottom to top.
149     *
150     * @return the next line or null if the start of the file is reached
151     * @throws IOException  if an I/O error occurs
152     */
153    public String readLine() throws IOException {
154
155        String line = currentFilePart.readLine();
156        while (line == null) {
157            currentFilePart = currentFilePart.rollOver();
158            if (currentFilePart != null) {
159                line = currentFilePart.readLine();
160            } else {
161                // no more fileparts: we're done, leave line set to null
162                break;
163            }
164        }
165
166        // aligned behaviour wiht BufferedReader that doesn't return a last, emtpy line
167        if("".equals(line) && !trailingNewlineOfFileSkipped) {
168            trailingNewlineOfFileSkipped = true;
169            line = readLine();
170        }
171
172        return line;
173    }
174
175    /**
176     * Closes underlying resources.
177     *
178     * @throws IOException  if an I/O error occurs
179     */
180    public void close() throws IOException {
181        randomAccessFile.close();
182    }
183
184    private class FilePart {
185        private final long no;
186
187        private final byte[] data;
188
189        private byte[] leftOver;
190
191        private int currentLastBytePos;
192
193        /**
194         * ctor
195         * @param no the part number
196         * @param length its length
197         * @param leftOverOfLastFilePart remainder
198         * @throws IOException if there is a problem reading the file
199         */
200        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
201            this.no = no;
202            int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
203            this.data = new byte[dataLength];
204            final long off = (no - 1) * blockSize;
205
206            // read data
207            if (no > 0 /* file not empty */) {
208                randomAccessFile.seek(off);
209                final int countRead = randomAccessFile.read(data, 0, length);
210                if (countRead != length) {
211                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
212                }
213            }
214            // copy left over part into data arr
215            if (leftOverOfLastFilePart != null) {
216                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
217            }
218            this.currentLastBytePos = data.length - 1;
219            this.leftOver = null;
220        }
221
222        /**
223         * Handles block rollover
224         * 
225         * @return the new FilePart or null
226         * @throws IOException if there was a problem reading the file
227         */
228        private FilePart rollOver() throws IOException {
229
230            if (currentLastBytePos > -1) {
231                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
232                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
233            }
234
235            if (no > 1) {
236                return new FilePart(no - 1, blockSize, leftOver);
237            } else {
238                // NO 1 was the last FilePart, we're finished
239                if (leftOver != null) {
240                    throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
241                            + new String(leftOver, encoding));
242                }
243                return null;
244            }
245        }
246
247        /**
248         * Reads a line.
249         * 
250         * @return the line or null
251         * @throws IOException if there is an error reading from the file
252         */
253        private String readLine() throws IOException {
254
255            String line = null;
256            int newLineMatchByteCount;
257
258            boolean isLastFilePart = no == 1;
259
260            int i = currentLastBytePos;
261            while (i > -1) {
262
263                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
264                    // avoidNewlineSplitBuffer: for all except the last file part we
265                    // take a few bytes to the next file part to avoid splitting of newlines
266                    createLeftOver();
267                    break; // skip last few bytes and leave it to the next file part
268                }
269
270                // --- check for newline ---
271                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
272                    final int lineStart = i + 1;
273                    int lineLengthBytes = currentLastBytePos - lineStart + 1;
274
275                    if (lineLengthBytes < 0) {
276                        throw new IllegalStateException("Unexpected negative line length="+lineLengthBytes);
277                    }
278                    byte[] lineData = new byte[lineLengthBytes];
279                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);
280
281                    line = new String(lineData, encoding);
282
283                    currentLastBytePos = i - newLineMatchByteCount;
284                    break; // found line
285                }
286
287                // --- move cursor ---
288                i -= byteDecrement;
289
290                // --- end of file part handling ---
291                if (i < 0) {
292                    createLeftOver();
293                    break; // end of file part
294                }
295            }
296
297            // --- last file part handling ---
298            if (isLastFilePart && leftOver != null) {
299                // there will be no line break anymore, this is the first line of the file
300                line = new String(leftOver, encoding);
301                leftOver = null;
302            }
303
304            return line;
305        }
306
307        /**
308         * Creates the buffer containing any left over bytes.
309         */
310        private void createLeftOver() {
311            int lineLengthBytes = currentLastBytePos + 1;
312            if (lineLengthBytes > 0) {
313                // create left over for next block
314                leftOver = new byte[lineLengthBytes];
315                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
316            } else {
317                leftOver = null;
318            }
319            currentLastBytePos = -1;
320        }
321
322        /**
323         * Finds the new-line sequence and return its length.
324         * 
325         * @param data buffer to scan
326         * @param i start offset in buffer
327         * @return length of newline sequence or 0 if none found
328         */
329        private int getNewLineMatchByteCount(byte[] data, int i) {
330            for (byte[] newLineSequence : newLineSequences) {
331                boolean match = true;
332                for (int j = newLineSequence.length - 1; j >= 0; j--) {
333                    int k = i + j - (newLineSequence.length - 1);
334                    match &= k >= 0 && data[k] == newLineSequence[j];
335                }
336                if (match) {
337                    return newLineSequence.length;
338                }
339            }
340            return 0;
341        }
342    }
343
344}