001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.z;
020
021import java.io.IOException;
022import java.io.InputStream;
023import org.apache.commons.compress.compressors.z._internal_.InternalLZWInputStream;
024
025/**
026 * Input stream that decompresses .Z files.
027 * @NotThreadSafe
028 * @since 1.7
029 */
030public class ZCompressorInputStream extends InternalLZWInputStream {
031    private static final int MAGIC_1 = 0x1f;
032    private static final int MAGIC_2 = 0x9d;
033    private static final int BLOCK_MODE_MASK = 0x80;
034    private static final int MAX_CODE_SIZE_MASK = 0x1f;
035    private final boolean blockMode;
036    private final int maxCodeSize;
037    private long totalCodesRead = 0;
038    
039    public ZCompressorInputStream(InputStream inputStream) throws IOException {
040        super(inputStream);
041        int firstByte = in.read();
042        int secondByte = in.read();
043        int thirdByte = in.read();
044        if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) {
045            throw new IOException("Input is not in .Z format");
046        }
047        blockMode = (thirdByte & BLOCK_MODE_MASK) != 0;
048        maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK;
049        if (blockMode) {
050            setClearCode(codeSize);
051        }
052        initializeTables(maxCodeSize);
053        clearEntries();
054    }
055    
056    private void clearEntries() {
057        tableSize = 1 << 8;
058        if (blockMode) {
059            tableSize++;
060        }
061    }
062
063    /**
064     * {@inheritDoc}
065     * <p><strong>This method is only protected for technical reasons
066     * and is not part of Commons Compress' published API.  It may
067     * change or disappear without warning.</strong></p>
068     */
069    @Override
070    protected int readNextCode() throws IOException {
071        int code = super.readNextCode();
072        if (code >= 0) {
073            ++totalCodesRead;
074        }
075        return code;
076    }
077    
078    private void reAlignReading() throws IOException {
079        // "compress" works in multiples of 8 symbols, each codeBits bits long.
080        // When codeBits changes, the remaining unused symbols in the current
081        // group of 8 are still written out, in the old codeSize,
082        // as garbage values (usually zeroes) that need to be skipped.
083        long codeReadsToThrowAway = 8 - (totalCodesRead % 8);
084        if (codeReadsToThrowAway == 8) {
085            codeReadsToThrowAway = 0;
086        }
087        for (long i = 0; i < codeReadsToThrowAway; i++) {
088            readNextCode();
089        }
090        bitsCached = 0;
091        bitsCachedSize = 0;
092    }
093    
094    /**
095     * {@inheritDoc}
096     * <p><strong>This method is only protected for technical reasons
097     * and is not part of Commons Compress' published API.  It may
098     * change or disappear without warning.</strong></p>
099     */
100    @Override
101    protected int addEntry(int previousCode, byte character) throws IOException {
102        final int maxTableSize = 1 << codeSize;
103        int r = addEntry(previousCode, character, maxTableSize);
104        if (tableSize == maxTableSize && codeSize < maxCodeSize) {
105            reAlignReading();
106            codeSize++;
107        }
108        return r;
109    }
110
111    /**
112     * {@inheritDoc}
113     * <p><strong>This method is only protected for technical reasons
114     * and is not part of Commons Compress' published API.  It may
115     * change or disappear without warning.</strong></p>
116     */
117    @Override
118    protected int decompressNextSymbol() throws IOException {
119        //
120        //                   table entry    table entry
121        //                  _____________   _____
122        //    table entry  /             \ /     \
123        //    ____________/               \       \
124        //   /           / \             / \       \
125        //  +---+---+---+---+---+---+---+---+---+---+
126        //  | . | . | . | . | . | . | . | . | . | . |
127        //  +---+---+---+---+---+---+---+---+---+---+
128        //  |<--------->|<------------->|<----->|<->|
129        //     symbol        symbol      symbol  symbol
130        //
131        final int code = readNextCode();
132        if (code < 0) {
133            return -1;
134        } else if (blockMode && code == clearCode) {
135            clearEntries();
136            reAlignReading();
137            codeSize = 9;
138            previousCode = -1;
139            return 0;
140        } else {
141            boolean addedUnfinishedEntry = false;
142            if (code == tableSize) {
143                addRepeatOfPreviousCode();
144                addedUnfinishedEntry = true;
145            } else if (code > tableSize) {
146                throw new IOException(String.format("Invalid %d bit code 0x%x", Integer.valueOf(codeSize), Integer.valueOf(code)));
147            }
148            return expandCodeToOutputStack(code, addedUnfinishedEntry);
149        }
150    }
151    
152    /**
153     * Checks if the signature matches what is expected for a Unix compress file.
154     * 
155     * @param signature
156     *            the bytes to check
157     * @param length
158     *            the number of bytes to check
159     * @return true, if this stream is a Unix compress compressed
160     * stream, false otherwise
161     * 
162     * @since 1.9
163     */
164    public static boolean matches(byte[] signature, int length) {
165        return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2;
166    }
167
168}