001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.nio.ByteBuffer;
028import java.util.zip.CRC32;
029import java.util.zip.DataFormatException;
030import java.util.zip.Inflater;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipException;
033
034import org.apache.commons.compress.archivers.ArchiveEntry;
035import org.apache.commons.compress.archivers.ArchiveInputStream;
036import org.apache.commons.compress.utils.IOUtils;
037
038import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
039import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
040import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
041import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
042
043/**
044 * Implements an input stream that can read Zip archives.
045 *
046 * <p>Note that {@link ZipArchiveEntry#getSize()} may return -1 if the
047 * DEFLATE algorithm is used, as the size information is not available
048 * from the header.</p>
049 *
050 * <p>The {@link ZipFile} class is preferred when reading from files.</p>
051 *
052 * <p>As of Apache Commons Compress it transparently supports Zip64
053 * extensions and thus individual entries and archives larger than 4
054 * GB or with more than 65536 entries.</p>
055 *
056 * @see ZipFile
057 * @NotThreadSafe
058 */
059public class ZipArchiveInputStream extends ArchiveInputStream {
060
061    /** The zip encoding to use for filenames and the file comment. */
062    private final ZipEncoding zipEncoding;
063
064    /** Whether to look for and use Unicode extra fields. */
065    private final boolean useUnicodeExtraFields;
066
067    /** Wrapped stream, will always be a PushbackInputStream. */
068    private final InputStream in;
069
070    /** Inflater used for all deflated entries. */
071    private final Inflater inf = new Inflater(true);
072
073    /** Buffer used to read from the wrapped stream. */
074    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);
075
076    /** The entry that is currently being read. */
077    private CurrentEntry current = null;
078
079    /** Whether the stream has been closed. */
080    private boolean closed = false;
081
082    /** Whether the stream has reached the central directory - and thus found all entries. */
083    private boolean hitCentralDirectory = false;
084
085    /**
086     * When reading a stored entry that uses the data descriptor this
087     * stream has to read the full entry and caches it.  This is the
088     * cache.
089     */
090    private ByteArrayInputStream lastStoredEntry = null;
091
092    /** Whether the stream will try to read STORED entries that use a data descriptor. */
093    private boolean allowStoredEntriesWithDataDescriptor = false;
094
095    private static final int LFH_LEN = 30;
096    /*
097      local file header signature     WORD
098      version needed to extract       SHORT
099      general purpose bit flag        SHORT
100      compression method              SHORT
101      last mod file time              SHORT
102      last mod file date              SHORT
103      crc-32                          WORD
104      compressed size                 WORD
105      uncompressed size               WORD
106      file name length                SHORT
107      extra field length              SHORT
108    */
109
110    private static final int CFH_LEN = 46;
111    /*
112        central file header signature   WORD
113        version made by                 SHORT
114        version needed to extract       SHORT
115        general purpose bit flag        SHORT
116        compression method              SHORT
117        last mod file time              SHORT
118        last mod file date              SHORT
119        crc-32                          WORD
120        compressed size                 WORD
121        uncompressed size               WORD
122        file name length                SHORT
123        extra field length              SHORT
124        file comment length             SHORT
125        disk number start               SHORT
126        internal file attributes        SHORT
127        external file attributes        WORD
128        relative offset of local header WORD
129    */
130
131    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;
132
133    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
134    private final byte[] LFH_BUF = new byte[LFH_LEN];
135    private final byte[] SKIP_BUF = new byte[1024];
136    private final byte[] SHORT_BUF = new byte[SHORT];
137    private final byte[] WORD_BUF = new byte[WORD];
138    private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD];
139
140    private int entriesRead = 0;
141
    /**
     * Creates a stream reading from the given input, using
     * {@link ZipEncodingHelper#UTF8} as the encoding for file names.
     *
     * <p>Delegates to {@link #ZipArchiveInputStream(InputStream, String)}.</p>
     *
     * @param inputStream the stream to read the archive from
     */
    public ZipArchiveInputStream(InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }
145
    /**
     * Creates a stream reading from the given input with the given
     * file name encoding; Unicode extra fields are enabled
     * ({@code useUnicodeExtraFields} is passed as {@code true}).
     *
     * @param inputStream the stream to read the archive from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(InputStream inputStream, String encoding) {
        this(inputStream, encoding, true);
    }
154
    /**
     * Creates a stream reading from the given input; support for
     * STORED entries with a data descriptor is disabled
     * ({@code allowStoredEntriesWithDataDescriptor} is passed as
     * {@code false}).
     *
     * @param inputStream the stream to read the archive from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(InputStream inputStream, String encoding, boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }
164
165    /**
166     * @param encoding the encoding to use for file names, use null
167     * for the platform's default encoding
168     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
169     * Extra Fields (if present) to set the file names.
170     * @param allowStoredEntriesWithDataDescriptor whether the stream
171     * will try to read STORED entries that use a data descriptor
172     * @since 1.1
173     */
174    public ZipArchiveInputStream(InputStream inputStream,
175                                 String encoding,
176                                 boolean useUnicodeExtraFields,
177                                 boolean allowStoredEntriesWithDataDescriptor) {
178        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
179        this.useUnicodeExtraFields = useUnicodeExtraFields;
180        in = new PushbackInputStream(inputStream, buf.capacity());
181        this.allowStoredEntriesWithDataDescriptor =
182            allowStoredEntriesWithDataDescriptor;
183        // haven't read anything so far
184        buf.limit(0);
185    }
186
    /**
     * Advances to the next entry of the archive and returns it.
     *
     * <p>Any entry that is currently open is closed first.  The
     * method then reads a local file header, parses it into a new
     * entry and - for the UNSHRINKING and IMPLODING methods with a
     * known compressed size - sets up the decompressing stream.</p>
     *
     * @return the next entry, or {@code null} if the stream is
     * closed, the central directory has been reached, the
     * underlying stream ended while reading the header or the bytes
     * read don't start with a local file header signature
     * @throws IOException if reading from the underlying stream
     * fails or closing the previous entry fails
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }

        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(LFH_BUF);
            } else {
                readFully(LFH_BUF);
            }
        } catch (EOFException e) {
            // not enough bytes left for another header: treat as end of archive
            return null;
        }

        ZipLong sig = new ZipLong(LFH_BUF);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            // no more local entries; remember that so later calls return null immediately
            hitCentralDirectory = true;
            skipRemainderOfArchive();
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            return null;
        }

        // skip the signature, off now tracks the position inside the header
        int off = WORD;
        current = new CurrentEntry();

        // NOTE(review): the LFH layout documented next to LFH_LEN lists
        // this field as "version needed to extract", not "version made by"
        int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        // the UTF-8 flag of the entry takes precedence over the configured encoding
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(LFH_BUF, off));
        off += SHORT;

        // a single WORD covers both the "last mod file time" and "last mod file date" SHORTs
        long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off));
        current.entry.setTime(time);
        off += WORD;

        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(LFH_BUF, off));
            off += WORD;

            cSize = new ZipLong(LFH_BUF, off);
            off += WORD;

            size = new ZipLong(LFH_BUF, off);
            off += WORD;
        } else {
            // crc and both sizes are taken from the data descriptor
            // instead, skip the three WORDs of the header
            off += 3 * WORD;
        }

        int fileNameLen = ZipShort.getValue(LFH_BUF, off);

        off += SHORT;

        int extraLen = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;

        // file name and extra field follow the fixed-size part of the header
        byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);

        byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        // size and cSize are null here if the entry uses a data descriptor
        processZip64Extra(size, cSize);

        if (current.entry.getCompressedSize() != -1) {
            // these two methods are decoded by dedicated streams that are
            // limited to the compressed size of the entry
            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
                current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        new BoundedInputStream(in, current.entry.getCompressedSize()));
            }
        }

        entriesRead++;
        return current.entry;
    }
291
292    /**
293     * Fills the given array with the first local file header and
294     * deals with splitting/spanning markers that may prefix the first
295     * LFH.
296     */
297    private void readFirstLocalFileHeader(byte[] lfh) throws IOException {
298        readFully(lfh);
299        ZipLong sig = new ZipLong(lfh);
300        if (sig.equals(ZipLong.DD_SIG)) {
301            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
302        }
303
304        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
305            // The archive is not really split as only one segment was
306            // needed in the end.  Just skip over the marker.
307            byte[] missedLfhBytes = new byte[4];
308            readFully(missedLfhBytes);
309            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
310            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
311        }
312    }
313
314    /**
315     * Records whether a Zip64 extra is present and sets the size
316     * information from it if sizes are 0xFFFFFFFF and the entry
317     * doesn't use a data descriptor.
318     */
319    private void processZip64Extra(ZipLong size, ZipLong cSize) {
320        Zip64ExtendedInformationExtraField z64 =
321            (Zip64ExtendedInformationExtraField) 
322            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
323        current.usesZip64 = z64 != null;
324        if (!current.hasDataDescriptor) {
325            if (z64 != null // same as current.usesZip64 but avoids NPE warning
326                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
327                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
328                current.entry.setSize(z64.getSize().getLongValue());
329            } else {
330                current.entry.setCompressedSize(cSize.getValue());
331                current.entry.setSize(size.getValue());
332            }
333        }
334    }
335
    /**
     * Returns the next entry of the archive by delegating to
     * {@link #getNextZipEntry()}.
     *
     * @return the result of {@link #getNextZipEntry()}, which may be
     * {@code null}
     * @throws IOException if {@link #getNextZipEntry()} throws it
     */
    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }
340
341    /**
342     * Whether this class is able to read the given entry.
343     *
344     * <p>May return false if it is set up to use encryption or a
345     * compression method that hasn't been implemented yet.</p>
346     * @since 1.1
347     */
348    @Override
349    public boolean canReadEntryData(ArchiveEntry ae) {
350        if (ae instanceof ZipArchiveEntry) {
351            ZipArchiveEntry ze = (ZipArchiveEntry) ae;
352            return ZipUtil.canHandleEntryData(ze)
353                && supportsDataDescriptorFor(ze);
354
355        }
356        return false;
357    }
358
359    @Override
360    public int read(byte[] buffer, int offset, int length) throws IOException {
361        if (closed) {
362            throw new IOException("The stream is closed");
363        }
364
365        if (current == null) {
366            return -1;
367        }
368
369        // avoid int overflow, check null buffer
370        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
371            throw new ArrayIndexOutOfBoundsException();
372        }
373        
374        ZipUtil.checkRequestedFeatures(current.entry);
375        if (!supportsDataDescriptorFor(current.entry)) {
376            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
377                    current.entry);
378        }
379
380        int read;
381        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
382            read = readStored(buffer, offset, length);
383        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
384            read = readDeflated(buffer, offset, length);
385        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
386                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
387            read = current.in.read(buffer, offset, length);
388        } else {
389            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
390                    current.entry);
391        }
392        
393        if (read >= 0) {
394            current.crc.update(buffer, offset, read);
395        }
396        
397        return read;
398    }
399
400    /**
401     * Implementation of read for STORED entries.
402     */
403    private int readStored(byte[] buffer, int offset, int length) throws IOException {
404
405        if (current.hasDataDescriptor) {
406            if (lastStoredEntry == null) {
407                readStoredEntry();
408            }
409            return lastStoredEntry.read(buffer, offset, length);
410        }
411
412        long csize = current.entry.getSize();
413        if (current.bytesRead >= csize) {
414            return -1;
415        }
416
417        if (buf.position() >= buf.limit()) {
418            buf.position(0);
419            int l = in.read(buf.array());
420            if (l == -1) {
421                return -1;
422            }
423            buf.limit(l);
424
425            count(l);
426            current.bytesReadFromStream += l;
427        }
428
429        int toRead = Math.min(buf.remaining(), length);
430        if ((csize - current.bytesRead) < toRead) {
431            // if it is smaller than toRead then it fits into an int
432            toRead = (int) (csize - current.bytesRead);
433        }
434        buf.get(buffer, offset, toRead);
435        current.bytesRead += toRead;
436        return toRead;
437    }
438
439    /**
440     * Implementation of read for DEFLATED entries.
441     */
442    private int readDeflated(byte[] buffer, int offset, int length) throws IOException {
443        int read = readFromInflater(buffer, offset, length);
444        if (read <= 0) {
445            if (inf.finished()) {
446                return -1;
447            } else if (inf.needsDictionary()) {
448                throw new ZipException("This archive needs a preset dictionary"
449                                       + " which is not supported by Commons"
450                                       + " Compress.");
451            } else if (read == -1) {
452                throw new IOException("Truncated ZIP file");
453            }
454        }
455        return read;
456    }
457
458    /**
459     * Potentially reads more bytes to fill the inflater's buffer and
460     * reads from it.
461     */
462    private int readFromInflater(byte[] buffer, int offset, int length) throws IOException {
463        int read = 0;
464        do {
465            if (inf.needsInput()) {
466                int l = fill();
467                if (l > 0) {
468                    current.bytesReadFromStream += buf.limit();
469                } else if (l == -1) {
470                    return -1;
471                } else {
472                    break;
473                }
474            }
475            try {
476                read = inf.inflate(buffer, offset, length);
477            } catch (DataFormatException e) {
478                throw (IOException) new ZipException(e.getMessage()).initCause(e);
479            }
480        } while (read == 0 && inf.needsInput());
481        return read;
482    }
483
484    @Override
485    public void close() throws IOException {
486        if (!closed) {
487            closed = true;
488            in.close();
489            inf.end();
490        }
491    }
492
493    /**
494     * Skips over and discards value bytes of data from this input
495     * stream.
496     *
497     * <p>This implementation may end up skipping over some smaller
498     * number of bytes, possibly 0, if and only if it reaches the end
499     * of the underlying stream.</p>
500     *
501     * <p>The actual number of bytes skipped is returned.</p>
502     *
503     * @param value the number of bytes to be skipped.
504     * @return the actual number of bytes skipped.
505     * @throws IOException - if an I/O error occurs.
506     * @throws IllegalArgumentException - if value is negative.
507     */
508    @Override
509    public long skip(long value) throws IOException {
510        if (value >= 0) {
511            long skipped = 0;
512            while (skipped < value) {
513                long rem = value - skipped;
514                int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
515                if (x == -1) {
516                    return skipped;
517                }
518                skipped += x;
519            }
520            return skipped;
521        }
522        throw new IllegalArgumentException();
523    }
524
525    /**
526     * Checks if the signature matches what is expected for a zip file.
527     * Does not currently handle self-extracting zips which may have arbitrary
528     * leading content.
529     *
530     * @param signature the bytes to check
531     * @param length    the number of bytes to check
532     * @return true, if this stream is a zip archive stream, false otherwise
533     */
534    public static boolean matches(byte[] signature, int length) {
535        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
536            return false;
537        }
538
539        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
540            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
541            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
542            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
543    }
544
545    private static boolean checksig(byte[] signature, byte[] expected) {
546        for (int i = 0; i < expected.length; i++) {
547            if (signature[i] != expected[i]) {
548                return false;
549            }
550        }
551        return true;
552    }
553
    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     * <p>
     * Does nothing if there is no current entry.
     *
     * @throws IOException if the stream has been closed or an error
     * occurs while reading the remainder of the entry
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (current.bytesReadFromStream <= current.entry.getCompressedSize()
                && !current.hasDataDescriptor) {
            // compressed size is known and has not been exceeded:
            // read and discard the raw bytes that are left
            drainCurrentEntryData();
        } else {
            // consume the rest of the entry via read()
            skip(Long.MAX_VALUE);

            // number of bytes of the stream that really belong to the entry
            long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                       ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                // the surplus sits at the end of the valid part of the buffer
                pushback(buf.array(), buf.limit() - diff, diff);
            }
        }

        // a cached stored entry has had its data descriptor consumed
        // while it was cached, see readStoredEntry
        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        // reset all per-entry state
        inf.reset();
        // clear() followed by flip() leaves position and limit at 0,
        // i.e. marks the buffer as holding no data
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }
609
610    /**
611     * Read all data of the current entry from the underlying stream
612     * that hasn't been read, yet.
613     */
614    private void drainCurrentEntryData() throws IOException {
615        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
616        while (remaining > 0) {
617            long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
618            if (n < 0) {
619                throw new EOFException("Truncated ZIP entry: " + current.entry.getName());
620            } else {
621                count(n);
622                remaining -= n;
623            }
624        }
625    }
626
627    /**
628     * Get the number of bytes Inflater has actually processed.
629     *
630     * <p>for Java &lt; Java7 the getBytes* methods in
631     * Inflater/Deflater seem to return unsigned ints rather than
632     * longs that start over with 0 at 2^32.</p>
633     *
634     * <p>The stream knows how many bytes it has read, but not how
635     * many the Inflater actually consumed - it should be between the
636     * total number of bytes read for the entry and the total number
637     * minus the last read operation.  Here we just try to make the
638     * value close enough to the bytes we've read by assuming the
639     * number of bytes consumed must be smaller than (or equal to) the
640     * number of bytes read but not smaller by more than 2^32.</p>
641     */
642    private long getBytesInflated() {
643        long inB = inf.getBytesRead();
644        if (current.bytesReadFromStream >= TWO_EXP_32) {
645            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
646                inB += TWO_EXP_32;
647            }
648        }
649        return inB;
650    }
651
652    private int fill() throws IOException {
653        if (closed) {
654            throw new IOException("The stream is closed");
655        }
656        int length = in.read(buf.array());
657        if (length > 0) {
658            buf.limit(length);
659            count(buf.limit());
660            inf.setInput(buf.array(), 0, buf.limit());
661        }
662        return length;
663    }
664
665    private void readFully(byte[] b) throws IOException {
666        int count = IOUtils.readFully(in, b);
667        count(count);
668        if (count < b.length) {
669            throw new EOFException();
670        }
671    }
672
673    private void readDataDescriptor() throws IOException {
674        readFully(WORD_BUF);
675        ZipLong val = new ZipLong(WORD_BUF);
676        if (ZipLong.DD_SIG.equals(val)) {
677            // data descriptor with signature, skip sig
678            readFully(WORD_BUF);
679            val = new ZipLong(WORD_BUF);
680        }
681        current.entry.setCrc(val.getValue());
682
683        // if there is a ZIP64 extra field, sizes are eight bytes
684        // each, otherwise four bytes each.  Unfortunately some
685        // implementations - namely Java7 - use eight bytes without
686        // using a ZIP64 extra field -
687        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
688
689        // just read 16 bytes and check whether bytes nine to twelve
690        // look like one of the signatures of what could follow a data
691        // descriptor (ignoring archive decryption headers for now).
692        // If so, push back eight bytes and assume sizes are four
693        // bytes, otherwise sizes are eight bytes each.
694        readFully(TWO_DWORD_BUF);
695        ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD);
696        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
697            pushback(TWO_DWORD_BUF, DWORD, DWORD);
698            current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF));
699            current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD));
700        } else {
701            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF));
702            current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD));
703        }
704    }
705
706    /**
707     * Whether this entry requires a data descriptor this library can work with.
708     *
709     * @return true if allowStoredEntriesWithDataDescriptor is true,
710     * the entry doesn't require any data descriptor or the method is
711     * DEFLATED.
712     */
713    private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) {
714        return !entry.getGeneralPurposeBit().usesDataDescriptor()
715
716                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
717                || entry.getMethod() == ZipEntry.DEFLATED;
718    }
719
720    /**
721     * Caches a stored entry that uses the data descriptor.
722     *
723     * <ul>
724     *   <li>Reads a stored entry until the signature of a local file
725     *     header, central directory header or data descriptor has been
726     *     found.</li>
727     *   <li>Stores all entry data in lastStoredEntry.</p>
728     *   <li>Rewinds the stream to position at the data
729     *     descriptor.</li>
730     *   <li>reads the data descriptor</li>
731     * </ul>
732     *
733     * <p>After calling this method the entry should know its size,
734     * the entry's data is cached and the stream is positioned at the
735     * next local file or central directory header.</p>
736     */
737    private void readStoredEntry() throws IOException {
738        ByteArrayOutputStream bos = new ByteArrayOutputStream();
739        int off = 0;
740        boolean done = false;
741
742        // length of DD without signature
743        int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
744
745        while (!done) {
746            int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
747            if (r <= 0) {
748                // read the whole archive without ever finding a
749                // central directory
750                throw new IOException("Truncated ZIP file");
751            }
752            if (r + off < 4) {
753                // buffer too small to check for a signature, loop
754                off += r;
755                continue;
756            }
757
758            done = bufferContainsSignature(bos, off, r, ddLen);
759            if (!done) {
760                off = cacheBytesRead(bos, off, r, ddLen);
761            }
762        }
763
764        byte[] b = bos.toByteArray();
765        lastStoredEntry = new ByteArrayInputStream(b);
766    }
767
768    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
769    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
770    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
771
772    /**
773     * Checks whether the current buffer contains the signature of a
774     * &quot;data descriptor&quot;, &quot;local file header&quot; or
775     * &quot;central directory entry&quot;.
776     *
777     * <p>If it contains such a signature, reads the data descriptor
778     * and positions the stream right after the data descriptor.</p>
779     */
780    private boolean bufferContainsSignature(ByteArrayOutputStream bos, int offset, int lastRead, int expectedDDLen)
781            throws IOException {
782
783        boolean done = false;
784        int readTooMuch = 0;
785        for (int i = 0; !done && i < lastRead - 4; i++) {
786            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
787                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
788                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
789                    // found a LFH or CFH:
790                    readTooMuch = offset + lastRead - i - expectedDDLen;
791                    done = true;
792                }
793                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
794                    // found DD:
795                    readTooMuch = offset + lastRead - i;
796                    done = true;
797                }
798                if (done) {
799                    // * push back bytes read in excess as well as the data
800                    //   descriptor
801                    // * copy the remaining bytes to cache
802                    // * read data descriptor
803                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
804                    bos.write(buf.array(), 0, i);
805                    readDataDescriptor();
806                }
807            }
808        }
809        return done;
810    }
811
812    /**
813     * If the last read bytes could hold a data descriptor and an
814     * incomplete signature then save the last bytes to the front of
815     * the buffer and cache everything in front of the potential data
816     * descriptor into the given ByteArrayOutputStream.
817     *
818     * <p>Data descriptor plus incomplete signature (3 bytes in the
819     * worst case) can be 20 bytes max.</p>
820     */
821    private int cacheBytesRead(ByteArrayOutputStream bos, int offset, int lastRead, int expecteDDLen) {
822        final int cacheable = offset + lastRead - expecteDDLen - 3;
823        if (cacheable > 0) {
824            bos.write(buf.array(), 0, cacheable);
825            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
826            offset = expecteDDLen + 3;
827        } else {
828            offset += lastRead;
829        }
830        return offset;
831    }
832
833    private void pushback(byte[] buf, int offset, int length) throws IOException {
834        ((PushbackInputStream) in).unread(buf, offset, length);
835        pushedBackBytes(length);
836    }
837
838    // End of Central Directory Record
839    //   end of central dir signature    WORD
840    //   number of this disk             SHORT
841    //   number of the disk with the
842    //   start of the central directory  SHORT
843    //   total number of entries in the
844    //   central directory on this disk  SHORT
845    //   total number of entries in
846    //   the central directory           SHORT
847    //   size of the central directory   WORD
848    //   offset of start of central
849    //   directory with respect to
850    //   the starting disk number        WORD
851    //   .ZIP file comment length        SHORT
852    //   .ZIP file comment               up to 64KB
853    //
854
855    /**
856     * Reads the stream until it find the "End of central directory
857     * record" and consumes it as well.
858     */
859    private void skipRemainderOfArchive() throws IOException {
860        // skip over central directory. One LFH has been read too much
861        // already.  The calculation discounts file names and extra
862        // data so it will be too short.
863        realSkip(entriesRead * CFH_LEN - LFH_LEN);
864        findEocdRecord();
865        realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
866        readFully(SHORT_BUF);
867        // file comment
868        realSkip(ZipShort.getValue(SHORT_BUF));
869    }
870
871    /**
872     * Reads forward until the signature of the &quot;End of central
873     * directory&quot; record is found.
874     */
875    private void findEocdRecord() throws IOException {
876        int currentByte = -1;
877        boolean skipReadCall = false;
878        while (skipReadCall || (currentByte = readOneByte()) > -1) {
879            skipReadCall = false;
880            if (!isFirstByteOfEocdSig(currentByte)) {
881                continue;
882            }
883            currentByte = readOneByte();
884            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
885                if (currentByte == -1) {
886                    break;
887                }
888                skipReadCall = isFirstByteOfEocdSig(currentByte);
889                continue;
890            }
891            currentByte = readOneByte();
892            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
893                if (currentByte == -1) {
894                    break;
895                }
896                skipReadCall = isFirstByteOfEocdSig(currentByte);
897                continue;
898            }
899            currentByte = readOneByte();
900            if (currentByte == -1
901                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
902                break;
903            }
904            skipReadCall = isFirstByteOfEocdSig(currentByte);
905        }
906    }
907
908    /**
909     * Skips bytes by reading from the underlying stream rather than
910     * the (potentially inflating) archive stream - which {@link
911     * #skip} would do.
912     *
913     * Also updates bytes-read counter.
914     */
915    private void realSkip(long value) throws IOException {
916        if (value >= 0) {
917            long skipped = 0;
918            while (skipped < value) {
919                long rem = value - skipped;
920                int x = in.read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
921                if (x == -1) {
922                    return;
923                }
924                count(x);
925                skipped += x;
926            }
927            return;
928        }
929        throw new IllegalArgumentException();
930    }
931
932    /**
933     * Reads bytes by reading from the underlying stream rather than
934     * the (potentially inflating) archive stream - which {@link #read} would do.
935     *
936     * Also updates bytes-read counter.
937     */
938    private int readOneByte() throws IOException {
939        int b = in.read();
940        if (b != -1) {
941            count(1);
942        }
943        return b;
944    }
945
946    private boolean isFirstByteOfEocdSig(int b) {
947        return b == ZipArchiveOutputStream.EOCD_SIG[0];
948    }
949
950    /**
951     * Structure collecting information for the entry that is
952     * currently being read.
953     */
954    private static final class CurrentEntry {
955
956        /**
957         * Current ZIP entry.
958         */
959        private final ZipArchiveEntry entry = new ZipArchiveEntry();
960
961        /**
962         * Does the entry use a data descriptor?
963         */
964        private boolean hasDataDescriptor;
965
966        /**
967         * Does the entry have a ZIP64 extended information extra field.
968         */
969        private boolean usesZip64;
970
971        /**
972         * Number of bytes of entry content read by the client if the
973         * entry is STORED.
974         */
975        private long bytesRead;
976
977        /**
978         * Number of bytes of entry content read so from the stream.
979         *
980         * <p>This may be more than the actual entry's length as some
981         * stuff gets buffered up and needs to be pushed back when the
982         * end of the entry has been reached.</p>
983         */
984        private long bytesReadFromStream;
985
986        /**
987         * The checksum calculated as the current entry is read.
988         */
989        private final CRC32 crc = new CRC32();
990
991        /**
992         * The input stream decompressing the data for shrunk and imploded entries.
993         */
994        private InputStream in;
995    }
996
997    /**
998     * Bounded input stream adapted from commons-io
999     */
1000    private class BoundedInputStream extends InputStream {
1001
1002        /** the wrapped input stream */
1003        private final InputStream in;
1004
1005        /** the max length to provide */
1006        private final long max;
1007
1008        /** the number of bytes already returned */
1009        private long pos = 0;
1010    
1011        /**
1012         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1013         * stream and limits it to a certain size.
1014         *
1015         * @param in The wrapped input stream
1016         * @param size The maximum number of bytes to return
1017         */
1018        public BoundedInputStream(final InputStream in, final long size) {
1019            this.max = size;
1020            this.in = in;
1021        }
1022
1023        @Override
1024        public int read() throws IOException {
1025            if (max >= 0 && pos >= max) {
1026                return -1;
1027            }
1028            final int result = in.read();
1029            pos++;
1030            count(1);
1031            current.bytesReadFromStream++;
1032            return result;
1033        }
1034
1035        @Override
1036        public int read(final byte[] b) throws IOException {
1037            return this.read(b, 0, b.length);
1038        }
1039
1040        @Override
1041        public int read(final byte[] b, final int off, final int len) throws IOException {
1042            if (max >= 0 && pos >= max) {
1043                return -1;
1044            }
1045            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1046            final int bytesRead = in.read(b, off, (int) maxRead);
1047
1048            if (bytesRead == -1) {
1049                return -1;
1050            }
1051
1052            pos += bytesRead;
1053            count(bytesRead);
1054            current.bytesReadFromStream += bytesRead;
1055            return bytesRead;
1056        }
1057
1058        @Override
1059        public long skip(final long n) throws IOException {
1060            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1061            final long skippedBytes = in.skip(toSkip);
1062            pos += skippedBytes;
1063            return skippedBytes;
1064        }
1065    
1066        @Override
1067        public int available() throws IOException {
1068            if (max >= 0 && pos >= max) {
1069                return 0;
1070            }
1071            return in.available();
1072        }
1073    }
1074}