/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.zip;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.ByteBuffer;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.IOUtils;

import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;

/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>Note that {@link ZipArchiveEntry#getSize()} may return -1 if the
 * DEFLATE algorithm is used, as the size information is not available
 * from the header.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files.</p>
 *
 * <p>As of Apache Commons Compress it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream {

    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it.  This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private final boolean allowStoredEntriesWithDataDescriptor;

    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    private static final int CFH_LEN = 46;
    /*
        central file header signature   WORD
        version made by                 SHORT
        version needed to extract       SHORT
        general purpose bit flag        SHORT
        compression method              SHORT
        last mod file time              SHORT
        last mod file date              SHORT
        crc-32                          WORD
        compressed size                 WORD
        uncompressed size               WORD
        file name length                SHORT
        extra field length              SHORT
        file comment length             SHORT
        disk number start               SHORT
        internal file attributes        SHORT
        external file attributes        WORD
        relative offset of local header WORD
    */

    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] LFH_BUF = new byte[LFH_LEN];
    private final byte[] SKIP_BUF = new byte[1024];
    private final byte[] SHORT_BUF = new byte[SHORT];
    private final byte[] WORD_BUF = new byte[WORD];
    private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD];

    /** Number of local file headers that have been turned into entries so far. */
    private int entriesRead = 0;

    /**
     * Creates a stream that decodes file names as UTF-8 and uses
     * Unicode extra fields.
     *
     * @param inputStream the stream to read the archive from
     */
    public ZipArchiveInputStream(InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * @param inputStream the stream to read the archive from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(InputStream inputStream, String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * @param inputStream the stream to read the archive from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(InputStream inputStream, String encoding, boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }

    /**
     * @param inputStream the stream to read the archive from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(InputStream inputStream,
                                 String encoding,
                                 boolean useUnicodeExtraFields,
                                 boolean allowStoredEntriesWithDataDescriptor) {
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        // the pushback buffer is as big as the read buffer so anything
        // read into buf can be unread again
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        buf.limit(0);
    }

    /**
     * Closes the entry currently being read (if any), reads the next
     * local file header and returns the entry described by it.
     *
     * @return the next entry or {@code null} if the stream has been
     * closed, the central directory has been reached, the stream ended
     * while reading the header or the bytes read don't start with a
     * local file header signature
     * @throws IOException if reading from the underlying stream fails
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }

        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(LFH_BUF);
            } else {
                readFully(LFH_BUF);
            }
        } catch (EOFException e) {
            return null;
        }

        ZipLong sig = new ZipLong(LFH_BUF);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            hitCentralDirectory = true;
            skipRemainderOfArchive();
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            return null;
        }

        int off = WORD;
        current = new CurrentEntry();

        int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(LFH_BUF, off));
        off += SHORT;

        long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off));
        current.entry.setTime(time);
        off += WORD;

        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(LFH_BUF, off));
            off += WORD;

            cSize = new ZipLong(LFH_BUF, off);
            off += WORD;

            size = new ZipLong(LFH_BUF, off);
            off += WORD;
        } else {
            // crc and sizes are not taken from the header when a
            // data descriptor is used, skip the three fields
            off += 3 * WORD;
        }

        int fileNameLen = ZipShort.getValue(LFH_BUF, off);

        off += SHORT;

        int extraLen = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;

        byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);

        byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        if (current.entry.getCompressedSize() != -1) {
            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
                current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        new BoundedInputStream(in, current.entry.getCompressedSize()));
            }
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     *
     * @param lfh buffer of {@code LFH_LEN} bytes to fill
     * @throws UnsupportedZipFeatureException if the stream starts with
     * the signature used by split archives
     * @throws IOException if reading fails or the stream ends early
     */
    private void readFirstLocalFileHeader(byte[] lfh) throws IOException {
        readFully(lfh);
        ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end.  Just skip over the marker.
            byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            // shift the header to the front and append the four bytes
            // that were displaced by the marker
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     *
     * @param size uncompressed size read from the local file header,
     * only used if the entry doesn't use a data descriptor
     * @param cSize compressed size read from the local file header,
     * only used if the entry doesn't use a data descriptor
     */
    private void processZip64Extra(ZipLong size, ZipLong cSize) {
        Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     * @since 1.1
     */
    @Override
    public boolean canReadEntryData(ArchiveEntry ae) {
        if (ae instanceof ZipArchiveEntry) {
            ZipArchiveEntry ze = (ZipArchiveEntry) ae;
            return ZipUtil.canHandleEntryData(ze)
                && supportsDataDescriptorFor(ze);

        }
        return false;
    }

    /**
     * Reads decoded data of the current entry and feeds the bytes
     * read into the entry's running checksum.
     *
     * @param buffer the buffer to read into
     * @param offset index of the first byte to write inside buffer
     * @param length maximum number of bytes to read
     * @return the number of bytes read, 0 if length is 0, or -1 if
     * there is no current entry or the entry's data is exhausted
     * @throws IOException if the stream is closed, the entry uses an
     * unsupported feature or reading fails
     * @throws ArrayIndexOutOfBoundsException if offset and length
     * don't describe a range inside buffer
     */
    @Override
    public int read(byte[] buffer, int offset, int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }

        if (current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        // InputStream contract: a zero length request reads nothing
        // and returns 0 - don't bother the inflater or the buffer
        if (length == 0) {
            return 0;
        }

        ZipUtil.checkRequestedFeatures(current.entry);
        if (!supportsDataDescriptorFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
                    current.entry);
        }

        int read;
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            read = readStored(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            read = readDeflated(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
            read = current.in.read(buffer, offset, length);
        } else {
            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
                    current.entry);
        }

        if (read >= 0) {
            current.crc.update(buffer, offset, read);
        }

        return read;
    }

    /**
     * Implementation of read for STORED entries.
     */
    private int readStored(byte[] buffer, int offset, int length) throws IOException {

        if (current.hasDataDescriptor) {
            // size is unknown up front, the whole entry is cached on
            // first access and served from the cache
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        if (buf.position() >= buf.limit()) {
            // buffer exhausted, refill it from the wrapped stream
            buf.position(0);
            int l = in.read(buf.array());
            if (l == -1) {
                return -1;
            }
            buf.limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
     */
    private int readDeflated(byte[] buffer, int offset, int length) throws IOException {
        int read = readFromInflater(buffer, offset, length);
        if (read <= 0) {
            if (inf.finished()) {
                return -1;
            } else if (inf.needsDictionary()) {
                throw new ZipException("This archive needs a preset dictionary"
                                       + " which is not supported by Commons"
                                       + " Compress.");
            } else if (read == -1) {
                throw new IOException("Truncated ZIP file");
            }
        }
        return read;
    }

    /**
     * Potentially reads more bytes to fill the inflater's buffer and
     * reads from it.
     */
    private int readFromInflater(byte[] buffer, int offset, int length) throws IOException {
        int read = 0;
        do {
            if (inf.needsInput()) {
                int l = fill();
                if (l > 0) {
                    current.bytesReadFromStream += buf.limit();
                } else if (l == -1) {
                    return -1;
                } else {
                    break;
                }
            }
            try {
                read = inf.inflate(buffer, offset, length);
            } catch (DataFormatException e) {
                throw (IOException) new ZipException(e.getMessage()).initCause(e);
            }
        } while (read == 0 && inf.needsInput());
        return read;
    }

    /**
     * Closes the wrapped stream and releases the inflater.  Calling
     * this method on an already closed stream has no effect.
     *
     * @throws IOException if closing the wrapped stream fails
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            try {
                in.close();
            } finally {
                // release the inflater even if closing the wrapped
                // stream has failed
                inf.end();
            }
        }
    }

    /**
     * Skips over and discards value bytes of data from this input
     * stream.
     *
     * <p>This implementation may end up skipping over some smaller
     * number of bytes, possibly 0, if and only if it reaches the end
     * of the underlying stream.</p>
     *
     * <p>The actual number of bytes skipped is returned.</p>
     *
     * @param value the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException - if an I/O error occurs.
     * @throws IllegalArgumentException - if value is negative.
     */
    @Override
    public long skip(long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                long rem = value - skipped;
                int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
                if (x == -1) {
                    return skipped;
                }
                skipped += x;
            }
            return skipped;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Checks if the signature matches what is expected for a zip file.
     * Does not currently handle self-extracting zips which may have arbitrary
     * leading content.
     *
     * @param signature the bytes to check
     * @param length    the number of bytes to check
     * @return true, if this stream is a zip archive stream, false otherwise
     */
    public static boolean matches(byte[] signature, int length) {
        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
            return false;
        }

        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
    }

    /**
     * Whether signature starts with the bytes of expected.
     */
    private static boolean checksig(byte[] signature, byte[] expected) {
        for (int i = 0; i < expected.length; i++) {
            if (signature[i] != expected[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (current.bytesReadFromStream <= current.entry.getCompressedSize()
                && !current.hasDataDescriptor) {
            drainCurrentEntryData();
        } else {
            skip(Long.MAX_VALUE);

            long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                       ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        inf.reset();
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read, yet.
     */
    private void drainCurrentEntryData() throws IOException {
        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
        while (remaining > 0) {
            long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
            if (n < 0) {
                throw new EOFException("Truncated ZIP entry: " + current.entry.getName());
            } else {
                count(n);
                remaining -= n;
            }
        }
    }

    /**
     * Get the number of bytes Inflater has actually processed.
     *
     * <p>for Java &lt; Java7 the getBytes* methods in
     * Inflater/Deflater seem to return unsigned ints rather than
     * longs that start over with 0 at 2^32.</p>
     *
     * <p>The stream knows how many bytes it has read, but not how
     * many the Inflater actually consumed - it should be between the
     * total number of bytes read for the entry and the total number
     * minus the last read operation.  Here we just try to make the
     * value close enough to the bytes we've read by assuming the
     * number of bytes consumed must be smaller than (or equal to) the
     * number of bytes read but not smaller by more than 2^32.</p>
     */
    private long getBytesInflated() {
        long inB = inf.getBytesRead();
        if (current.bytesReadFromStream >= TWO_EXP_32) {
            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
                inB += TWO_EXP_32;
            }
        }
        return inB;
    }

    /**
     * Reads from the wrapped stream into the buffer and hands the
     * bytes read to the inflater.
     *
     * @return the result of the read call on the wrapped stream
     * @throws IOException if the stream is closed or reading fails
     */
    private int fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        int length = in.read(buf.array());
        if (length > 0) {
            buf.limit(length);
            count(buf.limit());
            inf.setInput(buf.array(), 0, buf.limit());
        }
        return length;
    }

    /**
     * Fills the given array from the wrapped stream and updates the
     * bytes-read counter.
     *
     * @throws EOFException if the stream ends before the array has
     * been filled
     */
    private void readFully(byte[] b) throws IOException {
        int count = IOUtils.readFully(in, b);
        count(count);
        if (count < b.length) {
            throw new EOFException();
        }
    }

    /**
     * Reads the data descriptor of the current entry - with or
     * without signature - and sets the entry's CRC and sizes from it.
     */
    private void readDataDescriptor() throws IOException {
        readFully(WORD_BUF);
        ZipLong val = new ZipLong(WORD_BUF);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(WORD_BUF);
            val = new ZipLong(WORD_BUF);
        }
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each.  Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(TWO_DWORD_BUF);
        ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            pushback(TWO_DWORD_BUF, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF));
            current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD));
        } else {
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF));
            current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if allowStoredEntriesWithDataDescriptor is true,
     * the entry doesn't require any data descriptor or the method is
     * DEFLATED.
     */
    private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) {
        return !entry.getGeneralPurposeBit().usesDataDescriptor()

            || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
            || entry.getMethod() == ZipEntry.DEFLATED;
    }

    /**
     * Caches a stored entry that uses the data descriptor.
     *
     * <ul>
     *   <li>Reads a stored entry until the signature of a local file
     *   header, central directory header or data descriptor has been
     *   found.</li>
     *   <li>Stores all entry data in lastStoredEntry.</li>
     *   <li>Rewinds the stream to position at the data
     *   descriptor.</li>
     *   <li>reads the data descriptor</li>
     * </ul>
     *
     * <p>After calling this method the entry should know its size,
     * the entry's data is cached and the stream is positioned at the
     * next local file or central directory header.</p>
     */
    private void readStoredEntry() throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int off = 0;
        boolean done = false;

        // length of DD without signature
        int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;

        while (!done) {
            int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
            if (r <= 0) {
                // read the whole archive without ever finding a
                // central directory
                throw new IOException("Truncated ZIP file");
            }
            if (r + off < 4) {
                // buffer too small to check for a signature, loop
                off += r;
                continue;
            }

            done = bufferContainsSignature(bos, off, r, ddLen);
            if (!done) {
                off = cacheBytesRead(bos, off, r, ddLen);
            }
        }

        byte[] b = bos.toByteArray();
        lastStoredEntry = new ByteArrayInputStream(b);
    }

    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();

    /**
     * Checks whether the current buffer contains the signature of a
     * &quot;data descriptor&quot;, &quot;local file header&quot; or
     * &quot;central directory entry&quot;.
     *
     * <p>If it contains such a signature, reads the data descriptor
     * and positions the stream right after the data descriptor.</p>
     *
     * @param bos cache receiving the entry data in front of the data
     * descriptor
     * @param offset number of bytes at the start of the buffer that
     * have been kept from previous reads
     * @param lastRead number of bytes added by the last read
     * @param expectedDDLen length of a data descriptor without
     * signature
     * @return whether a signature has been found
     */
    private boolean bufferContainsSignature(ByteArrayOutputStream bos, int offset, int lastRead, int expectedDDLen)
        throws IOException {

        final byte[] data = buf.array();
        // valid bytes are the ones kept from previous reads plus the
        // ones just read
        final int available = offset + lastRead;
        boolean done = false;
        for (int i = 0; !done && i <= available - 4; i++) {
            // all three signatures share their first two bytes
            if (data[i] == LFH[0] && data[i + 1] == LFH[1]) {
                int ddPos = i;
                final boolean foundLfh = data[i + 2] == LFH[2] && data[i + 3] == LFH[3];
                final boolean foundCfh = data[i + 2] == CFH[2] && data[i + 3] == CFH[3];
                if (i >= expectedDDLen && (foundLfh || foundCfh)) {
                    // found a LFH or CFH: the data descriptor
                    // (without signature) sits right in front of it
                    ddPos = i - expectedDDLen;
                    done = true;
                } else if (data[i + 2] == DD[2] && data[i + 3] == DD[3]) {
                    // found DD:
                    done = true;
                }
                if (done) {
                    // * push back bytes read in excess as well as the data
                    //   descriptor
                    // * copy the remaining bytes to cache
                    // * read data descriptor
                    pushback(data, ddPos, available - ddPos);
                    bos.write(data, 0, ddPos);
                    readDataDescriptor();
                }
            }
        }
        return done;
    }

    /**
     * If the last read bytes could hold a data descriptor and an
     * incomplete signature then save the last bytes to the front of
     * the buffer and cache everything in front of the potential data
     * descriptor into the given ByteArrayOutputStream.
     *
     * <p>Data descriptor plus incomplete signature (3 bytes in the
     * worst case) can be 20 bytes max.</p>
     *
     * @return the number of bytes kept at the start of the buffer
     */
    private int cacheBytesRead(ByteArrayOutputStream bos, int offset, int lastRead, int expectedDDLen) {
        final int cacheable = offset + lastRead - expectedDDLen - 3;
        if (cacheable > 0) {
            bos.write(buf.array(), 0, cacheable);
            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expectedDDLen + 3);
            offset = expectedDDLen + 3;
        } else {
            offset += lastRead;
        }
        return offset;
    }

    /**
     * Returns bytes to the wrapped stream and adjusts the bytes-read
     * counter accordingly.
     */
    private void pushback(byte[] buf, int offset, int length) throws IOException {
        ((PushbackInputStream) in).unread(buf, offset, length);
        pushedBackBytes(length);
    }

    // End of Central Directory Record
    //   end of central dir signature    WORD
    //   number of this disk             SHORT
    //   number of the disk with the
    //   start of the central directory  SHORT
    //   total number of entries in the
    //   central directory on this disk  SHORT
    //   total number of entries in
    //   the central directory           SHORT
    //   size of the central directory   WORD
    //   offset of start of central
    //   directory with respect to
    //   the starting disk number        WORD
    //   .ZIP file comment length        SHORT
    //   .ZIP file comment               up to 64KB
    //

    /**
     * Reads the stream until it finds the "End of central directory
     * record" and consumes it as well.
     */
    private void skipRemainderOfArchive() throws IOException {
        // skip over central directory. One LFH has been read too much
        // already.  The calculation discounts file names and extra
        // data so it will be too short.
        if (entriesRead > 0) {
            // without any entry the difference would be negative;
            // use long arithmetic so many entries can't overflow
            realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
        }
        findEocdRecord();
        realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
        readFully(SHORT_BUF);
        // file comment
        realSkip(ZipShort.getValue(SHORT_BUF));
    }

    /**
     * Reads forward until the signature of the &quot;End of central
     * directory&quot; record is found.
     */
    private void findEocdRecord() throws IOException {
        int currentByte = -1;
        boolean skipReadCall = false;
        while (skipReadCall || (currentByte = readOneByte()) > -1) {
            skipReadCall = false;
            if (!isFirstByteOfEocdSig(currentByte)) {
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte == -1
                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
                break;
            }
            skipReadCall = isFirstByteOfEocdSig(currentByte);
        }
    }

    /**
     * Skips bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link
     * #skip} would do.
     *
     * Also updates bytes-read counter.
     *
     * @throws IllegalArgumentException if value is negative
     */
    private void realSkip(long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                long rem = value - skipped;
                int x = in.read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
                if (x == -1) {
                    return;
                }
                count(x);
                skipped += x;
            }
            return;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Reads bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link #read} would do.
     *
     * Also updates bytes-read counter.
     */
    private int readOneByte() throws IOException {
        int b = in.read();
        if (b != -1) {
            count(1);
        }
        return b;
    }

    private boolean isFirstByteOfEocdSig(int b) {
        return b == ZipArchiveOutputStream.EOCD_SIG[0];
    }

    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read so far from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }

    /**
     * Bounded input stream adapted from commons-io
     */
    private class BoundedInputStream extends InputStream {

        /** the wrapped input stream */
        private final InputStream in;

        /** the max length to provide */
        private final long max;

        /** the number of bytes already returned */
        private long pos = 0;

        /**
         * Creates a new <code>BoundedInputStream</code> that wraps the given input
         * stream and limits it to a certain size.
         *
         * @param in The wrapped input stream
         * @param size The maximum number of bytes to return
         */
        public BoundedInputStream(final InputStream in, final long size) {
            this.max = size;
            this.in = in;
        }

        @Override
        public int read() throws IOException {
            if (max >= 0 && pos >= max) {
                return -1;
            }
            final int result = in.read();
            if (result != -1) {
                // only account for bytes that have really been read
                pos++;
                count(1);
                current.bytesReadFromStream++;
            }
            return result;
        }

        @Override
        public int read(final byte[] b) throws IOException {
            return this.read(b, 0, b.length);
        }

        @Override
        public int read(final byte[] b, final int off, final int len) throws IOException {
            if (max >= 0 && pos >= max) {
                return -1;
            }
            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
            final int bytesRead = in.read(b, off, (int) maxRead);

            if (bytesRead == -1) {
                return -1;
            }

            pos += bytesRead;
            count(bytesRead);
            current.bytesReadFromStream += bytesRead;
            return bytesRead;
        }

        @Override
        public long skip(final long n) throws IOException {
            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
            final long skippedBytes = in.skip(toSkip);
            pos += skippedBytes;
            return skippedBytes;
        }

        @Override
        public int available() throws IOException {
            if (max >= 0 && pos >= max) {
                return 0;
            }
            return in.available();
        }
    }
}