001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.BufferedInputStream; 021import java.io.Closeable; 022import java.io.EOFException; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.RandomAccessFile; 027import java.util.Arrays; 028import java.util.Collections; 029import java.util.Comparator; 030import java.util.Enumeration; 031import java.util.HashMap; 032import java.util.LinkedList; 033import java.util.List; 034import java.util.Map; 035import java.util.zip.Inflater; 036import java.util.zip.InflaterInputStream; 037import java.util.zip.ZipException; 038 039import org.apache.commons.compress.utils.IOUtils; 040 041import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 042import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 043import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 044import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 045import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 046 047/** 048 * Replacement for <code>java.util.ZipFile</code>. 049 * 050 * <p>This class adds support for file name encodings other than UTF-8 051 * (which is required to work on ZIP files created by native zip tools 052 * and is able to skip a preamble like the one found in self 053 * extracting archives. Furthermore it returns instances of 054 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 055 * instead of <code>java.util.zip.ZipEntry</code>.</p> 056 * 057 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 058 * have to reimplement all methods anyway. Like 059 * <code>java.util.ZipFile</code>, it uses RandomAccessFile under the 060 * covers and supports compressed and uncompressed entries. As of 061 * Apache Commons Compress 1.3 it also transparently supports Zip64 062 * extensions and thus individual entries and archives larger than 4 063 * GB or with more than 65536 entries.</p> 064 * 065 * <p>The method signatures mimic the ones of 066 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: 067 * 068 * <ul> 069 * <li>There is no getName method.</li> 070 * <li>entries has been renamed to getEntries.</li> 071 * <li>getEntries and getEntry return 072 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 073 * instances.</li> 074 * <li>close is allowed to throw IOException.</li> 075 * </ul> 076 * 077 */ 078public class ZipFile implements Closeable { 079 private static final int HASH_SIZE = 509; 080 static final int NIBLET_MASK = 0x0f; 081 static final int BYTE_SHIFT = 8; 082 private static final int POS_0 = 0; 083 private static final int POS_1 = 1; 084 private static final int POS_2 = 2; 085 private static final int POS_3 = 3; 086 087 /** 088 * List of entries in the order they appear inside the central 089 * directory. 090 */ 091 private final List<ZipArchiveEntry> entries = 092 new LinkedList<ZipArchiveEntry>(); 093 094 /** 095 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 096 */ 097 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = 098 new HashMap<String, LinkedList<ZipArchiveEntry>>(HASH_SIZE); 099 100 private static final class OffsetEntry { 101 private long headerOffset = -1; 102 private long dataOffset = -1; 103 } 104 105 /** 106 * The encoding to use for filenames and the file comment. 107 * 108 * <p>For a list of possible values see <a 109 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 110 * Defaults to UTF-8.</p> 111 */ 112 private final String encoding; 113 114 /** 115 * The zip encoding to use for filenames and the file comment. 116 */ 117 private final ZipEncoding zipEncoding; 118 119 /** 120 * File name of actual source. 121 */ 122 private final String archiveName; 123 124 /** 125 * The actual data source. 126 */ 127 private final RandomAccessFile archive; 128 129 /** 130 * Whether to look for and use Unicode extra fields. 131 */ 132 private final boolean useUnicodeExtraFields; 133 134 /** 135 * Whether the file is closed. 136 */ 137 private boolean closed; 138 139 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 140 private final byte[] DWORD_BUF = new byte[DWORD]; 141 private final byte[] WORD_BUF = new byte[WORD]; 142 private final byte[] CFH_BUF = new byte[CFH_LEN]; 143 private final byte[] SHORT_BUF = new byte[SHORT]; 144 145 /** 146 * Opens the given file for reading, assuming "UTF8" for file names. 147 * 148 * @param f the archive. 149 * 150 * @throws IOException if an error occurs while reading the file. 151 */ 152 public ZipFile(File f) throws IOException { 153 this(f, ZipEncodingHelper.UTF8); 154 } 155 156 /** 157 * Opens the given file for reading, assuming "UTF8". 158 * 159 * @param name name of the archive. 160 * 161 * @throws IOException if an error occurs while reading the file. 162 */ 163 public ZipFile(String name) throws IOException { 164 this(new File(name), ZipEncodingHelper.UTF8); 165 } 166 167 /** 168 * Opens the given file for reading, assuming the specified 169 * encoding for file names, scanning unicode extra fields. 170 * 171 * @param name name of the archive. 172 * @param encoding the encoding to use for file names, use null 173 * for the platform's default encoding 174 * 175 * @throws IOException if an error occurs while reading the file. 176 */ 177 public ZipFile(String name, String encoding) throws IOException { 178 this(new File(name), encoding, true); 179 } 180 181 /** 182 * Opens the given file for reading, assuming the specified 183 * encoding for file names and scanning for unicode extra fields. 184 * 185 * @param f the archive. 186 * @param encoding the encoding to use for file names, use null 187 * for the platform's default encoding 188 * 189 * @throws IOException if an error occurs while reading the file. 190 */ 191 public ZipFile(File f, String encoding) throws IOException { 192 this(f, encoding, true); 193 } 194 195 /** 196 * Opens the given file for reading, assuming the specified 197 * encoding for file names. 198 * 199 * @param f the archive. 200 * @param encoding the encoding to use for file names, use null 201 * for the platform's default encoding 202 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 203 * Extra Fields (if present) to set the file names. 204 * 205 * @throws IOException if an error occurs while reading the file. 206 */ 207 public ZipFile(File f, String encoding, boolean useUnicodeExtraFields) 208 throws IOException { 209 this.archiveName = f.getAbsolutePath(); 210 this.encoding = encoding; 211 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 212 this.useUnicodeExtraFields = useUnicodeExtraFields; 213 archive = new RandomAccessFile(f, "r"); 214 boolean success = false; 215 try { 216 Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 217 populateFromCentralDirectory(); 218 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 219 success = true; 220 } finally { 221 if (!success) { 222 closed = true; 223 IOUtils.closeQuietly(archive); 224 } 225 } 226 } 227 228 /** 229 * The encoding to use for filenames and the file comment. 230 * 231 * @return null if using the platform's default character encoding. 232 */ 233 public String getEncoding() { 234 return encoding; 235 } 236 237 /** 238 * Closes the archive. 239 * @throws IOException if an error occurs closing the archive. 240 */ 241 public void close() throws IOException { 242 // this flag is only written here and read in finalize() which 243 // can never be run in parallel. 244 // no synchronization needed. 245 closed = true; 246 247 archive.close(); 248 } 249 250 /** 251 * close a zipfile quietly; throw no io fault, do nothing 252 * on a null parameter 253 * @param zipfile file to close, can be null 254 */ 255 public static void closeQuietly(ZipFile zipfile) { 256 IOUtils.closeQuietly(zipfile); 257 } 258 259 /** 260 * Returns all entries. 261 * 262 * <p>Entries will be returned in the same order they appear 263 * within the archive's central directory.</p> 264 * 265 * @return all entries as {@link ZipArchiveEntry} instances 266 */ 267 public Enumeration<ZipArchiveEntry> getEntries() { 268 return Collections.enumeration(entries); 269 } 270 271 /** 272 * Returns all entries in physical order. 273 * 274 * <p>Entries will be returned in the same order their contents 275 * appear within the archive.</p> 276 * 277 * @return all entries as {@link ZipArchiveEntry} instances 278 * 279 * @since 1.1 280 */ 281 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 282 ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[0]); 283 Arrays.sort(allEntries, OFFSET_COMPARATOR); 284 return Collections.enumeration(Arrays.asList(allEntries)); 285 } 286 287 /** 288 * Returns a named entry - or {@code null} if no entry by 289 * that name exists. 290 * 291 * <p>If multiple entries with the same name exist the first entry 292 * in the archive's central directory by that name is 293 * returned.</p> 294 * 295 * @param name name of the entry. 296 * @return the ZipArchiveEntry corresponding to the given name - or 297 * {@code null} if not present. 298 */ 299 public ZipArchiveEntry getEntry(String name) { 300 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 301 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 302 } 303 304 /** 305 * Returns all named entries in the same order they appear within 306 * the archive's central directory. 307 * 308 * @param name name of the entry. 309 * @return the Iterable<ZipArchiveEntry> corresponding to the 310 * given name 311 * @since 1.6 312 */ 313 public Iterable<ZipArchiveEntry> getEntries(String name) { 314 List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 315 return entriesOfThatName != null ? entriesOfThatName 316 : Collections.<ZipArchiveEntry>emptyList(); 317 } 318 319 /** 320 * Returns all named entries in the same order their contents 321 * appear within the archive. 322 * 323 * @param name name of the entry. 324 * @return the Iterable<ZipArchiveEntry> corresponding to the 325 * given name 326 * @since 1.6 327 */ 328 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(String name) { 329 ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0]; 330 if (nameMap.containsKey(name)) { 331 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 332 Arrays.sort(entriesOfThatName, OFFSET_COMPARATOR); 333 } 334 return Arrays.asList(entriesOfThatName); 335 } 336 337 /** 338 * Whether this class is able to read the given entry. 339 * 340 * <p>May return false if it is set up to use encryption or a 341 * compression method that hasn't been implemented yet.</p> 342 * @since 1.1 343 */ 344 public boolean canReadEntryData(ZipArchiveEntry ze) { 345 return ZipUtil.canHandleEntryData(ze); 346 } 347 348 /** 349 * Returns an InputStream for reading the contents of the given entry. 350 * 351 * @param ze the entry to get the stream for. 352 * @return a stream to read the entry from. 353 * @throws IOException if unable to create an input stream from the zipentry 354 * @throws ZipException if the zipentry uses an unsupported feature 355 */ 356 public InputStream getInputStream(ZipArchiveEntry ze) 357 throws IOException, ZipException { 358 if (!(ze instanceof Entry)) { 359 return null; 360 } 361 // cast valididty is checked just above 362 OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry(); 363 ZipUtil.checkRequestedFeatures(ze); 364 long start = offsetEntry.dataOffset; 365 BoundedInputStream bis = 366 new BoundedInputStream(start, ze.getCompressedSize()); 367 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 368 case STORED: 369 return bis; 370 case UNSHRINKING: 371 return new UnshrinkingInputStream(bis); 372 case IMPLODING: 373 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 374 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), new BufferedInputStream(bis)); 375 case DEFLATED: 376 bis.addDummy(); 377 final Inflater inflater = new Inflater(true); 378 return new InflaterInputStream(bis, inflater) { 379 @Override 380 public void close() throws IOException { 381 super.close(); 382 inflater.end(); 383 } 384 }; 385 default: 386 throw new ZipException("Found unsupported compression method " 387 + ze.getMethod()); 388 } 389 } 390 391 /** 392 * <p> 393 * Convenience method to return the entry's content as a String if isUnixSymlink() 394 * returns true for it, otherwise returns null. 395 * </p> 396 * 397 * <p>This method assumes the symbolic link's file name uses the 398 * same encoding that as been specified for this ZipFile.</p> 399 * 400 * @param entry ZipArchiveEntry object that represents the symbolic link 401 * @return entry's content as a String 402 * @throws IOException problem with content's input stream 403 * @since 1.5 404 */ 405 public String getUnixSymlink(ZipArchiveEntry entry) throws IOException { 406 if (entry != null && entry.isUnixSymlink()) { 407 InputStream in = null; 408 try { 409 in = getInputStream(entry); 410 byte[] symlinkBytes = IOUtils.toByteArray(in); 411 return zipEncoding.decode(symlinkBytes); 412 } finally { 413 if (in != null) { 414 in.close(); 415 } 416 } 417 } else { 418 return null; 419 } 420 } 421 422 /** 423 * Ensures that the close method of this zipfile is called when 424 * there are no more references to it. 425 * @see #close() 426 */ 427 @Override 428 protected void finalize() throws Throwable { 429 try { 430 if (!closed) { 431 System.err.println("Cleaning up unclosed ZipFile for archive " 432 + archiveName); 433 close(); 434 } 435 } finally { 436 super.finalize(); 437 } 438 } 439 440 /** 441 * Length of a "central directory" entry structure without file 442 * name, extra fields or comment. 443 */ 444 private static final int CFH_LEN = 445 /* version made by */ SHORT 446 /* version needed to extract */ + SHORT 447 /* general purpose bit flag */ + SHORT 448 /* compression method */ + SHORT 449 /* last mod file time */ + SHORT 450 /* last mod file date */ + SHORT 451 /* crc-32 */ + WORD 452 /* compressed size */ + WORD 453 /* uncompressed size */ + WORD 454 /* filename length */ + SHORT 455 /* extra field length */ + SHORT 456 /* file comment length */ + SHORT 457 /* disk number start */ + SHORT 458 /* internal file attributes */ + SHORT 459 /* external file attributes */ + WORD 460 /* relative offset of local header */ + WORD; 461 462 private static final long CFH_SIG = 463 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 464 465 /** 466 * Reads the central directory of the given archive and populates 467 * the internal tables with ZipArchiveEntry instances. 468 * 469 * <p>The ZipArchiveEntrys will know all data that can be obtained from 470 * the central directory alone, but not the data that requires the 471 * local file header or additional data to be read.</p> 472 * 473 * @return a map of zipentries that didn't have the language 474 * encoding flag set when read. 475 */ 476 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() 477 throws IOException { 478 HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = 479 new HashMap<ZipArchiveEntry, NameAndComment>(); 480 481 positionAtCentralDirectory(); 482 483 archive.readFully(WORD_BUF); 484 long sig = ZipLong.getValue(WORD_BUF); 485 486 if (sig != CFH_SIG && startsWithLocalFileHeader()) { 487 throw new IOException("central directory is empty, can't expand" 488 + " corrupt archive."); 489 } 490 491 while (sig == CFH_SIG) { 492 readCentralDirectoryEntry(noUTF8Flag); 493 archive.readFully(WORD_BUF); 494 sig = ZipLong.getValue(WORD_BUF); 495 } 496 return noUTF8Flag; 497 } 498 499 /** 500 * Reads an individual entry of the central directory, creats an 501 * ZipArchiveEntry from it and adds it to the global maps. 502 * 503 * @param noUTF8Flag map used to collect entries that don't have 504 * their UTF-8 flag set and whose name will be set by data read 505 * from the local file header later. The current entry may be 506 * added to this map. 507 */ 508 private void 509 readCentralDirectoryEntry(Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) 510 throws IOException { 511 archive.readFully(CFH_BUF); 512 int off = 0; 513 OffsetEntry offset = new OffsetEntry(); 514 Entry ze = new Entry(offset); 515 516 int versionMadeBy = ZipShort.getValue(CFH_BUF, off); 517 off += SHORT; 518 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK); 519 520 off += SHORT; // skip version info 521 522 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(CFH_BUF, off); 523 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 524 final ZipEncoding entryEncoding = 525 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 526 ze.setGeneralPurposeBit(gpFlag); 527 528 off += SHORT; 529 530 ze.setMethod(ZipShort.getValue(CFH_BUF, off)); 531 off += SHORT; 532 533 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(CFH_BUF, off)); 534 ze.setTime(time); 535 off += WORD; 536 537 ze.setCrc(ZipLong.getValue(CFH_BUF, off)); 538 off += WORD; 539 540 ze.setCompressedSize(ZipLong.getValue(CFH_BUF, off)); 541 off += WORD; 542 543 ze.setSize(ZipLong.getValue(CFH_BUF, off)); 544 off += WORD; 545 546 int fileNameLen = ZipShort.getValue(CFH_BUF, off); 547 off += SHORT; 548 549 int extraLen = ZipShort.getValue(CFH_BUF, off); 550 off += SHORT; 551 552 int commentLen = ZipShort.getValue(CFH_BUF, off); 553 off += SHORT; 554 555 int diskStart = ZipShort.getValue(CFH_BUF, off); 556 off += SHORT; 557 558 ze.setInternalAttributes(ZipShort.getValue(CFH_BUF, off)); 559 off += SHORT; 560 561 ze.setExternalAttributes(ZipLong.getValue(CFH_BUF, off)); 562 off += WORD; 563 564 byte[] fileName = new byte[fileNameLen]; 565 archive.readFully(fileName); 566 ze.setName(entryEncoding.decode(fileName), fileName); 567 568 // LFH offset, 569 offset.headerOffset = ZipLong.getValue(CFH_BUF, off); 570 // data offset will be filled later 571 entries.add(ze); 572 573 byte[] cdExtraData = new byte[extraLen]; 574 archive.readFully(cdExtraData); 575 ze.setCentralDirectoryExtra(cdExtraData); 576 577 setSizesAndOffsetFromZip64Extra(ze, offset, diskStart); 578 579 byte[] comment = new byte[commentLen]; 580 archive.readFully(comment); 581 ze.setComment(entryEncoding.decode(comment)); 582 583 if (!hasUTF8Flag && useUnicodeExtraFields) { 584 noUTF8Flag.put(ze, new NameAndComment(fileName, comment)); 585 } 586 } 587 588 /** 589 * If the entry holds a Zip64 extended information extra field, 590 * read sizes from there if the entry's sizes are set to 591 * 0xFFFFFFFFF, do the same for the offset of the local file 592 * header. 593 * 594 * <p>Ensures the Zip64 extra either knows both compressed and 595 * uncompressed size or neither of both as the internal logic in 596 * ExtraFieldUtils forces the field to create local header data 597 * even if they are never used - and here a field with only one 598 * size would be invalid.</p> 599 */ 600 private void setSizesAndOffsetFromZip64Extra(ZipArchiveEntry ze, 601 OffsetEntry offset, 602 int diskStart) 603 throws IOException { 604 Zip64ExtendedInformationExtraField z64 = 605 (Zip64ExtendedInformationExtraField) 606 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 607 if (z64 != null) { 608 boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; 609 boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; 610 boolean hasRelativeHeaderOffset = 611 offset.headerOffset == ZIP64_MAGIC; 612 z64.reparseCentralDirectoryData(hasUncompressedSize, 613 hasCompressedSize, 614 hasRelativeHeaderOffset, 615 diskStart == ZIP64_MAGIC_SHORT); 616 617 if (hasUncompressedSize) { 618 ze.setSize(z64.getSize().getLongValue()); 619 } else if (hasCompressedSize) { 620 z64.setSize(new ZipEightByteInteger(ze.getSize())); 621 } 622 623 if (hasCompressedSize) { 624 ze.setCompressedSize(z64.getCompressedSize().getLongValue()); 625 } else if (hasUncompressedSize) { 626 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 627 } 628 629 if (hasRelativeHeaderOffset) { 630 offset.headerOffset = 631 z64.getRelativeHeaderOffset().getLongValue(); 632 } 633 } 634 } 635 636 /** 637 * Length of the "End of central directory record" - which is 638 * supposed to be the last structure of the archive - without file 639 * comment. 640 */ 641 static final int MIN_EOCD_SIZE = 642 /* end of central dir signature */ WORD 643 /* number of this disk */ + SHORT 644 /* number of the disk with the */ 645 /* start of the central directory */ + SHORT 646 /* total number of entries in */ 647 /* the central dir on this disk */ + SHORT 648 /* total number of entries in */ 649 /* the central dir */ + SHORT 650 /* size of the central directory */ + WORD 651 /* offset of start of central */ 652 /* directory with respect to */ 653 /* the starting disk number */ + WORD 654 /* zipfile comment length */ + SHORT; 655 656 /** 657 * Maximum length of the "End of central directory record" with a 658 * file comment. 659 */ 660 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 661 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 662 663 /** 664 * Offset of the field that holds the location of the first 665 * central directory entry inside the "End of central directory 666 * record" relative to the start of the "End of central directory 667 * record". 668 */ 669 private static final int CFD_LOCATOR_OFFSET = 670 /* end of central dir signature */ WORD 671 /* number of this disk */ + SHORT 672 /* number of the disk with the */ 673 /* start of the central directory */ + SHORT 674 /* total number of entries in */ 675 /* the central dir on this disk */ + SHORT 676 /* total number of entries in */ 677 /* the central dir */ + SHORT 678 /* size of the central directory */ + WORD; 679 680 /** 681 * Length of the "Zip64 end of central directory locator" - which 682 * should be right in front of the "end of central directory 683 * record" if one is present at all. 684 */ 685 private static final int ZIP64_EOCDL_LENGTH = 686 /* zip64 end of central dir locator sig */ WORD 687 /* number of the disk with the start */ 688 /* start of the zip64 end of */ 689 /* central directory */ + WORD 690 /* relative offset of the zip64 */ 691 /* end of central directory record */ + DWORD 692 /* total number of disks */ + WORD; 693 694 /** 695 * Offset of the field that holds the location of the "Zip64 end 696 * of central directory record" inside the "Zip64 end of central 697 * directory locator" relative to the start of the "Zip64 end of 698 * central directory locator". 699 */ 700 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 701 /* zip64 end of central dir locator sig */ WORD 702 /* number of the disk with the start */ 703 /* start of the zip64 end of */ 704 /* central directory */ + WORD; 705 706 /** 707 * Offset of the field that holds the location of the first 708 * central directory entry inside the "Zip64 end of central 709 * directory record" relative to the start of the "Zip64 end of 710 * central directory record". 711 */ 712 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 713 /* zip64 end of central dir */ 714 /* signature */ WORD 715 /* size of zip64 end of central */ 716 /* directory record */ + DWORD 717 /* version made by */ + SHORT 718 /* version needed to extract */ + SHORT 719 /* number of this disk */ + WORD 720 /* number of the disk with the */ 721 /* start of the central directory */ + WORD 722 /* total number of entries in the */ 723 /* central directory on this disk */ + DWORD 724 /* total number of entries in the */ 725 /* central directory */ + DWORD 726 /* size of the central directory */ + DWORD; 727 728 /** 729 * Searches for either the "Zip64 end of central directory 730 * locator" or the "End of central dir record", parses 731 * it and positions the stream at the first central directory 732 * record. 733 */ 734 private void positionAtCentralDirectory() 735 throws IOException { 736 positionAtEndOfCentralDirectoryRecord(); 737 boolean found = false; 738 boolean searchedForZip64EOCD = 739 archive.getFilePointer() > ZIP64_EOCDL_LENGTH; 740 if (searchedForZip64EOCD) { 741 archive.seek(archive.getFilePointer() - ZIP64_EOCDL_LENGTH); 742 archive.readFully(WORD_BUF); 743 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 744 WORD_BUF); 745 } 746 if (!found) { 747 // not a ZIP64 archive 748 if (searchedForZip64EOCD) { 749 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 750 } 751 positionAtCentralDirectory32(); 752 } else { 753 positionAtCentralDirectory64(); 754 } 755 } 756 757 /** 758 * Parses the "Zip64 end of central directory locator", 759 * finds the "Zip64 end of central directory record" using the 760 * parsed information, parses that and positions the stream at the 761 * first central directory record. 762 * 763 * Expects stream to be positioned right behind the "Zip64 764 * end of central directory locator"'s signature. 765 */ 766 private void positionAtCentralDirectory64() 767 throws IOException { 768 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 769 - WORD /* signature has already been read */); 770 archive.readFully(DWORD_BUF); 771 archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF)); 772 archive.readFully(WORD_BUF); 773 if (!Arrays.equals(WORD_BUF, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 774 throw new ZipException("archive's ZIP64 end of central " 775 + "directory locator is corrupt."); 776 } 777 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 778 - WORD /* signature has already been read */); 779 archive.readFully(DWORD_BUF); 780 archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF)); 781 } 782 783 /** 784 * Parses the "End of central dir record" and positions 785 * the stream at the first central directory record. 786 * 787 * Expects stream to be positioned at the beginning of the 788 * "End of central dir record". 789 */ 790 private void positionAtCentralDirectory32() 791 throws IOException { 792 skipBytes(CFD_LOCATOR_OFFSET); 793 archive.readFully(WORD_BUF); 794 archive.seek(ZipLong.getValue(WORD_BUF)); 795 } 796 797 /** 798 * Searches for the and positions the stream at the start of the 799 * "End of central dir record". 800 */ 801 private void positionAtEndOfCentralDirectoryRecord() 802 throws IOException { 803 boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 804 ZipArchiveOutputStream.EOCD_SIG); 805 if (!found) { 806 throw new ZipException("archive is not a ZIP archive"); 807 } 808 } 809 810 /** 811 * Searches the archive backwards from minDistance to maxDistance 812 * for the given signature, positions the RandomaccessFile right 813 * at the signature if it has been found. 814 */ 815 private boolean tryToLocateSignature(long minDistanceFromEnd, 816 long maxDistanceFromEnd, 817 byte[] sig) throws IOException { 818 boolean found = false; 819 long off = archive.length() - minDistanceFromEnd; 820 final long stopSearching = 821 Math.max(0L, archive.length() - maxDistanceFromEnd); 822 if (off >= 0) { 823 for (; off >= stopSearching; off--) { 824 archive.seek(off); 825 int curr = archive.read(); 826 if (curr == -1) { 827 break; 828 } 829 if (curr == sig[POS_0]) { 830 curr = archive.read(); 831 if (curr == sig[POS_1]) { 832 curr = archive.read(); 833 if (curr == sig[POS_2]) { 834 curr = archive.read(); 835 if (curr == sig[POS_3]) { 836 found = true; 837 break; 838 } 839 } 840 } 841 } 842 } 843 } 844 if (found) { 845 archive.seek(off); 846 } 847 return found; 848 } 849 850 /** 851 * Skips the given number of bytes or throws an EOFException if 852 * skipping failed. 853 */ 854 private void skipBytes(final int count) throws IOException { 855 int totalSkipped = 0; 856 while (totalSkipped < count) { 857 int skippedNow = archive.skipBytes(count - totalSkipped); 858 if (skippedNow <= 0) { 859 throw new EOFException(); 860 } 861 totalSkipped += skippedNow; 862 } 863 } 864 865 /** 866 * Number of bytes in local file header up to the "length of 867 * filename" entry. 868 */ 869 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 870 /* local file header signature */ WORD 871 /* version needed to extract */ + SHORT 872 /* general purpose bit flag */ + SHORT 873 /* compression method */ + SHORT 874 /* last mod file time */ + SHORT 875 /* last mod file date */ + SHORT 876 /* crc-32 */ + WORD 877 /* compressed size */ + WORD 878 /* uncompressed size */ + WORD; 879 880 /** 881 * Walks through all recorded entries and adds the data available 882 * from the local file header. 883 * 884 * <p>Also records the offsets for the data to read from the 885 * entries.</p> 886 */ 887 private void resolveLocalFileHeaderData(Map<ZipArchiveEntry, NameAndComment> 888 entriesWithoutUTF8Flag) 889 throws IOException { 890 for (ZipArchiveEntry zipArchiveEntry : entries) { 891 // entries is filled in populateFromCentralDirectory and 892 // never modified 893 Entry ze = (Entry) zipArchiveEntry; 894 OffsetEntry offsetEntry = ze.getOffsetEntry(); 895 long offset = offsetEntry.headerOffset; 896 archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 897 archive.readFully(SHORT_BUF); 898 int fileNameLen = ZipShort.getValue(SHORT_BUF); 899 archive.readFully(SHORT_BUF); 900 int extraFieldLen = ZipShort.getValue(SHORT_BUF); 901 int lenToSkip = fileNameLen; 902 while (lenToSkip > 0) { 903 int skipped = archive.skipBytes(lenToSkip); 904 if (skipped <= 0) { 905 throw new IOException("failed to skip file name in" 906 + " local file header"); 907 } 908 lenToSkip -= skipped; 909 } 910 byte[] localExtraData = new byte[extraFieldLen]; 911 archive.readFully(localExtraData); 912 ze.setExtra(localExtraData); 913 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH 914 + SHORT + SHORT + fileNameLen + extraFieldLen; 915 916 if (entriesWithoutUTF8Flag.containsKey(ze)) { 917 NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 918 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 919 nc.comment); 920 } 921 922 String name = ze.getName(); 923 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 924 if (entriesOfThatName == null) { 925 entriesOfThatName = new LinkedList<ZipArchiveEntry>(); 926 nameMap.put(name, entriesOfThatName); 927 } 928 entriesOfThatName.addLast(ze); 929 } 930 } 931 932 /** 933 * Checks whether the archive starts with a LFH. If it doesn't, 934 * it may be an empty archive. 935 */ 936 private boolean startsWithLocalFileHeader() throws IOException { 937 archive.seek(0); 938 archive.readFully(WORD_BUF); 939 return Arrays.equals(WORD_BUF, ZipArchiveOutputStream.LFH_SIG); 940 } 941 942 /** 943 * InputStream that delegates requests to the underlying 944 * RandomAccessFile, making sure that only bytes from a certain 945 * range can be read. 946 */ 947 private class BoundedInputStream extends InputStream { 948 private long remaining; 949 private long loc; 950 private boolean addDummyByte = false; 951 952 BoundedInputStream(long start, long remaining) { 953 this.remaining = remaining; 954 loc = start; 955 } 956 957 @Override 958 public int read() throws IOException { 959 if (remaining-- <= 0) { 960 if (addDummyByte) { 961 addDummyByte = false; 962 return 0; 963 } 964 return -1; 965 } 966 synchronized (archive) { 967 archive.seek(loc++); 968 return archive.read(); 969 } 970 } 971 972 @Override 973 public int read(byte[] b, int off, int len) throws IOException { 974 if (remaining <= 0) { 975 if (addDummyByte) { 976 addDummyByte = false; 977 b[off] = 0; 978 return 1; 979 } 980 return -1; 981 } 982 983 if (len <= 0) { 984 return 0; 985 } 986 987 if (len > remaining) { 988 len = (int) remaining; 989 } 990 int ret = -1; 991 synchronized (archive) { 992 archive.seek(loc); 993 ret = archive.read(b, off, len); 994 } 995 if (ret > 0) { 996 loc += ret; 997 remaining -= ret; 998 } 999 return ret; 1000 } 1001 1002 /** 1003 * Inflater needs an extra dummy byte for nowrap - see 1004 * Inflater's javadocs. 1005 */ 1006 void addDummy() { 1007 addDummyByte = true; 1008 } 1009 } 1010 1011 private static final class NameAndComment { 1012 private final byte[] name; 1013 private final byte[] comment; 1014 private NameAndComment(byte[] name, byte[] comment) { 1015 this.name = name; 1016 this.comment = comment; 1017 } 1018 } 1019 1020 /** 1021 * Compares two ZipArchiveEntries based on their offset within the archive. 1022 * 1023 * <p>Won't return any meaningful results if one of the entries 1024 * isn't part of the archive at all.</p> 1025 * 1026 * @since 1.1 1027 */ 1028 private final Comparator<ZipArchiveEntry> OFFSET_COMPARATOR = 1029 new Comparator<ZipArchiveEntry>() { 1030 public int compare(ZipArchiveEntry e1, ZipArchiveEntry e2) { 1031 if (e1 == e2) { 1032 return 0; 1033 } 1034 1035 Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null; 1036 Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null; 1037 if (ent1 == null) { 1038 return 1; 1039 } 1040 if (ent2 == null) { 1041 return -1; 1042 } 1043 long val = (ent1.getOffsetEntry().headerOffset 1044 - ent2.getOffsetEntry().headerOffset); 1045 return val == 0 ? 0 : val < 0 ? -1 : +1; 1046 } 1047 }; 1048 1049 /** 1050 * Extends ZipArchiveEntry to store the offset within the archive. 1051 */ 1052 private static class Entry extends ZipArchiveEntry { 1053 1054 private final OffsetEntry offsetEntry; 1055 1056 Entry(OffsetEntry offset) { 1057 this.offsetEntry = offset; 1058 } 1059 1060 OffsetEntry getOffsetEntry() { 1061 return offsetEntry; 1062 } 1063 1064 @Override 1065 public int hashCode() { 1066 return 3 * super.hashCode() 1067 + (int) (offsetEntry.headerOffset % Integer.MAX_VALUE); 1068 } 1069 1070 @Override 1071 public boolean equals(Object other) { 1072 if (super.equals(other)) { 1073 // super.equals would return false if other were not an Entry 1074 Entry otherEntry = (Entry) other; 1075 return offsetEntry.headerOffset 1076 == otherEntry.offsetEntry.headerOffset 1077 && offsetEntry.dataOffset 1078 == otherEntry.offsetEntry.dataOffset; 1079 } 1080 return false; 1081 } 1082 } 1083}