001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.tar; 020 021import java.io.File; 022import java.io.IOException; 023import java.io.OutputStream; 024import java.io.StringWriter; 025import java.nio.ByteBuffer; 026import java.util.Arrays; 027import java.util.HashMap; 028import java.util.Map; 029import org.apache.commons.compress.archivers.ArchiveEntry; 030import org.apache.commons.compress.archivers.ArchiveOutputStream; 031import org.apache.commons.compress.archivers.zip.ZipEncoding; 032import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 033import org.apache.commons.compress.utils.CharsetNames; 034import org.apache.commons.compress.utils.CountingOutputStream; 035 036/** 037 * The TarOutputStream writes a UNIX tar archive as an OutputStream. 038 * Methods are provided to put entries, and then write their contents 039 * by writing to this stream using write(). 040 * @NotThreadSafe 041 */ 042public class TarArchiveOutputStream extends ArchiveOutputStream { 043 /** Fail if a long file name is required in the archive. */ 044 public static final int LONGFILE_ERROR = 0; 045 046 /** Long paths will be truncated in the archive. */ 047 public static final int LONGFILE_TRUNCATE = 1; 048 049 /** GNU tar extensions are used to store long file names in the archive. */ 050 public static final int LONGFILE_GNU = 2; 051 052 /** POSIX/PAX extensions are used to store long file names in the archive. */ 053 public static final int LONGFILE_POSIX = 3; 054 055 /** Fail if a big number (e.g. size > 8GiB) is required in the archive. */ 056 public static final int BIGNUMBER_ERROR = 0; 057 058 /** star/GNU tar/BSD tar extensions are used to store big number in the archive. */ 059 public static final int BIGNUMBER_STAR = 1; 060 061 /** POSIX/PAX extensions are used to store big numbers in the archive. */ 062 public static final int BIGNUMBER_POSIX = 2; 063 064 private long currSize; 065 private String currName; 066 private long currBytes; 067 private final byte[] recordBuf; 068 private int assemLen; 069 private final byte[] assemBuf; 070 private int longFileMode = LONGFILE_ERROR; 071 private int bigNumberMode = BIGNUMBER_ERROR; 072 private int recordsWritten; 073 private final int recordsPerBlock; 074 private final int recordSize; 075 076 private boolean closed = false; 077 078 /** Indicates if putArchiveEntry has been called without closeArchiveEntry */ 079 private boolean haveUnclosedEntry = false; 080 081 /** indicates if this archive is finished */ 082 private boolean finished = false; 083 084 private final OutputStream out; 085 086 private final ZipEncoding encoding; 087 088 private boolean addPaxHeadersForNonAsciiNames = false; 089 private static final ZipEncoding ASCII = 090 ZipEncodingHelper.getZipEncoding("ASCII"); 091 092 /** 093 * Constructor for TarInputStream. 094 * @param os the output stream to use 095 */ 096 public TarArchiveOutputStream(OutputStream os) { 097 this(os, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE); 098 } 099 100 /** 101 * Constructor for TarInputStream. 102 * @param os the output stream to use 103 * @param encoding name of the encoding to use for file names 104 * @since 1.4 105 */ 106 public TarArchiveOutputStream(OutputStream os, String encoding) { 107 this(os, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding); 108 } 109 110 /** 111 * Constructor for TarInputStream. 112 * @param os the output stream to use 113 * @param blockSize the block size to use 114 */ 115 public TarArchiveOutputStream(OutputStream os, int blockSize) { 116 this(os, blockSize, TarConstants.DEFAULT_RCDSIZE); 117 } 118 119 /** 120 * Constructor for TarInputStream. 121 * @param os the output stream to use 122 * @param blockSize the block size to use 123 * @param encoding name of the encoding to use for file names 124 * @since 1.4 125 */ 126 public TarArchiveOutputStream(OutputStream os, int blockSize, 127 String encoding) { 128 this(os, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding); 129 } 130 131 /** 132 * Constructor for TarInputStream. 133 * @param os the output stream to use 134 * @param blockSize the block size to use 135 * @param recordSize the record size to use 136 */ 137 public TarArchiveOutputStream(OutputStream os, int blockSize, int recordSize) { 138 this(os, blockSize, recordSize, null); 139 } 140 141 /** 142 * Constructor for TarInputStream. 143 * @param os the output stream to use 144 * @param blockSize the block size to use 145 * @param recordSize the record size to use 146 * @param encoding name of the encoding to use for file names 147 * @since 1.4 148 */ 149 public TarArchiveOutputStream(OutputStream os, int blockSize, 150 int recordSize, String encoding) { 151 out = new CountingOutputStream(os); 152 this.encoding = ZipEncodingHelper.getZipEncoding(encoding); 153 154 this.assemLen = 0; 155 this.assemBuf = new byte[recordSize]; 156 this.recordBuf = new byte[recordSize]; 157 this.recordSize = recordSize; 158 this.recordsPerBlock = blockSize / recordSize; 159 } 160 161 /** 162 * Set the long file mode. 163 * This can be LONGFILE_ERROR(0), LONGFILE_TRUNCATE(1) or LONGFILE_GNU(2). 164 * This specifies the treatment of long file names (names >= TarConstants.NAMELEN). 165 * Default is LONGFILE_ERROR. 166 * @param longFileMode the mode to use 167 */ 168 public void setLongFileMode(int longFileMode) { 169 this.longFileMode = longFileMode; 170 } 171 172 /** 173 * Set the big number mode. 174 * This can be BIGNUMBER_ERROR(0), BIGNUMBER_POSIX(1) or BIGNUMBER_STAR(2). 175 * This specifies the treatment of big files (sizes > TarConstants.MAXSIZE) and other numeric values to big to fit into a traditional tar header. 176 * Default is BIGNUMBER_ERROR. 177 * @param bigNumberMode the mode to use 178 * @since 1.4 179 */ 180 public void setBigNumberMode(int bigNumberMode) { 181 this.bigNumberMode = bigNumberMode; 182 } 183 184 /** 185 * Whether to add a PAX extension header for non-ASCII file names. 186 * @since 1.4 187 */ 188 public void setAddPaxHeadersForNonAsciiNames(boolean b) { 189 addPaxHeadersForNonAsciiNames = b; 190 } 191 192 @Deprecated 193 @Override 194 public int getCount() { 195 return (int) getBytesWritten(); 196 } 197 198 @Override 199 public long getBytesWritten() { 200 return ((CountingOutputStream) out).getBytesWritten(); 201 } 202 203 /** 204 * Ends the TAR archive without closing the underlying OutputStream. 205 * 206 * An archive consists of a series of file entries terminated by an 207 * end-of-archive entry, which consists of two 512 blocks of zero bytes. 208 * POSIX.1 requires two EOF records, like some other implementations. 209 * 210 * @throws IOException on error 211 */ 212 @Override 213 public void finish() throws IOException { 214 if (finished) { 215 throw new IOException("This archive has already been finished"); 216 } 217 218 if (haveUnclosedEntry) { 219 throw new IOException("This archives contains unclosed entries."); 220 } 221 writeEOFRecord(); 222 writeEOFRecord(); 223 padAsNeeded(); 224 out.flush(); 225 finished = true; 226 } 227 228 /** 229 * Closes the underlying OutputStream. 230 * @throws IOException on error 231 */ 232 @Override 233 public void close() throws IOException { 234 if (!finished) { 235 finish(); 236 } 237 238 if (!closed) { 239 out.close(); 240 closed = true; 241 } 242 } 243 244 /** 245 * Get the record size being used by this stream's TarBuffer. 246 * 247 * @return The TarBuffer record size. 248 */ 249 public int getRecordSize() { 250 return this.recordSize; 251 } 252 253 /** 254 * Put an entry on the output stream. This writes the entry's 255 * header record and positions the output stream for writing 256 * the contents of the entry. Once this method is called, the 257 * stream is ready for calls to write() to write the entry's 258 * contents. Once the contents are written, closeArchiveEntry() 259 * <B>MUST</B> be called to ensure that all buffered data 260 * is completely written to the output stream. 261 * 262 * @param archiveEntry The TarEntry to be written to the archive. 263 * @throws IOException on error 264 * @throws ClassCastException if archiveEntry is not an instance of TarArchiveEntry 265 */ 266 @Override 267 public void putArchiveEntry(ArchiveEntry archiveEntry) throws IOException { 268 if (finished) { 269 throw new IOException("Stream has already been finished"); 270 } 271 TarArchiveEntry entry = (TarArchiveEntry) archiveEntry; 272 Map<String, String> paxHeaders = new HashMap<String, String>(); 273 final String entryName = entry.getName(); 274 boolean paxHeaderContainsPath = handleLongName(entryName, paxHeaders, "path", 275 TarConstants.LF_GNUTYPE_LONGNAME, "file name"); 276 277 final String linkName = entry.getLinkName(); 278 boolean paxHeaderContainsLinkPath = linkName != null && linkName.length() > 0 279 && handleLongName(linkName, paxHeaders, "linkpath", 280 TarConstants.LF_GNUTYPE_LONGLINK, "link name"); 281 282 if (bigNumberMode == BIGNUMBER_POSIX) { 283 addPaxHeadersForBigNumbers(paxHeaders, entry); 284 } else if (bigNumberMode != BIGNUMBER_STAR) { 285 failForBigNumbers(entry); 286 } 287 288 if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath 289 && !ASCII.canEncode(entryName)) { 290 paxHeaders.put("path", entryName); 291 } 292 293 if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsLinkPath 294 && (entry.isLink() || entry.isSymbolicLink()) 295 && !ASCII.canEncode(linkName)) { 296 paxHeaders.put("linkpath", linkName); 297 } 298 299 if (paxHeaders.size() > 0) { 300 writePaxHeaders(entryName, paxHeaders); 301 } 302 303 entry.writeEntryHeader(recordBuf, encoding, 304 bigNumberMode == BIGNUMBER_STAR); 305 writeRecord(recordBuf); 306 307 currBytes = 0; 308 309 if (entry.isDirectory()) { 310 currSize = 0; 311 } else { 312 currSize = entry.getSize(); 313 } 314 currName = entryName; 315 haveUnclosedEntry = true; 316 } 317 318 /** 319 * Close an entry. This method MUST be called for all file 320 * entries that contain data. The reason is that we must 321 * buffer data written to the stream in order to satisfy 322 * the buffer's record based writes. Thus, there may be 323 * data fragments still being assembled that must be written 324 * to the output stream before this entry is closed and the 325 * next entry written. 326 * @throws IOException on error 327 */ 328 @Override 329 public void closeArchiveEntry() throws IOException { 330 if (finished) { 331 throw new IOException("Stream has already been finished"); 332 } 333 if (!haveUnclosedEntry){ 334 throw new IOException("No current entry to close"); 335 } 336 if (assemLen > 0) { 337 for (int i = assemLen; i < assemBuf.length; ++i) { 338 assemBuf[i] = 0; 339 } 340 341 writeRecord(assemBuf); 342 343 currBytes += assemLen; 344 assemLen = 0; 345 } 346 347 if (currBytes < currSize) { 348 throw new IOException("entry '" + currName + "' closed at '" 349 + currBytes 350 + "' before the '" + currSize 351 + "' bytes specified in the header were written"); 352 } 353 haveUnclosedEntry = false; 354 } 355 356 /** 357 * Writes bytes to the current tar archive entry. This method 358 * is aware of the current entry and will throw an exception if 359 * you attempt to write bytes past the length specified for the 360 * current entry. The method is also (painfully) aware of the 361 * record buffering required by TarBuffer, and manages buffers 362 * that are not a multiple of recordsize in length, including 363 * assembling records from small buffers. 364 * 365 * @param wBuf The buffer to write to the archive. 366 * @param wOffset The offset in the buffer from which to get bytes. 367 * @param numToWrite The number of bytes to write. 368 * @throws IOException on error 369 */ 370 @Override 371 public void write(byte[] wBuf, int wOffset, int numToWrite) throws IOException { 372 if (!haveUnclosedEntry) { 373 throw new IllegalStateException("No current tar entry"); 374 } 375 if (currBytes + numToWrite > currSize) { 376 throw new IOException("request to write '" + numToWrite 377 + "' bytes exceeds size in header of '" 378 + currSize + "' bytes for entry '" 379 + currName + "'"); 380 381 // 382 // We have to deal with assembly!!! 383 // The programmer can be writing little 32 byte chunks for all 384 // we know, and we must assemble complete records for writing. 385 // REVIEW Maybe this should be in TarBuffer? Could that help to 386 // eliminate some of the buffer copying. 387 // 388 } 389 390 if (assemLen > 0) { 391 if (assemLen + numToWrite >= recordBuf.length) { 392 int aLen = recordBuf.length - assemLen; 393 394 System.arraycopy(assemBuf, 0, recordBuf, 0, 395 assemLen); 396 System.arraycopy(wBuf, wOffset, recordBuf, 397 assemLen, aLen); 398 writeRecord(recordBuf); 399 400 currBytes += recordBuf.length; 401 wOffset += aLen; 402 numToWrite -= aLen; 403 assemLen = 0; 404 } else { 405 System.arraycopy(wBuf, wOffset, assemBuf, assemLen, 406 numToWrite); 407 408 wOffset += numToWrite; 409 assemLen += numToWrite; 410 numToWrite = 0; 411 } 412 } 413 414 // 415 // When we get here we have EITHER: 416 // o An empty "assemble" buffer. 417 // o No bytes to write (numToWrite == 0) 418 // 419 while (numToWrite > 0) { 420 if (numToWrite < recordBuf.length) { 421 System.arraycopy(wBuf, wOffset, assemBuf, assemLen, 422 numToWrite); 423 424 assemLen += numToWrite; 425 426 break; 427 } 428 429 writeRecord(wBuf, wOffset); 430 431 int num = recordBuf.length; 432 433 currBytes += num; 434 numToWrite -= num; 435 wOffset += num; 436 } 437 } 438 439 /** 440 * Writes a PAX extended header with the given map as contents. 441 * @since 1.4 442 */ 443 void writePaxHeaders(String entryName, 444 Map<String, String> headers) throws IOException { 445 String name = "./PaxHeaders.X/" + stripTo7Bits(entryName); 446 if (name.length() >= TarConstants.NAMELEN) { 447 name = name.substring(0, TarConstants.NAMELEN - 1); 448 } 449 TarArchiveEntry pex = new TarArchiveEntry(name, 450 TarConstants.LF_PAX_EXTENDED_HEADER_LC); 451 452 StringWriter w = new StringWriter(); 453 for (Map.Entry<String, String> h : headers.entrySet()) { 454 String key = h.getKey(); 455 String value = h.getValue(); 456 int len = key.length() + value.length() 457 + 3 /* blank, equals and newline */ 458 + 2 /* guess 9 < actual length < 100 */; 459 String line = len + " " + key + "=" + value + "\n"; 460 int actualLength = line.getBytes(CharsetNames.UTF_8).length; 461 while (len != actualLength) { 462 // Adjust for cases where length < 10 or > 100 463 // or where UTF-8 encoding isn't a single octet 464 // per character. 465 // Must be in loop as size may go from 99 to 100 in 466 // first pass so we'd need a second. 467 len = actualLength; 468 line = len + " " + key + "=" + value + "\n"; 469 actualLength = line.getBytes(CharsetNames.UTF_8).length; 470 } 471 w.write(line); 472 } 473 byte[] data = w.toString().getBytes(CharsetNames.UTF_8); 474 pex.setSize(data.length); 475 putArchiveEntry(pex); 476 write(data); 477 closeArchiveEntry(); 478 } 479 480 private String stripTo7Bits(String name) { 481 final int length = name.length(); 482 StringBuilder result = new StringBuilder(length); 483 for (int i = 0; i < length; i++) { 484 char stripped = (char) (name.charAt(i) & 0x7F); 485 if (shouldBeReplaced(stripped)) { 486 result.append("_"); 487 } else { 488 result.append(stripped); 489 } 490 } 491 return result.toString(); 492 } 493 494 /** 495 * @return true if the character could lead to problems when used 496 * inside a TarArchiveEntry name for a PAX header. 497 */ 498 private boolean shouldBeReplaced(char c) { 499 return c == 0 // would be read as Trailing null 500 || c == '/' // when used as last character TAE will consider the PAX header a directory 501 || c == '\\'; // same as '/' as slashes get "normalized" on Windows 502 } 503 504 /** 505 * Write an EOF (end of archive) record to the tar archive. 506 * An EOF record consists of a record of all zeros. 507 */ 508 private void writeEOFRecord() throws IOException { 509 Arrays.fill(recordBuf, (byte) 0); 510 writeRecord(recordBuf); 511 } 512 513 @Override 514 public void flush() throws IOException { 515 out.flush(); 516 } 517 518 @Override 519 public ArchiveEntry createArchiveEntry(File inputFile, String entryName) 520 throws IOException { 521 if(finished) { 522 throw new IOException("Stream has already been finished"); 523 } 524 return new TarArchiveEntry(inputFile, entryName); 525 } 526 527 /** 528 * Write an archive record to the archive. 529 * 530 * @param record The record data to write to the archive. 531 * @throws IOException on error 532 */ 533 private void writeRecord(byte[] record) throws IOException { 534 if (record.length != recordSize) { 535 throw new IOException("record to write has length '" 536 + record.length 537 + "' which is not the record size of '" 538 + recordSize + "'"); 539 } 540 541 out.write(record); 542 recordsWritten++; 543 } 544 545 /** 546 * Write an archive record to the archive, where the record may be 547 * inside of a larger array buffer. The buffer must be "offset plus 548 * record size" long. 549 * 550 * @param buf The buffer containing the record data to write. 551 * @param offset The offset of the record data within buf. 552 * @throws IOException on error 553 */ 554 private void writeRecord(byte[] buf, int offset) throws IOException { 555 556 if (offset + recordSize > buf.length) { 557 throw new IOException("record has length '" + buf.length 558 + "' with offset '" + offset 559 + "' which is less than the record size of '" 560 + recordSize + "'"); 561 } 562 563 out.write(buf, offset, recordSize); 564 recordsWritten++; 565 } 566 567 private void padAsNeeded() throws IOException { 568 int start = recordsWritten % recordsPerBlock; 569 if (start != 0) { 570 for (int i = start; i < recordsPerBlock; i++) { 571 writeEOFRecord(); 572 } 573 } 574 } 575 576 private void addPaxHeadersForBigNumbers(Map<String, String> paxHeaders, 577 TarArchiveEntry entry) { 578 addPaxHeaderForBigNumber(paxHeaders, "size", entry.getSize(), 579 TarConstants.MAXSIZE); 580 addPaxHeaderForBigNumber(paxHeaders, "gid", entry.getGroupId(), 581 TarConstants.MAXID); 582 addPaxHeaderForBigNumber(paxHeaders, "mtime", 583 entry.getModTime().getTime() / 1000, 584 TarConstants.MAXSIZE); 585 addPaxHeaderForBigNumber(paxHeaders, "uid", entry.getUserId(), 586 TarConstants.MAXID); 587 // star extensions by J\u00f6rg Schilling 588 addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devmajor", 589 entry.getDevMajor(), TarConstants.MAXID); 590 addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devminor", 591 entry.getDevMinor(), TarConstants.MAXID); 592 // there is no PAX header for file mode 593 failForBigNumber("mode", entry.getMode(), TarConstants.MAXID); 594 } 595 596 private void addPaxHeaderForBigNumber(Map<String, String> paxHeaders, 597 String header, long value, 598 long maxValue) { 599 if (value < 0 || value > maxValue) { 600 paxHeaders.put(header, String.valueOf(value)); 601 } 602 } 603 604 private void failForBigNumbers(TarArchiveEntry entry) { 605 failForBigNumber("entry size", entry.getSize(), TarConstants.MAXSIZE); 606 failForBigNumber("group id", entry.getGroupId(), TarConstants.MAXID); 607 failForBigNumber("last modification time", 608 entry.getModTime().getTime() / 1000, 609 TarConstants.MAXSIZE); 610 failForBigNumber("user id", entry.getUserId(), TarConstants.MAXID); 611 failForBigNumber("mode", entry.getMode(), TarConstants.MAXID); 612 failForBigNumber("major device number", entry.getDevMajor(), 613 TarConstants.MAXID); 614 failForBigNumber("minor device number", entry.getDevMinor(), 615 TarConstants.MAXID); 616 } 617 618 private void failForBigNumber(String field, long value, long maxValue) { 619 if (value < 0 || value > maxValue) { 620 throw new RuntimeException(field + " '" + value 621 + "' is too big ( > " 622 + maxValue + " )"); 623 } 624 } 625 626 /** 627 * Handles long file or link names according to the longFileMode setting. 628 * 629 * <p>I.e. if the given name is too long to be written to a plain 630 * tar header then 631 * <ul> 632 * <li>it creates a pax header who's name is given by the 633 * paxHeaderName parameter if longFileMode is POSIX</li> 634 * <li>it creates a GNU longlink entry who's type is given by 635 * the linkType parameter if longFileMode is GNU</li> 636 * <li>it throws an exception if longFileMode is ERROR</li> 637 * <li>it truncates the name if longFileMode is TRUNCATE</li> 638 * </ul></p> 639 * 640 * @param name the name to write 641 * @param paxHeaders current map of pax headers 642 * @param paxHeaderName name of the pax header to write 643 * @param linkType type of the GNU entry to write 644 * @param fieldName the name of the field 645 * @return whether a pax header has been written. 646 */ 647 private boolean handleLongName(String name, 648 Map<String, String> paxHeaders, 649 String paxHeaderName, byte linkType, String fieldName) 650 throws IOException { 651 final ByteBuffer encodedName = encoding.encode(name); 652 final int len = encodedName.limit() - encodedName.position(); 653 if (len >= TarConstants.NAMELEN) { 654 655 if (longFileMode == LONGFILE_POSIX) { 656 paxHeaders.put(paxHeaderName, name); 657 return true; 658 } else if (longFileMode == LONGFILE_GNU) { 659 // create a TarEntry for the LongLink, the contents 660 // of which are the link's name 661 TarArchiveEntry longLinkEntry = new TarArchiveEntry(TarConstants.GNU_LONGLINK, linkType); 662 663 longLinkEntry.setSize(len + 1); // +1 for NUL 664 putArchiveEntry(longLinkEntry); 665 write(encodedName.array(), encodedName.arrayOffset(), len); 666 write(0); // NUL terminator 667 closeArchiveEntry(); 668 } else if (longFileMode != LONGFILE_TRUNCATE) { 669 throw new RuntimeException(fieldName + " '" + name 670 + "' is too long ( > " 671 + TarConstants.NAMELEN + " bytes)"); 672 } 673 } 674 return false; 675 } 676}