001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import java.io.File;
022import java.io.IOException;
023import java.io.OutputStream;
024import java.io.StringWriter;
025import java.nio.ByteBuffer;
026import java.util.Arrays;
027import java.util.HashMap;
028import java.util.Map;
029import org.apache.commons.compress.archivers.ArchiveEntry;
030import org.apache.commons.compress.archivers.ArchiveOutputStream;
031import org.apache.commons.compress.archivers.zip.ZipEncoding;
032import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
033import org.apache.commons.compress.utils.CharsetNames;
034import org.apache.commons.compress.utils.CountingOutputStream;
035
036/**
037 * The TarOutputStream writes a UNIX tar archive as an OutputStream.
038 * Methods are provided to put entries, and then write their contents
039 * by writing to this stream using write().
040 * @NotThreadSafe
041 */
042public class TarArchiveOutputStream extends ArchiveOutputStream {
043    /** Fail if a long file name is required in the archive. */
044    public static final int LONGFILE_ERROR = 0;
045
046    /** Long paths will be truncated in the archive. */
047    public static final int LONGFILE_TRUNCATE = 1;
048
049    /** GNU tar extensions are used to store long file names in the archive. */
050    public static final int LONGFILE_GNU = 2;
051
052    /** POSIX/PAX extensions are used to store long file names in the archive. */
053    public static final int LONGFILE_POSIX = 3;
054
055    /** Fail if a big number (e.g. size > 8GiB) is required in the archive. */
056    public static final int BIGNUMBER_ERROR = 0;
057
058    /** star/GNU tar/BSD tar extensions are used to store big number in the archive. */
059    public static final int BIGNUMBER_STAR = 1;
060
061    /** POSIX/PAX extensions are used to store big numbers in the archive. */
062    public static final int BIGNUMBER_POSIX = 2;
063
064    private long      currSize;
065    private String    currName;
066    private long      currBytes;
067    private final byte[]    recordBuf;
068    private int       assemLen;
069    private final byte[]    assemBuf;
070    private int       longFileMode = LONGFILE_ERROR;
071    private int       bigNumberMode = BIGNUMBER_ERROR;
072    private int recordsWritten;
073    private final int recordsPerBlock;
074    private final int recordSize;
075
076    private boolean closed = false;
077
078    /** Indicates if putArchiveEntry has been called without closeArchiveEntry */
079    private boolean haveUnclosedEntry = false;
080
081    /** indicates if this archive is finished */
082    private boolean finished = false;
083
084    private final OutputStream out;
085
086    private final ZipEncoding encoding;
087
088    private boolean addPaxHeadersForNonAsciiNames = false;
089    private static final ZipEncoding ASCII =
090        ZipEncodingHelper.getZipEncoding("ASCII");
091
092    /**
093     * Constructor for TarInputStream.
094     * @param os the output stream to use
095     */
096    public TarArchiveOutputStream(OutputStream os) {
097        this(os, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
098    }
099
100    /**
101     * Constructor for TarInputStream.
102     * @param os the output stream to use
103     * @param encoding name of the encoding to use for file names
104     * @since 1.4
105     */
106    public TarArchiveOutputStream(OutputStream os, String encoding) {
107        this(os, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding);
108    }
109
110    /**
111     * Constructor for TarInputStream.
112     * @param os the output stream to use
113     * @param blockSize the block size to use
114     */
115    public TarArchiveOutputStream(OutputStream os, int blockSize) {
116        this(os, blockSize, TarConstants.DEFAULT_RCDSIZE);
117    }
118
119    /**
120     * Constructor for TarInputStream.
121     * @param os the output stream to use
122     * @param blockSize the block size to use
123     * @param encoding name of the encoding to use for file names
124     * @since 1.4
125     */
126    public TarArchiveOutputStream(OutputStream os, int blockSize,
127                                  String encoding) {
128        this(os, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
129    }
130
131    /**
132     * Constructor for TarInputStream.
133     * @param os the output stream to use
134     * @param blockSize the block size to use
135     * @param recordSize the record size to use
136     */
137    public TarArchiveOutputStream(OutputStream os, int blockSize, int recordSize) {
138        this(os, blockSize, recordSize, null);
139    }
140
141    /**
142     * Constructor for TarInputStream.
143     * @param os the output stream to use
144     * @param blockSize the block size to use
145     * @param recordSize the record size to use
146     * @param encoding name of the encoding to use for file names
147     * @since 1.4
148     */
149    public TarArchiveOutputStream(OutputStream os, int blockSize,
150                                  int recordSize, String encoding) {
151        out = new CountingOutputStream(os);
152        this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
153
154        this.assemLen = 0;
155        this.assemBuf = new byte[recordSize];
156        this.recordBuf = new byte[recordSize];
157        this.recordSize = recordSize;
158        this.recordsPerBlock = blockSize / recordSize;
159    }
160
161    /**
162     * Set the long file mode.
163     * This can be LONGFILE_ERROR(0), LONGFILE_TRUNCATE(1) or LONGFILE_GNU(2).
164     * This specifies the treatment of long file names (names >= TarConstants.NAMELEN).
165     * Default is LONGFILE_ERROR.
166     * @param longFileMode the mode to use
167     */
168    public void setLongFileMode(int longFileMode) {
169        this.longFileMode = longFileMode;
170    }
171
172    /**
173     * Set the big number mode.
174     * This can be BIGNUMBER_ERROR(0), BIGNUMBER_POSIX(1) or BIGNUMBER_STAR(2).
175     * This specifies the treatment of big files (sizes > TarConstants.MAXSIZE) and other numeric values to big to fit into a traditional tar header.
176     * Default is BIGNUMBER_ERROR.
177     * @param bigNumberMode the mode to use
178     * @since 1.4
179     */
180    public void setBigNumberMode(int bigNumberMode) {
181        this.bigNumberMode = bigNumberMode;
182    }
183
184    /**
185     * Whether to add a PAX extension header for non-ASCII file names.
186     * @since 1.4
187     */
188    public void setAddPaxHeadersForNonAsciiNames(boolean b) {
189        addPaxHeadersForNonAsciiNames = b;
190    }
191
192    @Deprecated
193    @Override
194    public int getCount() {
195        return (int) getBytesWritten();
196    }
197
198    @Override
199    public long getBytesWritten() {
200        return ((CountingOutputStream) out).getBytesWritten();
201    }
202
203    /**
204     * Ends the TAR archive without closing the underlying OutputStream.
205     * 
206     * An archive consists of a series of file entries terminated by an
207     * end-of-archive entry, which consists of two 512 blocks of zero bytes. 
208     * POSIX.1 requires two EOF records, like some other implementations.
209     * 
210     * @throws IOException on error
211     */
212    @Override
213    public void finish() throws IOException {
214        if (finished) {
215            throw new IOException("This archive has already been finished");
216        }
217
218        if (haveUnclosedEntry) {
219            throw new IOException("This archives contains unclosed entries.");
220        }
221        writeEOFRecord();
222        writeEOFRecord();
223        padAsNeeded();
224        out.flush();
225        finished = true;
226    }
227
228    /**
229     * Closes the underlying OutputStream.
230     * @throws IOException on error
231     */
232    @Override
233    public void close() throws IOException {
234        if (!finished) {
235            finish();
236        }
237
238        if (!closed) {
239            out.close();
240            closed = true;
241        }
242    }
243
244    /**
245     * Get the record size being used by this stream's TarBuffer.
246     *
247     * @return The TarBuffer record size.
248     */
249    public int getRecordSize() {
250        return this.recordSize;
251    }
252
253    /**
254     * Put an entry on the output stream. This writes the entry's
255     * header record and positions the output stream for writing
256     * the contents of the entry. Once this method is called, the
257     * stream is ready for calls to write() to write the entry's
258     * contents. Once the contents are written, closeArchiveEntry()
259     * <B>MUST</B> be called to ensure that all buffered data
260     * is completely written to the output stream.
261     *
262     * @param archiveEntry The TarEntry to be written to the archive.
263     * @throws IOException on error
264     * @throws ClassCastException if archiveEntry is not an instance of TarArchiveEntry
265     */
266    @Override
267    public void putArchiveEntry(ArchiveEntry archiveEntry) throws IOException {
268        if (finished) {
269            throw new IOException("Stream has already been finished");
270        }
271        TarArchiveEntry entry = (TarArchiveEntry) archiveEntry;
272        Map<String, String> paxHeaders = new HashMap<String, String>();
273        final String entryName = entry.getName();
274        boolean paxHeaderContainsPath = handleLongName(entryName, paxHeaders, "path",
275                                                       TarConstants.LF_GNUTYPE_LONGNAME, "file name");
276
277        final String linkName = entry.getLinkName();
278        boolean paxHeaderContainsLinkPath = linkName != null && linkName.length() > 0
279            && handleLongName(linkName, paxHeaders, "linkpath",
280                              TarConstants.LF_GNUTYPE_LONGLINK, "link name");
281
282        if (bigNumberMode == BIGNUMBER_POSIX) {
283            addPaxHeadersForBigNumbers(paxHeaders, entry);
284        } else if (bigNumberMode != BIGNUMBER_STAR) {
285            failForBigNumbers(entry);
286        }
287
288        if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath
289            && !ASCII.canEncode(entryName)) {
290            paxHeaders.put("path", entryName);
291        }
292
293        if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsLinkPath
294            && (entry.isLink() || entry.isSymbolicLink())
295            && !ASCII.canEncode(linkName)) {
296            paxHeaders.put("linkpath", linkName);
297        }
298
299        if (paxHeaders.size() > 0) {
300            writePaxHeaders(entryName, paxHeaders);
301        }
302
303        entry.writeEntryHeader(recordBuf, encoding,
304                               bigNumberMode == BIGNUMBER_STAR);
305        writeRecord(recordBuf);
306
307        currBytes = 0;
308
309        if (entry.isDirectory()) {
310            currSize = 0;
311        } else {
312            currSize = entry.getSize();
313        }
314        currName = entryName;
315        haveUnclosedEntry = true;
316    }
317
318    /**
319     * Close an entry. This method MUST be called for all file
320     * entries that contain data. The reason is that we must
321     * buffer data written to the stream in order to satisfy
322     * the buffer's record based writes. Thus, there may be
323     * data fragments still being assembled that must be written
324     * to the output stream before this entry is closed and the
325     * next entry written.
326     * @throws IOException on error
327     */
328    @Override
329    public void closeArchiveEntry() throws IOException {
330        if (finished) {
331            throw new IOException("Stream has already been finished");
332        }
333        if (!haveUnclosedEntry){
334            throw new IOException("No current entry to close");
335        }
336        if (assemLen > 0) {
337            for (int i = assemLen; i < assemBuf.length; ++i) {
338                assemBuf[i] = 0;
339            }
340
341            writeRecord(assemBuf);
342
343            currBytes += assemLen;
344            assemLen = 0;
345        }
346
347        if (currBytes < currSize) {
348            throw new IOException("entry '" + currName + "' closed at '"
349                                  + currBytes
350                                  + "' before the '" + currSize
351                                  + "' bytes specified in the header were written");
352        }
353        haveUnclosedEntry = false;
354    }
355
356    /**
357     * Writes bytes to the current tar archive entry. This method
358     * is aware of the current entry and will throw an exception if
359     * you attempt to write bytes past the length specified for the
360     * current entry. The method is also (painfully) aware of the
361     * record buffering required by TarBuffer, and manages buffers
362     * that are not a multiple of recordsize in length, including
363     * assembling records from small buffers.
364     *
365     * @param wBuf The buffer to write to the archive.
366     * @param wOffset The offset in the buffer from which to get bytes.
367     * @param numToWrite The number of bytes to write.
368     * @throws IOException on error
369     */
370    @Override
371    public void write(byte[] wBuf, int wOffset, int numToWrite) throws IOException {
372        if (!haveUnclosedEntry) {
373            throw new IllegalStateException("No current tar entry");
374        }
375        if (currBytes + numToWrite > currSize) {
376            throw new IOException("request to write '" + numToWrite
377                                  + "' bytes exceeds size in header of '"
378                                  + currSize + "' bytes for entry '"
379                                  + currName + "'");
380
381            //
382            // We have to deal with assembly!!!
383            // The programmer can be writing little 32 byte chunks for all
384            // we know, and we must assemble complete records for writing.
385            // REVIEW Maybe this should be in TarBuffer? Could that help to
386            // eliminate some of the buffer copying.
387            //
388        }
389
390        if (assemLen > 0) {
391            if (assemLen + numToWrite >= recordBuf.length) {
392                int aLen = recordBuf.length - assemLen;
393
394                System.arraycopy(assemBuf, 0, recordBuf, 0,
395                                 assemLen);
396                System.arraycopy(wBuf, wOffset, recordBuf,
397                                 assemLen, aLen);
398                writeRecord(recordBuf);
399
400                currBytes += recordBuf.length;
401                wOffset += aLen;
402                numToWrite -= aLen;
403                assemLen = 0;
404            } else {
405                System.arraycopy(wBuf, wOffset, assemBuf, assemLen,
406                                 numToWrite);
407
408                wOffset += numToWrite;
409                assemLen += numToWrite;
410                numToWrite = 0;
411            }
412        }
413
414        //
415        // When we get here we have EITHER:
416        // o An empty "assemble" buffer.
417        // o No bytes to write (numToWrite == 0)
418        //
419        while (numToWrite > 0) {
420            if (numToWrite < recordBuf.length) {
421                System.arraycopy(wBuf, wOffset, assemBuf, assemLen,
422                                 numToWrite);
423
424                assemLen += numToWrite;
425
426                break;
427            }
428
429            writeRecord(wBuf, wOffset);
430
431            int num = recordBuf.length;
432
433            currBytes += num;
434            numToWrite -= num;
435            wOffset += num;
436        }
437    }
438
439    /**
440     * Writes a PAX extended header with the given map as contents.
441     * @since 1.4
442     */
443    void writePaxHeaders(String entryName,
444                         Map<String, String> headers) throws IOException {
445        String name = "./PaxHeaders.X/" + stripTo7Bits(entryName);
446        if (name.length() >= TarConstants.NAMELEN) {
447            name = name.substring(0, TarConstants.NAMELEN - 1);
448        }
449        TarArchiveEntry pex = new TarArchiveEntry(name,
450                                                  TarConstants.LF_PAX_EXTENDED_HEADER_LC);
451
452        StringWriter w = new StringWriter();
453        for (Map.Entry<String, String> h : headers.entrySet()) {
454            String key = h.getKey();
455            String value = h.getValue();
456            int len = key.length() + value.length()
457                + 3 /* blank, equals and newline */
458                + 2 /* guess 9 < actual length < 100 */;
459            String line = len + " " + key + "=" + value + "\n";
460            int actualLength = line.getBytes(CharsetNames.UTF_8).length;
461            while (len != actualLength) {
462                // Adjust for cases where length < 10 or > 100
463                // or where UTF-8 encoding isn't a single octet
464                // per character.
465                // Must be in loop as size may go from 99 to 100 in
466                // first pass so we'd need a second.
467                len = actualLength;
468                line = len + " " + key + "=" + value + "\n";
469                actualLength = line.getBytes(CharsetNames.UTF_8).length;
470            }
471            w.write(line);
472        }
473        byte[] data = w.toString().getBytes(CharsetNames.UTF_8);
474        pex.setSize(data.length);
475        putArchiveEntry(pex);
476        write(data);
477        closeArchiveEntry();
478    }
479
480    private String stripTo7Bits(String name) {
481        final int length = name.length();
482        StringBuilder result = new StringBuilder(length);
483        for (int i = 0; i < length; i++) {
484            char stripped = (char) (name.charAt(i) & 0x7F);
485            if (shouldBeReplaced(stripped)) {
486                result.append("_");
487            } else {
488                result.append(stripped);
489            }
490        }
491        return result.toString();
492    }
493
494    /**
495     * @return true if the character could lead to problems when used
496     * inside a TarArchiveEntry name for a PAX header.
497     */
498    private boolean shouldBeReplaced(char c) {
499        return c == 0 // would be read as Trailing null
500            || c == '/' // when used as last character TAE will consider the PAX header a directory
501            || c == '\\'; // same as '/' as slashes get "normalized" on Windows
502    }
503
504    /**
505     * Write an EOF (end of archive) record to the tar archive.
506     * An EOF record consists of a record of all zeros.
507     */
508    private void writeEOFRecord() throws IOException {
509        Arrays.fill(recordBuf, (byte) 0);
510        writeRecord(recordBuf);
511    }
512
513    @Override
514    public void flush() throws IOException {
515        out.flush();
516    }
517
518    @Override
519    public ArchiveEntry createArchiveEntry(File inputFile, String entryName)
520            throws IOException {
521        if(finished) {
522            throw new IOException("Stream has already been finished");
523        }
524        return new TarArchiveEntry(inputFile, entryName);
525    }
526    
527    /**
528     * Write an archive record to the archive.
529     *
530     * @param record The record data to write to the archive.
531     * @throws IOException on error
532     */
533    private void writeRecord(byte[] record) throws IOException {
534        if (record.length != recordSize) {
535            throw new IOException("record to write has length '"
536                                  + record.length
537                                  + "' which is not the record size of '"
538                                  + recordSize + "'");
539        }
540
541        out.write(record);
542        recordsWritten++;
543    }
544    
545    /**
546     * Write an archive record to the archive, where the record may be
547     * inside of a larger array buffer. The buffer must be "offset plus
548     * record size" long.
549     *
550     * @param buf The buffer containing the record data to write.
551     * @param offset The offset of the record data within buf.
552     * @throws IOException on error
553     */
554    private void writeRecord(byte[] buf, int offset) throws IOException {
555 
556        if (offset + recordSize > buf.length) {
557            throw new IOException("record has length '" + buf.length
558                                  + "' with offset '" + offset
559                                  + "' which is less than the record size of '"
560                                  + recordSize + "'");
561        }
562
563        out.write(buf, offset, recordSize);
564        recordsWritten++;
565    }
566
567    private void padAsNeeded() throws IOException {
568        int start = recordsWritten % recordsPerBlock;
569        if (start != 0) {
570            for (int i = start; i < recordsPerBlock; i++) {
571                writeEOFRecord();
572            }
573        }
574    }
575
576    private void addPaxHeadersForBigNumbers(Map<String, String> paxHeaders,
577                                            TarArchiveEntry entry) {
578        addPaxHeaderForBigNumber(paxHeaders, "size", entry.getSize(),
579                                 TarConstants.MAXSIZE);
580        addPaxHeaderForBigNumber(paxHeaders, "gid", entry.getGroupId(),
581                                 TarConstants.MAXID);
582        addPaxHeaderForBigNumber(paxHeaders, "mtime",
583                                 entry.getModTime().getTime() / 1000,
584                                 TarConstants.MAXSIZE);
585        addPaxHeaderForBigNumber(paxHeaders, "uid", entry.getUserId(),
586                                 TarConstants.MAXID);
587        // star extensions by J\u00f6rg Schilling
588        addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devmajor",
589                                 entry.getDevMajor(), TarConstants.MAXID);
590        addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devminor",
591                                 entry.getDevMinor(), TarConstants.MAXID);
592        // there is no PAX header for file mode
593        failForBigNumber("mode", entry.getMode(), TarConstants.MAXID);
594    }
595
596    private void addPaxHeaderForBigNumber(Map<String, String> paxHeaders,
597                                          String header, long value,
598                                          long maxValue) {
599        if (value < 0 || value > maxValue) {
600            paxHeaders.put(header, String.valueOf(value));
601        }
602    }
603
604    private void failForBigNumbers(TarArchiveEntry entry) {
605        failForBigNumber("entry size", entry.getSize(), TarConstants.MAXSIZE);
606        failForBigNumber("group id", entry.getGroupId(), TarConstants.MAXID);
607        failForBigNumber("last modification time",
608                         entry.getModTime().getTime() / 1000,
609                         TarConstants.MAXSIZE);
610        failForBigNumber("user id", entry.getUserId(), TarConstants.MAXID);
611        failForBigNumber("mode", entry.getMode(), TarConstants.MAXID);
612        failForBigNumber("major device number", entry.getDevMajor(),
613                         TarConstants.MAXID);
614        failForBigNumber("minor device number", entry.getDevMinor(),
615                         TarConstants.MAXID);
616    }
617
618    private void failForBigNumber(String field, long value, long maxValue) {
619        if (value < 0 || value > maxValue) {
620            throw new RuntimeException(field + " '" + value
621                                       + "' is too big ( > "
622                                       + maxValue + " )");
623        }
624    }
625
626    /**
627     * Handles long file or link names according to the longFileMode setting.
628     *
629     * <p>I.e. if the given name is too long to be written to a plain
630     * tar header then
631     * <ul>
632     *   <li>it creates a pax header who's name is given by the
633     *   paxHeaderName parameter if longFileMode is POSIX</li>
634     *   <li>it creates a GNU longlink entry who's type is given by
635     *   the linkType parameter if longFileMode is GNU</li>
636     *   <li>it throws an exception if longFileMode is ERROR</li>
637     *   <li>it truncates the name if longFileMode is TRUNCATE</li>
638     * </ul></p>
639     *
640     * @param name the name to write
641     * @param paxHeaders current map of pax headers
642     * @param paxHeaderName name of the pax header to write
643     * @param linkType type of the GNU entry to write
644     * @param fieldName the name of the field
645     * @return whether a pax header has been written.
646     */
647    private boolean handleLongName(String name,
648                                   Map<String, String> paxHeaders,
649                                   String paxHeaderName, byte linkType, String fieldName)
650        throws IOException {
651        final ByteBuffer encodedName = encoding.encode(name);
652        final int len = encodedName.limit() - encodedName.position();
653        if (len >= TarConstants.NAMELEN) {
654
655            if (longFileMode == LONGFILE_POSIX) {
656                paxHeaders.put(paxHeaderName, name);
657                return true;
658            } else if (longFileMode == LONGFILE_GNU) {
659                // create a TarEntry for the LongLink, the contents
660                // of which are the link's name
661                TarArchiveEntry longLinkEntry = new TarArchiveEntry(TarConstants.GNU_LONGLINK, linkType);
662
663                longLinkEntry.setSize(len + 1); // +1 for NUL
664                putArchiveEntry(longLinkEntry);
665                write(encodedName.array(), encodedName.arrayOffset(), len);
666                write(0); // NUL terminator
667                closeArchiveEntry();
668            } else if (longFileMode != LONGFILE_TRUNCATE) {
669                throw new RuntimeException(fieldName + " '" + name
670                                           + "' is too long ( > "
671                                           + TarConstants.NAMELEN + " bytes)");
672            }
673        }
674        return false;
675    }
676}