001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.OutputStream;
025
026import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
027import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
028import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
029import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
030import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
031import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
032import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
033import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
034import org.apache.commons.compress.archivers.sevenz.SevenZFile;
035import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
036import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
037import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
038import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
039import org.apache.commons.compress.utils.IOUtils;
040
041/**
042 * Factory to create Archive[In|Out]putStreams from names or the first bytes of
043 * the InputStream. In order to add other implementations, you should extend
044 * ArchiveStreamFactory and override the appropriate methods (and call their
045 * implementation from super of course).
046 * 
047 * Compressing a ZIP-File:
048 * 
049 * <pre>
050 * final OutputStream out = new FileOutputStream(output); 
051 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
052 * 
053 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
054 * IOUtils.copy(new FileInputStream(file1), os);
055 * os.closeArchiveEntry();
056 *
057 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
058 * IOUtils.copy(new FileInputStream(file2), os);
059 * os.closeArchiveEntry();
060 * os.close();
061 * </pre>
062 * 
063 * Decompressing a ZIP-File:
064 * 
065 * <pre>
066 * final InputStream is = new FileInputStream(input); 
067 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
068 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry();
069 * OutputStream out = new FileOutputStream(new File(dir, entry.getName()));
070 * IOUtils.copy(in, out);
071 * out.close();
072 * in.close();
073 * </pre>
074 * 
075 * @Immutable
076 */
077public class ArchiveStreamFactory {
078
079    /**
080     * Constant used to identify the AR archive format.
081     * @since 1.1
082     */
083    public static final String AR = "ar";
084    /**
085     * Constant used to identify the ARJ archive format.
086     * @since 1.6
087     */
088    public static final String ARJ = "arj";
089    /**
090     * Constant used to identify the CPIO archive format.
091     * @since 1.1
092     */
093    public static final String CPIO = "cpio";
094    /**
095     * Constant used to identify the Unix DUMP archive format.
096     * @since 1.3
097     */
098    public static final String DUMP = "dump";
099    /**
100     * Constant used to identify the JAR archive format.
101     * @since 1.1
102     */
103    public static final String JAR = "jar";
104    /**
105     * Constant used to identify the TAR archive format.
106     * @since 1.1
107     */
108    public static final String TAR = "tar";
109    /**
110     * Constant used to identify the ZIP archive format.
111     * @since 1.1
112     */
113    public static final String ZIP = "zip";
114    /**
115     * Constant used to identify the 7z archive format.
116     * @since 1.8
117     */
118    public static final String SEVEN_Z = "7z";
119
120    /**
121     * Entry encoding, null for the default.
122     */
123    private String entryEncoding = null;
124
125    /**
126     * Returns the encoding to use for arj, zip, dump, cpio and tar
127     * files, or null for the default.
128     *
129     * @return entry encoding, or null
130     * @since 1.5
131     */
132    public String getEntryEncoding() {
133        return entryEncoding;
134    }
135
136    /**
137     * Sets the encoding to use for arj, zip, dump, cpio and tar files. Use null for the default.
138     * 
139     * @param entryEncoding the entry encoding, null uses the default.
140     * @since 1.5
141     */
142    public void setEntryEncoding(String entryEncoding) {
143        this.entryEncoding = entryEncoding;
144    }
145
146    /**
147     * Create an archive input stream from an archiver name and an input stream.
148     * 
149     * @param archiverName the archive name, i.e. "ar", "arj", "zip", "tar", "jar", "dump" or "cpio"
150     * @param in the input stream
151     * @return the archive input stream
152     * @throws ArchiveException if the archiver name is not known
153     * @throws StreamingNotSupportedException if the format cannot be
154     * read from a stream
155     * @throws IllegalArgumentException if the archiver name or stream is null
156     */
157    public ArchiveInputStream createArchiveInputStream(
158            final String archiverName, final InputStream in)
159            throws ArchiveException {
160
161        if (archiverName == null) {
162            throw new IllegalArgumentException("Archivername must not be null.");
163        }
164
165        if (in == null) {
166            throw new IllegalArgumentException("InputStream must not be null.");
167        }
168
169        if (AR.equalsIgnoreCase(archiverName)) {
170            return new ArArchiveInputStream(in);
171        }
172        if (ARJ.equalsIgnoreCase(archiverName)) {
173            if (entryEncoding != null) {
174                return new ArjArchiveInputStream(in, entryEncoding);
175            } else {
176                return new ArjArchiveInputStream(in);
177            }
178        }
179        if (ZIP.equalsIgnoreCase(archiverName)) {
180            if (entryEncoding != null) {
181                return new ZipArchiveInputStream(in, entryEncoding);
182            } else {
183                return new ZipArchiveInputStream(in);
184            }
185        }
186        if (TAR.equalsIgnoreCase(archiverName)) {
187            if (entryEncoding != null) {
188                return new TarArchiveInputStream(in, entryEncoding);
189            } else {
190                return new TarArchiveInputStream(in);
191            }
192        }
193        if (JAR.equalsIgnoreCase(archiverName)) {
194            return new JarArchiveInputStream(in);
195        }
196        if (CPIO.equalsIgnoreCase(archiverName)) {
197            if (entryEncoding != null) {
198                return new CpioArchiveInputStream(in, entryEncoding);
199            } else {
200                return new CpioArchiveInputStream(in);
201            }
202        }
203        if (DUMP.equalsIgnoreCase(archiverName)) {
204            if (entryEncoding != null) {
205                return new DumpArchiveInputStream(in, entryEncoding);
206            } else {
207                return new DumpArchiveInputStream(in);
208            }
209        }
210        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
211            throw new StreamingNotSupportedException(SEVEN_Z);
212        }
213
214        throw new ArchiveException("Archiver: " + archiverName + " not found.");
215    }
216
217    /**
218     * Create an archive output stream from an archiver name and an input stream.
219     * 
220     * @param archiverName the archive name, i.e. "ar", "zip", "tar", "jar" or "cpio"
221     * @param out the output stream
222     * @return the archive output stream
223     * @throws ArchiveException if the archiver name is not known
224     * @throws StreamingNotSupportedException if the format cannot be
225     * written to a stream
226     * @throws IllegalArgumentException if the archiver name or stream is null
227     */
228    public ArchiveOutputStream createArchiveOutputStream(
229            final String archiverName, final OutputStream out)
230            throws ArchiveException {
231        if (archiverName == null) {
232            throw new IllegalArgumentException("Archivername must not be null.");
233        }
234        if (out == null) {
235            throw new IllegalArgumentException("OutputStream must not be null.");
236        }
237
238        if (AR.equalsIgnoreCase(archiverName)) {
239            return new ArArchiveOutputStream(out);
240        }
241        if (ZIP.equalsIgnoreCase(archiverName)) {
242            ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
243            if (entryEncoding != null) {
244                zip.setEncoding(entryEncoding);
245            }
246            return zip;
247        }
248        if (TAR.equalsIgnoreCase(archiverName)) {
249            if (entryEncoding != null) {
250                return new TarArchiveOutputStream(out, entryEncoding);
251            } else {
252                return new TarArchiveOutputStream(out);
253            }
254        }
255        if (JAR.equalsIgnoreCase(archiverName)) {
256            return new JarArchiveOutputStream(out);
257        }
258        if (CPIO.equalsIgnoreCase(archiverName)) {
259            if (entryEncoding != null) {
260                return new CpioArchiveOutputStream(out, entryEncoding);
261            } else {
262                return new CpioArchiveOutputStream(out);
263            }
264        }
265        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
266            throw new StreamingNotSupportedException(SEVEN_Z);
267        }
268        throw new ArchiveException("Archiver: " + archiverName + " not found.");
269    }
270
271    /**
272     * Create an archive input stream from an input stream, autodetecting
273     * the archive type from the first few bytes of the stream. The InputStream
274     * must support marks, like BufferedInputStream.
275     * 
276     * @param in the input stream
277     * @return the archive input stream
278     * @throws ArchiveException if the archiver name is not known
279     * @throws StreamingNotSupportedException if the format cannot be
280     * read from a stream
281     * @throws IllegalArgumentException if the stream is null or does not support mark
282     */
283    public ArchiveInputStream createArchiveInputStream(final InputStream in)
284            throws ArchiveException {
285        if (in == null) {
286            throw new IllegalArgumentException("Stream must not be null.");
287        }
288
289        if (!in.markSupported()) {
290            throw new IllegalArgumentException("Mark is not supported.");
291        }
292
293        final byte[] signature = new byte[12];
294        in.mark(signature.length);
295        try {
296            int signatureLength = IOUtils.readFully(in, signature);
297            in.reset();
298            if (ZipArchiveInputStream.matches(signature, signatureLength)) {
299                if (entryEncoding != null) {
300                    return new ZipArchiveInputStream(in, entryEncoding);
301                } else {
302                    return new ZipArchiveInputStream(in);
303                }
304            } else if (JarArchiveInputStream.matches(signature, signatureLength)) {
305                return new JarArchiveInputStream(in);
306            } else if (ArArchiveInputStream.matches(signature, signatureLength)) {
307                return new ArArchiveInputStream(in);
308            } else if (CpioArchiveInputStream.matches(signature, signatureLength)) {
309                return new CpioArchiveInputStream(in);
310            } else if (ArjArchiveInputStream.matches(signature, signatureLength)) {
311                return new ArjArchiveInputStream(in);
312            } else if (SevenZFile.matches(signature, signatureLength)) {
313                throw new StreamingNotSupportedException(SEVEN_Z);
314            }
315
316            // Dump needs a bigger buffer to check the signature;
317            final byte[] dumpsig = new byte[32];
318            in.mark(dumpsig.length);
319            signatureLength = IOUtils.readFully(in, dumpsig);
320            in.reset();
321            if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
322                return new DumpArchiveInputStream(in);
323            }
324
325            // Tar needs an even bigger buffer to check the signature; read the first block
326            final byte[] tarheader = new byte[512];
327            in.mark(tarheader.length);
328            signatureLength = IOUtils.readFully(in, tarheader);
329            in.reset();
330            if (TarArchiveInputStream.matches(tarheader, signatureLength)) {
331                if (entryEncoding != null) {
332                    return new TarArchiveInputStream(in, entryEncoding);
333                } else {
334                    return new TarArchiveInputStream(in);
335                }
336            }
337            // COMPRESS-117 - improve auto-recognition
338            if (signatureLength >= 512) {
339                TarArchiveInputStream tais = null;
340                try {
341                    tais = new TarArchiveInputStream(new ByteArrayInputStream(tarheader));
342                    // COMPRESS-191 - verify the header checksum
343                    if (tais.getNextTarEntry().isCheckSumOK()) {
344                        return new TarArchiveInputStream(in);
345                    }
346                } catch (Exception e) { // NOPMD
347                    // can generate IllegalArgumentException as well
348                    // as IOException
349                    // autodetection, simply not a TAR
350                    // ignored
351                } finally {
352                    IOUtils.closeQuietly(tais);
353                }
354            }
355        } catch (IOException e) {
356            throw new ArchiveException("Could not use reset and mark operations.", e);
357        }
358
359        throw new ArchiveException("No Archiver found for the stream signature");
360    }
361
362}