001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers; 020 021import java.io.ByteArrayInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.OutputStream; 025 026import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; 027import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; 028import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream; 029import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; 030import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream; 031import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream; 032import org.apache.commons.compress.archivers.jar.JarArchiveInputStream; 033import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream; 034import org.apache.commons.compress.archivers.sevenz.SevenZFile; 035import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; 036import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; 037import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; 038import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; 039import org.apache.commons.compress.utils.IOUtils; 040 041/** 042 * Factory to create Archive[In|Out]putStreams from names or the first bytes of 043 * the InputStream. In order to add other implementations, you should extend 044 * ArchiveStreamFactory and override the appropriate methods (and call their 045 * implementation from super of course). 046 * 047 * Compressing a ZIP-File: 048 * 049 * <pre> 050 * final OutputStream out = new FileOutputStream(output); 051 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out); 052 * 053 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); 054 * IOUtils.copy(new FileInputStream(file1), os); 055 * os.closeArchiveEntry(); 056 * 057 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); 058 * IOUtils.copy(new FileInputStream(file2), os); 059 * os.closeArchiveEntry(); 060 * os.close(); 061 * </pre> 062 * 063 * Decompressing a ZIP-File: 064 * 065 * <pre> 066 * final InputStream is = new FileInputStream(input); 067 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is); 068 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry(); 069 * OutputStream out = new FileOutputStream(new File(dir, entry.getName())); 070 * IOUtils.copy(in, out); 071 * out.close(); 072 * in.close(); 073 * </pre> 074 * 075 * @Immutable 076 */ 077public class ArchiveStreamFactory { 078 079 /** 080 * Constant used to identify the AR archive format. 081 * @since 1.1 082 */ 083 public static final String AR = "ar"; 084 /** 085 * Constant used to identify the ARJ archive format. 086 * @since 1.6 087 */ 088 public static final String ARJ = "arj"; 089 /** 090 * Constant used to identify the CPIO archive format. 091 * @since 1.1 092 */ 093 public static final String CPIO = "cpio"; 094 /** 095 * Constant used to identify the Unix DUMP archive format. 096 * @since 1.3 097 */ 098 public static final String DUMP = "dump"; 099 /** 100 * Constant used to identify the JAR archive format. 101 * @since 1.1 102 */ 103 public static final String JAR = "jar"; 104 /** 105 * Constant used to identify the TAR archive format. 106 * @since 1.1 107 */ 108 public static final String TAR = "tar"; 109 /** 110 * Constant used to identify the ZIP archive format. 111 * @since 1.1 112 */ 113 public static final String ZIP = "zip"; 114 /** 115 * Constant used to identify the 7z archive format. 116 * @since 1.8 117 */ 118 public static final String SEVEN_Z = "7z"; 119 120 /** 121 * Entry encoding, null for the default. 122 */ 123 private String entryEncoding = null; 124 125 /** 126 * Returns the encoding to use for arj, zip, dump, cpio and tar 127 * files, or null for the default. 128 * 129 * @return entry encoding, or null 130 * @since 1.5 131 */ 132 public String getEntryEncoding() { 133 return entryEncoding; 134 } 135 136 /** 137 * Sets the encoding to use for arj, zip, dump, cpio and tar files. Use null for the default. 138 * 139 * @param entryEncoding the entry encoding, null uses the default. 140 * @since 1.5 141 */ 142 public void setEntryEncoding(String entryEncoding) { 143 this.entryEncoding = entryEncoding; 144 } 145 146 /** 147 * Create an archive input stream from an archiver name and an input stream. 148 * 149 * @param archiverName the archive name, i.e. "ar", "arj", "zip", "tar", "jar", "dump" or "cpio" 150 * @param in the input stream 151 * @return the archive input stream 152 * @throws ArchiveException if the archiver name is not known 153 * @throws StreamingNotSupportedException if the format cannot be 154 * read from a stream 155 * @throws IllegalArgumentException if the archiver name or stream is null 156 */ 157 public ArchiveInputStream createArchiveInputStream( 158 final String archiverName, final InputStream in) 159 throws ArchiveException { 160 161 if (archiverName == null) { 162 throw new IllegalArgumentException("Archivername must not be null."); 163 } 164 165 if (in == null) { 166 throw new IllegalArgumentException("InputStream must not be null."); 167 } 168 169 if (AR.equalsIgnoreCase(archiverName)) { 170 return new ArArchiveInputStream(in); 171 } 172 if (ARJ.equalsIgnoreCase(archiverName)) { 173 if (entryEncoding != null) { 174 return new ArjArchiveInputStream(in, entryEncoding); 175 } else { 176 return new ArjArchiveInputStream(in); 177 } 178 } 179 if (ZIP.equalsIgnoreCase(archiverName)) { 180 if (entryEncoding != null) { 181 return new ZipArchiveInputStream(in, entryEncoding); 182 } else { 183 return new ZipArchiveInputStream(in); 184 } 185 } 186 if (TAR.equalsIgnoreCase(archiverName)) { 187 if (entryEncoding != null) { 188 return new TarArchiveInputStream(in, entryEncoding); 189 } else { 190 return new TarArchiveInputStream(in); 191 } 192 } 193 if (JAR.equalsIgnoreCase(archiverName)) { 194 return new JarArchiveInputStream(in); 195 } 196 if (CPIO.equalsIgnoreCase(archiverName)) { 197 if (entryEncoding != null) { 198 return new CpioArchiveInputStream(in, entryEncoding); 199 } else { 200 return new CpioArchiveInputStream(in); 201 } 202 } 203 if (DUMP.equalsIgnoreCase(archiverName)) { 204 if (entryEncoding != null) { 205 return new DumpArchiveInputStream(in, entryEncoding); 206 } else { 207 return new DumpArchiveInputStream(in); 208 } 209 } 210 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 211 throw new StreamingNotSupportedException(SEVEN_Z); 212 } 213 214 throw new ArchiveException("Archiver: " + archiverName + " not found."); 215 } 216 217 /** 218 * Create an archive output stream from an archiver name and an input stream. 219 * 220 * @param archiverName the archive name, i.e. "ar", "zip", "tar", "jar" or "cpio" 221 * @param out the output stream 222 * @return the archive output stream 223 * @throws ArchiveException if the archiver name is not known 224 * @throws StreamingNotSupportedException if the format cannot be 225 * written to a stream 226 * @throws IllegalArgumentException if the archiver name or stream is null 227 */ 228 public ArchiveOutputStream createArchiveOutputStream( 229 final String archiverName, final OutputStream out) 230 throws ArchiveException { 231 if (archiverName == null) { 232 throw new IllegalArgumentException("Archivername must not be null."); 233 } 234 if (out == null) { 235 throw new IllegalArgumentException("OutputStream must not be null."); 236 } 237 238 if (AR.equalsIgnoreCase(archiverName)) { 239 return new ArArchiveOutputStream(out); 240 } 241 if (ZIP.equalsIgnoreCase(archiverName)) { 242 ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out); 243 if (entryEncoding != null) { 244 zip.setEncoding(entryEncoding); 245 } 246 return zip; 247 } 248 if (TAR.equalsIgnoreCase(archiverName)) { 249 if (entryEncoding != null) { 250 return new TarArchiveOutputStream(out, entryEncoding); 251 } else { 252 return new TarArchiveOutputStream(out); 253 } 254 } 255 if (JAR.equalsIgnoreCase(archiverName)) { 256 return new JarArchiveOutputStream(out); 257 } 258 if (CPIO.equalsIgnoreCase(archiverName)) { 259 if (entryEncoding != null) { 260 return new CpioArchiveOutputStream(out, entryEncoding); 261 } else { 262 return new CpioArchiveOutputStream(out); 263 } 264 } 265 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 266 throw new StreamingNotSupportedException(SEVEN_Z); 267 } 268 throw new ArchiveException("Archiver: " + archiverName + " not found."); 269 } 270 271 /** 272 * Create an archive input stream from an input stream, autodetecting 273 * the archive type from the first few bytes of the stream. The InputStream 274 * must support marks, like BufferedInputStream. 275 * 276 * @param in the input stream 277 * @return the archive input stream 278 * @throws ArchiveException if the archiver name is not known 279 * @throws StreamingNotSupportedException if the format cannot be 280 * read from a stream 281 * @throws IllegalArgumentException if the stream is null or does not support mark 282 */ 283 public ArchiveInputStream createArchiveInputStream(final InputStream in) 284 throws ArchiveException { 285 if (in == null) { 286 throw new IllegalArgumentException("Stream must not be null."); 287 } 288 289 if (!in.markSupported()) { 290 throw new IllegalArgumentException("Mark is not supported."); 291 } 292 293 final byte[] signature = new byte[12]; 294 in.mark(signature.length); 295 try { 296 int signatureLength = IOUtils.readFully(in, signature); 297 in.reset(); 298 if (ZipArchiveInputStream.matches(signature, signatureLength)) { 299 if (entryEncoding != null) { 300 return new ZipArchiveInputStream(in, entryEncoding); 301 } else { 302 return new ZipArchiveInputStream(in); 303 } 304 } else if (JarArchiveInputStream.matches(signature, signatureLength)) { 305 return new JarArchiveInputStream(in); 306 } else if (ArArchiveInputStream.matches(signature, signatureLength)) { 307 return new ArArchiveInputStream(in); 308 } else if (CpioArchiveInputStream.matches(signature, signatureLength)) { 309 return new CpioArchiveInputStream(in); 310 } else if (ArjArchiveInputStream.matches(signature, signatureLength)) { 311 return new ArjArchiveInputStream(in); 312 } else if (SevenZFile.matches(signature, signatureLength)) { 313 throw new StreamingNotSupportedException(SEVEN_Z); 314 } 315 316 // Dump needs a bigger buffer to check the signature; 317 final byte[] dumpsig = new byte[32]; 318 in.mark(dumpsig.length); 319 signatureLength = IOUtils.readFully(in, dumpsig); 320 in.reset(); 321 if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) { 322 return new DumpArchiveInputStream(in); 323 } 324 325 // Tar needs an even bigger buffer to check the signature; read the first block 326 final byte[] tarheader = new byte[512]; 327 in.mark(tarheader.length); 328 signatureLength = IOUtils.readFully(in, tarheader); 329 in.reset(); 330 if (TarArchiveInputStream.matches(tarheader, signatureLength)) { 331 if (entryEncoding != null) { 332 return new TarArchiveInputStream(in, entryEncoding); 333 } else { 334 return new TarArchiveInputStream(in); 335 } 336 } 337 // COMPRESS-117 - improve auto-recognition 338 if (signatureLength >= 512) { 339 TarArchiveInputStream tais = null; 340 try { 341 tais = new TarArchiveInputStream(new ByteArrayInputStream(tarheader)); 342 // COMPRESS-191 - verify the header checksum 343 if (tais.getNextTarEntry().isCheckSumOK()) { 344 return new TarArchiveInputStream(in); 345 } 346 } catch (Exception e) { // NOPMD 347 // can generate IllegalArgumentException as well 348 // as IOException 349 // autodetection, simply not a TAR 350 // ignored 351 } finally { 352 IOUtils.closeQuietly(tais); 353 } 354 } 355 } catch (IOException e) { 356 throw new ArchiveException("Could not use reset and mark operations.", e); 357 } 358 359 throw new ArchiveException("No Archiver found for the stream signature"); 360 } 361 362}