/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
import java.util.Stack;

/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 *
 * There doesn't seem to exist a hint on the encoding of string values
 * in any piece of documentation.  Given the main purpose of dump/restore
 * is backing up a system it seems very likely the format uses the
 * current default encoding of the system.
 *
 * @NotThreadSafe
 */
public class DumpArchiveInputStream extends ArchiveInputStream {
    private DumpArchiveSummary summary;
    private DumpArchiveEntry active;
    private boolean isClosed;
    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private int readIdx;
    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    private byte[] blockBuffer;
    private int recordOffset;
    private long filepos;
    protected TapeInputStream raw;

    // map of ino -> dirent entry. We can use this to reconstruct full paths.
    private final Map<Integer, Dirent> names = new HashMap<Integer, Dirent>();

    // map of ino -> (directory) entry when we're missing one or more elements in the path.
    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<Integer, DumpArchiveEntry>();

    // queue of (directory) entries where we now have the full path.
    private Queue<DumpArchiveEntry> queue;

    /**
     * The encoding to use for filenames and labels.
     */
    private final ZipEncoding encoding;

    /**
     * Constructor using the platform's default encoding for file
     * names.
     *
     * @param is the stream to read the dump archive from
     * @throws ArchiveException if the stream is not a dump archive or
     * its leading segments cannot be read
     */
    public DumpArchiveInputStream(InputStream is) throws ArchiveException {
        this(is, null);
    }

    /**
     * Constructor.
     *
     * Reads and verifies the archive header, builds the summary and
     * skips the CLRI and BITS segments so that the stream is positioned
     * in front of the first entry.
     *
     * @param is the stream to read the dump archive from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @throws ArchiveException if the stream is not a dump archive or
     * its leading segments cannot be read
     * @since 1.6
     */
    public DumpArchiveInputStream(InputStream is, String encoding)
        throws ArchiveException {
        this.raw = new TapeInputStream(is);
        this.hasHitEOF = false;
        this.encoding = ZipEncodingHelper.getZipEncoding(encoding);

        try {
            // read header, verify it's a dump archive.
            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new UnrecognizedFormatException();
            }

            // get summary information
            summary = new DumpArchiveSummary(headerBytes, this.encoding);

            // reset buffer with actual block size.
            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());

            // allocate our read buffer.
            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];

            // skip past CLRI and BITS segments since we don't handle them yet.
            readCLRI();
            readBITS();
        } catch (IOException ex) {
            throw new ArchiveException(ex.getMessage(), ex);
        }

        // put in a dummy record for the root node.
        Dirent root = new Dirent(2, 2, 4, ".");
        names.put(Integer.valueOf(2), root);

        // use priority based on queue to ensure parent directories are
        // released first.
        queue = new PriorityQueue<DumpArchiveEntry>(10,
                new Comparator<DumpArchiveEntry>() {
                    public int compare(DumpArchiveEntry p, DumpArchiveEntry q) {
                        if (p.getOriginalName() == null || q.getOriginalName() == null) {
                            return Integer.MAX_VALUE;
                        }

                        return p.getOriginalName().compareTo(q.getOriginalName());
                    }
                });
    }

    /**
     * Returns the number of bytes read so far, truncated to an int.
     *
     * @return the number of bytes read, cast to int
     * @deprecated the value is truncated for large archives, use
     * {@link #getBytesRead()} instead
     */
    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }

    /**
     * Returns the number of bytes read as reported by the underlying
     * tape stream.
     *
     * @return the number of bytes read
     */
    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }

    /**
     * Return the archive summary information.
     *
     * @return the summary parsed from the archive header
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }

    /**
     * Reads the next record from the tape, verifies it and parses it
     * as a segment header.
     *
     * @return the parsed segment header
     * @throws IOException if the record cannot be read or fails
     * verification
     */
    private DumpArchiveEntry readSegmentHeader() throws IOException {
        byte[] headerBytes = raw.readRecord();

        if (!DumpArchiveUtil.verify(headerBytes)) {
            throw new InvalidFormatException();
        }

        return DumpArchiveEntry.parse(headerBytes);
    }

    /**
     * Skips all records announced by the active segment header and
     * marks the segment as fully consumed.
     *
     * @throws IOException if the end of the stream is hit while skipping
     */
    private void skipActiveSegmentData() throws IOException {
        // widen before multiplying so a large header count cannot
        // overflow int and turn into a bogus (negative) skip length.
        long toSkip = (long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount();

        if (raw.skip(toSkip) == -1) {
            throw new EOFException();
        }

        readIdx = active.getHeaderCount();
    }

    /**
     * Read CLRI (deleted inode) segment.
     *
     * @throws IOException if the segment is missing, malformed or
     * truncated
     */
    private void readCLRI() throws IOException {
        active = readSegmentHeader();

        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        skipActiveSegmentData();
    }

    /**
     * Read BITS segment.
     *
     * @throws IOException if the segment is missing, malformed or
     * truncated
     */
    private void readBITS() throws IOException {
        active = readSegmentHeader();

        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        skipActiveSegmentData();
    }

    /**
     * Read the next entry.
     *
     * @return the next entry, or null once the end-of-volume marker
     * has been reached
     * @throws IOException on error
     */
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }

    /**
     * Read the next entry.
     *
     * Entries whose full path became known while reading a directory
     * are handed out first; otherwise segments are read from the tape
     * until an entry with a resolvable path is found.
     *
     * @return the next entry, or null once the end-of-volume marker
     * has been reached
     * @throws IOException on error
     */
    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for prior file.
            // we might still have holes... easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                if (!active.isSparseRecord(readIdx++)
                    && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            filepos = raw.getBytesRead();
            active = readSegmentHeader();

            // skip any remaining segments for prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                // widen before multiplying to avoid int overflow.
                long toSkip = (long) DumpArchiveConstants.TP_SIZE
                    * (active.getHeaderCount() - active.getHeaderHoles());

                if (raw.skip(toSkip) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                active = readSegmentHeader();
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            // force read() to load a fresh record before copying data.
            recordOffset = readBuf.length;

            path = getPath(entry);

            // path not resolvable yet: getPath has parked the entry in
            // 'pending', so move on to the next one.
            if (path == null) {
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(Integer.valueOf(entry.getIno())).getName());
        entry.setOffset(filepos);

        return entry;
    }

    /**
     * Read directory entry.
     *
     * Reads the directory's data records (including any following ADDR
     * segments), registers every contained name in {@link #names} and
     * moves pending entries whose path has become resolvable to the
     * queue.
     *
     * @param entry the directory entry whose contents are to be read
     * @throws IOException if the data is truncated or malformed
     */
    private void readDirectoryEntry(DumpArchiveEntry entry)
        throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        while (first ||
                DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            if (!names.containsKey(Integer.valueOf(entry.getIno())) &&
                    DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
                pending.put(Integer.valueOf(entry.getIno()), entry);
            }

            int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                blockBuffer = new byte[datalen];
            }

            if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

            for (int i = 0; i < datalen - 8 && i < size - 8;
                    i += reclen) {
                int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);

                // a zero record length would never advance 'i' and
                // loop forever on a corrupt archive.
                if (reclen == 0) {
                    throw new InvalidFormatException();
                }

                byte type = blockBuffer[i + 6];

                // the name length is an unsigned byte; without the mask
                // names longer than 127 bytes yield a negative length.
                int nameLen = blockBuffer[i + 7] & 0xff;

                String name = DumpArchiveUtil.decode(encoding, blockBuffer, i + 8, nameLen);

                if (".".equals(name) || "..".equals(name)) {
                    // do nothing...
                    continue;
                }

                Dirent d = new Dirent(ino, entry.getIno(), type, name);

                names.put(Integer.valueOf(ino), d);

                // check whether this allows us to fill anything in the pending list.
                for (Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) {
                    String path = getPath(e.getValue());

                    if (path != null) {
                        e.getValue().setName(path);
                        e.getValue()
                         .setSimpleName(names.get(e.getKey()).getName());
                        queue.add(e.getValue());
                    }
                }

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                for (DumpArchiveEntry e : queue) {
                    pending.remove(Integer.valueOf(e.getIno()));
                }
            }

            byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }

    /**
     * Get full path for specified archive entry, or null if there's a gap.
     *
     * When a gap is found the entry is stored in {@link #pending} so
     * that it can be resolved once the missing directory has been read.
     *
     * @param entry the entry to resolve
     * @return full path for specified archive entry, or null if there's a gap.
     * @throws IOException if the parent chain of the entry contains a
     * cycle, which can only happen in a corrupt archive
     */
    private String getPath(DumpArchiveEntry entry) throws IOException {
        // build the stack of elements. It's possible that we're
        // still missing an intermediate value and if so we
        // give up and return null.
        Stack<String> elements = new Stack<String>();
        // inodes already seen on the way up; guards against cycles.
        Set<Integer> visited = new HashSet<Integer>();
        Dirent dirent = null;

        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            Integer key = Integer.valueOf(i);

            if (!names.containsKey(key)) {
                elements.clear();
                break;
            }

            if (!visited.add(key)) {
                // the parent chain loops back on itself and would
                // never reach the root.
                throw new InvalidFormatException();
            }

            dirent = names.get(key);
            elements.push(dirent.getName());

            // the root is recorded as its own parent.
            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }

        // if an element is missing defer the work and read next entry.
        if (elements.isEmpty()) {
            pending.put(Integer.valueOf(entry.getIno()), entry);

            return null;
        }

        // generate full path from stack of elements.
        StringBuilder sb = new StringBuilder(elements.pop());

        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }

        return sb.toString();
    }

    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, 0 if len is 0, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        // InputStream contract: a zero-length read returns 0, never -1.
        if (len == 0) {
            return 0;
        }

        int totalRead = 0;

        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
            return -1;
        }

        if (active == null) {
            throw new IllegalStateException("No current dump entry");
        }

        // never read past the end of the current entry.
        if (len + entryOffset > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            int sz = Math.min(len, readBuf.length - recordOffset);

            // copy any data we have
            if (recordOffset + sz <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load next block if necessary.
            if (len > 0) {
                // NOTE(review): 512 appears to be the number of record
                // slots a single segment header can describe - confirm
                // against DumpArchiveEntry.
                if (readIdx >= 512) {
                    active = readSegmentHeader();
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    // sparse records are not stored; hand out zeros.
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Closes the stream for this entry.
     *
     * Closing an already closed stream has no effect.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    /**
     * Look at the first few bytes of the file to decide if it's a dump
     * archive. With 32 bytes we can look at the magic value, with a full
     * 1k we can verify the checksum.
     *
     * @param buffer the leading bytes of the candidate archive
     * @param length the number of valid bytes in buffer
     * @return true if the bytes look like the start of a dump archive
     */
    public static boolean matches(byte[] buffer, int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer,
            24);
    }

}