001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.dump;
020
021import org.apache.commons.compress.archivers.ArchiveException;
022import org.apache.commons.compress.archivers.ArchiveInputStream;
023import org.apache.commons.compress.archivers.zip.ZipEncoding;
024import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
025
026import java.io.EOFException;
027import java.io.IOException;
028import java.io.InputStream;
029
030import java.util.Arrays;
031import java.util.Comparator;
032import java.util.HashMap;
033import java.util.Map;
034import java.util.PriorityQueue;
035import java.util.Queue;
036import java.util.Stack;
037
/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 *
 * There doesn't seem to exist a hint on the encoding of string values
 * in any piece of documentation.  Given the main purpose of dump/restore
 * is backing up a system it seems very likely the format uses the
 * current default encoding of the system.
 *
 * @NotThreadSafe
 */
051public class DumpArchiveInputStream extends ArchiveInputStream {
052    private DumpArchiveSummary summary;
053    private DumpArchiveEntry active;
054    private boolean isClosed;
055    private boolean hasHitEOF;
056    private long entrySize;
057    private long entryOffset;
058    private int readIdx;
059    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
060    private byte[] blockBuffer;
061    private int recordOffset;
062    private long filepos;
063    protected TapeInputStream raw;
064
065    // map of ino -> dirent entry. We can use this to reconstruct full paths.
066    private final Map<Integer, Dirent> names = new HashMap<Integer, Dirent>();
067
068    // map of ino -> (directory) entry when we're missing one or more elements in the path.
069    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<Integer, DumpArchiveEntry>();
070
071    // queue of (directory) entries where we now have the full path.
072    private Queue<DumpArchiveEntry> queue;
073
074    /**
075     * The encoding to use for filenames and labels.
076     */
077    private final ZipEncoding encoding;
078
079    /**
080     * Constructor using the platform's default encoding for file
081     * names.
082     *
083     * @param is
084     * @throws ArchiveException
085     */
086    public DumpArchiveInputStream(InputStream is) throws ArchiveException {
087        this(is, null);
088    }
089
090    /**
091     * Constructor.
092     *
093     * @param is
094     * @param encoding the encoding to use for file names, use null
095     * for the platform's default encoding
096     * @since 1.6
097     */
098    public DumpArchiveInputStream(InputStream is, String encoding)
099        throws ArchiveException {
100        this.raw = new TapeInputStream(is);
101        this.hasHitEOF = false;
102        this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
103
104        try {
105            // read header, verify it's a dump archive.
106            byte[] headerBytes = raw.readRecord();
107
108            if (!DumpArchiveUtil.verify(headerBytes)) {
109                throw new UnrecognizedFormatException();
110            }
111
112            // get summary information
113            summary = new DumpArchiveSummary(headerBytes, this.encoding);
114
115            // reset buffer with actual block size.
116            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());
117
118            // allocate our read buffer.
119            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];
120
121            // skip past CLRI and BITS segments since we don't handle them yet.
122            readCLRI();
123            readBITS();
124        } catch (IOException ex) {
125            throw new ArchiveException(ex.getMessage(), ex);
126        }
127
128        // put in a dummy record for the root node.
129        Dirent root = new Dirent(2, 2, 4, ".");
130        names.put(Integer.valueOf(2), root);
131
132        // use priority based on queue to ensure parent directories are
133        // released first.
134        queue = new PriorityQueue<DumpArchiveEntry>(10,
135                new Comparator<DumpArchiveEntry>() {
136                    public int compare(DumpArchiveEntry p, DumpArchiveEntry q) {
137                        if (p.getOriginalName() == null || q.getOriginalName() == null) {
138                            return Integer.MAX_VALUE;
139                        }
140
141                        return p.getOriginalName().compareTo(q.getOriginalName());
142                    }
143                });
144    }
145
146    @Deprecated
147    @Override
148    public int getCount() {
149        return (int) getBytesRead();
150    }
151
152    @Override
153    public long getBytesRead() {
154        return raw.getBytesRead();
155    }
156
157    /**
158     * Return the archive summary information.
159     */
160    public DumpArchiveSummary getSummary() {
161        return summary;
162    }
163
164    /**
165     * Read CLRI (deleted inode) segment.
166     */
167    private void readCLRI() throws IOException {
168        byte[] buffer = raw.readRecord();
169
170        if (!DumpArchiveUtil.verify(buffer)) {
171            throw new InvalidFormatException();
172        }
173
174        active = DumpArchiveEntry.parse(buffer);
175
176        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
177            throw new InvalidFormatException();
178        }
179
180        // we don't do anything with this yet.
181        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
182            == -1) {
183            throw new EOFException();
184        }
185        readIdx = active.getHeaderCount();
186    }
187
188    /**
189     * Read BITS segment.
190     */
191    private void readBITS() throws IOException {
192        byte[] buffer = raw.readRecord();
193
194        if (!DumpArchiveUtil.verify(buffer)) {
195            throw new InvalidFormatException();
196        }
197
198        active = DumpArchiveEntry.parse(buffer);
199
200        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
201            throw new InvalidFormatException();
202        }
203
204        // we don't do anything with this yet.
205        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
206            == -1) {
207            throw new EOFException();
208        }
209        readIdx = active.getHeaderCount();
210    }
211
212    /**
213     * Read the next entry.
214     */
215    public DumpArchiveEntry getNextDumpEntry() throws IOException {
216        return getNextEntry();
217    }
218
219    /**
220     * Read the next entry.
221     */
222    @Override
223    public DumpArchiveEntry getNextEntry() throws IOException {
224        DumpArchiveEntry entry = null;
225        String path = null;
226
227        // is there anything in the queue?
228        if (!queue.isEmpty()) {
229            return queue.remove();
230        }
231
232        while (entry == null) {
233            if (hasHitEOF) {
234                return null;
235            }
236
237            // skip any remaining records in this segment for prior file.
238            // we might still have holes... easiest to do it
239            // block by block. We may want to revisit this if
240            // the unnecessary decompression time adds up.
241            while (readIdx < active.getHeaderCount()) {
242                if (!active.isSparseRecord(readIdx++)
243                    && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
244                    throw new EOFException();
245                }
246            }
247
248            readIdx = 0;
249            filepos = raw.getBytesRead();
250
251            byte[] headerBytes = raw.readRecord();
252
253            if (!DumpArchiveUtil.verify(headerBytes)) {
254                throw new InvalidFormatException();
255            }
256
257            active = DumpArchiveEntry.parse(headerBytes);
258
259            // skip any remaining segments for prior file.
260            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
261                if (raw.skip(DumpArchiveConstants.TP_SIZE
262                             * (active.getHeaderCount()
263                                - active.getHeaderHoles())) == -1) {
264                    throw new EOFException();
265                }
266
267                filepos = raw.getBytesRead();
268                headerBytes = raw.readRecord();
269
270                if (!DumpArchiveUtil.verify(headerBytes)) {
271                    throw new InvalidFormatException();
272                }
273
274                active = DumpArchiveEntry.parse(headerBytes);
275            }
276
277            // check if this is an end-of-volume marker.
278            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
279                hasHitEOF = true;
280
281                return null;
282            }
283
284            entry = active;
285
286            if (entry.isDirectory()) {
287                readDirectoryEntry(active);
288
289                // now we create an empty InputStream.
290                entryOffset = 0;
291                entrySize = 0;
292                readIdx = active.getHeaderCount();
293            } else {
294                entryOffset = 0;
295                entrySize = active.getEntrySize();
296                readIdx = 0;
297            }
298
299            recordOffset = readBuf.length;
300
301            path = getPath(entry);
302
303            if (path == null) {
304                entry = null;
305            }
306        }
307
308        entry.setName(path);
309        entry.setSimpleName(names.get(Integer.valueOf(entry.getIno())).getName());
310        entry.setOffset(filepos);
311
312        return entry;
313    }
314
315    /**
316     * Read directory entry.
317     */
318    private void readDirectoryEntry(DumpArchiveEntry entry)
319        throws IOException {
320        long size = entry.getEntrySize();
321        boolean first = true;
322
323        while (first ||
324                DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
325            // read the header that we just peeked at.
326            if (!first) {
327                raw.readRecord();
328            }
329
330            if (!names.containsKey(Integer.valueOf(entry.getIno())) &&
331                    DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
332                pending.put(Integer.valueOf(entry.getIno()), entry);
333            }
334
335            int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();
336
337            if (blockBuffer.length < datalen) {
338                blockBuffer = new byte[datalen];
339            }
340
341            if (raw.read(blockBuffer, 0, datalen) != datalen) {
342                throw new EOFException();
343            }
344
345            int reclen = 0;
346
347            for (int i = 0; i < datalen - 8 && i < size - 8;
348                    i += reclen) {
349                int ino = DumpArchiveUtil.convert32(blockBuffer, i);
350                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);
351
352                byte type = blockBuffer[i + 6];
353
354                String name = DumpArchiveUtil.decode(encoding, blockBuffer, i + 8, blockBuffer[i + 7]);
355
356                if (".".equals(name) || "..".equals(name)) {
357                    // do nothing...
358                    continue;
359                }
360
361                Dirent d = new Dirent(ino, entry.getIno(), type, name);
362
363                /*
364                if ((type == 4) && names.containsKey(ino)) {
365                    System.out.println("we already have ino: " +
366                                       names.get(ino));
367                }
368                */
369
370                names.put(Integer.valueOf(ino), d);
371
372                // check whether this allows us to fill anything in the pending list.
373                for (Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) {
374                    String path = getPath(e.getValue());
375
376                    if (path != null) {
377                        e.getValue().setName(path);
378                        e.getValue()
379                         .setSimpleName(names.get(e.getKey()).getName());
380                        queue.add(e.getValue());
381                    }
382                }
383
384                // remove anything that we found. (We can't do it earlier
385                // because of concurrent modification exceptions.)
386                for (DumpArchiveEntry e : queue) {
387                    pending.remove(Integer.valueOf(e.getIno()));
388                }
389            }
390
391            byte[] peekBytes = raw.peek();
392
393            if (!DumpArchiveUtil.verify(peekBytes)) {
394                throw new InvalidFormatException();
395            }
396
397            entry = DumpArchiveEntry.parse(peekBytes);
398            first = false;
399            size -= DumpArchiveConstants.TP_SIZE;
400        }
401    }
402
403    /**
404     * Get full path for specified archive entry, or null if there's a gap.
405     *
406     * @param entry
407     * @return  full path for specified archive entry, or null if there's a gap.
408     */
409    private String getPath(DumpArchiveEntry entry) {
410        // build the stack of elements. It's possible that we're 
411        // still missing an intermediate value and if so we
412        Stack<String> elements = new Stack<String>();
413        Dirent dirent = null;
414
415        for (int i = entry.getIno();; i = dirent.getParentIno()) {
416            if (!names.containsKey(Integer.valueOf(i))) {
417                elements.clear();
418                break;
419            }
420
421            dirent = names.get(Integer.valueOf(i));
422            elements.push(dirent.getName());
423
424            if (dirent.getIno() == dirent.getParentIno()) {
425                break;
426            }
427        }
428
429        // if an element is missing defer the work and read next entry.
430        if (elements.isEmpty()) {
431            pending.put(Integer.valueOf(entry.getIno()), entry);
432
433            return null;
434        }
435
436        // generate full path from stack of elements.
437        StringBuilder sb = new StringBuilder(elements.pop());
438
439        while (!elements.isEmpty()) {
440            sb.append('/');
441            sb.append(elements.pop());
442        }
443
444        return sb.toString();
445    }
446
447    /**
448     * Reads bytes from the current dump archive entry.
449     *
450     * This method is aware of the boundaries of the current
451     * entry in the archive and will deal with them as if they
452     * were this stream's start and EOF.
453     *
454     * @param buf The buffer into which to place bytes read.
455     * @param off The offset at which to place bytes read.
456     * @param len The number of bytes to read.
457     * @return The number of bytes read, or -1 at EOF.
458     * @throws IOException on error
459     */
460    @Override
461    public int read(byte[] buf, int off, int len) throws IOException {
462        int totalRead = 0;
463
464        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
465            return -1;
466        }
467
468        if (active == null) {
469            throw new IllegalStateException("No current dump entry");
470        }
471
472        if (len + entryOffset > entrySize) {
473            len = (int) (entrySize - entryOffset);
474        }
475
476        while (len > 0) {
477            int sz = len > readBuf.length - recordOffset
478                ? readBuf.length - recordOffset : len;
479
480            // copy any data we have
481            if (recordOffset + sz <= readBuf.length) {
482                System.arraycopy(readBuf, recordOffset, buf, off, sz);
483                totalRead += sz;
484                recordOffset += sz;
485                len -= sz;
486                off += sz;
487            }
488
489            // load next block if necessary.
490            if (len > 0) {
491                if (readIdx >= 512) {
492                    byte[] headerBytes = raw.readRecord();
493
494                    if (!DumpArchiveUtil.verify(headerBytes)) {
495                        throw new InvalidFormatException();
496                    }
497
498                    active = DumpArchiveEntry.parse(headerBytes);
499                    readIdx = 0;
500                }
501
502                if (!active.isSparseRecord(readIdx++)) {
503                    int r = raw.read(readBuf, 0, readBuf.length);
504                    if (r != readBuf.length) {
505                        throw new EOFException();
506                    }
507                } else {
508                    Arrays.fill(readBuf, (byte) 0);
509                }
510
511                recordOffset = 0;
512            }
513        }
514
515        entryOffset += totalRead;
516
517        return totalRead;
518    }
519
520    /**
521     * Closes the stream for this entry.
522     */
523    @Override
524    public void close() throws IOException {
525        if (!isClosed) {
526            isClosed = true;
527            raw.close();
528        }
529    }
530
531    /**
532     * Look at the first few bytes of the file to decide if it's a dump
533     * archive. With 32 bytes we can look at the magic value, with a full
534     * 1k we can verify the checksum.
535     */
536    public static boolean matches(byte[] buffer, int length) {
537        // do we have enough of the header?
538        if (length < 32) {
539            return false;
540        }
541
542        // this is the best test
543        if (length >= DumpArchiveConstants.TP_SIZE) {
544            return DumpArchiveUtil.verify(buffer);
545        }
546
547        // this will work in a pinch.
548        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer,
549            24);
550    }
551
552}