001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
022import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
023
024import java.io.IOException;
025import java.math.BigInteger;
026import java.nio.ByteBuffer;
027import org.apache.commons.compress.archivers.zip.ZipEncoding;
028import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
029
030/**
031 * This class provides static utility methods to work with byte streams.
032 *
033 * @Immutable
034 */
035// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
036public class TarUtils {
037
038    private static final int BYTE_MASK = 255;
039
040    static final ZipEncoding DEFAULT_ENCODING =
041        ZipEncodingHelper.getZipEncoding(null);
042
043    /**
044     * Encapsulates the algorithms used up to Commons Compress 1.3 as
045     * ZipEncoding.
046     */
047    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
048            public boolean canEncode(String name) { return true; }
049
050            public ByteBuffer encode(String name) {
051                final int length = name.length();
052                byte[] buf = new byte[length];
053
054                // copy until end of input or output is reached.
055                for (int i = 0; i < length; ++i) {
056                    buf[i] = (byte) name.charAt(i);
057                }
058                return ByteBuffer.wrap(buf);
059            }
060
061            public String decode(byte[] buffer) {
062                final int length = buffer.length;
063                StringBuilder result = new StringBuilder(length);
064
065                for (int i = 0; i < length; ++i) {
066                    byte b = buffer[i];
067                    if (b == 0) { // Trailing null
068                        break;
069                    }
070                    result.append((char) (b & 0xFF)); // Allow for sign-extension
071                }
072
073                return result.toString();
074            }
075        };
076
    /**
     * Private constructor to prevent instantiation of this utility class;
     * all members are static.
     */
    private TarUtils(){
    }
080
081    /**
082     * Parse an octal string from a buffer.
083     *
084     * <p>Leading spaces are ignored.
085     * The buffer must contain a trailing space or NUL,
086     * and may contain an additional trailing space or NUL.</p>
087     *
088     * <p>The input buffer is allowed to contain all NULs,
089     * in which case the method returns 0L
090     * (this allows for missing fields).</p>
091     *
092     * <p>To work-around some tar implementations that insert a
093     * leading NUL this method returns 0 if it detects a leading NUL
094     * since Commons Compress 1.4.</p>
095     *
096     * @param buffer The buffer from which to parse.
097     * @param offset The offset into the buffer from which to parse.
098     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
099     * @return The long value of the octal string.
100     * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
101     */
102    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
103        long    result = 0;
104        int     end = offset + length;
105        int     start = offset;
106
107        if (length < 2){
108            throw new IllegalArgumentException("Length "+length+" must be at least 2");
109        }
110
111        if (buffer[start] == 0) {
112            return 0L;
113        }
114
115        // Skip leading spaces
116        while (start < end){
117            if (buffer[start] == ' '){
118                start++;
119            } else {
120                break;
121            }
122        }
123
124        // Trim all trailing NULs and spaces.
125        // The ustar and POSIX tar specs require a trailing NUL or
126        // space but some implementations use the extra digit for big
127        // sizes/uids/gids ...
128        byte trailer = buffer[end - 1];
129        while (start < end && (trailer == 0 || trailer == ' ')) {
130            end--;
131            trailer = buffer[end - 1];
132        }
133
134        for ( ;start < end; start++) {
135            final byte currentByte = buffer[start];
136            // CheckStyle:MagicNumber OFF
137            if (currentByte < '0' || currentByte > '7'){
138                throw new IllegalArgumentException(
139                        exceptionMessage(buffer, offset, length, start, currentByte));
140            }
141            result = (result << 3) + (currentByte - '0'); // convert from ASCII
142            // CheckStyle:MagicNumber ON
143        }
144
145        return result;
146    }
147
148    /** 
149     * Compute the value contained in a byte buffer.  If the most
150     * significant bit of the first byte in the buffer is set, this
151     * bit is ignored and the rest of the buffer is interpreted as a
152     * binary number.  Otherwise, the buffer is interpreted as an
153     * octal number as per the parseOctal function above.
154     *
155     * @param buffer The buffer from which to parse.
156     * @param offset The offset into the buffer from which to parse.
157     * @param length The maximum number of bytes to parse.
158     * @return The long value of the octal or binary string.
159     * @throws IllegalArgumentException if the trailing space/NUL is
160     * missing or an invalid byte is detected in an octal number, or
161     * if a binary number would exceed the size of a signed long
162     * 64-bit integer.
163     * @since 1.4
164     */
165    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
166                                          final int length) {
167
168        if ((buffer[offset] & 0x80) == 0) {
169            return parseOctal(buffer, offset, length);
170        }
171        final boolean negative = buffer[offset] == (byte) 0xff;
172        if (length < 9) {
173            return parseBinaryLong(buffer, offset, length, negative);
174        }
175        return parseBinaryBigInteger(buffer, offset, length, negative);
176    }
177
178    private static long parseBinaryLong(final byte[] buffer, final int offset,
179                                        final int length,
180                                        final boolean negative) {
181        if (length >= 9) {
182            throw new IllegalArgumentException("At offset " + offset + ", "
183                                               + length + " byte binary number"
184                                               + " exceeds maximum signed long"
185                                               + " value");
186        }
187        long val = 0;
188        for (int i = 1; i < length; i++) {
189            val = (val << 8) + (buffer[offset + i] & 0xff);
190        }
191        if (negative) {
192            // 2's complement
193            val--;
194            val ^= (long) Math.pow(2, (length - 1) * 8) - 1;
195        }
196        return negative ? -val : val;
197    }
198
199    private static long parseBinaryBigInteger(final byte[] buffer,
200                                              final int offset,
201                                              final int length,
202                                              final boolean negative) {
203        byte[] remainder = new byte[length - 1];
204        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
205        BigInteger val = new BigInteger(remainder);
206        if (negative) {
207            // 2's complement
208            val = val.add(BigInteger.valueOf(-1)).not();
209        }
210        if (val.bitLength() > 63) {
211            throw new IllegalArgumentException("At offset " + offset + ", "
212                                               + length + " byte binary number"
213                                               + " exceeds maximum signed long"
214                                               + " value");
215        }
216        return negative ? -val.longValue() : val.longValue();
217    }
218
219    /**
220     * Parse a boolean byte from a buffer.
221     * Leading spaces and NUL are ignored.
222     * The buffer may contain trailing spaces or NULs.
223     *
224     * @param buffer The buffer from which to parse.
225     * @param offset The offset into the buffer from which to parse.
226     * @return The boolean value of the bytes.
227     * @throws IllegalArgumentException if an invalid byte is detected.
228     */
229    public static boolean parseBoolean(final byte[] buffer, final int offset) {
230        return buffer[offset] == 1;
231    }
232
233    // Helper method to generate the exception message
234    private static String exceptionMessage(byte[] buffer, final int offset,
235            final int length, int current, final byte currentByte) {
236        // default charset is good enough for an exception message,
237        //
238        // the alternative was to modify parseOctal and
239        // parseOctalOrBinary to receive the ZipEncoding of the
240        // archive (deprecating the existing public methods, of
241        // course) and dealing with the fact that ZipEncoding#decode
242        // can throw an IOException which parseOctal* doesn't declare
243        String string = new String(buffer, offset, length);
244
245        string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed
246        final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
247        return s;
248    }
249
250    /**
251     * Parse an entry name from a buffer.
252     * Parsing stops when a NUL is found
253     * or the buffer length is reached.
254     *
255     * @param buffer The buffer from which to parse.
256     * @param offset The offset into the buffer from which to parse.
257     * @param length The maximum number of bytes to parse.
258     * @return The entry name.
259     */
260    public static String parseName(byte[] buffer, final int offset, final int length) {
261        try {
262            return parseName(buffer, offset, length, DEFAULT_ENCODING);
263        } catch (IOException ex) {
264            try {
265                return parseName(buffer, offset, length, FALLBACK_ENCODING);
266            } catch (IOException ex2) {
267                // impossible
268                throw new RuntimeException(ex2);
269            }
270        }
271    }
272
273    /**
274     * Parse an entry name from a buffer.
275     * Parsing stops when a NUL is found
276     * or the buffer length is reached.
277     *
278     * @param buffer The buffer from which to parse.
279     * @param offset The offset into the buffer from which to parse.
280     * @param length The maximum number of bytes to parse.
281     * @param encoding name of the encoding to use for file names
282     * @since 1.4
283     * @return The entry name.
284     */
285    public static String parseName(byte[] buffer, final int offset,
286                                   final int length,
287                                   final ZipEncoding encoding)
288        throws IOException {
289
290        int len = length;
291        for (; len > 0; len--) {
292            if (buffer[offset + len - 1] != 0) {
293                break;
294            }
295        }
296        if (len > 0) {
297            byte[] b = new byte[len];
298            System.arraycopy(buffer, offset, b, 0, len);
299            return encoding.decode(b);
300        }
301        return "";
302    }
303
304    /**
305     * Copy a name into a buffer.
306     * Copies characters from the name into the buffer
307     * starting at the specified offset. 
308     * If the buffer is longer than the name, the buffer
309     * is filled with trailing NULs.
310     * If the name is longer than the buffer,
311     * the output is truncated.
312     *
313     * @param name The header name from which to copy the characters.
314     * @param buf The buffer where the name is to be stored.
315     * @param offset The starting offset into the buffer
316     * @param length The maximum number of header bytes to copy.
317     * @return The updated offset, i.e. offset + length
318     */
319    public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) {
320        try {
321            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
322        } catch (IOException ex) {
323            try {
324                return formatNameBytes(name, buf, offset, length,
325                                       FALLBACK_ENCODING);
326            } catch (IOException ex2) {
327                // impossible
328                throw new RuntimeException(ex2);
329            }
330        }
331    }
332
333    /**
334     * Copy a name into a buffer.
335     * Copies characters from the name into the buffer
336     * starting at the specified offset. 
337     * If the buffer is longer than the name, the buffer
338     * is filled with trailing NULs.
339     * If the name is longer than the buffer,
340     * the output is truncated.
341     *
342     * @param name The header name from which to copy the characters.
343     * @param buf The buffer where the name is to be stored.
344     * @param offset The starting offset into the buffer
345     * @param length The maximum number of header bytes to copy.
346     * @param encoding name of the encoding to use for file names
347     * @since 1.4
348     * @return The updated offset, i.e. offset + length
349     */
350    public static int formatNameBytes(String name, byte[] buf, final int offset,
351                                      final int length,
352                                      final ZipEncoding encoding)
353        throws IOException {
354        int len = name.length();
355        ByteBuffer b = encoding.encode(name);
356        while (b.limit() > length && len > 0) {
357            b = encoding.encode(name.substring(0, --len));
358        }
359        final int limit = b.limit() - b.position();
360        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
361
362        // Pad any remaining output bytes with NUL
363        for (int i = limit; i < length; ++i) {
364            buf[offset + i] = 0;
365        }
366
367        return offset + length;
368    }
369
370    /**
371     * Fill buffer with unsigned octal number, padded with leading zeroes.
372     * 
373     * @param value number to convert to octal - treated as unsigned
374     * @param buffer destination buffer
375     * @param offset starting offset in buffer
376     * @param length length of buffer to fill
377     * @throws IllegalArgumentException if the value will not fit in the buffer
378     */
379    public static void formatUnsignedOctalString(final long value, byte[] buffer,
380            final int offset, final int length) {
381        int remaining = length;
382        remaining--;
383        if (value == 0) {
384            buffer[offset + remaining--] = (byte) '0';
385        } else {
386            long val = value;
387            for (; remaining >= 0 && val != 0; --remaining) {
388                // CheckStyle:MagicNumber OFF
389                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
390                val = val >>> 3;
391                // CheckStyle:MagicNumber ON
392            }
393            if (val != 0){
394                throw new IllegalArgumentException
395                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
396            }
397        }
398
399        for (; remaining >= 0; --remaining) { // leading zeros
400            buffer[offset + remaining] = (byte) '0';
401        }
402    }
403
404    /**
405     * Write an octal integer into a buffer.
406     *
407     * Uses {@link #formatUnsignedOctalString} to format
408     * the value as an octal string with leading zeros.
409     * The converted number is followed by space and NUL
410     * 
411     * @param value The value to write
412     * @param buf The buffer to receive the output
413     * @param offset The starting offset into the buffer
414     * @param length The size of the output buffer
415     * @return The updated offset, i.e offset+length
416     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
417     */
418    public static int formatOctalBytes(final long value, byte[] buf, final int offset, final int length) {
419
420        int idx=length-2; // For space and trailing null
421        formatUnsignedOctalString(value, buf, offset, idx);
422
423        buf[offset + idx++] = (byte) ' '; // Trailing space
424        buf[offset + idx]   = 0; // Trailing null
425
426        return offset + length;
427    }
428
429    /**
430     * Write an octal long integer into a buffer.
431     * 
432     * Uses {@link #formatUnsignedOctalString} to format
433     * the value as an octal string with leading zeros.
434     * The converted number is followed by a space.
435     * 
436     * @param value The value to write as octal
437     * @param buf The destinationbuffer.
438     * @param offset The starting offset into the buffer.
439     * @param length The length of the buffer
440     * @return The updated offset
441     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
442     */
443    public static int formatLongOctalBytes(final long value, byte[] buf, final int offset, final int length) {
444
445        int idx=length-1; // For space
446
447        formatUnsignedOctalString(value, buf, offset, idx);
448        buf[offset + idx] = (byte) ' '; // Trailing space
449
450        return offset + length;
451    }
452
453    /**
454     * Write an long integer into a buffer as an octal string if this
455     * will fit, or as a binary number otherwise.
456     * 
457     * Uses {@link #formatUnsignedOctalString} to format
458     * the value as an octal string with leading zeros.
459     * The converted number is followed by a space.
460     * 
461     * @param value The value to write into the buffer.
462     * @param buf The destination buffer.
463     * @param offset The starting offset into the buffer.
464     * @param length The length of the buffer.
465     * @return The updated offset.
466     * @throws IllegalArgumentException if the value (and trailer)
467     * will not fit in the buffer.
468     * @since 1.4
469     */
470    public static int formatLongOctalOrBinaryBytes(
471        final long value, byte[] buf, final int offset, final int length) {
472
473        // Check whether we are dealing with UID/GID or SIZE field
474        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
475
476        final boolean negative = value < 0;
477        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
478            return formatLongOctalBytes(value, buf, offset, length);
479        }
480
481        if (length < 9) {
482            formatLongBinary(value, buf, offset, length, negative);
483        }
484        formatBigIntegerBinary(value, buf, offset, length, negative);
485
486        buf[offset] = (byte) (negative ? 0xff : 0x80);
487        return offset + length;
488    }
489
490    private static void formatLongBinary(final long value, byte[] buf,
491                                         final int offset, final int length,
492                                         final boolean negative) {
493        final int bits = (length - 1) * 8;
494        final long max = 1l << bits;
495        long val = Math.abs(value);
496        if (val >= max) {
497            throw new IllegalArgumentException("Value " + value +
498                " is too large for " + length + " byte field.");
499        }
500        if (negative) {
501            val ^= max - 1;
502            val |= 0xff << bits;
503            val++;
504        }
505        for (int i = offset + length - 1; i >= offset; i--) {
506            buf[i] = (byte) val;
507            val >>= 8;
508        }
509    }
510
511    private static void formatBigIntegerBinary(final long value, byte[] buf,
512                                               final int offset,
513                                               final int length,
514                                               final boolean negative) {
515        BigInteger val = BigInteger.valueOf(value);
516        final byte[] b = val.toByteArray();
517        final int len = b.length;
518        final int off = offset + length - len;
519        System.arraycopy(b, 0, buf, off, len);
520        final byte fill = (byte) (negative ? 0xff : 0);
521        for (int i = offset + 1; i < off; i++) {
522            buf[i] = fill;
523        }
524    }
525
526    /**
527     * Writes an octal value into a buffer.
528     * 
529     * Uses {@link #formatUnsignedOctalString} to format
530     * the value as an octal string with leading zeros.
531     * The converted number is followed by NUL and then space.
532     *
533     * @param value The value to convert
534     * @param buf The destination buffer
535     * @param offset The starting offset into the buffer.
536     * @param length The size of the buffer.
537     * @return The updated value of offset, i.e. offset+length
538     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
539     */
540    public static int formatCheckSumOctalBytes(final long value, byte[] buf, final int offset, final int length) {
541
542        int idx=length-2; // for NUL and space
543        formatUnsignedOctalString(value, buf, offset, idx);
544
545        buf[offset + idx++]   = 0; // Trailing null
546        buf[offset + idx]     = (byte) ' '; // Trailing space
547
548        return offset + length;
549    }
550
551    /**
552     * Compute the checksum of a tar entry header.
553     *
554     * @param buf The tar entry's header buffer.
555     * @return The computed checksum.
556     */
557    public static long computeCheckSum(final byte[] buf) {
558        long sum = 0;
559
560        for (byte element : buf) {
561            sum += BYTE_MASK & element;
562        }
563
564        return sum;
565    }
566
567    /**
568     * Wikipedia <a href="http://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>:
569     * <blockquote>
570     * The checksum is calculated by taking the sum of the unsigned byte values
571     * of the header block with the eight checksum bytes taken to be ascii
572     * spaces (decimal value 32). It is stored as a six digit octal number with
573     * leading zeroes followed by a NUL and then a space. Various
574     * implementations do not adhere to this format. For better compatibility,
575     * ignore leading and trailing whitespace, and get the first six digits. In
576     * addition, some historic tar implementations treated bytes as signed.
577     * Implementations typically calculate the checksum both ways, and treat it
578     * as good if either the signed or unsigned sum matches the included
579     * checksum.
580     * </blockquote>
581     * <p>
582     * In addition there are
583     * <a href="https://issues.apache.org/jira/browse/COMPRESS-117">some tar files</a>
584     * that seem to have parts of their header cleared to zero (no detectable
585     * magic bytes, etc.) but still have a reasonable-looking checksum field
586     * present. It looks like we can detect such cases reasonably well by
587     * checking whether the stored checksum is <em>greater than</em> the
588     * computed unsigned checksum. That check is unlikely to pass on some
589     * random file header, as it would need to have a valid sequence of
590     * octal digits in just the right place.
591     * <p>
592     * The return value of this method should be treated as a best-effort
593     * heuristic rather than an absolute and final truth. The checksum
594     * verification logic may well evolve over time as more special cases
595     * are encountered.
596     *
597     * @param header tar header
598     * @return whether the checksum is reasonably good
599     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
600     * @since 1.5
601     */
602    public static boolean verifyCheckSum(byte[] header) {
603        long storedSum = 0;
604        long unsignedSum = 0;
605        long signedSum = 0;
606
607        int digits = 0;
608        for (int i = 0; i < header.length; i++) {
609            byte b = header[i];
610            if (CHKSUM_OFFSET  <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
611                if ('0' <= b && b <= '7' && digits++ < 6) {
612                    storedSum = storedSum * 8 + b - '0';
613                } else if (digits > 0) {
614                    digits = 6; // only look at the first octal digit sequence
615                }
616                b = ' ';
617            }
618            unsignedSum += 0xff & b;
619            signedSum += b;
620        }
621
622        return storedSum == unsignedSum || storedSum == signedSum
623                || storedSum > unsignedSum; // COMPRESS-177
624    }
625
626}