001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 020/* 021 * This package is based on the work done by Keiron Liddle, Aftex Software 022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his 023 * great code. 024 */ 025package org.apache.commons.compress.compressors.bzip2; 026 027import java.io.IOException; 028import java.io.InputStream; 029 030import org.apache.commons.compress.compressors.CompressorInputStream; 031 032/** 033 * An input stream that decompresses from the BZip2 format to be read as any other stream. 034 * 035 * @NotThreadSafe 036 */ 037public class BZip2CompressorInputStream extends CompressorInputStream implements 038 BZip2Constants { 039 040 /** 041 * Index of the last char in the block, so the block size == last + 1. 042 */ 043 private int last; 044 045 /** 046 * Index in zptr[] of original string after sorting. 047 */ 048 private int origPtr; 049 050 /** 051 * always: in the range 0 .. 9. The current block size is 100000 * this 052 * number. 053 */ 054 private int blockSize100k; 055 056 private boolean blockRandomised; 057 058 private int bsBuff; 059 private int bsLive; 060 private final CRC crc = new CRC(); 061 062 private int nInUse; 063 064 private InputStream in; 065 private final boolean decompressConcatenated; 066 067 private static final int EOF = 0; 068 private static final int START_BLOCK_STATE = 1; 069 private static final int RAND_PART_A_STATE = 2; 070 private static final int RAND_PART_B_STATE = 3; 071 private static final int RAND_PART_C_STATE = 4; 072 private static final int NO_RAND_PART_A_STATE = 5; 073 private static final int NO_RAND_PART_B_STATE = 6; 074 private static final int NO_RAND_PART_C_STATE = 7; 075 076 private int currentState = START_BLOCK_STATE; 077 078 private int storedBlockCRC, storedCombinedCRC; 079 private int computedBlockCRC, computedCombinedCRC; 080 081 // Variables used by setup* methods exclusively 082 083 private int su_count; 084 private int su_ch2; 085 private int su_chPrev; 086 private int su_i2; 087 private int su_j2; 088 private int su_rNToGo; 089 private int su_rTPos; 090 private int su_tPos; 091 private char su_z; 092 093 /** 094 * All memory intensive stuff. This field is initialized by initBlock(). 095 */ 096 private BZip2CompressorInputStream.Data data; 097 098 /** 099 * Constructs a new BZip2CompressorInputStream which decompresses bytes 100 * read from the specified stream. This doesn't suppprt decompressing 101 * concatenated .bz2 files. 102 * 103 * @throws IOException 104 * if the stream content is malformed or an I/O error occurs. 105 * @throws NullPointerException 106 * if <tt>in == null</tt> 107 */ 108 public BZip2CompressorInputStream(final InputStream in) throws IOException { 109 this(in, false); 110 } 111 112 /** 113 * Constructs a new BZip2CompressorInputStream which decompresses bytes 114 * read from the specified stream. 115 * 116 * @param in the InputStream from which this object should be created 117 * @param decompressConcatenated 118 * if true, decompress until the end of the input; 119 * if false, stop after the first .bz2 stream and 120 * leave the input position to point to the next 121 * byte after the .bz2 stream 122 * 123 * @throws IOException 124 * if the stream content is malformed or an I/O error occurs. 125 * @throws NullPointerException 126 * if <tt>in == null</tt> 127 */ 128 public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException { 129 this.in = in; 130 this.decompressConcatenated = decompressConcatenated; 131 132 init(true); 133 initBlock(); 134 } 135 136 @Override 137 public int read() throws IOException { 138 if (this.in != null) { 139 int r = read0(); 140 count(r < 0 ? -1 : 1); 141 return r; 142 } else { 143 throw new IOException("stream closed"); 144 } 145 } 146 147 /* 148 * (non-Javadoc) 149 * 150 * @see java.io.InputStream#read(byte[], int, int) 151 */ 152 @Override 153 public int read(final byte[] dest, final int offs, final int len) 154 throws IOException { 155 if (offs < 0) { 156 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 157 } 158 if (len < 0) { 159 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 160 } 161 if (offs + len > dest.length) { 162 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" 163 + len + ") > dest.length(" + dest.length + ")."); 164 } 165 if (this.in == null) { 166 throw new IOException("stream closed"); 167 } 168 169 final int hi = offs + len; 170 int destOffs = offs; 171 int b; 172 while (destOffs < hi && ((b = read0()) >= 0)) { 173 dest[destOffs++] = (byte) b; 174 count(1); 175 } 176 177 int c = (destOffs == offs) ? -1 : (destOffs - offs); 178 return c; 179 } 180 181 private void makeMaps() { 182 final boolean[] inUse = this.data.inUse; 183 final byte[] seqToUnseq = this.data.seqToUnseq; 184 185 int nInUseShadow = 0; 186 187 for (int i = 0; i < 256; i++) { 188 if (inUse[i]) { 189 seqToUnseq[nInUseShadow++] = (byte) i; 190 } 191 } 192 193 this.nInUse = nInUseShadow; 194 } 195 196 private int read0() throws IOException { 197 switch (currentState) { 198 case EOF: 199 return -1; 200 201 case START_BLOCK_STATE: 202 return setupBlock(); 203 204 case RAND_PART_A_STATE: 205 throw new IllegalStateException(); 206 207 case RAND_PART_B_STATE: 208 return setupRandPartB(); 209 210 case RAND_PART_C_STATE: 211 return setupRandPartC(); 212 213 case NO_RAND_PART_A_STATE: 214 throw new IllegalStateException(); 215 216 case NO_RAND_PART_B_STATE: 217 return setupNoRandPartB(); 218 219 case NO_RAND_PART_C_STATE: 220 return setupNoRandPartC(); 221 222 default: 223 throw new IllegalStateException(); 224 } 225 } 226 227 private boolean init(boolean isFirstStream) throws IOException { 228 if (null == in) { 229 throw new IOException("No InputStream"); 230 } 231 232 int magic0 = this.in.read(); 233 if (magic0 == -1 && !isFirstStream) { 234 return false; 235 } 236 int magic1 = this.in.read(); 237 int magic2 = this.in.read(); 238 239 if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') { 240 throw new IOException(isFirstStream 241 ? "Stream is not in the BZip2 format" 242 : "Garbage after a valid BZip2 stream"); 243 } 244 245 int blockSize = this.in.read(); 246 if ((blockSize < '1') || (blockSize > '9')) { 247 throw new IOException("BZip2 block size is invalid"); 248 } 249 250 this.blockSize100k = blockSize - '0'; 251 252 this.bsLive = 0; 253 this.computedCombinedCRC = 0; 254 255 return true; 256 } 257 258 private void initBlock() throws IOException { 259 char magic0; 260 char magic1; 261 char magic2; 262 char magic3; 263 char magic4; 264 char magic5; 265 266 while (true) { 267 // Get the block magic bytes. 268 magic0 = bsGetUByte(); 269 magic1 = bsGetUByte(); 270 magic2 = bsGetUByte(); 271 magic3 = bsGetUByte(); 272 magic4 = bsGetUByte(); 273 magic5 = bsGetUByte(); 274 275 // If isn't end of stream magic, break out of the loop. 276 if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45 277 || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) { 278 break; 279 } 280 281 // End of stream was reached. Check the combined CRC and 282 // advance to the next .bz2 stream if decoding concatenated 283 // streams. 284 if (complete()) { 285 return; 286 } 287 } 288 289 if (magic0 != 0x31 || // '1' 290 magic1 != 0x41 || // ')' 291 magic2 != 0x59 || // 'Y' 292 magic3 != 0x26 || // '&' 293 magic4 != 0x53 || // 'S' 294 magic5 != 0x59 // 'Y' 295 ) { 296 this.currentState = EOF; 297 throw new IOException("bad block header"); 298 } else { 299 this.storedBlockCRC = bsGetInt(); 300 this.blockRandomised = bsR(1) == 1; 301 302 /** 303 * Allocate data here instead in constructor, so we do not allocate 304 * it if the input file is empty. 305 */ 306 if (this.data == null) { 307 this.data = new Data(this.blockSize100k); 308 } 309 310 // currBlockNo++; 311 getAndMoveToFrontDecode(); 312 313 this.crc.initialiseCRC(); 314 this.currentState = START_BLOCK_STATE; 315 } 316 } 317 318 private void endBlock() throws IOException { 319 this.computedBlockCRC = this.crc.getFinalCRC(); 320 321 // A bad CRC is considered a fatal error. 322 if (this.storedBlockCRC != this.computedBlockCRC) { 323 // make next blocks readable without error 324 // (repair feature, not yet documented, not tested) 325 this.computedCombinedCRC = (this.storedCombinedCRC << 1) 326 | (this.storedCombinedCRC >>> 31); 327 this.computedCombinedCRC ^= this.storedBlockCRC; 328 329 throw new IOException("BZip2 CRC error"); 330 } 331 332 this.computedCombinedCRC = (this.computedCombinedCRC << 1) 333 | (this.computedCombinedCRC >>> 31); 334 this.computedCombinedCRC ^= this.computedBlockCRC; 335 } 336 337 private boolean complete() throws IOException { 338 this.storedCombinedCRC = bsGetInt(); 339 this.currentState = EOF; 340 this.data = null; 341 342 if (this.storedCombinedCRC != this.computedCombinedCRC) { 343 throw new IOException("BZip2 CRC error"); 344 } 345 346 // Look for the next .bz2 stream if decompressing 347 // concatenated files. 348 return !decompressConcatenated || !init(false); 349 } 350 351 @Override 352 public void close() throws IOException { 353 InputStream inShadow = this.in; 354 if (inShadow != null) { 355 try { 356 if (inShadow != System.in) { 357 inShadow.close(); 358 } 359 } finally { 360 this.data = null; 361 this.in = null; 362 } 363 } 364 } 365 366 private int bsR(final int n) throws IOException { 367 int bsLiveShadow = this.bsLive; 368 int bsBuffShadow = this.bsBuff; 369 370 if (bsLiveShadow < n) { 371 final InputStream inShadow = this.in; 372 do { 373 int thech = inShadow.read(); 374 375 if (thech < 0) { 376 throw new IOException("unexpected end of stream"); 377 } 378 379 bsBuffShadow = (bsBuffShadow << 8) | thech; 380 bsLiveShadow += 8; 381 } while (bsLiveShadow < n); 382 383 this.bsBuff = bsBuffShadow; 384 } 385 386 this.bsLive = bsLiveShadow - n; 387 return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1); 388 } 389 390 private boolean bsGetBit() throws IOException { 391 int bsLiveShadow = this.bsLive; 392 int bsBuffShadow = this.bsBuff; 393 394 if (bsLiveShadow < 1) { 395 int thech = this.in.read(); 396 397 if (thech < 0) { 398 throw new IOException("unexpected end of stream"); 399 } 400 401 bsBuffShadow = (bsBuffShadow << 8) | thech; 402 bsLiveShadow += 8; 403 this.bsBuff = bsBuffShadow; 404 } 405 406 this.bsLive = bsLiveShadow - 1; 407 return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0; 408 } 409 410 private char bsGetUByte() throws IOException { 411 return (char) bsR(8); 412 } 413 414 private int bsGetInt() throws IOException { 415 return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8); 416 } 417 418 /** 419 * Called by createHuffmanDecodingTables() exclusively. 420 */ 421 private static void hbCreateDecodeTables(final int[] limit, 422 final int[] base, final int[] perm, final char[] length, 423 final int minLen, final int maxLen, final int alphaSize) { 424 for (int i = minLen, pp = 0; i <= maxLen; i++) { 425 for (int j = 0; j < alphaSize; j++) { 426 if (length[j] == i) { 427 perm[pp++] = j; 428 } 429 } 430 } 431 432 for (int i = MAX_CODE_LEN; --i > 0;) { 433 base[i] = 0; 434 limit[i] = 0; 435 } 436 437 for (int i = 0; i < alphaSize; i++) { 438 base[length[i] + 1]++; 439 } 440 441 for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { 442 b += base[i]; 443 base[i] = b; 444 } 445 446 for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { 447 final int nb = base[i + 1]; 448 vec += nb - b; 449 b = nb; 450 limit[i] = vec - 1; 451 vec <<= 1; 452 } 453 454 for (int i = minLen + 1; i <= maxLen; i++) { 455 base[i] = ((limit[i - 1] + 1) << 1) - base[i]; 456 } 457 } 458 459 private void recvDecodingTables() throws IOException { 460 final Data dataShadow = this.data; 461 final boolean[] inUse = dataShadow.inUse; 462 final byte[] pos = dataShadow.recvDecodingTables_pos; 463 final byte[] selector = dataShadow.selector; 464 final byte[] selectorMtf = dataShadow.selectorMtf; 465 466 int inUse16 = 0; 467 468 /* Receive the mapping table */ 469 for (int i = 0; i < 16; i++) { 470 if (bsGetBit()) { 471 inUse16 |= 1 << i; 472 } 473 } 474 475 for (int i = 256; --i >= 0;) { 476 inUse[i] = false; 477 } 478 479 for (int i = 0; i < 16; i++) { 480 if ((inUse16 & (1 << i)) != 0) { 481 final int i16 = i << 4; 482 for (int j = 0; j < 16; j++) { 483 if (bsGetBit()) { 484 inUse[i16 + j] = true; 485 } 486 } 487 } 488 } 489 490 makeMaps(); 491 final int alphaSize = this.nInUse + 2; 492 493 /* Now the selectors */ 494 final int nGroups = bsR(3); 495 final int nSelectors = bsR(15); 496 497 for (int i = 0; i < nSelectors; i++) { 498 int j = 0; 499 while (bsGetBit()) { 500 j++; 501 } 502 selectorMtf[i] = (byte) j; 503 } 504 505 /* Undo the MTF values for the selectors. */ 506 for (int v = nGroups; --v >= 0;) { 507 pos[v] = (byte) v; 508 } 509 510 for (int i = 0; i < nSelectors; i++) { 511 int v = selectorMtf[i] & 0xff; 512 final byte tmp = pos[v]; 513 while (v > 0) { 514 // nearly all times v is zero, 4 in most other cases 515 pos[v] = pos[v - 1]; 516 v--; 517 } 518 pos[0] = tmp; 519 selector[i] = tmp; 520 } 521 522 final char[][] len = dataShadow.temp_charArray2d; 523 524 /* Now the coding tables */ 525 for (int t = 0; t < nGroups; t++) { 526 int curr = bsR(5); 527 final char[] len_t = len[t]; 528 for (int i = 0; i < alphaSize; i++) { 529 while (bsGetBit()) { 530 curr += bsGetBit() ? -1 : 1; 531 } 532 len_t[i] = (char) curr; 533 } 534 } 535 536 // finally create the Huffman tables 537 createHuffmanDecodingTables(alphaSize, nGroups); 538 } 539 540 /** 541 * Called by recvDecodingTables() exclusively. 542 */ 543 private void createHuffmanDecodingTables(final int alphaSize, 544 final int nGroups) { 545 final Data dataShadow = this.data; 546 final char[][] len = dataShadow.temp_charArray2d; 547 final int[] minLens = dataShadow.minLens; 548 final int[][] limit = dataShadow.limit; 549 final int[][] base = dataShadow.base; 550 final int[][] perm = dataShadow.perm; 551 552 for (int t = 0; t < nGroups; t++) { 553 int minLen = 32; 554 int maxLen = 0; 555 final char[] len_t = len[t]; 556 for (int i = alphaSize; --i >= 0;) { 557 final char lent = len_t[i]; 558 if (lent > maxLen) { 559 maxLen = lent; 560 } 561 if (lent < minLen) { 562 minLen = lent; 563 } 564 } 565 hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, 566 maxLen, alphaSize); 567 minLens[t] = minLen; 568 } 569 } 570 571 private void getAndMoveToFrontDecode() throws IOException { 572 this.origPtr = bsR(24); 573 recvDecodingTables(); 574 575 final InputStream inShadow = this.in; 576 final Data dataShadow = this.data; 577 final byte[] ll8 = dataShadow.ll8; 578 final int[] unzftab = dataShadow.unzftab; 579 final byte[] selector = dataShadow.selector; 580 final byte[] seqToUnseq = dataShadow.seqToUnseq; 581 final char[] yy = dataShadow.getAndMoveToFrontDecode_yy; 582 final int[] minLens = dataShadow.minLens; 583 final int[][] limit = dataShadow.limit; 584 final int[][] base = dataShadow.base; 585 final int[][] perm = dataShadow.perm; 586 final int limitLast = this.blockSize100k * 100000; 587 588 /* 589 * Setting up the unzftab entries here is not strictly necessary, but it 590 * does save having to do it later in a separate pass, and so saves a 591 * block's worth of cache misses. 592 */ 593 for (int i = 256; --i >= 0;) { 594 yy[i] = (char) i; 595 unzftab[i] = 0; 596 } 597 598 int groupNo = 0; 599 int groupPos = G_SIZE - 1; 600 final int eob = this.nInUse + 1; 601 int nextSym = getAndMoveToFrontDecode0(0); 602 int bsBuffShadow = this.bsBuff; 603 int bsLiveShadow = this.bsLive; 604 int lastShadow = -1; 605 int zt = selector[groupNo] & 0xff; 606 int[] base_zt = base[zt]; 607 int[] limit_zt = limit[zt]; 608 int[] perm_zt = perm[zt]; 609 int minLens_zt = minLens[zt]; 610 611 while (nextSym != eob) { 612 if ((nextSym == RUNA) || (nextSym == RUNB)) { 613 int s = -1; 614 615 for (int n = 1; true; n <<= 1) { 616 if (nextSym == RUNA) { 617 s += n; 618 } else if (nextSym == RUNB) { 619 s += n << 1; 620 } else { 621 break; 622 } 623 624 if (groupPos == 0) { 625 groupPos = G_SIZE - 1; 626 zt = selector[++groupNo] & 0xff; 627 base_zt = base[zt]; 628 limit_zt = limit[zt]; 629 perm_zt = perm[zt]; 630 minLens_zt = minLens[zt]; 631 } else { 632 groupPos--; 633 } 634 635 int zn = minLens_zt; 636 637 // Inlined: 638 // int zvec = bsR(zn); 639 while (bsLiveShadow < zn) { 640 final int thech = inShadow.read(); 641 if (thech >= 0) { 642 bsBuffShadow = (bsBuffShadow << 8) | thech; 643 bsLiveShadow += 8; 644 continue; 645 } else { 646 throw new IOException("unexpected end of stream"); 647 } 648 } 649 int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) 650 & ((1 << zn) - 1); 651 bsLiveShadow -= zn; 652 653 while (zvec > limit_zt[zn]) { 654 zn++; 655 while (bsLiveShadow < 1) { 656 final int thech = inShadow.read(); 657 if (thech >= 0) { 658 bsBuffShadow = (bsBuffShadow << 8) | thech; 659 bsLiveShadow += 8; 660 continue; 661 } else { 662 throw new IOException( 663 "unexpected end of stream"); 664 } 665 } 666 bsLiveShadow--; 667 zvec = (zvec << 1) 668 | ((bsBuffShadow >> bsLiveShadow) & 1); 669 } 670 nextSym = perm_zt[zvec - base_zt[zn]]; 671 } 672 673 final byte ch = seqToUnseq[yy[0]]; 674 unzftab[ch & 0xff] += s + 1; 675 676 while (s-- >= 0) { 677 ll8[++lastShadow] = ch; 678 } 679 680 if (lastShadow >= limitLast) { 681 throw new IOException("block overrun"); 682 } 683 } else { 684 if (++lastShadow >= limitLast) { 685 throw new IOException("block overrun"); 686 } 687 688 final char tmp = yy[nextSym - 1]; 689 unzftab[seqToUnseq[tmp] & 0xff]++; 690 ll8[lastShadow] = seqToUnseq[tmp]; 691 692 /* 693 * This loop is hammered during decompression, hence avoid 694 * native method call overhead of System.arraycopy for very 695 * small ranges to copy. 696 */ 697 if (nextSym <= 16) { 698 for (int j = nextSym - 1; j > 0;) { 699 yy[j] = yy[--j]; 700 } 701 } else { 702 System.arraycopy(yy, 0, yy, 1, nextSym - 1); 703 } 704 705 yy[0] = tmp; 706 707 if (groupPos == 0) { 708 groupPos = G_SIZE - 1; 709 zt = selector[++groupNo] & 0xff; 710 base_zt = base[zt]; 711 limit_zt = limit[zt]; 712 perm_zt = perm[zt]; 713 minLens_zt = minLens[zt]; 714 } else { 715 groupPos--; 716 } 717 718 int zn = minLens_zt; 719 720 // Inlined: 721 // int zvec = bsR(zn); 722 while (bsLiveShadow < zn) { 723 final int thech = inShadow.read(); 724 if (thech >= 0) { 725 bsBuffShadow = (bsBuffShadow << 8) | thech; 726 bsLiveShadow += 8; 727 continue; 728 } else { 729 throw new IOException("unexpected end of stream"); 730 } 731 } 732 int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) 733 & ((1 << zn) - 1); 734 bsLiveShadow -= zn; 735 736 while (zvec > limit_zt[zn]) { 737 zn++; 738 while (bsLiveShadow < 1) { 739 final int thech = inShadow.read(); 740 if (thech >= 0) { 741 bsBuffShadow = (bsBuffShadow << 8) | thech; 742 bsLiveShadow += 8; 743 continue; 744 } else { 745 throw new IOException("unexpected end of stream"); 746 } 747 } 748 bsLiveShadow--; 749 zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); 750 } 751 nextSym = perm_zt[zvec - base_zt[zn]]; 752 } 753 } 754 755 this.last = lastShadow; 756 this.bsLive = bsLiveShadow; 757 this.bsBuff = bsBuffShadow; 758 } 759 760 private int getAndMoveToFrontDecode0(final int groupNo) throws IOException { 761 final InputStream inShadow = this.in; 762 final Data dataShadow = this.data; 763 final int zt = dataShadow.selector[groupNo] & 0xff; 764 final int[] limit_zt = dataShadow.limit[zt]; 765 int zn = dataShadow.minLens[zt]; 766 int zvec = bsR(zn); 767 int bsLiveShadow = this.bsLive; 768 int bsBuffShadow = this.bsBuff; 769 770 while (zvec > limit_zt[zn]) { 771 zn++; 772 while (bsLiveShadow < 1) { 773 final int thech = inShadow.read(); 774 775 if (thech >= 0) { 776 bsBuffShadow = (bsBuffShadow << 8) | thech; 777 bsLiveShadow += 8; 778 continue; 779 } else { 780 throw new IOException("unexpected end of stream"); 781 } 782 } 783 bsLiveShadow--; 784 zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); 785 } 786 787 this.bsLive = bsLiveShadow; 788 this.bsBuff = bsBuffShadow; 789 790 return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]]; 791 } 792 793 private int setupBlock() throws IOException { 794 if (currentState == EOF || this.data == null) { 795 return -1; 796 } 797 798 final int[] cftab = this.data.cftab; 799 final int[] tt = this.data.initTT(this.last + 1); 800 final byte[] ll8 = this.data.ll8; 801 cftab[0] = 0; 802 System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); 803 804 for (int i = 1, c = cftab[0]; i <= 256; i++) { 805 c += cftab[i]; 806 cftab[i] = c; 807 } 808 809 for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { 810 tt[cftab[ll8[i] & 0xff]++] = i; 811 } 812 813 if ((this.origPtr < 0) || (this.origPtr >= tt.length)) { 814 throw new IOException("stream corrupted"); 815 } 816 817 this.su_tPos = tt[this.origPtr]; 818 this.su_count = 0; 819 this.su_i2 = 0; 820 this.su_ch2 = 256; /* not a char and not EOF */ 821 822 if (this.blockRandomised) { 823 this.su_rNToGo = 0; 824 this.su_rTPos = 0; 825 return setupRandPartA(); 826 } 827 return setupNoRandPartA(); 828 } 829 830 private int setupRandPartA() throws IOException { 831 if (this.su_i2 <= this.last) { 832 this.su_chPrev = this.su_ch2; 833 int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; 834 this.su_tPos = this.data.tt[this.su_tPos]; 835 if (this.su_rNToGo == 0) { 836 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; 837 if (++this.su_rTPos == 512) { 838 this.su_rTPos = 0; 839 } 840 } else { 841 this.su_rNToGo--; 842 } 843 this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0; 844 this.su_i2++; 845 this.currentState = RAND_PART_B_STATE; 846 this.crc.updateCRC(su_ch2Shadow); 847 return su_ch2Shadow; 848 } else { 849 endBlock(); 850 initBlock(); 851 return setupBlock(); 852 } 853 } 854 855 private int setupNoRandPartA() throws IOException { 856 if (this.su_i2 <= this.last) { 857 this.su_chPrev = this.su_ch2; 858 int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; 859 this.su_ch2 = su_ch2Shadow; 860 this.su_tPos = this.data.tt[this.su_tPos]; 861 this.su_i2++; 862 this.currentState = NO_RAND_PART_B_STATE; 863 this.crc.updateCRC(su_ch2Shadow); 864 return su_ch2Shadow; 865 } else { 866 this.currentState = NO_RAND_PART_A_STATE; 867 endBlock(); 868 initBlock(); 869 return setupBlock(); 870 } 871 } 872 873 private int setupRandPartB() throws IOException { 874 if (this.su_ch2 != this.su_chPrev) { 875 this.currentState = RAND_PART_A_STATE; 876 this.su_count = 1; 877 return setupRandPartA(); 878 } else if (++this.su_count >= 4) { 879 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); 880 this.su_tPos = this.data.tt[this.su_tPos]; 881 if (this.su_rNToGo == 0) { 882 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; 883 if (++this.su_rTPos == 512) { 884 this.su_rTPos = 0; 885 } 886 } else { 887 this.su_rNToGo--; 888 } 889 this.su_j2 = 0; 890 this.currentState = RAND_PART_C_STATE; 891 if (this.su_rNToGo == 1) { 892 this.su_z ^= 1; 893 } 894 return setupRandPartC(); 895 } else { 896 this.currentState = RAND_PART_A_STATE; 897 return setupRandPartA(); 898 } 899 } 900 901 private int setupRandPartC() throws IOException { 902 if (this.su_j2 < this.su_z) { 903 this.crc.updateCRC(this.su_ch2); 904 this.su_j2++; 905 return this.su_ch2; 906 } else { 907 this.currentState = RAND_PART_A_STATE; 908 this.su_i2++; 909 this.su_count = 0; 910 return setupRandPartA(); 911 } 912 } 913 914 private int setupNoRandPartB() throws IOException { 915 if (this.su_ch2 != this.su_chPrev) { 916 this.su_count = 1; 917 return setupNoRandPartA(); 918 } else if (++this.su_count >= 4) { 919 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); 920 this.su_tPos = this.data.tt[this.su_tPos]; 921 this.su_j2 = 0; 922 return setupNoRandPartC(); 923 } else { 924 return setupNoRandPartA(); 925 } 926 } 927 928 private int setupNoRandPartC() throws IOException { 929 if (this.su_j2 < this.su_z) { 930 int su_ch2Shadow = this.su_ch2; 931 this.crc.updateCRC(su_ch2Shadow); 932 this.su_j2++; 933 this.currentState = NO_RAND_PART_C_STATE; 934 return su_ch2Shadow; 935 } else { 936 this.su_i2++; 937 this.su_count = 0; 938 return setupNoRandPartA(); 939 } 940 } 941 942 private static final class Data extends Object { 943 944 // (with blockSize 900k) 945 final boolean[] inUse = new boolean[256]; // 256 byte 946 947 final byte[] seqToUnseq = new byte[256]; // 256 byte 948 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 949 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 950 951 /** 952 * Freq table collected to save a pass over the data during 953 * decompression. 954 */ 955 final int[] unzftab = new int[256]; // 1024 byte 956 957 final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 958 final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 959 final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 960 final int[] minLens = new int[N_GROUPS]; // 24 byte 961 962 final int[] cftab = new int[257]; // 1028 byte 963 final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte 964 final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 965 // byte 966 final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte 967 // --------------- 968 // 60798 byte 969 970 int[] tt; // 3600000 byte 971 byte[] ll8; // 900000 byte 972 973 // --------------- 974 // 4560782 byte 975 // =============== 976 977 Data(int blockSize100k) { 978 this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE]; 979 } 980 981 /** 982 * Initializes the {@link #tt} array. 983 * 984 * This method is called when the required length of the array is known. 985 * I don't initialize it at construction time to avoid unneccessary 986 * memory allocation when compressing small files. 987 */ 988 int[] initTT(int length) { 989 int[] ttShadow = this.tt; 990 991 // tt.length should always be >= length, but theoretically 992 // it can happen, if the compressor mixed small and large 993 // blocks. Normally only the last block will be smaller 994 // than others. 995 if ((ttShadow == null) || (ttShadow.length < length)) { 996 this.tt = ttShadow = new int[length]; 997 } 998 999 return ttShadow; 1000 } 1001 1002 } 1003 1004 /** 1005 * Checks if the signature matches what is expected for a bzip2 file. 1006 * 1007 * @param signature 1008 * the bytes to check 1009 * @param length 1010 * the number of bytes to check 1011 * @return true, if this stream is a bzip2 compressed stream, false otherwise 1012 * 1013 * @since 1.1 1014 */ 1015 public static boolean matches(byte[] signature, int length) { 1016 1017 if (length < 3) { 1018 return false; 1019 } 1020 1021 if (signature[0] != 'B') { 1022 return false; 1023 } 1024 1025 if (signature[1] != 'Z') { 1026 return false; 1027 } 1028 1029 if (signature[2] != 'h') { 1030 return false; 1031 } 1032 1033 return true; 1034 } 1035}