123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228 |
- /*++
- Copyright (c) Alex Ionescu. All rights reserved.
- Module Name:
- lzma2dec.c
- Abstract:
- This module implements the LZMA2 decoding logic responsible for parsing the
- LZMA2 Control Byte, the Information Bytes (Compressed & Uncompressed Stream
- Size), and the Property Byte during the initial Dictionary Reset. Note that
- this module only implements support for a single such reset (i.e.: archives
- in "solid" mode).
- Author:
- Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
- Environment:
- Windows & Linux, user mode and kernel mode.
- --*/
- #include "minlzlib.h"
- #include "lzma2dec.h"
//
// Decodes a single LZMA chunk and validates that both the input consumed and
// the output produced match the sizes announced for the chunk.
//
// BytesProcessed - Running total; incremented by the chunk's output size on
//                  success, left untouched on failure.
// RawSize        - Expected number of output bytes for this chunk.
// CompressedSize - Expected number of input bytes for this chunk, as compared
//                  against the count reported by RcIsComplete.
//
// Returns true only if decoding succeeded and both counts matched.
//
bool
Lz2DecodeChunk (
    uint32_t* BytesProcessed,
    uint32_t RawSize,
    uint16_t CompressedSize
    )
{
    uint32_t count;

    //
    // Run the sequence decoder over the chunk
    //
    if (!LzDecode())
    {
        return false;
    }

    //
    // The range decoder must report completion...
    //
    if (!RcIsComplete(&count))
    {
        return false;
    }

    //
    // ...and must have consumed exactly the announced amount of input
    //
    if (count != CompressedSize)
    {
        return false;
    }

    //
    // Likewise, the dictionary must report completion...
    //
    if (!DtIsComplete(&count))
    {
        return false;
    }

    //
    // ...and must hold exactly the announced amount of output
    //
    if (count != RawSize)
    {
        return false;
    }

    //
    // Everything checks out; account for the output of this chunk
    //
    *BytesProcessed += count;
    return true;
}
- bool
- Lz2DecodeStream (
- uint32_t* BytesProcessed,
- bool GetSizeOnly
- )
- {
- const uint8_t* inBytes;
- LZMA2_CONTROL_BYTE controlByte;
- uint8_t propertyByte;
- uint32_t rawSize;
- uint16_t compressedSize;
- //
- // Read the first control byte
- //
- *BytesProcessed = 0;
- while (BfRead(&controlByte.Value))
- {
- //
- // When the LZMA2 control byte is 0, the entire stream is decoded. This
- // is the only success path out of this function.
- //
- if (controlByte.Value == 0)
- {
- return true;
- }
- //
- // Read the appropriate number of info bytes based on the stream type.
- //
- if (!BfSeek((controlByte.u.Common.IsLzma == 1 ) ? 4 : 2, &inBytes))
- {
- break;
- }
- //
- // For LZMA streams calculate both the uncompressed and compressed size
- // from the info bytes. Uncompressed streams only have the former.
- //
- if (controlByte.u.Common.IsLzma == 1)
- {
- rawSize = controlByte.u.Lzma.RawSize << 16;
- compressedSize = (uint16_t)(inBytes[2] << 8);
- compressedSize += inBytes[3] + 1;
- }
- else
- {
- rawSize = 0;
- compressedSize = 0;
- }
- //
- // Make sure that the output buffer that was supplied is big enough to
- // fit the uncompressed chunk, unless we're just calculating the size.
- //
- rawSize += inBytes[0] << 8;
- rawSize += inBytes[1] + 1;
- if (!GetSizeOnly && !DtSetLimit(rawSize))
- {
- break;
- }
- //
- // Check if the full LZMA state needs to be reset, which must happen at
- // the start of stream. Also check for a property reset, which occurs
- // when an LZMA stream follows an uncompressed stream. Separately,
- // check for a state reset without a property byte (happens rarely,
- // but does happen in a few compressed streams).
- //
- if ((controlByte.u.Lzma.ResetState == Lzma2FullReset) ||
- (controlByte.u.Lzma.ResetState == Lzma2PropertyReset))
- {
- //
- // Read the LZMA properties and then initialize the decoder.
- //
- if (!BfRead(&propertyByte) || !LzInitialize(propertyByte))
- {
- break;
- }
- }
- else if (controlByte.u.Lzma.ResetState == Lzma2SimpleReset)
- {
- LzResetState();
- }
- else if (controlByte.u.Lzma.ResetState == Lzma2NoReset)
- {
- ;
- }
- //
- // Don't do any decompression if the caller only wants to know the size
- //
- if (GetSizeOnly)
- {
- *BytesProcessed += rawSize;
- BfSeek((controlByte.u.Common.IsLzma == 1) ? compressedSize : rawSize,
- &inBytes);
- continue;
- }
- else if (controlByte.u.Common.IsLzma == 0)
- {
- //
- // Seek to the requested size in the input buffer
- //
- if (!BfSeek(rawSize, &inBytes))
- {
- return false;
- }
- //
- // Copy the data into the dictionary as-is
- //
- for (uint32_t i = 0; i < rawSize; i++)
- {
- DtPutSymbol(inBytes[i]);
- }
- //
- // Update bytes and keep going to the next chunk
- //
- *BytesProcessed += rawSize;
- continue;
- }
- //
- // Record how many bytes are left in this sequence as our SoftLimit for
- // the other operations. This allows us to omit most range checking
- // logic in rangedec.c. This soft limit lasts until reset below.
- //
- if (!BfSetSoftLimit(compressedSize))
- {
- break;
- }
- //
- // Read the initial range and code bytes to initialize the arithmetic
- // coding decoder, and let it know how much input data exists. We've
- // already validated that this much space exists in the input buffer.
- //
- if (!RcInitialize(&compressedSize))
- {
- break;
- }
- //
- // Start decoding the LZMA sequences in this chunk
- //
- if (!Lz2DecodeChunk(BytesProcessed, rawSize, compressedSize))
- {
- break;
- }
- //
- // Having decoded that chunk, reset our soft limit (to the full
- // input stream) so we can read the next chunk.
- //
- BfResetSoftLimit();
- }
- return false;
- }
|