lzma2dec.c 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  /*++

  Copyright (c) Alex Ionescu.  All rights reserved.

  Module Name:

      lzma2dec.c

  Abstract:

      This module implements the LZMA2 decoding logic responsible for parsing the
      LZMA2 Control Byte, the Information Bytes (Compressed & Uncompressed Stream
      Size), and the Property Byte during the initial Dictionary Reset. Note that
      this module only implements support for a single such reset (i.e.: archives
      in "solid" mode).

  Author:

      Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version

  Environment:

      Windows & Linux, user mode and kernel mode.

  --*/
  16. #include "minlzlib.h"
  17. #include "lzma2dec.h"
  //
  // Decodes a single LZMA chunk and validates that it ended exactly on its
  // advertised boundaries.
  //
  // BytesProcessed - Running total; incremented by the byte count reported by
  //                  DtIsComplete once the chunk validates.
  // RawSize        - Byte count that DtIsComplete must report for this chunk.
  // CompressedSize - Byte count that RcIsComplete must report for this chunk.
  //
  // Returns true when LzDecode, RcIsComplete and DtIsComplete all succeed and
  // both reported counts match the expected sizes. Returns false otherwise, in
  // which case *BytesProcessed is left untouched.
  //
  bool
  Lz2DecodeChunk (
      uint32_t* BytesProcessed,
      uint32_t RawSize,
      uint16_t CompressedSize
      )
  {
      uint32_t inputCount;
      uint32_t outputCount;

      //
      // Run the sequence-by-sequence decoder over the whole chunk.
      //
      if (LzDecode() == false)
      {
          return false;
      }

      //
      // In a correctly formatted stream the range decoder must have finished
      // cleanly, and it must have stopped exactly at the end of the chunk.
      //
      if (RcIsComplete(&inputCount) == false)
      {
          return false;
      }
      if (inputCount != CompressedSize)
      {
          return false;
      }

      //
      // Likewise the dictionary must report completion, with exactly the
      // number of bytes the chunk header promised.
      //
      if (DtIsComplete(&outputCount) == false)
      {
          return false;
      }
      if (outputCount != RawSize)
      {
          return false;
      }

      //
      // Only a fully validated chunk contributes to the caller's total.
      //
      *BytesProcessed += outputCount;
      return true;
  }
  53. bool
  54. Lz2DecodeStream (
  55. uint32_t* BytesProcessed,
  56. bool GetSizeOnly
  57. )
  58. {
  59. const uint8_t* inBytes;
  60. LZMA2_CONTROL_BYTE controlByte;
  61. uint8_t propertyByte;
  62. uint32_t rawSize;
  63. uint16_t compressedSize;
  64. //
  65. // Read the first control byte
  66. //
  67. *BytesProcessed = 0;
  68. while (BfRead(&controlByte.Value))
  69. {
  70. //
  71. // When the LZMA2 control byte is 0, the entire stream is decoded. This
  72. // is the only success path out of this function.
  73. //
  74. if (controlByte.Value == 0)
  75. {
  76. return true;
  77. }
  78. //
  79. // Read the appropriate number of info bytes based on the stream type.
  80. //
  81. if (!BfSeek((controlByte.u.Common.IsLzma == 1 ) ? 4 : 2, &inBytes))
  82. {
  83. break;
  84. }
  85. //
  86. // For LZMA streams calculate both the uncompressed and compressed size
  87. // from the info bytes. Uncompressed streams only have the former.
  88. //
  89. if (controlByte.u.Common.IsLzma == 1)
  90. {
  91. rawSize = controlByte.u.Lzma.RawSize << 16;
  92. compressedSize = (uint16_t)(inBytes[2] << 8);
  93. compressedSize += inBytes[3] + 1;
  94. }
  95. else
  96. {
  97. rawSize = 0;
  98. compressedSize = 0;
  99. }
  100. //
  101. // Make sure that the output buffer that was supplied is big enough to
  102. // fit the uncompressed chunk, unless we're just calculating the size.
  103. //
  104. rawSize += inBytes[0] << 8;
  105. rawSize += inBytes[1] + 1;
  106. if (!GetSizeOnly && !DtSetLimit(rawSize))
  107. {
  108. break;
  109. }
  110. //
  111. // Check if the full LZMA state needs to be reset, which must happen at
  112. // the start of stream. Also check for a property reset, which occurs
  113. // when an LZMA stream follows an uncompressed stream. Separately,
  114. // check for a state reset without a property byte (happens rarely,
  115. // but does happen in a few compressed streams).
  116. //
  117. if ((controlByte.u.Lzma.ResetState == Lzma2FullReset) ||
  118. (controlByte.u.Lzma.ResetState == Lzma2PropertyReset))
  119. {
  120. //
  121. // Read the LZMA properties and then initialize the decoder.
  122. //
  123. if (!BfRead(&propertyByte) || !LzInitialize(propertyByte))
  124. {
  125. break;
  126. }
  127. }
  128. else if (controlByte.u.Lzma.ResetState == Lzma2SimpleReset)
  129. {
  130. LzResetState();
  131. }
  132. else if (controlByte.u.Lzma.ResetState == Lzma2NoReset)
  133. {
  134. ;
  135. }
  136. //
  137. // Don't do any decompression if the caller only wants to know the size
  138. //
  139. if (GetSizeOnly)
  140. {
  141. *BytesProcessed += rawSize;
  142. BfSeek((controlByte.u.Common.IsLzma == 1) ? compressedSize : rawSize,
  143. &inBytes);
  144. continue;
  145. }
  146. else if (controlByte.u.Common.IsLzma == 0)
  147. {
  148. //
  149. // Seek to the requested size in the input buffer
  150. //
  151. if (!BfSeek(rawSize, &inBytes))
  152. {
  153. return false;
  154. }
  155. //
  156. // Copy the data into the dictionary as-is
  157. //
  158. for (uint32_t i = 0; i < rawSize; i++)
  159. {
  160. DtPutSymbol(inBytes[i]);
  161. }
  162. //
  163. // Update bytes and keep going to the next chunk
  164. //
  165. *BytesProcessed += rawSize;
  166. continue;
  167. }
  168. //
  169. // Record how many bytes are left in this sequence as our SoftLimit for
  170. // the other operations. This allows us to omit most range checking
  171. // logic in rangedec.c. This soft limit lasts until reset below.
  172. //
  173. if (!BfSetSoftLimit(compressedSize))
  174. {
  175. break;
  176. }
  177. //
  178. // Read the initial range and code bytes to initialize the arithmetic
  179. // coding decoder, and let it know how much input data exists. We've
  180. // already validated that this much space exists in the input buffer.
  181. //
  182. if (!RcInitialize(&compressedSize))
  183. {
  184. break;
  185. }
  186. //
  187. // Start decoding the LZMA sequences in this chunk
  188. //
  189. if (!Lz2DecodeChunk(BytesProcessed, rawSize, compressedSize))
  190. {
  191. break;
  192. }
  193. //
  194. // Having decoded that chunk, reset our soft limit (to the full
  195. // input stream) so we can read the next chunk.
  196. //
  197. BfResetSoftLimit();
  198. }
  199. return false;
  200. }