lzma2dec.h 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. /*++
  2. Copyright (c) Alex Ionescu. All rights reserved.
  3. Module Name:
  4. lzma2dec.h
  5. Abstract:
  6. This header file contains C-style data structures and enumerations that map
  7. back to the LZMA2 standard. This includes the encoding of the LZMA2 Control
  8. Byte and the possible LZMA2 Reset States.
  9. Author:
  10. Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
  11. Environment:
  12. Windows & Linux, user mode and kernel mode.
  13. --*/
  14. #pragma once
  //
  // The bit-fields declared in this header use uint8_t instead of int. MSVC
  // flags that as warning C4214 (nonstandard extension used: bit field types
  // other than int), so the warning is switched off for MSVC builds.
  //
  #if defined(_MSC_VER)
  #pragma warning(disable:4214)
  #endif

  //
  // Worst-case number of input bytes consumed by a single LZMA sequence.
  //
  // The most complex LZMA sequence possible is a "match" sequence where the
  // length is > 127 bytes and the distance is > 127 bytes. Its bits are:
  //
  //   {1,1}             2 bits   selects "match"
  //   {1,1,nnnnnnnn}   10 bits   selects the "8-bit encoded length"
  //   {1,1,1,1,1,1}     6 bits   selects the distance slot (63)
  //   direct bits      26 bits   fixed probability, hardcoded at 50%
  //   "align" bits      4 bits   last distance bits
  //
  // The first 18 bits and the final 4 "align" bits all come from
  // arithmetic-coded bit trees with various probabilities, for a total of 22
  // such bits. Only the 26 direct bits are fixed-probability.
  //
  // Each time the arithmetic coder has to be "normalized", it consumes one
  // additional input byte. Normalization is done whenever more than 8 of the
  // high bits of the coder's range have been consumed (i.e.: the range is
  // below 2^24). Direct bits always halve the range due to their 50%
  // probability, so they cause a normalization exactly every 8 bits. The other
  // bits can have arbitrary probabilities, but in the worst case the range
  // must be normalized every n bits. This gives a total of 20 worst-case
  // normalizations per LZMA sequence.
  //
  // NOTE(review): "n" is not defined anywhere in this header; the figure of 20
  // is carried over from the original derivation as-is.
  //
  // Finally, one last normalization is done at the end of LzDecode, to make
  // sure that the decoder is always left in a normalized state. A compressed
  // chunk should therefore be at least 21 bytes in order to guarantee that
  // LzDecode can never read past the current input stream, which in turn
  // avoids range checking.
  //
  #define LZMA_MAX_SEQUENCE_SIZE 21
  //
  // Reset requests that an LZMA2 control byte can carry. The four values cover
  // the whole range of the 2-bit ResetState field of LZMA2_CONTROL_BYTE.
  //
  // NOTE(review): judging by the names, these presumably map to the LZMA2
  // format's "nothing", "state", "state + properties" and "state + properties
  // + dictionary" resets -- confirm against the decoder before relying on it.
  //
  typedef enum _LZMA2_COMPRESSED_RESET_STATE
  {
      Lzma2NoReset       = 0x0,
      Lzma2SimpleReset   = 0x1,
      Lzma2PropertyReset = 0x2,
      Lzma2FullReset     = 0x3
  } LZMA2_COMPRESSED_RESET_STATE;
  //
  // Overlay used to parse a single LZMA2 control byte. The raw byte is stored
  // in Value, and the same storage can be read through one of three bit-field
  // views under u:
  //
  //   u.Common - only exposes IsLzma, the last-declared 1-bit field, which is
  //              present at the same position in every view.
  //   u.Raw    - ResetState (2 bits), Reserved (5 bits), IsLzma (1 bit).
  //   u.Lzma   - RawSize (5 bits), ResetState (2 bits), IsLzma (1 bit).
  //
  // The order in which a compiler allocates bit-fields inside a byte is
  // implementation-defined. When fields are allocated starting from the least
  // significant bit (e.g.: MSVC, or GCC/Clang on little-endian targets), the
  // first-declared field occupies the low bits and IsLzma is bit 7 of Value.
  //
  // Bit-fields of type uint8_t are a compiler extension, which is why MSVC
  // warning C4214 is disabled at the top of this file.
  //
  typedef union _LZMA2_CONTROL_BYTE
  {
      union
      {
          struct
          {
              uint8_t ResetState : 2;
              uint8_t Reserved : 5;
              uint8_t IsLzma : 1;
          } Raw;
          struct
          {
              uint8_t RawSize : 5;
              uint8_t ResetState : 2;
              uint8_t IsLzma : 1;
          } Lzma;
          struct
          {
              uint8_t : 7;
              uint8_t IsLzma : 1;
          } Common;
      } u;
      uint8_t Value;
  } LZMA2_CONTROL_BYTE;

  //
  // The overlay is only meaningful if every view fits in exactly one byte.
  // Enforce this at compile time wherever the language provides a static
  // assertion; older language modes skip the check and build as before.
  //
  #if defined(__cplusplus) && (__cplusplus >= 201103L)
  static_assert(sizeof(LZMA2_CONTROL_BYTE) == 1, "Invalid control byte size");
  #elif !defined(__cplusplus) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
  _Static_assert(sizeof(LZMA2_CONTROL_BYTE) == 1, "Invalid control byte size");
  #endif