xzstream.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550
/*++

Copyright (c) Alex Ionescu.  All rights reserved.

Module Name:

    xzstream.c

Abstract:

    This module implements the XZ stream format decoding, including support for
    parsing the stream header and block header, and then handing off the block
    decoding to the LZMA2 decoder. Finally, if "meta checking" is enabled, then
    the index and stream footer are also parsed and validated. Optionally, each
    of these component structures can be checked against its CRC32 checksum, if
    "integrity checking" has been enabled. Note that this library only supports
    single-stream, single-block XZ files that have CRC32 (or None) set as their
    block checking algorithm. Finally, no BCJ filters are supported, and files
    with a compressed/uncompressed size metadata indicator are not handled.

Author:

    Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version

Environment:

    Windows & Linux, user mode and kernel mode.

--*/
  20. #include "minlzlib.h"
  21. #include "xzstream.h"
  22. //#ifdef _WIN32
  23. //void __security_check_cookie(_In_ uintptr_t _StackCookie) { (void)(_StackCookie); }
  24. //#endif
  25. #ifdef MINLZ_META_CHECKS
//
// XZ Stream Container State
//
// Bookkeeping gathered while walking the stream so that later structures
// (index, footer) can be cross-checked against earlier ones.
//
typedef struct _CONTAINER_STATE
{
    //
    // HeaderSize is the size in bytes of the block header, as computed from
    // its encoded size field by XzDecodeBlockHeader. IndexSize is the size of
    // the index including its padding but excluding its trailing CRC32; it is
    // compared against the footer's backward size.
    //
    uint32_t HeaderSize;
    uint32_t IndexSize;
    //
    // UncompressedBlockSize is the decoded size reported by the LZMA2 decoder.
    // UnpaddedBlockSize is the block header size plus the compressed data size
    // plus the checksum size. Both are compared against the index record.
    //
    uint32_t UncompressedBlockSize;
    uint32_t UnpaddedBlockSize;
    //
    // Checksum data: the size looked up from k_XzBlockCheckSizes and the check
    // type, both taken from the stream header flags. ChecksumError is set when
    // a checksum mismatch or an unsupported check type is seen, and is what
    // XzChecksumError reports.
    //
    uint32_t ChecksumSize;
    uint8_t ChecksumType;
    bool ChecksumError;
} CONTAINER_STATE, * PCONTAINER_STATE;
CONTAINER_STATE Container;
  49. #endif
  50. #ifdef MINLZ_META_CHECKS
  51. bool
  52. XzDecodeVli (
  53. vli_type* Vli
  54. )
  55. {
  56. uint8_t vliByte;
  57. uint32_t bitPos;
  58. //
  59. // Read the initial VLI byte (might be the value itself)
  60. //
  61. if (!BfRead(&vliByte))
  62. {
  63. return false;
  64. }
  65. *Vli = vliByte & 0x7F;
  66. //
  67. // Check if this was a complex VLI (and we have space for it)
  68. //
  69. bitPos = 7;
  70. while ((vliByte & 0x80) != 0)
  71. {
  72. //
  73. // Read the next byte
  74. //
  75. if (!BfRead(&vliByte))
  76. {
  77. return false;
  78. }
  79. //
  80. // Make sure we're not decoding an invalid VLI
  81. //
  82. if ((bitPos == (7 * VLI_BYTES_MAX)) || (vliByte == 0))
  83. {
  84. return false;
  85. }
  86. //
  87. // Decode it and move to the next 7 bits
  88. //
  89. *Vli |= (vli_type)((vliByte & 0x7F) << bitPos);
  90. bitPos += 7;
  91. }
  92. return true;
  93. }
  94. bool
  95. XzDecodeIndex (
  96. void
  97. )
  98. {
  99. uint32_t vli;
  100. const uint8_t* indexStart;
  101. const uint8_t* indexEnd;
  102. const uint32_t* pCrc32;
  103. uint8_t indexByte;
  104. //
  105. // Remember where the index started so we can compute its size
  106. //
  107. BfSeek(0, &indexStart);
  108. //
  109. // The index always starts out with an empty byte
  110. //
  111. if (!BfRead(&indexByte) || (indexByte != 0))
  112. {
  113. return false;
  114. }
  115. //
  116. // Then the count of blocks, which we expect to be 1
  117. //
  118. if (!XzDecodeVli(&vli) || (vli != 1))
  119. {
  120. return false;
  121. }
  122. //
  123. // Then the unpadded block size, which should match
  124. //
  125. if (!XzDecodeVli(&vli) || (Container.UnpaddedBlockSize != vli))
  126. {
  127. return false;
  128. }
  129. //
  130. // Then the uncompressed block size, which should match
  131. //
  132. if (!XzDecodeVli(&vli) || (Container.UncompressedBlockSize != vli))
  133. {
  134. return false;
  135. }
  136. //
  137. // Then we pad to the next multiple of 4
  138. //
  139. if (!BfAlign())
  140. {
  141. return false;
  142. }
  143. //
  144. // Store the index size with padding to validate the footer later
  145. //
  146. BfSeek(0, &indexEnd);
  147. Container.IndexSize = (uint32_t)(indexEnd - indexStart);
  148. //
  149. // Read the CRC32, which is not part of the index size
  150. //
  151. if (!BfSeek(sizeof(*pCrc32), (const uint8_t**)&pCrc32))
  152. {
  153. return false;
  154. }
  155. #ifdef MINLZ_INTEGRITY_CHECKS
  156. //
  157. // Make sure the index is not corrupt
  158. //
  159. if (Crc32(indexStart, Container.IndexSize) != *pCrc32)
  160. {
  161. Container.ChecksumError = true;
  162. }
  163. #endif
  164. return true;
  165. }
  166. bool
  167. XzDecodeStreamFooter (
  168. void
  169. )
  170. {
  171. PXZ_STREAM_FOOTER streamFooter;
  172. //
  173. // Seek past the footer, making sure we have space in the input stream
  174. //
  175. if (!BfSeek(sizeof(*streamFooter), (const uint8_t**)&streamFooter))
  176. {
  177. return false;
  178. }
  179. //
  180. // Validate the footer magic
  181. //
  182. if (streamFooter->Magic != k_XzStreamFooterMagic)
  183. {
  184. return false;
  185. }
  186. //
  187. // Validate no flags other than checksum type are set
  188. //
  189. if ((streamFooter->u.s.ReservedFlags != 0) ||
  190. (streamFooter->u.s.ReservedType != 0) ||
  191. (streamFooter->u.s.CheckType != Container.ChecksumType))
  192. {
  193. return false;
  194. }
  195. //
  196. // Validate if the footer accurately describes the size of the index
  197. //
  198. if (Container.IndexSize != (streamFooter->BackwardSize * 4))
  199. {
  200. return false;
  201. }
  202. #ifdef MINLZ_INTEGRITY_CHECKS
  203. //
  204. // Compute the footer's CRC32 and make sure it's not corrupted
  205. //
  206. if (Crc32(&streamFooter->BackwardSize,
  207. sizeof(streamFooter->BackwardSize) +
  208. sizeof(streamFooter->u.Flags)) !=
  209. streamFooter->Crc32)
  210. {
  211. Container.ChecksumError = true;
  212. }
  213. #endif
  214. return true;
  215. }
  216. #endif
  217. #if MINLZ_INTEGRITY_CHECKS
  218. bool
  219. XzCrc (
  220. uint8_t* OutputBuffer,
  221. uint32_t BlockSize,
  222. const uint8_t* InputEnd
  223. )
  224. {
  225. //
  226. // Compute the appropriate checksum and compare it with the expected result
  227. //
  228. switch (Container.ChecksumType)
  229. {
  230. case XzCheckTypeCrc32:
  231. return Crc32(OutputBuffer, BlockSize) != *(uint32_t*)InputEnd;
  232. case XzCheckTypeCrc64:
  233. return Crc64(OutputBuffer, BlockSize) != *(uint64_t*)InputEnd;
  234. default:
  235. return false;
  236. }
  237. }
  238. #endif
  239. bool
  240. XzDecodeBlock (
  241. uint8_t* OutputBuffer,
  242. uint32_t* BlockSize
  243. )
  244. {
  245. #ifdef MINLZ_META_CHECKS
  246. const uint8_t *inputStart, *inputEnd;
  247. #endif
  248. //
  249. // Decode the LZMA2 stream. If full integrity checking is enabled, also
  250. // save the offset before and after decoding, so we can save the block
  251. // sizes and compare them against the footer and index after decoding.
  252. //
  253. #ifdef MINLZ_META_CHECKS
  254. BfSeek(0, &inputStart);
  255. #endif
  256. if (!Lz2DecodeStream(BlockSize, OutputBuffer == NULL))
  257. {
  258. return false;
  259. }
  260. #ifdef MINLZ_META_CHECKS
  261. BfSeek(0, &inputEnd);
  262. Container.UnpaddedBlockSize = Container.HeaderSize +
  263. (uint32_t)(inputEnd - inputStart);
  264. Container.UncompressedBlockSize = *BlockSize;
  265. #endif
  266. //
  267. // After the block data, we need to pad to 32-bit alignment
  268. //
  269. if (!BfAlign())
  270. {
  271. return false;
  272. }
  273. #ifdef MINLZ_META_CHECKS
  274. //
  275. // Finally, move past the size of the checksum if any, then compare it with
  276. // with the actual checksum of the block, if integrity checks are enabled.
  277. // If meta checks are enabled, update the block size so the index checking
  278. // can validate it.
  279. //
  280. if (!BfSeek(Container.ChecksumSize, &inputEnd))
  281. {
  282. return false;
  283. }
  284. #endif
  285. (void)(OutputBuffer);
  286. #ifdef MINLZ_INTEGRITY_CHECKS
  287. if ((OutputBuffer != NULL) && !(XzCrc(OutputBuffer, *BlockSize, inputEnd)))
  288. {
  289. Container.ChecksumError = true;
  290. }
  291. #endif
  292. #ifdef MINLZ_META_CHECKS
  293. Container.UnpaddedBlockSize += Container.ChecksumSize;
  294. #endif
  295. return true;
  296. }
  297. bool
  298. XzDecodeStreamHeader (
  299. void
  300. )
  301. {
  302. PXZ_STREAM_HEADER streamHeader;
  303. //
  304. // Seek past the header, making sure we have space in the input stream
  305. //
  306. if (!BfSeek(sizeof(*streamHeader), (const uint8_t**)&streamHeader))
  307. {
  308. return false;
  309. }
  310. #ifdef MINLZ_META_CHECKS
  311. //
  312. // Validate the header magic
  313. //
  314. if ((*(uint32_t*)&streamHeader->Magic[1] != k_XzStreamHeaderMagic1) ||
  315. (streamHeader->Magic[0] != k_XzStreamHeaderMagic0) ||
  316. (streamHeader->Magic[5] != k_XzStreamHeaderMagic5))
  317. {
  318. return false;
  319. }
  320. //
  321. // Validate the header flags
  322. //
  323. if ((streamHeader->u.s.ReservedFlags != 0) ||
  324. (streamHeader->u.s.ReservedType != 0))
  325. {
  326. return false;
  327. }
  328. //
  329. // Save checksum type and compute pre-defined size for it
  330. //
  331. Container.ChecksumType = streamHeader->u.s.CheckType;
  332. Container.ChecksumSize = k_XzBlockCheckSizes[streamHeader->u.s.CheckType];
  333. if ((Container.ChecksumType != XzCheckTypeNone) &&
  334. (Container.ChecksumType != XzCheckTypeCrc32) &&
  335. (Container.ChecksumType != XzCheckTypeCrc64))
  336. {
  337. Container.ChecksumError = true;
  338. }
  339. #endif
  340. #ifdef MINLZ_INTEGRITY_CHECKS
  341. //
  342. // Compute the header's CRC32 and make sure it's not corrupted
  343. //
  344. if (Crc32(&streamHeader->u.Flags, sizeof(streamHeader->u.Flags)) !=
  345. streamHeader->Crc32)
  346. {
  347. Container.ChecksumError = true;
  348. }
  349. #endif
  350. return true;
  351. }
  352. bool
  353. XzDecodeBlockHeader (
  354. void
  355. )
  356. {
  357. PXZ_BLOCK_HEADER blockHeader;
  358. #ifdef MINLZ_META_CHECKS
  359. uint32_t dictionarySize;
  360. #endif
  361. //
  362. // Seek past the header, making sure we have space in the input stream. If
  363. // the header indicates a size of 0, then this is a blockless (empty) file
  364. // and this is actually an index. Undo the seek so we can parse the index.
  365. //
  366. if (!BfSeek(sizeof(*blockHeader), (const uint8_t**)&blockHeader) ||
  367. (blockHeader->Size == 0))
  368. {
  369. BfSeek((uint32_t)(-(uint16_t)sizeof(*blockHeader)),
  370. (const uint8_t**)&blockHeader);
  371. return false;
  372. }
  373. #ifdef MINLZ_META_CHECKS
  374. //
  375. // Validate that the size of the header is what we expect
  376. //
  377. Container.HeaderSize = (blockHeader->Size + 1) * 4;
  378. if (Container.HeaderSize != sizeof(*blockHeader))
  379. {
  380. return false;
  381. }
  382. //
  383. // Validate that no additional flags or filters are enabled
  384. //
  385. if (blockHeader->u.Flags != 0)
  386. {
  387. return false;
  388. }
  389. //
  390. // Validate that the only filter is the LZMA2 filter
  391. //
  392. if (blockHeader->LzmaFlags.Id != k_XzLzma2FilterIdentifier)
  393. {
  394. return false;
  395. }
  396. //
  397. // With the expected number of property bytes
  398. //
  399. if (blockHeader->LzmaFlags.Size
  400. != sizeof(blockHeader->LzmaFlags.u.Properties))
  401. {
  402. return false;
  403. }
  404. //
  405. // The only property is the dictionary size, make sure it is valid.
  406. //
  407. // We don't actually need to store or compare the size with anything since
  408. // the library expects the caller to always put in a buffer that's large
  409. // enough to contain the full uncompressed file (or calling it in "get size
  410. // only" mode to get this information).
  411. //
  412. // This output buffer can thus be smaller than the size of the dictionary
  413. // which is absolutely OK as long as that's actually the size of the output
  414. // file. If callers pass in a buffer size that's too small, decoding will
  415. // fail at later stages anyway, and that's incorrect use of minlzlib.
  416. //
  417. dictionarySize = blockHeader->LzmaFlags.u.s.DictionarySize;
  418. if (dictionarySize > 39)
  419. {
  420. return false;
  421. }
  422. #ifdef MINLZ_INTEGRITY_CHECKS
  423. //
  424. // Compute the header's CRC32 and make sure it's not corrupted
  425. //
  426. if (Crc32(blockHeader,
  427. Container.HeaderSize - sizeof(blockHeader->Crc32)) !=
  428. blockHeader->Crc32)
  429. {
  430. Container.ChecksumError = true;
  431. }
  432. #endif
  433. #endif
  434. return true;
  435. }
  436. bool
  437. XzDecode (
  438. const uint8_t* InputBuffer,
  439. uint32_t InputSize,
  440. uint8_t* OutputBuffer,
  441. uint32_t* OutputSize
  442. )
  443. {
  444. //
  445. // Initialize the input buffer descriptor and history buffer (dictionary)
  446. //
  447. BfInitialize(InputBuffer, InputSize);
  448. DtInitialize(OutputBuffer, *OutputSize, 0);
  449. //
  450. // Decode the stream header to check for validity
  451. //
  452. if (!XzDecodeStreamHeader())
  453. {
  454. return false;
  455. }
  456. //
  457. // Decode the block header to check for validity. If it appears valid, go
  458. // decode the block. Otherwise, this may be a blockless (empty input) file.
  459. //
  460. if (XzDecodeBlockHeader())
  461. {
  462. if (!XzDecodeBlock(OutputBuffer, OutputSize))
  463. {
  464. return false;
  465. }
  466. }
  467. else
  468. {
  469. *OutputSize = 0;
  470. }
  471. #ifdef MINLZ_META_CHECKS
  472. //
  473. // Decode the index for validity checks
  474. //
  475. if (!XzDecodeIndex())
  476. {
  477. return false;
  478. }
  479. //
  480. // And finally decode the footer as a final set of checks
  481. //
  482. if (!XzDecodeStreamFooter())
  483. {
  484. return false;
  485. }
  486. #endif
  487. return true;
  488. }
  489. bool
  490. XzChecksumError (
  491. void
  492. )
  493. {
  494. //
  495. // Return to an external caller if a checksum error was encountered
  496. //
  497. #ifdef MINLZ_INTEGRITY_CHECKS
  498. return Container.ChecksumError;
  499. #else
  500. return false;
  501. #endif
  502. }