spdxids.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
# Copyright (c) 2020, 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
  4. import re
  5. def getSPDXIDSafeCharacter(c):
  6. """
  7. Converts a character to an SPDX-ID-safe character.
  8. Arguments:
  9. - c: character to test
  10. Returns: c if it is SPDX-ID-safe (letter, number, '-' or '.');
  11. '-' otherwise
  12. """
  13. if c.isalpha() or c.isdigit() or c == "-" or c == ".":
  14. return c
  15. return "-"
  16. def convertToSPDXIDSafe(s):
  17. """
  18. Converts a filename or other string to only SPDX-ID-safe characters.
  19. Note that a separate check (such as in getUniqueID, below) will need
  20. to be used to confirm that this is still a unique identifier, after
  21. conversion.
  22. Arguments:
  23. - s: string to be converted.
  24. Returns: string with all non-safe characters replaced with dashes.
  25. """
  26. return "".join([getSPDXIDSafeCharacter(c) for c in s])
  27. def getUniqueFileID(filenameOnly, timesSeen):
  28. """
  29. Find an SPDX ID that is unique among others seen so far.
  30. Arguments:
  31. - filenameOnly: filename only (directories omitted) seeking ID.
  32. - timesSeen: dict of all filename-only to number of times seen.
  33. Returns: unique SPDX ID; updates timesSeen to include it.
  34. """
  35. converted = convertToSPDXIDSafe(filenameOnly)
  36. spdxID = f"SPDXRef-File-{converted}"
  37. # determine whether spdxID is unique so far, or not
  38. filenameTimesSeen = timesSeen.get(converted, 0) + 1
  39. if filenameTimesSeen > 1:
  40. # we'll append the # of times seen to the end
  41. spdxID += f"-{filenameTimesSeen}"
  42. else:
  43. # first time seeing this filename
  44. # edge case: if the filename itself ends in "-{number}", then we
  45. # need to add a "-1" to it, so that we don't end up overlapping
  46. # with an appended number from a similarly-named file.
  47. p = re.compile(r"-\d+$")
  48. if p.search(converted):
  49. spdxID += "-1"
  50. timesSeen[converted] = filenameTimesSeen
  51. return spdxID