12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- # Copyright (c) 2020, 2021 The Linux Foundation
- #
- # SPDX-License-Identifier: Apache-2.0
- import re
def getSPDXIDSafeCharacter(c):
    """
    Converts a character to an SPDX-ID-safe character.

    Only ASCII letters, ASCII digits, '-' and '.' are treated as safe.
    Non-ASCII letters and digits (for example accented letters or
    superscript digits) are replaced, because SPDX identifiers are
    restricted to the ASCII set [a-zA-Z0-9.-].

    Arguments:
        - c: character to test
    Returns: c if it is SPDX-ID-safe (ASCII letter, ASCII number,
             '-' or '.'); '-' otherwise
    """
    # str.isalpha() / str.isdigit() are Unicode-aware and would also accept
    # characters such as "é" or "²", so additionally require every code
    # point to be in the ASCII range.
    isAscii = all(ord(ch) < 128 for ch in c)
    if isAscii and (c.isalpha() or c.isdigit()):
        return c
    if c == "-" or c == ".":
        return c
    return "-"
def convertToSPDXIDSafe(s):
    """
    Rewrites a filename (or any other string) so that it consists solely
    of SPDX-ID-safe characters.

    The result is not guaranteed to be unique: distinct inputs may map to
    the same output, so callers that need a unique identifier must perform
    their own uniqueness check afterwards (getUniqueFileID does this).

    Arguments:
        - s: string to be converted.
    Returns: a string in which each character of s has been passed through
             getSPDXIDSafeCharacter, i.e. unsafe characters become dashes.
    """
    safeChars = map(getSPDXIDSafeCharacter, s)
    return "".join(safeChars)
def getUniqueFileID(filenameOnly, timesSeen):
    """
    Builds an SPDX file ID that does not clash with any ID previously
    produced using the same timesSeen dict.

    Arguments:
        - filenameOnly: filename only (directories omitted) seeking ID.
        - timesSeen: dict mapping each SPDX-ID-safe converted filename to
                     the number of times it has been seen so far.
    Returns: unique SPDX ID; as a side effect, increments the counter for
             the converted filename in timesSeen.
    """
    safeName = convertToSPDXIDSafe(filenameOnly)

    # Record this occurrence; the counter is keyed by the converted name.
    occurrence = timesSeen.get(safeName, 0) + 1
    timesSeen[safeName] = occurrence

    if occurrence > 1:
        # Repeat sighting: disambiguate with the occurrence count.
        suffix = f"-{occurrence}"
    elif re.search(r"-\d+$", safeName):
        # First sighting, but the name itself already ends in "-{number}".
        # Add "-1" so it cannot overlap with the numbered ID generated for
        # a repeat of a similarly-named file.
        suffix = "-1"
    else:
        # First sighting of an ordinary name: no suffix needed.
        suffix = ""

    return f"SPDXRef-File-{safeName}{suffix}"
|