| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218 | 
							- # Copyright (c) 2020, 2021 The Linux Foundation
 
- #
 
- # SPDX-License-Identifier: Apache-2.0
 
- import hashlib
 
- import os
 
- import re
 
- from west import log
 
- from zspdx.licenses import LICENSES
 
- from zspdx.util import getHashes
 
- # ScannerConfig contains settings used to configure how the SPDX
 
- # Document scanning should occur.
 
class ScannerConfig:
    """
    Settings that control how SPDX Document scanning is performed.

    Attributes:
        - shouldConcludePackageLicense: auto-conclude each Package's
          license from the licenses of its Files.
        - shouldConcludeFileLicenses: auto-conclude each File's license
          from the license(s) detected in it.
        - numLinesScanned: number of lines to scan for an
          SPDX-License-Identifier tag (0 = all lines).
        - doSHA256: calculate SHA256 hashes for each Package's Files.
        - doMD5: calculate MD5 hashes for each Package's Files.
    """

    def __init__(self):
        super().__init__()

        # --- license conclusion behaviour ---

        # derive a Package's concluded license from its Files' licenses
        self.shouldConcludePackageLicense = True

        # derive each File's concluded license from its detected license(s)
        self.shouldConcludeFileLicenses = True

        # --- license tag scanning ---

        # how many lines to search for SPDX-License-Identifier
        # (0 = search the whole file); defaults to 20
        self.numLinesScanned = 20

        # --- optional hashes ---
        # SHA1 hashes are mandatory per SPDX 2.2, so there is no switch
        # for them here.

        # also record a SHA256 hash for each File
        self.doSHA256 = True

        # also record an MD5 hash for each File
        self.doMD5 = False
 
def parseLineForExpression(line):
    """
    Extract the SPDX license expression from a single line of text.

    Arguments:
        - line: line of text to examine
    Returns: the text following the SPDX-License-Identifier: tag, with
             surrounding whitespace and any trailing '/' and '*'
             characters removed; None if nothing follows the tag or the
             tag is not present.
    """
    _, _, remainder = line.partition("SPDX-License-Identifier:")
    if not remainder:
        # partition() leaves the tail empty when the tag is missing
        return None

    # trim whitespace, drop trailing comment-closing characters such as
    # "*/", then trim whatever whitespace preceded them
    return remainder.strip().rstrip("/*").strip()
 
def getExpressionData(filePath, numLines):
    """
    Scans the specified file for the first SPDX-License-Identifier:
    tag in the file.

    Arguments:
        - filePath: path to file to scan.
        - numLines: number of lines to scan for an expression before
                    giving up. If 0 (or any non-positive value), will
                    scan the entire file.
    Returns: parsed expression if found; None if not found, or if the
             file content cannot be decoded as UTF-8.
    """
    log.dbg(f"  - getting licenses for {filePath}")

    # Decode explicitly as UTF-8 so that the result does not depend on
    # the platform's default locale encoding; the handler below already
    # treats a decode failure as "invalid UTF-8 content".
    with open(filePath, "r", encoding="utf-8") as f:
        try:
            for lineno, line in enumerate(f, start=1):
                # only enforce the line limit when one was requested
                if lineno > numLines > 0:
                    break
                expression = parseLineForExpression(line)
                if expression is not None:
                    return expression
        except UnicodeDecodeError:
            # invalid UTF-8 content
            return None

    # if we get here, we didn't find an expression
    return None
 
def splitExpression(expression):
    """
    Parse a license expression into its constituent identifiers.

    Parentheses and '+' signs are dropped, and the AND / OR / WITH
    operators (matched case-insensitively, only when surrounded by
    whitespace) are removed. Whatever follows a WITH operator is
    returned as an identifier like any other.

    Arguments:
        - expression: SPDX license expression
    Returns: sorted array of split identifiers; duplicates are kept,
             and an empty or whitespace-only expression gives an
             empty array.
    """
    # remove parens and plus sign
    withoutPunct = re.sub(r'\(|\)|\+', "", expression)

    # remove word operators, ignoring case, leaving a blank space;
    # whitespace is required on both sides so that identifiers such as
    # "GPL-2.0-or-later" are left intact
    withoutOps = re.sub(r'\s+(?:AND|OR|WITH)\s+', " ", withoutPunct,
                        flags=re.IGNORECASE)

    # split on any run of whitespace: unlike split(" "), this never
    # produces empty-string "identifiers" for repeated, leading or
    # trailing spaces
    return sorted(withoutOps.split())
 
def calculateVerificationCode(pkg):
    """
    Compute the SPDX Package Verification Code for a package.

    The sha1 values of all of the package's files are sorted,
    concatenated without a separator, and the SHA1 digest of the
    resulting string is returned.

    Arguments:
        - pkg: Package
    Returns: verification code as hex string
    """
    # sort so the result does not depend on the order of pkg.files
    digests = sorted(fileObj.sha1 for fileObj in pkg.files.values())
    combined = "".join(digests).encode('utf-8')
    return hashlib.sha1(combined).hexdigest()
 
def checkLicenseValid(lic, doc):
    """
    Record a license ID on the Document as custom if it is not one of
    the IDs listed in LICENSES.

    Arguments:
        - lic: detected license ID
        - doc: Document; its customLicenseIDs set is updated in place
    """
    if lic in LICENSES:
        # listed ID: nothing to record
        return
    doc.customLicenseIDs.add(lic)
 
def getPackageLicenses(pkg):
    """
    Collect every distinct concluded license expression and every
    distinct infoInFile license ID across the package's files.

    Arguments:
        - pkg: Package
    Returns: sorted list of concluded license exprs,
             sorted list of infoInFile ID's
    """
    allFiles = list(pkg.files.values())

    # sets collapse duplicates before sorting
    concluded = {fileObj.concludedLicense for fileObj in allFiles}
    inFile = {
        licID
        for fileObj in allFiles
        for licID in fileObj.licenseInfoInFile
    }
    return sorted(concluded), sorted(inFile)
 
def normalizeExpression(licsConcluded):
    """
    Combine array of license expressions into one AND'd expression,
    adding parens where needed.

    When more than one expression is given, "NONE" and "NOASSERTION"
    entries are left out of the combined expression. If nothing is
    left after that, "NOASSERTION" is returned rather than an empty
    string.

    Arguments:
        - licsConcluded: array of license expressions
    Returns: string with single AND'd expression.
    """
    # return appropriate for simple cases
    if not licsConcluded:
        return "NOASSERTION"
    if len(licsConcluded) == 1:
        return licsConcluded[0]

    # more than one, so we'll need to combine them
    # iff an expression has spaces, it needs parens
    revised = [
        f"({lic})" if " " in lic else lic
        for lic in licsConcluded
        if lic not in ("NONE", "NOASSERTION")
    ]

    # every entry was NONE / NOASSERTION: joining would yield "", which
    # is not a usable license value
    if not revised:
        return "NOASSERTION"

    return " AND ".join(revised)
 
def scanDocument(cfg, doc):
    """
    Scan for licenses and calculate hashes for all Files and Packages
    in this Document.

    For each File, sets relpath, the hashes enabled in cfg, and (when
    an SPDX-License-Identifier tag is found) licenseInfoInFile and
    optionally concludedLicense. A File whose hashes cannot be obtained
    is logged and skipped: only its relpath is set. For each Package,
    sets licenseInfoFromFiles, verificationCode, and optionally
    concludedLicense. License IDs not found in LICENSES are added to
    the Document's customLicenseIDs.

    Arguments:
        - cfg: ScannerConfig
        - doc: Document
    """
    for pkg in doc.pkgs.values():
        log.inf(f"scanning files in package {pkg.cfg.name} in document {doc.cfg.name}")

        # first, gather File data for this package
        for f in pkg.files.values():
            # set relpath based on package's relativeBaseDir
            f.relpath = os.path.relpath(f.abspath, pkg.cfg.relativeBaseDir)

            # get hashes for file
            hashes = getHashes(f.abspath)
            if not hashes:
                # f-string so the offending path actually appears in the log
                log.wrn(f"unable to get hashes for file {f.abspath}; skipping")
                continue
            hSHA1, hSHA256, hMD5 = hashes
            # SHA1 is always recorded; the others only when enabled
            f.sha1 = hSHA1
            if cfg.doSHA256:
                f.sha256 = hSHA256
            if cfg.doMD5:
                f.md5 = hMD5

            # get licenses for file
            expression = getExpressionData(f.abspath, cfg.numLinesScanned)
            if expression:
                if cfg.shouldConcludeFileLicenses:
                    f.concludedLicense = expression
                f.licenseInfoInFile = splitExpression(expression)

            # check if any custom license IDs should be flagged for document
            for lic in f.licenseInfoInFile:
                checkLicenseValid(lic, doc)

        # now, assemble the Package data
        licsConcluded, licsFromFiles = getPackageLicenses(pkg)
        if cfg.shouldConcludePackageLicense:
            pkg.concludedLicense = normalizeExpression(licsConcluded)
        pkg.licenseInfoFromFiles = licsFromFiles
        pkg.verificationCode = calculateVerificationCode(pkg)
 
 
  |