From d085e9f1ed42af7bd20f76568d286dd8887063a9 Mon Sep 17 00:00:00 2001 From: Harrison Mutai Date: Tue, 3 Nov 2020 13:48:53 +0000 Subject: [PATCH 1/2] Add workaround for files with permissive binary licenses Add function "has_binary_license" to check whether a file contains a Permissive Binary License (PBL). PBL is not recognized by scancode, causing it to be flagged as a non-permissive license. CI doesn't allow any non-permissive licenses, although files flagged as SPDX are allowed. Workaround causes all files with a valid PBL to be flagged as missing an SPDX. Add condition in "has_spdx_text_in_scancode_output" to ignore any spdx identifier with "unknown" in the name. Scancode erroneously matches PBL to matched_rule.identifier "spdx-license-identifier: unknown-spdx". This prevents the workaround from working. --- tools/test/travis-ci/scancode-evaluate.py | 23 +++++++--- .../test/travis-ci/scancode_evaluate_test.py | 17 ++++++- .../scancode_test/scancode_test_3.json | 44 +++++++++++++++++++ 3 files changed, 78 insertions(+), 6 deletions(-) diff --git a/tools/test/travis-ci/scancode-evaluate.py b/tools/test/travis-ci/scancode-evaluate.py index c487ae5e717..d8a1d18a45a 100644 --- a/tools/test/travis-ci/scancode-evaluate.py +++ b/tools/test/travis-ci/scancode-evaluate.py @@ -54,12 +54,13 @@ def path_leaf(path): return tail or os.path.basename(head) -def has_permissive_text_in_scancode_output(scancode_output_data_file_licenses): - """Returns true if at list one license in the scancode output is permissive.""" +def has_permissive_text_in_scancode_output(scancode_output_data_file): + """Returns true if at least one license in the scancode output is permissive or is a Permissive Binary License""" + # temporary workaround for files with Permissive Binary Licenses return any( scancode_output_data_file_license['category'] == 'Permissive' - for scancode_output_data_file_license in scancode_output_data_file_licenses - ) + for scancode_output_data_file_license in
scancode_output_data_file['licenses'] + ) or has_binary_license(scancode_output_data_file) def has_spdx_text_in_scancode_output(scancode_output_data_file_licenses): @@ -75,6 +76,18 @@ def has_spdx_text_in_analysed_file(scanned_file_content): return bool(re.findall("SPDX-License-Identifier:?", scanned_file_content)) +def has_binary_license(scancode_output_data_file): + """Returns true if the file analysed by ScanCode contains a Permissive Binary License.""" + file_path = os.path.abspath(scancode_output_data_file['path']) + try: + with open(file_path, 'r') as read_file: + scanned_file_content = read_file.read() + return bool(re.findall("Permissive Binary License", scanned_file_content)) + except UnicodeDecodeError: + userlog.warning("Unable to look for PBL text in `{}`:".format(file_path)) + return False + + def license_check(scancode_output_path): """Check licenses in the scancode json file for specified directory. @@ -112,7 +125,7 @@ def license_check(scancode_output_path): # check the next file in the scancode output continue - if not has_permissive_text_in_scancode_output(scancode_output_data_file['licenses']): + if not has_permissive_text_in_scancode_output(scancode_output_data_file): scancode_output_data_file['fail_reason'] = MISSING_PERMISSIVE_LICENSE_TEXT license_offenders.append(scancode_output_data_file) diff --git a/tools/test/travis-ci/scancode_evaluate_test.py b/tools/test/travis-ci/scancode_evaluate_test.py index fc2e068e6cf..be3466e06a7 100644 --- a/tools/test/travis-ci/scancode_evaluate_test.py +++ b/tools/test/travis-ci/scancode_evaluate_test.py @@ -34,22 +34,36 @@ * limitations under the License.\ */" +BINARY_HEADER = "/*\ + * Copyright (c) 2019, Arm Limited, All Rights Reserved\ + * SPDX-License-Identifier: LicenseRef-PBL\ + *\ + * This file and the related binary are licensed under the\ + * Permissive Binary License, Version 1.0 (the \"License\");\ + * you may not use these files except in compliance with the License.\ + *\ + */" + 
@pytest.fixture() def create_scanned_files(): """Create stub files. test3.h missing license notice test4.h with license notice test5.h with license notice + test6.h with permissive binary license """ file_paths = [ os.path.join(STUBS_PATH, "test3.h"), os.path.join(STUBS_PATH, "test4.h"), - os.path.join(STUBS_PATH, "test5.h") + os.path.join(STUBS_PATH, "test5.h"), + os.path.join(STUBS_PATH, "test6.h") ] for file_path in file_paths: with open(file_path, "w") as new_file: if file_path in [os.path.join(STUBS_PATH, "test3.h")]: new_file.write(HEADER_WITHOUT_SPDX) + elif file_path in [os.path.join(STUBS_PATH, "test6.h")]: + new_file.write(BINARY_HEADER) else: new_file.write(HEADER_WITH_SPDX) yield @@ -81,6 +95,7 @@ def test_missing_license_permissive_license_and_spdx(self, create_scanned_files) test3.h: Missing `Permissive` license text and `spdx` in match.identifier and not in file tested by ScanCode (error count += 1) test4.h: Missing `Permissive` license text and `spdx` in match.identifier but found in file tested by ScanCode (error count += 1) test5.h: Missing `spdx` in match.identifier but found in file tested by ScanCode. 
(error count += 0) + test6.h: Matching 'unknown-spdx' in match.identifier and Permissive Binary License in header (error count += 1) @inputs scancode_test/scancode_test_2.json @output 3 """ diff --git a/tools/test/travis-ci/scancode_test/scancode_test_3.json b/tools/test/travis-ci/scancode_test/scancode_test_3.json index bf4f9da7121..5c5dc9deace 100644 --- a/tools/test/travis-ci/scancode_test/scancode_test_3.json +++ b/tools/test/travis-ci/scancode_test/scancode_test_3.json @@ -170,6 +170,50 @@ ], "scan_errors":[ + ] + }, + { + "path":"tools/test/travis-ci/scancode_test/test6.h", + "type":"file", + "licenses":[ + { + "key": "unknown-spdx", + "score": 100.0, + "name": "Unknown SPDX license detected but not recognized", + "short_name": "unknown SPDX", + "category": "Unstated License", + "is_exception": false, + "owner": "Unspecified", + "homepage_url": null, + "text_url": "", + "reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:unknown-spdx", + "spdx_license_key": null, + "spdx_url": "", + "start_line": 3, + "end_line": 3, + "matched_rule": { + "identifier": "spdx-license-identifier: unknown-spdx", + "license_expression": "unknown-spdx", + "licenses": [ + "unknown-spdx" + ], + "is_license_text": false, + "is_license_notice": false, + "is_license_reference": false, + "is_license_tag": true, + "matcher": "1-spdx-id", + "rule_length": 1, + "matched_length": 1, + "match_coverage": 100.0, + "rule_relevance": 100 + } + } + ], + "license_expressions":[ + "unknown-spdx" + ], + "scan_errors":[ + ] } ] From e93a3e2beb9b6f52c7396463e34bf1b4300e69ec Mon Sep 17 00:00:00 2001 From: Harrison Mutai Date: Fri, 6 Nov 2020 09:44:43 +0000 Subject: [PATCH 2/2] Refactor workaround for scancode evaluation of PBL Add function "has_binary_license" to check whether a file contains a Permissive Binary License (PBL). PBL is not recognized by scancode, causing it to be flagged as a non-permissive license.
CI doesn't allow any non-permissive licenses, although files flagged as SPDX are allowed. Workaround causes all files with a valid PBL to be flagged as missing an SPDX. Add condition in "has_spdx_text_in_scancode_output" to ignore any spdx identifier with "unknown" in the name. Scancode erroneously matches PBL to matched_rule.identifier "spdx-license-identifier: unknown-spdx". This prevents the workaround from working. --- tools/test/travis-ci/scancode-evaluate.py | 48 +++++++++---------- .../test/travis-ci/scancode_evaluate_test.py | 8 ++-- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/tools/test/travis-ci/scancode-evaluate.py b/tools/test/travis-ci/scancode-evaluate.py index d8a1d18a45a..e650fc9d0c7 100644 --- a/tools/test/travis-ci/scancode-evaluate.py +++ b/tools/test/travis-ci/scancode-evaluate.py @@ -30,6 +30,7 @@ userlog = logging.getLogger("scancode-evaluate") + class ReturnCode(Enum): """Return codes.""" @@ -54,13 +55,12 @@ def path_leaf(path): return tail or os.path.basename(head) -def has_permissive_text_in_scancode_output(scancode_output_data_file): - """Returns true if at least one license in the scancode output is permissive or is a Permissive Binary License""" - # temporary workaround for files with Permissive Binary Licenses +def has_permissive_text_in_scancode_output(scancode_output_data_file_licenses): + """Returns true if at least one license in the scancode output is permissive""" return any( scancode_output_data_file_license['category'] == 'Permissive' - for scancode_output_data_file_license in scancode_output_data_file['licenses'] - ) or has_binary_license(scancode_output_data_file) + for scancode_output_data_file_license in scancode_output_data_file_licenses + ) def has_spdx_text_in_scancode_output(scancode_output_data_file_licenses): @@ -76,16 +76,20 @@ def has_spdx_text_in_analysed_file(scanned_file_content): return bool(re.findall("SPDX-License-Identifier:?", scanned_file_content)) -def
has_binary_license(scancode_output_data_file): +def has_binary_license(scanned_file_content): """Returns true if the file analysed by ScanCode contains a Permissive Binary License.""" + return bool(re.findall("Permissive Binary License", scanned_file_content)) + + +def get_file_text(scancode_output_data_file): + """Returns file text for scancode output file""" file_path = os.path.abspath(scancode_output_data_file['path']) try: with open(file_path, 'r') as read_file: - scanned_file_content = read_file.read() - return bool(re.findall("Permissive Binary License", scanned_file_content)) + return read_file.read() except UnicodeDecodeError: - userlog.warning("Unable to look for PBL text in `{}`:".format(file_path)) - return False + userlog.warning("Unable to decode file text in: %s" % file_path) + # Ignore files that cannot be decoded def license_check(scancode_output_path): @@ -98,7 +102,7 @@ def license_check(scancode_output_path): Returns: 0 if nothing found - >0 - count how many license isses found + >0 - count how many license issues found ReturnCode.ERROR.value if any error in file licenses found """ @@ -125,25 +129,21 @@ def license_check(scancode_output_path): # check the next file in the scancode output continue - if not has_permissive_text_in_scancode_output(scancode_output_data_file): - scancode_output_data_file['fail_reason'] = MISSING_PERMISSIVE_LICENSE_TEXT - license_offenders.append(scancode_output_data_file) + if not has_permissive_text_in_scancode_output(scancode_output_data_file['licenses']): + scanned_file_content = get_file_text(scancode_output_data_file) + if not (scanned_file_content and has_binary_license(scanned_file_content)): + scancode_output_data_file['fail_reason'] = MISSING_PERMISSIVE_LICENSE_TEXT + license_offenders.append(scancode_output_data_file) if not has_spdx_text_in_scancode_output(scancode_output_data_file['licenses']): # Scancode does not recognize license notice in Python file headers. 
# Issue: https://github.com/nexB/scancode-toolkit/issues/1913 # Therefore check if the file tested by ScanCode actually has a licence notice. - file_path = os.path.abspath(scancode_output_data_file['path']) - try: - with open(file_path, 'r') as read_file: - scanned_file_content = read_file.read() - except UnicodeDecodeError: - userlog.warning("Unable to look for SPDX text in `{}`:".format(file_path)) - # Ignore files that cannot be decoded - # check the next file in the scancode output - continue + scanned_file_content = get_file_text(scancode_output_data_file) - if not has_spdx_text_in_analysed_file(scanned_file_content): + if not scanned_file_content: + continue + elif not has_spdx_text_in_analysed_file(scanned_file_content): scancode_output_data_file['fail_reason'] = MISSING_SPDX_TEXT spdx_offenders.append(scancode_output_data_file) diff --git a/tools/test/travis-ci/scancode_evaluate_test.py b/tools/test/travis-ci/scancode_evaluate_test.py index be3466e06a7..dc5b8fbe933 100644 --- a/tools/test/travis-ci/scancode_evaluate_test.py +++ b/tools/test/travis-ci/scancode_evaluate_test.py @@ -34,7 +34,7 @@ * limitations under the License.\ */" -BINARY_HEADER = "/*\ +HEADER_WITH_BINARY_LICENSE = "/*\ * Copyright (c) 2019, Arm Limited, All Rights Reserved\ * SPDX-License-Identifier: LicenseRef-PBL\ *\ @@ -63,7 +63,7 @@ def create_scanned_files(): if file_path in [os.path.join(STUBS_PATH, "test3.h")]: new_file.write(HEADER_WITHOUT_SPDX) elif file_path in [os.path.join(STUBS_PATH, "test6.h")]: - new_file.write(BINARY_HEADER) + new_file.write(HEADER_WITH_BINARY_LICENSE) else: new_file.write(HEADER_WITH_SPDX) yield @@ -95,7 +95,7 @@ def test_missing_license_permissive_license_and_spdx(self, create_scanned_files) test3.h: Missing `Permissive` license text and `spdx` in match.identifier and not in file tested by ScanCode (error count += 1) test4.h: Missing `Permissive` license text and `spdx` in match.identifier but found in file tested by ScanCode (error count += 1) test5.h: 
Missing `spdx` in match.identifier but found in file tested by ScanCode. (error count += 0) - test6.h: Matching 'unknown-spdx' in match.identifier and Permissive Binary License in header (error count += 1) + test6.h: Matching `spdx` in match.identifier but Permissive Binary License header (error count += 0) @inputs scancode_test/scancode_test_2.json @output 3 """ @@ -107,4 +107,4 @@ def test_permissive_license_no_spdx(self, create_scanned_files): @inputs scancode_test/scancode_test_2.json @outputs 0 """ - assert license_check(os.path.join(STUBS_PATH, "scancode_test_4.json")) == 0 + assert license_check(os.path.join(STUBS_PATH, "scancode_test_4.json")) == 0 \ No newline at end of file