Skip to content

Commit d025b39

Browse files
committed
[colic] Add support for scancode_cli to the colic backend
A faster version of scancode is now added to the colic backend. Signed-off-by: inishchith <[email protected]>
1 parent 1915529 commit d025b39

File tree

2 files changed

+106
-19
lines changed

2 files changed

+106
-19
lines changed

graal/backends/core/analyzers/scancode.py

Lines changed: 72 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,38 +22,45 @@
2222

2323
import json
2424
import subprocess
25-
25+
import os
2626
from graal.graal import (GraalError,
2727
GraalRepository)
2828
from .analyzer import Analyzer
2929

3030

31+
SCANCODE_CLI_EXEC = "etc/scripts/scancli.py"
32+
33+
3134
class ScanCode(Analyzer):
3235
"""A wrapper for nexB/scancode-toolkit.
3336
3437
This class allows to call scancode-toolkit over a file, parses
3538
the result of the analysis and returns it as a dict.
3639
3740
:param exec_path: path of the scancode executable
41+
:param cli: True, if scancode_cli is used
3842
"""
39-
version = '0.1.0'
43+
version = '0.2.0'
4044

41-
def __init__(self, exec_path):
45+
def __init__(self, exec_path, cli=False):
4246
if not GraalRepository.exists(exec_path):
4347
raise GraalError(cause="executable path %s not valid" % exec_path)
4448

4549
self.exec_path = exec_path
50+
self.cli = cli
4651

47-
def analyze(self, **kwargs):
48-
"""Add information about license
52+
if self.cli:
53+
exec_dir, exec_file = os.path.split(self.exec_path)
54+
self.exec_path = os.path.join(exec_dir, SCANCODE_CLI_EXEC)
4955

50-
:param file_path: file path
56+
def __analyze_scancode(self, file_path):
57+
"""Add information about license using scancode
5158
59+
:param file_path: file path
5260
:returns result: dict of the results of the analysis
5361
"""
54-
result = {'licenses': []}
55-
file_path = kwargs['file_path']
5662

63+
result = {'licenses': []}
5764
try:
5865
msg = subprocess.check_output([self.exec_path, '--json-pp', '-', '--license', file_path]).decode("utf-8")
5966
except subprocess.CalledProcessError as e:
@@ -68,3 +75,60 @@ def analyze(self, **kwargs):
6875
result['licenses'] = licenses_raw['files'][0]['licenses']
6976

7077
return result
78+
79+
def __analyze_scancode_cli(self, file_paths):
80+
"""Add information about license using scancode-cli
81+
82+
:param file_paths: file paths
83+
:returns result: dict of the results of the analysis
84+
"""
85+
86+
result = {'files': []}
87+
88+
try:
89+
cmd_scancli = ['python3', self.exec_path]
90+
cmd_scancli.extend(file_paths)
91+
msg = subprocess.check_output(cmd_scancli).decode("utf-8")
92+
except subprocess.CalledProcessError as e:
93+
raise GraalError(cause="Scancode failed at %s, %s" % (file_paths,
94+
e.output.decode("utf-8")))
95+
finally:
96+
subprocess._cleanup()
97+
98+
output_content = ''
99+
outputs_json = []
100+
for line in msg.split('\n'):
101+
if line == '':
102+
if output_content:
103+
output_json = json.loads(output_content)[1:]
104+
outputs_json.append(output_json)
105+
output_content = ''
106+
else:
107+
continue
108+
else:
109+
output_content += line
110+
111+
if output_content:
112+
output_json = json.loads(output_content)[1:]
113+
outputs_json.append(output_json)
114+
115+
for output_json in outputs_json:
116+
file_info = output_json[0]['files'][0]
117+
result['files'].append(file_info)
118+
119+
return result
120+
121+
def analyze(self, **kwargs):
122+
"""Add information about license
123+
124+
:param file_path: file path
125+
126+
:returns result: dict of the results of the analysis
127+
"""
128+
129+
if self.cli:
130+
result = self.__analyze_scancode_cli(kwargs['file_paths'])
131+
else:
132+
result = self.__analyze_scancode(kwargs['file_path'])
133+
134+
return result

graal/backends/core/colic.py

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,11 @@
3333

3434
NOMOS = 'nomos'
3535
SCANCODE = 'scancode'
36+
SCANCODE_CLI = 'scancode_cli'
3637

3738
CATEGORY_COLIC_NOMOS = 'code_license_' + NOMOS
3839
CATEGORY_COLIC_SCANCODE = 'code_license_' + SCANCODE
40+
CATEGORY_COLIC_SCANCODE_CLI = 'code_license_' + SCANCODE_CLI
3941

4042
logger = logging.getLogger(__name__)
4143

@@ -44,7 +46,7 @@ class CoLic(Graal):
4446
"""CoLic backend.
4547
4648
This class extends the Graal backend. It gathers license information
47-
using Nomos
49+
using Nomos, Scancode or Scancode-cli
4850
4951
:param uri: URI of the Git repository
5052
:param git_path: path to the repository or to the log file
@@ -59,9 +61,9 @@ class CoLic(Graal):
5961
:raises RepositoryError: raised when there was an error cloning or
6062
updating the repository.
6163
"""
62-
version = '0.4.0'
64+
version = '0.5.0'
6365

64-
CATEGORIES = [CATEGORY_COLIC_NOMOS, CATEGORY_COLIC_SCANCODE]
66+
CATEGORIES = [CATEGORY_COLIC_NOMOS, CATEGORY_COLIC_SCANCODE, CATEGORY_COLIC_SCANCODE_CLI]
6567

6668
def __init__(self, uri, git_path, exec_path, worktreepath=DEFAULT_WORKTREE_PATH,
6769
entrypoint=None, in_paths=None, out_paths=None,
@@ -84,6 +86,8 @@ def fetch(self, category=CATEGORY_COLIC_NOMOS, paths=None,
8486

8587
if category == CATEGORY_COLIC_SCANCODE:
8688
self.analyzer_kind = SCANCODE
89+
elif category == CATEGORY_COLIC_SCANCODE_CLI:
90+
self.analyzer_kind = SCANCODE_CLI
8791
elif category == CATEGORY_COLIC_NOMOS:
8892
self.analyzer_kind = NOMOS
8993
else:
@@ -101,13 +105,15 @@ def fetch(self, category=CATEGORY_COLIC_NOMOS, paths=None,
101105
def metadata_category(item):
102106
"""Extracts the category from a Code item.
103107
104-
This backend generates two types of item which can be:
105-
'code_license_nomos' or 'code_license_scancode'.
108+
This backend generates the following types of item:
109+
'code_license_nomos', 'code_license_scancode' or 'code_license_scancode_cli'.
106110
"""
107111
if item['analyzer'] == NOMOS:
108112
return CATEGORY_COLIC_NOMOS
109113
elif item['analyzer'] == SCANCODE:
110114
return CATEGORY_COLIC_SCANCODE
115+
elif item['analyzer'] == SCANCODE_CLI:
116+
return CATEGORY_COLIC_SCANCODE_CLI
111117
else:
112118
raise GraalError(cause="Unknown analyzer %s" % item['analyzer'])
113119

@@ -135,6 +141,7 @@ def _analyze(self, commit):
135141
:param commit: a Perceval commit item
136142
"""
137143
analysis = []
144+
files_to_process = []
138145

139146
for committed_file in commit['files']:
140147

@@ -148,9 +155,18 @@ def _analyze(self, commit):
148155
if not GraalRepository.exists(local_path):
149156
continue
150157

151-
license_info = self.analyzer.analyze(local_path)
152-
license_info.update({'file_path': file_path})
153-
analysis.append(license_info)
158+
if self.analyzer_kind == NOMOS or self.analyzer_kind == SCANCODE:
159+
license_info = self.analyzer.analyze(local_path)
160+
license_info.update({'file_path': file_path})
161+
analysis.append(license_info)
162+
elif self.analyzer_kind == SCANCODE_CLI:
163+
files_to_process.append((file_path, local_path))
164+
165+
if files_to_process:
166+
local_paths = [path[1] for path in files_to_process]
167+
analysis = self.analyzer.analyze(local_paths)
168+
for i in range(len(analysis['files'])):
169+
analysis['files'][i]['file_path'] = files_to_process[i][0]
154170

155171
return analysis
156172

@@ -170,17 +186,20 @@ class LicenseAnalyzer:
170186
"""Class to analyse the content of files
171187
172188
:param exec_path: path of the license analyzer executable
173-
:param kind: the analyzer kind (e.g., NOMOS, SCANCODE)
189+
:param kind: the analyzer kind (e.g., NOMOS, SCANCODE, SCANCODE_CLI)
174190
"""
175191

176192
def __init__(self, exec_path, kind=NOMOS):
193+
self.kind = kind
177194
if kind == SCANCODE:
178195
self.analyzer = ScanCode(exec_path)
196+
elif kind == SCANCODE_CLI:
197+
self.analyzer = ScanCode(exec_path, cli=True)
179198
else:
180199
self.analyzer = Nomos(exec_path)
181200

182201
def analyze(self, file_path):
183-
"""Analyze the content of a file using Nomos
202+
"""Analyze the content of a file using Nomos/Scancode
184203
185204
:param file_path: file path
186205
@@ -189,7 +208,11 @@ def analyze(self, file_path):
189208
'licenses': [..]
190209
}
191210
"""
192-
kwargs = {'file_path': file_path}
211+
if self.kind == SCANCODE_CLI:
212+
kwargs = {'file_paths': file_path}
213+
else:
214+
kwargs = {'file_path': file_path}
215+
193216
analysis = self.analyzer.analyze(**kwargs)
194217

195218
return analysis

0 commit comments

Comments
 (0)