Skip to content

Commit 78e17fa

Browse files
authored
[python] re-work manifest-analysis tracker (CycloneDX#2083)
Adds SrcFile and evidence to the components from the requirements.txt file. Sets the overall confidence to the max from all methods. Adds repo tests. Signed-off-by: Prabhu Subramanian <[email protected]>
1 parent d880192 commit 78e17fa

File tree

11 files changed

+265
-320
lines changed

11 files changed

+265
-320
lines changed

.github/workflows/repotests.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,19 @@ jobs:
581581
run: |
582582
bin/cdxgen.js -p -t js --no-recurse -o bomresults/bom.json --evidence .
583583
shell: bash
584+
- name: repotests python issues
585+
run: |
586+
bin/cdxgen.js -t python test/data/issue-2069 -o bomresults/issue-2069.json --fail-on-error -p
587+
bin/cdxgen.js -t python test/data/issue-2069 -o bomresults/issue-2069.json --only fastapi --only google --fail-on-error -p
588+
bin/cdxgen.js -t python test/data/issue-2082 -o bomresults/issue-2082.json --fail-on-error -p
589+
bin/cdxgen.js -t python test/data/issue-2082 -o bomresults/issue-2082.json --fail-on-error --only fastapi --only aio -p
590+
bin/cdxgen.js -t python test/data/issue-2069 -o bomresults/issue-2069.json --fail-on-error --technique manifest-analysis -p
591+
bin/cdxgen.js -t python test/data/issue-2082 -o bomresults/issue-2082.json --fail-on-error --technique manifest-analysis -p
592+
bin/cdxgen.js -t python test/data/issue-2069 -o bomresults/issue-2069.json --fail-on-error --lifecycle pre-build -p
593+
bin/cdxgen.js -t python test/data/issue-2082 -o bomresults/issue-2082.json --fail-on-error --lifecycle pre-build -p
594+
shell: bash
595+
env:
596+
CDXGEN_DEBUG_MODE: debug
584597
- name: repotests django-DefectDojo
585598
run: |
586599
bin/cdxgen.js -t python repotests/django-DefectDojo -o bomresults/django-DefectDojo-safe.json --feature-flags safe-pip-install --fail-on-error

devenv.lock

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
"devenv": {
44
"locked": {
55
"dir": "src/modules",
6-
"lastModified": 1752692978,
6+
"lastModified": 1752951785,
77
"owner": "cachix",
88
"repo": "devenv",
9-
"rev": "b2281100077d641cbf135e8680e973c0c2270bc4",
9+
"rev": "3d4f8b778378a0e3f29ba779af0ff1717cf1fa00",
1010
"type": "github"
1111
},
1212
"original": {
@@ -163,10 +163,10 @@
163163
]
164164
},
165165
"locked": {
166-
"lastModified": 1752645690,
166+
"lastModified": 1752950636,
167167
"owner": "bobvanderlinden",
168168
"repo": "nixpkgs-ruby",
169-
"rev": "e4bc98b2ed06c9baead6c9516eed10c94f861ea2",
169+
"rev": "7486abed72d397612fb83360dbb7117f3a6c992b",
170170
"type": "github"
171171
},
172172
"original": {
@@ -177,10 +177,10 @@
177177
},
178178
"nixpkgs-unstable": {
179179
"locked": {
180-
"lastModified": 1752596105,
180+
"lastModified": 1752900028,
181181
"owner": "nixos",
182182
"repo": "nixpkgs",
183-
"rev": "dab3a6e781554f965bde3def0aa2fda4eb8f1708",
183+
"rev": "6b4955211758ba47fac850c040a27f23b9b4008f",
184184
"type": "github"
185185
},
186186
"original": {

lib/cli/index.js

Lines changed: 28 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -3620,7 +3620,6 @@ export async function createPythonBom(path, options) {
36203620
let dependencies = [];
36213621
let pkgList = [];
36223622
let formulationList = [];
3623-
const packageTechniqueMap = new Map();
36243623
const tempDir = mkdtempSync(join(getTmpDir(), "cdxgen-venv-"));
36253624
let parentComponent = createDefaultParentComponent(path, "pypi", options);
36263625
// We are checking only the root here for pipenv
@@ -3843,13 +3842,19 @@ export async function createPythonBom(path, options) {
38433842
} else if (reqDirFiles?.length) {
38443843
for (const j in reqDirFiles) {
38453844
const f = reqDirFiles[j];
3846-
const reqData = readFileSync(f, { encoding: "utf-8" });
3847-
const dlist = await parseReqFile(reqData, false);
3845+
const dlist = await parseReqFile(f, false);
38483846
if (dlist?.length) {
38493847
pkgList = pkgList.concat(dlist);
38503848
}
38513849
}
38523850
metadataFilename = reqDirFiles.join(", ");
3851+
} else if (reqFiles?.length) {
3852+
for (const f of reqFiles) {
3853+
const dlist = await parseReqFile(f, true);
3854+
if (dlist?.length) {
3855+
pkgList = pkgList.concat(dlist);
3856+
}
3857+
}
38533858
}
38543859
}
38553860

@@ -3884,55 +3889,20 @@ export async function createPythonBom(path, options) {
38843889
}
38853890
for (const f of reqFiles) {
38863891
const basePath = dirname(f);
3887-
let reqData;
3888-
let frozen = false;
3889-
3890-
reqData = readFileSync(f, { encoding: "utf-8" });
3891-
await parseReqFile(reqData, true, packageTechniqueMap);
3892-
38933892
if (options.installDeps) {
38943893
const pkgMap = await getPipFrozenTree(
38953894
basePath,
38963895
f,
38973896
tempDir,
38983897
parentComponent,
38993898
);
3900-
39013899
if (pkgMap.pkgList?.length) {
3902-
pkgMap.pkgList.forEach((pkg) => {
3903-
const existingTechnique = packageTechniqueMap.get(
3904-
pkg.name.toLowerCase(),
3905-
);
3906-
if (existingTechnique) {
3907-
// Update evidence to preserve original technique
3908-
if (pkg.evidence?.identity?.methods) {
3909-
pkg.evidence.identity.methods =
3910-
pkg.evidence.identity.methods.map((method) => ({
3911-
...method,
3912-
technique: existingTechnique,
3913-
}));
3914-
}
3915-
} else {
3916-
// New transitive dependency - mark as manifest-analysis derived
3917-
packageTechniqueMap.set(
3918-
pkg.name.toLowerCase(),
3919-
"manifest-analysis",
3920-
);
3921-
if (pkg.evidence?.identity?.methods) {
3922-
pkg.evidence.identity.methods =
3923-
pkg.evidence.identity.methods.map((method) => ({
3924-
...method,
3925-
technique: "manifest-analysis",
3926-
}));
3927-
}
3928-
}
3929-
});
3930-
39313900
pkgList = pkgList.concat(pkgMap.pkgList);
3932-
frozen = pkgMap.frozen;
3901+
pkgList = trimComponents(pkgList);
39333902
}
39343903
if (pkgMap.formulationList?.length) {
39353904
formulationList = formulationList.concat(pkgMap.formulationList);
3905+
formulationList = trimComponents(formulationList);
39363906
}
39373907
if (pkgMap.dependenciesList) {
39383908
dependencies = mergeDependencies(
@@ -3956,22 +3926,6 @@ export async function createPythonBom(path, options) {
39563926
);
39573927
}
39583928
}
3959-
// Fallback to parsing manually
3960-
if (!pkgList.length || !frozen) {
3961-
thoughtLog(
3962-
`Manually parsing ${f}. The result would include only direct dependencies.`,
3963-
);
3964-
if (DEBUG_MODE) {
3965-
console.log(
3966-
`Manually parsing ${f}. The result would include only direct dependencies.`,
3967-
);
3968-
}
3969-
reqData = readFileSync(f, { encoding: "utf-8" });
3970-
const dlist = await parseReqFile(reqData, true);
3971-
if (dlist?.length) {
3972-
pkgList = pkgList.concat(dlist);
3973-
}
3974-
}
39753929
}
39763930
} else if (!poetryMode) {
39773931
pkgMap = await getPipFrozenTree(
@@ -6913,8 +6867,8 @@ export function trimComponents(components) {
69136867
}
69146868
// comp.evidence.identity can be an array or object
69156869
// Merge the evidence.identity based on methods or objects
6916-
const isArray = Array.isArray(comp.evidence.identity);
6917-
const identities = isArray
6870+
const isIdentityArray = Array.isArray(comp.evidence.identity);
6871+
const identities = isIdentityArray
69186872
? comp.evidence.identity
69196873
: [comp.evidence.identity];
69206874
for (const aident of identities) {
@@ -6945,9 +6899,23 @@ export function trimComponents(components) {
69456899
existingComponent.evidence.identity.push(aident);
69466900
}
69476901
}
6948-
if (!isArray) {
6902+
if (!isIdentityArray) {
6903+
const firstIdentity = existingComponent.evidence.identity[0];
6904+
let identConfidence = firstIdentity?.confidence;
6905+
// We need to set the confidence to the max of all confidences
6906+
if (firstIdentity?.methods?.length > 1) {
6907+
for (const aidentMethod of firstIdentity.methods) {
6908+
if (
6909+
aidentMethod?.confidence &&
6910+
aidentMethod.confidence > identConfidence
6911+
) {
6912+
identConfidence = aidentMethod.confidence;
6913+
}
6914+
}
6915+
}
6916+
firstIdentity.confidence = identConfidence;
69496917
existingComponent.evidence = {
6950-
identity: existingComponent.evidence.identity[0],
6918+
identity: firstIdentity,
69516919
};
69526920
}
69536921
}

lib/helpers/utils.js

Lines changed: 61 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -5624,25 +5624,66 @@ export async function parsePyLockData(lockData, lockFile, pyProjectFile) {
56245624
}
56255625

56265626
/**
5627-
* Method to parse requirements.txt data. This must be replaced with atom parsedeps.
5627+
* Method to parse requirements.txt file. This must be replaced with atom parsedeps.
56285628
*
5629-
* @param {Object} reqData Requirements.txt data
5629+
* @param {String} reqFile Requirements.txt file
56305630
* @param {Boolean} fetchDepsInfo Fetch dependencies info from pypi
5631-
* @param {Object} packageTechniqueMap Mapping of package techniques
5631+
*
5632+
* @returns {Promise<Array<Object>>} List of direct dependencies from the requirements file
56325633
*/
5633-
export async function parseReqFile(
5634-
reqData,
5635-
fetchDepsInfo,
5636-
packageTechniqueMap = null,
5637-
) {
5638-
let pkgList = [];
5634+
export async function parseReqFile(reqFile, fetchDepsInfo = false) {
5635+
return await parseReqData(reqFile, null, fetchDepsInfo);
5636+
}
5637+
5638+
/**
5639+
* Method to parse requirements.txt file. Must only be used internally.
5640+
*
5641+
* @param {String} reqFile Requirements.txt file
5642+
* @param {Object} reqData Requirements.txt data for internal invocations from setup.py file etc.
5643+
*
5644+
* @param {Boolean} fetchDepsInfo Fetch dependencies info from pypi
5645+
*
5646+
* @returns {Promise[Array<Object>]} List of direct dependencies from the requirements file
5647+
*/
5648+
async function parseReqData(reqFile, reqData = null, fetchDepsInfo = false) {
5649+
const pkgList = [];
56395650
let compScope;
5651+
if (!reqFile && !reqData) {
5652+
console.warn(
5653+
"Either the requirements file or the data needs to be provided for parsing.",
5654+
);
5655+
return pkgList;
5656+
}
5657+
reqData = reqData || readFileSync(reqFile, { encoding: "utf-8" });
5658+
const evidence = reqFile
5659+
? {
5660+
identity: {
5661+
field: "purl",
5662+
confidence: 0.5,
5663+
methods: [
5664+
{
5665+
technique: "manifest-analysis",
5666+
confidence: 0.5,
5667+
value: reqFile,
5668+
},
5669+
],
5670+
},
5671+
}
5672+
: undefined;
56405673
reqData
56415674
.replace(/\r/g, "")
56425675
.replace(/ [\\]\n/g, "")
56435676
.replace(/ {4}/g, " ")
56445677
.split("\n")
56455678
.forEach((l) => {
5679+
const properties = reqFile
5680+
? [
5681+
{
5682+
name: "SrcFile",
5683+
value: reqFile,
5684+
},
5685+
]
5686+
: [];
56465687
l = l.trim();
56475688
let markers;
56485689
if (l.includes(" ; ")) {
@@ -5677,11 +5718,11 @@ export async function parseReqFile(
56775718
const name = tmpA[0].trim().replace(";", "");
56785719
const versionSpecifiers = l.replace(name, "");
56795720
if (!PYTHON_STD_MODULES.includes(name)) {
5680-
const properties = [];
56815721
const apkg = {
56825722
name,
56835723
version: versionStr,
56845724
scope: compScope,
5725+
evidence,
56855726
};
56865727
if (
56875728
versionSpecifiers?.length > 0 &&
@@ -5713,7 +5754,9 @@ export async function parseReqFile(
57135754
name,
57145755
version: undefined,
57155756
scope: compScope,
5757+
evidence,
57165758
properties: [
5759+
...properties,
57175760
{
57185761
name: "cdx:pypi:versionSpecifiers",
57195762
value: versionSpecifiers?.length
@@ -5736,7 +5779,9 @@ export async function parseReqFile(
57365779
name,
57375780
version: undefined,
57385781
scope: compScope,
5782+
evidence,
57395783
properties: [
5784+
...properties,
57405785
{
57415786
name: "cdx:pypi:versionSpecifiers",
57425787
value: versionSpecifiers?.length
@@ -5761,6 +5806,7 @@ export async function parseReqFile(
57615806
name,
57625807
version: undefined,
57635808
scope: compScope,
5809+
evidence,
57645810
properties: [
57655811
{
57665812
name: "cdx:pypi:versionSpecifiers",
@@ -5779,6 +5825,7 @@ export async function parseReqFile(
57795825
name,
57805826
version: null,
57815827
scope: compScope,
5828+
evidence,
57825829
properties: [
57835830
{
57845831
name: "cdx:pypi:versionSpecifiers",
@@ -5793,33 +5840,7 @@ export async function parseReqFile(
57935840
}
57945841
}
57955842
});
5796-
const directDependencies = await getPyMetadata(pkgList, fetchDepsInfo);
5797-
if (packageTechniqueMap && directDependencies?.length) {
5798-
// Mark direct dependencies from requirements.txt as manifest-analysis
5799-
directDependencies.forEach((pkg) => {
5800-
packageTechniqueMap.set(pkg.name.toLowerCase(), "manifest-analysis");
5801-
// Also mark the evidence
5802-
if (!pkg.evidence) {
5803-
pkg.evidence = {
5804-
identity: {
5805-
field: "purl",
5806-
confidence: pkg.version ? 0.5 : 0.3,
5807-
methods: [],
5808-
},
5809-
};
5810-
}
5811-
if (!pkg.evidence.identity.methods) {
5812-
pkg.evidence.identity.methods = [];
5813-
}
5814-
pkg.evidence.identity.methods.push({
5815-
technique: "manifest-analysis",
5816-
confidence: pkg.version ? 0.5 : 0.3,
5817-
value: pkg["bom-ref"] || pkg.purl,
5818-
});
5819-
});
5820-
pkgList = pkgList.concat(directDependencies);
5821-
}
5822-
return directDependencies;
5843+
return await getPyMetadata(pkgList, fetchDepsInfo);
58235844
}
58245845

58255846
/**
@@ -5928,7 +5949,7 @@ export async function parseSetupPyFile(setupPyData) {
59285949
lines = lines.concat(tmpA);
59295950
}
59305951
});
5931-
return await parseReqFile(lines.join("\n"), false);
5952+
return await parseReqData(null, lines.join("\n"), false);
59325953
}
59335954

59345955
/**
@@ -13655,7 +13676,7 @@ export function createUVLock(basePath, options) {
1365513676
* @param {string} tempVenvDir Temp venv dir
1365613677
* @param {Object} parentComponent Parent component
1365713678
*
13658-
* @returns List of packages from the virtual env
13679+
* @returns {Object} List of packages from the virtual env
1365913680
*/
1366013681
export async function getPipFrozenTree(
1366113682
basePath,
@@ -13676,9 +13697,8 @@ export async function getPipFrozenTree(
1367613697
// FIX: Create a set of explicit dependencies from requirements.txt to identify root packages.
1367713698
const explicitDeps = new Set();
1367813699
if (reqOrSetupFile?.endsWith(".txt") && safeExistsSync(reqOrSetupFile)) {
13679-
const reqData = readFileSync(reqOrSetupFile, { encoding: "utf-8" });
1368013700
// We only need the package names, so we pass `false` to avoid fetching full metadata.
13681-
const tempPkgList = await parseReqFile(reqData, false);
13701+
const tempPkgList = await parseReqFile(reqOrSetupFile, null, false);
1368213702
for (const pkg of tempPkgList) {
1368313703
if (pkg.name) {
1368413704
// Normalize the name (lowercase, hyphenated) for accurate lookups.

0 commit comments

Comments
 (0)