Skip to content

Commit 4d9d253

Browse files
authored
Merge pull request #626 from weizhongchun/master
Add onsite module
2 parents c482ab1 + 155d8a3 commit 4d9d253

File tree

13 files changed

+274
-30
lines changed

13 files changed

+274
-30
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
3737
3. (Optional) Add extra PSM features using [`ms2rescore`](https://github.com/compomics/ms2rescore)
3838
4. Re-scoring peptide identifications [`percolator`](https://github.com/percolator/percolator)
3939
5. Peptide identification FDR [`openms fdr tool`](https://github.com/bigbio/quantms/blob/HEAD/modules/local/openms/false_discovery_rate/main.nf)
40-
6. Modification localization [`luciphor`](https://github.com/dfermin/lucXor)
40+
6. Modification localization [`onsite`](https://github.com/bigbio/onsite)
4141
7. Quantification: Feature detection [`proteomicsLFQ`](https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_ProteomicsLFQ.html)
4242
8. Protein inference and quantification [`proteomicsLFQ`](https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_ProteomicsLFQ.html)
4343
9. QC report generation [`pmultiqc`](https://github.com/bigbio/pmultiqc)
@@ -50,7 +50,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
5050
3. (Optional) Add extra PSM features using [`ms2rescore`](https://github.com/compomics/ms2rescore)
5151
4. Re-scoring peptide identifications [`percolator`](https://github.com/percolator/percolator)
5252
5. Peptide identification FDR [`openms fdr tool`](https://github.com/bigbio/quantms/blob/HEAD/modules/local/openms/false_discovery_rate/main.nf)
53-
6. Modification localization [`luciphor`](https://github.com/dfermin/lucXor)
53+
6. Modification localization [`onsite`](https://github.com/bigbio/onsite)
5454
7. Extracts and normalizes isobaric labeling [`IsobaricAnalyzer`](https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IsobaricAnalyzer.html)
5555
8. Protein inference [`ProteinInference`](https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_ProteinInference.html) or [`Epifany`](https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_Epifany.html) for bayesian inference.
5656
9. Protein Quantification [`ProteinQuantifier`](https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_ProteinQuantifier.html)

conf/dev.config

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,9 @@ process {
2121
// Conda is no longer supported
2222
container = {"${ ( workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ) && !task.ext.singularity_pull_docker_container ? 'oras://ghcr.io/openms/openms-tools-thirdparty-sif:latest' : 'ghcr.io/openms/openms-tools-thirdparty:latest' }"}
2323
}
24+
25+
// ONSITE uses its own container (not OpenMS thirdparty)
26+
// Override the container for ONSITE, which does not run in the OpenMS thirdparty image.
// The pyonsite tag must match the one pinned in the ONSITE module itself (0.0.2);
// the previous 0.0.1 pin silently downgraded the tool for runs using this config.
withName: '.*:PHOSPHO_SCORING:ONSITE' {
    container = {"${ ( workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ) && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/pyonsite:0.0.2--pyhdfd78af_0' : 'quay.io/biocontainers/pyonsite:0.0.2--pyhdfd78af_0' }"}
}
2429
}

conf/modules/modules.config

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ if (params.search_engines.split(",").size() > 1) {
109109
if (params.enable_mod_localization) {
110110
process {
111111

112-
// ID_SCORE_SWITCHER_LUCIPHOR
112+
// ID_SCORE_SWITCHER_ONSITE
113113
withName: '.*:ID:PHOSPHO_SCORING:ID_SCORE_SWITCHER' {
114114
ext.args = [
115115
"-new_score_orientation lower_better",
@@ -119,9 +119,9 @@ if (params.enable_mod_localization) {
119119
].join(' ').trim()
120120
}
121121

122-
// LuciphorAdapter
123-
withName: '.*:ID:PHOSPHO_SCORING:LUCIPHOR' {
124-
ext.args = "-debug $params.luciphor_debug"
122+
// onsite
123+
withName: '.*:ID:PHOSPHO_SCORING:ONSITE' {
124+
ext.args = "-debug $params.onsite_debug"
125125
}
126126
}
127127
}

conf/modules/verbose_modules.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,9 @@ process {
157157
}
158158

159159
// Set the default publish for PTM processing steps
160-
withName: '.*:LUCIPHOR' {
160+
withName: '.*:ONSITE' {
161161
publishDir = [
162-
path: { "${params.outdir}/ptm_localization/luciphor" },
162+
path: { "${params.outdir}/ptm_localization/onsite" },
163163
mode: params.publish_dir_mode,
164164
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
165165
]

docs/output.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
2020
8. If multiple search engines were chosen, the results are combined with OpenMS' ConsensusID
2121
9. If multiple search engines were chosen, a combined FDR is calculated
2222
10. Single run PSM/Peptide-level FDR filtering
23-
11. If localization of modifications was requested, Luciphor2 is applied.
23+
11. If localization of modifications was requested, `onsite` is applied (AScore, PhosphoRS or LucXor; LucXor by default).
2424
12. (**DDA-LFQ**) Protein inference and label-free quantification based on spectral counting or MS1 feature detection, alignment and integration with OpenMS' ProteomicsLFQ. Performs an additional experiment-wide FDR filter on protein (and if requested peptide/PSM-level).
2525
13. (**DDA-ISO**) Extracts and normalizes isobaric labeling
2626
14. (**DDA-ISO**) Protein inference using the OpenMS ProteinInference tool. In addition, protein FDR filtering is performed in this step for Isobaric datasets (TMT, iTRAQ).
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
 * ONSITE - PTM site localization with the `onsite` command line tool.
 *
 * Runs the onsite sub-command selected by `params.onsite_algorithm`
 * ('ascore', 'phosphors' or 'lucxor'; 'lucxor' when unset) on one spectrum
 * file and its identification file.
 *
 * Input:
 *   meta       - sample map; `meta.mzml_id` is used as the task tag and
 *                `meta.dissociationmethod` as fallback fragmentation method
 *   mzml_file  - spectrum file passed to `-in`
 *   id_file    - identification file passed to `-id`
 *
 * Output:
 *   ptm_in_id_onsite - tuple(meta, "<id_file basename>_onsite_<algorithm>.idXML")
 *   versions         - versions.yml
 *   log              - combined stdout/stderr of the onsite call
 */
process ONSITE {
    tag "$meta.mzml_id"
    label 'process_medium'
    label 'openms'

    // Same engine check as the dev profile override: apptainer is treated like singularity.
    container "${ ( workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ) && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pyonsite:0.0.2--pyhdfd78af_0' :
        'quay.io/biocontainers/pyonsite:0.0.2--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(mzml_file), path(id_file)

    output:
    tuple val(meta), path("${id_file.baseName}_onsite_*.idXML"), emit: ptm_in_id_onsite
    path "versions.yml", emit: versions
    path "*.log", emit: log

    script:
    // NOTE: task.ext.args is intentionally not read here. The command line is built
    // entirely from params.onsite_* below; the previously declared `args` and
    // `prefix` locals were never used.

    // Select algorithm (ascore, phosphors, or lucxor)
    def algorithm = params.onsite_algorithm ?: 'lucxor'

    // Basic parameters shared by all algorithms
    def fragment_tolerance = params.onsite_fragment_tolerance ?: 0.5
    def fragment_units = params.onsite_fragment_error_units ?: 'Da'
    def threads = params.onsite_threads ?: task.cpus
    def add_decoys = params.onsite_add_decoys ?: false
    // Falls back to false, matching the declared pipeline default for
    // onsite_compute_all_scores (the earlier fallback of `true` disagreed with it).
    def compute_all_scores = params.onsite_compute_all_scores ?: false

    // Debug option - onsite only accepts the --debug flag without a value,
    // so any truthy (non-zero) onsite_debug just switches it on.
    def debug = params.onsite_debug ? "--debug" : ""

    // Algorithm-specific pieces of the command line
    def tolerance_param = ""
    def method_param = ""
    def algorithm_specific_params = ""
    // Holds `--target-modifications ...` for LucXor or `--add-decoys` for AScore/PhosphoRS
    def decoy_param = ""

    if (algorithm == 'lucxor') {
        // LucXor uses --fragment-error-units and --fragment-method
        tolerance_param = "--fragment-error-units ${fragment_units}"

        // NOTE(review): the meta fallback only applies when onsite_fragment_method
        // is unset/null; confirm whether the pipeline default should be null.
        def fragment_method = params.onsite_fragment_method ?: meta.dissociationmethod
        method_param = fragment_method ? "--fragment-method ${fragment_method}" : ""

        def min_psms = params.onsite_min_psms ?: 5
        def lucxor_opts = [
            params.onsite_neutral_losses ? "--neutral-losses ${params.onsite_neutral_losses}" : "",
            params.onsite_decoy_mass ? "--decoy-mass ${params.onsite_decoy_mass}" : "",
            params.onsite_decoy_neutral_losses ? "--decoy-neutral-losses ${params.onsite_decoy_neutral_losses}" : "",
            "--min-num-psms-model ${min_psms}"
        ]

        // disable_split_by_charge is only supported by LucXor
        if (params.onsite_disable_split_by_charge) {
            lucxor_opts << "--disable-split-by-charge"
        }
        if (compute_all_scores) {
            lucxor_opts << "--compute-all-scores"
        }
        // Drop unset options so the rendered command has no stray blanks
        algorithm_specific_params = lucxor_opts.findAll { it }.join(' ')

        // LucXor takes decoys as an extra entry in --target-modifications
        if (params.mod_localization) {
            def target_mods = params.mod_localization.tokenize(',').collect { it.trim() }

            // Add decoy modification if enabled
            if (add_decoys) {
                target_mods.add('PhosphoDecoy(A)')
            }

            decoy_param = "--target-modifications '${target_mods.join(',')}'"
        } else if (add_decoys) {
            // No mod_localization given but decoys enabled: use the phospho defaults plus decoy
            decoy_param = "--target-modifications 'Phospho(S),Phospho(T),Phospho(Y),PhosphoDecoy(A)'"
        }
    } else {
        // AScore and PhosphoRS use --fragment-mass-unit and have no --fragment-method
        tolerance_param = "--fragment-mass-unit ${fragment_units}"

        if (compute_all_scores) {
            algorithm_specific_params = "--compute-all-scores"
        }

        // AScore and PhosphoRS use the --add-decoys flag
        if (add_decoys) {
            decoy_param = "--add-decoys"
        }
    }

    """
    onsite ${algorithm} \\
        -in ${mzml_file} \\
        -id ${id_file} \\
        -out ${id_file.baseName}_onsite_${algorithm}.idXML \\
        --fragment-mass-tolerance ${fragment_tolerance} \\
        ${tolerance_param} \\
        --threads ${threads} \\
        ${method_param} \\
        ${decoy_param} \\
        ${algorithm_specific_params} \\
        ${debug} \\
        2>&1 | tee ${id_file.baseName}_onsite_${algorithm}.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        onsite: \$(onsite --version 2>&1 | grep -oP 'version \\K[0-9.]+' || echo "0.0.2")
    END_VERSIONS
    """
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Module description for the ONSITE process.
# Output entry names mirror the `emit:` names of the process
# (ptm_in_id_onsite, log, versions) so the two stay in sync.
name: onsite
description: PTM site localization using onsite (AScore, PhosphoRS, or LucXor).
keywords:
  - onsite
  - AScore
  - PhosphoRS
  - LucXor
  - modification
  - PTM
tools:
  - onsite:
      description: |
        Mass spectrometry post-translational modification localization tool
      homepage: https://github.com/bigbio/onsite
      documentation: https://github.com/bigbio/onsite
input:
  - meta:
      type: map
      description: Groovy Map containing sample information
  - mzml_file:
      type: file
      description: Input spectrum file.
      pattern: "*.mzML"
  - id_file:
      type: file
      description: Protein/peptide identifications file
      pattern: "*.idXML"
output:
  - meta:
      type: map
      description: Groovy Map containing sample information
  - ptm_in_id_onsite:
      type: file
      description: Protein/peptide identifications file with scored PTM sites
      pattern: "*.idXML"
  - log:
      type: file
      description: log file
      pattern: "*.log"
  - versions:
      type: file
      description: File containing software version
      pattern: "versions.yml"
authors:
  - "@weizhongchun"

nextflow.config

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ params {
4545
percolator_debug = 0
4646
consensusid_debug = 0
4747
idmapper_debug = 0
48-
luciphor_debug = 0
48+
onsite_debug = 0
4949
protein_inference_debug = 0
5050
plfq_debug = 0
5151
protein_quant_debug = 0
@@ -144,10 +144,19 @@ params {
144144
min_consensus_support = 0
145145
consensusid_considered_top_hits = 0
146146

147-
// Luciphor options
148-
luciphor_neutral_losses = null
149-
luciphor_decoy_mass = null
150-
luciphor_decoy_neutral_losses = null
147+
// onsite options
148+
onsite_algorithm = 'lucxor' // Options: 'ascore', 'phosphors', 'lucxor'
149+
onsite_fragment_method = 'CID' // CID or HCD
150+
onsite_fragment_tolerance = 0.5
151+
onsite_fragment_error_units = 'Da' // Da or ppm
152+
onsite_add_decoys = false // Add decoy modifications
153+
onsite_neutral_losses = null
154+
onsite_decoy_mass = null
155+
onsite_decoy_neutral_losses = null
156+
onsite_threads = 1
157+
onsite_min_psms = 5 // Minimum number of high-scoring PSMs for lucxor model training
158+
onsite_disable_split_by_charge = false // Disable splitting PSMs by charge state for lucxor
159+
onsite_compute_all_scores = false // Compute all scores for all candidate sites
151160

152161
// Epifany
153162
top_PSMs = 1

nextflow_schema.json

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -437,30 +437,97 @@
437437
"default": "Phospho (S),Phospho (T),Phospho (Y)",
438438
"fa_icon": "fas fa-tasks"
439439
},
440-
"luciphor_neutral_losses": {
440+
"onsite_algorithm": {
441+
"type": "string",
442+
"description": "Algorithm to use for PTM localization (ascore, phosphors, or lucxor)",
443+
"default": "lucxor",
444+
"enum": ["ascore", "phosphors", "lucxor"],
445+
"fa_icon": "fas fa-cogs",
446+
"hidden": true
447+
},
448+
"onsite_fragment_method": {
449+
"type": "string",
450+
"description": "Fragmentation method (CID or HCD)",
451+
"default": "CID",
452+
"enum": ["CID", "HCD"],
453+
"fa_icon": "fas fa-atom",
454+
"hidden": true
455+
},
456+
"onsite_fragment_tolerance": {
457+
"type": "number",
458+
"description": "Fragment mass tolerance",
459+
"default": 0.5,
460+
"fa_icon": "fas fa-ruler",
461+
"hidden": true
462+
},
463+
"onsite_fragment_error_units": {
464+
"type": "string",
465+
"description": "Fragment error units (Da or ppm)",
466+
"default": "Da",
467+
"enum": ["Da", "ppm"],
468+
"fa_icon": "fas fa-balance-scale",
469+
"hidden": true
470+
},
471+
"onsite_add_decoys": {
472+
"type": "boolean",
473+
"description": "Add decoy modifications for validation",
474+
"default": false,
475+
"fa_icon": "fas fa-shield-alt",
476+
"help_text": "When enabled, adds decoy modifications for validation. For AScore/PhosphoRS, adds --add-decoys flag. For LucXor, adds PhosphoDecoy(A) to target modifications.",
477+
"hidden": true
478+
},
479+
"onsite_neutral_losses": {
441480
"type": "string",
442481
"description": "List of neutral losses to consider for mod. localization.",
443482
"fa_icon": "fas fa-font",
444483
"help_text": "List the types of neutral losses that you want to consider. The residue field is case sensitive. For example: lower case 'sty' implies that the neutral loss can only occur if the specified modification is present.\nSyntax: 'NL = <RESIDUES> -<NEUTRAL_LOSS_MOLECULAR_FORMULA> <MASS_LOST>'\n(default: '[sty -H3PO4 -97.97690]')",
445484
"hidden": true
446485
},
447-
"luciphor_decoy_mass": {
486+
"onsite_decoy_mass": {
448487
"type": "number",
449488
"description": "How much to add to an amino acid to make it a decoy for mod. localization.",
450489
"fa_icon": "fas fa-font",
451490
"hidden": true
452491
},
453-
"luciphor_decoy_neutral_losses": {
492+
"onsite_decoy_neutral_losses": {
454493
"type": "string",
455494
"description": "List of neutral losses to consider for mod. localization from an internally generated decoy sequence.",
456495
"fa_icon": "fas fa-font",
457496
"help_text": "For handling the neutral loss from a decoy sequence. The syntax for this is identical to that of the normal neutral losses given above except that the residue is always 'X'. Syntax: DECOY_NL = X -<NEUTRAL_LOSS_MOLECULAR_FORMULA> <MASS_LOST> (default: '[X -H3PO4 -97.97690]')",
458497
"hidden": true
459498
},
460-
"luciphor_debug": {
499+
"onsite_threads": {
500+
"type": "integer",
501+
"description": "Number of threads for onsite processing",
502+
"default": 1,
503+
"fa_icon": "fas fa-microchip",
504+
"hidden": true
505+
},
506+
"onsite_min_psms": {
507+
"type": "integer",
508+
"description": "Minimum number of high-scoring PSMs required for model training in LucXor",
509+
"default": 5,
510+
"fa_icon": "fas fa-chart-line",
511+
"hidden": true
512+
},
513+
"onsite_disable_split_by_charge": {
514+
"type": "boolean",
515+
"description": "Disable splitting PSMs by charge state for lucxor",
516+
"default": false,
517+
"fa_icon": "fas fa-ban",
518+
"hidden": true
519+
},
520+
"onsite_compute_all_scores": {
521+
"type": "boolean",
522+
"description": "Compute all scores for all candidate sites",
523+
"default": false,
524+
"fa_icon": "fas fa-calculator",
525+
"hidden": true
526+
},
527+
"onsite_debug": {
461528
"type": "integer",
462529
"fa_icon": "fas fa-bug",
463-
"description": "Debug level for Luciphor step. Increase for verbose logging and keeping temp files.",
530+
"description": "Debug switch for the onsite step. Any non-zero value passes `--debug` to onsite for verbose logging.",
464531
"hidden": true
465532
}
466533
},

subworkflows/local/dda_id/main.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,8 @@ workflow DDA_ID {
167167

168168
if (params.enable_mod_localization) {
169169
PHOSPHO_SCORING(ch_file_preparation_results, PSM_FDR_CONTROL.out.id_filtered)
170-
ch_software_versions = ch_software_versions.mix(PHOSPHO_SCORING.out.versions)
171-
ch_id_results = PHOSPHO_SCORING.out.id_luciphor
170+
ch_software_versions = ch_software_versions.mix(PHOSPHO_SCORING.out.versions.ifEmpty(null))
171+
ch_id_results = PHOSPHO_SCORING.out.id_onsite
172172
} else {
173173
ch_id_results = PSM_FDR_CONTROL.out.id_filtered
174174
}

0 commit comments

Comments
 (0)