Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bin/diann_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ def convert(ctx, diann_report, exp_design):
out_triqler.loc[:, "searchScore"] = 1 - report['PEP']

out_msstats = out_msstats[out_msstats["Intensity"] != 0]
out_msstats.to_csv('./out_msstats.csv', sep=',', index=False)
out_msstats.to_csv(os.path.basename(exp_design) + '_out_msstats.csv', sep=',', index=False)
out_triqler = out_triqler[out_triqler["intensity"] != 0]
out_triqler.to_csv('./out_triqler.tsv', sep='\t', index=False)
out_triqler.to_csv(os.path.basename(exp_design) + '_out_triqler.tsv', sep='\t', index=False)

def query_expdesign_value(reference, f_table, s_table):
query_reference = f_table[f_table["run"] == reference]
Expand Down
21 changes: 13 additions & 8 deletions bin/msstats_plfq.R
Original file line number Diff line number Diff line change
Expand Up @@ -212,16 +212,21 @@ if (l == 1) {
#write all comparisons into one CSV file
write.table(test.MSstats$ComparisonResult, file=paste0(args[8],"_comparisons.csv"), quote=FALSE, sep='\t', row.names = FALSE)

groupComparisonPlots(data=test.MSstats$ComparisonResult, type="ComparisonPlot",
width=12, height=12,dot.size = 2)

test.MSstats$Volcano <- test.MSstats$ComparisonResult[!is.na(test.MSstats$ComparisonResult$pvalue),]
groupComparisonPlots(data=test.MSstats$Volcano, type="VolcanoPlot",
valid_comp_data <- test.MSstats$ComparisonResult[!is.na(test.MSstats$ComparisonResult$pvalue), ]
if (nrow(valid_comp_data[!duplicated(valid_comp_data$Protein),]) < 2) {
warning("Warning: No valid Protein Comparison, Skip groupComparisonPlots step!")
} else {
groupComparisonPlots(data=test.MSstats$ComparisonResult, type="ComparisonPlot",
width=12, height=12,dot.size = 2)

# Otherwise it fails since the behaviour is undefined
if (nrow(contrast_mat) > 1) {
groupComparisonPlots(data=test.MSstats$ComparisonResult, type="Heatmap",
groupComparisonPlots(data=valid_comp_data, type="VolcanoPlot",
width=12, height=12,dot.size = 2)

# Otherwise it fails since the behaviour is undefined
if (nrow(contrast_mat) > 1) {
groupComparisonPlots(data=test.MSstats$ComparisonResult, type="Heatmap",
width=12, height=12,dot.size = 2)
}
}

}
23 changes: 23 additions & 0 deletions bin/msstats_tmt.R
Original file line number Diff line number Diff line change
Expand Up @@ -242,4 +242,27 @@ if (l == 1) {

#TODO allow manual input (e.g. proteins of interest)
write.table(test.MSstatsTMT$ComparisonResult, file=paste0("msstatsiso_results.csv"), quote=FALSE, sep='\t', row.names = FALSE)

valid_comp_data <- test.MSstatsTMT$ComparisonResult[!is.na(test.MSstatsTMT$ComparisonResult$pvalue), ]

if (nrow(valid_comp_data[!duplicated(valid_comp_data$Protein),]) < 2) {
warning("Warning: No enough valid Protein Comparison, Skip groupComparisonPlots step!")
} else {
require(MSstats)
# BUG groupComparisonPlots function: re-run OpenMStoMSstatsTMTFormat
quant <- OpenMStoMSstatsTMTFormat(data, useUniquePeptide=useUniquePeptide, rmPSM_withfewMea_withinRun=rmPSM_withfewMea_withinRun,
rmProtein_with1Feature=rmProtein_with1Feature
)
groupComparisonPlots(data=test.MSstatsTMT$ComparisonResult, type="ComparisonPlot", width=12, height=12, dot.size = 2)

groupComparisonPlots(data=valid_comp_data, type="VolcanoPlot",
width=12, height=12, dot.size = 2)

# Otherwise it fails since the behavior is undefined
if (nrow(contrast_mat) > 1) {
groupComparisonPlots(data=test.MSstatsTMT$ComparisonResult, type="Heatmap",
width=12, height=12, dot.size = 2)
}
}

}
26 changes: 1 addition & 25 deletions bin/prepare_diann_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ def generate_cfg(ctx, enzyme, fix_mod, var_mod, precursor_tolerence, precursor_t
cut = enzyme_cut(enzyme)
unimod_database = UnimodDatabase()
fix_ptm, var_ptm = convert_mod(unimod_database, fix_mod, var_mod)
mass_acc, mass_acc_ms1 = mass_tolerence(precursor_tolerence, precursor_tolerence_unit, fragment_tolerence, fragment_tolerence_unit)
mass_acc = " --mass-acc " + str(mass_acc)
mass_acc_ms1 = " --mass-acc-ms1 " + str(mass_acc_ms1)

var_ptm_str = " --var-mod "
fix_ptm_str = " --fixed-mod "
Expand All @@ -36,11 +33,7 @@ def generate_cfg(ctx, enzyme, fix_mod, var_mod, precursor_tolerence, precursor_t
diann_var_ptm += (var_ptm_str + mod)

with open("diann_config.cfg", "w") as f:
f.write("--dir ./mzMLs --cut " + cut + diann_fix_ptm + diann_var_ptm + mass_acc + mass_acc_ms1 +
" --matrices --report-lib-info")

with open("library_config.cfg", "w") as f:
f.write("--cut " + cut + diann_fix_ptm + diann_var_ptm + " --gen-spec-lib --smart-profiling")
f.write("--cut " + cut + diann_fix_ptm + diann_var_ptm)

def convert_mod(unimod_database, fix_mod, var_mod):
pattern = re.compile("\((.*?)\)")
Expand Down Expand Up @@ -110,23 +103,6 @@ def enzyme_cut(enzyme):
cut = "--cut"
return cut

def _ppm_or_default(value, unit, default_ppm):
    """Return ``value`` if ``unit`` denotes ppm, else warn and return ``default_ppm``."""
    # Accept "ppm" regardless of case or surrounding whitespace; anything
    # else (including None or a non-string) falls back to the default.
    if isinstance(unit, str) and unit.strip().lower() == "ppm":
        return value
    # format() instead of "+" so a None / non-string unit produces a warning
    # rather than a TypeError.
    print("Warning: {} unit not supported for DIA-NN. Default {} ppm".format(unit, default_ppm))
    return default_ppm


def mass_tolerence(prec, precursor_tolerence_unit, frag, fragment_tolerence_unit):
    """Pick the precursor and fragment mass tolerances to use, in ppm.

    A tolerance is passed through unchanged only when its unit is ppm.
    For any other unit a warning is printed and a fixed default is used
    instead (10 ppm for the precursor, 20 ppm for the fragment); no unit
    conversion is attempted.

    Args:
        prec: Precursor tolerance value.
        precursor_tolerence_unit: Unit of ``prec`` (e.g. "ppm").
        frag: Fragment tolerance value.
        fragment_tolerence_unit: Unit of ``frag`` (e.g. "ppm").

    Returns:
        A ``(ms1_tolerence, ms2_tolerence)`` tuple: the precursor-derived
        value first, the fragment-derived value second.
    """
    ms1_tolerence = _ppm_or_default(prec, precursor_tolerence_unit, 10)
    ms2_tolerence = _ppm_or_default(frag, fragment_tolerence_unit, 20)
    return ms1_tolerence, ms2_tolerence

cli.add_command(generate_cfg)

if __name__ == "__main__":
Expand Down
5 changes: 5 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,11 @@ process {
// DIA-NN
withName: '.*:DIA:.*' {
ext.when = { !params.enable_conda }
publishDir = [
path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

}
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
process LIBRARYGENERATION {
label 'process_high'
process ASSEMBLE_EMPIRICAL_LIBRARY {
label 'process_low'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.1_cv1/diann_v1.8.1_cv1.img' :
'biocontainers/diann:v1.8.1_cv1' }"

input:
tuple file(mzml), file(fasta)
file(library_config)
path(mzMLs)
path("quant/*")
path(lib)
path(diann_config)

output:
path "*_lib.tsv", emit: lib_splib
path "empirical_library.tsv", emit: empirical_library
path "assemble_empirical_library.log", emit: log
path "versions.yml", emit: version
path "report.log.txt", emit: log
path "*.tsv.speclib", emit: speclib
path "*.predicted.speclib", emit: predict_speclib

when:
task.ext.when == null || task.ext.when
Expand All @@ -24,29 +24,37 @@ process LIBRARYGENERATION {

min_pr_mz = params.min_pr_mz ? "--min-pr-mz $params.min_pr_mz":""
max_pr_mz = params.max_pr_mz ? "--max-pr-mz $params.max_pr_mz":""
min_fr_mz = params.min_fr_mz ? "--min_fr_mz $params.min_fr_mz":""
max_fr_mz = params.max_fr_mz ? "--max_fr_mz $params.max_fr_mz":""
min_fr_mz = params.min_fr_mz ? "--min-fr-mz $params.min_fr_mz":""
max_fr_mz = params.max_fr_mz ? "--max-fr-mz $params.max_fr_mz":""

mass_acc = params.mass_acc_automatic ? "--quick-mass-acc --individual-mass-acc" : "--mass-acc $params.mass_acc_ms2 --mass-acc-ms1 $params.mass_acc_ms1"
scan_window = params.scan_window_automatic ? "--individual-windows" : "--window $params.scan_window"

"""
diann `cat library_config.cfg` \\
--fasta ${fasta} \\
--fasta-search \\
--f ${mzml} \\
--out-lib ${mzml.baseName}_lib.tsv \\
diann "echo \$(cat ${diann_config})" \\
--f ${(mzMLs as List).join(' --f ')} \\
--lib ${lib} \\
${min_pr_mz} \\
${max_pr_mz} \\
${min_fr_mz} \\
${max_fr_mz} \\
--threads ${task.cpus} \\
--out-lib empirical_library.tsv \\
--missed-cleavages $params.allowed_missed_cleavages \\
--min-pep-len $params.min_peptide_length \\
--max-pep-len $params.max_peptide_length \\
--min-pr-charge $params.min_precursor_charge \\
--max-pr-charge $params.max_precursor_charge \\
--var-mods $params.max_mods \\
--threads ${task.cpus} \\
--predictor \\
--verbose $params.diann_debug \\
|& tee diann.log
--rt-profiling \\
--temp ./quant/ \\
--use-quant \\
${mass_acc} \\
${scan_window} \\
--gen-spec-lib \\
$args \\
|& tee assemble_empirical_library.log


cat <<-END_VERSIONS > versions.yml
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
name: diannsearch
description: A module for DIA library free analysis based on DIA-NN.
name: assemble_empirical_library
description: A module for assembling an empirical spectral library based on DIA-NN.
keywords:
- DIA-NN
- library free
- DIA
tools:
- DIA-NN:
Expand All @@ -11,34 +10,30 @@ tools:
homepage: https://github.com/vdemichev/DiaNN
documentation: https://github.com/vdemichev/DiaNN
input:
- mzMLs:
type: file
description: Spectra file in mzML format
pattern: "*.mzML"
- quant:
type: file
description: The .quant files containing IDs and quantification information
- lib:
type: file
description: Spectra library file
pattern: "*.tsv"
- spectra:
type: dir
description: The directory for spectra files
- searchdb:
type: file
description: Fasta sequence file
pattern: "*.{fasta,fa}"
- cfg:
- diann_config:
type: dir
description: Specifies a file to load options/commands from.
pattern: "*.cfg"
output:
- report:
type: file
description: Main report file. A text table containing precursor and protein IDs, as well as plenty of associated information. Most column names are self-explanatory.
pattern: "diann_report.tsv"
- report_stat:
- empirical_library:
type: file
description: Contains a number of QC metrics which can be used for data filtering, e.g. to exclude failed runs, or as a readout for method optimization.
pattern: "diann_report.stats.tsv"
description: An empirical spectral library from the .quant files.
pattern: "empirical_library.tsv"
- log:
type: file
description: DIA-NN log file
pattern: "diann_report.log.txt"
pattern: "assemble_empirical_library.log"
- version:
type: file
description: File containing software version
Expand Down
19 changes: 9 additions & 10 deletions modules/local/diann_preliminary_analysis/main.nf
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
process DIANN_PRELIMINARY_ANALYSIS {
tag "$meta.id"
label 'process_high'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.1_cv1/diann_v1.8.1_cv1.img' :
'biocontainers/diann:v1.8.1_cv1' }"

input:
tuple file(mzML), file(predict_tsv), file(diann_config)
tuple val(meta), file(mzML), file(predict_tsv), file(diann_config)

output:
path "*.quant", emit: diann_quant
path "${mzML.baseName}_lib.tsv", emit: lib
path "*.log.txt", emit: log
tuple val(meta), path("*_diann.log"), emit: log
path "versions.yml", emit: version

when:
Expand All @@ -24,11 +24,12 @@ process DIANN_PRELIMINARY_ANALYSIS {
min_fr_mz = params.min_fr_mz ? "--min-fr-mz $params.min_fr_mz" : ""
max_fr_mz = params.max_fr_mz ? "--max-fr-mz $params.max_fr_mz" : ""

quick_mass_acc = params.quick_mass_acc ? "--quick-mass-acc" : ""
mass_acc = params.mass_acc_automatic ? "--quick-mass-acc --individual-mass-acc" : "--mass-acc $params.mass_acc_ms2 --mass-acc-ms1 $params.mass_acc_ms1"
scan_window = params.scan_window_automatic ? "--individual-windows" : "--window $params.scan_window"
time_corr_only = params.time_corr_only ? "--time-corr-only" : ""

"""
diann `cat diann_config.cfg` \\
diann "echo \$(cat ${diann_config})" \\
--lib ${predict_tsv} \\
--f ${mzML} \\
${min_pr_mz} \\
Expand All @@ -43,16 +44,14 @@ process DIANN_PRELIMINARY_ANALYSIS {
--max-pr-charge $params.max_precursor_charge \\
--var-mods $params.max_mods \\
--verbose $params.diann_debug \\
--window $params.scan_window \\
--gen-spec-lib \\
--out-lib ${mzML.baseName}_lib.tsv \\
${scan_window} \\
--temp ./ \\
--min-corr $params.min_corr \\
--corr-diff $params.corr_diff \\
${quick_mass_acc} \\
${mass_acc} \\
${time_corr_only} \\
$args \\
|& tee diann.log
|& tee ${mzML.baseName}_diann.log
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like --gen-spec-lib should not be here but let's wait for input



cat <<-END_VERSIONS > versions.yml
Expand Down
9 changes: 4 additions & 5 deletions modules/local/diann_preliminary_analysis/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ tools:
homepage: https://github.com/vdemichev/DiaNN
documentation: https://github.com/vdemichev/DiaNN
input:
- meta:
type: map
description: Groovy Map containing sample information
- predict_tsv:
type: file
description: In silico-predicted spectral library generated by the deep learning predictor in DIA-NN
Expand All @@ -27,14 +30,10 @@ output:
type: file
description: Quantification file from DIA-NN
pattern: "*.quant"
- lib:
type: file
description: Spectral library file
pattern: "*.tsv"
- log:
type: file
description: DIA-NN log file
pattern: "diann_report.log.txt"
pattern: "*_diann.log"
- version:
type: file
description: File containing software version
Expand Down
5 changes: 3 additions & 2 deletions modules/local/diannconvert/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process DIANNCONVERT {
tag "$exp_design.Name"
label 'process_low'

conda (params.enable_conda ? "conda-forge::pandas_schema bioconda::sdrf-pipelines=0.0.21" : null)
Expand All @@ -13,8 +14,8 @@ process DIANNCONVERT {
path(exp_design)

output:
path "out_msstats.csv", emit: out_msstats
path "out_triqler.tsv", emit: out_triqler
path "*out_msstats.csv", emit: out_msstats
path "*out_triqler.tsv", emit: out_triqler
path "versions.yml", emit: version

script:
Expand Down
Loading