Skip to content

Commit c81e07a

Browse files
authored
Merge pull request #613 from maxulysse/dev_vep_plugin
add params for dbnsfp vep plugin + fix filenames for vep plugins
2 parents f2ca98c + 047cfc1 commit c81e07a

File tree

7 files changed

+51
-18
lines changed

7 files changed

+51
-18
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3434
- [#597](https://github.com/nf-core/sarek/pull/597) - Added tiddit for tumor variant calling
3535
- [#600](https://github.com/nf-core/sarek/pull/600) - Added description for UMI related params in schema
3636
- [#604](https://github.com/nf-core/sarek/pull/604), [#617](https://github.com/nf-core/sarek/pull/617) - Added full size tests WGS 30x NA12878
37+
- [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--dbnsfp_fields` to allow configuration of fields for the `dbnsfp` `VEP` plugin
38+
- [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--dbnsfp_consequence` to allow configuration of consequence for the `dbnsfp` `VEP` plugin
39+
- [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--vep_version` to allow more configuration on the vep container definition
3740
- [#620](https://github.com/nf-core/sarek/pull/620) - Added checks for sex information when running CNV tools
3841

3942
### Changed
@@ -111,6 +114,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
111114
- [#600](https://github.com/nf-core/sarek/pull/600) - Remove `nf-core lint` warnings
112115
- [#602](https://github.com/nf-core/sarek/pull/602) - Fixed bug in `alignment_to_fastq` and added tests
113116
- [#609](https://github.com/nf-core/sarek/pull/609) - Remove unused intervals code, reorganize combined intervals file
117+
- [#613](https://github.com/nf-core/sarek/pull/613) - Fixed filenames for `dbnsfp` and `SpliceAI` `VEP` plugin
114118
- [#615](https://github.com/nf-core/sarek/pull/615) - Fix ASCAT igenomes file paths
115119
- [#619](https://github.com/nf-core/sarek/pull/619) - Fix issue with checking samplesheet content with AWS
116120

conf/igenomes.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ params {
3535
vep_cache_version = 105
3636
vep_genome = 'GRCh37'
3737
vep_species = 'homo_sapiens'
38+
vep_version = '104.3'
3839
}
3940
'GATK.GRCh38' {
4041
ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_alleles_hg38.zip"
@@ -64,6 +65,7 @@ params {
6465
vep_cache_version = 105
6566
vep_genome = 'GRCh38'
6667
vep_species = 'homo_sapiens'
68+
vep_version = '104.3'
6769
}
6870
'Ensembl.GRCh37' {
6971
bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/"
@@ -92,6 +94,7 @@ params {
9294
vep_cache_version = 102
9395
vep_genome = 'GRCm38'
9496
vep_species = 'mus_musculus'
97+
vep_version = '104.3'
9598
}
9699
'TAIR10' {
97100
bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/"
@@ -116,6 +119,7 @@ params {
116119
vep_cache_version = 105
117120
vep_genome = 'WBcel235'
118121
vep_species = 'caenorhabditis_elegans'
122+
vep_version = '104.3'
119123
}
120124
'CanFam3.1' {
121125
bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/"

conf/modules.config

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1146,17 +1146,18 @@ process{
11461146
// VEP
11471147
if (params.tools && (params.tools.contains('vep') || params.tools.contains('merge'))) {
11481148
withName: 'ENSEMBLVEP' {
1149-
// If just VEP: <vcf prefix>_VEP.ann.vcf
1150-
ext.prefix = { "${vcf.baseName.minus(".vcf")}_VEP" }
11511149
ext.args = [
11521150
'--everything --filter_common --per_gene --total_length --offline --format vcf',
1153-
(params.vep_dbnsfp && params.dbnsfp) ? '--plugin dbNSFP,dbNSFP.gz,rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF' : '',
1154-
(params.vep_loftee) ? '--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-104.3/share/ensembl-vep-104.3-0' : '',
1155-
(params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? '--plugin SpliceAI,snv=spliceai_scores.raw.snv.hg38.vcf.gz,indel=spliceai_scores.raw.indel.hg38.vcf.gz' : '',
1156-
(params.vep_spliceregion) ? '--plugin SpliceRegion' : '',
1157-
(params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf'
1151+
(params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '',
1152+
(params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '',
1153+
(params.vep_loftee) ? "--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-${params.vep_version}/share/ensembl-vep-${params.vep_version}-0" : '',
1154+
(params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '',
1155+
(params.vep_spliceregion) ? '--plugin SpliceRegion' : '',
1156+
(params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf'
11581157
].join(' ').trim()
1159-
if (!params.vep_cache) container = { params.vep_genome ? "nfcore/vep:104.3.${params.vep_genome}" : "nfcore/vep:104.3.${params.genome}" }
1158+
// If just VEP: <vcf prefix>_VEP.ann.vcf
1159+
ext.prefix = { "${vcf.baseName.minus(".vcf")}_VEP" }
1160+
if (!params.vep_cache) container = { params.vep_genome ? "nfcore/vep:${params.vep_version}.${params.vep_genome}" : "nfcore/vep:${params.vep_version}.${params.genome}" }
11601161
publishDir = [
11611162
[
11621163
mode: params.publish_dir_mode,
@@ -1175,7 +1176,7 @@ process{
11751176
// SNPEFF THEN VEP
11761177
if (params.tools && params.tools.contains('merge')) {
11771178
withName: ".*:ANNOTATION_MERGE:ENSEMBLVEP" {
1178-
// If megre: Output file will have format *_snpEff_VEP.ann.vcf, *_snpEff_VEP.ann.json or *_snpEff_VEP.ann.tab
1179+
// If merge: Output file will have format *_snpEff_VEP.ann.vcf, *_snpEff_VEP.ann.json or *_snpEff_VEP.ann.tab
11791180
ext.prefix = { "${vcf.baseName.minus(".ann.vcf")}_VEP" }
11801181
}
11811182
}

conf/test.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ params {
4141
vep_cache_version = 104
4242
vep_genome = 'WBcel235'
4343
vep_species = 'caenorhabditis_elegans'
44+
vep_version = '104.3'
4445

4546
// Ignore params that will throw warning through params validation
4647
schema_ignore_params = "genomes,test_data"

main.nf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ params.pon_tbi = WorkflowMain.getGenomeAttribute(params, 'pon_tbi'
5252
params.snpeff_db = WorkflowMain.getGenomeAttribute(params, 'snpeff_db')
5353
params.snpeff_genome = WorkflowMain.getGenomeAttribute(params, 'snpeff_genome')
5454
params.vep_cache_version = WorkflowMain.getGenomeAttribute(params, 'vep_cache_version')
55+
params.vep_version = WorkflowMain.getGenomeAttribute(params, 'vep_version')
5556
params.vep_genome = WorkflowMain.getGenomeAttribute(params, 'vep_genome')
5657
params.vep_species = WorkflowMain.getGenomeAttribute(params, 'vep_species')
5758

nextflow.config

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ params {
7676
vep_dbnsfp = null // dbnsfp plugin disabled within VEP
7777
dbnsfp = null // No dbnsfp processed file
7878
dbnsfp_tbi = null // No dbnsfp processed file index
79+
dbnsfp_consequence = null // No default consequence for dbnsfp plugin
80+
dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin
7981
vep_loftee = null // loftee plugin disabled within VEP
8082
vep_spliceai = null // spliceai plugin disabled within VEP
8183
spliceai_snv = null // No spliceai_snv file

nextflow_schema.json

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@
325325
"type": "string",
326326
"fa_icon": "fas fa-database",
327327
"description": "Path to dbNSFP processed file.",
328-
"help_text": "To be used with `--vep_dbnsfp`.",
328+
"help_text": "To be used with `--vep_dbnsfp`.\ndbNSFP files and more information are available at https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#dbnsfp and https://sites.google.com/site/jpopgen/dbNSFP/",
329329
"hidden": true
330330
},
331331
"dbnsfp_tbi": {
@@ -335,6 +335,21 @@
335335
"help_text": "To be used with `--vep_dbnsfp`.",
336336
"hidden": true
337337
},
338+
"dbnsfp_consequence": {
339+
"type": "string",
340+
"fa_icon": "fas fa-database",
341+
"description": "Consequence to annotate with",
342+
"help_text": "To be used with `--vep_dbnsfp`.\nThis param is used to filter/limit outputs to a specific effect of the variant.\nThe set of consequence terms is defined by the Sequence Ontology and an overview of those used in VEP can be found here: https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html\nIf one wants to filter using several consequences, then separate those by using '&' (e.g. 'consequence=3_prime_UTR_variant&intron_variant').",
343+
"hidden": true
344+
},
345+
"dbnsfp_fields": {
346+
"type": "string",
347+
"fa_icon": "fas fa-database",
348+
"description": "Fields to annotate with",
349+
"default": "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF",
350+
"help_text": "To be used with `--vep_dbnsfp`.\nThis param can be used to retrieve individual values from the dbNSFP file. The values correspond to the names of the columns in the dbNSFP file and are separated by commas.\nThe column names might differ between the different dbNSFP versions. Please check the Readme.txt file, which is provided with the dbNSFP file, to obtain the correct column names. The Readme file also contains a short description of the provided values and the version of the tools used to generate them.\nDefault values are explained below:\nrs_dbSNP - rs number from dbSNP\nHGVSc_VEP - HGVS coding variant presentation from VEP. Multiple entries separated by ';', corresponds to Ensembl_transcriptid\nHGVSp_VEP - HGVS protein variant presentation from VEP. Multiple entries separated by ';', corresponds to Ensembl_proteinid\n1000Gp3_EAS_AF - Alternative allele frequency in the 1000Gp3 East Asian descendent samples\n1000Gp3_AMR_AF - Alternative allele frequency in the 1000Gp3 American descendent samples\nLRT_score - Original LRT two-sided p-value (LRTori), ranges from 0 to 1\nGERP++_RS - Conservation score. The larger the score, the more conserved the site, ranges from -12.3 to 6.17\ngnomAD_exomes_AF - Alternative allele frequency in the whole gnomAD exome samples.",
351+
"hidden": true
352+
},
338353
"vep_loftee": {
339354
"type": "boolean",
340355
"fa_icon": "fas fa-database",
@@ -401,6 +416,14 @@
401416
"description": "Path to VEP cache.",
402417
"help_text": "To be used with `--annotation_cache`.",
403418
"hidden": true
419+
},
420+
"vep_out_format": {
421+
"type": "string",
422+
"default": "vcf",
423+
"description": "VEP output-file format.",
424+
"enum": ["json", "tab", "vcf"],
425+
"help_text": "Sets the format of the output-file from VEP. Available formats: json, tab and vcf.",
426+
"fa_icon": "fas fa-table"
404427
}
405428
}
406429
},
@@ -425,13 +448,12 @@
425448
"ascat_chromosomes": {
426449
"type": "string",
427450
"fa_icon": "fa-solid fa-text",
428-
"default": "'c(1:22, 'X')'",
451+
"default": "c(1:22, 'X')",
429452
"help_text": "Specify specific chromosomes to run ASCAT on, e.g. 'c('21', '22')'."
430453
},
431454
"ascat_genome": {
432455
"type": "string",
433456
"fa_icon": "fa-solid fa-text",
434-
"default": "hg38",
435457
"description": "ASCAT genome.",
436458
"help_text": "Must be set to run ASCAT, either hg19 or hg38. If you use AWS iGenomes, this has already been set for you appropriately."
437459
},
@@ -589,13 +611,11 @@
589611
"description": "VEP cache version.",
590612
"help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
591613
},
592-
"vep_out_format": {
614+
"vep_version": {
593615
"type": "string",
594-
"default": "vcf",
595-
"description": "VEP output-file format.",
596-
"enum": ["json", "tab", "vcf"],
597-
"help_text": "Sets the format of the output-file from VEP. Available formats: json, tab and vcf.",
598-
"fa_icon": "fas fa-table"
616+
"fa_icon": "fas fa-tag",
617+
"description": "VEP version.",
618+
"help_text": "If you use AWS iGenomes, this has already been set for you appropriately."
599619
},
600620
"save_reference": {
601621
"type": "boolean",

0 commit comments

Comments
 (0)