diff --git a/CHANGELOG.md b/CHANGELOG.md index 90ea7299d9..9d43d69ca5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ Vuoinesluobbalah is a lake close to Bierikjávrre. - [#1093](https://github.com/nf-core/sarek/pull/1093) - Fixing Ascat by reverting meta.id in channels allele_files, loci_files, gc_file and rt_file to baseName. - [#1098](https://github.com/nf-core/sarek/pull/1098) - Fix Channel issue in Mutect2 subworkflow [#1094](https://github.com/nf-core/sarek/pull/1094) - [#1100](https://github.com/nf-core/sarek/pull/1100) - Remove duplicate index with deepvariant when no_intervals [#1069](https://github.com/nf-core/sarek/pull/1069) +- [#1101](https://github.com/nf-core/sarek/pull/1101) - Remove duplicate index computation for GATK4 MarkDuplicates [#1065](https://github.com/nf-core/sarek/issues/1065) +- [#1101](https://github.com/nf-core/sarek/pull/1101) - Fix GATK4 version for GATK4 MarkDuplicatesSpark [#1068](https://github.com/nf-core/sarek/issues/1068) - [#1105](https://github.com/nf-core/sarek/pull/1105) - Remove `params.tracedir` ## [3.2.1](https://github.com/nf-core/sarek/releases/tag/3.2.1) - Pierikjaure diff --git a/modules.json b/modules.json index 97eb570c13..6732e1901e 100644 --- a/modules.json +++ b/modules.json @@ -258,12 +258,12 @@ }, "gatk4/markduplicates": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "0a261469640941da2488e1a5aa023b64db837c70", "installed_by": ["modules"] }, "gatk4/markduplicatesspark": { "branch": "master", - "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", + "git_sha": "0a261469640941da2488e1a5aa023b64db837c70", "installed_by": ["modules"] }, "gatk4/mergemutectstats": { diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf index cb5c1cc1a1..f4b3f6dcf0 100644 --- a/modules/nf-core/gatk4/markduplicates/main.nf +++ b/modules/nf-core/gatk4/markduplicates/main.nf @@ -2,10 +2,10 @@ process GATK4_MARKDUPLICATES { 
tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "bioconda::gatk4=4.4.0.0 bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0': + 'biocontainers/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0' }" input: tuple val(meta), path(bam) @@ -25,7 +25,11 @@ process GATK4_MARKDUPLICATES { script: def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}.bam" + + // If the extension is CRAM, then change it to BAM + prefix_bam = prefix.tokenize('.')[-1] == 'cram' ? "${prefix.substring(0, prefix.lastIndexOf('.'))}.bam" : prefix + def input_list = bam.collect{"--INPUT $it"}.join(' ') def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" @@ -35,23 +39,29 @@ process GATK4_MARKDUPLICATES { } else { avail_mem = (task.memory.mega*0.8).intValue() } + + // Using samtools and not Markduplicates to compress to CRAM speeds up computation: + // https://medium.com/@acarroll.dna/looking-at-trade-offs-in-compression-levels-for-genomics-tools-eec2834e8b94 """ gatk --java-options "-Xmx${avail_mem}M" MarkDuplicates \\ $input_list \\ - --OUTPUT ${prefix} \\ + --OUTPUT ${prefix_bam} \\ --METRICS_FILE ${prefix}.metrics \\ --TMP_DIR . 
\\ ${reference} \\ $args - - if [[ ${prefix} == *.cram ]]&&[[ -f ${prefix}.bai ]]; then - mv ${prefix}.bai ${prefix}.crai + # If cram files are wished as output, the run samtools for conversion + if [[ ${prefix} == *.cram ]]; then + samtools view -Ch -T ${fasta} -o ${prefix} ${prefix_bam} + rm ${prefix_bam} + samtools index ${prefix} fi cat <<-END_VERSIONS > versions.yml "${task.process}": gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ } diff --git a/modules/nf-core/gatk4/markduplicatesspark/main.nf b/modules/nf-core/gatk4/markduplicatesspark/main.nf index f51d598f86..30f4703613 100644 --- a/modules/nf-core/gatk4/markduplicatesspark/main.nf +++ b/modules/nf-core/gatk4/markduplicatesspark/main.nf @@ -2,7 +2,7 @@ process GATK4_MARKDUPLICATES_SPARK { tag "$meta.id" label 'process_high' - conda "bioconda::gatk4=4.3.0.0 conda-forge::openjdk=8.0.312" + conda "bioconda::gatk4=4.4.0.0 conda-forge::openjdk=8.0.312" container "nf-core/gatk:4.4.0.0" input: diff --git a/subworkflows/local/bam_markduplicates/main.nf b/subworkflows/local/bam_markduplicates/main.nf index 89cd1aeb15..f64d30be74 100644 --- a/subworkflows/local/bam_markduplicates/main.nf +++ b/subworkflows/local/bam_markduplicates/main.nf @@ -6,7 +6,6 @@ include { CRAM_QC_MOSDEPTH_SAMTOOLS } from '../cram_qc_mosdepth_samtools/main' include { GATK4_MARKDUPLICATES } from '../../../modules/nf-core/gatk4/markduplicates/main' -include { SAMTOOLS_INDEX as INDEX_MARKDUPLICATES } from '../../../modules/nf-core/samtools/index/main' workflow BAM_MARKDUPLICATES { take: @@ -22,11 +21,8 @@ workflow BAM_MARKDUPLICATES { // RUN MARKUPDUPLICATES GATK4_MARKDUPLICATES(bam, fasta, fasta_fai) - // Index cram - INDEX_MARKDUPLICATES(GATK4_MARKDUPLICATES.out.cram) - // Join with the crai file - cram = GATK4_MARKDUPLICATES.out.cram.join(INDEX_MARKDUPLICATES.out.crai, failOnDuplicate: true, failOnMismatch: 
true) + cram = GATK4_MARKDUPLICATES.out.cram.join(GATK4_MARKDUPLICATES.out.crai, failOnDuplicate: true, failOnMismatch: true) // QC on CRAM CRAM_QC_MOSDEPTH_SAMTOOLS(cram, fasta, intervals_bed_combined) @@ -37,7 +33,6 @@ workflow BAM_MARKDUPLICATES { // Gather versions of all tools used versions = versions.mix(GATK4_MARKDUPLICATES.out.versions) - versions = versions.mix(INDEX_MARKDUPLICATES.out.versions) versions = versions.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.versions) emit: