diff --git a/conf/modules.config b/conf/modules.config index e42d20381b..37a66ee735 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -33,7 +33,7 @@ process { withName: 'BWAMEM1_INDEX' { ext.when = { !params.bwa && params.step == "mapping" && params.aligner == "bwa-mem" } publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, path: { "${params.outdir}/reference" }, pattern: "bwa" @@ -43,7 +43,7 @@ process { withName: 'BWAMEM2_INDEX' { ext.when = { !params.bwa && params.step == "mapping" && params.aligner == "bwa-mem2" } publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, path: { "${params.outdir}/reference" }, pattern: "bwamem2" @@ -53,7 +53,7 @@ process { withName: 'DRAGMAP_HASHTABLE' { ext.when = { !params.bwa && params.step == "mapping" && params.aligner == "dragmap" } publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, path: { "${params.outdir}/reference" }, pattern: "dragmap" @@ -63,7 +63,7 @@ process { withName: 'GATK4_CREATESEQUENCEDICTIONARY' { ext.when = { !params.dict && params.step != "annotate" && params.step != "controlfreec" } publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, path: { "${params.outdir}/reference/gatk4" }, pattern: "*dict" @@ -73,7 +73,7 @@ process { withName: 'MSISENSORPRO_SCAN' { ext.when = { params.tools && params.tools.contains('msisensorpro') } publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, path: { "${params.outdir}/reference/msi" }, pattern: "*list" @@ -83,7 +83,7 @@ process { withName: 'SAMTOOLS_FAIDX' { ext.when = { !params.fasta_fai && params.step != "annotate" } publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, 
path: { "${params.outdir}/reference/fai" }, pattern: "*fai" @@ -93,7 +93,7 @@ process { withName: 'TABIX_DBSNP' { ext.when = { !params.dbsnp_tbi && params.dbsnp && (params.step == "mapping" || params.step == "prepare_recalibration") || params.tools && (params.tools.contains('controlfreec') || params.tools.contains('haplotypecaller') || params.tools.contains('mutect2')) } publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, path: { "${params.outdir}/reference/dbsnp" }, pattern: "*vcf.gz.tbi" @@ -103,7 +103,7 @@ process { withName: 'TABIX_GERMLINE_RESOURCE' { ext.when = { !params.germline_resource_tbi && params.germline_resource && params.tools && params.tools.contains('mutect2') } publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, path: { "${params.outdir}/reference/germline_resource" }, pattern: "*vcf.gz.tbi" @@ -113,7 +113,7 @@ process { withName: 'TABIX_KNOWN_INDELS' { ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == 'prepare_recalibration') } publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, path: { "${params.outdir}/reference/known_indels" }, pattern: "*vcf.gz.tbi" @@ -123,7 +123,7 @@ process { withName: 'TABIX_PON' { ext.when = { !params.pon_tbi && params.pon && params.tools && params.tools.contains('mutect2') } publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, path: { "${params.outdir}/reference/pon" }, pattern: "*vcf.gz.tbi" @@ -134,7 +134,7 @@ process { withName: 'CREATE_INTERVALS_BED' { publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, path: { "${params.outdir}/reference/intervals" }, pattern: "*bed" @@ -143,7 +143,7 @@ process { withName: 
'GATK4_INTERVALLISTTOBED' { publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, path: { "${params.outdir}/reference/intervals" }, pattern: "*bed" @@ -153,7 +153,7 @@ process { withName: 'TABIX_BGZIPTABIX' { ext.prefix = {"${meta.id}.bed"} publishDir = [ - enabled: "${params.save_reference}", + enabled: params.save_reference, mode: params.publish_dir_mode, path: { "${params.outdir}/reference/target" }, pattern: "*bed.gz" @@ -162,12 +162,12 @@ process { // BAM TO FASTQ - withName: 'SAMTOOLS_FASTQ_MAPPED'{ + withName: 'COLLATE_FASTQ_MAP'{ ext.args2 = '-N' ext.prefix = {"${meta.id}.mapped"} } - withName: 'SAMTOOLS_FASTQ_UNMAPPED'{ + withName: 'COLLATE_FASTQ_UNMAP'{ ext.args2 = '-N' ext.prefix = {"${meta.id}.unmapped"} } @@ -199,20 +199,17 @@ process { [ path: { "${params.outdir}/reports/trimgalore/${meta.id}/fastqc" }, mode: params.publish_dir_mode, - pattern: "*.{html,zip}", - enabled: true + pattern: "*.{html,zip}" ], [ path: { "${params.outdir}/trimgalore/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.fq.gz", - enabled: true + pattern: "*.fq.gz" ], [ path: { "${params.outdir}/reports/trimgalore/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.txt", - enabled: true + pattern: "*.txt" ] ] } @@ -246,10 +243,9 @@ process { ext.args = { "--by-size ${params.split_fastq}" } ext.when = { params.split_fastq > 1 } publishDir = [ - enabled: "${params.save_split_fastqs}", - mode: params.publish_dir_mode, - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + enabled: params.save_split_fastqs, + path: { "${params.outdir}/preprocessing/${meta.sample}/seqkit" }, + mode: params.publish_dir_mode ] } @@ -275,7 +271,10 @@ process { ext.args2 = { params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates') && (!params.skip_tools || (params.skip_tools && !params.skip_tools.contains('markduplicates'))) ? '-n' : '' } ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.findAll(/part_([0-9]+)?/).last()) : "" } publishDir = [ - enabled: false + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/${meta.id}/mapped" }, + pattern: "*.bam", + saveAs: { (params.save_bam_mapped || (params.skip_tools && params.skip_tools.contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? it : null } ] } @@ -284,9 +283,8 @@ process { ext.args = { meta.status == 1 ? "-K 100000000 -M -B 3 -R ${meta.read_group}" : "-K 100000000 -M -R ${meta.read_group}" } } - withName: 'INDEX_MERGE_BAM' { + withName: 'MERGE_BAM|INDEX_MERGE_BAM' { publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/preprocessing/${meta.id}/mapped" }, pattern: "*{bam,bai}" @@ -299,10 +297,9 @@ process { ext.prefix = { "${meta.id}.md" } ext.when = { !(params.skip_tools && params.skip_tools.contains('markduplicates_report')) } publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, - pattern: "*{metrics}" + pattern: "*metrics" ] } @@ -310,10 +307,9 @@ process { ext.args = '-REMOVE_DUPLICATES false -VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true' ext.prefix = { "${meta.id}.md" } publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/reports/${meta.id}/markduplicates" }, - pattern: "*{metrics}" + pattern: "*metrics" ] } @@ -321,7 +317,6 @@ process { ext.args = '--remove-sequencing-duplicates false -VS LENIENT' ext.prefix = { !(params.skip_tools 
&& (params.skip_tools.contains('bamqc') || params.skip_tools.contains('deeptools'))) ? "${meta.id}.md.bam" : "${meta.id}.md.cram" } publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, pattern: "*{cram,crai}" @@ -332,10 +327,9 @@ process { ext.when = { !(params.skip_tools && params.skip_tools.contains('markduplicates')) } } - withName: 'SAMTOOLS_BAM_TO_CRAM' { + withName: 'SAMTOOLS_BAMTOCRAM' { ext.prefix = { "${meta.id}.md" } publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, pattern: "*{cram,crai}" @@ -344,7 +338,6 @@ process { withName: 'INDEX_MARKDUPLICATES' { publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, pattern: "*{cram,crai}" @@ -356,7 +349,6 @@ process { withName: 'BASERECALIBRATOR|BASERECALIBRATOR_SPARK|GATHERBQSRREPORTS' { ext.prefix = {"${meta.id}.recal"} publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/preprocessing/${meta.id}/recal_table" }, pattern: "*.table" @@ -372,7 +364,10 @@ process { withName: 'APPLYBQSR|APPLYBQSR_SPARK' { ext.prefix = {"${meta.id}.recal"} publishDir = [ - enabled: false + enabled: !params.no_intervals, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/${meta.id}/recalibrated" }, + pattern: "*cram" ] } @@ -380,15 +375,14 @@ process { ext.prefix = { "${meta.id}.recal" } ext.when = { !params.no_intervals } publishDir = [ - enabled: false, mode: params.publish_dir_mode, path: { "${params.outdir}/preprocessing/${meta.id}/recalibrated" }, pattern: "*cram" ] } + withName: 'NFCORE_SAREK:SAREK:(RECALIBRATE|RECALIBRATE_SPARK):MERGE_INDEX_CRAM:INDEX_CRAM' { publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/preprocessing/${meta.id}/recalibrated" }, pattern: "*{recal.cram,recal.cram.crai}" @@ 
-404,8 +398,7 @@ process { [ path: { "${params.outdir}/reports/fastqc/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.{html,zip}", - enabled: true + pattern: "*.{html,zip}" ] ] } @@ -415,7 +408,6 @@ process { ext.prefix = { "${meta.id}.mapped" } ext.when = { !(params.skip_tools && params.skip_tools.contains('bamqc')) } publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/reports/qualimap/${meta.id}" } ] @@ -424,7 +416,6 @@ process { withName: 'SAMTOOLS_STATS' { ext.when = { !(params.skip_tools && params.skip_tools.contains('samtools')) } publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/reports/samtools_stats/${meta.id}" } ] @@ -433,7 +424,6 @@ process { withName: 'DEEPTOOLS_BAMCOVERAGE' { ext.when = { !(params.skip_tools && params.skip_tools.contains('deeptools')) } publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/reports/deeptools/${meta.id}" } ] @@ -444,7 +434,6 @@ process { ext.prefix = { "${meta.id}.recal" } ext.when = { !(params.skip_tools && params.skip_tools.contains('bamqc')) } publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/reports/qualimap/${meta.id}" } ] @@ -453,7 +442,6 @@ process { withName: 'NFCORE_SAREK:SAREK:CRAM_QC:SAMTOOLS_STATS' { ext.when = { !(params.skip_tools && params.skip_tools.contains('samtools')) } publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/reports/samtools_stats/${meta.id}" } ] @@ -482,7 +470,7 @@ process{ // DEEPVARIANT withName: 'CONCAT_DEEPVARIANT_.*' { publishDir = [ - enabled: "${!params.no_intervals}", + enabled: !params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/deepvariant" }, pattern: "*{vcf.gz,vcf.gz.tbi}" @@ -495,7 +483,7 @@ process{ ext.args = { params.wes ? 
"--model_type WES" : "--model_type WGS" } ext.when = { params.tools && params.tools.contains('deepvariant') } publishDir = [ - enabled: "${params.no_intervals}", + enabled: params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/deepvariant" }, pattern: "*{vcf.gz,vcf.gz.tbi}" @@ -503,7 +491,6 @@ process{ } withName : 'TABIX_VC_DEEPVARIANT.*' { publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/deepvariant" }, pattern: "*{vcf.gz,vcf.gz.tbi}" @@ -513,7 +500,7 @@ process{ // FREEBAYES withName: 'CONCAT_FREEBAYES' { publishDir = [ - enabled: "${!params.no_intervals}", + enabled: !params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/freebayes" } ] @@ -522,14 +509,13 @@ process{ ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1' ext.when = { params.tools && params.tools.contains('freebayes') } publishDir = [ - enabled: "${params.no_intervals}", + enabled: params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/freebayes" } ] } withName : 'TABIX_VC_FREEBAYES' { publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/freebayes" } ] @@ -539,7 +525,7 @@ process{ withName: 'CONCAT_HAPLOTYPECALLER' { ext.prefix = {"${meta.id}.g"} publishDir = [ - enabled: "${!params.no_intervals }", + enabled: !params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/haplotypecaller" } ] @@ -549,7 +535,7 @@ process{ ext.prefix = {"${meta.id}.g"} ext.when = { params.tools && params.tools.contains('haplotypecaller') } publishDir = [ - enabled: "${params.no_intervals}", + enabled: params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/haplotypecaller"}, pattern: "*{vcf.gz,vcf.gz.tbi}" @@ -558,7 +544,7 @@ process{ 
withName: 'GENOTYPEGVCFS' { ext.when = { params.tools && params.tools.contains('haplotypecaller') && params.joint_germline} publishDir = [ - enabled: "${params.generate_gvcf}", + enabled: params.generate_gvcf, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/haplotypecaller"}, pattern: "*{vcf.gz,vcf.gz.tbi}" @@ -568,7 +554,7 @@ process{ // MANTA withName: 'CONCAT_MANTA.*' { publishDir = [ - enabled: "${!params.no_intervals}", + enabled: !params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/manta" }, pattern: "*{vcf.gz,vcf.gz.tbi}" @@ -587,7 +573,7 @@ process{ ext.args = { params.wes ? "--exome" : "" } ext.when = { params.tools && params.tools.contains('manta') } publishDir = [ - enabled: "${params.no_intervals}", + enabled: params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/manta" }, pattern: "*{vcf.gz,vcf.gz.tbi}" @@ -597,7 +583,7 @@ process{ // STRELKA withName: 'CONCAT_STRELKA.*' { publishDir = [ - enabled: "${!params.no_intervals}", + enabled: !params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/strelka" }, pattern: "*{vcf.gz,vcf.gz.tbi}" @@ -613,20 +599,19 @@ process{ ext.args = { params.wes ? 
"--exome" : "" } ext.when = { params.tools && params.tools.contains('strelka') } publishDir = [ - enabled: "${params.no_intervals}", + enabled: params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/strelka" }, pattern: "*{vcf.gz,vcf.gz.tbi}" ] } - //withName: 'TIDDIT_SV' { - // publishDir = [ - // enabled: true, - // mode: params.publish_dir_mode, - // path: { "${params.outdir}/variant_calling/${meta.id}/tiddit" } - // ] - //} + // withName: 'TIDDIT_SV' { + // publishDir = [ + // mode: params.publish_dir_mode, + // path: { "${params.outdir}/variant_calling/${meta.id}/tiddit" } + // ] + // } // TUMOR_VARIANT_CALLING @@ -642,7 +627,7 @@ process{ withName: 'CAT_MPILEUP_.*' { ext.when = { !params.no_intervals } publishDir = [ - enabled: "${!params.no_intervals}", + enabled: !params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/controlfreec" }, pattern: "*{mpileup}" @@ -720,7 +705,7 @@ process{ withName: 'MPILEUP_.*' { ext.when = { params.tools && params.tools.contains('controlfreec') } publishDir = [ - enabled: "${params.no_intervals}", + enabled: params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/controlfreec" }, pattern: "*{mpileup}" @@ -735,7 +720,6 @@ process{ //MUTECT2 withName: 'GATK4_CALCULATECONTAMINATION' { publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } ] @@ -743,7 +727,7 @@ process{ withName: 'CONCAT_MUTECT2.*' { publishDir = [ - enabled: "${!params.no_intervals}", + enabled: !params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, pattern: "*{vcf.gz,vcf.gz.tbi}" @@ -753,7 +737,6 @@ process{ withName: 'FILTERMUTECTCALLS.*'{ ext.prefix = {"${meta.id}.filtered"} publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { 
"${params.outdir}/variant_calling/${meta.id}/mutect2" } ] @@ -763,7 +746,7 @@ process{ ext.prefix = { "${meta.id}.table" } ext.when = { "${!params.no_intervals}"} publishDir = [ - enabled: "${!params.no_intervals}", + enabled: !params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } ] @@ -771,7 +754,7 @@ process{ withName: 'GETPILEUPSUMMARIES.*' { publishDir = [ - enabled: "${params.no_intervals}", + enabled: params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } ] @@ -780,7 +763,6 @@ process{ withName: 'MERGEMUTECTSTATS' { ext.prefix = { "${meta.id}.vcf.gz" } publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } ] @@ -790,7 +772,7 @@ process{ ext.when = { params.tools && params.tools.contains('mutect2') } ext.args = { params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true" : "" } publishDir = [ - enabled: "${params.no_intervals}", + enabled: params.no_intervals, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, pattern: "*{gz,gz.tbi,stats}" @@ -889,7 +871,7 @@ process{ withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING:GATHERPILEUPSUMMARIES.*' { ext.prefix = { "${meta.id}.table" } publishDir = [ - enabled: "${!params.no_intervals}", + enabled: !params.no_intervals, mode: params.publish_dir_mode, //use ${meta.tumor_id}_vs_${meta_normal_id} to publish in the same directory as the remainders of the //somatic output whilst keeping the filename prefix identifieable for status type @@ -968,7 +950,6 @@ process { withName: 'ANNOTATION_BGZIPTABIX' { publishDir = [ - enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/annotation/${meta.id}/${meta.variantcaller}" }, pattern: "*{gz,gz.tbi}" diff --git a/modules.json b/modules.json index 352c90fc11..e0040c33bd 
100644 --- a/modules.json +++ b/modules.json @@ -174,6 +174,12 @@ "samtools/bam2fq": { "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, + "samtools/bamtocram": { + "git_sha": "569e07f0af74e2a6ea43fca61ae90bb762893461" + }, + "samtools/collatefastq": { + "git_sha": "705f8c9ac4dfdf07666e71abde28f267e2dfd5eb" + }, "samtools/faidx": { "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, @@ -204,6 +210,9 @@ "strelka/somatic": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, + "tabix/bgzip": { + "git_sha": "37bf3936f3665483d070a5e0e0b314311032af7c" + }, "tabix/bgziptabix": { "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" }, diff --git a/modules/local/samtools/index/main.nf b/modules/local/samtools/index/main.nf deleted file mode 100644 index 48d5c461f6..0000000000 --- a/modules/local/samtools/index/main.nf +++ /dev/null @@ -1,32 +0,0 @@ -process SAMTOOLS_INDEX { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" - - input: - tuple val(meta), path(input) - - output: - tuple val(meta), path("*.bam", includeInputs:true), path("*.bai") , optional:true, emit: bam_bai - tuple val(meta), path("*.bam", includeInputs:true), path("*.csi") , optional:true, emit: bam_csi - tuple val(meta), path("*.cram", includeInputs:true), path("*.crai"), optional:true, emit: cram_crai - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - samtools index -@ ${task.cpus-1} $args $input - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/samtools/index/meta.yml b/modules/local/samtools/index/meta.yml deleted file mode 100644 index e5cadbc24d..0000000000 --- a/modules/local/samtools/index/meta.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: samtools_index -description: Index SAM/BAM/CRAM file -keywords: - - index - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - crai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - csi: - type: file - description: CSI index file - pattern: "*.{csi}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" - - "@maxulysse" diff --git a/modules/local/samtools/mergecram/main.nf b/modules/local/samtools/mergecram/main.nf deleted file mode 100644 index 0769b689cb..0000000000 --- a/modules/local/samtools/mergecram/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMTOOLS_MERGE_CRAM { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" - - input: - tuple val(meta), path(crams) - path fasta - - output: - tuple val(meta), path("*.cram"), emit: cram - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - samtools merge -@${task.cpus} --reference ${fasta} ${prefix}.cram $crams - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/samtools/viewindex/meta.yml b/modules/local/samtools/viewindex/meta.yml deleted file mode 100644 index 244b4e972f..0000000000 --- a/modules/local/samtools/viewindex/meta.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: samtools_view -description: filter/convert SAM/BAM/CRAM file -keywords: - - view - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for 
interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: filtered/converted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" -authors: - - "@drpatelh" - - "@joseespinosa" diff --git a/modules/local/samtoolsview.nf b/modules/local/samtoolsview.nf deleted file mode 100644 index 70eba8c705..0000000000 --- a/modules/local/samtoolsview.nf +++ /dev/null @@ -1,45 +0,0 @@ -process SAMTOOLS_VIEW { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" - - input: - tuple val(meta), path(input) - path fasta - - output: - tuple val(meta), path("*.bam"), path("*.bai") , emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? 
"--reference ${fasta}" : "" - //if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ - samtools \\ - view \\ - --threads ${task.cpus-1} \\ - ${reference} \\ - $args \\ - $input \\ - $args2 \\ - > ${prefix}.bam - - samtools index ${prefix}.bam - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/samtools/viewindex/main.nf b/modules/nf-core/modules/samtools/bamtocram/main.nf similarity index 67% rename from modules/local/samtools/viewindex/main.nf rename to modules/nf-core/modules/samtools/bamtocram/main.nf index e72dde1dca..b49c308fb9 100644 --- a/modules/local/samtools/viewindex/main.nf +++ b/modules/nf-core/modules/samtools/bamtocram/main.nf @@ -1,5 +1,5 @@ //There is a -L option to only output alignments in interval, might be an option for exons/panel data? -process SAMTOOLS_VIEWINDEX { +process SAMTOOLS_BAMTOCRAM { tag "$meta.id" label 'process_medium' @@ -11,12 +11,11 @@ process SAMTOOLS_VIEWINDEX { input: tuple val(meta), path(input), path(index) path fasta - path fasta_fai + path fai output: - tuple val(meta), path("*.bam"), path("*.bai") , optional: true, emit: bam_bai - tuple val(meta), path("*.cram"), path("*.crai"), optional: true, emit: cram_crai - path "versions.yml" , emit: versions + tuple val(meta), path("*.cram"), path("*.crai"), emit: cram_crai + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,9 +23,8 @@ process SAMTOOLS_VIEWINDEX { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def reference_command = fasta ? 
"--reference ${fasta} -C" : "" """ - samtools view --threads ${task.cpus-1} ${reference_command} $args $input > ${prefix}.cram + samtools view --threads ${task.cpus} --reference ${fasta} -C $args $input > ${prefix}.cram samtools index -@${task.cpus} ${prefix}.cram cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/samtools/mergecram/meta.yml b/modules/nf-core/modules/samtools/bamtocram/meta.yml similarity index 65% rename from modules/local/samtools/mergecram/meta.yml rename to modules/nf-core/modules/samtools/bamtocram/meta.yml index 85db025da5..037704c622 100644 --- a/modules/local/samtools/mergecram/meta.yml +++ b/modules/nf-core/modules/samtools/bamtocram/meta.yml @@ -1,9 +1,9 @@ -name: samtools_merge -description: Merge BAM file +name: samtools_bamtocram +description: filter/convert and then index CRAM file keywords: - - merge + - view + - index - bam - - sam - cram tools: - samtools: @@ -21,25 +21,32 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - input: type: file - description: BAM file - pattern: "*.{bam,cram,sam}" + description: BAM/SAM file + pattern: "*.{bam,sam}" + - index: + type: file + description: BAM/SAM index file + pattern: "*.{bai,sai}" + - fasta: + type: file + description: Reference file to create the CRAM file + pattern: "*.{fasta,fa}" output: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - merged_bam: + - cram_crai: type: file - description: BAM file - pattern: "*.{bam}" + description: filtered/converted CRAM file + index + pattern: "*{.cram,.crai}" - version: type: file description: File containing software version pattern: "*.{version.txt}" authors: - - "@drpatelh" - - "@yuukiiwa " + - "@FriederikeHanssen" - "@maxulysse" diff --git a/modules/local/samtools/fastq/main.nf b/modules/nf-core/modules/samtools/collatefastq/main.nf similarity index 72% rename from modules/local/samtools/fastq/main.nf rename to modules/nf-core/modules/samtools/collatefastq/main.nf index a86fb45bdc..3d9becdae4 100644 --- a/modules/local/samtools/fastq/main.nf +++ b/modules/nf-core/modules/samtools/collatefastq/main.nf @@ -1,4 +1,4 @@ -process SAMTOOLS_FASTQ { +process SAMTOOLS_COLLATEFASTQ { tag "$meta.id" label 'process_low' @@ -13,8 +13,8 @@ process SAMTOOLS_FASTQ { output: //TODO might be good to have ordered output of the fastq files, so we can // make sure the we get the right files - tuple val(meta), path("*.fq.gz"), emit: reads - path "versions.yml" , emit: versions + tuple val(meta), path("*_{1,2}.fq.gz"), path("*_other.fq.gz"), path("*_singleton.fq.gz"), emit: reads + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,12 +24,16 @@ process SAMTOOLS_FASTQ { def args2 = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - samtools collate -O -@$task.cpus $args $input . | + samtools collate \\ + $args \\ + --threads $task.cpus \\ + -O \\ + $input \\ + . 
| - samtools \\ - fastq \\ + samtools fastq \\ $args2 \\ - -@ $task.cpus \\ + --threads $task.cpus \\ -1 ${prefix}_1.fq.gz \\ -2 ${prefix}_2.fq.gz \\ -0 ${prefix}_other.fq.gz \\ diff --git a/modules/local/samtools/fastq/meta.yml b/modules/nf-core/modules/samtools/collatefastq/meta.yml similarity index 71% rename from modules/local/samtools/fastq/meta.yml rename to modules/nf-core/modules/samtools/collatefastq/meta.yml index 319a60cf68..d3a2e3af07 100644 --- a/modules/local/samtools/fastq/meta.yml +++ b/modules/nf-core/modules/samtools/collatefastq/meta.yml @@ -1,6 +1,6 @@ -name: samtools_bam2fq +name: samtools_collatefastq description: | - The module uses bam2fq method from samtools to + The module uses collate and then fastq methods from samtools to convert a SAM, BAM or CRAM file to FASTQ format keywords: - bam2fq @@ -21,18 +21,10 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - inputbam: + - input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - split: - type: boolean - description: | - TRUE/FALSE value to indicate if reads should be separated into - /1, /2 and if present other, or singleton. - Note: choosing TRUE will generate 4 different files. - Choosing FALSE will produce a single file, which will be interleaved in case - the input contains paired reads. output: - meta: @@ -40,16 +32,17 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - reads: type: file description: | FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) or a single interleaved .fq.gz file if the user chooses not to split the reads. 
pattern: "*.fq.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@lescai" + - "@maxulysse" diff --git a/modules/local/bgzip.nf b/modules/nf-core/modules/tabix/bgzip/main.nf similarity index 54% rename from modules/local/bgzip.nf rename to modules/nf-core/modules/tabix/bgzip/main.nf index 4e3169c8ff..18e83c84d8 100644 --- a/modules/local/bgzip.nf +++ b/modules/nf-core/modules/tabix/bgzip/main.nf @@ -1,28 +1,30 @@ - -process BGZIP { +process TABIX_BGZIP { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::tabix=1.11" : null) + conda (params.enable_conda ? 'bioconda::tabix=1.11' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" input: - tuple val(meta), path(vcf_gz) + tuple val(meta), path(input) output: - tuple val(meta), path("*.vcf"), emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}*"), emit: output + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = input.toString().endsWith(".gz") + command1 = in_bgzip ? '-d' : '-c' + command2 = in_bgzip ? 
'' : " > ${prefix}.${input.getExtension()}.gz" """ - bgzip -d $vcf_gz -@${task.cpus} + bgzip $command1 $args -@${task.cpus} $input $command2 cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/tabix/bgzip/meta.yml b/modules/nf-core/modules/tabix/bgzip/meta.yml new file mode 100644 index 0000000000..5007017510 --- /dev/null +++ b/modules/nf-core/modules/tabix/bgzip/meta.yml @@ -0,0 +1,42 @@ +name: tabix_bgzip +description: Compresses/decompresses files +keywords: + - compress + - decompress + - bgzip + - tabix +tools: + - bgzip: + description: | + Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. + homepage: https://www.htslib.org/doc/tabix.html + documentation: http://www.htslib.org/doc/bgzip.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: file to compress or to decompress +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - output: + type: file + description: Output compressed/decompressed file + pattern: "*." 
+ - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/local/bam2fastq.nf b/subworkflows/local/bam2fastq.nf deleted file mode 100644 index a875db6d36..0000000000 --- a/subworkflows/local/bam2fastq.nf +++ /dev/null @@ -1,85 +0,0 @@ -// -// BAM/CRAM to FASTQ conversion, paired end only -// - -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_MAP } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_UNMAP } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_MAP } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_UNMAP } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_UNMAPPED } from '../../modules/nf-core/modules/samtools/merge/main' -include { SAMTOOLS_FASTQ as SAMTOOLS_FASTQ_UNMAPPED } from '../../modules/local/samtools/fastq/main' -include { SAMTOOLS_FASTQ as SAMTOOLS_FASTQ_MAPPED } from '../../modules/local/samtools/fastq/main' -include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main' - -workflow ALIGNMENT_TO_FASTQ { - take: - input // channel: [meta, alignment (BAM or CRAM), index (optional)] - fasta // optional: reference file if CRAM format and reference not in header - - main: - ch_versions = Channel.empty() - // Index File if not PROVIDED -> this also requires updates to samtools view possibly URGH - - //QC input BAM? 
-> needs another FASTQC module implementation - - // MAP - MAP - SAMTOOLS_VIEW_MAP_MAP(input, fasta) - - // UNMAP - UNMAP - SAMTOOLS_VIEW_UNMAP_UNMAP(input, fasta) - - // UNMAP - MAP - SAMTOOLS_VIEW_UNMAP_MAP(input, fasta) - - // MAP - UNMAP - SAMTOOLS_VIEW_MAP_UNMAP(input, fasta) - - // Merge UNMAP - SAMTOOLS_VIEW_UNMAP_UNMAP.out.bam.join(SAMTOOLS_VIEW_UNMAP_MAP.out.bam, remainder: true) - .join(SAMTOOLS_VIEW_MAP_UNMAP.out.bam, remainder: true) - .map{ meta, unmap_unmap, unmap_map, map_unmap -> - [meta, [unmap_unmap, unmap_map, map_unmap]] - }.set{ all_unmapped_bam } - - SAMTOOLS_MERGE_UNMAPPED(all_unmapped_bam, fasta) - - // Collate & convert unmapped - SAMTOOLS_FASTQ_UNMAPPED(SAMTOOLS_MERGE_UNMAPPED.out.bam) - - // Collate & convert mapped - SAMTOOLS_FASTQ_MAPPED(SAMTOOLS_VIEW_MAP_MAP.out.bam) - - // join Mapped & unmapped fastq - SAMTOOLS_FASTQ_UNMAPPED.out.reads.map{ meta, reads -> - fq_1 = reads.findAll{ it.toString().endsWith("_1.fq.gz") }.get(0) - fq_2 = reads.findAll{ it.toString().endsWith("_2.fq.gz") }.get(0) - [meta, [ fq_1, fq_2]] - }.set{unmapped_reads} - - SAMTOOLS_FASTQ_MAPPED.out.reads.map{ meta, reads -> - fq_1 = reads.findAll{ it.toString().endsWith("_1.fq.gz") }.get(0) - fq_2 = reads.findAll{ it.toString().endsWith("_2.fq.gz") }.get(0) - [meta, [ fq_1, fq_2]] - }.set{mapped_reads} - - mapped_reads.join(unmapped_reads).map{ meta, mapped_reads, unmapped_reads -> - [meta, [mapped_reads[0], mapped_reads[1], unmapped_reads[0], unmapped_reads[1]]] - }.set{ reads_to_concat } - - // Concatenate Mapped_R1 with Unmapped_R1 and Mapped_R2 with Unmapped_R2 - CAT_FASTQ(reads_to_concat) - - // Gather versions of all tools used - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_FASTQ_MAPPED.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_FASTQ_UNMAPPED.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_MERGE_UNMAPPED.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW_MAP_MAP.out.versions) - 
ch_versions = ch_versions.mix(SAMTOOLS_VIEW_MAP_UNMAP.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW_UNMAP_MAP.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW_UNMAP_UNMAP.out.versions) - - emit: - reads = CAT_FASTQ.out.reads - versions = ch_versions -} diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 81b1719e61..3dad1766ad 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -11,7 +11,6 @@ include { BWA_INDEX as BWAMEM1_INDEX } from '../../modules/nf-core/modules/bwa/index/main' include { BWAMEM2_INDEX } from '../../modules/nf-core/modules/bwamem2/index/main' include { DRAGMAP_HASHTABLE } from '../../modules/nf-core/modules/dragmap/hashtable/main' -include { CREATE_INTERVALS_BED } from '../../modules/local/create_intervals_bed/main' include { GATK4_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/modules/gatk4/createsequencedictionary/main' include { MSISENSORPRO_SCAN } from '../../modules/nf-core/modules/msisensorpro/scan/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/modules/samtools/faidx/main' @@ -48,7 +47,7 @@ workflow PREPARE_GENOME { TABIX_PON(pon.map{ it -> [[id:it[0].baseName], it] }) chr_files = chr_dir - //TODO this works, but is not pretty. I will leave this in yuor hands during refactoring @Maxime + //TODO this works, but is not pretty. 
I will leave this in your hands during refactoring @Maxime if ( params.chr_dir.endsWith('tar.gz')){ UNTAR_CHR_DIR(chr_dir.map{ it -> [[id:it[0].baseName], it] }) chr_files = UNTAR_CHR_DIR.out.untar.map{ it[1] } diff --git a/subworkflows/local/split_fastq.nf b/subworkflows/local/split_fastq.nf index 14ea551d6e..1062356c3a 100644 --- a/subworkflows/local/split_fastq.nf +++ b/subworkflows/local/split_fastq.nf @@ -14,26 +14,11 @@ workflow SPLIT_FASTQ { ch_versions = Channel.empty() - reads_no_split = reads_input.map{ meta, reads -> - [[ id:meta.id, - patient:meta.patient, - sample:meta.sample, - gender:meta.gender, - status:meta.status, - numLanes:meta.numLanes, - read_group: meta.read_group, - data_type:meta.data_type, - size:1], reads] - } - // Only if we want to split fastq files SEQKIT_SPLIT2(reads_input) - // Empty channel when splitting fastq files - if (params.split_fastq > 1) reads_no_split = Channel.empty() - // Remapping the channel - reads_split = SEQKIT_SPLIT2.out.reads.map{ key, reads -> + reads = SEQKIT_SPLIT2.out.reads.map{ key, reads -> //TODO maybe this can be replaced by a regex to include part_001 etc. 
//sorts list of split fq files by : @@ -44,8 +29,6 @@ workflow SPLIT_FASTQ { [key, read_files] }.transpose() - reads = reads_no_split.mix(reads_split) - ch_versions = ch_versions.mix(SEQKIT_SPLIT2.out.versions) emit: diff --git a/subworkflows/nf-core/alignment_to_fastq.nf b/subworkflows/nf-core/alignment_to_fastq.nf new file mode 100644 index 0000000000..81720133d4 --- /dev/null +++ b/subworkflows/nf-core/alignment_to_fastq.nf @@ -0,0 +1,87 @@ +// +// BAM/CRAM to FASTQ conversion, paired end only +// + +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_MAP } from '../../modules/nf-core/modules/samtools/view/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_UNMAP } from '../../modules/nf-core/modules/samtools/view/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_MAP } from '../../modules/nf-core/modules/samtools/view/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_UNMAP } from '../../modules/nf-core/modules/samtools/view/main' +include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_UNMAP } from '../../modules/nf-core/modules/samtools/merge/main' +include { SAMTOOLS_COLLATEFASTQ as COLLATE_FASTQ_UNMAP } from '../../modules/nf-core/modules/samtools/collatefastq/main' +include { SAMTOOLS_COLLATEFASTQ as COLLATE_FASTQ_MAP } from '../../modules/nf-core/modules/samtools/collatefastq/main' +include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main' + +workflow ALIGNMENT_TO_FASTQ { + take: + input // channel: [meta, alignment (BAM or CRAM), index (optional)] + fasta // optional: reference file if CRAM format and reference not in header + + main: + ch_versions = Channel.empty() + // Index File if not PROVIDED -> this also requires updates to samtools view possibly URGH + + // MAP - MAP + SAMTOOLS_VIEW_MAP_MAP(input, fasta) + + // UNMAP - UNMAP + SAMTOOLS_VIEW_UNMAP_UNMAP(input, fasta) + + // UNMAP - MAP + SAMTOOLS_VIEW_UNMAP_MAP(input, fasta) + + // MAP - UNMAP + SAMTOOLS_VIEW_MAP_UNMAP(input, fasta) + + // Merge UNMAP + all_unmapped_bam = 
SAMTOOLS_VIEW_UNMAP_UNMAP.out.bam + .join(SAMTOOLS_VIEW_UNMAP_MAP.out.bam, remainder: true) + .join(SAMTOOLS_VIEW_MAP_UNMAP.out.bam, remainder: true) + .map{ meta, unmap_unmap, unmap_map, map_unmap -> + [meta, [unmap_unmap, unmap_map, map_unmap]] + } + + SAMTOOLS_MERGE_UNMAP(all_unmapped_bam, fasta) + + // Collate & convert unmapped + COLLATE_FASTQ_UNMAP(SAMTOOLS_MERGE_UNMAP.out.bam) + + // Collate & convert mapped + COLLATE_FASTQ_MAP(SAMTOOLS_VIEW_MAP_MAP.out.bam) + + // join Mapped & unmapped fastq + unmapped_reads = COLLATE_FASTQ_UNMAP.out.reads + .map{ meta, reads -> + fq_1 = reads.find{ it.toString().endsWith("_1.fq.gz") } + fq_2 = reads.find{ it.toString().endsWith("_2.fq.gz") } + [meta, [ fq_1, fq_2]] + } + + mapped_reads = COLLATE_FASTQ_MAP.out.reads + .map{ meta, reads -> + fq_1 = reads.find{ it.toString().endsWith("_1.fq.gz") } + fq_2 = reads.find{ it.toString().endsWith("_2.fq.gz") } + [meta, [ fq_1, fq_2]] + } + + reads_to_concat = mapped_reads.join(unmapped_reads) + .map{ meta, mapped_reads, unmapped_reads -> + [meta, [mapped_reads[0], mapped_reads[1], unmapped_reads[0], unmapped_reads[1]]] + } + + // Concatenate Mapped_R1 with Unmapped_R1 and Mapped_R2 with Unmapped_R2 + CAT_FASTQ(reads_to_concat) + + // Gather versions of all tools used + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) + ch_versions = ch_versions.mix(COLLATE_FASTQ_MAP.out.versions) + ch_versions = ch_versions.mix(COLLATE_FASTQ_UNMAP.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_MERGE_UNMAP.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_MAP_MAP.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_MAP_UNMAP.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_UNMAP_MAP.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_UNMAP_UNMAP.out.versions) + + emit: + reads = CAT_FASTQ.out.reads + versions = ch_versions +} diff --git a/subworkflows/nf-core/bam_to_cram.nf b/subworkflows/nf-core/bam_to_cram.nf index 8f27bf1e50..ec3801e5a4 100644 --- 
a/subworkflows/nf-core/bam_to_cram.nf +++ b/subworkflows/nf-core/bam_to_cram.nf @@ -4,15 +4,15 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { DEEPTOOLS_BAMCOVERAGE } from '../../modules/nf-core/modules/deeptools/bamcoverage/main' -include { QUALIMAP_BAMQC } from '../../modules/nf-core/modules/qualimap/bamqc/main' -include { SAMTOOLS_VIEWINDEX as SAMTOOLS_BAM_TO_CRAM } from '../../modules/local/samtools/viewindex/main' +include { DEEPTOOLS_BAMCOVERAGE } from '../../modules/nf-core/modules/deeptools/bamcoverage/main' +include { QUALIMAP_BAMQC } from '../../modules/nf-core/modules/qualimap/bamqc/main' +include { SAMTOOLS_BAMTOCRAM } from '../../modules/nf-core/modules/samtools/bamtocram/main' workflow BAM_TO_CRAM { take: bam_indexed // channel: [mandatory] meta, bam, bai fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai + fai // channel: [mandatory] fai intervals_combined_bed_gz_tbi // channel: [optional] intervals_bed.gz, intervals_bed.gz.tbi main: @@ -23,7 +23,7 @@ workflow BAM_TO_CRAM { bam_no_index = bam_indexed.map{ meta, bam, bai -> [meta, bam] } // Convert bam input to cram - SAMTOOLS_BAM_TO_CRAM(bam_indexed, fasta, fasta_fai) + SAMTOOLS_BAMTOCRAM(bam_indexed, fasta, fai) // Reports on bam input DEEPTOOLS_BAMCOVERAGE(bam_indexed) @@ -38,10 +38,10 @@ workflow BAM_TO_CRAM { // Gather versions of all tools used ch_versions = ch_versions.mix(DEEPTOOLS_BAMCOVERAGE.out.versions.first()) ch_versions = ch_versions.mix(QUALIMAP_BAMQC.out.versions.first()) - ch_versions = ch_versions.mix(SAMTOOLS_BAM_TO_CRAM.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_BAMTOCRAM.out.versions.first()) emit: - cram = SAMTOOLS_BAM_TO_CRAM.out.cram_crai + cram = SAMTOOLS_BAMTOCRAM.out.cram_crai qc = qc_reports versions = ch_versions // channel: [ versions.yml ] diff --git a/subworkflows/nf-core/gatk4/mapping/main.nf 
b/subworkflows/nf-core/gatk4/mapping/main.nf index 02b7e3a4aa..8c820677ff 100644 --- a/subworkflows/nf-core/gatk4/mapping/main.nf +++ b/subworkflows/nf-core/gatk4/mapping/main.nf @@ -24,20 +24,12 @@ workflow GATK4_MAPPING { BWAMEM2_MEM(ch_reads, ch_map_index, sort) // If aligner is bwa-mem2 DRAGMAP_ALIGN(ch_reads, ch_map_index, sort) // If aligner is dragmap - // Grouping the bams from the same samples not to stall the workflow - ch_bam_mapped = BWAMEM1_MEM.out.bam.mix(BWAMEM2_MEM.out.bam, DRAGMAP_ALIGN.out.bam).map{ meta, bam -> - new_meta = meta.clone() - - numLanes = meta.numLanes ?: 1 - size = meta.size ?: 1 - - // Use groupKey to make sure that the correct group can advance as soon as it is complete - // and not stall the workflow until all reads from all channels are mapped - def groupKey = groupKey(new_meta, numLanes * size) - - //Returns the values we need - tuple(groupKey, new_meta, bam) - }.groupTuple(by:[0,1]).map{ groupKey, new_meta, bam -> [new_meta, bam] } + // Get the bam files from the aligner + // Only one aligner is run + ch_bam_mapped = Channel.empty() + ch_bam_mapped = ch_bam_mapped.mix(BWAMEM1_MEM.out.bam) + ch_bam_mapped = ch_bam_mapped.mix(BWAMEM2_MEM.out.bam) + ch_bam_mapped = ch_bam_mapped.mix(DRAGMAP_ALIGN.out.bam) // Gather reports of all tools used ch_reports = ch_reports.mix(DRAGMAP_ALIGN.out.log) diff --git a/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf b/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf index 01f5e710a2..ac010b5adc 100644 --- a/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf +++ b/subworkflows/nf-core/gatk4/markduplicates_spark/main.nf @@ -6,7 +6,7 @@ include { GATK4_ESTIMATELIBRARYCOMPLEXITY } from '../../../../modules/nf-core/modules/gatk4/estimatelibrarycomplexity/main' include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/nf-core/modules/gatk4/markduplicatesspark/main' -include { SAMTOOLS_INDEX as INDEX_MARKDUPLICATES } from '../../../../modules/local/samtools/index/main' 
+include { SAMTOOLS_INDEX as INDEX_MARKDUPLICATES } from '../../../../modules/nf-core/modules/samtools/index/main' include { BAM_TO_CRAM } from '../../bam_to_cram' workflow MARKDUPLICATES_SPARK { @@ -26,15 +26,21 @@ workflow MARKDUPLICATES_SPARK { GATK4_MARKDUPLICATES_SPARK(bam, fasta, fasta_fai, dict) INDEX_MARKDUPLICATES(GATK4_MARKDUPLICATES_SPARK.out.output) + bam_bai = GATK4_MARKDUPLICATES_SPARK.out.output + .join(INDEX_MARKDUPLICATES.out.bai) + + cram_crai = GATK4_MARKDUPLICATES_SPARK.out.output + .join(INDEX_MARKDUPLICATES.out.crai) + // Convert Markupduplicates spark bam output to cram when running bamqc and/or deeptools - BAM_TO_CRAM(INDEX_MARKDUPLICATES.out.bam_bai, fasta, fasta_fai, intervals_combined_bed_gz_tbi) + BAM_TO_CRAM(bam_bai, fasta, fasta_fai, intervals_combined_bed_gz_tbi) // Only one of these channel is not empty: // - running Markupduplicates spark with bam output // - running Markupduplicates spark with cram output cram_markduplicates = Channel.empty().mix( BAM_TO_CRAM.out.cram, - GATK4_MARKDUPLICATES_SPARK.out.output.join(INDEX_MARKDUPLICATES.out.cram_crai)) + cram_crai) // When running Marduplicates spark, and saving reports GATK4_ESTIMATELIBRARYCOMPLEXITY(bam, fasta, fasta_fai, dict) diff --git a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf index 6abe74c9e0..1d6620df4d 100644 --- a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf @@ -2,7 +2,7 @@ // Run GATK mutect2 in tumor normal mode, getepileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls // -include { BGZIP as BGZIP_MUTECT2 } from '../../../../modules/local/bgzip' +include { TABIX_BGZIP as BGZIP_MUTECT2 } from '../../../../modules/nf-core/modules/tabix/bgzip/main' include { CONCAT_VCF as CONCAT_MUTECT2 } from 
'../../../../modules/local/concat_vcf/main' include { GATK4_MUTECT2 as MUTECT2 } from '../../../../modules/nf-core/modules/gatk4/mutect2/main' include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../../modules/nf-core/modules/gatk4/mergemutectstats/main' @@ -81,7 +81,7 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { BGZIP_MUTECT2(MUTECT2.out.vcf) CONCAT_MUTECT2( - BGZIP_MUTECT2.out.vcf.map{ meta, vcf -> + BGZIP_MUTECT2.out.output.map{ meta, vcf -> [[id: meta.tumor_id + "_vs_" + meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], vcf] }.groupTuple(size: num_intervals), fai, diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf index 97ab85b85f..6e94218d23 100644 --- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf @@ -2,7 +2,7 @@ // Run GATK mutect2 in tumor only mode, getepileupsummaries, calculatecontamination and filtermutectcalls // -include { BGZIP as BGZIP_VC_MUTECT2 } from '../../../../modules/local/bgzip' +include { TABIX_BGZIP as BGZIP_VC_MUTECT2 } from '../../../../modules/nf-core/modules/tabix/bgzip/main' include { CONCAT_VCF as CONCAT_MUTECT2 } from '../../../../modules/local/concat_vcf/main' include { GATK4_MUTECT2 as MUTECT2 } from '../../../../modules/nf-core/modules/gatk4/mutect2/main' include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../../modules/nf-core/modules/gatk4/mergemutectstats/main' @@ -66,7 +66,7 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { //Merge Mutect2 VCF BGZIP_VC_MUTECT2(mutect2_vcf_branch.intervals) - CONCAT_MUTECT2(BGZIP_VC_MUTECT2.out.vcf.map{ meta, vcf -> + CONCAT_MUTECT2(BGZIP_VC_MUTECT2.out.output.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample [new_meta, vcf] diff --git 
a/subworkflows/nf-core/merge_index_bam.nf b/subworkflows/nf-core/merge_index_bam.nf index 9839248250..b914d24f85 100644 --- a/subworkflows/nf-core/merge_index_bam.nf +++ b/subworkflows/nf-core/merge_index_bam.nf @@ -4,7 +4,7 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { SAMTOOLS_INDEX as INDEX_MERGE_BAM } from '../../modules/local/samtools/index/main' +include { SAMTOOLS_INDEX as INDEX_MERGE_BAM } from '../../modules/nf-core/modules/samtools/index/main' include { SAMTOOLS_MERGE as MERGE_BAM } from '../../modules/nf-core/modules/samtools/merge/main' workflow MERGE_INDEX_BAM { @@ -23,11 +23,15 @@ workflow MERGE_INDEX_BAM { MERGE_BAM(bam_to_merge.multiple, []) INDEX_MERGE_BAM(bam_to_merge.single.mix(MERGE_BAM.out.bam)) + bam_bai = bam_to_merge.single + .mix(MERGE_BAM.out.bam) + .join(INDEX_MERGE_BAM.out.bai) + // Gather versions of all tools used ch_versions = ch_versions.mix(INDEX_MERGE_BAM.out.versions.first()) ch_versions = ch_versions.mix(MERGE_BAM.out.versions.first()) emit: - bam_bai = INDEX_MERGE_BAM.out.bam_bai + bam_bai versions = ch_versions } diff --git a/subworkflows/nf-core/merge_index_cram.nf b/subworkflows/nf-core/merge_index_cram.nf index 04394afc26..7692681331 100644 --- a/subworkflows/nf-core/merge_index_cram.nf +++ b/subworkflows/nf-core/merge_index_cram.nf @@ -1,11 +1,11 @@ // -// MERGE INDEX BAM +// MERGE INDEX CRAM // // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { SAMTOOLS_INDEX as INDEX_CRAM } from '../../modules/local/samtools/index/main' -include { SAMTOOLS_MERGE_CRAM as MERGE_CRAM } from '../../modules/local/samtools/mergecram/main' +include { SAMTOOLS_INDEX as INDEX_CRAM } from '../../modules/nf-core/modules/samtools/index/main' +include { SAMTOOLS_MERGE as MERGE_CRAM } from '../../modules/nf-core/modules/samtools/merge/main' workflow MERGE_INDEX_CRAM 
{ take: @@ -17,11 +17,12 @@ workflow MERGE_INDEX_CRAM { ch_versions = Channel.empty() // Figuring out if there is one or more cram(s) from the same sample - ch_cram = ch_cram - .map{ meta, cram -> - meta.id = meta.sample - [meta, cram] - }.groupTuple(size: num_intervals) + ch_cram.map{ meta, cram -> + new_meta = meta.clone() + new_meta.id = meta.sample + + [new_meta, cram] + }.groupTuple(size: num_intervals) .branch{ single: it[1].size() == 1 multiple: it[1].size() > 1 @@ -30,11 +31,15 @@ workflow MERGE_INDEX_CRAM { MERGE_CRAM(cram_to_merge.multiple, fasta) INDEX_CRAM(cram_to_merge.single.mix(MERGE_CRAM.out.cram)) + cram_crai = cram_to_merge.single + .mix(MERGE_CRAM.out.cram) + .join(INDEX_CRAM.out.crai) + // Gather versions of all tools used ch_versions = ch_versions.mix(INDEX_CRAM.out.versions.first()) ch_versions = ch_versions.mix(MERGE_CRAM.out.versions.first()) emit: - cram_crai = INDEX_CRAM.out.cram_crai + cram_crai versions = ch_versions } diff --git a/subworkflows/nf-core/variantcalling/deepvariant/main.nf b/subworkflows/nf-core/variantcalling/deepvariant/main.nf index f5568c83b6..715c160f11 100644 --- a/subworkflows/nf-core/variantcalling/deepvariant/main.nf +++ b/subworkflows/nf-core/variantcalling/deepvariant/main.nf @@ -1,5 +1,5 @@ -include { BGZIP as BGZIP_VC_DEEPVARIANT_GVCF } from '../../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_DEEPVARIANT_VCF } from '../../../../modules/local/bgzip' +include { TABIX_BGZIP as BGZIP_VC_DEEPVARIANT_GVCF } from '../../../../modules/nf-core/modules/tabix/bgzip/main' +include { TABIX_BGZIP as BGZIP_VC_DEEPVARIANT_VCF } from '../../../../modules/nf-core/modules/tabix/bgzip/main' include { CONCAT_VCF as CONCAT_DEEPVARIANT_GVCF } from '../../../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_DEEPVARIANT_VCF } from '../../../../modules/local/concat_vcf/main' include { DEEPVARIANT } from '../../../../modules/nf-core/modules/deepvariant/main' @@ -33,7 +33,7 @@ workflow RUN_DEEPVARIANT { 
BGZIP_VC_DEEPVARIANT_GVCF(DEEPVARIANT.out.gvcf) CONCAT_DEEPVARIANT_VCF( - BGZIP_VC_DEEPVARIANT_VCF.out.vcf + BGZIP_VC_DEEPVARIANT_VCF.out.output .map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample @@ -43,7 +43,7 @@ workflow RUN_DEEPVARIANT { intervals_bed_gz) CONCAT_DEEPVARIANT_GVCF( - BGZIP_VC_DEEPVARIANT_GVCF.out.vcf + BGZIP_VC_DEEPVARIANT_GVCF.out.output .map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample diff --git a/subworkflows/nf-core/variantcalling/freebayes/main.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf index fec674ff4b..f6c39cbee4 100644 --- a/subworkflows/nf-core/variantcalling/freebayes/main.nf +++ b/subworkflows/nf-core/variantcalling/freebayes/main.nf @@ -1,4 +1,4 @@ -include { BGZIP as BGZIP_VC_FREEBAYES } from '../../../../modules/local/bgzip' +include { TABIX_BGZIP as BGZIP_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/bgzip/main' include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../../modules/local/concat_vcf/main' include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main' include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/tabix/main' @@ -28,7 +28,7 @@ workflow RUN_FREEBAYES { BGZIP_VC_FREEBAYES(FREEBAYES.out.vcf) CONCAT_FREEBAYES( - BGZIP_VC_FREEBAYES.out.vcf + BGZIP_VC_FREEBAYES.out.output .map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample diff --git a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf index 7e456566e9..28bffeeec8 100644 --- a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf +++ b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf @@ -1,4 +1,4 @@ -include { BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../../../modules/local/bgzip' +include { TABIX_BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../../../modules/nf-core/modules/tabix/bgzip/main' include { CONCAT_VCF as 
CONCAT_HAPLOTYPECALLER } from '../../../../modules/local/concat_vcf/main' include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../../../modules/nf-core/modules/gatk4/genotypegvcfs/main' include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../../../modules/nf-core/modules/gatk4/haplotypecaller/main' @@ -43,7 +43,7 @@ workflow RUN_HAPLOTYPECALLER { BGZIP_VC_HAPLOTYPECALLER(haplotypecaller_vcf_branch.intervals) CONCAT_HAPLOTYPECALLER( - BGZIP_VC_HAPLOTYPECALLER.out.vcf + BGZIP_VC_HAPLOTYPECALLER.out.output .map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample @@ -117,7 +117,7 @@ workflow RUN_HAPLOTYPECALLER { //ch_versions = ch_versions.mix(GENOTYPEGVCFS.out.versions) //ch_versions = ch_versions.mix(GATK_JOINT_GERMLINE_VARIANT_CALLING.out.versions) ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions) - ch_versions = ch_versions.mix(TABIX_VC_HAPLOTYPECALLER.out.versions) + // ch_versions = ch_versions.mix(TABIX_VC_HAPLOTYPECALLER.out.versions) emit: versions = ch_versions diff --git a/subworkflows/nf-core/variantcalling/manta/germline/main.nf b/subworkflows/nf-core/variantcalling/manta/germline/main.nf index 71c405b898..66346eaa9e 100644 --- a/subworkflows/nf-core/variantcalling/manta/germline/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/germline/main.nf @@ -1,10 +1,10 @@ -include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' -include { MANTA_GERMLINE } from '../../../../../modules/nf-core/modules/manta/germline/main' 
+include { TABIX_BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { TABIX_BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { TABIX_BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' +include { MANTA_GERMLINE } from '../../../../../modules/nf-core/modules/manta/germline/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. // Seems to be the consensus on upstream modules implementation too @@ -54,7 +54,7 @@ workflow RUN_MANTA_GERMLINE { BGZIP_VC_MANTA_SV(manta_sv_vcf.intervals) CONCAT_MANTA_SV( - BGZIP_VC_MANTA_SV.out.vcf + BGZIP_VC_MANTA_SV.out.output .map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample @@ -66,7 +66,7 @@ workflow RUN_MANTA_GERMLINE { BGZIP_VC_MANTA_DIPLOID(manta_diploid_sv_vcf.intervals) CONCAT_MANTA_DIPLOID( - BGZIP_VC_MANTA_DIPLOID.out.vcf + BGZIP_VC_MANTA_DIPLOID.out.output .map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample diff --git a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf index 3c72cc7a68..67764c2dc7 100644 --- a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf @@ -1,12 +1,12 @@ -include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../../../../modules/local/bgzip' -include { BGZIP as 
BGZIP_VC_MANTA_SV } from '../../../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' -include { MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/manta/somatic/main' +include { TABIX_BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { TABIX_BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { TABIX_BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { TABIX_BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' +include { MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/manta/somatic/main' workflow RUN_MANTA_SOMATIC { take: @@ -52,7 +52,7 @@ workflow RUN_MANTA_SOMATIC { BGZIP_VC_MANTA_SV(manta_candidate_small_indels_vcf.intervals) CONCAT_MANTA_SV( - BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> + BGZIP_VC_MANTA_SV.out.output.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] @@ -63,7 +63,7 @@ workflow RUN_MANTA_SOMATIC { BGZIP_VC_MANTA_SMALL_INDELS(manta_candidate_sv_vcf.intervals) CONCAT_MANTA_SMALL_INDELS( - 
BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> + BGZIP_VC_MANTA_SMALL_INDELS.out.output.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] @@ -74,7 +74,7 @@ workflow RUN_MANTA_SOMATIC { BGZIP_VC_MANTA_DIPLOID(manta_diploid_sv_vcf.intervals) CONCAT_MANTA_DIPLOID( - BGZIP_VC_MANTA_DIPLOID.out.vcf.map{ meta, vcf -> + BGZIP_VC_MANTA_DIPLOID.out.output.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] @@ -85,7 +85,7 @@ workflow RUN_MANTA_SOMATIC { BGZIP_VC_MANTA_SOMATIC(manta_somatic_sv_vcf.intervals) CONCAT_MANTA_SOMATIC( - BGZIP_VC_MANTA_SOMATIC.out.vcf.map{ meta, vcf -> + BGZIP_VC_MANTA_SOMATIC.out.output.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] diff --git a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf index ba699bcb27..b9858d5db7 100644 --- a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf @@ -1,10 +1,10 @@ -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../../../../modules/local/concat_vcf/main' -include { MANTA_TUMORONLY } from '../../../../../modules/nf-core/modules/manta/tumoronly/main' +include { TABIX_BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { TABIX_BGZIP as 
BGZIP_VC_MANTA_SV } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { TABIX_BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../../../../modules/local/concat_vcf/main' +include { MANTA_TUMORONLY } from '../../../../../modules/nf-core/modules/manta/tumoronly/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. // Seems to be the consensus on upstream modules implementation too @@ -42,7 +42,7 @@ workflow RUN_MANTA_TUMORONLY { BGZIP_VC_MANTA_SMALL_INDELS(manta_small_indels_vcf.intervals) CONCAT_MANTA_SMALL_INDELS( - BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> + BGZIP_VC_MANTA_SMALL_INDELS.out.output.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample [new_meta, vcf] @@ -53,7 +53,7 @@ workflow RUN_MANTA_TUMORONLY { BGZIP_VC_MANTA_SV(manta_candidate_sv_vcf.intervals) CONCAT_MANTA_SV( - BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> + BGZIP_VC_MANTA_SV.out.output.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample [new_meta, vcf] @@ -64,7 +64,7 @@ workflow RUN_MANTA_TUMORONLY { BGZIP_VC_MANTA_TUMOR(manta_tumor_sv_vcf.intervals) CONCAT_MANTA_TUMOR( - BGZIP_VC_MANTA_TUMOR.out.vcf.map{ meta, vcf -> + BGZIP_VC_MANTA_TUMOR.out.output.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample [new_meta, vcf] diff --git a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf index bdaf5dbce6..865f013fb2 100644 --- a/subworkflows/nf-core/variantcalling/strelka/single/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf @@ -1,8 +1,8 @@ -include { BGZIP as BGZIP_VC_STRELKA } from 
'../../../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_STRELKA } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../../../../modules/local/concat_vcf/main' -include { STRELKA_GERMLINE } from '../../../../../modules/nf-core/modules/strelka/germline/main' +include { TABIX_BGZIP as BGZIP_VC_STRELKA } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { TABIX_BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { CONCAT_VCF as CONCAT_STRELKA } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../../../../modules/local/concat_vcf/main' +include { STRELKA_GERMLINE } from '../../../../../modules/nf-core/modules/strelka/germline/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
// Seems to be the consensus on upstream modules implementation too @@ -35,7 +35,7 @@ workflow RUN_STRELKA_SINGLE { BGZIP_VC_STRELKA(strelka_vcf.intervals) CONCAT_STRELKA( - BGZIP_VC_STRELKA.out.vcf + BGZIP_VC_STRELKA.out.output .map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample @@ -47,7 +47,7 @@ workflow RUN_STRELKA_SINGLE { BGZIP_VC_STRELKA_GENOME(strelka_genome_vcf.intervals) CONCAT_STRELKA_GENOME( - BGZIP_VC_STRELKA_GENOME.out.vcf + BGZIP_VC_STRELKA_GENOME.out.output .map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample diff --git a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf index 913b24b3a5..bccc00f59d 100644 --- a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf @@ -1,8 +1,8 @@ -include { BGZIP as BGZIP_VC_STRELKA_INDELS } from '../../../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA_SNVS } from '../../../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_STRELKA_INDELS } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_SNVS } from '../../../../../modules/local/concat_vcf/main' -include { STRELKA_SOMATIC } from '../../../../../modules/nf-core/modules/strelka/somatic/main' +include { TABIX_BGZIP as BGZIP_VC_STRELKA_INDELS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { TABIX_BGZIP as BGZIP_VC_STRELKA_SNVS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' +include { CONCAT_VCF as CONCAT_STRELKA_INDELS } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_STRELKA_SNVS } from '../../../../../modules/local/concat_vcf/main' +include { STRELKA_SOMATIC } from '../../../../../modules/nf-core/modules/strelka/somatic/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
// Seems to be the consensus on upstream modules implementation too @@ -34,7 +34,7 @@ workflow RUN_STRELKA_SOMATIC { // Only when using intervals BGZIP_VC_STRELKA_SNVS(strelka_vcf_snvs.intervals) - CONCAT_STRELKA_SNVS(BGZIP_VC_STRELKA_SNVS.out.vcf.map{ meta, vcf -> + CONCAT_STRELKA_SNVS(BGZIP_VC_STRELKA_SNVS.out.output.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] @@ -44,7 +44,7 @@ workflow RUN_STRELKA_SOMATIC { BGZIP_VC_STRELKA_INDELS(strelka_vcf_indels.intervals) - CONCAT_STRELKA_INDELS(BGZIP_VC_STRELKA_INDELS.out.vcf.map{ meta, vcf -> + CONCAT_STRELKA_INDELS(BGZIP_VC_STRELKA_INDELS.out.output.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] diff --git a/subworkflows/nf-core/vcf_qc.nf b/subworkflows/nf-core/vcf_qc.nf index 19e385ab67..4765dc2087 100644 --- a/subworkflows/nf-core/vcf_qc.nf +++ b/subworkflows/nf-core/vcf_qc.nf @@ -1,10 +1,9 @@ -include { BCFTOOLS_STATS } from '../../modules/nf-core/modules/bcftools/stats/main' +include { BCFTOOLS_STATS } from '../../modules/nf-core/modules/bcftools/stats/main' +include { VCFTOOLS as VCFTOOLS_SUMMARY } from '../../modules/nf-core/modules/vcftools/main' include { VCFTOOLS as VCFTOOLS_TSTV_COUNT } from '../../modules/nf-core/modules/vcftools/main' -include { VCFTOOLS as VCFTOOLS_TSTV_QUAL } from '../../modules/nf-core/modules/vcftools/main' -include { VCFTOOLS as VCFTOOLS_SUMMARY } from '../../modules/nf-core/modules/vcftools/main' +include { VCFTOOLS as VCFTOOLS_TSTV_QUAL } from '../../modules/nf-core/modules/vcftools/main' workflow VCF_QC { - take: vcf target_bed @@ -22,9 +21,10 @@ workflow VCF_QC { ch_versions = ch_versions.mix(VCFTOOLS_TSTV_COUNT.out.versions) emit: - versions = ch_versions bcftools_stats = BCFTOOLS_STATS.out.stats vcftools_tstv_counts = VCFTOOLS_TSTV_COUNT.out.tstv_count vcftools_tstv_qual = VCFTOOLS_TSTV_QUAL.out.tstv_qual vcftools_filter_summary = 
VCFTOOLS_SUMMARY.out.filter_summary + + versions = ch_versions } diff --git a/tests/test_save_bam_mapped.yml b/tests/test_save_bam_mapped.yml index 2bf13885b8..0113cf97cf 100644 --- a/tests/test_save_bam_mapped.yml +++ b/tests/test_save_bam_mapped.yml @@ -5,8 +5,8 @@ - save_bam_mapped files: # - path: results/multiqc - - path: results/preprocessing/test/mapped/test-test_L1.bam - - path: results/preprocessing/test/mapped/test-test_L1.bam.bai + - path: results/preprocessing/test/mapped/test.bam + - path: results/preprocessing/test/mapped/test.bam.bai - path: results/preprocessing/test/markduplicates/test.md.cram - path: results/preprocessing/test/markduplicates/test.md.cram.crai - path: results/preprocessing/test/recal_table/test.recal.table diff --git a/tests/test_skip_markduplicates.yml b/tests/test_skip_markduplicates.yml index d6b50a2074..5fc14ce2f0 100644 --- a/tests/test_skip_markduplicates.yml +++ b/tests/test_skip_markduplicates.yml @@ -6,8 +6,8 @@ - skip_markduplicates files: # - path: results/multiqc - - path: results/preprocessing/test/mapped/test-test_L1.bam - - path: results/preprocessing/test/mapped/test-test_L1.bam.bai + - path: results/preprocessing/test/mapped/test.bam + - path: results/preprocessing/test/mapped/test.bam.bai - path: results/preprocessing/test/recal_table/test.recal.table - path: results/preprocessing/test/markduplicates/test.md.cram - path: results/preprocessing/test/markduplicates/test.md.cram.crai diff --git a/tests/test_split_fastq.yml b/tests/test_split_fastq.yml index 46bb5804c3..96bec90012 100644 --- a/tests/test_split_fastq.yml +++ b/tests/test_split_fastq.yml @@ -3,7 +3,7 @@ tags: - split_fastq files: - - path: results/seqkit/test-test_L1/test_1.part_001.fastq.gz - - path: results/seqkit/test-test_L1/test_1.part_002.fastq.gz - - path: results/seqkit/test-test_L1/test_2.part_001.fastq.gz - - path: results/seqkit/test-test_L1/test_2.part_002.fastq.gz + - path: 
results/preprocessing/test/seqkit/test-test_L1/test_1.part_001.fastq.gz + - path: results/preprocessing/test/seqkit/test-test_L1/test_1.part_002.fastq.gz + - path: results/preprocessing/test/seqkit/test-test_L1/test_2.part_001.fastq.gz + - path: results/preprocessing/test/seqkit/test-test_L1/test_2.part_002.fastq.gz diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 165edd7c72..90f59208c6 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -128,8 +128,8 @@ include { PREPARE_GENOME } from '../subworkflows include { PREPARE_INTERVALS } from '../subworkflows/local/prepare_intervals' // Convert BAM files to FASTQ files -include { ALIGNMENT_TO_FASTQ as ALIGNMENT_TO_FASTQ_INPUT } from '../subworkflows/local/bam2fastq' -include { ALIGNMENT_TO_FASTQ as ALIGNMENT_TO_FASTQ_UMI } from '../subworkflows/local/bam2fastq' +include { ALIGNMENT_TO_FASTQ as ALIGNMENT_TO_FASTQ_INPUT } from '../subworkflows/nf-core/alignment_to_fastq' +include { ALIGNMENT_TO_FASTQ as ALIGNMENT_TO_FASTQ_UMI } from '../subworkflows/nf-core/alignment_to_fastq' // Split FASTQ files include { SPLIT_FASTQ } from '../subworkflows/local/split_fastq' @@ -308,9 +308,10 @@ workflow SAREK { ch_reads = RUN_TRIMGALORE.out.reads - ch_reports = ch_reports.mix(RUN_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([]), - RUN_TRIMGALORE.out.trim_html.collect{it[1]}.ifEmpty([]), - RUN_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([])) + ch_reports = ch_reports.mix(RUN_TRIMGALORE.out.trim_html.collect{it[1]}.ifEmpty([])) + ch_reports = ch_reports.mix(RUN_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([])) + ch_reports = ch_reports.mix(RUN_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([])) + ch_versions = ch_versions.mix(RUN_TRIMGALORE.out.versions) } else { ch_reads = ch_input_fastq @@ -350,19 +351,44 @@ workflow SAREK { // STEP 1: MAPPING READS TO REFERENCE GENOME // reads will be sorted + + ch_reads_to_map = ch_reads_to_map.map{ meta, reads -> + new_meta = meta.clone() + + // update ID when no multiple 
lanes or split fastqs + new_meta.id = meta.size * meta.numLanes == 1 ? meta.sample : meta.id + + [new_meta, reads] + } + GATK4_MAPPING(ch_reads_to_map, ch_map_index, true) + // Grouping the bams from the same samples not to stall the workflow ch_bam_mapped = GATK4_MAPPING.out.bam.map{ meta, bam -> new_meta = meta.clone() + + numLanes = meta.numLanes ?: 1 + size = meta.size ?: 1 + // remove no longer necessary fields new_meta.remove('read_group') // Now in the BAM header + new_meta.remove('numLanes') // Was only needed for mapping new_meta.remove('size') // Was only needed for mapping // update ID to be based on the sample name new_meta.id = meta.sample - [new_meta, bam] - } + // update data_type + new_meta.data_type = 'bam' + + // Use groupKey to make sure that the correct group can advance as soon as it is complete + // and not stall the workflow until all reads from all channels are mapped + def groupKey = groupKey(new_meta, numLanes * size) + + //Returns the values we need + [groupKey, new_meta, bam] + }.groupTuple(by:[0,1]) + .map{ groupKey, new_meta, bam -> [new_meta, bam] } // gatk4 markduplicates can handle multiple bams as input, so no need to merge/index here // Except if and only if skipping markduplicates or saving mapped bams @@ -812,6 +838,7 @@ def extract_csv(csv_file) { meta.numLanes = numLanes.toInteger() meta.read_group = read_group.toString() meta.data_type = "fastq" + meta.size = 1 // default number of split fastq files return [meta, [fastq_1, fastq_2]] // start from BAM } else if (row.lane && row.bam) { @@ -822,6 +849,7 @@ def extract_csv(csv_file) { meta.numLanes = numLanes.toInteger() meta.read_group = read_group.toString() meta.data_type = "bam" + meta.size = 1 // default number of split fastq files return [meta, bam] // recalibration } else if (row.table && row.cram) {