|
4 | 4 | ======================================================================================== |
5 | 5 | */ |
6 | 6 |
|
7 | | -params.seqkit_split2_options = [:] |
8 | 7 | params.bwamem1_mem_options = [:] |
9 | 8 | params.bwamem1_mem_tumor_options = [:] |
10 | 9 | params.bwamem2_mem_options = [:] |
11 | 10 | params.bwamem2_mem_tumor_options = [:] |
| 11 | +params.merge_bam_options = [:] |
| 12 | +params.samtools_index_options = [:] |
| 13 | +params.seqkit_split2_options = [:] |
12 | 14 |
|
13 | | -include { SEQKIT_SPLIT2 } from '../../modules/nf-core/modules/seqkit/split2/main.nf' addParams(options: params.seqkit_split2_options) |
| 15 | +include { BWAMEM2_MEM as BWAMEM2_MEM_T } from '../../modules/local/bwamem2/mem/main' addParams(options: params.bwamem2_mem_tumor_options) |
| 16 | +include { BWAMEM2_MEM } from '../../modules/local/bwamem2/mem/main' addParams(options: params.bwamem2_mem_options) |
14 | 17 | include { BWA_MEM as BWAMEM1_MEM } from '../../modules/local/bwa/mem/main' addParams(options: params.bwamem1_mem_options) |
15 | 18 | include { BWA_MEM as BWAMEM1_MEM_T } from '../../modules/local/bwa/mem/main' addParams(options: params.bwamem1_mem_tumor_options) |
16 | | -include { BWAMEM2_MEM } from '../../modules/local/bwamem2/mem/main' addParams(options: params.bwamem2_mem_options) |
17 | | -include { BWAMEM2_MEM as BWAMEM2_MEM_T } from '../../modules/local/bwamem2/mem/main' addParams(options: params.bwamem2_mem_tumor_options) |
| 19 | +include { SAMTOOLS_INDEX } from '../../modules/local/samtools/index/main' addParams(options: params.samtools_index_options) |
| 20 | +include { SAMTOOLS_MERGE } from '../../modules/nf-core/modules/samtools/merge/main' addParams(options: params.merge_bam_options) |
| 21 | +include { SEQKIT_SPLIT2 } from '../../modules/nf-core/modules/seqkit/split2/main.nf' addParams(options: params.seqkit_split2_options) |
18 | 22 |
|
19 | 23 | workflow MAPPING { |
20 | 24 | take: |
21 | | - aligner // string: [mandatory] "bwa-mem" or "bwa-mem2" |
22 | | - bwa // channel: [mandatory] bwa |
23 | | - fai // channel: [mandatory] fai |
24 | | - fasta // channel: [mandatory] fasta |
25 | | - reads_input // channel: [mandatory] meta, reads_input |
| 25 | + aligner // string: [mandatory] "bwa-mem" or "bwa-mem2" |
| 26 | + bwa // channel: [mandatory] bwa |
| 27 | + fai // channel: [mandatory] fai |
| 28 | + fasta // channel: [mandatory] fasta |
| 29 | + reads_input // channel: [mandatory] meta, reads_input |
| 30 | + skip_markduplicates // boolean: true/false |
26 | 31 |
|
27 | 32 | main: |
28 | 33 |
|
29 | | - bam_mapped_index = Channel.empty() |
30 | | - bam_reports = Channel.empty() |
31 | | - |
| 34 | + bam_indexed = Channel.empty() |
32 | 35 |
|
33 | | - if(params.split_fastq > 1){ |
| 36 | + if (params.split_fastq > 1) { |
34 | 37 | reads_input_split = SEQKIT_SPLIT2(reads_input).reads.map{ |
35 | 38 | key, reads -> |
36 | 39 | //TODO maybe this can be replaced by a regex to include part_001 etc. |
37 | 40 |
|
38 | 41 | //sorts list of split fq files by : |
39 | 42 | //[R1.part_001, R2.part_001, R1.part_002, R2.part_002,R1.part_003, R2.part_003,...] |
40 | 43 | //TODO: determine whether it is possible to have an uneven number of parts, so remainder: true would need to be used; this could be possible for unfiltered reads, reads that don't have pairs, etc.
41 | | - return [key, reads.sort{ a,b -> a.getName().tokenize('.')[ a.getName().tokenize('.').size() - 3] <=> b.getName().tokenize('.')[ b.getName().tokenize('.').size() - 3]} |
42 | | - .collate(2)] |
| 44 | + return [key, reads.sort{ a,b -> a.getName().tokenize('.')[ a.getName().tokenize('.').size() - 3] <=> b.getName().tokenize('.')[ b.getName().tokenize('.').size() - 3]}.collate(2)] |
43 | 45 | }.transpose() |
44 | | - }else{ |
| 46 | + } else { |
45 | 47 | reads_input_split = reads_input |
46 | 48 | } |
47 | 49 |
|
@@ -77,14 +79,31 @@ workflow MAPPING { |
77 | 79 | bam_bwa.map{ meta, bam -> |
78 | 80 | meta.remove('read_group') |
79 | 81 | meta.id = meta.sample |
| 82 | + // groupKey makes sure that the correct group can advance as soon as it is complete
| 83 | + // and not stall the workflow until all pieces are mapped |
80 | 84 | def groupKey = groupKey(meta, meta.numLanes * params.split_fastq) |
81 | 85 | tuple(groupKey, bam) |
82 | 86 | [meta, bam] |
83 | | - }.groupTuple() //groupKey above is somehow makes sure, the workflow doesn't stall until all pieces are mapped, but that the correct group can advance as soon as it is complete |
| 87 | + }.groupTuple() |
84 | 88 | .set{bam_mapped} |
85 | 89 |
|
86 | | - // STEP 1.5: MERGING AND INDEXING BAM FROM MULTIPLE LANES // MarkDuplicates can take care of this |
| 90 | + // MarkDuplicates can handle multiple BAMs as input, so no merging/indexing is needed at this step,
| 91 | + // except when MarkDuplicates is skipped
| 92 | + |
| 93 | + if (skip_markduplicates) { |
| 94 | + bam_mapped.branch{ |
| 95 | + single: it[1].size() == 1 |
| 96 | + multiple: it[1].size() > 1 |
| 97 | + }.set{ bam_to_merge } |
| 98 | + |
| 99 | + SAMTOOLS_MERGE(bam_to_merge.multiple) |
| 100 | + bam_merged = bam_to_merge.single.mix(SAMTOOLS_MERGE.out.bam) |
| 101 | + |
| 102 | + SAMTOOLS_INDEX(bam_merged) |
| 103 | + bam_indexed = bam_merged.join(SAMTOOLS_INDEX.out.bai) |
| 104 | + } |
87 | 105 |
|
88 | 106 | emit: |
89 | | - bam = bam_mapped |
| 107 | + bam = bam_mapped |
| 108 | + bam_indexed = bam_indexed |
90 | 109 | } |
0 commit comments