Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
a733603
add re to scrape Trim Galore version
chelauk Feb 14, 2020
b0e50f2
add Trim galore, process and multiqc
chelauk Feb 14, 2020
93a0697
add Trim Galore to environment.yml
chelauk Feb 14, 2020
3aabf7a
include changes
chelauk Feb 14, 2020
ca3e979
--add trimFastq option
chelauk Feb 19, 2020
34fc93e
add test_local.config for small local refs. nextflow.config altered t…
chelauk Feb 19, 2020
699474b
add options to trimFastq
chelauk Feb 19, 2020
41d08ab
Update main.nf
chelauk Feb 19, 2020
97a6946
Update main.nf
chelauk Feb 19, 2020
1b9211a
Update nextflow.config
chelauk Feb 19, 2020
ad00fd1
Update main.nf
chelauk Feb 19, 2020
38311c4
Update main.nf
chelauk Feb 19, 2020
809aef4
Update main.nf
chelauk Feb 19, 2020
a7ab22c
Update main.nf
chelauk Feb 19, 2020
6af90b6
Update nextflow.config
chelauk Feb 19, 2020
1817d08
Update nextflow.config
chelauk Feb 19, 2020
375be24
Update main.nf
chelauk Feb 19, 2020
cb66e52
Update main.nf
chelauk Feb 19, 2020
31f66dc
Update main.nf
chelauk Feb 19, 2020
4225b10
Update main.nf
chelauk Feb 19, 2020
05f08b9
Update main.nf
chelauk Feb 19, 2020
9cf3746
Update CHANGELOG.md
chelauk Feb 19, 2020
d0cbf88
Update usage.md
chelauk Feb 19, 2020
083d725
Merge branch 'dev' into dev
chelauk Feb 19, 2020
57a01e9
Update nextflow.config
chelauk Feb 20, 2020
7051461
Delete test_local.config
chelauk Feb 20, 2020
0133607
Update main.nf
chelauk Feb 20, 2020
7b0fb2c
Update usage.md
chelauk Feb 20, 2020
9abfe4b
Apply suggestions from code review
chelauk Feb 20, 2020
90f9920
Update docs/usage.md
chelauk Feb 20, 2020
66f1f21
Update usage.md
chelauk Feb 20, 2020
94eca69
Update usage.md
chelauk Feb 20, 2020
2e3ad6b
Update main.nf
chelauk Feb 24, 2020
9538b6c
Update usage.md
chelauk Feb 24, 2020
6acda52
Update usage.md
chelauk Feb 24, 2020
5a6f5ff
Merge branch 'dev' into dev
maxulysse Feb 24, 2020
7cd92b9
Update main.nf
chelauk Feb 24, 2020
5578a01
Update usage.md
chelauk Feb 24, 2020
b43e109
Update main.nf
chelauk Feb 24, 2020
a176c67
Update nextflow.config
chelauk Feb 24, 2020
e6b91a1
Update nextflow.config
chelauk Feb 24, 2020
dcc325f
Update nextflow.config
chelauk Feb 24, 2020
6a44979
Update nextflow.config
chelauk Feb 24, 2020
2d5d7f3
Update nextflow.config
chelauk Feb 24, 2020
253694b
Update nextflow.config
chelauk Feb 24, 2020
08aeb26
Update main.nf
chelauk Feb 24, 2020
595bf35
Update nextflow.config
chelauk Feb 24, 2020
a47b76b
Update main.nf
chelauk Feb 24, 2020
797558e
Update main.nf
chelauk Feb 24, 2020
8afec12
Update main.nf
chelauk Feb 24, 2020
3a96c2f
Update main.nf
chelauk Feb 24, 2020
bbc7aa5
Apply suggestions from code review
chelauk Feb 24, 2020
7a6326a
Merge branch 'dev' into dev
maxulysse Feb 25, 2020
bbd72ab
Update main.nf
chelauk Feb 25, 2020
02d3c36
Update nextflow.config
chelauk Feb 25, 2020
96e3655
Update environment.yml
chelauk Feb 25, 2020
bdbf223
Update main.nf
chelauk Feb 25, 2020
d8774d4
Update main.nf
chelauk Feb 25, 2020
80443d0
Merge branch 'dev' into dev
maxulysse Feb 25, 2020
38a78c8
Update usage.md
chelauk Feb 26, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a

## dev

### `Added`

- Add `Trim Galore` possibilities to Sarek
- Add `Trim Galore` plot to `MultiQC` report
- Include `Trim Galore` in `scrape_software_versions.py`
- Include `Trim Galore` in `environment.yml`
- [#76](https://github.com/nf-core/sarek/pull/76) - Add `GATK Spark` possibilities to Sarek
Expand Down
2 changes: 2 additions & 0 deletions bin/scrape_software_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
'SnpEff': ['v_snpeff.txt', r"version SnpEff (\S+)"],
'Strelka': ['v_strelka.txt', r"([0-9.]+)"],
'TIDDIT': ['v_tiddit.txt', r"TIDDIT-(\S+)"],
'Trim Galore': ['v_trim_galore.txt', r"version (\S+)"],
'vcftools': ['v_vcftools.txt', r"([0-9.]+)"],
'VEP': ['v_vep.txt', r"ensembl-vep : (\S+)"],
}
Expand All @@ -44,6 +45,7 @@
results['SnpEff'] = '<span style="color:#999999;\">N/A</span>'
results['Strelka'] = '<span style="color:#999999;\">N/A</span>'
results['TIDDIT'] = '<span style="color:#999999;\">N/A</span>'
results['Trim Galore'] = '<span style="color:#999999;\">N/A</span>'
results['vcftools'] = '<span style="color:#999999;\">N/A</span>'
results['VEP'] = '<span style="color:#999999;\">N/A</span>'

Expand Down
36 changes: 36 additions & 0 deletions conf/test_local.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests locally
* -------------------------------------------------
* Defines local input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/sarek -profile test_local
*
* NOTE(review): the input TSV and the reference directory below are
* absolute paths under /home/vagrant, so this profile only works on a
* machine that has the test datasets at exactly that location.
*/

params {
config_profile_description = 'Minimal test dataset to check pipeline function'
config_profile_name = 'Test profile'
// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = 6.GB
max_time = 48.h
// Input data
// Remote TSV, kept commented out for reference; the local copy below is the one in use
// input = 'https://github.com/nf-core/test-datasets/raw/sarek/testdata/tsv/tiny-manta-https.tsv'
input = '/home/vagrant/demo/test-datasets-sarek/testdata/tsv/tiny-manta_local.tsv'
// Small reference genome
igenomes_ignore = true
genome = 'smallGRCh37'
// Remote reference location, kept commented out for reference; the local directory below is the one in use
//genomes_base = "https://github.com/nf-core/test-datasets/raw/sarek/reference"
genomes_base = '/home/vagrant/demo/test-datasets-sarek/reference'
}

process {
// The process named Snpeff gets a GRCh37-specific container and is limited to one parallel instance
withName:Snpeff {
container = 'nfcore/sareksnpeff:dev.GRCh37'
maxForks = 1
}
// Processes labelled VEP get a GRCh37-specific container and are limited to one parallel instance
withLabel:VEP {
container = 'nfcore/sarekvep:dev.GRCh37'
maxForks = 1
}
}
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,6 @@ dependencies:
- snpeff=4.3.1t
- strelka=2.9.10
- tiddit=2.7.1
- trim-galore=0.6.5
- vcfanno=0.3.1
- vcftools=0.1.16
- vcftools=0.1.16
85 changes: 72 additions & 13 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ def helpMessage() {
--pon panel-of-normals VCF (bgzipped, indexed). See: https://software.broadinstitute.org/gatk/documentation/tooldocs/current/org_broadinstitute_hellbender_tools_walkers_mutect_CreateSomaticPanelOfNormals.php
--pon_index index of pon panel-of-normals VCF

Trimming:
--trimFastq Run Trim Galore
--clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads)
--clip_r2 [int] Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only)
--three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed
--three_prime_clip_r2 [int] Instructs Trim Galore to remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed
--trim_nextseq [int] Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails
--saveTrimmed Save trimmed FastQ file intermediates

References If not specified in the configuration file or you wish to overwrite any of the references.
--ac_loci acLoci file
--ac_loci_gc acLoci GC file
Expand Down Expand Up @@ -480,13 +489,16 @@ if (params.target_bed) summary['Target BED'] = params.target_bed
// Optional summary entries: each one is added only when the corresponding value is set
if (step) summary['Step'] = step
if (params.tools) summary['Tools'] = tools.join(', ')
if (params.skip_qc) summary['QC tools skip'] = skipQC.join(', ')
// NOTE(review): params.trimFastq is also reported further down as
// summary['Trim Fastq'] ('Yes'/'No'); the two entries overlap, so consider keeping only one
if (params.trimFastq) summary['Fastq trim'] = "Fastq trim selected"

if (params.no_intervals && step != 'annotate') summary['Intervals'] = 'Do not use'
if ('haplotypecaller' in tools) summary['GVCF'] = params.no_gvcf ? 'No' : 'Yes'
if ('strelka' in tools && 'manta' in tools ) summary['Strelka BP'] = params.no_strelka_bp ? 'No' : 'Yes'
if (params.sequencing_center) summary['Sequenced by'] = params.sequencing_center
if (params.pon && 'mutect2' in tools) summary['Panel of normals'] = params.pon

summary['Trim Fastq'] = params.trimFastq ? 'Yes' : 'No'
//summary['Saved Trimmed Fastq'] = params.saveTrimmed ? 'Yes' : 'No'
summary['Save Reference'] = params.save_reference ? 'Yes' : 'No'
summary['Nucleotides/s'] = params.nucleotides_per_second
summary['Output dir'] = params.outdir
Expand Down Expand Up @@ -539,6 +551,12 @@ else log.info "\033[2m----------------------------------------------------\033[0

if ('mutect2' in tools && !(params.pon)) log.warn "[nf-core/sarek] Mutect2 was requested, but as no panel of normals were given, results will not be optimal"


// Define regular variables so that they can be overwritten.
// These are copies of the Trim Galore clipping parameters; the TrimGalore
// process builds its command line from these variables, not from params.* directly.
clip_r1 = params.clip_r1
clip_r2 = params.clip_r2
three_prime_clip_r1 = params.three_prime_clip_r1
three_prime_clip_r2 = params.three_prime_clip_r2
// Check the hostnames against configured profiles
checkHostname()

Expand Down Expand Up @@ -572,6 +590,7 @@ process GetSoftwareVersions {
R -e "library(ASCAT); help(package='ASCAT')" &> v_ascat.txt
samtools --version &> v_samtools.txt 2>&1 || true
tiddit &> v_tiddit.txt 2>&1 || true
trim_galore -v &> v_trim_galore.txt 2>&1 || true
vcftools --version &> v_vcftools.txt 2>&1 || true
vep --help &> v_vep.txt 2>&1 || true

Expand Down Expand Up @@ -887,7 +906,7 @@ if (params.split_fastq){

inputPairReads = inputPairReads.dump(tag:'INPUT')

(inputPairReads, inputPairReadsFastQC) = inputPairReads.into(2)
(inputPairReads, inputPairReadsTrimGalore, inputPairReadsFastQC) = inputPairReads.into(3)

// STEP 0.5: QC ON READS

Expand All @@ -909,7 +928,7 @@ process FastQCFQ {
file("*.{html,zip}") into fastQCFQReport

when: !('fastqc' in skipQC)

script:
"""
fastqc -t 2 -q ${idSample}_${idRun}_R1.fastq.gz ${idSample}_${idRun}_R2.fastq.gz
Expand Down Expand Up @@ -942,11 +961,50 @@ fastQCReport = fastQCFQReport.mix(fastQCBAMReport)

fastQCReport = fastQCReport.dump(tag:'FastQC')

// Optional read trimming with Trim Galore.
// When params.trimFastq is set, every read pair from inputPairReadsTrimGalore is
// trimmed and the trimmed pair is emitted on outputPairReadsTrimGalore.
// Otherwise the untrimmed pairs are passed through unchanged under the same
// channel name and TrimGaloreReport is an empty channel, so downstream code
// can consume both channels in either case.
outputPairReadsTrimGalore = Channel.create()

if (params.trimFastq) {
process TrimGalore {
label 'TrimGalore'

tag {idPatient + "-" + idRun}

// Publishing rules (saveAs):
//  - file names containing "_fastqc"             -> FastQC/ sub-directory
//  - file names containing "trimming_report.txt" -> logs/ sub-directory
//  - any other file (the trimmed FastQ files)    -> published only if params.saveTrimmed is set
//  - otherwise null is returned, i.e. the file is not published
// The "> 0" tests mean a match at the very start of the file name does not count.
publishDir "${params.outdir}/Reports/${idSample}/TrimGalore/${idSample}_${idRun}", mode: params.publish_dir_mode,
saveAs: {filename ->
if (filename.indexOf("_fastqc") > 0) "FastQC/$filename"
else if (filename.indexOf("trimming_report.txt") > 0) "logs/$filename"
else if (params.saveTrimmed) filename
else null
}

input:
// Both reads are staged under fixed names derived from idSample and idRun
set idPatient, idSample, idRun, file("${idSample}_${idRun}_R1.fastq.gz"), file("${idSample}_${idRun}_R2.fastq.gz") from inputPairReadsTrimGalore

output:
// Reports (html/zip/txt) are collected for MultiQC
file("*.{html,zip,txt}") into TrimGaloreReport
// Trimmed pair; the *_val_1 / *_val_2 names are the files this process expects trim_galore --paired to write
set idPatient, idSample, idRun, file("${idSample}_${idRun}_R1_val_1.fq.gz"), file("${idSample}_${idRun}_R2_val_2.fq.gz") into outputPairReadsTrimGalore

script:
// Each optional flag is added to the command line only when its value is greater than 0;
// unset (null) values therefore produce an empty string
c_r1 = clip_r1 > 0 ? "--clip_r1 ${clip_r1}" : ''
c_r2 = clip_r2 > 0 ? "--clip_r2 ${clip_r2}" : ''
tpc_r1 = three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${three_prime_clip_r1}" : ''
tpc_r2 = three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${three_prime_clip_r2}" : ''
nextseq = params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : ''
"""
trim_galore --paired --fastqc --gzip $c_r1 $c_r2 $tpc_r1 $tpc_r2 $nextseq ${idSample}_${idRun}_R1.fastq.gz ${idSample}_${idRun}_R2.fastq.gz
"""
}
} else {
// No trimming requested: forward the raw read pairs and provide an empty report channel
inputPairReadsTrimGalore
.set {outputPairReadsTrimGalore}
TrimGaloreReport = Channel.empty()
}

// STEP 1: MAPPING READS TO REFERENCE GENOME WITH BWA MEM

inputPairReads = inputPairReads.dump(tag:'INPUT')

inputPairReads = inputPairReads.mix(inputBam)
inputPairReads = outputPairReadsTrimGalore.mix(inputBam)
inputPairReads = inputPairReads.dump(tag:'INPUT')

(inputPairReads, inputPairReadsSentieon) = inputPairReads.into(2)
if (params.sentieon) inputPairReads.close()
Expand Down Expand Up @@ -1036,7 +1094,7 @@ process SentieonMapReads {
"""
sentieon bwa mem -K 100000000 -R \"${readGroup}\" ${extra} -t ${task.cpus} -M ${fasta} \
${inputFile1} ${inputFile2} | \
sentieon util sort -r ${fasta} -o ${idSample}_${idRun}.bam -t ${task.cpus} --sam2bam -i -
sentieon util sort -r ${fasta} -o ${idSample}_${idRun}.bam -t ${task.cpus} --sam2bam -i -
"""
}

Expand Down Expand Up @@ -1408,7 +1466,7 @@ process SentieonBQSR {
file(knownIndelsIndex) from ch_known_indels_tbi

output:
set idPatient, idSample, file("${idSample}.recal.bam"), file("${idSample}.recal.bam.bai") into bamRecalSentieon
set idPatient, idSample, file("${idSample}.recal.bam"), file("${idSample}.recal.bam.bai") into bamRecalSentieon
set idPatient, idSample into bamRecalSentieonTSV
file("${idSample}_recal_result.csv") into bamRecalSentieonQC

Expand Down Expand Up @@ -2082,7 +2140,7 @@ process MergeMutect2Stats {

when: 'mutect2' in tools

script:
script:
stats = statsFiles.collect{ "-stats ${it} " }.join(' ')
"""
gatk --java-options "-Xmx${task.memory.toGiga()}g" \
Expand Down Expand Up @@ -2119,11 +2177,11 @@ process ConcatVCF {
when: ('haplotypecaller' in tools || 'mutect2' in tools || 'freebayes' in tools)

script:
if (variantCaller == 'HaplotypeCallerGVCF')
if (variantCaller == 'HaplotypeCallerGVCF')
outputFile = "HaplotypeCaller_${idSample}.g.vcf"
else if (variantCaller == "Mutect2")
else if (variantCaller == "Mutect2")
outputFile = "Mutect2_unfiltered_${idSample}.vcf"
else
else
outputFile = "${variantCaller}_${idSample}.vcf"
options = params.target_bed ? "-t ${targetBED}" : ""
"""
Expand Down Expand Up @@ -2213,13 +2271,13 @@ process CalculateContamination {

input:
set idPatient, idSampleNormal, idSampleTumor, file(bamNormal), file(baiNormal), file(bamTumor), file(baiTumor), file(mergedPileup) from pairBamCalculateContamination

output:
set idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${idSampleTumor}_contamination.table") into contaminationTable

when: 'mutect2' in tools

script:
script:
"""
# calculate contamination
gatk --java-options "-Xmx${task.memory.toGiga()}g" \
Expand Down Expand Up @@ -2251,7 +2309,7 @@ process FilterMutect2Calls {
file(germlineResource) from ch_germline_resource
file(germlineResourceIndex) from ch_germline_resource_tbi
file(intervals) from ch_intervals

output:
set val("Mutect2"), idPatient, idSamplePair, file("Mutect2_filtered_${idSamplePair}.vcf.gz"), file("Mutect2_filtered_${idSamplePair}.vcf.gz.tbi"), file("Mutect2_filtered_${idSamplePair}.vcf.gz.filteringStats.tsv") into filteredMutect2Output

Expand Down Expand Up @@ -3187,6 +3245,7 @@ process MultiQC {
file ('DuplicateMarked/*.recal.table') from baseRecalibratorReport.collect().ifEmpty([])
file ('SamToolsStats/*') from samtoolsStatsReport.collect().ifEmpty([])
file ('snpEff/*') from snpeffReport.collect().ifEmpty([])
file ('TrimGalore/*') from TrimGaloreReport.collect().ifEmpty([])
file ('VCFTools/*') from vcftoolsReport.collect().ifEmpty([])

output:
Expand Down
10 changes: 9 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ params {
save_reference = null // Built Indexes not saved
sequencing_center = null // No sequencing center to be written in BAM header in MapReads process
sentieon = null // Not using Sentieon by default
trimFastq = null // Not trimming by default
clip_r1 = null // Integer, Instructs Trim Galore to remove bp from the 5' end of read1 (or single-end reads)
clip_r2 = null // Integer, Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only)
three_prime_clip_r1 = null // Integer, Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed
three_prime_clip_r2 = null // Integer, Instructs Trim Galore to remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed
trim_nextseq = null // Integer, Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails
saveTrimmed = null // Option to save trimmed FastQ file intermediates

// Optional files/directory
cadd_indels = false // No CADD InDels file
Expand Down Expand Up @@ -71,7 +78,7 @@ params {
plaintext_email = false // Plaintext email disabled

// Base specifications
cpus = 8
cpus = 8
max_cpus = 16
max_memory = 128.GB
max_time = 240.h
Expand Down Expand Up @@ -120,6 +127,7 @@ profiles {
singularity.enabled = true
}
test { includeConfig 'conf/test.config' }
test_local { includeConfig 'conf/test_local.config' }
test_annotation { includeConfig 'conf/test_annotation.config' }
test_splitfastq { includeConfig 'conf/test_splitfastq.config' }
test_targeted { includeConfig 'conf/test_targeted.config' }
Expand Down