Skip to content

Commit 7895433

Browse files
authored
Merge pull request #5 from nickhsmith/nf-core-dev
Nf core dev
2 parents eee70d0 + 07aa343 commit 7895433

File tree

134 files changed

+5704
-685
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

134 files changed

+5704
-685
lines changed

.github/workflows/ci.yml

Lines changed: 24 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@ jobs:
1919
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/sarek') }}
2020
runs-on: ubuntu-latest
2121
strategy:
22+
# HACK Remove after DSL2 rewrite is done
23+
fail-fast: false
2224
matrix:
2325
# Nextflow versions
2426
include:
@@ -34,11 +36,11 @@ jobs:
3436
- 'default'
3537
- 'split_fastq'
3638
- 'gatk4_spark'
37-
- 'save_bam_mapped'
39+
#- 'save_bam_mapped'
3840
- 'skip_markduplicates'
39-
# - 'targeted'
41+
- 'targeted'
4042
- 'tumor_normal_pair'
41-
# - 'variant_calling'
43+
- 'variant_calling'
4244
steps:
4345
- name: Check out pipeline code
4446
uses: actions/checkout@v2
@@ -62,3 +64,22 @@ jobs:
6264

6365
- name: Run pipeline with tests settings
6466
run: pytest --tag ${{ matrix.test }} --kwdof
67+
68+
- name: Output log on failure
69+
if: failure()
70+
run: |
71+
sudo apt install bat > /dev/null
72+
batcat --decorations=always --color=always /home/runner/pytest_workflow_*/*/log.{out,err}
73+
74+
- name: Upload logs on failure
75+
if: failure()
76+
uses: actions/upload-artifact@v2
77+
with:
78+
name: logs-${{ matrix.profile }}
79+
path: |
80+
/home/runner/pytest_workflow_*/*/.nextflow.log
81+
/home/runner/pytest_workflow_*/*/log.out
82+
/home/runner/pytest_workflow_*/*/log.err
83+
/home/runner/pytest_workflow_*/*/work
84+
!/home/runner/pytest_workflow_*/*/work/conda
85+
!/home/runner/pytest_workflow_*/*/work/singularity

.gitpod.yml

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,14 @@
1+
image: nfcore/gitpod:latest
2+
3+
vscode:
4+
extensions: # based on nf-core.nf-core-extensionpack
5+
- codezombiech.gitignore # Language support for .gitignore files
6+
# - cssho.vscode-svgviewer # SVG viewer
7+
- davidanson.vscode-markdownlint # Markdown/CommonMark linting and style checking for Visual Studio Code
8+
- eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed
9+
- EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files
10+
- Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar
11+
- mechatroner.rainbow-csv # Highlight columns in csv files in different colors
12+
# - nextflow.nextflow # Nextflow syntax highlighting
13+
- oderwat.indent-rainbow # Highlight indentation level
14+
- streetsidesoftware.code-spell-checker # Spelling checker for source code

bin/concatenateVCFs.sh

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -58,16 +58,16 @@ then
5858

5959
CONTIGS=($(cut -f1 ${genomeIndex}))
6060

61-
# Concatenate VCFs in the correct order
61+
#Concatenate VCFs in the correct order
6262
(
6363
cat header
6464

6565
for chr in "${CONTIGS[@]}"; do
6666
# Skip if globbing would not match any file to avoid errors such as
6767
# "ls: cannot access chr3_*.vcf.gz: No such file or directory" when chr3
6868
# was not processed.
69-
pattern="*_${chr}_*.vcf.gz"
70-
if ! compgen -G "${pattern}" > /dev/null; then continue; fi
69+
pattern="*_${chr}_*.vcf"
70+
if ! compgen -G "${pattern}" > /dev/null ; then continue; fi
7171

7272
# ls -v sorts by numeric value ("version"), which means that chr1_100_
7373
# is sorted *after* chr1_99_.
@@ -83,14 +83,16 @@ then
8383
tail -n +$((L+1)) <(zcat ${vcf})
8484
done
8585
done
86-
) | bgzip -@${cpus} > rawcalls.vcf.gz
87-
tabix rawcalls.vcf.gz
86+
) | bgzip -@${cpus} > rawcalls.unsorted.vcf.gz
8887
else
89-
VCF=$(ls no_intervals*.vcf.gz)
90-
mv -v $VCF rawcalls.vcf.gz
91-
tabix rawcalls.vcf.gz
88+
VCF=$(ls no_intervals*.vcf)
89+
cp $VCF rawcalls.unsorted.vcf
90+
bgzip -@${cpus} rawcalls.unsorted.vcf
9291
fi
9392

93+
bcftools sort rawcalls.unsorted.vcf.gz | bgzip > rawcalls.vcf.gz
94+
tabix -p vcf rawcalls.vcf.gz
95+
9496
set +u
9597

9698
# Now we have the concatenated VCF file, check for WES/panel targets, and generate a subset if there is a BED provided
@@ -100,5 +102,5 @@ if [ ! -z ${targetBED+x} ]; then
100102
tabix ${outputFile}.gz
101103
else
102104
# Rename the raw calls as WGS results
103-
for f in rawcalls*; do mv -v $f ${outputFile}${f#rawcalls.vcf}; done
105+
for f in rawcalls.vcf*; do mv -v $f ${outputFile}${f#rawcalls.vcf}; done
104106
fi

conf/genomes.config

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -29,15 +29,6 @@ params {
2929
fasta = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta"
3030
known_indels = "${params.genomes_base}/dbsnp_138.b37.small.vcf.gz"
3131
}
32-
'smallGRCh38' {
33-
dbsnp = "${params.genomes_base}/dbsnp_146_hg38_chr20_tso-only.vcf.gz"
34-
fasta = "${params.genomes_base}/Homo_sapiens_assembly38_chr20.fasta"
35-
known_indels = "${params.genomes_base}/Mills_and_1000G_gold_standard_indels_hg38_chr20.vcf.gz"
36-
snpeff_db = 'GRCh38.99'
37-
vep_genome = 'GRCh38'
38-
vep_species = 'homo_sapiens'
39-
vep_cache_version = '99'
40-
}
4132
'custom' {
4233
fasta = null
4334
}

conf/test.config

Lines changed: 29 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ params {
1818
// Limit resources so that this can run on GitHub Actions
1919
max_cpus = 2
2020
max_memory = '6.GB'
21-
max_time = '6.h'
21+
max_time = '8.h'
2222

2323
// Input data
2424
input = "${baseDir}/tests/csv/3.0/fastq_single.csv"
@@ -28,11 +28,12 @@ params {
2828
genome = 'small_hg38'
2929
genomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules'
3030

31-
dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
32-
fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta"
33-
germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
34-
intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.interval_list"
35-
known_indels = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz"
31+
dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
32+
fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta"
33+
germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
34+
intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.interval_list"
35+
known_indels = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz"
36+
nucleotides_per_second = 20
3637

3738
snpeff_db = 'WBcel235.99'
3839
vep_species = 'caenorhabditis_elegans'
@@ -53,7 +54,7 @@ profiles {
5354
params.input = "${baseDir}/tests/csv/3.0/fastq_pair.csv"
5455
}
5556
prepare_recalibration {
56-
params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-mapped-normal-https.csv'
57+
params.input = "${baseDir}/tests/csv/3.0/mapped_single.csv"
5758
params.step = 'prepare_recalibration'
5859
}
5960
save_bam_mapped {
@@ -65,19 +66,28 @@ profiles {
6566
split_fastq {
6667
params.split_fastq = 150000
6768
params.save_split_fastqs = true
68-
6969
}
70-
targeted {
71-
params.target_bed = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/target.bed'
72-
params.tools = 'manta,strelka'
70+
no_intervals {
71+
params.no_intervals = true
7372
}
74-
tool {
75-
params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-recal-normal-https.csv'
76-
params.step = 'variant_calling'
73+
targeted {
74+
params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/multi_intervals.bed"
75+
params.wes = true
7776
}
78-
tool_pair {
79-
params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-recal-pair-https.csv'
77+
tools {
78+
params.input = "${baseDir}/tests/csv/3.0/recalibrated.csv"
79+
params.dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz"
80+
params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta"
81+
params.germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz"
82+
params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed"
83+
params.pon = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz"
8084
params.step = 'variant_calling'
85+
params.tools = 'deepvariant,freebayes,haplotypecaller,manta,msisensorpro,mutect2,strelka,snpeff,vep' //tiddit
86+
params.joint_germline = true
87+
params.wes = true
88+
params.genome = 'WBcel235'
89+
params.vep_genome = 'WBcel235'
90+
//params.vep_cache =
8191
}
8292
trimming {
8393
params.clip_r1 = 1
@@ -89,21 +99,9 @@ profiles {
8999
use_gatk_spark {
90100
params.use_gatk_spark = 'bqsr,markduplicates'
91101
}
92-
umi_quiaseq {
93-
params.genome = 'smallGRCh38'
94-
params.genomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/reference/chr20_hg38'
95-
params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-umi-qiaseq-https.csv'
96-
params.read_structure1 = '12M11S+T'
97-
params.read_structure2 = '12M11S+T'
98-
params.umi = true
99-
}
100-
umi_tso {
101-
genome = 'smallGRCh38'
102-
genomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/reference/chr20_hg38'
103-
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-umi-tso-https.csv'
104-
read_structure1 = '7M1S+T'
105-
read_structure2 = '7M1S+T'
106-
umi = true
102+
umi {
103+
params.input = "${baseDir}/tests/csv/3.0/fastq_umi.csv"
104+
params.umi_read_structure = '7M1S+T'
107105
}
108106
}
109107

0 commit comments

Comments
 (0)