diff --git a/conf/igenomes.config b/conf/igenomes.config index 49088ef144..4c71497e86 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -53,14 +53,14 @@ params { bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" chr_dir = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Chromosomes" chr_length = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Length/GRCm38.len" - dbsnp = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/Annotation/mgp.v5.merged.snps_all.dbSNP142.vcf.gz" - dbsnp_index = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/Annotation/mgp.v5.merged.snps_all.dbSNP142.vcf.gz.tbi" + dbsnp = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz" + dbsnp_index = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz.tbi" dict = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.dict" fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" fasta_fai = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa.fai" intervals = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/intervals/GRCm38_calling_list.bed" - known_indels = "${params.igenomes_base}/Mus_musculus/Annotation/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz" - known_indels_index = "${params.igenomes_base}/Mus_musculus/Annotation/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz.tbi" + known_indels = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz" + known_indels_index = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz.tbi" snpeff_db = 'GRCm38.86' species = 
'mus_musculus' vep_cache_version = '98' diff --git a/docs/input.md b/docs/input.md index 2b42bc9d8b..5443c8bc90 100644 --- a/docs/input.md +++ b/docs/input.md @@ -5,15 +5,15 @@ Input files for Sarek can be specified using a TSV file given to the `--input` command. The TSV file is a Tab Separated Value file with columns: -- `subject gender status sample lane fastq1 fastq2` for step `mapping` with paired-end FASTQs -- `subject gender status sample lane bam` for step `mapping` with unmapped BAMs -- `subject gender status sample bam bai recaltable` for step `recalibrate` with BAMs -- `subject gender status sample bam bai` for step `variantcalling` with BAMs +- `subject sex status sample lane fastq1 fastq2` for step `mapping` with paired-end FASTQs +- `subject sex status sample lane bam` for step `mapping` with unmapped BAMs +- `subject sex status sample bam bai recaltable` for step `recalibrate` with BAMs +- `subject sex status sample bam bai` for step `variantcalling` with BAMs The content of these columns is quite straight-forward: - `subject` designate the subject, it should be the ID of the Patient, and it must design only one patient -- `gender` is the gender of the Patient, (XX or XY) +- `sex` designates the sex chromosomes of the Patient (XX or XY) - `status` is the status of the Patient, (0 for Normal or 1 for Tumor) - `sample` designate the Sample, it should be the ID of the sample (it is possible to have more than one tumor sample for each patient, i.e. a tumor and a relapse), it must design only one sample - `lane` is used when the sample is multiplexed on several lanes, it must be unique for each lane in the same sample