Skip to content

Commit 85cd0d9

Browse files
committed
include clip reads, def:p1a2
1 parent c6e3635 commit 85cd0d9

File tree

8 files changed

+575
-249
lines changed

8 files changed

+575
-249
lines changed

src/align.c

Lines changed: 343 additions & 157 deletions
Large diffs are not rendered by default.

src/align.h

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
#define LONGCALLD_GAP_LEFT_ALN 1 // default: put gap at the left-most position
1414
#define LONGCALLD_GAP_RIGHT_ALN 2
1515

16-
#define LONGCALLD_EXT_ALN_RIGHT 1
17-
#define LONGCALLD_EXT_ALN_LEFT 2
16+
#define LONGCALLD_EXT_ALN_LEFT_TO_RIGHT 1
17+
#define LONGCALLD_EXT_ALN_RIGHT_TO_LEFT 2
1818

1919
#ifdef __cplusplus
2020
extern "C" {
@@ -23,13 +23,6 @@ extern "C" {
2323
struct bam_chunk_t;
2424
struct aln_str_t;
2525

26-
int test_wfa(char *pattern, char *text);
27-
int test_abpoa(uint8_t **bseqs, int n_seqs, int *seq_lens);
28-
int test_edlib(char *pattern, char *text);
29-
int test_ksw2(char *pattern, char *text, const call_var_opt_t *opt);
30-
31-
int wfa_heuristic_aln(uint8_t *pattern, int plen, uint8_t *text, int tlen, int *n_eq, int *n_xid);
32-
3326
// int wfa_aln(int gap_pos, char *pattern, int plen, char *text, int tlen, uint32_t **cigar_buf);
3427
int end2end_aln(const call_var_opt_t *opt, char *pattern, int plen, uint8_t *text, int tlen, uint32_t **cigar_buf);
3528
int collect_reg_ref_cseq(bam_chunk_t *chunk, hts_pos_t reg_beg, hts_pos_t reg_end, char **ref_cseq);

src/call_var_main.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ call_var_opt_t *call_var_init_para(void) {
105105
opt->min_noisy_reg_ratio = LONGCALLD_NOISY_REG_RATIO;
106106
opt->max_noisy_frac_per_read = LONGCALLD_MAX_NOISY_FRAC_PER_READ;
107107
opt->min_hap_full_reads = LONGCALLD_MIN_HAP_FULL_READS;
108+
opt->min_hap_reads = LONGCALLD_MIN_HAP_READS;
108109
opt->min_no_hap_full_reads = LONGCALLD_MIN_NO_HAP_FULL_READS;
109110

110111
opt->min_somatic_af = LONGCALLD_MIN_SOMATIC_AF;
@@ -123,6 +124,8 @@ call_var_opt_t *call_var_init_para(void) {
123124
opt->pl_threads = MIN_OF_TWO(CALL_VAR_PL_THREAD_N, get_num_processors());
124125
opt->n_threads = MIN_OF_TWO(CALL_VAR_THREAD_N, get_num_processors());
125126

127+
opt->p_error = 0.001; opt->log_p = -3.0; opt->log_1p = log10(1-opt->p_error); opt->log_2 = 0.301023;
128+
opt->max_gq = 60; opt->max_qual = 60;
126129
opt->out_vcf = NULL; opt->no_vcf_header = 0; opt->out_amb_base = 0;
127130
opt->out_bam = NULL; opt->no_bam_header = 0;
128131
// opt->verbose = 0;

src/call_var_main.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,10 @@
3636
#define LONGCALLD_NOISY_REG_READS 5 // >= 5 reads supporting noisy region
3737
#define LONGCALLD_NOISY_REG_RATIO 0.20 // >= 25% reads supporting noisy region
3838

39-
#define LONGCALLD_MIN_HAP_FULL_READS 2 // full read supporting each haplotype
40-
#define LONGCALLD_MIN_HAP_READS 3 // XXX >= 3 reads supporting each haplotype, including partial/clipped reads
41-
#define LONGCALLD_MIN_NO_HAP_FULL_READS 10 // total full reads in noisy region
39+
#define LONGCALLD_MIN_HAP_FULL_READS 1 // >= 1full read supporting each haplotype
40+
#define LONGCALLD_MIN_HAP_READS 2 // >= 3 reads supporting each haplotype, including partial/clipped reads, call consensus from >= 3 reads
41+
#define LONGCALLD_MIN_NO_HAP_FULL_READS 10 // >10 total full reads in noisy region
42+
#define LONGCALLD_MIN_READ_TO_HAP_CONS_SIM 0.9 // for reads with >= 90% equal bases, assign haplotype
4243
// for sdust
4344
#define LONGCALLD_SDUST_T 5
4445
#define LONGCALLD_SDUST_W 20
@@ -98,18 +99,20 @@ typedef struct call_var_opt_t {
9899
// filters for noisy region, i.e., coverage/ratio
99100
int max_noisy_reg_reads, max_noisy_reg_len, min_noisy_reg_reads;
100101
float max_noisy_frac_per_read, min_noisy_reg_ratio;
101-
int min_hap_full_reads, min_no_hap_full_reads;
102+
int min_hap_full_reads, min_hap_reads, min_no_hap_full_reads;
102103
// alignment
103104
int match, mismatch, gap_open1, gap_ext1, gap_open2, gap_ext2;
104105
int gap_aln; // default: 1: left (minimap2, abpoa), 2: right (wfa2)
106+
float min_read_to_hap_cons_sim;
105107
int disable_read_realign; // disable re-alignment/MSA, only use variant calling from consensus sequence
106108
// phasing is based on read cluster of the current haplotype, so not error-robust
107109
// general
108110
// int max_ploidy;
109111
int pl_threads, n_threads;
110112
// output
111113
htsFile *out_bam; // phased bam
112-
FILE *out_vcf;
114+
FILE *out_vcf;
115+
double p_error, log_p, log_1p, log_2; int max_gq; int max_qual;
113116
int8_t no_vcf_header, out_amb_base, no_bam_header; // phased vcf
114117
} call_var_opt_t;
115118

src/collect_var.c

Lines changed: 205 additions & 74 deletions
Large diffs are not rendered by default.

src/collect_var.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@
2323
#define LONGCALLD_VAR_CATE_TYPE(var_cate) LONGCALLD_VAR_CATE_STR[(int)(log2(var_cate))]
2424

2525
#define LONGCALLD_REF_CONS_ALN_STR(clu_aln_strs) clu_aln_strs
26-
#define LONGCALLD_CONS_READ_ALN_STR(clu_aln_strs, read_i) clu_aln_strs+(read_i+1)*2-1
27-
#define LONGCALLD_REF_READ_ALN_STR(clu_aln_strs, read_i) clu_aln_strs+(read_i+1)*2
26+
#define LONGCALLD_CONS_READ_ALN_STR(clu_aln_strs, read_i) clu_aln_strs+read_i+1
27+
#define LONGCALLD_REF_READ_ALN_STR(clu_aln_strs, read_i) clu_aln_strs+read_i+1
28+
// #define LONGCALLD_CONS_READ_ALN_STR(clu_aln_strs, read_i) clu_aln_strs+(read_i+1)*2-1
29+
// #define LONGCALLD_REF_READ_ALN_STR(clu_aln_strs, read_i) clu_aln_strs+(read_i+1)*2
2830

2931
#ifdef __cplusplus
3032
extern "C" {
@@ -73,6 +75,8 @@ typedef struct aln_str_t {
7375
uint8_t *target_aln;
7476
uint8_t *query_aln;
7577
int aln_len;
78+
// for partially aligned reads
79+
int target_beg, target_end, query_beg, query_end; // 0..aln_len-1, including '-'/5
7680
} aln_str_t;
7781

7882
struct bam_chunk_t;

src/vcf_utils.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ int write_var_to_vcf(var_t *vars, const struct call_var_opt_t *opt, char *chrom)
5959
for (int i = 0; i < n_vars; i++) {
6060
var1_t var = vars->vars[i];
6161
if (var.n_alt_allele == 0) continue;
62+
if (var.DP < opt->min_dp || var.AD[1] < opt->min_alt_dp) continue;
6263
if (opt->out_amb_base == 0) {
6364
uint8_t skip = 0;
6465
for (int j = 0; j < var.ref_len; j++) {
@@ -111,7 +112,12 @@ int write_var_to_vcf(var_t *vars, const struct call_var_opt_t *opt, char *chrom)
111112
fprintf(out_vcf, "\t%d\tPASS\tEND=%" PRId64 "", var.QUAL, var.pos + var.ref_len - 1);
112113
if (is_sv) fprintf(out_vcf, ";%s;%s\t", SVTYPE, SVLEN);
113114
else fprintf(out_vcf, "\t");
114-
fprintf(out_vcf, "GT:PS\t%d|%d:%" PRId64 "\n", var.GT[0], var.GT[1], var.PS);
115+
fprintf(out_vcf, "GT:DP:AD:GQ:PS\t%d|%d:%d:", var.GT[0], var.GT[1], var.DP);
116+
for (int j = 0; j < 1+var.n_alt_allele; j++) {
117+
if (j > 0) fprintf(out_vcf, ",");
118+
fprintf(out_vcf, "%d", var.AD[j]);
119+
}
120+
fprintf(out_vcf, ":%d:%" PRId64 "\n", var.GQ, var.PS);
115121
}
116122
return ret;
117123
}

0 commit comments

Comments
 (0)