@@ -519,7 +519,7 @@ int collect_partial_aln_beg_end(int gap_aln, int a, int b, int q, int e, int q2,
519519 return ret ;
520520}
521521
522- int abpoa_partial_aln_msa_cons (const call_var_opt_t * opt , abpoa_t * ab , int wb , int n_reads , int * read_ids , uint8_t * * read_seqs , int * read_lens , int * read_full_cover , char * * names ,
522+ int abpoa_partial_aln_msa_cons (const call_var_opt_t * opt , abpoa_t * ab , int wb , int n_reads , int * read_ids , uint8_t * * read_seqs , uint8_t * * read_quals , int * read_lens , int * read_full_cover , char * * names ,
523523 int max_n_cons , int * cons_lens , uint8_t * * cons_seqs , int * clu_n_seqs , int * * clu_read_ids , int * msa_seq_lens , uint8_t * * msa_seqs ) {
524524 // abpoa_t *ab = abpoa_init();
525525 int needs_free_ab = 0 ;
@@ -546,7 +546,16 @@ int abpoa_partial_aln_msa_cons(const call_var_opt_t *opt, abpoa_t *ab, int wb, i
546546 ab -> abs -> n_seq = n_reads ;
547547 for (int i = 0 ; i < n_reads ; ++ i ) {
548548 if (LONGCALLD_VERBOSE >= 2 ) {
549- fprintf (stderr , ">%s %d %d\n" , names [i ], read_lens [i ], read_full_cover [i ]);
549+ int min_qual = 100 , min_i = -1 , ave_qual = 0 ;
550+ for (int j = 0 ; j < read_lens [i ]; ++ j ) {
551+ if (read_quals [i ][j ] < min_qual ) {
552+ min_qual = read_quals [i ][j ];
553+ min_i = j ;
554+ }
555+ ave_qual += read_quals [i ][j ];
556+ }
557+ ave_qual /= (read_lens [i ]);
558+ fprintf (stderr , ">%s %d %d qual: %d(%d) %d\n" , names [i ], read_lens [i ], read_full_cover [i ], min_qual ,min_i , ave_qual );
550559 for (int j = 0 ; j < read_lens [i ]; ++ j ) {
551560 fprintf (stderr , "%c" , "ACGTN" [read_seqs [i ][j ]]);
552561 } fprintf (stderr , "\n" );
@@ -988,7 +997,7 @@ hts_pos_t collect_phase_set_with_both_haps(int n_reads, int *read_haps, int *rea
988997// 1. ref vs cons: 1
989998// 2. cons vs n_reads: n_reads
990999// 3. ref vs n_reads: n_reads
991- int wfa_collect_noisy_aln_str_with_ps_hap (const call_var_opt_t * opt , int n_reads , int * noisy_read_ids , int * lens , uint8_t * * seqs , char * * names ,
1000+ int wfa_collect_noisy_aln_str_with_ps_hap (const call_var_opt_t * opt , int n_reads , int * noisy_read_ids , int * lens , uint8_t * * seqs , uint8_t * * quals , char * * names ,
9921001 int * haps , hts_pos_t * phase_sets , int * fully_covers , hts_pos_t ps , uint8_t * ref_seq , int ref_seq_len ,
9931002 int * clu_n_seqs , int * * clu_read_ids , aln_str_t * * aln_strs ) {
9941003 // given specific PS, collect consensus sequences for each haplotype
@@ -997,6 +1006,7 @@ int wfa_collect_noisy_aln_str_with_ps_hap(const call_var_opt_t *opt, int n_reads
9971006 int * ps_hap_read_ids = (int * )malloc (total_n_reads * sizeof (int ));
9981007 int * ps_hap_read_lens = (int * )malloc (total_n_reads * sizeof (int ));
9991008 uint8_t * * ps_hap_read_seqs = (uint8_t * * )malloc (total_n_reads * sizeof (uint8_t * ));
1009+ uint8_t * * ps_hap_read_quals = (uint8_t * * )malloc (total_n_reads * sizeof (uint8_t * ));
10001010 char * * ps_hap_read_names = (char * * )malloc (total_n_reads * sizeof (char * ));
10011011 int * ps_hap_full_covers = (int * )calloc (total_n_reads , sizeof (int ));
10021012 int * cons_lens = (int * )malloc (2 * sizeof (int )); uint8_t * * cons_seqs = (uint8_t * * )malloc (2 * sizeof (uint8_t * ));
@@ -1008,6 +1018,7 @@ int wfa_collect_noisy_aln_str_with_ps_hap(const call_var_opt_t *opt, int n_reads
10081018 ps_hap_read_ids [n_ps_hap_reads ] = noisy_read_ids [i ];
10091019 ps_hap_read_lens [n_ps_hap_reads ] = lens [i ];
10101020 ps_hap_read_seqs [n_ps_hap_reads ] = seqs [i ];
1021+ ps_hap_read_quals [n_ps_hap_reads ] = quals [i ];
10111022 ps_hap_full_covers [n_ps_hap_reads ] = fully_covers [i ];
10121023 ps_hap_read_names [n_ps_hap_reads ] = names [i ];
10131024 n_ps_hap_reads ++ ;
@@ -1016,7 +1027,7 @@ int wfa_collect_noisy_aln_str_with_ps_hap(const call_var_opt_t *opt, int n_reads
10161027 if (LONGCALLD_VERBOSE >=2 ) fprintf (stderr , "PS: %ld HAP: %d n_reads: %d\n" , ps , hap , n_ps_hap_reads );
10171028 if (n_ps_hap_reads == 0 ) continue ;
10181029 // collect consensus sequences
1019- n_cons += abpoa_partial_aln_msa_cons (opt , NULL , 10 , n_ps_hap_reads , ps_hap_read_ids , ps_hap_read_seqs , ps_hap_read_lens , ps_hap_full_covers , ps_hap_read_names ,
1030+ n_cons += abpoa_partial_aln_msa_cons (opt , NULL , 10 , n_ps_hap_reads , ps_hap_read_ids , ps_hap_read_seqs , ps_hap_read_quals , ps_hap_read_lens , ps_hap_full_covers , ps_hap_read_names ,
10201031 1 , cons_lens + hap - 1 , cons_seqs + hap - 1 , clu_n_seqs + hap - 1 , clu_read_ids + hap - 1 , NULL , NULL );
10211032 }
10221033
@@ -1061,16 +1072,18 @@ int wfa_collect_noisy_aln_str_with_ps_hap(const call_var_opt_t *opt, int n_reads
10611072 if (n_ps_hap_reads == 0 ) continue ;
10621073 }
10631074 }
1064- free (ps_hap_read_ids ); free (ps_hap_read_lens ); free (ps_hap_read_seqs ); free (ps_hap_full_covers ); free (ps_hap_read_names );
1075+ free (ps_hap_read_ids ); free (ps_hap_read_lens ); free (ps_hap_read_seqs ); free (ps_hap_read_quals ); free ( ps_hap_full_covers ); free (ps_hap_read_names );
10651076 for (int i = 0 ; i < 2 ; ++ i ) {
10661077 if (cons_seqs [i ] != NULL ) free (cons_seqs [i ]);
10671078 } free (cons_lens ); free (cons_seqs );
10681079 return n_cons ;
10691080}
10701081
1071- int collect_noisy_read_info (bam_chunk_t * chunk , hts_pos_t reg_beg , hts_pos_t reg_end , int noisy_reg_i , int n_noisy_reg_reads , int * noisy_reg_reads , int * * read_lens , uint8_t * * * read_seqs , char * * * read_names , int * * fully_covers , int * * read_haps , hts_pos_t * * phase_sets ) {
1082+ int collect_noisy_read_info (bam_chunk_t * chunk , hts_pos_t reg_beg , hts_pos_t reg_end , int noisy_reg_i , int n_noisy_reg_reads , int * noisy_reg_reads , int * * read_lens ,
1083+ uint8_t * * * read_seqs , uint8_t * * * read_quals , char * * * read_names , int * * fully_covers , int * * read_haps , hts_pos_t * * phase_sets ) {
10721084 * read_lens = (int * )calloc (n_noisy_reg_reads , sizeof (int ));
10731085 * read_seqs = (uint8_t * * )malloc (n_noisy_reg_reads * sizeof (uint8_t * ));
1086+ * read_quals = (uint8_t * * )malloc (n_noisy_reg_reads * sizeof (uint8_t * ));
10741087 * read_names = (char * * )malloc (n_noisy_reg_reads * sizeof (char * ));
10751088 * fully_covers = (int * )calloc (n_noisy_reg_reads , sizeof (int ));
10761089 * read_haps = (int * )calloc (n_noisy_reg_reads , sizeof (int ));
@@ -1115,8 +1128,10 @@ int collect_noisy_read_info(bam_chunk_t *chunk, hts_pos_t reg_beg, hts_pos_t reg
11151128 else (* fully_covers )[i ] = 0 ;
11161129 // if (2*(reg_read_end-reg_read_beg+1) < (reg_end-reg_beg+1)) return 0;
11171130 (* read_seqs )[i ] = (uint8_t * )malloc ((reg_read_end - reg_read_beg + 1 ) * sizeof (uint8_t ));
1131+ (* read_quals )[i ] = (uint8_t * )malloc ((reg_read_end - reg_read_beg + 1 ) * sizeof (uint8_t ));
11181132 for (int j = reg_read_beg ; j <= reg_read_end ; ++ j ) {
11191133 (* read_seqs )[i ][j - reg_read_beg ] = seq_nt16_int [bam_seqi (read_digars -> bseq , j )];
1134+ (* read_quals )[i ][j - reg_read_beg ] = read_digars -> qual [j ];
11201135 }
11211136 (* read_lens )[i ] = reg_read_end - reg_read_beg + 1 ;
11221137 (* read_haps )[i ] = chunk -> haps [read_i ];
@@ -1131,8 +1146,8 @@ int collect_noisy_reg_aln_strs(const call_var_opt_t *opt, bam_chunk_t *chunk, ht
11311146 int * clu_n_seqs , int * * clu_read_ids , aln_str_t * * aln_strs ) {
11321147 if (n_noisy_reg_reads <= 0 ) return 0 ;
11331148 // fully_cover: 0 -> none, 1 -> left, 2 -> right, 3 -> both
1134- char * * names = NULL ; uint8_t * * seqs = NULL ; int * fully_covers = NULL , * lens = NULL , * haps = NULL ; hts_pos_t * phase_sets = NULL ;
1135- collect_noisy_read_info (chunk , noisy_reg_beg , noisy_reg_end , noisy_reg_i , n_noisy_reg_reads , noisy_reads , & lens , & seqs , & names , & fully_covers , & haps , & phase_sets );
1149+ char * * names = NULL ; uint8_t * * seqs = NULL ; int * fully_covers = NULL , * lens = NULL , * haps = NULL ; hts_pos_t * phase_sets = NULL ; uint8_t * * base_quals = NULL ;
1150+ collect_noisy_read_info (chunk , noisy_reg_beg , noisy_reg_end , noisy_reg_i , n_noisy_reg_reads , noisy_reads , & lens , & seqs , & base_quals , & names , & fully_covers , & haps , & phase_sets );
11361151
11371152 sort_by_full_cover_and_length (n_noisy_reg_reads , noisy_reads , lens , seqs , fully_covers , names , haps , phase_sets );
11381153 // >= min_hap_full_read_count reads for each hap && >= min_hap_read_count reads (including not full-cover, but >= full-cover length) for each hap
@@ -1145,7 +1160,7 @@ int collect_noisy_reg_aln_strs(const call_var_opt_t *opt, bam_chunk_t *chunk, ht
11451160 // XXX only use fully-covered reads, including cliping reads (after re-align to backbone read)
11461161 // two cases to call consensus sequences
11471162 if (ps_with_both_haps > 0 ) { // call consensus sequences for each haplotype
1148- n_cons = wfa_collect_noisy_aln_str_with_ps_hap (opt , n_noisy_reg_reads , noisy_reads , lens , seqs , names , haps , phase_sets , fully_covers , ps_with_both_haps ,
1163+ n_cons = wfa_collect_noisy_aln_str_with_ps_hap (opt , n_noisy_reg_reads , noisy_reads , lens , seqs , base_quals , names , haps , phase_sets , fully_covers , ps_with_both_haps ,
11491164 ref_seq , ref_seq_len , clu_n_seqs , clu_read_ids , aln_strs );
11501165 // >= min_no_hap_full_read_count full reads in total
11511166 } else if (ps_with_both_haps <= 0 && n_full_reads >= min_no_hap_full_read_count ) {
0 commit comments