@@ -1814,135 +1814,6 @@ static void bcf_record_check_err(const bcf_hdr_t *hdr, bcf1_t *rec,
18141814 (* reports )++ ;
18151815}
18161816
1817- /**
1818- * updatephasing - updates 1st phasing based on other phasing status
1819- * @param p - pointer to phase value array
1820- * @param end - end of array
1821- * @param q - pointer to consumed data
1822- * @param set - whether to set (while read) or reset (for write)
1823- * @param samples - no. of samples in array
1824- * @param ploidy - no. of phasing values per sample
1825- * @param type - value type (one of BCF_BT_...)
1826- * Returns 0 on success and 1 on failure
1827- */
1828- static int updatephasing (uint8_t * p , uint8_t * end , uint8_t * * q , int set , int samples , int ploidy , int type )
1829- {
1830- int j , k , upd = set ? 1 : -1 ;
1831- size_t bytes ;
1832- for (j = 0 ; j < samples ; ++ j ) { //for each sample
1833- int anyunphased = 0 ;
1834- uint8_t * ptr1 = p ;
1835- int32_t al1 = 0 , val ;
1836- for (k = 0 ; k < ploidy ; ++ k ) { //in each ploidy
1837- switch (type ) {
1838- case BCF_BT_INT8 :
1839- val = * (uint8_t * )p ;
1840- break ;
1841- case BCF_BT_INT16 :
1842- val = * (uint16_t * )p ;
1843- break ;
1844- case BCF_BT_INT32 :
1845- val = * (uint32_t * )p ;
1846- break ;
1847- //wont have anything bigger than 32bit for GT
1848- default : //invalid
1849- return 1 ;
1850- break ;
1851- }
1852- if (!k ) al1 = val ;
1853- else if (!(val & 1 )) {
1854- anyunphased = 1 ;
1855- }
1856- //get to next phasing or skip the rest for this sample
1857- bytes = (anyunphased ? ploidy - k : 1 ) << bcf_type_shift [type ];
1858- if (end - p < bytes )
1859- return 1 ;
1860- p += bytes ;
1861- if (anyunphased ) {
1862- break ; //no further check required
1863- }
1864- }
1865- if (!anyunphased && al1 > 1 ) { //no other unphased
1866- /*set phased on read or make unphased on write as upto 4.3 1st
1867- phasing is not described explicitly and has to be inferred*/
1868- switch (type ) {
1869- case BCF_BT_INT8 :
1870- * (uint8_t * )ptr1 += upd ;
1871- break ;
1872- case BCF_BT_INT16 :
1873- * (uint16_t * )ptr1 += upd ;
1874- break ;
1875- case BCF_BT_INT32 :
1876- * (uint32_t * )ptr1 += upd ;
1877- break ;
1878- }
1879- }
1880- }
1881- * q = p ;
1882- return 0 ;
1883- }
1884-
1885- /**
1886- * update44phasing - converts GT to/from v4.4 way representation
1887- * @param h - bcf header, to get version
1888- * @param v - pointer to bcf data
1889- * @param setreset - whether to set or reset
1890- * Returns 0 on success and -1 on failure
1891- * For data read, to be converted to v44, setreset to be 1. For data write, to
1892- * be converted to v < v44, setreset to be 0.
1893- * If the version in header is >= 4.4, no change is made. Otherwise 1st phasing
1894- * is set if there are no other unphased ones.
1895- */
1896- HTSLIB_EXPORT int update44phasing (bcf_hdr_t * h , bcf1_t * b , int setreset )
1897- {
1898- int i , idgt = -1 , ver = VCF_DEF , num , type ;
1899- uint8_t * ptr = NULL , * end = NULL ;
1900- if (!b ) return 1 ;
1901-
1902- ver = bcf_get_version (h , "" );
1903- idgt = h ? bcf_hdr_id2int (h , BCF_DT_ID , "GT" ) : -1 ;
1904- if (ver >= VCF44 || idgt == -1 ) return 0 ; //no change required
1905-
1906- if (b -> unpacked & BCF_UN_FMT ) { //unpacked, get from decoded data
1907- for (i = 0 ; i < b -> n_fmt ; i ++ )
1908- {
1909- if ( b -> d .fmt [i ].id == idgt ) {
1910- ptr = b -> d .fmt [i ].p ;
1911- end = ptr + b -> d .fmt [i ].p_len ;
1912- num = b -> d .fmt [i ].n ;
1913- type = b -> d .fmt [i ].type ;
1914- break ;
1915- }
1916- }
1917- } else { //get from indiv.s binary stream
1918- ptr = (uint8_t * ) b -> indiv .s ;
1919- end = ptr + b -> indiv .l ;
1920- int found = 0 ;
1921- for (i = 0 ; i < b -> n_fmt ; ++ i ) {
1922- int32_t key = -1 ;
1923- if (bcf_dec_typed_int1_safe (ptr , end , & ptr , & key ) != 0 ) return 1 ;
1924- if (bcf_dec_size_safe (ptr , end , & ptr , & num , & type ) != 0 ) return 1 ;
1925- if (type > BCF_BT_CHAR ) return 1 ; //invalid type
1926- if (idgt == key ) {
1927- found = 1 ;
1928- break ;
1929- } else { //skip and check next
1930- size_t bytes = ((size_t ) num << bcf_type_shift [type ]) * b -> n_sample ;
1931- if (end - ptr < bytes ) return 1 ;
1932- ptr += bytes ;
1933- }
1934- }
1935- if (!found ) {
1936- ptr = end = NULL ;
1937- }
1938- }
1939- if (ptr ) {
1940- //with GT and v < v44, need phase conversion
1941- if (updatephasing (ptr , end , & ptr , setreset , b -> n_sample , num , type )) return 1 ;
1942- }
1943- return 0 ;
1944- }
1945-
19461817static int bcf_record_check (const bcf_hdr_t * hdr , bcf1_t * rec ) {
19471818 uint8_t * ptr , * end ;
19481819 size_t bytes ;
@@ -1961,10 +1832,6 @@ static int bcf_record_check(const bcf_hdr_t *hdr, bcf1_t *rec) {
19611832 (1 << BCF_BT_FLOAT ) |
19621833 (1 << BCF_BT_CHAR ));
19631834 int32_t max_id = hdr ? hdr -> n [BCF_DT_ID ] : 0 ;
1964- int idgt = -1 , ver = VCF_DEF ;
1965-
1966- ver = bcf_get_version (hdr , NULL );
1967- idgt = hdr ? bcf_hdr_id2int (hdr , BCF_DT_ID , "GT" ) : -1 ;
19681835
19691836 // Check for valid contig ID
19701837 if (rec -> rid < 0
@@ -2074,16 +1941,9 @@ static int bcf_record_check(const bcf_hdr_t *hdr, bcf1_t *rec) {
20741941 bcf_record_check_err (hdr , rec , "type" , & reports , type );
20751942 err |= BCF_ERR_TAG_INVALID ;
20761943 }
2077- if (ver < VCF44 && idgt != -1 && idgt == key ) {
2078- //with GT and v < v44, need phase conversion
2079- if (updatephasing (ptr , end , & ptr , 1 , rec -> n_sample , num , type )) {
2080- err |= BCF_ERR_TAG_INVALID ;
2081- }
2082- } else {
2083- bytes = ((size_t ) num << bcf_type_shift [type ]) * rec -> n_sample ;
2084- if (end - ptr < bytes ) goto bad_indiv ;
2085- ptr += bytes ;
2086- }
1944+ bytes = ((size_t ) num << bcf_type_shift [type ]) * rec -> n_sample ;
1945+ if (end - ptr < bytes ) goto bad_indiv ;
1946+ ptr += bytes ;
20871947 }
20881948
20891949 if (!err && rec -> rlen < 0 ) {
@@ -2172,8 +2032,6 @@ int bcf_readrec(BGZF *fp, void *null, void *vv, int *tid, hts_pos_t *beg, hts_po
21722032 if (ret == 0 ) ret = bcf_record_check (NULL , v );
21732033 if (ret >= 0 )
21742034 * tid = v -> rid , * beg = v -> pos , * end = v -> pos + v -> rlen ;
2175- /*bcf data read may need conversion to vcf44 phasing format, as header is
2176- not availble here, it has to be done after this returns the data*/
21772035 return ret ;
21782036}
21792037
@@ -2442,11 +2300,6 @@ int bcf_write(htsFile *hfp, bcf_hdr_t *h, bcf1_t *v)
24422300 return -1 ;
24432301 }
24442302
2445- if (update44phasing (h , v , 0 )) { //reset phasing update made after read
2446- hts_log_error ("Failed to set prorper phasing at %s:%" PRIhts_pos "" , bcf_seqname_safe (h ,v ), v -> pos + 1 );
2447- return -1 ;
2448- }
2449-
24502303 BGZF * fp = hfp -> fp .bgzf ;
24512304 uint8_t x [32 ];
24522305 u32_to_le (v -> shared .l + 24 , x ); // to include six 32-bit integers
@@ -6086,12 +5939,13 @@ int bcf_get_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, c
60865939
60875940int bcf_get_format_values (const bcf_hdr_t * hdr , bcf1_t * line , const char * tag , void * * dst , int * ndst , int type )
60885941{
6089- int i ,j , tag_id = bcf_hdr_id2int (hdr , BCF_DT_ID , tag );
5942+ int i ,j , tag_id = bcf_hdr_id2int (hdr , BCF_DT_ID , tag ), gt = 0 ;
60905943 if ( !bcf_hdr_idinfo_exists (hdr ,BCF_HL_FMT ,tag_id ) ) return -1 ; // no such FORMAT field in the header
60915944 if ( tag [0 ]== 'G' && tag [1 ]== 'T' && tag [2 ]== 0 )
60925945 {
60935946 // Ugly: GT field is considered to be a string by the VCF header but BCF represents it as INT.
60945947 if ( bcf_hdr_id2type (hdr ,BCF_HL_FMT ,tag_id )!= BCF_HT_STR ) return -2 ;
5948+ gt = 1 ;
60955949 }
60965950 else if ( bcf_hdr_id2type (hdr ,BCF_HL_FMT ,tag_id )!= type ) return -2 ; // expected different type
60975951
@@ -6151,6 +6005,39 @@ int bcf_get_format_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, v
61516005 default : hts_log_error ("Unexpected type %d at %s:%" PRIhts_pos , fmt -> type , bcf_seqname_safe (hdr ,line ), line -> pos + 1 ); exit (1 );
61526006 }
61536007 #undef BRANCH
6008+
6009+ if (gt && bcf_get_version (hdr , NULL ) < VCF44 ) {
6010+ //convert to v44 phasing if required
6011+ int32_t * v = * dst , * ptr1 , val1 , anyunphased = 0 , end = 0 ;
6012+ for (i = 0 ; i < nsmpl ; i ++ ) {
6013+ anyunphased = 0 ; val1 = 0 , end = 0 ;
6014+ for (j = 0 ; j < fmt -> n ; j ++ ) {
6015+ if (* v != bcf_int32_vector_end ) {
6016+ if (!j ) {
6017+ val1 = * v ;
6018+ ptr1 = v ;
6019+ } else {
6020+ if (!(* v & 1 )) { //unphased or unkonwn
6021+ anyunphased = 1 ;
6022+ }
6023+ }
6024+ } else {
6025+ end = 1 ;
6026+ }
6027+
6028+ if (val1 & 1 || anyunphased || end ) {
6029+ //phased || an unphased found || end of data, skip sample
6030+ v += (fmt -> n - j );
6031+ break ;
6032+ }
6033+ ++ v ;
6034+ }
6035+ if (val1 && !(val1 & 1 ) && !anyunphased ) {
6036+ //valid unphased one w/o any other unphased, make phased
6037+ * ptr1 |= 1 ;
6038+ }
6039+ }
6040+ }
61546041 return nsmpl * fmt -> n ;
61556042}
61566043
0 commit comments