Skip to content

Commit 01c5557

Browse files
committed
Add taxonomy db change to setup dbs script
1 parent 04c007b commit 01c5557

File tree

1 file changed

+15
-6
lines changed

1 file changed

+15
-6
lines changed

setup_databases.sh

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -74,15 +74,18 @@ downloadFile() {
7474
if [ ! -f DOWNLOADS_READY ]; then
7575
if [ "${FAST_PREBUILT_DATABASES}" = "1" ]; then
7676
# new prebuilt GPU+CPU databases, that don't require calling tsv2exprofiledb
77-
downloadFile "https://opendata.steineggerlab.workers.dev/colabfold/${UNIREF30DB}.db.tar.gz" "${UNIREF30DB}.tar.gz"
78-
downloadFile "https://opendata.steineggerlab.workers.dev/colabfold/colabfold_envdb_202108.db.tar.gz" "colabfold_envdb_202108.tar.gz"
77+
downloadFile "https://opendata.mmseqs.org/colabfold/${UNIREF30DB}.db.tar.gz" "${UNIREF30DB}.tar.gz"
78+
downloadFile "https://opendata.mmseqs.org/colabfold/colabfold_envdb_202108.db.tar.gz" "colabfold_envdb_202108.tar.gz"
7979
else
8080
# old .tsv + tsv2exprofiledb databases
81-
downloadFile "https://opendata.steineggerlab.workers.dev/colabfold/${UNIREF30DB}.tar.gz" "${UNIREF30DB}.tar.gz"
82-
downloadFile "https://opendata.steineggerlab.workers.dev/colabfold/colabfold_envdb_202108.tar.gz" "colabfold_envdb_202108.tar.gz"
81+
downloadFile "https://opendata.mmseqs.org/colabfold/${UNIREF30DB}.tar.gz" "${UNIREF30DB}.tar.gz"
82+
downloadFile "https://opendata.mmseqs.org/colabfold/colabfold_envdb_202108.tar.gz" "colabfold_envdb_202108.tar.gz"
8383
fi
84-
downloadFile "https://opendata.steineggerlab.workers.dev/colabfold/pdb100_230517.fasta.gz" "pdb100_230517.fasta.gz"
85-
downloadFile "https://opendata.steineggerlab.workers.dev/colabfold/pdb100_foldseek_230517.tar.gz" "pdb100_foldseek_230517.tar.gz"
84+
if [ "${UNIREF30DB}" = "uniref30_2302" ]; then
85+
downloadFile "https://opendata.mmseqs.org/colabfold/uniref30_2302_newtaxonomy.tar.gz" "uniref30_2302_newtaxonomy.tar.gz"
86+
fi
87+
downloadFile "https://opendata.mmseqs.org/colabfold/pdb100_230517.fasta.gz" "pdb100_230517.fasta.gz"
88+
downloadFile "https://opendata.mmseqs.org/colabfold/pdb100_foldseek_230517.tar.gz" "pdb100_foldseek_230517.tar.gz"
8689
touch DOWNLOADS_READY
8790
fi
8891

@@ -127,6 +130,12 @@ if [ ! -f UNIREF30_READY ]; then
127130
mmseqs createindex "${UNIREF30DB}_db" tmp1 --remove-tmp-files 1 ${GPU_INDEX_PAR}
128131
fi
129132

133+
# replace mapping and taxonomy with rebuilt versions, see:
134+
# https://github.com/sokrypton/ColabFold/wiki/MSA-Server-Database-History#2025-08-04-updated-uniref100_2302-taxonomypairing-files
135+
if [ -e "uniref30_2302_newtaxonomy.tar.gz" ]; then
136+
tar -xzvf "uniref30_2302_newtaxonomy.tar.gz"
137+
fi
138+
130139
if [ -e ${UNIREF30DB}_db_mapping ]; then
131140
# create binary, mmap-able taxonomy mapping, saves a few seconds of load time during pairing
132141
TAXHEADER=$(od -An -N4 -t x4 "${UNIREF30DB}_db_mapping" | tr -d ' ')

0 commit comments

Comments
 (0)