Boltz (#46)

kaushalprasadhial · web-flow · commit 0df6ac3898de · 2025-06-13T14:50:08.000+05:30
* added boltz
diff --git a/applications/boltz/Dockerfile b/applications/boltz/Dockerfile
@@ -0,0 +1,61 @@
+# Base image of the final stage (override with --build-arg FROM_IMAGE=...)
+ARG FROM_IMAGE=ubuntu:24.04
+
+# Stage 1: Miniforge image; /opt/conda is copied out of this stage further down
+ARG BASE_IMAGE=condaforge/miniforge3:24.3.0-0
+FROM ${BASE_IMAGE} AS conda_setup
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Stage 2: Set up the main build environment
+FROM ${FROM_IMAGE} AS builder
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install build tools (git, compiler toolchain, autotools) plus wget/vim/numactl,
+# then drop the apt package lists and caches to keep this layer small
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git build-essential wget vim ca-certificates numactl autoconf automake make && \
+    rm -rf /var/lib/apt/lists/* && \
+    apt-get autoremove -y && \
+    apt-get clean
+
+# Build arguments for the service account's UID/GID (default 2000:2000);
+# override with --build-arg to match the host user's IDs
+ARG USER_ID=2000
+ARG GROUP_ID=2000
+
+# Name used for both the service user and its group
+ENV SERVICE_NAME="boltz-service"
+
+# Create a user and group with same UID and GID as host; -m creates the home
+# directory and /bin/false is set as the account's login shell
+RUN groupadd --gid ${GROUP_ID} $SERVICE_NAME && \
+    useradd -m -g $SERVICE_NAME --shell /bin/false --uid ${USER_ID} $SERVICE_NAME
+
+# Copy Conda installation from the conda_setup stage
+COPY --from=conda_setup /opt/conda /opt/conda
+# Put Conda's executables and shared libraries ahead of the system ones
+ENV PATH="/opt/conda/bin:$PATH"
+ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH:-}"
+# NOTE(review): no USER has been set at this point, so this edits the image's
+# default user's ~/.bashrc (presumably root), not the service account's —
+# confirm it is still needed
+RUN echo "source activate" >> ~/.bashrc
+
+# Build jemalloc 5.3.0 into the Conda prefix. Clone, build and cleanup share one
+# RUN so the source tree is never committed to an image layer.
+RUN git clone --branch 5.3.0 https://github.com/jemalloc/jemalloc.git /jemalloc && \
+    cd /jemalloc && bash autogen.sh --prefix=/opt/conda/ && make install && \
+    cd / && rm -rf /jemalloc
+
+# Fetch Boltz v0.4.1 into /app/boltz
+WORKDIR /app
+RUN chown -R $SERVICE_NAME:$SERVICE_NAME /app
+RUN git clone --branch v0.4.1 https://github.com/jwohlwend/boltz.git
+
+# Editable install of the package from the /app/boltz checkout
+WORKDIR /app/boltz
+RUN pip install -e .
+
+# Hand the checkout to the service user and add the executable entrypoint script
+RUN chown -R $SERVICE_NAME:$SERVICE_NAME /app/boltz
+COPY ./entrypoint.sh entrypoint.sh
+RUN chmod +x entrypoint.sh
+
+# Switch to non-root user
+USER $SERVICE_NAME
+
+# Run the entrypoint script copied into /app/boltz above
+ENTRYPOINT ["/app/boltz/entrypoint.sh"]
+
+# Default argument handed to the entrypoint
+# NOTE(review): entrypoint.sh does not appear to read its arguments — confirm "default" is needed
+CMD ["default"]
diff --git a/applications/boltz/README.md b/applications/boltz/README.md
@@ -0,0 +1,105 @@
+## 🔍 Running Inference with Boltz Docker
+
+Follow the steps below to run inference using the Boltz Docker container:
+
+---
+
+### 🐳 1. Build the Docker Image
+
+From the `applications/boltz` directory (the one containing the `Dockerfile`), build the Docker image:
+
+```bash
+docker build -t boltz1 .
+```
+
+---
+
+### 📁 2. Create and Set Output Directory Permissions
+
+Create the output and model folders and give them write permissions:
+
+```bash
+mkdir -p <output_folder_location> <model_folder_location>
+chmod a+w <output_folder_location> <model_folder_location>
+          
+export OUTPUT=$PWD/<output_folder_location>
+export MODELS=$PWD/<model_folder_location>
+export INPUT=$PWD/<input_folder_location>
+```
+
+> ⚠️ Docker needs write permissions in the `<output_folder_location>` and `<model_folder_location>` folders. `<input_folder_location>` is the folder containing the input `.yaml` or `.fasta` files.
+
+Example
+
+```bash
+mkdir -p ./output ./model
+chmod a+w ./output ./model
+          
+export OUTPUT=$PWD/output
+export MODELS=$PWD/model
+export INPUT=$PWD/examples/
+```
+
+---
+
+### 🚀 3. Run Inference
+
+To run inference, mount the input, model, and output folders into the container as volumes.
+The entrypoint passes the mounted input and output folders to Boltz, so the `docker run` command looks like this:
+
+```bash
+docker run -it \
+  --shm-size=100g \
+  -v $INPUT:/app/boltz/input \
+  -v $MODELS:/home/boltz-service/.boltz/ \
+  -v $OUTPUT:/app/boltz/output \
+  boltz1
+```
+
+> 📝 The `--shm-size=100g` flag avoids shared memory issues during data loading with PyTorch.
+
+---
+
+### ✅ Output
+
+Results will be written to the `<output_folder_location>` folder.
+
+Boltz currently accepts three input formats:
+
+1. Fasta file, for most use cases
+
+2. A comprehensive YAML schema, for more complex use cases
+
+3. A directory containing files of the above formats, for batched processing
+
+## For more information, check out [boltz](https://github.com/jwohlwend/boltz)
+
+## License
+
+Our model and code are released under MIT License, and can be freely used for both academic and commercial purposes.
+
+
+## Cite
+
+If you use this code or the models in your research, please cite the following paper:
+
+```bibtex
+@article{wohlwend2024boltz1,
+  author = {Wohlwend, Jeremy and Corso, Gabriele and Passaro, Saro and Reveiz, Mateo and Leidal, Ken and Swiderski, Wojtek and Portnoi, Tally and Chinn, Itamar and Silterra, Jacob and Jaakkola, Tommi and Barzilay, Regina},
+  title = {Boltz-1: Democratizing Biomolecular Interaction Modeling},
+  year = {2024},
+  doi = {10.1101/2024.11.19.624167},
+  journal = {bioRxiv}
+}
+```
+
+In addition if you use the automatic MSA generation, please cite:
+
+```bibtex
+@article{mirdita2022colabfold,
+  title={ColabFold: making protein folding accessible to all},
+  author={Mirdita, Milot and Sch{\"u}tze, Konstantin and Moriwaki, Yoshitaka and Heo, Lim and Ovchinnikov, Sergey and Steinegger, Martin},
+  journal={Nature methods},
+  year={2022},
+}
+```
diff --git a/applications/boltz/entrypoint.sh b/applications/boltz/entrypoint.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Container entrypoint: runs CPU-only `boltz predict` once for every .yaml or
+# .fasta file found directly under INPUT_DIR, writing results to OUTPUT_DIR.
+# Exits 1 when no matching input file is found or when any prediction fails.
+
+INPUT_DIR="/app/boltz/input"
+OUTPUT_DIR="/app/boltz/output"
+# Exported so the boltz child process inherits it; a plain assignment is not
+# passed to child processes unless the variable was already in the environment.
+export CUDA_VISIBLE_DEVICES=""
+
+FOUND=0
+FAILED=0
+
+for INPUT_FILE in "$INPUT_DIR"/*; do
+    # Only process .yaml or .fasta files
+    if [[ "$INPUT_FILE" == *.yaml || "$INPUT_FILE" == *.fasta ]]; then
+        FOUND=1
+        echo "📂 Processing: $INPUT_FILE"
+        # jemalloc is preloaded (with its MALLOC_CONF settings) for this command
+        # only; the ${LD_PRELOAD:+...} form avoids a trailing ':' when
+        # LD_PRELOAD is unset or empty.
+        if ! LD_PRELOAD="/opt/conda/lib/libjemalloc.so${LD_PRELOAD:+:$LD_PRELOAD}" \
+            MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1" \
+            boltz predict "$INPUT_FILE" --out_dir "$OUTPUT_DIR" --accelerator "cpu"; then
+            # Keep going with the remaining inputs, but remember the failure so
+            # the container does not report success.
+            echo "❌ boltz predict failed for $INPUT_FILE" >&2
+            FAILED=1
+        fi
+    fi
+done
+
+if [[ $FOUND -eq 0 ]]; then
+    echo "❌ No .yaml or .fasta files found in $INPUT_DIR"
+    exit 1
+fi
+
+exit "$FAILED"