diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index 6af0748f2471..72bbbf05ef8c 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -26,7 +26,6 @@ jobs:
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
       backend: ${{ matrix.backend }}
-      latest-image: ${{ matrix.latest-image }}
       dockerfile: ${{ matrix.dockerfile }}
       context: ${{ matrix.context }}
     secrets:
@@ -49,7 +48,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "rerankers"
-          latest-image: 'latest-gpu-nvidia-cuda-11-rerankers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -61,7 +59,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "vllm"
-          latest-image: 'latest-gpu-nvidia-cuda-11-vllm'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -73,7 +70,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "transformers"
-          latest-image: 'latest-gpu-nvidia-cuda-11-transformers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -85,7 +81,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "diffusers"
-          latest-image: 'latest-gpu-nvidia-cuda-11-diffusers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # CUDA 11 additional backends
@@ -98,7 +93,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "kokoro"
-          latest-image: 'latest-gpu-nvidia-cuda-11-kokoro'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -110,7 +104,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "faster-whisper"
-          latest-image: 'latest-gpu-nvidia-cuda-11-faster-whisper'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -122,7 +115,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "coqui"
-          latest-image: 'latest-gpu-nvidia-cuda-11-coqui'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -134,7 +126,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "bark"
-          latest-image: 'latest-gpu-nvidia-cuda-11-bark'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -146,7 +137,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "chatterbox"
-          latest-image: 'latest-gpu-nvidia-cuda-11-chatterbox'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # CUDA 12 builds
@@ -159,7 +149,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "rerankers"
-          latest-image: 'latest-gpu-nvidia-cuda-12-rerankers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -171,7 +160,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "vllm"
-          latest-image: 'latest-gpu-nvidia-cuda-12-vllm'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -183,7 +171,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "transformers"
-          latest-image: 'latest-gpu-nvidia-cuda-12-transformers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -194,8 +181,7 @@ jobs:
           tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
-          backend: "diffusers"
-          latest-image: 'latest-gpu-nvidia-cuda-12-diffusers'
+          backend: "diffusers"
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # CUDA 12 additional backends
@@ -208,7 +194,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "kokoro"
-          latest-image: 'latest-gpu-nvidia-cuda-12-kokoro'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -220,7 +205,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "faster-whisper"
-          latest-image: 'latest-gpu-nvidia-cuda-12-faster-whisper'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -232,7 +216,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "coqui"
-          latest-image: 'latest-gpu-nvidia-cuda-12-coqui'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -244,7 +227,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "bark"
-          latest-image: 'latest-gpu-nvidia-cuda-12-bark'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'cublas'
@@ -256,7 +238,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "chatterbox"
-          latest-image: 'latest-gpu-nvidia-cuda-12-chatterbox'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # hipblas builds
@@ -269,7 +250,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "rocm/dev-ubuntu-22.04:6.1"
           backend: "rerankers"
-          latest-image: 'latest-gpu-rocm-hipblas-rerankers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'hipblas'
@@ -281,7 +261,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "rocm/dev-ubuntu-22.04:6.1"
           backend: "vllm"
-          latest-image: 'latest-gpu-rocm-hipblas-vllm'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'hipblas'
@@ -293,7 +272,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "rocm/dev-ubuntu-22.04:6.1"
           backend: "transformers"
-          latest-image: 'latest-gpu-rocm-hipblas-transformers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'hipblas'
@@ -305,7 +283,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "rocm/dev-ubuntu-22.04:6.1"
           backend: "diffusers"
-          latest-image: 'latest-gpu-rocm-hipblas-diffusers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # ROCm additional backends
@@ -318,7 +295,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "rocm/dev-ubuntu-22.04:6.1"
           backend: "kokoro"
-          latest-image: 'latest-gpu-rocm-hipblas-kokoro'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'hipblas'
@@ -330,7 +306,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "rocm/dev-ubuntu-22.04:6.1"
           backend: "faster-whisper"
-          latest-image: 'latest-gpu-rocm-hipblas-faster-whisper'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'hipblas'
@@ -342,7 +317,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "rocm/dev-ubuntu-22.04:6.1"
           backend: "coqui"
-          latest-image: 'latest-gpu-rocm-hipblas-coqui'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'hipblas'
@@ -354,7 +328,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "rocm/dev-ubuntu-22.04:6.1"
           backend: "bark"
-          latest-image: 'latest-gpu-rocm-hipblas-bark'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # sycl builds
@@ -367,7 +340,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "rerankers"
-          latest-image: 'latest-gpu-intel-sycl-f32-rerankers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -379,7 +351,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "rerankers"
-          latest-image: 'latest-gpu-intel-sycl-f16-rerankers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f32'
@@ -391,7 +362,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "vllm"
-          latest-image: 'latest-gpu-intel-sycl-f32-vllm'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -403,7 +373,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "vllm"
-          latest-image: 'latest-gpu-intel-sycl-f16-vllm'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f32'
@@ -415,7 +384,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "transformers"
-          latest-image: 'latest-gpu-intel-sycl-f32-transformers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -427,7 +395,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "transformers"
-          latest-image: 'latest-gpu-intel-sycl-f16-transformers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f32'
@@ -439,7 +406,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "diffusers"
-          latest-image: 'latest-gpu-intel-sycl-f32-diffusers'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # SYCL additional backends
@@ -452,7 +418,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "kokoro"
-          latest-image: 'latest-gpu-intel-sycl-f32-kokoro'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -464,7 +429,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "kokoro"
-          latest-image: 'latest-gpu-intel-sycl-f16-kokoro'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f32'
@@ -476,7 +440,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "faster-whisper"
-          latest-image: 'latest-gpu-intel-sycl-f32-faster-whisper'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -488,7 +451,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "faster-whisper"
-          latest-image: 'latest-gpu-intel-sycl-f16-faster-whisper'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f32'
@@ -500,7 +462,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "coqui"
-          latest-image: 'latest-gpu-intel-sycl-f32-coqui'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -512,7 +473,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "coqui"
-          latest-image: 'latest-gpu-intel-sycl-f16-coqui'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f32'
@@ -524,7 +484,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "bark"
-          latest-image: 'latest-gpu-intel-sycl-f32-bark'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         - build-type: 'sycl_f16'
@@ -536,7 +495,6 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
           backend: "bark"
-          latest-image: 'latest-gpu-intel-sycl-f16-bark'
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
         # bark-cpp
@@ -549,6 +507,5 @@ jobs:
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           backend: "bark"
-          latest-image: 'latest-bark-cpp'
           dockerfile: "./backend/Dockerfile.go"
           context: "./"
\ No newline at end of file
diff --git a/.github/workflows/backend_build.yml b/.github/workflows/backend_build.yml
index 424910d46b37..fd4428368359 100644
--- a/.github/workflows/backend_build.yml
+++ b/.github/workflows/backend_build.yml
@@ -28,10 +28,6 @@ on:
       description: 'Tag latest'
       default: ''
       type: string
-    latest-image:
-      description: 'Tag latest'
-      default: ''
-      type: string
     tag-suffix:
       description: 'Tag suffix'
       default: ''
       type: string
@@ -153,7 +149,7 @@ jobs:
             type=sha
           flavor: |
             latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }}
+            suffix=${{ inputs.tag-suffix }},onlatest=true

       - name: Docker meta for PR
         id: meta_pull_request
@@ -168,7 +164,7 @@ jobs:
             type=sha,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
           flavor: |
             latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }}
+            suffix=${{ inputs.tag-suffix }},onlatest=true
       ## End testing image
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master
@@ -210,7 +206,6 @@ jobs:
           cache-from: type=gha
           platforms: ${{ inputs.platforms }}
           push: ${{ github.event_name != 'pull_request' }}
-          load: ${{ github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}

@@ -233,18 +228,7 @@ jobs:
           tags: ${{ steps.meta_pull_request.outputs.tags }}
           labels: ${{ steps.meta_pull_request.outputs.labels }}

-      - name: Cleanup
-        run: |
-          docker builder prune -f
-          docker system prune --force --volumes --all
-      - name: Latest tag
-        if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
-        run: |
-          docker tag localai/localai-backends:${{ steps.meta.outputs.version }} localai/localai-backends:${{ inputs.latest-image }}
-          docker push localai/localai-backends:${{ inputs.latest-image }}
-          docker tag quay.io/go-skynet/local-ai-backends:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}
-          docker push quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}
       - name: job summary
         run: |