From 5e878001382765e0b291d19b9faa6c1dfd88ef84 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 14 May 2025 17:55:44 +0200 Subject: [PATCH 1/3] cli: improve automatic parallelism to respect cgroup limits --- CHANGELOG.md | 2 ++ lib/bootsnap/cli.rb | 3 +-- lib/bootsnap/cli/worker_pool.rb | 34 +++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c425f72e..bed3839f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Unreleased +* Improve CLI to detect cgroup CPU limits and avoid spawning too many worker processes. + # 1.18.4 * Allow using bootsnap without bundler. See #488. diff --git a/lib/bootsnap/cli.rb b/lib/bootsnap/cli.rb index d0cf8763..1d77a91a 100644 --- a/lib/bootsnap/cli.rb +++ b/lib/bootsnap/cli.rb @@ -4,7 +4,6 @@ require "bootsnap/cli/worker_pool" require "optparse" require "fileutils" -require "etc" module Bootsnap class CLI @@ -29,7 +28,7 @@ def initialize(argv) self.compile_gemfile = false self.exclude = nil self.verbose = false - self.jobs = Etc.nprocessors + self.jobs = nil self.iseq = true self.yaml = true self.json = true diff --git a/lib/bootsnap/cli/worker_pool.rb b/lib/bootsnap/cli/worker_pool.rb index a4d40bad..131942e7 100644 --- a/lib/bootsnap/cli/worker_pool.rb +++ b/lib/bootsnap/cli/worker_pool.rb @@ -1,16 +1,50 @@ # frozen_string_literal: true +require "etc" +require "rbconfig" + module Bootsnap class CLI class WorkerPool class << self def create(size:, jobs:) + size ||= default_size if size > 0 && Process.respond_to?(:fork) new(size: size, jobs: jobs) else Inline.new(jobs: jobs) end end + + def default_size + size = [Etc.nprocessors, cpu_quota || 0].min + case size + when 0, 1 + 0 + else + size + end + end + + def cpu_quota + if RbConfig::CONFIG["target_os"].include?("linux") + if File.exist?("/sys/fs/cgroup/cpu.max") + # cgroups v2: https://docs.kernel.org/admin-guide/cgroup-v2.html#cpu-interface-files + cpu_max = 
File.read("/sys/fs/cgroup/cpu.max") + return nil if cpu_max.start_with?("max ") # no limit + max, period = cpu_max.split.map(&:to_f) + max / period + elsif File.exist?("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us") + # cgroups v1: https://kernel.googlesource.com/pub/scm/linux/kernel/git/glommer/memcg/+/cpu_stat/Documentation/cgroups/cpu.txt + max = File.read("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us").to_i + # If the cpu.cfs_quota_us is -1, cgroup does not adhere to any CPU time restrictions + # https://docs.kernel.org/scheduler/sched-bwc.html#management + return nil if max <= 0 + period = File.read("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us").to_f + max / period + end + end + end end class Inline From d603b704ab1b8f14ffe05ba6e7893aef4021fe3f Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 14 May 2025 18:00:59 +0200 Subject: [PATCH 2/3] Attempt to detect QEMU hangs When building cross platform images with Docker, QEMU is often used under the hood and can have a bug that causes forked processes to deadlock. Before spawning workers we test for that bug. Fix: https://github.com/Shopify/bootsnap/issues/495 Closes: https://github.com/Shopify/bootsnap/pull/497 Co-Authored-By: Sarun Rattanasiri --- CHANGELOG.md | 2 ++ lib/bootsnap/cli/worker_pool.rb | 42 +++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bed3839f..e74177f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Unreleased +* Attempt to detect a QEMU bug that can cause `bootsnap precompile` to hang forever when building ARM64 docker images + from x86_64 machines. See #495. * Improve CLI to detect cgroup CPU limits and avoid spawning too many worker processes. 
# 1.18.4 diff --git a/lib/bootsnap/cli/worker_pool.rb b/lib/bootsnap/cli/worker_pool.rb index 131942e7..28f5b73d 100644 --- a/lib/bootsnap/cli/worker_pool.rb +++ b/lib/bootsnap/cli/worker_pool.rb @@ -2,6 +2,7 @@ require "etc" require "rbconfig" +require "io/wait" unless IO.method_defined?(:wait_readable) module Bootsnap class CLI @@ -17,12 +18,19 @@ def create(size:, jobs:) end def default_size - size = [Etc.nprocessors, cpu_quota || 0].min + nprocessors = Etc.nprocessors + size = [nprocessors, cpu_quota || nprocessors].min case size when 0, 1 0 else - size + if fork_defunct? + $stderr.puts "warning: faulty fork(2) detected, probably in cross platform docker builds. " \ + "Disabling parallel compilation." + 0 + else + size + end end end @@ -32,6 +40,7 @@ def cpu_quota # cgroups v2: https://docs.kernel.org/admin-guide/cgroup-v2.html#cpu-interface-files cpu_max = File.read("/sys/fs/cgroup/cpu.max") return nil if cpu_max.start_with?("max ") # no limit + max, period = cpu_max.split.map(&:to_f) max / period elsif File.exist?("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us") @@ -40,11 +49,40 @@ def cpu_quota # If the cpu.cfs_quota_us is -1, cgroup does not adhere to any CPU time restrictions # https://docs.kernel.org/scheduler/sched-bwc.html#management return nil if max <= 0 + period = File.read("/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us").to_f max / period end end end + + def fork_defunct? + return true unless ::Process.respond_to?(:fork) + + # Ref: https://github.com/Shopify/bootsnap/issues/495 + # The second forked process will hang on some QEMU environments + r, w = IO.pipe + pids = 2.times.map do + ::Process.fork do + exit!(true) + end + end + w.close + r.wait_readable(1) # Wait at most 1s + + defunct = false + + pids.each do |pid| + _pid, status = ::Process.wait2(pid, ::Process::WNOHANG) + if status.nil? 
# Didn't exit in 1s + defunct = true + Process.kill(:KILL, pid) + ::Process.wait2(pid) + end + end + + defunct + end end class Inline From abe4ec3aa9d80f2d8b7a3c7d03bf692d39044dce Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 14 May 2025 18:19:00 +0200 Subject: [PATCH 3/3] Bump bundler cache on CI --- .github/workflows/ci.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f6ae8f12..c02f94e6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -24,6 +24,7 @@ jobs: with: ruby-version: ${{ matrix.ruby }} bundler-cache: true + cache-version: 2 - run: bundle exec rake rubocop: @@ -36,6 +37,7 @@ jobs: with: ruby-version: '3.3' bundler-cache: true + cache-version: 2 - run: bundle exec rubocop rubies: @@ -51,6 +53,7 @@ jobs: with: ruby-version: ${{ matrix.ruby }} bundler-cache: true + cache-version: 2 - run: bundle exec rake psych4: @@ -68,6 +71,7 @@ jobs: with: ruby-version: ${{ matrix.ruby }} bundler-cache: true + cache-version: 2 - run: bundle exec rake minimal: @@ -83,4 +87,5 @@ jobs: with: ruby-version: ${{ matrix.ruby }} bundler-cache: true + cache-version: 2 - run: bin/test-minimal-support