From ae7f1cf0c5f9be60448c7d8639568b217100e3eb Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Thu, 24 Apr 2025 16:28:00 -0600 Subject: [PATCH 1/7] HELP-74319 Ensure partial reads are retried --- lib/mongo/socket.rb | 47 ++++++++++++++++------------ spec/mongo/socket_spec.rb | 64 +++++++++++++++++++++++++++++++++++---- 2 files changed, 86 insertions(+), 25 deletions(-) diff --git a/lib/mongo/socket.rb b/lib/mongo/socket.rb index 3134f2aa87..0c4a992d99 100644 --- a/lib/mongo/socket.rb +++ b/lib/mongo/socket.rb @@ -523,32 +523,41 @@ def write_with_timeout(*args, timeout:) def write_chunk(chunk, timeout) deadline = Utils.monotonic_time + timeout + written = 0 - begin - written += @socket.write_nonblock(chunk[written..-1]) - rescue IO::WaitWritable, Errno::EINTR - select_timeout = deadline - Utils.monotonic_time - rv = Kernel.select(nil, [@socket], nil, select_timeout) - if BSON::Environment.jruby? - # Ignore the return value of Kernel.select. - # On JRuby, select appears to return nil prior to timeout expiration - # (apparently due to a EAGAIN) which then causes us to fail the read - # even though we could have retried it. - # Check the deadline ourselves. - if deadline - select_timeout = deadline - Utils.monotonic_time - if select_timeout <= 0 - raise_timeout_error!("Took more than #{timeout} seconds to receive data", true) - end + while written < chunk.length + begin + written += @socket.write_nonblock(chunk[written..-1]) + rescue IO::WaitWritable, Errno::EINTR + if !wait_for_socket_to_be_writable(deadline) + raise_timeout_error!("Took more than #{timeout} seconds to receive data", true) end - elsif rv.nil? - raise_timeout_error!("Took more than #{timeout} seconds to receive data (select call timed out)", true) + + retry end - retry end + written end + def wait_for_socket_to_be_writable(deadline) + select_timeout = deadline - Utils.monotonic_time + rv = Kernel.select(nil, [@socket], nil, select_timeout) + if BSON::Environment.jruby? + # Ignore the return value of Kernel.select. + # On JRuby, select appears to return nil prior to timeout expiration + # (apparently due to a EAGAIN) which then causes us to fail the read + # even though we could have retried it. + # Check the deadline ourselves. + select_timeout = deadline - Utils.monotonic_time + return select_timeout > 0 + elsif rv.nil? + return false + end + + true + end + def unix_socket?(sock) defined?(UNIXSocket) && sock.is_a?(UNIXSocket) end diff --git a/spec/mongo/socket_spec.rb b/spec/mongo/socket_spec.rb index f6ef8cd696..1e532fc926 100644 --- a/spec/mongo/socket_spec.rb +++ b/spec/mongo/socket_spec.rb @@ -68,12 +68,6 @@ let(:raw_socket) { socket.instance_variable_get('@socket') } - let(:wait_readable_class) do - Class.new(Exception) do - include IO::WaitReadable - end - end - context 'timeout' do clean_slate_for_all @@ -116,4 +110,62 @@ end end end + + describe '#write' do + let(:target_host) do + host = ClusterConfig.instance.primary_address_host + # Take ipv4 address + Socket.getaddrinfo(host, 0).detect { |ai| ai.first == 'AF_INET' }[3] + end + + let(:socket) do + Mongo::Socket::TCP.new(target_host, ClusterConfig.instance.primary_address_port, 1, Socket::PF_INET) + end + + let(:raw_socket) { socket.instance_variable_get('@socket') } + + context 'with timeout' do + let(:timeout) { 5_000 } + + context 'data is less than WRITE_CHUNK_SIZE' do + let(:data) { "a" * 1024 } + + context 'when a partial write occurs' do + before do + expect(raw_socket) + .to receive(:write_nonblock) + .twice + .and_return(data.length / 2) + end + + it 'eventually writes everything' do + expect(socket.write(data, timeout: timeout)). + to be === data.length + end + end + end + + context 'data is greater than WRITE_CHUNK_SIZE' do + let(:data) { "a" * (2 * Mongo::Socket::WRITE_CHUNK_SIZE + 256) } + + context 'when a partial write occurs' do + before do + expect(raw_socket) + .to receive(:write_nonblock) + .exactly(4).times + .and_return(Mongo::Socket::WRITE_CHUNK_SIZE, + 128, + Mongo::Socket::WRITE_CHUNK_SIZE - 128, + 256) + end + + it 'eventually writes everything' do +puts "=== spec begins" + expect(socket.write(data, timeout: timeout)). + to be === data.length + end + end + end + end + end end From fa81e21ee5d2d74a5b69321d45fa8b8ddf6034c0 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Thu, 24 Apr 2025 16:32:41 -0600 Subject: [PATCH 2/7] simplify the socket wait logic --- lib/mongo/socket.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/mongo/socket.rb b/lib/mongo/socket.rb index 0c4a992d99..52cdd1d920 100644 --- a/lib/mongo/socket.rb +++ b/lib/mongo/socket.rb @@ -543,6 +543,7 @@ def write_chunk(chunk, timeout) def wait_for_socket_to_be_writable(deadline) select_timeout = deadline - Utils.monotonic_time rv = Kernel.select(nil, [@socket], nil, select_timeout) + if BSON::Environment.jruby? # Ignore the return value of Kernel.select. # On JRuby, select appears to return nil prior to timeout expiration @@ -551,11 +552,9 @@ def wait_for_socket_to_be_writable(deadline) # Check the deadline ourselves. select_timeout = deadline - Utils.monotonic_time return select_timeout > 0 - elsif rv.nil? - return false end - true + !rv.nil? end def unix_socket?(sock) From 34540c983e2c19b503f092bcf76aab3a16ae27fa Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Fri, 25 Apr 2025 08:01:27 -0600 Subject: [PATCH 3/7] Ubuntu 20.04 has been retired on GitHub Actions --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7144427790..0b680f9678 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,12 +10,12 @@ jobs: env: CI: true TESTOPTS: "-v" - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 continue-on-error: true strategy: fail-fast: false matrix: - os: [ ubuntu-20.04 ] + os: [ ubuntu-22.04 ] ruby: ["2.7", "3.0", "3.1", "3.2", "3.3"] mongodb: ["4.4", "5.0", "6.0", "7.0", "8.0"] topology: [replica_set, sharded_cluster] From e71db1cb1716bb6d4b2267afe5d7c28aa27a51f2 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Fri, 25 Apr 2025 08:28:37 -0600 Subject: [PATCH 4/7] reduce the number of tests run on GH focus on modern servers, recent rubies --- .github/workflows/test.yml | 33 ++++----------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0b680f9678..9c7f603de2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,40 +10,15 @@ jobs: env: CI: true TESTOPTS: "-v" - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 continue-on-error: true strategy: fail-fast: false matrix: os: [ ubuntu-22.04 ] - ruby: ["2.7", "3.0", "3.1", "3.2", "3.3"] - mongodb: ["4.4", "5.0", "6.0", "7.0", "8.0"] - topology: [replica_set, sharded_cluster] - include: - - os: macos - ruby: "2.7" - mongodb: "7.0" - topology: server - - os: macos - ruby: "3.0" - mongodb: "7.0" - topology: server - - os: ubuntu-latest - ruby: "2.7" - mongodb: "7.0" - topology: server - - os: ubuntu-latest - ruby: "3.1" - mongodb: "7.0" - topology: server - - os: ubuntu-latest - ruby: "3.2" - mongodb: "7.0" - topology: server - - os: ubuntu-latest - ruby: "3.2" - mongodb: "8.0" - topology: replica_set + ruby: [ "3.2" ] + mongodb: [ "7.0", "8.0" ] + topology: [ replica_set, sharded_cluster ] steps: - name: repo checkout uses: actions/checkout@v2 From 01d2645dcb2ba104c8485e9b0a64773506575005 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Fri, 25 Apr 2025 08:31:47 -0600 Subject: [PATCH 5/7] well, let's try 22.04, then --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9c7f603de2..2c128f2e03 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ jobs: env: CI: true TESTOPTS: "-v" - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 continue-on-error: true strategy: fail-fast: false From 056a481a3aeed572fbe52a4d28831e8af1cb3a5f Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Fri, 25 Apr 2025 11:49:52 -0600 Subject: [PATCH 6/7] respect the reported number of bytes written write() is blocking, but apparently may still not write the entire requested block (for various reasons). It is safest to NOT assume it wrote everything, and instead check the return value for the number of bytes actually written. --- lib/mongo/socket.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/mongo/socket.rb b/lib/mongo/socket.rb index 52cdd1d920..ae8c95386c 100644 --- a/lib/mongo/socket.rb +++ b/lib/mongo/socket.rb @@ -491,9 +491,8 @@ def write_without_timeout(*args) buf = buf.to_s i = 0 while i < buf.length - chunk = buf[i...i+WRITE_CHUNK_SIZE] - @socket.write(chunk) - i += WRITE_CHUNK_SIZE + chunk = buf[i, WRITE_CHUNK_SIZE] + i += @socket.write(chunk) end end end From 2b3af31a0887306104be977fe5176a938afd0e4a Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Fri, 25 Apr 2025 13:04:43 -0600 Subject: [PATCH 7/7] different errors being raised --- .../client_side_encryption/custom_endpoint_spec.rb | 6 +----- spec/mongo/socket_spec.rb | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/spec/integration/client_side_encryption/custom_endpoint_spec.rb b/spec/integration/client_side_encryption/custom_endpoint_spec.rb index 016a99c29a..cdddd2d350 100644 --- a/spec/integration/client_side_encryption/custom_endpoint_spec.rb +++ b/spec/integration/client_side_encryption/custom_endpoint_spec.rb @@ -94,11 +94,7 @@ end let(:error_regex) do - if BSON::Environment.jruby? - /SocketError/ - else - /Connection refused/ - end + /Connection refused|SocketError|SocketTimeoutError/ end it_behaves_like 'raising a KMS error' diff --git a/spec/mongo/socket_spec.rb b/spec/mongo/socket_spec.rb index 1e532fc926..b8c9f1f12e 100644 --- a/spec/mongo/socket_spec.rb +++ b/spec/mongo/socket_spec.rb @@ -160,7 +160,6 @@ end it 'eventually writes everything' do -puts "=== spec begins" expect(socket.write(data, timeout: timeout)). to be === data.length end