From f831f6f7ce369b979d596cdf276afbd377efbf28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Mon, 23 Jun 2025 14:16:30 -0400 Subject: [PATCH 01/13] no inbounds on AbstractArray iterate and axe Array/Memory methods --- base/abstractarray.jl | 5 +++-- base/array.jl | 4 ---- base/genericmemory.jl | 9 --------- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 8f55e6a56eba8..665d70c525d81 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -387,6 +387,7 @@ function eachindex(A::AbstractArray, B::AbstractArray...) @inline eachindex(IndexStyle(A,B...), A, B...) end +eachindex(::IndexLinear, A::Union{Array, Memory}) = unchecked_oneto(length(A)) eachindex(::IndexLinear, A::AbstractArray) = (@inline; oneto(length(A))) eachindex(::IndexLinear, A::AbstractVector) = (@inline; axes1(A)) function eachindex(::IndexLinear, A::AbstractArray, B::AbstractArray...) @@ -1237,7 +1238,7 @@ oneunit(x::AbstractMatrix{T}) where {T} = _one(oneunit(T), x) iterate_starting_state(A) = iterate_starting_state(A, IndexStyle(A)) iterate_starting_state(A, ::IndexLinear) = firstindex(A) iterate_starting_state(A, ::IndexStyle) = (eachindex(A),) -iterate(A::AbstractArray, state = iterate_starting_state(A)) = _iterate(A, state) +@inline iterate(A::AbstractArray, state = iterate_starting_state(A)) = _iterate(A, state) function _iterate(A::AbstractArray, state::Tuple) y = iterate(state...) y === nothing && return nothing @@ -1245,7 +1246,7 @@ function _iterate(A::AbstractArray, state::Tuple) end function _iterate(A::AbstractArray, state::Integer) checkbounds(Bool, A, state) || return nothing - @inbounds(A[state]), state + one(state) + A[state], state + one(state) end isempty(a::AbstractArray) = (length(a) == 0) diff --git a/base/array.jl b/base/array.jl index 5c9b37f887ea8..b614bf6f50d56 100644 --- a/base/array.jl +++ b/base/array.jl @@ -902,10 +902,6 @@ function grow_to!(dest, itr, st) return dest end -## Iteration ## - -iterate(A::Array, i=1) = (@inline; _iterate_array(A, i)) - ## Indexing: getindex ## """ diff --git a/base/genericmemory.jl b/base/genericmemory.jl index 1b45ba23bcded..8c1d97edeeeec 100644 --- a/base/genericmemory.jl +++ b/base/genericmemory.jl @@ -223,15 +223,6 @@ Memory{T}(x::AbstractArray{S,1}) where {T,S} = copyto_axcheck!(Memory{T}(undef, ## copying iterators to containers -## Iteration ## - -function _iterate_array(A::Union{Memory, Array}, i::Int) - @inline - (i - 1)%UInt < length(A)%UInt ? (A[i], i + 1) : nothing -end - -iterate(A::Memory, i=1) = (@inline; _iterate_array(A, i)) - ## Indexing: getindex ## # Faster contiguous indexing using copyto! for AbstractUnitRange and Colon From b09514e4632088cc0d7819c3ef0a2d4e7e62eeaf Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Mon, 23 Jun 2025 14:52:52 -0400 Subject: [PATCH 02/13] avoid branch in OrdinalRange length --- base/abstractarray.jl | 4 ++-- base/range.jl | 4 +--- base/subarray.jl | 2 ++ 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 665d70c525d81..6e8dbda13c3a1 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1239,12 +1239,12 @@ iterate_starting_state(A) = iterate_starting_state(A, IndexStyle(A)) iterate_starting_state(A, ::IndexLinear) = firstindex(A) iterate_starting_state(A, ::IndexStyle) = (eachindex(A),) @inline iterate(A::AbstractArray, state = iterate_starting_state(A)) = _iterate(A, state) -function _iterate(A::AbstractArray, state::Tuple) +@inline function _iterate(A::AbstractArray, state::Tuple) y = iterate(state...) y === nothing && return nothing A[y[1]], (state[1], tail(y)...) end -function _iterate(A::AbstractArray, state::Integer) +@inline function _iterate(A::AbstractArray, state::Integer) checkbounds(Bool, A, state) || return nothing A[state], state + one(state) end diff --git a/base/range.jl b/base/range.jl index 9d2b9fd736b22..a625fe50d28f2 100644 --- a/base/range.jl +++ b/base/range.jl @@ -810,10 +810,8 @@ let bigints = Union{Int, UInt, Int64, UInt64, Int128, UInt128}, # n.b. !(s isa T) if s isa Unsigned || -1 <= s <= 1 || s == -s a = div(diff, s) % typeof(diff) - elseif s < 0 - a = div(unsigned(-diff), -s) % typeof(diff) else - a = div(unsigned(diff), s) % typeof(diff) + a = div(unsigned(sign(s)*diff), abs(s)) % typeof(diff) end return a + oneunit(a) end diff --git a/base/subarray.jl b/base/subarray.jl index eacaddc068f1f..4c48fed7f4b07 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -515,6 +515,8 @@ end # Since bounds-checking is performance-critical and uses # indices, it's worth optimizing these implementations thoroughly axes(S::SubArray) = (@inline; _indices_sub(S.indices...)) +axes1(::SubArray{<:Any,0}) = OneTo(1) +axes1(S::SubArray) = (@inline; axes1(S.indices[1])) _indices_sub(::Real, I...) = (@inline; _indices_sub(I...)) _indices_sub() = () function _indices_sub(i1::AbstractArray, I...) From 9082e688c96b0337fbfce6e106bc7719b8efd251 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Mon, 23 Jun 2025 16:22:23 -0400 Subject: [PATCH 03/13] fix SubArray axes1 --- base/subarray.jl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/base/subarray.jl b/base/subarray.jl index 4c48fed7f4b07..2c80948d05bc0 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -515,8 +515,6 @@ end # Since bounds-checking is performance-critical and uses # indices, it's worth optimizing these implementations thoroughly axes(S::SubArray) = (@inline; _indices_sub(S.indices...)) -axes1(::SubArray{<:Any,0}) = OneTo(1) -axes1(S::SubArray) = (@inline; axes1(S.indices[1])) _indices_sub(::Real, I...) = (@inline; _indices_sub(I...)) _indices_sub() = () function _indices_sub(i1::AbstractArray, I...) @@ -524,6 +522,15 @@ function _indices_sub(i1::AbstractArray, I...) (axes(i1)..., _indices_sub(I...)...) end +axes1(::SubArray{<:Any,0}) = OneTo(1) +axes1(S::SubArray) = (@inline; axes1(S.indices[1])) +_axes1_sub(::Real, I...) = (@inline; _axes1_sub(I...)) +_axes1_sub() = () +function _axes1_sub(i1::AbstractArray, I...) + @inline + axes1(i1) +end + has_offset_axes(S::SubArray) = has_offset_axes(S.indices...) function replace_in_print_matrix(S::SubArray{<:Any,2,<:AbstractMatrix}, i::Integer, j::Integer, s::AbstractString) From 6d727aac63a3d88467a353dae408824821bb92e4 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Mon, 23 Jun 2025 16:22:35 -0400 Subject: [PATCH 04/13] implement optimized checkbounds for FastSubArray --- base/subarray.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/base/subarray.jl b/base/subarray.jl index 2c80948d05bc0..429b8db6e6abf 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -324,6 +324,11 @@ FastSubArray{T,N,P,I} = SubArray{T,N,P,I,true} @inline _reindexlinear(V::FastSubArray, i::Int) = V.offset1 + V.stride1*i @inline _reindexlinear(V::FastSubArray, i::AbstractUnitRange{Int}) = V.offset1 .+ V.stride1 .* i +# For Fast subarrays, we can safely compute the parent's index for its checkbounds; this +# helps with automatic bounds elision (but isn't possible generally because it's precisely +# the re-indexing into `V.indices` that might go out-of-bounds) +checkbounds(::Type{Bool}, V::FastSubArray, i::Int) = (@inline(); checkbounds(Bool, V.parent, _reindexlinear(V, i))) + function getindex(V::FastSubArray, i::Int) @inline @boundscheck checkbounds(V, i) From aaa8629323277ebd96d05b4606f76ce8166f9cca Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Mon, 23 Jun 2025 16:54:11 -0400 Subject: [PATCH 05/13] Remove inbounds on CodeUnits, too --- base/strings/basic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/strings/basic.jl b/base/strings/basic.jl index 314903898b92a..b9fa490ada81d 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -797,7 +797,7 @@ size(s::CodeUnits) = (length(s),) elsize(s::Type{<:CodeUnits{T}}) where {T} = sizeof(T) @propagate_inbounds getindex(s::CodeUnits, i::Int) = codeunit(s.s, i) IndexStyle(::Type{<:CodeUnits}) = IndexLinear() -@inline iterate(s::CodeUnits, i=1) = (i % UInt) - 1 < length(s) ? (@inbounds s[i], i + 1) : nothing +@inline iterate(s::CodeUnits, i=1) = checkbounds(Bool, s.s, i) ? (s[i], i + 1) : nothing write(io::IO, s::CodeUnits) = write(io, s.s) From 683f23c6c799ce0bb77e3d8ee438bc241f533275 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Mon, 23 Jun 2025 16:54:58 -0400 Subject: [PATCH 06/13] add tests --- test/boundscheck_exec.jl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/boundscheck_exec.jl b/test/boundscheck_exec.jl index e3d5e77c05384..7f6a80beacee3 100644 --- a/test/boundscheck_exec.jl +++ b/test/boundscheck_exec.jl @@ -353,4 +353,20 @@ if bc_opt == bc_default @test (@allocated no_alias_prove(5)) == 0 end +@testset "automatic boundscheck elision for iteration on some important types" begin + f = (bc_opt == bc_on) ? identity : (!) + + @test f(contains(sprint(code_llvm, iterate, (Memory{UInt8}, Int)), "unreachable")) + + @test f(contains(sprint(code_llvm, iterate, (Vector{UInt8}, Int)), "unreachable")) + @test f(contains(sprint(code_llvm, iterate, (Matrix{UInt8}, Int)), "unreachable")) + @test f(contains(sprint(code_llvm, iterate, (Array{UInt8,3}, Int)), "unreachable")) + + @test f(contains(sprint(code_llvm, iterate, (SubArray{Float64, 1, Vector{Float64}, Tuple{Base.Slice{Base.OneTo{Int64}}}, true}, Int)), "unreachable")) + @test f(contains(sprint(code_llvm, iterate, (SubArray{Float64, 2, Matrix{Float64}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, true}, Int)), "unreachable")) + @test f(contains(sprint(code_llvm, iterate, (SubArray{Float64, 2, Matrix{Float64}, Tuple{Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}}, true}, Int)), "unreachable")) + + @test f(contains(sprint(code_llvm, iterate, (Base.CodeUnits{UInt8,String}, Int)), "unreachable")) +end + end From f31083e7825b1f70b893fd94d3663aa23f614115 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 24 Jun 2025 09:10:31 -0400 Subject: [PATCH 07/13] fixup! fix SubArray axes1 --- base/subarray.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/base/subarray.jl b/base/subarray.jl index 429b8db6e6abf..5cb5ccb33ebea 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -528,9 +528,10 @@ function _indices_sub(i1::AbstractArray, I...) end axes1(::SubArray{<:Any,0}) = OneTo(1) -axes1(S::SubArray) = (@inline; axes1(S.indices[1])) -_axes1_sub(::Real, I...) = (@inline; _axes1_sub(I...)) +axes1(S::SubArray) = (@inline; _axes1_sub(S.indices...)) _axes1_sub() = () +_axes1_sub(::Real, I...) = (@inline; _axes1_sub(I...)) +_axes1_sub(::AbstractArray{<:Any,0}, I...) = _axes1_sub(I...) function _axes1_sub(i1::AbstractArray, I...) @inline axes1(i1) From 74cf796cc53a3774fe11541266aacaf673075d21 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 24 Jun 2025 10:45:06 -0400 Subject: [PATCH 08/13] remove bugged SubArray checkbounds method This is not valid because it only checks the resulting index into the parent (which we explicitly say we do not check) but skips the checks into the indices (which are the important ones!) --- base/subarray.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/base/subarray.jl b/base/subarray.jl index 5cb5ccb33ebea..3a0be7d82b981 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -324,11 +324,6 @@ FastSubArray{T,N,P,I} = SubArray{T,N,P,I,true} @inline _reindexlinear(V::FastSubArray, i::Int) = V.offset1 + V.stride1*i @inline _reindexlinear(V::FastSubArray, i::AbstractUnitRange{Int}) = V.offset1 .+ V.stride1 .* i -# For Fast subarrays, we can safely compute the parent's index for its checkbounds; this -# helps with automatic bounds elision (but isn't possible generally because it's precisely -# the re-indexing into `V.indices` that might go out-of-bounds) -checkbounds(::Type{Bool}, V::FastSubArray, i::Int) = (@inline(); checkbounds(Bool, V.parent, _reindexlinear(V, i))) - function getindex(V::FastSubArray, i::Int) @inline @boundscheck checkbounds(V, i) From 6e1afb571f8731e5581fdd86a99c5cdb84b2e6a9 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 24 Jun 2025 10:59:25 -0400 Subject: [PATCH 09/13] CodeUnits: implement checkbounds and fallback for iterate --- base/strings/basic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/strings/basic.jl b/base/strings/basic.jl index b9fa490ada81d..876aa93dd7994 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -797,7 +797,7 @@ size(s::CodeUnits) = (length(s),) elsize(s::Type{<:CodeUnits{T}}) where {T} = sizeof(T) @propagate_inbounds getindex(s::CodeUnits, i::Int) = codeunit(s.s, i) IndexStyle(::Type{<:CodeUnits}) = IndexLinear() -@inline iterate(s::CodeUnits, i=1) = checkbounds(Bool, s.s, i) ? (s[i], i + 1) : nothing +checkbounds(::Type{Bool}, s::CodeUnits, i) = checkbounds(Bool, s.s, i) write(io::IO, s::CodeUnits) = write(io, s.s) From d97e69f8b3568fea3aafee27f89c88622a7bc88d Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 24 Jun 2025 12:19:16 -0400 Subject: [PATCH 10/13] do not test for unreachable with check-bounds=yes theoretically this is not guaranteed -- indeed an unreachable branch is not currently present for CodeUnits --- test/boundscheck_exec.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/boundscheck_exec.jl b/test/boundscheck_exec.jl index 7f6a80beacee3..2822176929f4a 100644 --- a/test/boundscheck_exec.jl +++ b/test/boundscheck_exec.jl @@ -354,19 +354,19 @@ if bc_opt == bc_default end @testset "automatic boundscheck elision for iteration on some important types" begin - f = (bc_opt == bc_on) ? identity : (!) + if bc_opt != bc_on + @test !contains(sprint(code_llvm, iterate, (Memory{UInt8}, Int)), "unreachable") - @test f(contains(sprint(code_llvm, iterate, (Memory{UInt8}, Int)), "unreachable")) + @test !contains(sprint(code_llvm, iterate, (Vector{UInt8}, Int)), "unreachable") + @test !contains(sprint(code_llvm, iterate, (Matrix{UInt8}, Int)), "unreachable") + @test !contains(sprint(code_llvm, iterate, (Array{UInt8,3}, Int)), "unreachable") - @test f(contains(sprint(code_llvm, iterate, (Vector{UInt8}, Int)), "unreachable")) - @test f(contains(sprint(code_llvm, iterate, (Matrix{UInt8}, Int)), "unreachable")) - @test f(contains(sprint(code_llvm, iterate, (Array{UInt8,3}, Int)), "unreachable")) + @test !contains(sprint(code_llvm, iterate, (SubArray{Float64, 1, Vector{Float64}, Tuple{Base.Slice{Base.OneTo{Int64}}}, true}, Int)), "unreachable") + @test !contains(sprint(code_llvm, iterate, (SubArray{Float64, 2, Matrix{Float64}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, true}, Int)), "unreachable") + @test !contains(sprint(code_llvm, iterate, (SubArray{Float64, 2, Matrix{Float64}, Tuple{Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}}, true}, Int)), "unreachable") - @test f(contains(sprint(code_llvm, iterate, (SubArray{Float64, 1, Vector{Float64}, Tuple{Base.Slice{Base.OneTo{Int64}}}, true}, Int)), "unreachable")) - @test f(contains(sprint(code_llvm, iterate, (SubArray{Float64, 2, Matrix{Float64}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, true}, Int)), "unreachable")) - @test f(contains(sprint(code_llvm, iterate, (SubArray{Float64, 2, Matrix{Float64}, Tuple{Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}}, true}, Int)), "unreachable")) - - @test f(contains(sprint(code_llvm, iterate, (Base.CodeUnits{UInt8,String}, Int)), "unreachable")) + @test !contains(sprint(code_llvm, iterate, (Base.CodeUnits{UInt8,String}, Int)), "unreachable") + end end end From dcb3f55f1dcdcad5b21a98329525cbef957d41fd Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 24 Jun 2025 13:11:45 -0400 Subject: [PATCH 11/13] fix ambiguity and more properly constrain checkbounds for CodeUnits --- base/strings/basic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/strings/basic.jl b/base/strings/basic.jl index 876aa93dd7994..a436a5494fa79 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -797,7 +797,7 @@ size(s::CodeUnits) = (length(s),) elsize(s::Type{<:CodeUnits{T}}) where {T} = sizeof(T) @propagate_inbounds getindex(s::CodeUnits, i::Int) = codeunit(s.s, i) IndexStyle(::Type{<:CodeUnits}) = IndexLinear() -checkbounds(::Type{Bool}, s::CodeUnits, i) = checkbounds(Bool, s.s, i) +checkbounds(::Type{Bool}, s::CodeUnits, i::Integer) = checkbounds(Bool, s.s, i) write(io::IO, s::CodeUnits) = write(io, s.s) From 8d378dff08d2acc169c0673f303188556cef092f Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 1 Jul 2025 09:39:30 -0400 Subject: [PATCH 12/13] use flipsign instead of multiplying sign Co-authored-by: N5N3 <2642243996@qq.com> --- base/range.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/range.jl b/base/range.jl index a625fe50d28f2..5b2950c052f19 100644 --- a/base/range.jl +++ b/base/range.jl @@ -811,7 +811,7 @@ let bigints = Union{Int, UInt, Int64, UInt64, Int128, UInt128}, if s isa Unsigned || -1 <= s <= 1 || s == -s a = div(diff, s) % typeof(diff) else - a = div(unsigned(sign(s)*diff), abs(s)) % typeof(diff) + a = div(unsigned(flipsign(diff, s)), abs(s)) % typeof(diff) end return a + oneunit(a) end From 61e26a069572e01474a1bf13aedcb66297e38745 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 1 Jul 2025 11:55:42 -0400 Subject: [PATCH 13/13] defer length(::OrdinalRange) changes; just inline it for now --- base/range.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/base/range.jl b/base/range.jl index 5b2950c052f19..3e1cd77eb914b 100644 --- a/base/range.jl +++ b/base/range.jl @@ -687,7 +687,7 @@ end ## interface implementations length(r::AbstractRange) = error("length implementation missing") # catch mistakes -size(r::AbstractRange) = (length(r),) +size(r::AbstractRange) = (@inline; (length(r),)) isempty(r::StepRange) = # steprange_last(r.start, r.step, r.stop) == r.stop @@ -802,6 +802,7 @@ let bigints = Union{Int, UInt, Int64, UInt64, Int128, UInt128}, # slightly more accurate length and checked_length in extreme cases # (near typemax) for types with known `unsigned` functions function length(r::OrdinalRange{T}) where T<:bigints + @inline s = step(r) diff = last(r) - first(r) isempty(r) && return zero(diff) @@ -810,8 +811,10 @@ let bigints = Union{Int, UInt, Int64, UInt64, Int128, UInt128}, # n.b. !(s isa T) if s isa Unsigned || -1 <= s <= 1 || s == -s a = div(diff, s) % typeof(diff) + elseif s < 0 + a = div(unsigned(-diff), -s) % typeof(diff) else - a = div(unsigned(flipsign(diff, s)), abs(s)) % typeof(diff) + a = div(unsigned(diff), s) % typeof(diff) end return a + oneunit(a) end