Skip to content

Commit 0f7dfc9

Browse files
authored
Breaking: always represent inner array as UnsafeArray (#18)
* Breaking: always use UnsafeArrays inside * fix typo * rm `StrideArraysCore` * rm `R` type; not necessary * add no-op constructor * more constructors * fix test * cleanup ci name * update versions * fix test again * test `recursive_alloc_arrays` * try without key
1 parent e21dde6 commit 0f7dfc9

File tree

9 files changed

+189
-94
lines changed

9 files changed

+189
-94
lines changed

.github/workflows/CI.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ concurrency:
1616
cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
1717
jobs:
1818
test:
19-
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} - Bumper ${{ matrix.bumper }}
19+
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
2020
runs-on: ${{ matrix.os }}
2121
strategy:
2222
fail-fast: false
@@ -31,15 +31,15 @@ jobs:
3131
- x64
3232
- x86
3333
steps:
34-
- uses: actions/checkout@v3
35-
- uses: julia-actions/setup-julia@v1
34+
- uses: actions/checkout@v4
35+
- uses: julia-actions/setup-julia@v2
3636
with:
3737
version: ${{ matrix.version }}
3838
arch: ${{ matrix.arch }}
39-
- uses: julia-actions/cache@v1
39+
- uses: julia-actions/cache@v2
4040
- uses: julia-actions/julia-buildpkg@v1
4141
- uses: julia-actions/julia-runtest@v1
4242
- uses: julia-actions/julia-processcoverage@v1
43-
- uses: codecov/codecov-action@v3
43+
- uses: codecov/codecov-action@v5
4444
with:
4545
files: lcov.info

.github/workflows/docs.yml

Lines changed: 16 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,25 @@
1-
name: Documentation
1+
name: Documenter
22
on:
33
push:
4-
branches: [main]
5-
tags: '*'
6-
paths:
7-
- '.github/workflows/docs.yml'
8-
- 'src/**'
9-
- 'docs/**'
10-
- 'Project.toml'
4+
branches: [main, master]
5+
tags: [v*]
116
pull_request:
12-
types: [opened, synchronize, reopened, ready_for_review]
13-
paths:
14-
- '.github/workflows/docs.yml'
15-
- 'src/**'
16-
- 'docs/**'
17-
- 'Project.toml'
187

198
jobs:
20-
Documentation:
21-
# Run on push's or non-draft PRs
22-
if: (github.event_name == 'push') || (github.event.pull_request.draft == false) || (github.event_name == 'workflow_dispatch')
9+
Documenter:
10+
permissions:
11+
contents: write
12+
statuses: write
13+
name: Documentation
2314
runs-on: ubuntu-latest
2415
steps:
25-
- uses: actions/checkout@v2
26-
- uses: julia-actions/setup-julia@latest
16+
- uses: actions/checkout@v4
17+
- uses: julia-actions/setup-julia@v2
2718
with:
28-
version: "1.8"
29-
- name: Cache artifacts
30-
uses: actions/cache@v2
19+
version: '1.10' # replace this with whatever version you need
20+
show-versioninfo: true # this causes versioninfo to be printed to the action log
21+
- uses: julia-actions/cache@v2 # cache using https://github.com/julia-actions/cache
22+
- uses: julia-actions/julia-buildpkg@v1 # if package requires Pkg.build()
23+
- uses: julia-actions/julia-docdeploy@v1
3124
env:
32-
cache-name: cache-artifacts
33-
with:
34-
path: |
35-
~/.julia/artifacts
36-
key: ${{ runner.os }}-docs-${{ env.cache-name }}-${{ hashFiles('**/docs/Project.toml') }}
37-
restore-keys: |
38-
${{ runner.os }}-docs-${{ env.cache-name }}-
39-
${{ runner.os }}-docs-
40-
${{ runner.os }}-
41-
- name: Install dependencies
42-
shell: julia --color=yes --project=docs/ {0}
43-
run: |
44-
using Pkg
45-
Pkg.develop(PackageSpec(path=pwd()))
46-
Pkg.instantiate()
47-
- uses: julia-actions/julia-docdeploy@releases/v1
48-
env:
49-
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token
50-
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key
25+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Project.toml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,31 @@
11
name = "AllocArrays"
22
uuid = "5c00bae2-1499-4716-9206-27f63fd08a44"
33
authors = ["Eric P. Hanson"]
4-
version = "0.2.0"
4+
version = "0.3.0"
55

66
[deps]
77
Bumper = "8ce10254-0962-460f-a3d8-1f77fea1446e"
88
ConcurrentUtilities = "f0e56b4a-5159-44fe-b623-3e5288b988bb"
99
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
1010
ScopedValues = "7e506255-f358-4e82-b7e4-beb19740aa63"
11+
UnsafeArrays = "c4a57d5a-5b31-53a6-b365-19f8c011fbd6"
1112

1213
[compat]
1314
Aqua = "0.7"
1415
Bumper = "0.7"
1516
ConcurrentUtilities = "2.2.1"
17+
Functors = "0.5.2"
1618
PrecompileTools = "1.2"
1719
ScopedValues = "1"
18-
StrideArraysCore = "0.5.1"
20+
UnsafeArrays = "1.0.6"
1921
julia = "1.10"
2022

2123
[extras]
2224
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
2325
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
26+
Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
2427
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
25-
StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da"
2628
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
2729

2830
[targets]
29-
test = ["Aqua", "Flux", "Random", "StrideArraysCore", "Test"]
31+
test = ["Aqua", "Flux", "Functors", "Random", "Test"]

README.md

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ function basic_reduction(a)
3535
end
3636

3737
arr = ones(Float64, 100_000)
38-
@time basic_reduction(arr) # 0.057540 seconds (209.39 k allocations: 16.487 MiB, 99.48% compilation time)
39-
@time basic_reduction(arr) # 0.000265 seconds (7 allocations: 2.289 MiB)
38+
@time basic_reduction(arr) # 0.068864 seconds (377.41 k allocations: 21.811 MiB, 99.47% compilation time)
39+
@time basic_reduction(arr) # 0.007774 seconds (10 allocations: 2.289 MiB, 94.21% gc time)
4040

4141

4242
function bumper_reduction!(b, a)
@@ -50,8 +50,8 @@ end
5050
b = BumperAllocator(2^24) # 16 MiB
5151
a = AllocArray(arr);
5252

53-
@time bumper_reduction!(b, a) # 0.223121 seconds (748.98 k allocations: 2.045 GiB, 4.32% gc time, 97.59% compilation time)
54-
@time bumper_reduction!(b, a) # 0.000311 seconds (36 allocations: 1.172 KiB)
53+
@time bumper_reduction!(b, a) # 0.246476 seconds (1.05 M allocations: 52.495 MiB, 19.78% gc time, 99.65% compilation time)
54+
@time bumper_reduction!(b, a) # 0.000226 seconds (27 allocations: 832 bytes)
5555
```
5656

5757
We can see we brought allocations down from 2.289 MiB to ~1 KiB.
@@ -60,18 +60,49 @@ For a less-toy example, in `test/flux.jl` we test inference over a Flux model:
6060

6161
```julia
6262
# Baseline: Array
63-
infer!(b, predictions, model, data): 0.499735 seconds (8.05 k allocations: 306.796 MiB, 6.47% gc time)
64-
# Baseline: StrideArray
65-
stride_data = StrideArray.(data)
66-
infer!(b, predictions, model, stride_data): 0.364180 seconds (8.05 k allocations: 306.796 MiB, 8.32% gc time)
67-
# Using AllocArray:
63+
infer!(b, predictions, model, data): 0.163573 seconds (6.77 k allocations: 221.508 MiB, 16.42% gc time)
# Using AllocArray:
6864
alloc_data = AllocArray.(data)
69-
infer!(b, predictions, model, alloc_data): 0.351953 seconds (13.60 k allocations: 3.221 MiB)
65+
infer!(b, predictions, model, alloc_data): 0.114566 seconds (8.72 k allocations: 777.547 KiB)
66+
# see "Usage with Flux" below for `recursive_alloc_arrays` and `infer!`
67+
aa_model = recursive_alloc_arrays(model)
68+
infer!(b, predictions, aa_model, alloc_data): 0.113104 seconds (7.69 k allocations: 688.047 KiB)
69+
# checked example (use for testing)
7070
checked_alloc_data = CheckedAllocArray.(data)
71-
infer!(b, predictions, model, checked_alloc_data): 15.522897 seconds (25.54 k allocations: 3.742 MiB)
71+
infer!(b, predictions, model, checked_alloc_data): 13.721077 seconds (22.54 k allocations: 1.354 MiB)
7272
```
7373

74-
We can see in this example, we got 100x less allocation (and no GC time), and similar runtime, for `AllocArray`s. We can see `CheckedAllocArrays` are far slower here.
74+
In this example, `AllocArray`s give over 200x less allocation (and no GC time) with similar runtime. Using `aa_model` instead of `model` reduces allocations a bit further. `CheckedAllocArray`s are far slower.
75+
76+
## Usage with Flux
77+
78+
To reduce allocations as much as possible with Flux models, we can use the `recursive_alloc_arrays` function below to convert the arrays in a model, including the arrays in its parameters, to `AllocArray`s. This way, any layer that calls `similar` based on its parameters during the forward pass will use the bump allocator when the forward pass is invoked within `with_allocator`.
79+
80+
```julia
81+
using Functors
82+
83+
function recursive_alloc_arrays(obj)
84+
return fmap(x -> begin
85+
x isa AbstractArray || return x
86+
isbitstype(eltype(x)) || return x
87+
return AllocArray(x)
88+
end, obj; exclude=x -> x isa AbstractArray{<:Number} || x isa Function)
89+
end
90+
91+
function infer!(b::BumpAllocator, predictions, model, data)
92+
# Here we use a locked bumper for thread-safety, since NNlib multithreads
93+
# some of its functions. However, we are sure to only deallocate outside of the threaded region. (All concurrency occurs within the `model` call itself.)
94+
with_allocator(b) do
95+
for (idx, x) in enumerate(data)
96+
predictions[idx] .= model(x)
97+
reset!(b) # reset `b` after each batch
98+
end
99+
end
100+
# don't escape bump-allocated memory!
101+
return predictions
102+
end
103+
```
104+
105+
The specific function to use here may need to be adjusted depending on the details of the model and Functors.jl. This one is tested in `test/flux.jl` on a simple model.
75106

76107
## Design notes
77108

@@ -83,6 +114,8 @@ In particular, the caller must:
83114
- ...not allow memory allocated with a buffer to be live after the underlying buffer has been reset
84115
- ...reset their buffers before they run out of memory
85116

117+
In v0.3, AllocArrays started representing the inner array as an `UnsafeArray` always, rather than only when allocated via `similar`. This seems to avoid a Julia bug around missing vectorization: https://github.com/JuliaLang/julia/issues/57799.
118+
86119
## Safety
87120

88121
Before using a bump allocator (`BumperAllocator`, or `UncheckedBumperAllocator`) it is recommended the user read the [Bumper.jl README](https://github.com/MasonProtter/Bumper.jl#bumperjl) to understand how it works and what the limitations are.

julia_57799.jl

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
using AllocArrays, BenchmarkTools, UnsafeArrays
2+
3+
function mycopy(dest, src, iter)
4+
# precondition: dest and src do not alias
5+
# precondition: the iterators of dest and src are equal
6+
for i in iter
7+
@inbounds dest[i] = src[i]
8+
end
9+
return dest
10+
end
11+
12+
b = BumperAllocator(2^30);
13+
arr = rand(Float32, 50000000);
14+
arr2 = similar(arr);
15+
a = AllocArray(arr);
16+
17+
println("With mycopy(arr2, arr, eachindex(arr2)):")
18+
display(@benchmark mycopy(arr2, arr, eachindex(arr2)))
19+
20+
println("With a = AllocArray(a)")
21+
display(@benchmark with_allocator(b) do
22+
c = similar(a)
23+
mycopy(c, a, eachindex(c))
24+
end setup=(reset!(b)) evals=1)
25+
26+
# julia> include("bench.jl")
27+
# With mycopy(arr2, arr, eachindex(arr2)):
28+
# BenchmarkTools.Trial: 627 samples with 1 evaluation per sample.
29+
# Range (min … max): 6.662 ms … 38.670 ms ┊ GC (min … max): 0.00% … 0.00%
30+
# Time (median): 7.151 ms ┊ GC (median): 0.00%
31+
# Time (mean ± σ): 7.950 ms ± 2.168 ms ┊ GC (mean ± σ): 0.00% ± 0.00%
32+
33+
# █▇
34+
# ███▅▄▄▃▄▄▃▃▃▃▃▃▃▃▄▆▄▃▃▃▂▃▃▃▃▃▃▂▂▁▂▁▂▂▁▁▂▁▂▁▁▂▂▁▁▁▁▁▁▂▁▁▁▁▂ ▃
35+
# 6.66 ms Histogram: frequency by time 14 ms <
36+
37+
# Memory estimate: 16 bytes, allocs estimate: 1.
38+
# With a = AllocArray(a)
39+
# BenchmarkTools.Trial: 698 samples with 1 evaluation per sample.
40+
# Range (min … max): 6.659 ms … 17.031 ms ┊ GC (min … max): 0.00% … 0.00%
41+
# Time (median): 6.789 ms ┊ GC (median): 0.00%
42+
# Time (mean ± σ): 7.166 ms ± 941.846 μs ┊ GC (mean ± σ): 0.00% ± 0.00%
43+
44+
# █▇▄▄▂ ▁ ▁▁ ▁
45+
# ██████████████▇▇██▆▇▇▆▇█▅▇▇▇▅▆▆▁▄▁▄▆▁▄▁▆▄▅▅▁▆▅▄▆▅▄▁▆▅▅▁▁▄▅▄ ▇
46+
# 6.66 ms Histogram: log(frequency) by time 10.1 ms <
47+
48+
# Memory estimate: 384 bytes, allocs estimate: 13.

src/AllocArray.jl

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,55 @@
11
using Base: Base.Broadcast
22
using Base: Dims
33
using Base.Broadcast: Broadcasted, ArrayStyle
4+
using UnsafeArrays: UnsafeArray
45

56
"""
6-
struct AllocArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N}
7-
arr::A
7+
struct AllocArray{T,N} <: DenseArray{T,N}
8+
arr::UnsafeArray{T,N}
9+
gcref::Any
810
end
911
12+
AllocArray(arr::AbstractArray)
13+
1014
Wrapper type which forwards most array methods to the inner array `arr`,
1115
but dispatches `similar` to special allocation methods.
16+
17+
The inner array `arr` is always represented as an `UnsafeArray`,
18+
but the field `gcref` may hold a materialized `AbstractArray` corresponding
19+
to the same data, which is held to preserve a reference to the data to prevent GC.
20+
This field is never accessed or used, so the `::Any` type does not affect type stability
21+
of code using AllocArrays.
22+
23+
Use the constructor `AllocArray(arr)` to construct an `AllocArray`. Note that `arr`
24+
must be representable as an `UnsafeArray`, meaning its element type must be a bits type
25+
and it must support `pointer`. To support `UnitRange` and similar, `collect` it first.
26+
27+
Typically this constructor is only used at the entrypoint of a larger set of code
28+
which is expected to use `similar` based on this input for further allocations.
29+
When inside a `with_allocator` block, `similar` can be dispatched to a
30+
(dynamically-scoped) bump allocator.
1231
"""
13-
struct AllocArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N}
14-
arr::A
32+
struct AllocArray{T,N} <: DenseArray{T,N}
33+
arr::UnsafeArray{T,N}
34+
gcref::Any
35+
36+
AllocArray(gcref::AbstractArray{T,N}) where {T,N} = AllocArray{T,N}(gcref)
37+
function AllocArray{T,N}(gcref::AbstractArray{T,N}) where {T,N}
38+
arr = UnsafeArray(pointer(gcref), size(gcref))
39+
return new{eltype(arr),ndims(arr)}(arr, gcref)
40+
end
41+
42+
# already allocated with Bumper, no gcref needed
43+
AllocArray(arr::UnsafeArray{T,N}) where {T,N} = AllocArray{T,N}(arr)
44+
AllocArray{T,N}(arr::UnsafeArray{T,N}) where {T,N} = new{T,N}(arr, nothing)
45+
46+
47+
AllocArray(a::AllocArray) = a
48+
AllocArray{T,N}(a::AllocArray{T,N}) where {T,N} = a
1549
end
1650

17-
AllocMatrix{T,A<:AbstractMatrix{T}} = AllocArray{T,2,A}
18-
AllocVector{T,A<:AbstractVector{T}} = AllocArray{T,1,A}
51+
AllocMatrix{T} = AllocArray{T,2}
52+
AllocVector{T} = AllocArray{T,1}
1953

2054
@inline Base.parent(a::AllocArray) = getfield(a, :arr)
2155

@@ -41,7 +75,7 @@ end
4175

4276
Base.size(a::AllocArray) = size(getfield(a, :arr))
4377

44-
Base.IndexStyle(::Type{<:AllocArray{T,N,Arr}}) where {T,N,Arr} = Base.IndexStyle(Arr)
78+
Base.IndexStyle(::Type{<:AllocArray{T,N}}) where {T,N} = Base.IndexStyle(UnsafeArray{T,N})
4579

4680
# used only by broadcasting?
4781
function Base.similar(::Type{<:AllocArray{T}}, dims::Dims) where {T}
@@ -79,7 +113,7 @@ function Base.unsafe_convert(::Type{Ptr{T}}, a::AllocArray) where {T}
79113
return Base.unsafe_convert(Ptr{T}, getfield(a, :arr))
80114
end
81115

82-
Base.elsize(::Type{<:AllocArray{T,N,Arr}}) where {T,N,Arr} = Base.elsize(Arr)
116+
Base.elsize(::Type{<:AllocArray{T,N}}) where {T,N} = Base.elsize(UnsafeArray{T,N})
83117

84118
Base.strides(a::AllocArray) = strides(getfield(a, :arr))
85119

test/checked.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,5 +77,5 @@ end
7777

7878
@test sprint(showerror, InvalidMemoryException()) == "InvalidMemoryException: Array accessed after its memory has been deallocated."
7979

80-
@test Base.parent(c) === inner
80+
@test c.alloc_array.gcref === inner
8181
end

0 commit comments

Comments
 (0)