Skip to content
This repository was archived by the owner on May 15, 2025. It is now read-only.

Automatic choice for maximum trust region radius #36

Merged
merged 1 commit into from
Jan 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions src/SimpleNonlinearSolve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,19 @@ include("utils.jl")
include("bisection.jl")
include("falsi.jl")
include("raphson.jl")
include("ad.jl")
include("broyden.jl")
include("klement.jl")
include("trustRegion.jl")
include("ad.jl")

import SnoopPrecompile

SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
prob_no_brack = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
for alg in (SimpleNewtonRaphson, Broyden, Klement)
for alg in (SimpleNewtonRaphson, Broyden, Klement, SimpleTrustRegion)
solve(prob_no_brack, alg(), abstol = T(1e-2))
end

for alg in (SimpleTrustRegion(10.0),)
solve(prob_no_brack, alg, abstol = T(1e-2))
end

#=
for alg in (SimpleNewtonRaphson,)
for u0 in ([1., 1.], StaticArraysCore.SA[1.0, 1.0])
Expand Down
6 changes: 4 additions & 2 deletions src/ad.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ end

function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
iip,
<:Dual{T, V, P}}, alg::SimpleNewtonRaphson,
<:Dual{T, V, P}},
alg::Union{SimpleNewtonRaphson, SimpleTrustRegion},
args...; kwargs...) where {iip, T, V, P}
sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
Expand All @@ -39,7 +40,8 @@ end
function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
iip,
<:AbstractArray{<:Dual{T, V, P}}},
alg::SimpleNewtonRaphson, args...; kwargs...) where {iip, T, V, P}
alg::Union{SimpleNewtonRaphson, SimpleTrustRegion}, args...;
kwargs...) where {iip, T, V, P}
sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
retcode = sol.retcode)
Expand Down
62 changes: 40 additions & 22 deletions src/trustRegion.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
"""
```julia
SimpleTrustRegion(max_trust_radius::Number; chunk_size = Val{0}(),
autodiff = Val{true}(), diff_type = Val{:forward})
SimpleTrustRegion(; chunk_size = Val{0}(),
autodiff = Val{true}(),
diff_type = Val{:forward},
max_trust_radius::Real = 0.0,
initial_trust_radius::Real = 0.0,
step_threshold::Real = 0.1,
shrink_threshold::Real = 0.25,
expand_threshold::Real = 0.75,
shrink_factor::Real = 0.25,
expand_factor::Real = 2.0,
max_shrink_times::Int = 32
```

A low-overhead implementation of a
[trust-region](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods)
solver

### Arguments
- `max_trust_radius`: the maximum radius of the trust region. The step size in the algorithm
will change dynamically. However, it will never be greater than the `max_trust_radius`.

### Keyword Arguments

- `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation
Expand All @@ -26,6 +31,8 @@ solver
- `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to
`Val{:forward}` for forward finite differences. For more details on the choices, see the
[FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation.
- `max_trust_radius`: the maximum radius of the trust region. Defaults to
`max(norm(f(u0)), maximum(u0) - minimum(u0))`.
- `initial_trust_radius`: the initial trust region radius. Defaults to
`max_trust_radius / 11`.
- `step_threshold`: the threshold for taking a step. In every iteration, the threshold is
Expand Down Expand Up @@ -58,25 +65,28 @@ struct SimpleTrustRegion{T, CS, AD, FDT} <: AbstractNewtonAlgorithm{CS, AD, FDT}
shrink_factor::T
expand_factor::T
max_shrink_times::Int
function SimpleTrustRegion(max_trust_radius::Number; chunk_size = Val{0}(),
function SimpleTrustRegion(; chunk_size = Val{0}(),
autodiff = Val{true}(),
diff_type = Val{:forward},
initial_trust_radius::Number = max_trust_radius / 11,
step_threshold::Number = 0.1,
shrink_threshold::Number = 0.25,
expand_threshold::Number = 0.75,
shrink_factor::Number = 0.25,
expand_factor::Number = 2.0,
max_trust_radius::Real = 0.0,
initial_trust_radius::Real = 0.0,
step_threshold::Real = 0.1,
shrink_threshold::Real = 0.25,
expand_threshold::Real = 0.75,
shrink_factor::Real = 0.25,
expand_factor::Real = 2.0,
max_shrink_times::Int = 32)
new{typeof(initial_trust_radius), SciMLBase._unwrap_val(chunk_size),
SciMLBase._unwrap_val(autodiff), SciMLBase._unwrap_val(diff_type)}(max_trust_radius,
initial_trust_radius,
step_threshold,
shrink_threshold,
expand_threshold,
shrink_factor,
expand_factor,
max_shrink_times)
new{typeof(initial_trust_radius),
SciMLBase._unwrap_val(chunk_size),
SciMLBase._unwrap_val(autodiff),
SciMLBase._unwrap_val(diff_type)}(max_trust_radius,
initial_trust_radius,
step_threshold,
shrink_threshold,
expand_threshold,
shrink_factor,
expand_factor,
max_shrink_times)
end
end

Expand Down Expand Up @@ -114,6 +124,14 @@ function SciMLBase.__solve(prob::NonlinearProblem,
∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg), eltype(x), F)
end

# Set default trust region radius if not specified by user.
if Δₘₐₓ == 0.0
Δₘₐₓ = max(norm(F), maximum(x) - minimum(x))
end
if Δ == 0.0
Δ = Δₘₐₓ / 11
end

fₖ = 0.5 * norm(F)^2
H = ∇f * ∇f
g = ∇f * F
Expand Down
32 changes: 14 additions & 18 deletions test/basictests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ end
# SimpleTrustRegion
function benchmark_scalar(f, u0)
probN = NonlinearProblem{false}(f, u0)
sol = (solve(probN, SimpleTrustRegion(10.0)))
sol = (solve(probN, SimpleTrustRegion()))
end

sol = benchmark_scalar(sf, csu0)
Expand All @@ -68,8 +68,7 @@ using ForwardDiff
# Immutable
f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0]

for alg in [SimpleNewtonRaphson(), Broyden(), Klement(),
SimpleTrustRegion(10.0)]
for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
g = function (p)
probN = NonlinearProblem{false}(f, csu0, p)
sol = solve(probN, alg, abstol = 1e-9)
Expand All @@ -84,8 +83,7 @@ end

# Scalar
f, u0 = (u, p) -> u * u - p, 1.0
for alg in [SimpleNewtonRaphson(), Broyden(), Klement(),
SimpleTrustRegion(10.0)]
for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
g = function (p)
probN = NonlinearProblem{false}(f, oftype(p, u0), p)
sol = solve(probN, alg)
Expand Down Expand Up @@ -126,8 +124,7 @@ for alg in [Bisection(), Falsi()]
@test ForwardDiff.jacobian(g, p) ≈ ForwardDiff.jacobian(t, p)
end

for alg in [SimpleNewtonRaphson(), Broyden(), Klement(),
SimpleTrustRegion(10.0)]
for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
global g, p
g = function (p)
probN = NonlinearProblem{false}(f, 0.5, p)
Expand All @@ -144,8 +141,8 @@ probN = NonlinearProblem(f, u0)

@test solve(probN, SimpleNewtonRaphson()).u[end] ≈ sqrt(2.0)
@test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] ≈ sqrt(2.0)
@test solve(probN, SimpleTrustRegion(10.0)).u[end] ≈ sqrt(2.0)
@test solve(probN, SimpleTrustRegion(10.0; autodiff = false)).u[end] ≈ sqrt(2.0)
@test solve(probN, SimpleTrustRegion()).u[end] ≈ sqrt(2.0)
@test solve(probN, SimpleTrustRegion(; autodiff = false)).u[end] ≈ sqrt(2.0)
@test solve(probN, Broyden()).u[end] ≈ sqrt(2.0)
@test solve(probN, Klement()).u[end] ≈ sqrt(2.0)

Expand All @@ -159,9 +156,9 @@ for u0 in [1.0, [1, 1.0]]
@test solve(probN, SimpleNewtonRaphson()).u ≈ sol
@test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u ≈ sol

@test solve(probN, SimpleTrustRegion(10.0)).u ≈ sol
@test solve(probN, SimpleTrustRegion(10.0)).u ≈ sol
@test solve(probN, SimpleTrustRegion(10.0; autodiff = false)).u ≈ sol
@test solve(probN, SimpleTrustRegion()).u ≈ sol
@test solve(probN, SimpleTrustRegion()).u ≈ sol
@test solve(probN, SimpleTrustRegion(; autodiff = false)).u ≈ sol

@test solve(probN, Broyden()).u ≈ sol

Expand Down Expand Up @@ -215,17 +212,16 @@ f = (u, p) -> 0.010000000000000002 .+
(0.21640425613334457 .+
216.40425613334457 ./
(1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .-
0.0011552453009332421u
.-p
0.0011552453009332421u .- p
g = function (p)
probN = NonlinearProblem{false}(f, u0, p)
sol = solve(probN, SimpleTrustRegion(100.0))
sol = solve(probN, SimpleTrustRegion())
return sol.u
end
p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
u = g(p)
f(u, p)
@test all(f(u, p) .< 1e-10)
@test all(abs.(f(u, p)) .< 1e-10)

# Test kwars in `SimpleTrustRegion`
max_trust_radius = [10.0, 100.0, 1000.0]
Expand All @@ -242,7 +238,7 @@ list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold,
expand_factor, max_shrink_times)
for options in list_of_options
local probN, sol, alg
alg = SimpleTrustRegion(options[1];
alg = SimpleTrustRegion(max_trust_radius = options[1],
initial_trust_radius = options[2],
step_threshold = options[3],
shrink_threshold = options[4],
Expand All @@ -253,5 +249,5 @@ for options in list_of_options

probN = NonlinearProblem(f, u0, p)
sol = solve(probN, alg)
@test all(f(u, p) .< 1e-10)
@test all(abs.(f(u, p)) .< 1e-10)
end