Automatic choice for maximum trust region radius

Deltadahl · Deltadahl · commit c86a98401ed5 · 2023-01-18T20:57:20.000+01:00
diff --git a/src/SimpleNonlinearSolve.jl b/src/SimpleNonlinearSolve.jl
@@ -19,23 +19,19 @@ include("utils.jl")
 include("bisection.jl")
 include("falsi.jl")
 include("raphson.jl")
-include("ad.jl")
 include("broyden.jl")
 include("klement.jl")
 include("trustRegion.jl")
+include("ad.jl")
 
 import SnoopPrecompile
 
 SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
     prob_no_brack = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
-    for alg in (SimpleNewtonRaphson, Broyden, Klement)
+    for alg in (SimpleNewtonRaphson, Broyden, Klement, SimpleTrustRegion)
         solve(prob_no_brack, alg(), abstol = T(1e-2))
     end
 
-    for alg in (SimpleTrustRegion(10.0),)
-        solve(prob_no_brack, alg, abstol = T(1e-2))
-    end
-
     #=
     for alg in (SimpleNewtonRaphson,)
         for u0 in ([1., 1.], StaticArraysCore.SA[1.0, 1.0])
diff --git a/src/ad.jl b/src/ad.jl
@@ -30,7 +30,8 @@ end
 
 function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
                                                 iip,
-                                                <:Dual{T, V, P}}, alg::SimpleNewtonRaphson,
+                                                <:Dual{T, V, P}},
+                         alg::Union{SimpleNewtonRaphson, SimpleTrustRegion},
                          args...; kwargs...) where {iip, T, V, P}
     sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
     return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
@@ -39,7 +40,8 @@ end
 function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
                                                 iip,
                                                 <:AbstractArray{<:Dual{T, V, P}}},
-                         alg::SimpleNewtonRaphson, args...; kwargs...) where {iip, T, V, P}
+                         alg::Union{SimpleNewtonRaphson, SimpleTrustRegion}, args...;
+                         kwargs...) where {iip, T, V, P}
     sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
     return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
                                     retcode = sol.retcode)
diff --git a/src/trustRegion.jl b/src/trustRegion.jl
@@ -1,17 +1,22 @@
 """
 ```julia
-SimpleTrustRegion(max_trust_radius::Number; chunk_size = Val{0}(),
-                               autodiff = Val{true}(), diff_type = Val{:forward})
+SimpleTrustRegion(; chunk_size = Val{0}(),
+                    autodiff = Val{true}(),
+                    diff_type = Val{:forward},
+                    max_trust_radius::Real = 0.0,
+                    initial_trust_radius::Real = 0.0,
+                    step_threshold::Real = 0.1,
+                    shrink_threshold::Real = 0.25,
+                    expand_threshold::Real = 0.75,
+                    shrink_factor::Real = 0.25,
+                    expand_factor::Real = 2.0,
+                    max_shrink_times::Int = 32
 ```
 
 A low-overhead implementation of a
 [trust-region](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods)
 solver
 
-### Arguments
-- `max_trust_radius`: the maximum radius of the trust region. The step size in the algorithm
-  will change dynamically. However, it will never be greater than the `max_trust_radius`.
-
 ### Keyword Arguments
 
 - `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation
@@ -26,6 +31,8 @@ solver
 - `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to
   `Val{:forward}` for forward finite differences. For more details on the choices, see the
   [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation.
+- `max_trust_radius`: the maximum radius of the trust region. Defaults to
+  `max(norm(f(u0)), maximum(u0) - minimum(u0))`.
 - `initial_trust_radius`: the initial trust region radius. Defaults to
   `max_trust_radius / 11`.
 - `step_threshold`: the threshold for taking a step. In every iteration, the threshold is
@@ -58,25 +65,28 @@ struct SimpleTrustRegion{T, CS, AD, FDT} <: AbstractNewtonAlgorithm{CS, AD, FDT}
     shrink_factor::T
     expand_factor::T
     max_shrink_times::Int
-    function SimpleTrustRegion(max_trust_radius::Number; chunk_size = Val{0}(),
+    function SimpleTrustRegion(; chunk_size = Val{0}(),
                                autodiff = Val{true}(),
                                diff_type = Val{:forward},
-                               initial_trust_radius::Number = max_trust_radius / 11,
-                               step_threshold::Number = 0.1,
-                               shrink_threshold::Number = 0.25,
-                               expand_threshold::Number = 0.75,
-                               shrink_factor::Number = 0.25,
-                               expand_factor::Number = 2.0,
+                               max_trust_radius::Real = 0.0,
+                               initial_trust_radius::Real = 0.0,
+                               step_threshold::Real = 0.1,
+                               shrink_threshold::Real = 0.25,
+                               expand_threshold::Real = 0.75,
+                               shrink_factor::Real = 0.25,
+                               expand_factor::Real = 2.0,
                                max_shrink_times::Int = 32)
-        new{typeof(initial_trust_radius), SciMLBase._unwrap_val(chunk_size),
-            SciMLBase._unwrap_val(autodiff), SciMLBase._unwrap_val(diff_type)}(max_trust_radius,
-                                                                               initial_trust_radius,
-                                                                               step_threshold,
-                                                                               shrink_threshold,
-                                                                               expand_threshold,
-                                                                               shrink_factor,
-                                                                               expand_factor,
-                                                                               max_shrink_times)
+        new{typeof(initial_trust_radius),
+            SciMLBase._unwrap_val(chunk_size),
+            SciMLBase._unwrap_val(autodiff),
+            SciMLBase._unwrap_val(diff_type)}(max_trust_radius,
+                                              initial_trust_radius,
+                                              step_threshold,
+                                              shrink_threshold,
+                                              expand_threshold,
+                                              shrink_factor,
+                                              expand_factor,
+                                              max_shrink_times)
     end
 end
 
@@ -114,6 +124,14 @@ function SciMLBase.__solve(prob::NonlinearProblem,
         ∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg), eltype(x), F)
     end
 
+    # Set default trust region radius if not specified by user.
+    if Δₘₐₓ == 0.0
+        Δₘₐₓ = max(norm(F), maximum(x) - minimum(x))
+    end
+    if Δ == 0.0
+        Δ = Δₘₐₓ / 11
+    end
+
     fₖ = 0.5 * norm(F)^2
     H = ∇f * ∇f
     g = ∇f * F
diff --git a/test/basictests.jl b/test/basictests.jl
@@ -55,7 +55,7 @@ end
 # SimpleTrustRegion
 function benchmark_scalar(f, u0)
     probN = NonlinearProblem{false}(f, u0)
-    sol = (solve(probN, SimpleTrustRegion(10.0)))
+    sol = (solve(probN, SimpleTrustRegion()))
 end
 
 sol = benchmark_scalar(sf, csu0)
@@ -68,8 +68,7 @@ using ForwardDiff
 # Immutable
 f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0]
 
-for alg in [SimpleNewtonRaphson(), Broyden(), Klement(),
-    SimpleTrustRegion(10.0)]
+for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
     g = function (p)
         probN = NonlinearProblem{false}(f, csu0, p)
         sol = solve(probN, alg, abstol = 1e-9)
@@ -84,8 +83,7 @@ end
 
 # Scalar
 f, u0 = (u, p) -> u * u - p, 1.0
-for alg in [SimpleNewtonRaphson(), Broyden(), Klement(),
-    SimpleTrustRegion(10.0)]
+for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
     g = function (p)
         probN = NonlinearProblem{false}(f, oftype(p, u0), p)
         sol = solve(probN, alg)
@@ -126,8 +124,7 @@ for alg in [Bisection(), Falsi()]
     @test ForwardDiff.jacobian(g, p) ≈ ForwardDiff.jacobian(t, p)
 end
 
-for alg in [SimpleNewtonRaphson(), Broyden(), Klement(),
-    SimpleTrustRegion(10.0)]
+for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
     global g, p
     g = function (p)
         probN = NonlinearProblem{false}(f, 0.5, p)
@@ -144,8 +141,8 @@ probN = NonlinearProblem(f, u0)
 
 @test solve(probN, SimpleNewtonRaphson()).u[end] ≈ sqrt(2.0)
 @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] ≈ sqrt(2.0)
-@test solve(probN, SimpleTrustRegion(10.0)).u[end] ≈ sqrt(2.0)
-@test solve(probN, SimpleTrustRegion(10.0; autodiff = false)).u[end] ≈ sqrt(2.0)
+@test solve(probN, SimpleTrustRegion()).u[end] ≈ sqrt(2.0)
+@test solve(probN, SimpleTrustRegion(; autodiff = false)).u[end] ≈ sqrt(2.0)
 @test solve(probN, Broyden()).u[end] ≈ sqrt(2.0)
 @test solve(probN, Klement()).u[end] ≈ sqrt(2.0)
 
@@ -159,9 +156,9 @@ for u0 in [1.0, [1, 1.0]]
     @test solve(probN, SimpleNewtonRaphson()).u ≈ sol
     @test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u ≈ sol
 
-    @test solve(probN, SimpleTrustRegion(10.0)).u ≈ sol
-    @test solve(probN, SimpleTrustRegion(10.0)).u ≈ sol
-    @test solve(probN, SimpleTrustRegion(10.0; autodiff = false)).u ≈ sol
+    @test solve(probN, SimpleTrustRegion()).u ≈ sol
+    @test solve(probN, SimpleTrustRegion()).u ≈ sol
+    @test solve(probN, SimpleTrustRegion(; autodiff = false)).u ≈ sol
 
     @test solve(probN, Broyden()).u ≈ sol
 
@@ -215,17 +212,16 @@ f = (u, p) -> 0.010000000000000002 .+
                  (0.21640425613334457 .+
                   216.40425613334457 ./
                   (1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .-
-              0.0011552453009332421u
-.-p
+              0.0011552453009332421u .- p
 g = function (p)
     probN = NonlinearProblem{false}(f, u0, p)
-    sol = solve(probN, SimpleTrustRegion(100.0))
+    sol = solve(probN, SimpleTrustRegion())
     return sol.u
 end
 p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 u = g(p)
 f(u, p)
-@test all(f(u, p) .< 1e-10)
+@test all(abs.(f(u, p)) .< 1e-10)
 
 # Test kwars in `SimpleTrustRegion`
 max_trust_radius = [10.0, 100.0, 1000.0]
@@ -242,7 +238,7 @@ list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold,
                       expand_factor, max_shrink_times)
 for options in list_of_options
     local probN, sol, alg
-    alg = SimpleTrustRegion(options[1];
+    alg = SimpleTrustRegion(max_trust_radius = options[1],
                             initial_trust_radius = options[2],
                             step_threshold = options[3],
                             shrink_threshold = options[4],
@@ -253,5 +249,5 @@ for options in list_of_options
 
     probN = NonlinearProblem(f, u0, p)
     sol = solve(probN, alg)
-    @test all(f(u, p) .< 1e-10)
+    @test all(abs.(f(u, p)) .< 1e-10)
 end