Merge pull request #39 from CCsimon123/main

ChrisRackauckas · web-flow · commit aa3af4606687 · 2023-02-06T16:32:25.000-05:00
Implementation of a DF-Sane method
diff --git a/src/SimpleNonlinearSolve.jl b/src/SimpleNonlinearSolve.jl
@@ -24,13 +24,14 @@ include("klement.jl")
 include("trustRegion.jl")
 include("ridder.jl")
 include("brent.jl")
+include("dfsane.jl")
 include("ad.jl")
 
 import SnoopPrecompile
 
 SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
     prob_no_brack = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
-    for alg in (SimpleNewtonRaphson, Broyden, Klement, SimpleTrustRegion)
+    for alg in (SimpleNewtonRaphson, Broyden, Klement, SimpleTrustRegion, SimpleDFSane)
         solve(prob_no_brack, alg(), abstol = T(1e-2))
     end
 
@@ -51,7 +52,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
 end end
 
 # DiffEq styled algorithms
-export Bisection, Brent, Broyden, Falsi, Klement, Ridder, SimpleNewtonRaphson,
+export Bisection, Brent, Broyden, SimpleDFSane, Falsi, Klement, Ridder, SimpleNewtonRaphson,
        SimpleTrustRegion
 
 end # module
diff --git a/src/ad.jl b/src/ad.jl
@@ -31,7 +31,7 @@ end
 function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
                                                 iip,
                                                 <:Dual{T, V, P}},
-                         alg::Union{SimpleNewtonRaphson, SimpleTrustRegion},
+                         alg::AbstractSimpleNonlinearSolveAlgorithm,
                          args...; kwargs...) where {iip, T, V, P}
     sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
     return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
@@ -40,7 +40,7 @@ end
 function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
                                                 iip,
                                                 <:AbstractArray{<:Dual{T, V, P}}},
-                         alg::Union{SimpleNewtonRaphson, SimpleTrustRegion}, args...;
+                         alg::AbstractSimpleNonlinearSolveAlgorithm, args...;
                          kwargs...) where {iip, T, V, P}
     sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
     return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
diff --git a/src/dfsane.jl b/src/dfsane.jl
@@ -0,0 +1,155 @@
+"""
+```julia
+SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
+             M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
+             nexp::Int = 2, η_strategy::Function = (f_1, k, x, F) -> f_1 / k^2)
+```
+
+A low-overhead implementation of the df-sane method for solving large-scale nonlinear
+systems of equations. For in depth information about all the parameters and the algorithm,
+see the paper: [W LaCruz, JM Martinez, and M Raydan (2006), Spectral residual mathod without
+gradient information for solving large-scale nonlinear systems of equations, Mathematics of
+Computation, 75, 1429-1448.](https://www.researchgate.net/publication/220576479_Spectral_Residual_Method_without_Gradient_Information_for_Solving_Large-Scale_Nonlinear_Systems_of_Equations)
+
+### Keyword Arguments
+
+- `σ_min`: the minimum value of the spectral coefficient `σ_k` which is related to the step
+  size in the algorithm. Defaults to `1e-10`.
+- `σ_max`: the maximum value of the spectral coefficient `σ_k` which is related to the step
+  size in the algorithm. Defaults to `1e10`.
+- `σ_1`: the initial value of the spectral coefficient `σ_k` which is related to the step
+  size in the algorithm.. Defaults to `1.0`.
+- `M`: The monotonicity of the algorithm is determined by a this positive integer.
+  A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm
+  of the function `f`. However, higher values allow for more flexibility in this reduction.
+  Despite this, the algorithm still ensures global convergence through the use of a
+  non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi
+  condition. Values in the range of 5 to 20 are usually sufficient, but some cases may call
+  for a higher value of `M`. The default setting is 10.
+- `γ`: a parameter that influences if a proposed step will be accepted. Higher value of `γ`
+  will make the algorithm more restrictive in accepting steps. Defaults to `1e-4`.
+- `τ_min`: if a step is rejected the new step size will get multiplied by factor, and this
+  parameter is the minimum value of that factor. Defaults to `0.1`.
+- `τ_max`: if a step is rejected the new step size will get multiplied by factor, and this
+  parameter is the maximum value of that factor. Defaults to `0.5`.
+- `nexp`: the exponent of the loss, i.e. ``f_k=||F(x_k)||^{nexp}``. The paper uses
+  `nexp ∈ {1,2}`. Defaults to `2`.
+- `η_strategy`:  function to determine the parameter `η_k`, which enables growth
+  of ``||F||^2``. Called as ``η_k = η_strategy(f_1, k, x, F)`` with `f_1` initialized as
+  ``f_1=||F(x_1)||^{nexp}``, `k` is the iteration number, `x` is the current `x`-value and
+  `F` the current residual. Should satisfy ``η_k > 0`` and ``∑ₖ ηₖ < ∞``. Defaults to
+  ``||F||^2 / k^2``.
+"""
+struct SimpleDFSane{T} <: AbstractSimpleNonlinearSolveAlgorithm
+    σ_min::T
+    σ_max::T
+    σ_1::T
+    M::Int
+    γ::T
+    τ_min::T
+    τ_max::T
+    nexp::Int
+    η_strategy::Function
+
+    function SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
+                          M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
+                          nexp::Int = 2, η_strategy::Function = (f_1, k, x, F) -> f_1 / k^2)
+        new{typeof(σ_min)}(σ_min, σ_max, σ_1, M, γ, τ_min, τ_max, nexp, η_strategy)
+    end
+end
+
+function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane,
+                           args...; abstol = nothing, reltol = nothing, maxiters = 1000,
+                           kwargs...)
+    f = Base.Fix2(prob.f, prob.p)
+    x = float(prob.u0)
+    T = eltype(x)
+    σ_min = float(alg.σ_min)
+    σ_max = float(alg.σ_max)
+    σ_k = float(alg.σ_1)
+    M = alg.M
+    γ = float(alg.γ)
+    τ_min = float(alg.τ_min)
+    τ_max = float(alg.τ_max)
+    nexp = alg.nexp
+    η_strategy = alg.η_strategy
+
+    if SciMLBase.isinplace(prob)
+        error("SimpleDFSane currently only supports out-of-place nonlinear problems")
+    end
+
+    atol = abstol !== nothing ? abstol :
+           real(oneunit(eltype(T))) * (eps(real(one(eltype(T)))))^(4 // 5)
+    rtol = reltol !== nothing ? reltol : eps(real(one(eltype(T))))^(4 // 5)
+
+    function ff(x)
+        F = f(x)
+        f_k = norm(F)^nexp
+        return f_k, F
+    end
+
+    f_k, F_k = ff(x)
+    α_1 = convert(T, 1.0)
+    f_1 = f_k
+    history_f_k = fill(f_k, M)
+
+    for k in 1:maxiters
+        iszero(F_k) &&
+            return SciMLBase.build_solution(prob, alg, x, F_k;
+                                            retcode = ReturnCode.Success)
+
+        # Spectral parameter range check
+        if abs(σ_k) > σ_max
+            σ_k = sign(σ_k) * σ_max
+        elseif abs(σ_k) < σ_min
+            σ_k = sign(σ_k) * σ_min
+        end
+
+        # Line search direction
+        d = -σ_k * F_k
+
+        η = η_strategy(f_1, k, x, F_k)
+        f̄ = maximum(history_f_k)
+        α_p = α_1
+        α_m = α_1
+        x_new = x + α_p * d
+        f_new, F_new = ff(x_new)
+        while true
+            if f_new ≤ f̄ + η - γ * α_p^2 * f_k
+                break
+            end
+
+            α_tp = α_p^2 * f_k / (f_new + (2 * α_p - 1) * f_k)
+            x_new = x - α_m * d
+            f_new, F_new = ff(x_new)
+
+            if f_new ≤ f̄ + η - γ * α_m^2 * f_k
+                break
+            end
+
+            α_tm = α_m^2 * f_k / (f_new + (2 * α_m - 1) * f_k)
+            α_p = min(τ_max * α_p, max(α_tp, τ_min * α_p))
+            α_m = min(τ_max * α_m, max(α_tm, τ_min * α_m))
+            x_new = x + α_p * d
+            f_new, F_new = ff(x_new)
+        end
+
+        if isapprox(x_new, x, atol = atol, rtol = rtol)
+            return SciMLBase.build_solution(prob, alg, x_new, F_new;
+                                            retcode = ReturnCode.Success)
+        end
+        # Update spectral parameter
+        s_k = x_new - x
+        y_k = F_new - F_k
+        σ_k = (s_k' * s_k) / (s_k' * y_k)
+
+        # Take step
+        x = x_new
+        F_k = F_new
+        f_k = f_new
+
+        # Store function value
+        history_f_k[k % M + 1] = f_new
+    end
+    return SciMLBase.build_solution(prob, alg, x, F_k; retcode = ReturnCode.MaxIters)
+end
diff --git a/test/basictests.jl b/test/basictests.jl
@@ -62,37 +62,49 @@ sol = benchmark_scalar(sf, csu0)
 @test sol.retcode === ReturnCode.Success
 @test sol.u * sol.u - 2 < 1e-9
 
+# SimpleDFSane
+function benchmark_scalar(f, u0)
+    probN = NonlinearProblem{false}(f, u0)
+    sol = (solve(probN, SimpleDFSane()))
+end
+
+sol = benchmark_scalar(sf, csu0)
+@test sol.retcode === ReturnCode.Success
+@test sol.u * sol.u - 2 < 1e-9
+
 # AD Tests
 using ForwardDiff
 
 # Immutable
 f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0]
 
-for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
+for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(),
+            SimpleDFSane())
     g = function (p)
         probN = NonlinearProblem{false}(f, csu0, p)
         sol = solve(probN, alg, abstol = 1e-9)
         return sol.u[end]
     end
 
     for p in 1.1:0.1:100.0
-        @test g(p) ≈ sqrt(p)
-        @test ForwardDiff.derivative(g, p) ≈ 1 / (2 * sqrt(p))
+        @test abs.(g(p)) ≈ sqrt(p)
+        @test abs.(ForwardDiff.derivative(g, p)) ≈ 1 / (2 * sqrt(p))
     end
 end
 
 # Scalar
 f, u0 = (u, p) -> u * u - p, 1.0
-for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
+for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(),
+            SimpleDFSane())
     g = function (p)
         probN = NonlinearProblem{false}(f, oftype(p, u0), p)
         sol = solve(probN, alg)
         return sol.u
     end
 
     for p in 1.1:0.1:100.0
-        @test g(p) ≈ sqrt(p)
-        @test ForwardDiff.derivative(g, p) ≈ 1 / (2 * sqrt(p))
+        @test abs(g(p)) ≈ sqrt(p)
+        @test abs(ForwardDiff.derivative(g, p)) ≈ 1 / (2 * sqrt(p))
     end
 end
 
@@ -148,7 +160,8 @@ for alg in [Bisection(), Falsi(), Ridder(), Brent()]
     @test ForwardDiff.jacobian(g, p) ≈ ForwardDiff.jacobian(t, p)
 end
 
-for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
+for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(),
+            SimpleDFSane())
     global g, p
     g = function (p)
         probN = NonlinearProblem{false}(f, 0.5, p)
@@ -169,6 +182,7 @@ probN = NonlinearProblem(f, u0)
 @test solve(probN, SimpleTrustRegion(; autodiff = false)).u[end] ≈ sqrt(2.0)
 @test solve(probN, Broyden()).u[end] ≈ sqrt(2.0)
 @test solve(probN, Klement()).u[end] ≈ sqrt(2.0)
+@test solve(probN, SimpleDFSane()).u[end] ≈ sqrt(2.0)
 
 for u0 in [1.0, [1, 1.0]]
     local f, probN, sol
@@ -185,8 +199,8 @@ for u0 in [1.0, [1, 1.0]]
     @test solve(probN, SimpleTrustRegion(; autodiff = false)).u ≈ sol
 
     @test solve(probN, Broyden()).u ≈ sol
-
     @test solve(probN, Klement()).u ≈ sol
+    @test solve(probN, SimpleDFSane()).u ≈ sol
 end
 
 # Bisection Tests
@@ -299,3 +313,58 @@ for options in list_of_options
     sol = solve(probN, alg)
     @test all(abs.(f(u, p)) .< 1e-10)
 end
+
+# Test that `SimpleDFSane` passes a test that `SimpleNewtonRaphson` fails on.
+u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
+global g, f
+f = (u, p) -> 0.010000000000000002 .+
+              10.000000000000002 ./ (1 .+
+               (0.21640425613334457 .+
+                216.40425613334457 ./ (1 .+
+                 (0.21640425613334457 .+
+                  216.40425613334457 ./
+                  (1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .-
+              0.0011552453009332421u .- p
+g = function (p)
+    probN = NonlinearProblem{false}(f, u0, p)
+    sol = solve(probN, SimpleDFSane())
+    return sol.u
+end
+p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+u = g(p)
+f(u, p)
+@test all(abs.(f(u, p)) .< 1e-10)
+
+# Test kwars in `SimpleDFSane`
+σ_min = [1e-10, 1e-5, 1e-4]
+σ_max = [1e10, 1e5, 1e4]
+σ_1 = [1.0, 0.5, 2.0]
+M = [10, 1, 100]
+γ = [1e-4, 1e-3, 1e-5]
+τ_min = [0.1, 0.2, 0.3]
+τ_max = [0.5, 0.8, 0.9]
+nexp = [2, 1, 2]
+η_strategy = [
+    (f_1, k, x, F) -> f_1 / k^2,
+    (f_1, k, x, F) -> f_1 / k^3,
+    (f_1, k, x, F) -> f_1 / k^4,
+]
+
+list_of_options = zip(σ_min, σ_max, σ_1, M, γ, τ_min, τ_max, nexp,
+                      η_strategy)
+for options in list_of_options
+    local probN, sol, alg
+    alg = SimpleDFSane(σ_min = options[1],
+                       σ_max = options[2],
+                       σ_1 = options[3],
+                       M = options[4],
+                       γ = options[5],
+                       τ_min = options[6],
+                       τ_max = options[7],
+                       nexp = options[8],
+                       η_strategy = options[9])
+
+    probN = NonlinearProblem(f, u0, p)
+    sol = solve(probN, alg)
+    @test all(abs.(f(u, p)) .< 1e-10)
+end