Skip to content
This repository was archived by the owner on May 15, 2025. It is now read-only.

Commit aa3af46

Browse files
Merge pull request #39 from CCsimon123/main
Implementation of a DF-Sane method
2 parents 95db958 + 7fcca42 commit aa3af46

File tree

4 files changed

+237
-12
lines changed

4 files changed

+237
-12
lines changed

src/SimpleNonlinearSolve.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,14 @@ include("klement.jl")
2424
include("trustRegion.jl")
2525
include("ridder.jl")
2626
include("brent.jl")
27+
include("dfsane.jl")
2728
include("ad.jl")
2829

2930
import SnoopPrecompile
3031

3132
SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
3233
prob_no_brack = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
33-
for alg in (SimpleNewtonRaphson, Broyden, Klement, SimpleTrustRegion)
34+
for alg in (SimpleNewtonRaphson, Broyden, Klement, SimpleTrustRegion, SimpleDFSane)
3435
solve(prob_no_brack, alg(), abstol = T(1e-2))
3536
end
3637

@@ -51,7 +52,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
5152
end end
5253

5354
# DiffEq styled algorithms
54-
export Bisection, Brent, Broyden, Falsi, Klement, Ridder, SimpleNewtonRaphson,
55+
export Bisection, Brent, Broyden, SimpleDFSane, Falsi, Klement, Ridder, SimpleNewtonRaphson,
5556
SimpleTrustRegion
5657

5758
end # module

src/ad.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ end
3131
function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
3232
iip,
3333
<:Dual{T, V, P}},
34-
alg::Union{SimpleNewtonRaphson, SimpleTrustRegion},
34+
alg::AbstractSimpleNonlinearSolveAlgorithm,
3535
args...; kwargs...) where {iip, T, V, P}
3636
sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
3737
return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
@@ -40,7 +40,7 @@ end
4040
function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
4141
iip,
4242
<:AbstractArray{<:Dual{T, V, P}}},
43-
alg::Union{SimpleNewtonRaphson, SimpleTrustRegion}, args...;
43+
alg::AbstractSimpleNonlinearSolveAlgorithm, args...;
4444
kwargs...) where {iip, T, V, P}
4545
sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
4646
return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;

src/dfsane.jl

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
"""
2+
```julia
3+
SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
4+
M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
5+
nexp::Int = 2, η_strategy::Function = (f_1, k, x, F) -> f_1 / k^2)
6+
```
7+
8+
A low-overhead implementation of the df-sane method for solving large-scale nonlinear
9+
systems of equations. For in depth information about all the parameters and the algorithm,
10+
see the paper: [W LaCruz, JM Martinez, and M Raydan (2006), Spectral residual mathod without
11+
gradient information for solving large-scale nonlinear systems of equations, Mathematics of
12+
Computation, 75, 1429-1448.](https://www.researchgate.net/publication/220576479_Spectral_Residual_Method_without_Gradient_Information_for_Solving_Large-Scale_Nonlinear_Systems_of_Equations)
13+
14+
### Keyword Arguments
15+
16+
- `σ_min`: the minimum value of the spectral coefficient `σ_k` which is related to the step
17+
size in the algorithm. Defaults to `1e-10`.
18+
- `σ_max`: the maximum value of the spectral coefficient `σ_k` which is related to the step
19+
size in the algorithm. Defaults to `1e10`.
20+
- `σ_1`: the initial value of the spectral coefficient `σ_k` which is related to the step
21+
size in the algorithm.. Defaults to `1.0`.
22+
- `M`: The monotonicity of the algorithm is determined by a this positive integer.
23+
A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm
24+
of the function `f`. However, higher values allow for more flexibility in this reduction.
25+
Despite this, the algorithm still ensures global convergence through the use of a
26+
non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi
27+
condition. Values in the range of 5 to 20 are usually sufficient, but some cases may call
28+
for a higher value of `M`. The default setting is 10.
29+
- `γ`: a parameter that influences if a proposed step will be accepted. Higher value of `γ`
30+
will make the algorithm more restrictive in accepting steps. Defaults to `1e-4`.
31+
- `τ_min`: if a step is rejected the new step size will get multiplied by factor, and this
32+
parameter is the minimum value of that factor. Defaults to `0.1`.
33+
- `τ_max`: if a step is rejected the new step size will get multiplied by factor, and this
34+
parameter is the maximum value of that factor. Defaults to `0.5`.
35+
- `nexp`: the exponent of the loss, i.e. ``f_k=||F(x_k)||^{nexp}``. The paper uses
36+
`nexp ∈ {1,2}`. Defaults to `2`.
37+
- `η_strategy`: function to determine the parameter `η_k`, which enables growth
38+
of ``||F||^2``. Called as ``η_k = η_strategy(f_1, k, x, F)`` with `f_1` initialized as
39+
``f_1=||F(x_1)||^{nexp}``, `k` is the iteration number, `x` is the current `x`-value and
40+
`F` the current residual. Should satisfy ``η_k > 0`` and ``∑ₖ ηₖ < ∞``. Defaults to
41+
``||F||^2 / k^2``.
42+
"""
43+
struct SimpleDFSane{T} <: AbstractSimpleNonlinearSolveAlgorithm
44+
σ_min::T
45+
σ_max::T
46+
σ_1::T
47+
M::Int
48+
γ::T
49+
τ_min::T
50+
τ_max::T
51+
nexp::Int
52+
η_strategy::Function
53+
54+
function SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
55+
M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
56+
nexp::Int = 2, η_strategy::Function = (f_1, k, x, F) -> f_1 / k^2)
57+
new{typeof(σ_min)}(σ_min, σ_max, σ_1, M, γ, τ_min, τ_max, nexp, η_strategy)
58+
end
59+
end
60+
61+
function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane,
62+
args...; abstol = nothing, reltol = nothing, maxiters = 1000,
63+
kwargs...)
64+
f = Base.Fix2(prob.f, prob.p)
65+
x = float(prob.u0)
66+
T = eltype(x)
67+
σ_min = float(alg.σ_min)
68+
σ_max = float(alg.σ_max)
69+
σ_k = float(alg.σ_1)
70+
M = alg.M
71+
γ = float(alg.γ)
72+
τ_min = float(alg.τ_min)
73+
τ_max = float(alg.τ_max)
74+
nexp = alg.nexp
75+
η_strategy = alg.η_strategy
76+
77+
if SciMLBase.isinplace(prob)
78+
error("SimpleDFSane currently only supports out-of-place nonlinear problems")
79+
end
80+
81+
atol = abstol !== nothing ? abstol :
82+
real(oneunit(eltype(T))) * (eps(real(one(eltype(T)))))^(4 // 5)
83+
rtol = reltol !== nothing ? reltol : eps(real(one(eltype(T))))^(4 // 5)
84+
85+
function ff(x)
86+
F = f(x)
87+
f_k = norm(F)^nexp
88+
return f_k, F
89+
end
90+
91+
f_k, F_k = ff(x)
92+
α_1 = convert(T, 1.0)
93+
f_1 = f_k
94+
history_f_k = fill(f_k, M)
95+
96+
for k in 1:maxiters
97+
iszero(F_k) &&
98+
return SciMLBase.build_solution(prob, alg, x, F_k;
99+
retcode = ReturnCode.Success)
100+
101+
# Spectral parameter range check
102+
if abs(σ_k) > σ_max
103+
σ_k = sign(σ_k) * σ_max
104+
elseif abs(σ_k) < σ_min
105+
σ_k = sign(σ_k) * σ_min
106+
end
107+
108+
# Line search direction
109+
d = -σ_k * F_k
110+
111+
η = η_strategy(f_1, k, x, F_k)
112+
= maximum(history_f_k)
113+
α_p = α_1
114+
α_m = α_1
115+
x_new = x + α_p * d
116+
f_new, F_new = ff(x_new)
117+
while true
118+
if f_new + η - γ * α_p^2 * f_k
119+
break
120+
end
121+
122+
α_tp = α_p^2 * f_k / (f_new + (2 * α_p - 1) * f_k)
123+
x_new = x - α_m * d
124+
f_new, F_new = ff(x_new)
125+
126+
if f_new + η - γ * α_m^2 * f_k
127+
break
128+
end
129+
130+
α_tm = α_m^2 * f_k / (f_new + (2 * α_m - 1) * f_k)
131+
α_p = min(τ_max * α_p, max(α_tp, τ_min * α_p))
132+
α_m = min(τ_max * α_m, max(α_tm, τ_min * α_m))
133+
x_new = x + α_p * d
134+
f_new, F_new = ff(x_new)
135+
end
136+
137+
if isapprox(x_new, x, atol = atol, rtol = rtol)
138+
return SciMLBase.build_solution(prob, alg, x_new, F_new;
139+
retcode = ReturnCode.Success)
140+
end
141+
# Update spectral parameter
142+
s_k = x_new - x
143+
y_k = F_new - F_k
144+
σ_k = (s_k' * s_k) / (s_k' * y_k)
145+
146+
# Take step
147+
x = x_new
148+
F_k = F_new
149+
f_k = f_new
150+
151+
# Store function value
152+
history_f_k[k % M + 1] = f_new
153+
end
154+
return SciMLBase.build_solution(prob, alg, x, F_k; retcode = ReturnCode.MaxIters)
155+
end

test/basictests.jl

Lines changed: 77 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,37 +62,49 @@ sol = benchmark_scalar(sf, csu0)
6262
@test sol.retcode === ReturnCode.Success
6363
@test sol.u * sol.u - 2 < 1e-9
6464

65+
# SimpleDFSane
66+
function benchmark_scalar(f, u0)
67+
probN = NonlinearProblem{false}(f, u0)
68+
sol = (solve(probN, SimpleDFSane()))
69+
end
70+
71+
sol = benchmark_scalar(sf, csu0)
72+
@test sol.retcode === ReturnCode.Success
73+
@test sol.u * sol.u - 2 < 1e-9
74+
6575
# AD Tests
6676
using ForwardDiff
6777

6878
# Immutable
6979
f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0]
7080

71-
for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
81+
for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(),
82+
SimpleDFSane())
7283
g = function (p)
7384
probN = NonlinearProblem{false}(f, csu0, p)
7485
sol = solve(probN, alg, abstol = 1e-9)
7586
return sol.u[end]
7687
end
7788

7889
for p in 1.1:0.1:100.0
79-
@test g(p) sqrt(p)
80-
@test ForwardDiff.derivative(g, p) 1 / (2 * sqrt(p))
90+
@test abs.(g(p)) sqrt(p)
91+
@test abs.(ForwardDiff.derivative(g, p)) 1 / (2 * sqrt(p))
8192
end
8293
end
8394

8495
# Scalar
8596
f, u0 = (u, p) -> u * u - p, 1.0
86-
for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
97+
for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(),
98+
SimpleDFSane())
8799
g = function (p)
88100
probN = NonlinearProblem{false}(f, oftype(p, u0), p)
89101
sol = solve(probN, alg)
90102
return sol.u
91103
end
92104

93105
for p in 1.1:0.1:100.0
94-
@test g(p) sqrt(p)
95-
@test ForwardDiff.derivative(g, p) 1 / (2 * sqrt(p))
106+
@test abs(g(p)) sqrt(p)
107+
@test abs(ForwardDiff.derivative(g, p)) 1 / (2 * sqrt(p))
96108
end
97109
end
98110

@@ -148,7 +160,8 @@ for alg in [Bisection(), Falsi(), Ridder(), Brent()]
148160
@test ForwardDiff.jacobian(g, p) ForwardDiff.jacobian(t, p)
149161
end
150162

151-
for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
163+
for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(),
164+
SimpleDFSane())
152165
global g, p
153166
g = function (p)
154167
probN = NonlinearProblem{false}(f, 0.5, p)
@@ -169,6 +182,7 @@ probN = NonlinearProblem(f, u0)
169182
@test solve(probN, SimpleTrustRegion(; autodiff = false)).u[end] sqrt(2.0)
170183
@test solve(probN, Broyden()).u[end] sqrt(2.0)
171184
@test solve(probN, Klement()).u[end] sqrt(2.0)
185+
@test solve(probN, SimpleDFSane()).u[end] sqrt(2.0)
172186

173187
for u0 in [1.0, [1, 1.0]]
174188
local f, probN, sol
@@ -185,8 +199,8 @@ for u0 in [1.0, [1, 1.0]]
185199
@test solve(probN, SimpleTrustRegion(; autodiff = false)).u sol
186200

187201
@test solve(probN, Broyden()).u sol
188-
189202
@test solve(probN, Klement()).u sol
203+
@test solve(probN, SimpleDFSane()).u sol
190204
end
191205

192206
# Bisection Tests
@@ -299,3 +313,58 @@ for options in list_of_options
299313
sol = solve(probN, alg)
300314
@test all(abs.(f(u, p)) .< 1e-10)
301315
end
316+
317+
# Test that `SimpleDFSane` passes a test that `SimpleNewtonRaphson` fails on.
318+
u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
319+
global g, f
320+
f = (u, p) -> 0.010000000000000002 .+
321+
10.000000000000002 ./ (1 .+
322+
(0.21640425613334457 .+
323+
216.40425613334457 ./ (1 .+
324+
(0.21640425613334457 .+
325+
216.40425613334457 ./
326+
(1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .-
327+
0.0011552453009332421u .- p
328+
g = function (p)
329+
probN = NonlinearProblem{false}(f, u0, p)
330+
sol = solve(probN, SimpleDFSane())
331+
return sol.u
332+
end
333+
p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
334+
u = g(p)
335+
f(u, p)
336+
@test all(abs.(f(u, p)) .< 1e-10)
337+
338+
# Test kwars in `SimpleDFSane`
339+
σ_min = [1e-10, 1e-5, 1e-4]
340+
σ_max = [1e10, 1e5, 1e4]
341+
σ_1 = [1.0, 0.5, 2.0]
342+
M = [10, 1, 100]
343+
γ = [1e-4, 1e-3, 1e-5]
344+
τ_min = [0.1, 0.2, 0.3]
345+
τ_max = [0.5, 0.8, 0.9]
346+
nexp = [2, 1, 2]
347+
η_strategy = [
348+
(f_1, k, x, F) -> f_1 / k^2,
349+
(f_1, k, x, F) -> f_1 / k^3,
350+
(f_1, k, x, F) -> f_1 / k^4,
351+
]
352+
353+
list_of_options = zip(σ_min, σ_max, σ_1, M, γ, τ_min, τ_max, nexp,
354+
η_strategy)
355+
for options in list_of_options
356+
local probN, sol, alg
357+
alg = SimpleDFSane(σ_min = options[1],
358+
σ_max = options[2],
359+
σ_1 = options[3],
360+
M = options[4],
361+
γ = options[5],
362+
τ_min = options[6],
363+
τ_max = options[7],
364+
nexp = options[8],
365+
η_strategy = options[9])
366+
367+
probN = NonlinearProblem(f, u0, p)
368+
sol = solve(probN, alg)
369+
@test all(abs.(f(u, p)) .< 1e-10)
370+
end

0 commit comments

Comments
 (0)