Add termination condition to gaussnewton and other fixes

utkarsh530 · utkarsh530 · commit 2763d65c0089 · 2023-10-19T16:17:11.000-04:00
diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl
@@ -36,26 +36,34 @@ for large-scale and numerically-difficult nonlinear least squares problems.
     Jacobian-Free version of `GaussNewton` doesn't work yet, and it forces jacobian
     construction. This will be fixed in the near future.
 """
-@concrete struct GaussNewton{CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD}
+@concrete struct GaussNewton{CJ, AD, TC} <: AbstractNewtonAlgorithm{CJ, AD, TC}
     ad::AD
     linsolve
     precs
+    termination_condition::TC
 end
 
 function set_ad(alg::GaussNewton{CJ}, ad) where {CJ}
     return GaussNewton{CJ}(ad, alg.linsolve, alg.precs)
 end
 
-function GaussNewton(; concrete_jac = nothing, linsolve = CholeskyFactorization(),
-    precs = DEFAULT_PRECS, adkwargs...)
+function GaussNewton(; concrete_jac = nothing, linsolve = NormalCholeskyFactorization(),
+    precs = DEFAULT_PRECS,
+    termination_condition = NLSolveTerminationCondition(NLSolveTerminationMode.AbsNorm;
+        abstol = nothing,
+        reltol = nothing), adkwargs...)
     ad = default_adargs_to_adtype(; adkwargs...)
-    return GaussNewton{_unwrap_val(concrete_jac)}(ad, linsolve, precs)
+    return GaussNewton{_unwrap_val(concrete_jac)}(ad,
+        linsolve,
+        precs,
+        termination_condition)
 end
 
 @concrete mutable struct GaussNewtonCache{iip} <: AbstractNonlinearSolveCache{iip}
     f
     alg
     u
+    u_prev
     fu1
     fu2
     fu_new
@@ -72,12 +80,15 @@ end
     internalnorm
     retcode::ReturnCode.T
     abstol
+    reltol
     prob
     stats::NLStats
+    tc_storage
 end
 
 function SciMLBase.__init(prob::NonlinearLeastSquaresProblem{uType, iip}, alg_::GaussNewton,
-    args...; alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
+    args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing,
+    internalnorm = DEFAULT_NORM,
     kwargs...) where {uType, iip}
     alg = get_concrete_algorithm(alg_, prob)
     @unpack f, u0, p = prob
@@ -91,27 +102,46 @@ function SciMLBase.__init(prob::NonlinearLeastSquaresProblem{uType, iip}, alg_::
     uf, linsolve, J, fu2, jac_cache, du, JᵀJ, Jᵀf = jacobian_caches(alg, f, u, p, Val(iip);
         linsolve_with_JᵀJ = Val(true))
 
-    return GaussNewtonCache{iip}(f, alg, u, fu1, fu2, zero(fu1), du, p, uf, linsolve, J,
-        JᵀJ, Jᵀf, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol,
-        prob, NLStats(1, 0, 0, 0, 0))
+    tc = alg.termination_condition
+    mode = DiffEqBase.get_termination_mode(tc)
+
+    atol = _get_tolerance(abstol, tc.abstol, eltype(u))
+    rtol = _get_tolerance(reltol, tc.reltol, eltype(u))
+
+    storage = mode ∈ DiffEqBase.SAFE_TERMINATION_MODES ? NLSolveSafeTerminationResult() :
+              nothing
+
+    return GaussNewtonCache{iip}(f, alg, u, copy(u), fu1, fu2, zero(fu1), du, p, uf,
+        linsolve, J,
+        JᵀJ, Jᵀf, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, atol, rtol,
+        prob, NLStats(1, 0, 0, 0, 0), storage)
 end
 
 function perform_step!(cache::GaussNewtonCache{true})
-    @unpack u, fu1, f, p, alg, J, JᵀJ, Jᵀf, linsolve, du = cache
+    @unpack u, u_prev, fu1, f, p, alg, J, JᵀJ, Jᵀf, linsolve, du = cache
     jacobian!!(J, cache)
     __matmul!(JᵀJ, J', J)
     __matmul!(Jᵀf, J', fu1)
 
+    tc_storage = cache.tc_storage
+    termination_condition = cache.alg.termination_condition(tc_storage)
+
     # u = u - J \ fu
     linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(JᵀJ), b = _vec(Jᵀf),
         linu = _vec(du), p, reltol = cache.abstol)
     cache.linsolve = linres.cache
     @. u = u - du
     f(cache.fu_new, u, p)
 
-    (cache.internalnorm(cache.fu_new .- cache.fu1) < cache.abstol ||
-     cache.internalnorm(cache.fu_new) < cache.abstol) &&
+    (termination_condition(cache.fu_new .- cache.fu1,
+        cache.u,
+        u_prev,
+        cache.abstol,
+        cache.reltol) ||
+     termination_condition(cache.fu_new, cache.u, u_prev, cache.abstol, cache.reltol)) &&
         (cache.force_stop = true)
+
+    @. u_prev = u
     cache.fu1 .= cache.fu_new
     cache.stats.nf += 1
     cache.stats.njacs += 1
@@ -121,7 +151,10 @@ function perform_step!(cache::GaussNewtonCache{true})
 end
 
 function perform_step!(cache::GaussNewtonCache{false})
-    @unpack u, fu1, f, p, alg, linsolve = cache
+    @unpack u, u_prev, fu1, f, p, alg, linsolve = cache
+
+    tc_storage = cache.tc_storage
+    termination_condition = cache.alg.termination_condition(tc_storage)
 
     cache.J = jacobian!!(cache.J, cache)
 
@@ -138,7 +171,10 @@ function perform_step!(cache::GaussNewtonCache{false})
     cache.u = @. u - cache.du  # `u` might not support mutation
     cache.fu_new = f(cache.u, p)
 
-    (cache.internalnorm(cache.fu_new) < cache.abstol) && (cache.force_stop = true)
+    termination_condition(cache.fu_new, cache.u, u_prev, cache.abstol, cache.reltol) &&
+        (cache.force_stop = true)
+
+    cache.u_prev = @. cache.u
     cache.fu1 = cache.fu_new
     cache.stats.nf += 1
     cache.stats.njacs += 1
diff --git a/src/levenberg.jl b/src/levenberg.jl
@@ -99,7 +99,7 @@ function LevenbergMarquardt(; concrete_jac = nothing, linsolve = nothing,
     precs = DEFAULT_PRECS, damping_initial::Real = 1.0, damping_increase_factor::Real = 2.0,
     damping_decrease_factor::Real = 3.0, finite_diff_step_geodesic::Real = 0.1,
     α_geodesic::Real = 0.75, b_uphill::Real = 1.0, min_damping_D::AbstractFloat = 1e-8,
-    termination_condition = NLSolveTerminationCondition(NLSolveTerminationMode.NLSolveDefault;
+    termination_condition = NLSolveTerminationCondition(NLSolveTerminationMode.AbsNorm;
         abstol = nothing,
         reltol = nothing),
     adkwargs...)
@@ -209,7 +209,6 @@ function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip},
         JᵀJ, λ, λ_factor, damping_increase_factor, damping_decrease_factor, h, α_geodesic,
         b_uphill, min_damping_D, v, a, tmp_vec, v_old, loss, δ, loss, make_new_J, fu_tmp,
         zero(u), zero(fu1), mat_tmp, NLStats(1, 0, 0, 0, 0), storage)
-
 end
 
 function perform_step!(cache::LevenbergMarquardtCache{true})
@@ -271,11 +270,7 @@ function perform_step!(cache::LevenbergMarquardtCache{true})
         if (1 - β)^b_uphill * loss ≤ loss_old
             # Accept step.
             cache.u .+= δ
-            if termination_condition(cache.fu_tmp,
-                cache.u,
-                u_prev,
-                cache.abstol,
-                cache.reltol)
+            if loss < cache.abstol
                 cache.force_stop = true
                 return nothing
             end