Make batched Broyden an NNlib-triggered package extension

avik-pal · avik-pal · commit 9697e0685a1c · 2023-02-13T11:08:14.000-05:00
diff --git a/Project.toml b/Project.toml
@@ -9,12 +9,17 @@ DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
 FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 SnoopPrecompile = "66db9d55-30c0-4569-8b51-7e840670fc0c"
 StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
 
+[weakdeps]
+NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
+
+[extensions]
+SimpleBatchedNonlinearSolveExt = "NNlib"
+
 [compat]
 ArrayInterfaceCore = "0.1.1"
 DiffEqBase = "6.114"
diff --git a/ext/SimpleBatchedNonlinearSolveExt/SimpleBatchedNonlinearSolveExt.jl b/ext/SimpleBatchedNonlinearSolveExt/SimpleBatchedNonlinearSolveExt.jl
@@ -0,0 +1,9 @@
+module SimpleBatchedNonlinearSolveExt
+# `I`/`diagind` (LinearAlgebra) and `ArrayInterfaceCore` are used by utils.jl.
+using SimpleNonlinearSolve, SciMLBase, NNlib, LinearAlgebra
+import ArrayInterfaceCore
+
+include("utils.jl")
+include("broyden.jl")
+include("lbroyden.jl")
+end
diff --git a/ext/SimpleBatchedNonlinearSolveExt/broyden.jl b/ext/SimpleBatchedNonlinearSolveExt/broyden.jl
@@ -0,0 +1,48 @@
+# Batched Broyden: `prob.u0` must be a matrix; each of its columns gets its own
+# inverse-Jacobian estimate, stored along the third axis of `J⁻¹`.
+function SciMLBase.__solve(prob::NonlinearProblem, alg::Broyden{true}, args...;
+                           abstol = nothing, reltol = nothing, maxiters = 1000, kwargs...)
+    # Validate before the first residual evaluation so bad input gets these messages.
+    if SciMLBase.isinplace(prob)
+        error("Broyden currently only supports out-of-place nonlinear problems")
+    end
+
+    f = Base.Fix2(prob.f, prob.p)
+    x = float(prob.u0)
+    if ndims(x) != 2
+        error("`batch` mode works only if `ndims(prob.u0) == 2`")
+    end
+
+    fₙ = f(x)
+    T = eltype(x)
+    J⁻¹ = _init_J_batched(x)
+
+    atol = abstol !== nothing ? abstol :
+           real(oneunit(eltype(T))) * (eps(real(one(eltype(T)))))^(4 // 5)
+    rtol = reltol !== nothing ? reltol : eps(real(one(eltype(T))))^(4 // 5)
+
+    xₙ = x
+    xₙ₋₁ = x
+    fₙ₋₁ = fₙ
+    for _ in 1:maxiters
+        xₙ = xₙ₋₁ .- _batched_mul(J⁻¹, fₙ₋₁)
+        fₙ = f(xₙ)
+        Δxₙ = xₙ .- xₙ₋₁
+        Δfₙ = fₙ .- fₙ₋₁
+        J⁻¹Δfₙ = _batched_mul(J⁻¹, Δfₙ)
+        Δxₙᵀ = _batch_transpose(Δxₙ)
+        # Per-column rank-one update: J⁻¹ += ((Δx - J⁻¹Δf) / (Δxᵀ J⁻¹Δf)) * (Δxᵀ J⁻¹).
+        J⁻¹ += _batched_mul((Δxₙ .- J⁻¹Δfₙ) ./ _batched_mul(Δxₙᵀ, J⁻¹Δfₙ),
+                            _batched_mul(Δxₙᵀ, J⁻¹))
+
+        # Stop on an exactly zero residual or once successive iterates agree.
+        if iszero(fₙ) || isapprox(xₙ, xₙ₋₁; atol = atol, rtol = rtol)
+            return SciMLBase.build_solution(prob, alg, xₙ, fₙ;
+                                            retcode = ReturnCode.Success)
+        end
+        xₙ₋₁ = xₙ
+        fₙ₋₁ = fₙ
+    end
+
+    return SciMLBase.build_solution(prob, alg, xₙ, fₙ; retcode = ReturnCode.MaxIters)
+end
diff --git a/ext/SimpleBatchedNonlinearSolveExt/lbroyden.jl b/ext/SimpleBatchedNonlinearSolveExt/lbroyden.jl
diff --git a/ext/SimpleBatchedNonlinearSolveExt/utils.jl b/ext/SimpleBatchedNonlinearSolveExt/utils.jl
@@ -0,0 +1,25 @@
+_batch_transpose(x) = reshape(x, (1, size(x)...))  # prepend a length-1 leading axis
+
+# `_batched_mul`: plain `x * y` by default; NNlib's `batched_mul` when a 3-D array is
+# involved. A matrix operand is reshaped so that each column becomes its own slice.
+_batched_mul(x, y) = x * y
+function _batched_mul(x::AbstractArray{T, 3}, y::AbstractMatrix) where {T}
+    rows, nbatch = size(y)
+    # Columns of `y` become (rows, 1) slices; the length-1 axis is dropped afterwards.
+    return dropdims(batched_mul(x, reshape(y, rows, 1, nbatch)); dims = 2)
+end
+function _batched_mul(x::AbstractMatrix, y::AbstractArray{T, 3}) where {T}
+    rows, nbatch = size(x)
+    return batched_mul(reshape(x, rows, 1, nbatch), y)
+end
+_batched_mul(x::AbstractArray{<:Any, 3}, y::AbstractArray{<:Any, 3}) = batched_mul(x, y)
+
+# Initial inverse-Jacobian guess: the zero matrix built from one column of `x` with
+# ones on its diagonal, repeated along a third axis once per column of `x`.
+function _init_J_batched(x::AbstractMatrix{T}) where {T}
+    eye = ArrayInterfaceCore.zeromatrix(x[:, 1])
+    # Mutable `x`: write the diagonal in place; otherwise rebind through `+ I`.
+    ismutable(x) ? (eye[diagind(eye)] .= one(T)) : (eye += I)
+    nbatch = size(x, 2)
+    return repeat(eye, 1, 1, nbatch)
+end
diff --git a/src/SimpleNonlinearSolve.jl b/src/SimpleNonlinearSolve.jl
@@ -7,7 +7,6 @@ using StaticArraysCore
 using LinearAlgebra
 import ArrayInterfaceCore
 using DiffEqBase
-using NNlib  # Batched Matrix Multiplication
 
 @reexport using SciMLBase
 
diff --git a/src/broyden.jl b/src/broyden.jl
@@ -1,25 +1,26 @@
 """
-```julia
-Broyden()
-```
+    Broyden()
 
 A low-overhead implementation of Broyden. This method is non-allocating on scalar
 and static array problems.
 """
-struct Broyden <: AbstractSimpleNonlinearSolveAlgorithm end
+struct Broyden{batched} <: AbstractSimpleNonlinearSolveAlgorithm
+    Broyden(batched = false) = new{batched}()  # the flag becomes the type parameter
+end
 
-function SciMLBase.__solve(prob::NonlinearProblem, alg::Broyden, args...; abstol = nothing,
-                           reltol = nothing, maxiters = 1000, batch = false, kwargs...)
+function SciMLBase.__solve(prob::NonlinearProblem, alg::Broyden{false}, args...;
+                           abstol = nothing, reltol = nothing, maxiters = 1000, kwargs...)
     f = Base.Fix2(prob.f, prob.p)
     x = float(prob.u0)
 
-    if batch && ndims(x) != 2
-        error("`batch` mode works only if `ndims(prob.u0) == 2`")
-    end
+    # if batch && ndims(x) != 2
+    #     error("`batch` mode works only if `ndims(prob.u0) == 2`")
+    # end
 
     fₙ = f(x)
     T = eltype(x)
-    J⁻¹ = init_J(x; batch)
+    # J⁻¹ = init_J(x; batch)
+    J⁻¹ = init_J(x)
 
     if SciMLBase.isinplace(prob)
         error("Broyden currently only supports out-of-place nonlinear problems")
@@ -33,14 +34,12 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::Broyden, args...; abstol
     xₙ₋₁ = x
     fₙ₋₁ = fₙ
     for _ in 1:maxiters
-        xₙ = xₙ₋₁ .- _batched_mul(J⁻¹, fₙ₋₁, batch)
+        xₙ = xₙ₋₁ - J⁻¹ * fₙ₋₁
         fₙ = f(xₙ)
         Δxₙ = xₙ .- xₙ₋₁
         Δfₙ = fₙ .- fₙ₋₁
-        J⁻¹Δfₙ = _batched_mul(J⁻¹, Δfₙ, batch)
-        J⁻¹ += _batched_mul(((Δxₙ .- J⁻¹Δfₙ, batch) ./
-                             (_batched_mul(_batch_transpose(Δxₙ, batch), J⁻¹Δfₙ, batch))),
-                            _batched_mul(_batch_transpose(Δxₙ, batch), J⁻¹, batch), batch)
+        J⁻¹Δfₙ = J⁻¹ * Δfₙ
+        J⁻¹ += ((Δxₙ .- J⁻¹Δfₙ) ./ (Δxₙ' * J⁻¹Δfₙ)) * (Δxₙ' * J⁻¹)
 
         iszero(fₙ) &&
             return SciMLBase.build_solution(prob, alg, xₙ, fₙ;
@@ -56,8 +55,3 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::Broyden, args...; abstol
 
     return SciMLBase.build_solution(prob, alg, xₙ, fₙ; retcode = ReturnCode.MaxIters)
 end
-
-function _batch_transpose(x, batch)
-    !batch && return x'
-    return reshape(x, 1, size(x)...)
-end
diff --git a/src/utils.jl b/src/utils.jl
@@ -34,17 +34,14 @@ value(x) = x
 value(x::Dual) = ForwardDiff.value(x)
 value(x::AbstractArray{<:Dual}) = map(ForwardDiff.value, x)
 
-function init_J(x; batch = false)
-    x_ = batch ? x[:, 1] : x
-
-    J = ArrayInterfaceCore.zeromatrix(x_)
-    if ismutable(x_)
-        J[diagind(J)] .= one(eltype(x_))
+function init_J(x)  # returns `zeromatrix(x)` with ones placed on its diagonal
+    J = ArrayInterfaceCore.zeromatrix(x)
+    if ismutable(x)
+        J[diagind(J)] .= one(eltype(x))  # mutable `x`: write the diagonal in place
     else
         J += I
     end
-
-    return batch ? repeat(J, 1, 1, size(x, 2)) : J
+    return J
 end
 
 function dogleg_method(H, g, Δ)
@@ -71,18 +68,3 @@ function dogleg_method(H, g, Δ)
     tau = (-dot_δsd_δN_δsd + sqrt(fact)) / dot_δN_δsd
     return δsd + tau * δN_δsd
 end
-
-_batched_mul(x, y, batch) = x * y
-function _batched_mul(x::AbstractArray{T, 3}, y::AbstractMatrix, batch) where {T}
-    !batch && return x * y
-    return dropdims(batched_mul(x, reshape(y, size(y, 1), 1, size(y, 2))); dims = 2)
-end
-function _batched_mul(x::AbstractMatrix, y::AbstractArray{T, 3}, batch) where {T}
-    !batch && return x * y
-    return batched_mul(reshape(x, size(x, 1), 1, size(x, 2)), y)
-end
-function _batched_mul(x::AbstractArray{T1, 3}, y::AbstractArray{T2, 3},
-                      batch) where {T1, T2}
-    !batch && return x * y
-    return batched_mul(x, y)
-end