diff --git a/src/lbfgs.jl b/src/lbfgs.jl index 14dfb024..65bec6c5 100644 --- a/src/lbfgs.jl +++ b/src/lbfgs.jl @@ -11,7 +11,7 @@ function lbfgs(nlp :: AbstractNLPModel; atol :: Real=√eps(eltype(x)), rtol :: Real=√eps(eltype(x)), max_eval :: Int=-1, max_time :: Float64=30.0, - verbose :: Bool=true, + ls_method :: Symbol=:armijo_wolfe, mem :: Int=5) if !unconstrained(nlp) @@ -25,11 +25,12 @@ function lbfgs(nlp :: AbstractNLPModel; n = nlp.meta.nvar xt = zeros(T, n) - ∇ft = zeros(T, n) + ∇fold = zeros(T, n) f = obj(nlp, x) ∇f = grad(nlp, x) H = InverseLBFGSOperator(T, n, mem, scaling=true) + ϕ = UncMerit(nlp, fx=f, gx=∇f) ∇fNorm = nrm2(n, ∇f) ϵ = atol + rtol * ∇fNorm @@ -43,34 +44,22 @@ function lbfgs(nlp :: AbstractNLPModel; stalled = false status = :unknown - h = LineModel(nlp, x, ∇f) - while !(optimal || tired || stalled) d = - H * ∇f - slope = dot(n, d, ∇f) - if slope ≥ 0 - @error "not a descent direction" slope - status = :not_desc - stalled = true - continue - end - - redirect!(h, x, d) + ∇fold .= ∇f # Perform improved Armijo linesearch. - t, good_grad, ft, nbk, nbW = armijo_wolfe(h, f, slope, ∇ft, τ₁=T(0.9999), bk_max=25, verbose=false) + lso = linesearch!(ϕ, x, d, xt, method=ls_method, bk_max=25) - @info log_row(Any[iter, f, ∇fNorm, slope, nbk]) + @info log_row(Any[iter, f, ∇fNorm, dot(d, ∇fold), lso.specific[:nbk]]) - copyaxpy!(n, t, d, x, xt) - good_grad || grad!(nlp, xt, ∇ft) + lso.good_grad || grad!(nlp, xt, ∇f) # Update L-BFGS approximation. - push!(H, t * d, ∇ft - ∇f) + push!(H, lso.t * d, ∇f - ∇fold) # Move on. x .= xt - f = ft - ∇f .= ∇ft + ϕ.fx = f = lso.ϕt ∇fNorm = nrm2(n, ∇f) iter = iter + 1 diff --git a/src/tron.jl b/src/tron.jl index 5071129c..15b4e8a7 100644 --- a/src/tron.jl +++ b/src/tron.jl @@ -29,6 +29,7 @@ function tron(::Val{:Newton}, max_time :: Real=30.0, max_cgiter :: Int=nlp.meta.nvar, use_only_objgrad :: Bool=false, + tr_method :: Symbol=:tron, cgtol :: Real=eltype(x)(0.1), atol :: Real=√eps(eltype(x)), rtol :: Real=√eps(eltype(x)), @@ -60,6 +61,7 @@ function tron(::Val{:Newton}, fx, _ = objgrad!(nlp, x, gx) gt = use_only_objgrad ? zeros(T, n) : T[] num_success_iters = 0 + ϕ = UncMerit(nlp, fx=fx, gx=gx) # Optimality measure project_step!(gpx, x, gx, ℓ, u, -one(T)) @@ -78,14 +80,13 @@ function tron(::Val{:Newton}, end αC = one(T) - tr = TRONTrustRegion(min(max(one(T), πx / 10), 100)) + Δ = min(max(one(T), πx / 10), 100) @info log_header([:iter, :f, :dual, :radius, :ratio, :cgstatus], [Int, T, T, T, T, String], hdr_override=Dict(:f=>"f(x)", :dual=>"π", :radius=>"Δ")) while !(optimal || tired || stalled || unbounded) # Current iteration xc .= x fc = fx - Δ = get_property(tr, :radius) H = hess_op!(nlp, xc, temp) αC, s, cauchy_status = cauchy(x, H, gx, Δ, αC, ℓ, u, μ₀=μ₀, μ₁=μ₁, σ=σ) @@ -107,25 +108,14 @@ function tron(::Val{:Newton}, obj(nlp, x) end - ared, pred, quad_min = aredpred(tr, nlp, fc, fx, qs, x, s, slope) - if pred ≥ 0 - status = :neg_pred - stalled = true - continue - end - tr.ratio = ared / pred - tr.quad_min = quad_min - @info log_row([iter, fx, πx, Δ, tr.ratio, cginfo]) + ϕ.fx = fc + tro = trust_region!(ϕ, xc, s, x, qs, Δ, method=tr_method, update_obj_at_x=false, update_obj_at_xt=false, ft=fx) - s_norm = nrm2(n, s) - if num_success_iters == 0 - tr.radius = min(Δ, s_norm) - end + @info log_row([iter, fx, πx, Δ, tro.ρ, cginfo]) - # Update the trust region - update!(tr, s_norm) + Δ = tro.Δ - if acceptable(tr) + if tro.success num_success_iters += 1 if use_only_objgrad gx .= gt @@ -141,11 +131,9 @@ function tron(::Val{:Newton}, push!(nlp, s, gn) gn .= gx end - end - - if !acceptable(tr) - fx = fc + else x .= xc + fx = fc end iter += 1 @@ -155,7 +143,7 @@ function tron(::Val{:Newton}, unbounded = fx < fmin end - @info log_row(Any[iter, fx, πx, get_property(tr, :radius)]) + @info log_row(Any[iter, fx, πx, Δ]) if tired if el_time > max_time diff --git a/src/trunk.jl b/src/trunk.jl index 1544f5b4..1b25c274 100644 --- a/src/trunk.jl +++ b/src/trunk.jl @@ -24,6 +24,7 @@ function trunk(::Val{:Newton}, atol :: Real=√eps(eltype(x)), rtol :: Real=√eps(eltype(x)), max_eval :: Int=-1, max_time :: Float64=30.0, + tr_method :: Symbol=:basic, bk_max :: Int=10, monotone :: Bool=true, nm_itmax :: Int=25) @@ -48,7 +49,8 @@ function trunk(::Val{:Newton}, ∇f = grad(nlp, x) ∇fNorm2 = nrm2(n, ∇f) ϵ = atol + rtol * ∇fNorm2 - tr = TrustRegion(min(max(∇fNorm2 / 10, one(T)), T(100))) + Δ = min(max(∇fNorm2 / 10, one(T)), T(100)) + ϕ = UncMerit(nlp, fx=f, gx=∇f) # Non-monotone mode parameters. # fmin: current best overall objective value @@ -85,39 +87,30 @@ function trunk(::Val{:Newton}, (s, cg_stats) = with_logger(subsolver_logger) do cg(H, -∇f, atol=T(atol), rtol=cgtol, - radius=get_property(tr, :radius), + radius=Δ, itmax=max(2 * n, 50)) end - # Compute actual vs. predicted reduction. - sNorm = nrm2(n, s) - copyaxpy!(n, one(T), s, x, xt) slope = dot(n, s, ∇f) curv = dot(n, s, H * s) Δq = slope + curv / 2 - ft = obj(nlp, xt) - ared, pred = aredpred(nlp, f, ft, Δq, xt, s, slope) - if pred ≥ 0 - status = :neg_pred - stalled = true - continue - end - tr.ratio = ared / pred - - if !monotone - ared_hist, pred_hist = aredpred(nlp, fref, ft, σref + Δq, xt, s, slope) - if pred_hist ≥ 0 - status = :neg_pred - stalled = true - continue - end - ρ_hist = ared_hist / pred_hist - set_property!(tr, :ratio, max(get_property(tr, :ratio), ρ_hist)) + tro = if monotone + ϕ.fx = f + trust_region!(ϕ, x, s, xt, Δq, Δ, method=tr_method, update_obj_at_x=false) + else + ϕ.fx = fref + trust_region!(ϕ, x, s, xt, Δq + σref, Δ, method=tr_method, update_obj_at_x=false) end + # if abs(Δq) < 10_000 * eps(T) || abs(tro.ared) < 10_000 * eps(T) * abs(f) + # @error("Small Δq") + # end + + ft = tro.ϕt + bk = 0 - if !acceptable(tr) + if !tro.success # Perform backtracking linesearch along s # Scaling s to the trust-region boundary, as recommended in # Algorithm 10.3.2 of the Trust-Region book @@ -126,12 +119,6 @@ function trunk(::Val{:Newton}, # slope *= get_property(tr, :radius) / sNorm # sNorm = get_property(tr, :radius) - if slope ≥ 0 - @error "not a descent direction: slope = $slope, ‖∇f‖ = $∇fNorm2" - status = :not_desc - stalled = true - continue - end α = one(T) while (bk < bk_max) && (ft > f + β * α * slope) bk = bk + 1 @@ -139,34 +126,26 @@ function trunk(::Val{:Newton}, copyaxpy!(n, α, s, x, xt) ft = obj(nlp, xt) end - sNorm *= α scal!(n, α, s) slope *= α Δq = slope + α * α * curv / 2 - ared, pred = aredpred(nlp, f, ft, Δq, xt, s, slope) - if pred ≥ 0 - status = :neg_pred - stalled = true - continue - end - tr.ratio = ared / pred - if !monotone - ared_hist, pred_hist = aredpred(nlp, fref, ft, σref + Δq, xt, s, slope) - if pred_hist ≥ 0 - status = :neg_pred - stalled = true - continue - end - ρ_hist = ared_hist / pred_hist - set_property!(tr, :ratio, max(get_property(tr, :ratio), ρ_hist)) + + tro = if monotone + ϕ.fx = f + trust_region!(ϕ, x, s, xt, Δq, Δ, method=tr_method, update_obj_at_x=false, update_obj_at_xt=false, ft=ft) + else + ϕ.fx = fref + trust_region!(ϕ, x, s, xt, Δq + σref, Δ, method=tr_method, update_obj_at_x=false, update_obj_at_xt=false, ft=ft) end + + ft = tro.ϕt end - @info log_row([iter, f, ∇fNorm2, get_property(tr, :radius), get_property(tr, :ratio), - length(cg_stats.residuals), bk, cg_stats.status]) + Δ = tro.Δ + @info log_row([iter, f, ∇fNorm2, Δ, tro.ρ, length(cg_stats.residuals), bk, cg_stats.status]) iter = iter + 1 - if acceptable(tr) + if tro.success # Update non-monotone mode parameters. if !monotone σref = σref + Δq @@ -194,7 +173,11 @@ function trunk(::Val{:Newton}, x .= xt f = ft - grad!(nlp, x, ∇f) + if tro.good_grad + ∇f .= tro.gt + else + grad!(nlp, x, ∇f) + end ∇fNorm2 = nrm2(n, ∇f) if isa(nlp, QuasiNewtonModel) @@ -205,15 +188,12 @@ function trunk(::Val{:Newton}, end end - # Move on. - update!(tr, sNorm) - optimal = ∇fNorm2 ≤ ϵ elapsed_time = time() - start_time tired = neval_obj(nlp) > max_eval ≥ 0 || elapsed_time > max_time solved = optimal || tired || stalled end - @info log_row(Any[iter, f, ∇fNorm2, get_property(tr, :radius)]) + @info log_row(Any[iter, f, ∇fNorm2, Δ]) if optimal status = :first_order