Skip to content

[WIP] Update method to use the new SolverTools #43

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 9 additions & 20 deletions src/lbfgs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ function lbfgs(nlp :: AbstractNLPModel;
atol :: Real=√eps(eltype(x)), rtol :: Real=√eps(eltype(x)),
max_eval :: Int=-1,
max_time :: Float64=30.0,
verbose :: Bool=true,
ls_method :: Symbol=:armijo_wolfe,
mem :: Int=5)

if !unconstrained(nlp)
Expand All @@ -25,11 +25,12 @@ function lbfgs(nlp :: AbstractNLPModel;
n = nlp.meta.nvar

xt = zeros(T, n)
ft = zeros(T, n)
fold = zeros(T, n)

f = obj(nlp, x)
∇f = grad(nlp, x)
H = InverseLBFGSOperator(T, n, mem, scaling=true)
ϕ = UncMerit(nlp, fx=f, gx=∇f)

∇fNorm = nrm2(n, ∇f)
ϵ = atol + rtol * ∇fNorm
Expand All @@ -43,34 +44,22 @@ function lbfgs(nlp :: AbstractNLPModel;
stalled = false
status = :unknown

h = LineModel(nlp, x, ∇f)

while !(optimal || tired || stalled)
d = - H * ∇f
slope = dot(n, d, ∇f)
if slope ≥ 0
@error "not a descent direction" slope
status = :not_desc
stalled = true
continue
end

redirect!(h, x, d)
∇fold .= ∇f
# Perform improved Armijo linesearch.
t, good_grad, ft, nbk, nbW = armijo_wolfe(h, f, slope, ∇ft, τ₁=T(0.9999), bk_max=25, verbose=false)
lso = linesearch!(ϕ, x, d, xt, method=ls_method, bk_max=25)

@info log_row(Any[iter, f, ∇fNorm, slope, nbk])
@info log_row(Any[iter, f, ∇fNorm, dot(d, ∇fold), lso.specific[:nbk]])

copyaxpy!(n, t, d, x, xt)
good_grad || grad!(nlp, xt, ∇ft)
lso.good_grad || grad!(nlp, xt, ∇f)

# Update L-BFGS approximation.
push!(H, t * d, ∇ft - ∇f)
push!(H, lso.t * d, ∇f - ∇fold)

# Move on.
x .= xt
f = ft
∇f .= ∇ft
ϕ.fx = f = lso.ϕt

∇fNorm = nrm2(n, ∇f)
iter = iter + 1
Expand Down
34 changes: 11 additions & 23 deletions src/tron.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ function tron(::Val{:Newton},
max_time :: Real=30.0,
max_cgiter :: Int=nlp.meta.nvar,
use_only_objgrad :: Bool=false,
tr_method :: Symbol=:tron,
cgtol :: Real=eltype(x)(0.1),
atol :: Real=√eps(eltype(x)),
rtol :: Real=√eps(eltype(x)),
Expand Down Expand Up @@ -60,6 +61,7 @@ function tron(::Val{:Newton},
fx, _ = objgrad!(nlp, x, gx)
gt = use_only_objgrad ? zeros(T, n) : T[]
num_success_iters = 0
ϕ = UncMerit(nlp, fx=fx, gx=gx)

# Optimality measure
project_step!(gpx, x, gx, ℓ, u, -one(T))
Expand All @@ -78,14 +80,13 @@ function tron(::Val{:Newton},
end

αC = one(T)
tr = TRONTrustRegion(min(max(one(T), πx / 10), 100))
Δ = min(max(one(T), πx / 10), 100)
@info log_header([:iter, :f, :dual, :radius, :ratio, :cgstatus], [Int, T, T, T, T, String],
hdr_override=Dict(:f=>"f(x)", :dual=>"π", :radius=>"Δ"))
while !(optimal || tired || stalled || unbounded)
# Current iteration
xc .= x
fc = fx
Δ = get_property(tr, :radius)
H = hess_op!(nlp, xc, temp)

αC, s, cauchy_status = cauchy(x, H, gx, Δ, αC, ℓ, u, μ₀=μ₀, μ₁=μ₁, σ=σ)
Expand All @@ -107,25 +108,14 @@ function tron(::Val{:Newton},
obj(nlp, x)
end

ared, pred, quad_min = aredpred(tr, nlp, fc, fx, qs, x, s, slope)
if pred ≥ 0
status = :neg_pred
stalled = true
continue
end
tr.ratio = ared / pred
tr.quad_min = quad_min
@info log_row([iter, fx, πx, Δ, tr.ratio, cginfo])
ϕ.fx = fc
tro = trust_region!(ϕ, xc, s, x, qs, Δ, method=tr_method, update_obj_at_x=false, update_obj_at_xt=false, ft=fx)

s_norm = nrm2(n, s)
if num_success_iters == 0
tr.radius = min(Δ, s_norm)
end
@info log_row([iter, fx, πx, Δ, tro.ρ, cginfo])

# Update the trust region
update!(tr, s_norm)
Δ = tro.Δ

if acceptable(tr)
if tro.success
num_success_iters += 1
if use_only_objgrad
gx .= gt
Expand All @@ -141,11 +131,9 @@ function tron(::Val{:Newton},
push!(nlp, s, gn)
gn .= gx
end
end

if !acceptable(tr)
fx = fc
else
x .= xc
fx = fc
end

iter += 1
Expand All @@ -155,7 +143,7 @@ function tron(::Val{:Newton},
unbounded = fx < fmin

end
@info log_row(Any[iter, fx, πx, get_property(tr, :radius)])
@info log_row(Any[iter, fx, πx, Δ])

if tired
if el_time > max_time
Expand Down
90 changes: 35 additions & 55 deletions src/trunk.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ function trunk(::Val{:Newton},
atol :: Real=√eps(eltype(x)), rtol :: Real=√eps(eltype(x)),
max_eval :: Int=-1,
max_time :: Float64=30.0,
tr_method :: Symbol=:basic,
bk_max :: Int=10,
monotone :: Bool=true,
nm_itmax :: Int=25)
Expand All @@ -48,7 +49,8 @@ function trunk(::Val{:Newton},
∇f = grad(nlp, x)
∇fNorm2 = nrm2(n, ∇f)
ϵ = atol + rtol * ∇fNorm2
tr = TrustRegion(min(max(∇fNorm2 / 10, one(T)), T(100)))
Δ = min(max(∇fNorm2 / 10, one(T)), T(100))
ϕ = UncMerit(nlp, fx=f, gx=∇f)

# Non-monotone mode parameters.
# fmin: current best overall objective value
Expand Down Expand Up @@ -85,39 +87,30 @@ function trunk(::Val{:Newton},
(s, cg_stats) = with_logger(subsolver_logger) do
cg(H, -∇f,
atol=T(atol), rtol=cgtol,
radius=get_property(tr, :radius),
radius=Δ,
itmax=max(2 * n, 50))
end

# Compute actual vs. predicted reduction.
sNorm = nrm2(n, s)
copyaxpy!(n, one(T), s, x, xt)
slope = dot(n, s, ∇f)
curv = dot(n, s, H * s)
Δq = slope + curv / 2
ft = obj(nlp, xt)

ared, pred = aredpred(nlp, f, ft, Δq, xt, s, slope)
if pred ≥ 0
status = :neg_pred
stalled = true
continue
end
tr.ratio = ared / pred

if !monotone
ared_hist, pred_hist = aredpred(nlp, fref, ft, σref + Δq, xt, s, slope)
if pred_hist ≥ 0
status = :neg_pred
stalled = true
continue
end
ρ_hist = ared_hist / pred_hist
set_property!(tr, :ratio, max(get_property(tr, :ratio), ρ_hist))
tro = if monotone
ϕ.fx = f
trust_region!(ϕ, x, s, xt, Δq, Δ, method=tr_method, update_obj_at_x=false)
else
ϕ.fx = fref
trust_region!(ϕ, x, s, xt, Δq + σref, Δ, method=tr_method, update_obj_at_x=false)
end

# if abs(Δq) < 10_000 * eps(T) || abs(tro.ared) < 10_000 * eps(T) * abs(f)
# @error("Small Δq")
# end

ft = tro.ϕt

bk = 0
if !acceptable(tr)
if !tro.success
# Perform backtracking linesearch along s
# Scaling s to the trust-region boundary, as recommended in
# Algorithm 10.3.2 of the Trust-Region book
Expand All @@ -126,47 +119,33 @@ function trunk(::Val{:Newton},
# slope *= get_property(tr, :radius) / sNorm
# sNorm = get_property(tr, :radius)

if slope ≥ 0
@error "not a descent direction: slope = $slope, ‖∇f‖ = $∇fNorm2"
status = :not_desc
stalled = true
continue
end
α = one(T)
while (bk < bk_max) && (ft > f + β * α * slope)
bk = bk + 1
α /= T(1.2)
copyaxpy!(n, α, s, x, xt)
ft = obj(nlp, xt)
end
sNorm *= α
scal!(n, α, s)
slope *= α
Δq = slope + α * α * curv / 2
ared, pred = aredpred(nlp, f, ft, Δq, xt, s, slope)
if pred ≥ 0
status = :neg_pred
stalled = true
continue
end
tr.ratio = ared / pred
if !monotone
ared_hist, pred_hist = aredpred(nlp, fref, ft, σref + Δq, xt, s, slope)
if pred_hist ≥ 0
status = :neg_pred
stalled = true
continue
end
ρ_hist = ared_hist / pred_hist
set_property!(tr, :ratio, max(get_property(tr, :ratio), ρ_hist))

tro = if monotone
ϕ.fx = f
trust_region!(ϕ, x, s, xt, Δq, Δ, method=tr_method, update_obj_at_x=false, update_obj_at_xt=false, ft=ft)
else
ϕ.fx = fref
trust_region!(ϕ, x, s, xt, Δq + σref, Δ, method=tr_method, update_obj_at_x=false, update_obj_at_xt=false, ft=ft)
end

ft = tro.ϕt
end

@info log_row([iter, f, ∇fNorm2, get_property(tr, :radius), get_property(tr, :ratio),
length(cg_stats.residuals), bk, cg_stats.status])
Δ = tro.Δ
@info log_row([iter, f, ∇fNorm2, Δ, tro.ρ, length(cg_stats.residuals), bk, cg_stats.status])
iter = iter + 1

if acceptable(tr)
if tro.success
# Update non-monotone mode parameters.
if !monotone
σref = σref + Δq
Expand Down Expand Up @@ -194,7 +173,11 @@ function trunk(::Val{:Newton},

x .= xt
f = ft
grad!(nlp, x, ∇f)
if tro.good_grad
∇f .= tro.gt
else
grad!(nlp, x, ∇f)
end
∇fNorm2 = nrm2(n, ∇f)

if isa(nlp, QuasiNewtonModel)
Expand All @@ -205,15 +188,12 @@ function trunk(::Val{:Newton},
end
end

# Move on.
update!(tr, sNorm)

optimal = ∇fNorm2 ≤ ϵ
elapsed_time = time() - start_time
tired = neval_obj(nlp) > max_eval ≥ 0 || elapsed_time > max_time
solved = optimal || tired || stalled
end
@info log_row(Any[iter, f, ∇fNorm2, get_property(tr, :radius)])
@info log_row(Any[iter, f, ∇fNorm2, Δ])

if optimal
status = :first_order
Expand Down