NLSolversBase.jl
Joint Hessian and Jacobian evaluation using the TwiceDifferentiable type
I am working on the Gauss-Newton method, but need a bit more control over the computation than what is provided in LsqFit.jl. What I was thinking of is to use the Newton()
algorithm in Optim.jl, but change the Hessian and gradient as follows:
"""
min 1/2 ||r(x)||^2
"""
# r(x) = .... # predictive function
# J_r(x) = .... # Jacobian of r
"""
    gauss_newton_fgh!(F, G, H, x)

Combined objective/gradient/Hessian callback (for `only_fgh!`) implementing
the Gauss-Newton approximation of `f(x) = 1/2 * ||r(x)||^2`:

  * gradient: `∇f(x) = J_r(x)' * r(x)`
  * Hessian (Gauss-Newton approximation): `H ≈ J_r(x)' * J_r(x)`

Each of `F`, `G`, `H` may be `nothing`, meaning that quantity is not
requested. Returns the objective value when `F !== nothing`, otherwise
`nothing`. Relies on `r` (residual) and `J_r` (its Jacobian) being defined
in the enclosing scope.
"""
function gauss_newton_fgh!(F, G, H, x)
    # Evaluate the residual at most once and share it between the
    # gradient and the objective (the original evaluated r(x) twice
    # when both were requested).
    r_k = (G !== nothing || F !== nothing) ? r(x) : nothing
    if G !== nothing || H !== nothing
        # Evaluate the Jacobian once; it is shared by gradient and Hessian.
        J = J_r(x)
        if H !== nothing
            # Gauss-Newton Hessian approximation (mutates H in place).
            H[:, :] = J' * J
        end
        if G !== nothing
            # Gradient of 1/2||r||^2 (mutates G in place).
            G[:] = J' * r_k
        end
    end
    if F !== nothing
        return dot(r_k, r_k) / 2
    end
    return nothing
end
TwiceDifferentiable(only_fgh!(gauss_newton_fgh!), x0)
However, from the TwiceDifferentiable
type
# Sketch of the existing TwiceDifferentiable type in NLSolversBase.jl
# (remaining fields elided with `....`). It stores separate callbacks for
# the objective (`f`), gradient (`df`), combined objective+gradient
# (`fdf`), and Hessian (`h`) — but no callback that evaluates the
# gradient and Hessian together.
mutable struct TwiceDifferentiable{T,TDF,TH,TX} <: AbstractObjective
f
df
fdf
h
....
end
and the constructor for inplace functions:
# Existing NLSolversBase constructor for an in-place combined fgh! callback.
# Each derived closure calls `t.fgh` with `nothing` in the slots that are
# not requested. Note that no closure requests G and H together, which is
# what forces the extra Jacobian evaluation discussed above.
# NOTE(review): the `where {TH}` parameter never occurs in the signature —
# presumably a leftover; confirm against the package source.
function TwiceDifferentiable(t::InPlaceFGH, x::AbstractVector, F::Real = real(zero(eltype(x))), G::AbstractVector = similar(x)) where {TH}
H = alloc_H(x, F)
f = x -> t.fgh(F, nothing, nothing, x)
df = (G, x) -> t.fgh(nothing, G, nothing, x)
fdf = (G, x) -> t.fgh(F, G, nothing, x)
h = (H, x) -> t.fgh(F, nothing, H, x)
TwiceDifferentiable(f, df, fdf, h, x, F, G, H)
end
it seems that the gradient and hessian cannot be evaluated simultaneously, and the Gauss-Newton above evaluates the Jacobian of r
one time more than needed.
Is there something I have missed, or would it make sense to add fields to TwiceDifferentiable
such that
# Proposed extension of the type sketched above: an additional `fdfh`
# field holds a callback that evaluates the objective, gradient, and
# Hessian simultaneously, so shared work (the Jacobian, in the
# Gauss-Newton case) is computed only once per point.
mutable struct TwiceDifferentiable{T,TDF,TH,TX} <: AbstractObjective
f
df
fdf
h
fdfh
....
end
# Proposed constructor: identical to the existing one, plus an `fdfh`
# closure that requests the objective, gradient, and Hessian in one call.
#
# Changes relative to the sketch above:
#   * `fdfh` takes `(G, H, x)` so the caller supplies both output arrays,
#     consistent with `df`/`fdf` which take `G` explicitly (the sketch
#     took only `(H, x)` and silently relied on the captured default `G`).
#   * dropped the `where {TH}` parameter, which never occurs in the
#     signature.
function TwiceDifferentiable(t::InPlaceFGH, x::AbstractVector,
                             F::Real = real(zero(eltype(x))),
                             G::AbstractVector = similar(x))
    H = alloc_H(x, F)                             # Hessian cache
    f = x -> t.fgh(F, nothing, nothing, x)        # objective only
    df = (G, x) -> t.fgh(nothing, G, nothing, x)  # gradient only
    fdf = (G, x) -> t.fgh(F, G, nothing, x)       # objective + gradient
    h = (H, x) -> t.fgh(F, nothing, H, x)         # Hessian (F slot kept as in the existing constructor)
    fdfh = (G, H, x) -> t.fgh(F, G, H, x)         # objective + gradient + Hessian in one evaluation
    TwiceDifferentiable(f, df, fdf, h, fdfh, x, F, G, H)
end
Edit: After some thinking, to avoid evaluating the jacobian of r
one more time, one can also wrap the r
function in a TwiceDifferentiable
object:
"""
Minimize f(x) = 1/2||r(x)||^2
"""
"""
    get_gauss_newton_df(dr, x0)

Build a `TwiceDifferentiable` for the Gauss-Newton objective
`f(x) = 1/2 * ||r(x)||^2`, where `dr` is a differentiable wrapper
(e.g. `OnceDifferentiable`) around the residual function `r`.

`jacobian!` and `value!` cache their results inside `dr`, so wrapping the
residual this way avoids re-evaluating the Jacobian when the gradient and
the Hessian are requested at the same point `x`.
"""
function get_gauss_newton_df(dr, x0)
    function gauss_newton_fgh!(F, G, H, x)
        if H !== nothing || G !== nothing
            # `jacobian!` is a cached no-op when `dr` was already
            # evaluated at this `x`, so the Jacobian is computed once.
            jacobian!(dr, x)
            J = jacobian(dr)
            if H !== nothing
                # Gauss-Newton Hessian approximation (in place).
                H[:, :] = J' * J
            end
            if G !== nothing
                # Original called `value!(dr)` without `x` — the cached
                # evaluators take the point as an argument; TODO confirm
                # against the NLSolversBase API.
                value!(dr, x)
                G[:] = J' * value(dr)
            end
        end
        if F !== nothing
            value!(dr, x)
            res = value(dr)
            return dot(res, res) / 2
        end
        return nothing
    end
    TwiceDifferentiable(only_fgh!(gauss_newton_fgh!), x0)
end
But I think the original question is still relevant.
I think it is a good idea, yes. Optim currently evaluates h
separately, but in extremely many cases it would be useful to evaluate them all at once, yes.