
Joint Hessian and Jacobian evaluation using the TwiceDifferentiable type

Open · hcarlsso opened this issue 4 years ago · 1 comment

I am working on the Gauss-Newton method, but need a bit more control over the computation than what LsqFit.jl provides. What I was thinking of is to use the Newton() algorithm in Optim.jl, but supply the Gauss-Newton Hessian and gradient as follows:
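
(For reference, these are the standard Gauss-Newton identities the code below relies on: for f(x) = 1/2 ||r(x)||^2 the gradient is J_r(x)' r(x), and the exact Hessian J_r(x)' J_r(x) + sum_i r_i(x) * hess(r_i)(x) is approximated by dropping the second-order term, giving H ≈ J_r(x)' J_r(x).)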

"""
    min 1/2 ||r(x)||^2
"""
# r(x) = ....  # predictive function
# J_r(x) = .... # Jacobian of r
function gauss_newton_fgh!(F, G, H, x)
    if any([!(H == nothing), !(G == nothing)])
        J = J_r(x)
        if !(H == nothing)
            # mutating calculations specific to h!
            H[:,:] = J'*J
        end
        if !(G == nothing)
            G[:] = J'*r(x)
            # mutating calculations specific to g!
        end
    end
    if !(F == nothing)
        r_k = r(x)
        return dot(r_k, r_k)/2.0
    end
end
TwiceDifferentiable(only_fgh!(gauss_newton_fgh!), x0)
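
For context, a rough sketch of how I would then hand this objective to Optim's Newton() (x0, r and J_r as above; just a sketch, not tested):

using Optim

df  = TwiceDifferentiable(only_fgh!(gauss_newton_fgh!), x0)   # as above
res = Optim.optimize(df, x0, Newton())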

However, from the TwiceDifferentiable type

mutable struct TwiceDifferentiable{T,TDF,TH,TX} <: AbstractObjective
    f
    df
    fdf
    h
    ....
end

and the constructor for in-place functions:

function TwiceDifferentiable(t::InPlaceFGH, x::AbstractVector, F::Real = real(zero(eltype(x))), G::AbstractVector = similar(x)) where {TH}

    H = alloc_H(x, F)
    f   =     x  -> t.fgh(F, nothing, nothing, x)
    df  = (G, x) -> t.fgh(nothing, G, nothing, x)
    fdf = (G, x) -> t.fgh(F, G, nothing, x)
    h   = (H, x) -> t.fgh(F, nothing, H, x)
    TwiceDifferentiable(f, df, fdf, h, x, F, G, H)
end

it seems that the gradient and Hessian cannot be evaluated simultaneously, so the Gauss-Newton objective above evaluates the Jacobian of r one more time than necessary.
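
To spell it out, with the constructor above a Newton iteration effectively has to make two separate calls through the existing accessors (value_gradient! and hessian!), so fgh! is entered twice:

value_gradient!(df, x)   # calls fgh!(F, G, nothing, x) and evaluates J_r(x)
hessian!(df, x)          # calls fgh!(F, nothing, H, x) and evaluates J_r(x) again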

Is there something I have missed, or would it make sense to add fields to TwiceDifferentiable such that

mutable struct TwiceDifferentiable{T,TDF,TH,TX} <: AbstractObjective
    f
    df
    fdf
    h
    fdfh
    ....
end

function TwiceDifferentiable(t::InPlaceFGH, x::AbstractVector, F::Real = real(zero(eltype(x))), G::AbstractVector = similar(x)) where {TH}

    H = alloc_H(x, F)
    f   =     x  -> t.fgh(F, nothing, nothing, x)
    df  = (G, x) -> t.fgh(nothing, G, nothing, x)
    fdf = (G, x) -> t.fgh(F, G, nothing, x)
    h   = (H, x) -> t.fgh(F, nothing, H, x)
    fdfh = (H, x) -> t.fgh(F, G, H, x)
    TwiceDifferentiable(f, df, fdf, h, fdfh, x, F, G, H)
end

Edit: After some thinking, to avoid evaluating the Jacobian of r one more time, one can also wrap the r function in a TwiceDifferentiable object, so that its value and Jacobian are cached and re-used between the F, G and H branches:

"""
Minimize f(x) = 1/2||r(x)||^2
"""
function get_gauss_newton_df(dr, x0)
    function gauss_newton_fgh!(F, G, H, x)
        if !(H == nothing)
            jacobian!(dr, x)
            J_r = jacobian(dr)
            # mutating calculations specific to h!
            H[:,:] = J_r'*J_r
        end
        if !(G == nothing)
            jacobian!(dr, x)
            J_r = jacobian(dr)

            value!(dr)
            r = value(dr)
            G[:] = J_r'*r
            # mutating calculations specific to g!
        end
        if !(F == nothing)
            value!(dr)
            r = value(dr)
            return dot(r, r)/2.0
        end
    end
    TwiceDifferentiable(only_fgh!(gauss_newton_fgh!), x0)
end
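
For completeness, a sketch of how dr could be set up and used (the residual here is just a made-up toy example, and I am assuming the vector-valued OnceDifferentiable constructor with the inplace/autodiff keywords, since that is what gives the value!/jacobian! caching above):

using NLSolversBase, Optim

r(x) = [x[1] - 1.0, 10.0 * (x[2] - x[1]^2)]   # toy residual, for illustration only
x0   = [0.0, 0.0]

dr = OnceDifferentiable(r, x0, similar(x0); inplace = false, autodiff = :forward)
df = get_gauss_newton_df(dr, x0)

res = Optim.optimize(df, x0, Newton())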

But I think the original question is still relevant.

hcarlsso · May 06 '20 07:05

I think it is a good idea, yes. Optim currently evaluates h separately, but in very many cases it would be useful to evaluate them all at once, yes.

pkofod · May 26 '20 12:05