ReverseDiff.jl
Combine Dual numbers with compiled gradient tape
It seems that in some cases the compiled tape does not return the correct gradient. The following code computes a Hessian column by column by seeding Dual numbers through the tape:
using ReverseDiff, ForwardDiff
using ForwardDiff: Dual

function get_hessian_reversediff(params0::AbstractArray{T}) where T
    # Record the tape once with Dual-valued input, then compile it.
    # `embedding_loss` and `k` are defined in the benchmark linked below.
    tape2 = ReverseDiff.GradientTape(embedding_loss, (Dual.(randn(k, 10), zeros(k, 10)),))
    ctape2 = ReverseDiff.compile(tape2)
    get_hessian_reversediff(ctape2, params0)
end
function get_hessian_reversediff(tape, params0::AbstractArray{T}) where T
    N = length(params0)
    params = Dual.(params0, zero(T))
    hes = zeros(T, N, N)
    for i = 1:N
        # Seed a unit perturbation on the i-th entry; clear the previous seed.
        @inbounds i != 1 && (params[i-1] = Dual(params0[i-1], zero(T)))
        @inbounds params[i] = Dual(params0[i], one(T))
        res = ReverseDiff.gradient!(tape, (params,))[1]      # compiled tape
        res2 = ReverseDiff.gradient(embedding_loss, params)  # uncompiled
        h1 = vec(ForwardDiff.partials.(res, 1))
        h2 = vec(ForwardDiff.partials.(res2, 1))
        @show h1 - h2  # they are different!
        hes[:, i] .= h1
    end
    hes
end
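To run this outside the benchmark, embedding_loss and k have to be supplied. A minimal stand-in could look like the following; the loss and dimension here are purely hypothetical placeholders, not the real embedding model:

k = 2                                    # hypothetical dimension
embedding_loss(x) = sum(abs2, x .- 0.5)  # hypothetical placeholder loss

params0 = randn(k, 10)
hes = get_hessian_reversediff(params0)   # prints h1 - h2 for each column

With a loss this simple the two gradients may well agree; the real embedding_loss from the benchmark linked below is what triggers the mismatch.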
With the real embedding_loss, it turns out that h1 - h2 is not zero: the compiled tape returns a gradient slightly different from the uncompiled version. I checked that the uncompiled gradient is correct.
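One way to narrow this down (a sketch of mine, not part of the original report) is to first compare the compiled and uncompiled tapes on plain Float64 input; if they agree there, the discrepancy is specific to recording and replaying the tape with Dual numbers:

x0 = randn(k, 10)  # embedding_loss and k as in the benchmark
tape = ReverseDiff.GradientTape(embedding_loss, (x0,))
ctape = ReverseDiff.compile(tape)
g_compiled = ReverseDiff.gradient!(ctape, (x0,))[1]
g_plain = ReverseDiff.gradient(embedding_loss, x0)
@show maximum(abs, g_compiled .- g_plain)  # ≈ 0 if the Float64 tapes agree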
To reproduce the result, please see https://github.com/JuliaReverse/NiGraphEmbedding.jl/blob/master/benchmarks/benchmark.jl
What is the correct way to obtain the gradient by forward differentiating through a reverse-mode program?
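Two alternatives that avoid seeding Duals through a compiled tape by hand (a sketch, assuming embedding_loss accepts Dual-valued entries; not a confirmed fix): let ForwardDiff drive the outer forward pass over the uncompiled reverse-mode gradient, or use ReverseDiff's built-in HessianTape:

using ReverseDiff, ForwardDiff

# Forward-over-reverse without a compiled tape: ForwardDiff seeds the Duals
# and ReverseDiff re-records the tape on every call, so no compiled state
# is reused across evaluations.
hess_fwd_over_rev(params0) =
    ForwardDiff.jacobian(x -> ReverseDiff.gradient(embedding_loss, x), params0)

# ReverseDiff's built-in Hessian support with a compiled HessianTape.
htape = ReverseDiff.compile(ReverseDiff.HessianTape(embedding_loss, params0))
hes = similar(params0, length(params0), length(params0))
ReverseDiff.hessian!(hes, htape, params0)

Note that if the compiled GradientTape misbehaves here, a compiled HessianTape may deserve the same scrutiny; the uncompiled forward-over-reverse closure is the safer baseline to compare against.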