NNlib.jl conv_bias_act broken pullback

I was testing whether using conv_bias_act on the GPU would speed up an implementation. When I try to take the gradient, I get the following error:

ERROR: LoadError: MethodError: no method matching iterate(::ErrorException)
Closest candidates are:
  iterate(::Union{LinRange, StepRangeLen}) at range.jl:664
  iterate(::Union{LinRange, StepRangeLen}, ::Int64) at range.jl:664
  iterate(::T) where T<:Union{Base.KeySet{var"#s79", var"#s78"} where {var"#s79", var"#s78"<:Dict}, Base.ValueIterator{var"#s77"} where var"#s77"<:Dict} at dict.jl:693

The same error occurs on both CPU and GPU, with both 1D and 2D convolutions. I wrote an "MWE" that pulls together the pieces needed to reproduce it in an environment that has Flux and CUDA installed.

Details

Versions

[052768ef] CUDA v3.3.0
[587475ba] Flux v0.12.4
[872c559c] NNlib v0.7.22
[a00861dc] NNlibCUDA v0.1.3

MWE

using Flux, CUDA
using CUDA.CUDNN: scalingParameter, CUDNN_CONVOLUTION, convdims, 
                  cudnnConvolutionDescriptor, cudnnConvolutionBwdDataAlgoPerf,
                  cudnnConvolutionForward!, cudnnConvolutionBwdFilterAlgoPerf,
                  cudnnConvolutionBackwardData, cudnnConvolutionBackwardFilter,
                  cudnnConvolutionBackwardBias, CUDNN_ACTIVATION_IDENTITY,
                  CUDNN_ACTIVATION_RELU

# Element types that the `NNlib.conv_bias_act!` method defined below is restricted to
# (it is declared with `where T<:CUDNNFloat`).
const CUDNNFloat = Union{Float16,Float32,Float64}

# From https://github.com/FluxML/Flux.jl/pull/1302
# Forward pass of a `Conv` layer routed through the fused `conv_bias_act` call.
# The unfused equivalent would be `σ.(conv(x, c.weight, cdims) .+ b)`.
function (c::Conv)(x::AbstractArray)
    activation = c.σ
    # Reshape the bias to one singleton dimension per entry of `c.stride`,
    # followed by `:` and a trailing singleton dimension.
    leading_ones = ntuple(_->1, length(c.stride))
    b = reshape(c.bias, leading_ones..., :, 1)
    cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation)
    return conv_bias_act(x, c.weight, cdims, b, activation)
end

# From https://github.com/FluxML/Flux.jl/pull/1302
# `Flux.Zeros` bias: the bias argument is not used; `σ` is broadcast over the
# plain convolution result.
function NNlib.conv_bias_act(x, w, cdims::DenseConvDims, b::Flux.Zeros, σ)
    convolved = conv(x, w, cdims)
    return σ.(convolved)
end
# `CuArray` inputs with a `Flux.Zeros` bias: collect the bias, move it with
# `gpu`, and re-dispatch to `conv_bias_act` with that array as the bias.
function NNlib.conv_bias_act(x::CuArray, w::CuArray{T}, cdims::DenseConvDims, b::Flux.Zeros, σ) where T
  return conv_bias_act(x, w, cdims, gpu(collect(b)), σ)
end

# https://github.com/FluxML/NNlibCUDA.jl/blob/master/src/cudnn/conv.jl#L51
# cuDNN-backed convolution + bias + activation for dense CUDA arrays.
#
# `σ == NNlib.relu` is passed to cuDNN as CUDNN_ACTIVATION_RELU; any other `σ` is passed as
# CUDNN_ACTIVATION_IDENTITY and, unless it is `nothing` or `identity`, broadcast over the
# result afterwards. That broadcast produces a new array, so in that case the returned
# value is not the `y` that was passed in.
#
# The `algo` keyword only triggers a deprecation warning (shown at most once).
function NNlib.conv_bias_act!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T}, 
                            cdims::DenseConvDims, bias::DenseCuArray{T}, σ=identity;
                            z::DenseCuArray{T}=y, alpha=1, beta=0, algo=-1) where T<:CUDNNFloat
    # if cudnnversion() < v"6"
    #     all(x -> x == 1, dilation(cdims)) || error("Only dilation = 1 is supported in cuDNN version < 6")
    # end
    algo != -1 && @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1

    convdesc = cudnnConvolutionDescriptor(cdims, x)

    # only relu and identity are supported by cudnnConvolutionForward!
    mode = if σ == NNlib.relu
        CUDA.CUDNN.CUDNN_ACTIVATION_RELU
    else
        CUDA.CUDNN.CUDNN_ACTIVATION_IDENTITY
    end
    cudnnConvolutionForward!(y, w, x, convdesc;
                             z=z, bias=bias, activation=mode, alpha=alpha, beta=beta)

    # Nothing left to do if the activation was handed to cuDNN or is a no-op.
    if mode !== CUDA.CUDNN.CUDNN_ACTIVATION_IDENTITY || σ ∈ (nothing, identity)
        return y
    end
    # Any other activation is applied here by broadcasting.
    return σ.(y)
end

# CPU reproduction with a 1D convolution: one forward pass, then the gradient of
# `sum(abs2, layer(input))` with respect to the layer's parameters.
function oned_test()
    # Sequential MNIST size
    input = randn(Float32, 782, 1, 32)
    layer = Conv((3,), 1=>2, relu)
    layer(input)  # forward pass on its own; the result is not used
    return gradient(() -> sum(abs2, layer(input)), Flux.params(layer))
end

# GPU reproduction with a 1D convolution: one forward pass, then the gradient of
# `sum(abs2, layer(input))` with respect to the layer's parameters.
function oned_test_gpu()
    # Sequential MNIST size
    input = CUDA.randn(Float32, 782, 1, 32)
    layer = gpu(Conv((3,), 1=>2, relu))
    layer(input)  # forward pass on its own; the result is not used
    return gradient(() -> sum(abs2, layer(input)), Flux.params(layer))
end

# CPU reproduction with a 2D convolution: one forward pass, then the gradient of
# `sum(abs2, layer(input))` with respect to the layer's parameters.
function twod_test()
    # MNIST size
    input = randn(Float32, 28, 28, 1, 32)
    layer = Conv((3,3), 1=>2, relu)
    layer(input)  # forward pass on its own; the result is not used
    return gradient(() -> sum(abs2, layer(input)), Flux.params(layer))
end

# GPU reproduction with a 2D convolution: one forward pass, then the gradient of
# `sum(abs2, layer(input))` with respect to the layer's parameters.
function twod_test_gpu()
    # MNIST size
    input = CUDA.randn(Float32, 28, 28, 1, 32)
    layer = gpu(Conv((3,3), 1=>2, relu))
    layer(input)  # forward pass on its own; the result is not used
    return gradient(() -> sum(abs2, layer(input)), Flux.params(layer))
end

# CPU reproductions (inputs created with `randn`).
oned_test()
twod_test()

# GPU reproductions (inputs created with `CUDA.randn`, layer moved with `gpu`).
oned_test_gpu()
twod_test_gpu()

Each of these fails with the same error.

Jun 22 '21 15:06 carterjgreen

I'm assuming the forward passes work as expected?

Jun 22 '21 15:06 DhairyaLGandhi

Yes, the forward pass works fine and on the GPU the CUDNN activation works as well.

Jun 22 '21 16:06 carterjgreen

Should be helped by https://github.com/FluxML/Zygote.jl/pull/1026

Jul 16 '21 10:07 DhairyaLGandhi

The new error message with Zygote v0.6.17 and NNlib v0.7.25 is

ERROR: LoadError: UndefVarError: xs not defined
Stacktrace:
  [1] (::Zygote.var"#442#443")(#unused#::Nothing)
    @ Zygote C:\Users\Carter\.julia\packages\Zygote\TaBlo\src\lib\array.jl:79
  [2] (::Zygote.var"#2351#back#444"{Zygote.var"#442#443"})(Δ::Nothing)
    @ Zygote C:\Users\Carter\.julia\packages\ZygoteRules\OjfTt\src\adjoint.jl:59
  [3] Pullback
    @ .\broadcast.jl:894 [inlined]
  [4] Pullback
    @ .\broadcast.jl:891 [inlined]
  [5] Pullback
    @ .\broadcast.jl:887 [inlined]
  [6] Pullback
    @ C:\Users\Carter\.julia\packages\NNlib\YKZXm\src\conv_bias_act.jl:13 [inlined]
  [7] (::typeof(∂(#conv_bias_act!#228)))(Δ::Nothing)
    @ Zygote C:\Users\Carter\.julia\packages\Zygote\TaBlo\src\compiler\interface2.jl:0
  [8] Pullback
    @ C:\Users\Carter\.julia\packages\NNlib\YKZXm\src\conv_bias_act.jl:12 [inlined]
  [9] (::typeof(∂(conv_bias_act!)))(Δ::Nothing)
    @ Zygote C:\Users\Carter\.julia\packages\Zygote\TaBlo\src\compiler\interface2.jl:0
 [10] Pullback
    @ C:\Users\Carter\.julia\packages\NNlib\YKZXm\src\conv_bias_act.jl:24 [inlined]
 [11] (::typeof(∂(#conv_bias_act!#230)))(Δ::Nothing)
    @ Zygote C:\Users\Carter\.julia\packages\Zygote\TaBlo\src\compiler\interface2.jl:0
 [12] Pullback
    @ C:\Users\Carter\.julia\packages\NNlib\YKZXm\src\conv_bias_act.jl:24 [inlined]
 [13] (::typeof(∂(conv_bias_act!)))(Δ::Nothing)
    @ Zygote C:\Users\Carter\.julia\packages\Zygote\TaBlo\src\compiler\interface2.jl:0
 [14] Pullback
    @ C:\Users\Carter\.julia\packages\NNlib\YKZXm\src\conv_bias_act.jl:6 [inlined]
 [15] (::typeof(∂(#conv_bias_act#227)))(Δ::Array{Float32, 4})
    @ Zygote C:\Users\Carter\.julia\packages\Zygote\TaBlo\src\compiler\interface2.jl:0
 [16] Pullback
    @ C:\Users\Carter\.julia\packages\NNlib\YKZXm\src\conv_bias_act.jl:5 [inlined]
 [17] (::typeof(∂(conv_bias_act)))(Δ::Array{Float32, 4})
    @ Zygote C:\Users\Carter\.julia\packages\Zygote\TaBlo\src\compiler\interface2.jl:0
.....
 [21] (::typeof(∂(λ)))(Δ::Float32)
    @ Zygote C:\Users\Carter\.julia\packages\Zygote\TaBlo\src\compiler\interface2.jl:0
 [22] (::Zygote.var"#90#91"{Zygote.Params, typeof(∂(λ)), Zygote.Context})(Δ::Float32)
    @ Zygote C:\Users\Carter\.julia\packages\Zygote\TaBlo\src\compiler\interface.jl:348
 [23] gradient(f::Function, args::Zygote.Params)
    @ Zygote C:\Users\Carter\.julia\packages\Zygote\TaBlo\src\compiler\interface.jl:76
 [24] oned_test()

Jul 22 '21 13:07 carterjgreen

Ref https://github.com/FluxML/Zygote.jl/pull/1038

Could you try with master? I don't think we've added any adjoint for conv_bias_act specifically.

Jul 22 '21 15:07 DhairyaLGandhi

On master, it now hits the copyto! error message.

ERROR: LoadError: Mutating arrays is not supported -- called copyto!(::Tuple{Array{Float32, 5}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{5}, NTuple{5, Base.OneTo{Int64}}, typeof(identity), Tuple{Array{Float32, 5}}}}..., _...)
Stacktrace:
  [1] error(s::String)  
    @ Base .\error.jl:33
  [2] (::Zygote.var"#442#443"{Tuple{Array{Float32, 5}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{5}, NTuple{5, Base.OneTo{Int64}}, typeof(identity), Tuple{Array{Float32, 5}}}}})(#unused#::Nothing)
    @ Zygote C:\Users\Carter\.julia\packages\Zygote\3dpuu\src\lib\array.jl:79
  [3] (::Zygote.var"#2351#back#444"{Zygote.var"#442#443"{Tuple{Array{Float32, 5}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{5}, NTuple{5, Base.OneTo{Int64}}, typeof(identity), Tuple{Array{Float32, 5}}}}}})(Δ::Nothing)
    @ Zygote C:\Users\Carter\.julia\packages\ZygoteRules\OjfTt\src\adjoint.jl:59

Without a pullback, I guess that this issue blocks https://github.com/FluxML/Flux.jl/pull/1302.

Edit: and on CUDA:

ERROR: LoadError: Compiling Tuple{typeof(CUDA.APIUtils.with_workspace), CUDA.CUDNN.var"#625#627"{CuArray{Float32, 4}, CUDA.CUDNN.cudnnActivationMode_t, cudnnConvolutionDescriptor, CUDA.CUDNN.cudnnFilterDescriptor, CUDA.CUDNN.cudnnTensorDescriptor, CUDA.CUDNN.cudnnTensorDescriptor, Base.RefValue{Float32}, Base.RefValue{Float32}, CuArray{Float32, 4}, CuArray{Float32, 4}, CUDA.CUDNN.cudnnConvolutionFwdAlgoPerfStruct}, Type{UInt8}, CUDA.APIUtils.var"#1#2"{UInt64}, Nothing}: try/catch is not supported.

Jul 23 '21 13:07 carterjgreen

NNlib.jl NNlib.jl copied to clipboard

conv_bias_act broken pullback

NNlib.jl
NNlib.jl copied to clipboard