remove `@adjoint` for `hv/h/v/cat`
Closes #1217
Seems fine locally; let's see what the integration tests say.
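As a rough local sanity check (not the test suite; the example values below are just illustrative), gradients of these functions should now come straight from ChainRules' `rrule`s:

```julia
using Zygote

# With the @adjoint definitions removed, these should hit ChainRules' rrules.
@assert gradient(x -> sum(vcat(x, x)), [1.0, 2.0])[1] == [2.0, 2.0]
@assert gradient(x -> sum(hcat(x, 2 .* x)), [1.0, 2.0])[1] == [3.0, 3.0]
@assert gradient(x -> sum(cat(x, x; dims=3)), [1.0 2.0])[1] == [2.0 2.0]
```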
The GPU failure on https://github.com/FluxML/Zygote.jl/blob/528e0be677d1feb9ccf6fc4ab298f4d8a106de10/test/cuda.jl#L134 looks real, but is masked on CI by an odd show-related error. Running it standalone:
```julia
julia> gradient((x,y) -> sum(vcat(x,y)), r, 5)
ERROR: MethodError: no method matching ndims(::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{0}, Nothing, typeof(identity), Tuple{CuArray{Float32, 0, CUDA.Mem.DeviceBuffer}}})
Closest candidates are:
  ndims(::CUDA.CUSPARSE.CuSparseMatrix) at ~/.julia/packages/CUDA/DfvRa/lib/cusparse/array.jl:225
  ndims(::CUDA.CUSPARSE.CuSparseDeviceMatrixBSR) at ~/.julia/packages/CUDA/DfvRa/lib/cusparse/device.jl:64
  ndims(::Base.Iterators.ProductIterator) at iterators.jl:1015
  ...
Stacktrace:
  [1] check_reducedims(R::CuArray{Float32, 0, CUDA.Mem.DeviceBuffer}, A::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{0}, Nothing, typeof(identity), Tuple{CuArray{Float32, 0, CUDA.Mem.DeviceBuffer}}})
    @ Base ./reducedim.jl:244
  [2] mapreducedim!(f::typeof(identity), op::typeof(Base.add_sum), R::CuArray{Float32, 0, CUDA.Mem.DeviceBuffer}, A::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{0}, Nothing, typeof(identity), Tuple{CuArray{Float32, 0, CUDA.Mem.DeviceBuffer}}}; init::Float32)
    @ CUDA ~/.julia/packages/CUDA/DfvRa/src/mapreduce.jl:172
  [3] _mapreduce(f::typeof(identity), op::typeof(Base.add_sum), As::CuArray{Float32, 0, CUDA.Mem.DeviceBuffer}; dims::Colon, init::Nothing)
    @ GPUArrays ~/.julia/packages/GPUArrays/Hyss4/src/host/mapreduce.jl:69
  [4] mapreduce(::Function, ::Function, ::CuArray{Float32, 0, CUDA.Mem.DeviceBuffer}; dims::Function, init::Nothing)
    @ GPUArrays ~/.julia/packages/GPUArrays/Hyss4/src/host/mapreduce.jl:31
  [5] mapreduce(::Function, ::Function, ::CuArray{Float32, 0, CUDA.Mem.DeviceBuffer})
    @ GPUArrays ~/.julia/packages/GPUArrays/Hyss4/src/host/mapreduce.jl:31
  [6] _sum(f::Function, a::CuArray{Float32, 0, CUDA.Mem.DeviceBuffer}, ::Colon; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Base ./reducedim.jl:999
  [7] _sum(f::Function, a::CuArray{Float32, 0, CUDA.Mem.DeviceBuffer}, ::Colon)
    @ Base ./reducedim.jl:999
  [8] _sum(a::CuArray{Float32, 0, CUDA.Mem.DeviceBuffer}, ::Colon; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Base ./reducedim.jl:998
  [9] _sum(a::CuArray{Float32, 0, CUDA.Mem.DeviceBuffer}, ::Colon)
    @ Base ./reducedim.jl:998
 [10] #sum#772
    @ ./reducedim.jl:994 [inlined]
 [11] sum
    @ ./reducedim.jl:994 [inlined]
 [12] (::ChainRules.var"#1342#1345"{Base.RefValue{Int64}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Val{1}})(project::ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}, sizeX::Tuple{})
    @ ChainRules ~/.julia/packages/ChainRules/EyLkg/src/rulesets/Base/array.jl:310
 [13] map
    @ ./tuple.jl:247 [inlined]
 [14] (::ChainRules.var"#vcat_pullback#1344"{Tuple{ChainRulesCore.ProjectTo{AbstractArray, NamedTuple{(:element, :axes), Tuple{ChainRulesCore.ProjectTo{Float32, NamedTuple{(), Tuple{}}}, Tuple{Base.OneTo{Int64}}}}}, ChainRulesCore.ProjectTo{Float64, NamedTuple{(), Tuple{}}}}, Tuple{Tuple{Int64}, Tuple{}}, Val{1}})(ȳ::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
    @ ChainRules ~/.julia/packages/ChainRules/EyLkg/src/rulesets/Base/array.jl:295
 [15] ZBack
    @ ~/.julia/dev/Zygote/src/compiler/chainrules.jl:205 [inlined]
 [16] Pullback
    @ ./REPL[6]:1 [inlined]
 [17] (::typeof(∂(#3)))(Δ::Float32)
    @ Zygote ~/.julia/dev/Zygote/src/compiler/interface2.jl:0
 [18] (::Zygote.var"#60#61"{typeof(∂(#3))})(Δ::Float32)
    @ Zygote ~/.julia/dev/Zygote/src/compiler/interface.jl:45
 [19] gradient(::Function, ::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, ::Vararg{Any})
    @ Zygote ~/.julia/dev/Zygote/src/compiler/interface.jl:97
 [20] top-level scope
    @ REPL[6]:1
 [21] top-level scope
    @ ~/.julia/packages/CUDA/DfvRa/src/initialization.jl:52
```
Edit: looks like this is https://github.com/JuliaGPU/GPUArrays.jl/issues/362, as mentioned in https://github.com/FluxML/Zygote.jl/issues/1061#issuecomment-1037635266.
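For reference, a sketch of what I think the failing piece reduces to, independent of Zygote (assumes a CUDA device; this is my reading of the stacktrace and of GPUArrays.jl#362, not a confirmed repro):

```julia
using CUDA

# A zero-dimensional CuArray, like the one the vcat pullback builds for the scalar argument.
r0 = CuArray(fill(1f0))

# Reducing it goes through GPUArrays' mapreduce path and should hit the same
# ndims(::Broadcasted) MethodError as in the stacktrace above.
sum(r0)
```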
Never got back to https://github.com/JuliaGPU/GPUArrays.jl/issues/362, but the rule could now use `@allowscalar` via GPUArraysCore instead.
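Not the actual ChainRules code, just a hedged sketch of what that could look like: for a scalar argument to `vcat`, the pullback only needs the single corresponding element of the cotangent, which could be read under `@allowscalar` instead of reducing a zero-dimensional GPU array (`scalar_cotangent` and `idx` are made-up names here):

```julia
using GPUArraysCore: @allowscalar

# Hypothetical helper: pull out the one cotangent entry that belongs to a scalar
# argument of vcat. @allowscalar makes the single GPU element read legal without
# enabling scalar indexing globally, and avoids the broken 0-dim reduction.
scalar_cotangent(ȳ::AbstractArray, idx::Integer) = @allowscalar ȳ[idx]

# e.g. for gradient((x, y) -> sum(vcat(x, y)), r, 5) with length(r) == 3,
# the scalar y occupies slot 4 of the cotangent:
# scalar_cotangent(ȳ, 4)
```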
The DynamicPPL failures seem to be the same as those from running current HEAD on Julia 1.8 (https://github.com/TuringLang/DynamicPPL.jl/issues/423), so this is probably good to go?
The DynamicPPL errors on current HEAD with Julia 1.8 are fixed by updates to Tracker and Bijectors.
Another good chunk of adjoints gone!