I am trying to train a simple dense neural network in Julia using MLJFlux. The code runs on the CPU, but when I try to run it on the GPU, it crashes in the fit!(machine) call with a MethodError saying that the method
parent(::Type{SubArray{Union{ChainRulesCore.ZeroTangent, CuMatrix{Float32, CUDA.Mem.DeviceBuffer}, DenseCuMatrix{Float32, CUDA.Mem.DeviceBuffer}}, 0, Vector{Union{ChainRulesCore.ZeroTangent, CuMatrix{Float32, CUDA.Mem.DeviceBuffer}, DenseCuMatrix{Float32, CUDA.Mem.DeviceBuffer}}}, Tuple{Int64}, true}})
is not known. I have posted an MWE and the full stack trace below. Can someone please point me in the right direction as to what is wrong with this code?
using Flux
using CUDA
using MLJ
using MLUtils
using cuDNN
x = Float32.(rand(1000, 9))
y = Float32.(rand(1000))
NeuralNetworkRegressor = MLJ.@load NeuralNetworkRegressor pkg=MLJFlux verbosity=0
builder = MLJFlux.@builder begin
    Chain(
        Dense(9, 32, relu),
        Dense(32, 64, relu),
        Dense(64, 32, relu),
        Dense(32, 1),
    )
end
model = NeuralNetworkRegressor(
    builder = builder,
    acceleration = CUDALibs(),  # if I comment out this line, the code runs without error
    batch_size = 512,
    epochs = 10,
)
mach = machine(model, x, y)
fit!(mach)
This code runs fine on the CPU, but on the GPU the last line, fit!(mach), crashes with:
┌ Error: Problem fitting the machine machine(NeuralNetworkRegressor(builder = GenericBuilder(apply = #3), …), …).
└ @ MLJBase ~/.julia/packages/MLJBase/fEiP2/src/machines.jl:682
[ Info: Running type checks...
[ Info: Type checks okay.
ERROR: LoadError: MethodError: no method matching parent(::Type{SubArray{Union{ChainRulesCore.ZeroTangent, CuMatrix{Float32, CUDA.Mem.DeviceBuffer}, DenseCuMatrix{Float32, CUDA.Mem.DeviceBuffer}}, 0, Vector{Union{ChainRulesCore.ZeroTangent, CuMatrix{Float32, CUDA.Mem.DeviceBuffer}, DenseCuMatrix{Float32, CUDA.Mem.DeviceBuffer}}}, Tuple{Int64}, true}})
Closest candidates are:
parent(::Union{LinearAlgebra.Adjoint{T, S}, LinearAlgebra.Transpose{T, S}} where {T, S})
@ LinearAlgebra ~/software/julia-1.9.2/share/julia/stdlib/v1.9/LinearAlgebra/src/adjtrans.jl:341
parent(::Union{LinearAlgebra.LowerTriangular{T, S} where S<:AbstractMatrix{T}, LinearAlgebra.UnitLowerTriangular{T, S} where S<:AbstractMatrix{T}, LinearAlgebra.UnitUpperTriangular{T, S} where S<:AbstractMatrix{T}, LinearAlgebra.UpperTriangular{T, S} where S<:AbstractMatrix{T}} where T)
@ LinearAlgebra ~/software/julia-1.9.2/share/julia/stdlib/v1.9/LinearAlgebra/src/triangular.jl:164
parent(::Union{LinearAlgebra.Hermitian{T, S}, LinearAlgebra.Symmetric{T, S}} where {T, S})
@ LinearAlgebra ~/software/julia-1.9.2/share/julia/stdlib/v1.9/LinearAlgebra/src/symmetric.jl:275
...
Stacktrace:
[1] backend(#unused#::Type{SubArray{Union{ChainRulesCore.ZeroTangent, CuMatrix{Float32, CUDA.Mem.DeviceBuffer}, DenseCuMatrix{Float32, CUDA.Mem.DeviceBuffer}}, 0, Vector{Union{ChainRulesCore.ZeroTangent, CuMatrix{Float32, CUDA.Mem.DeviceBuffer}, DenseCuMatrix{Float32, CUDA.Mem.DeviceBuffer}}}, Tuple{Int64}, true}})
@ GPUArraysCore ~/.julia/packages/GPUArraysCore/uOYfN/src/GPUArraysCore.jl:151
[2] backend(x::SubArray{Union{ChainRulesCore.ZeroTangent, CuMatrix{Float32, CUDA.Mem.DeviceBuffer}, DenseCuMatrix{Float32, CUDA.Mem.DeviceBuffer}}, 0, Vector{Union{ChainRulesCore.ZeroTangent, CuMatrix{Float32, CUDA.Mem.DeviceBuffer}, DenseCuMatrix{Float32, CUDA.Mem.DeviceBuffer}}}, Tuple{Int64}, true})
@ GPUArraysCore ~/.julia/packages/GPUArraysCore/uOYfN/src/GPUArraysCore.jl:149
[3] _copyto!
@ ~/.julia/packages/GPUArrays/5XhED/src/host/broadcast.jl:65 [inlined]
[4] materialize!
@ ~/.julia/packages/GPUArrays/5XhED/src/host/broadcast.jl:41 [inlined]
[5] materialize!
@ ./broadcast.jl:881 [inlined]
[6] ∇getindex!(dx::Vector{Union{ChainRulesCore.ZeroTangent, CuMatrix{Float32, CUDA.Mem.DeviceBuffer}, DenseCuMatrix{Float32, CUDA.Mem.DeviceBuffer}}}, dy::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, inds::Int64)
@ ChainRules ~/.julia/packages/ChainRules/DSuXy/src/rulesets/Base/indexing.jl:147
[7] ∇getindex(x::Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, dy::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, inds::Int64)
@ ChainRules ~/.julia/packages/ChainRules/DSuXy/src/rulesets/Base/indexing.jl:89
[8] (::ChainRules.var"#1583#1585"{Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Tuple{Int64}})()
@ ChainRules ~/.julia/packages/ChainRules/DSuXy/src/rulesets/Base/indexing.jl:69
[9] unthunk
@ ~/.julia/packages/ChainRulesCore/7MWx2/src/tangent_types/thunks.jl:204 [inlined]
[10] unthunk(x::ChainRulesCore.InplaceableThunk{ChainRulesCore.Thunk{ChainRules.var"#1583#1585"{Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Tuple{Int64}}}, ChainRules.var"#1582#1584"{Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Tuple{Int64}}})
@ ChainRulesCore ~/.julia/packages/ChainRulesCore/7MWx2/src/tangent_types/thunks.jl:237
[11] wrap_chainrules_output
@ ~/.julia/packages/Zygote/YYT6v/src/compiler/chainrules.jl:110 [inlined]
[12] map
@ ./tuple.jl:275 [inlined]
[13] wrap_chainrules_output
@ ~/.julia/packages/Zygote/YYT6v/src/compiler/chainrules.jl:111 [inlined]
[14] (::Zygote.ZBack{ChainRules.var"#getindex_pullback#1581"{Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Tuple{Int64}, Tuple{ChainRulesCore.NoTangent}}})(dy::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/YYT6v/src/compiler/chainrules.jl:211
[15] Pullback
@ ~/.julia/packages/MLJFlux/V1aWg/src/core.jl:41 [inlined]
[16] (::Zygote.Pullback{Tuple{MLJFlux.var"#23#24"{MLJFlux.Penalty{MLJFlux.Penalizer{Nothing}}, Chain{Tuple{Dense{typeof(relu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Int64, typeof(Flux.Losses.mse), Int64, Zygote.Params{Zygote.Buffer{Any, Vector{Any}}}}}, Any})(Δ::Float32)
@ Zygote ~/.julia/packages/Zygote/YYT6v/src/compiler/interface2.jl:0
[17] (::Zygote.var"#122#123"{Zygote.Params{Zygote.Buffer{Any, Vector{Any}}}, Zygote.Pullback{Tuple{MLJFlux.var"#23#24"{MLJFlux.Penalty{MLJFlux.Penalizer{Nothing}}, Chain{Tuple{Dense{typeof(relu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Int64, typeof(Flux.Losses.mse), Int64, Zygote.Params{Zygote.Buffer{Any, Vector{Any}}}}}, Any}, Zygote.Context{true}})(Δ::Float32)
@ Zygote ~/.julia/packages/Zygote/YYT6v/src/compiler/interface.jl:419
[18] gradient
@ ~/.julia/packages/Zygote/YYT6v/src/compiler/interface.jl:97 [inlined]
[19] train!(model::MLJFlux.NeuralNetworkRegressor{MLJFlux.GenericBuilder{var"#3#4"}, Adam, typeof(Flux.Losses.mse)}, penalty::MLJFlux.Penalty{MLJFlux.Penalizer{Nothing}}, chain::Chain{Tuple{Dense{typeof(relu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, optimiser::Adam, X::Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, y::Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}})
@ MLJFlux ~/.julia/packages/MLJFlux/V1aWg/src/core.jl:39
[20] fit!(model::MLJFlux.NeuralNetworkRegressor{MLJFlux.GenericBuilder{var"#3#4"}, Adam, typeof(Flux.Losses.mse)}, penalty::MLJFlux.Penalty{MLJFlux.Penalizer{Nothing}}, chain::Chain{Tuple{Dense{typeof(relu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, optimiser::Adam, epochs::Int64, verbosity::Int64, X::Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, y::Vector{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}})
@ MLJFlux ~/.julia/packages/MLJFlux/V1aWg/src/core.jl:107
[21] fit(model::MLJFlux.NeuralNetworkRegressor{MLJFlux.GenericBuilder{var"#3#4"}, Adam, typeof(Flux.Losses.mse)}, verbosity::Int64, X::Matrix{Float32}, y::Vector{Float32})
@ MLJFlux ~/.julia/packages/MLJFlux/V1aWg/src/mlj_model_interface.jl:79
[22] fit_only!(mach::Machine{MLJFlux.NeuralNetworkRegressor{MLJFlux.GenericBuilder{var"#3#4"}, Adam, typeof(Flux.Losses.mse)}, true}; rows::Nothing, verbosity::Int64, force::Bool, composite::Nothing)
@ MLJBase ~/.julia/packages/MLJBase/fEiP2/src/machines.jl:680
[23] fit_only!
@ ~/.julia/packages/MLJBase/fEiP2/src/machines.jl:606 [inlined]
[24] #fit!#63
@ ~/.julia/packages/MLJBase/fEiP2/src/machines.jl:777 [inlined]
[25] fit!(mach::Machine{MLJFlux.NeuralNetworkRegressor{MLJFlux.GenericBuilder{var"#3#4"}, Adam, typeof(Flux.Losses.mse)}, true})
@ MLJBase ~/.julia/packages/MLJBase/fEiP2/src/machines.jl:774
[26] top-level scope
@ /path/to/file/file.jl:29
in expression starting at /path/to/file/file.jl:29
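For reference, and independently of MLJ, the CUDA stack itself can be sanity-checked by training the same chain with plain Flux. This is only a minimal sketch, not part of the original MWE; it assumes Flux ≥ 0.14 and uses Flux.Losses.mse as the loss, the same loss that appears in the stack trace above:

using Flux
using CUDA

xs = rand(Float32, 9, 1000) |> gpu    # Flux expects features × observations
ys = rand(Float32, 1, 1000) |> gpu

chain = Chain(Dense(9, 32, relu), Dense(32, 64, relu),
              Dense(64, 32, relu), Dense(32, 1)) |> gpu

opt_state = Flux.setup(Adam(), chain)
loader = Flux.DataLoader((xs, ys), batchsize = 512)

for epoch in 1:10
    for (xb, yb) in loader
        # explicit-gradient training loop (Flux 0.14 style)
        grads = Flux.gradient(m -> Flux.Losses.mse(m(xb), yb), chain)
        Flux.update!(opt_state, chain, grads[1])
    end
end

If this runs, CUDA.jl and Flux themselves are working on your system, which points towards the MLJFlux/ChainRules interaction (or a version mismatch) rather than the GPU setup.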
Update: updating the Julia packages resolved the issue. The code now works under Julia 1.9.2 with Flux 0.14.8, CUDA 5.1.1, MLJFlux 0.4.0, and MLJ 0.20.2.
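For anyone hitting the same error: the fix was simply bringing the active environment up to date, roughly along these lines (the exact versions you get will depend on your registry state):

using Pkg
Pkg.update()   # upgrade all packages in the active environment
Pkg.status()   # confirm the Flux, CUDA, MLJFlux and MLJ versions you ended up with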