NeuralPDE.jl
Integrate NeuralOperators.jl and NeuralPDE.jl
A couple of strategies/scenarios:
- Train-to-train model (see the sketch after this list):
  - train a sub-class of PDEs with NeuralOperator,
  - transfer the pre-trained prediction from the NeuralOperator problem to PINNs,
  - train PINNs, initialized from the pre-trained model, on a single instance of the PDE.
- Physics-informed neural operator: integrate both losses into one training scheme.
- Use each individual prediction, solution, and dataset together to adaptively update the prediction of the core kernel / Green's function for a class of PDEs (the governing equations of some problem) as a pre-trained state.
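A minimal sketch of the train-to-train workflow; the helpers train_operator! and finetune_pinn! and the arguments family_data and pde_residual are hypothetical stand-ins for the three scripts below, while FourierNeuralOperator is the real NeuralOperators.jl constructor:
op = FourierNeuralOperator(ch = (3, 64, 64, 64, 64, 64, 128, 1), modes = (10, 16), σ = gelu)
train_operator!(op, family_data)        # data (optionally + PDE) loss over a family of PDEs
pinn = deepcopy(op)                     # copy the trained weights as a pre-trained state
finetune_pinn!(pinn, pde_residual, op)  # PDE loss + distance to the operator's prediction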
References:
- Neural Operator (CS159 lecture slides): https://zongyi-li.github.io/neural-operator/Neural-Operator-CS159-0503-2022.pdf
- Surrogate methods: https://www2.compute.dtu.dk/~apek/SCIML2022/Surrogate_methods.pdf
- Physics-Informed Neural Operators: https://arxiv.org/abs/2111.03794
- Graph Kernel Network: https://arxiv.org/abs/2003.03485
- Generative Adversarial Neural Operators: https://arxiv.org/abs/2205.03017
TODO
- Provide support for the main Neural Operators (https://github.com/SciML/NeuralOperators.jl/tree/main/src): symbolic part, inner types, strategies, training, transfer learning, GPU.
- Experiment with this dataset and build an interesting demonstration example: https://developer.ibm.com/data/spot-challenge-wildfires/
https://github.com/SciML/NeuralOperators.jl/issues/83
The first script trains a neural operator for a family of Burgers' equations. Training learns the mapping between the function space of initial conditions {u(t0, x)} and the solutions u(t, x); only data is used here (no PDE loss).
using DataDeps, MAT, MLUtils
using NeuralOperators, Flux
using CUDA, FluxTraining, BSON
using Plots
import Flux: params
using BSON: @save, @load
# Burgers' equation dataset from
# [fourier_neural_operator](https://github.com/zongyi-li/fourier_neural_operator),
# mirrored at http://www.med.cgu.edu.tw/NeuralOperators/Burgers_R10.zip
#### data
burgermat = matopen("path here") # path to the Burgers .mat file
b = read(burgermat)
input = b["input"]
output = b["output"]
tspan = b["tspan"]
x = b["x"]
a = b["a"]
size(input)
size(output)
# reshape output to (channel, time, space, sample): 1 x 101 x 128 x 1200
output_ = Array{Float32, 4}(undef, 1, 101, 128, 1200)
for i in 1:1200
    output_[1, :, :, i] = output[i, :, :]
end
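# Loop-free equivalent (editorial note), same (channel, time, space, sample) layout:
# output_ = reshape(permutedims(Float32.(output), (2, 3, 1)), 1, 101, 128, 1200)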
# input channels: 1) initial condition a, 2) time t, 3) coordinate x
input_ = Array{Float32, 4}(undef, 3, 101, 128, 1200)
size(input_[1, :, :, :])
# initial condition: broadcast a over every time slice
for i in 1:101
    input_[1, i, :, :] = input[:, :]'
end
# time grid
ts = reshape(repeat(LinRange(0, 1, 101), 128), 101, 128)
for i in 1:1200
    input_[2, :, :, i] = ts
end
# coordinate grid
xs = reshape(repeat(LinRange(0, 1, 128), 101), 128, 101)'
for i in 1:1200
    input_[3, :, :, i] = xs
end
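# Quick sanity check (editorial addition): time varies along axis 2, space along axis 3.
@assert input_[2, 1, 1, 1] == 0f0 && input_[2, end, 1, 1] == 1f0
@assert input_[3, 1, 1, 1] == 0f0 && input_[3, 1, end, 1] == 1f0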
###
size(input_)
size(output_)
input_data = input_[:, 1:100, :, 1:900]
output_data = output_[:, 1:100, :, 1:900]
size(input_data)
size(output_data)
# model = FourierNeuralOperator(ch=(3, 64, 64, 64, 64, 64, 128, 1), modes=(1, 128),
#                               σ=gelu)
m = FourierNeuralOperator(ch = (3, 64, 64, 64, 64, 64, 128, 1), modes = (10, 16),
                          σ = gelu)
# smoke test on a small slice (note: `m`, not the yet-undefined `model`)
res = m(input_data[:, 1:10, 1:128, 1:1])
ratio = 0.9
batchsize = 50
data_train = (input_data, output_data)
data = DataLoader(data_train |> gpu , batchsize=batchsize, shuffle=true)
cuda = true
η₀ = 1.0f-3
λ = 1.0f-4
epochs = 100
if cuda && CUDA.has_cuda()
device = gpu
CUDA.allowscalar(false)
@info "Training on GPU"
else
device = cpu
@info "Training on CPU"
end
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
lossfunc(x,y) = l₂loss(m(x),y)
m = m |> gpu
xtrain = input_data[:, :, :, 1:1] |> gpu
ytrain = output_data[:, :, :, 1:1] |> gpu
evalcb() = @show(lossfunc(xtrain, ytrain))
m(xtrain)
lossfunc(xtrain, ytrain)
evalcb()
CUDA.memory_status()
CUDA.reclaim()
epochs = 100
Flux.@epochs epochs Flux.train!(lossfunc, params(m), data, optimiser, cb=evalcb)
# for epoch in 1: epochs
# d =generate_data()
# time_evolution_loop()
# Flux.train!(lossfunc, params(m), d, optimiser, cb=evalcb)
# end
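# Optional (editorial note): throttle the callback so the loss prints at most
# once every 10 seconds instead of on every batch:
# Flux.@epochs epochs Flux.train!(lossfunc, params(m), data, optimiser,
#                                 cb = Flux.throttle(evalcb, 10))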
model = m |> cpu
j = 100
res = model(input_data[:, :, :, j:j])[1, :, :, 1]
anim = @animate for i in 1:100
plot(output_data[1, i, :, j])
plot!(res[i,:])
end
gif(anim, fps=15)
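# Optionally save the trained weights for the transfer-learning step below; @save
# is already imported above, the filename is an assumption:
# @save "model_burger_no.bson" model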
The second script adds a loss function for the differential operator (PINN-style): data + PDE.
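Schematically, the combined per-batch objective is

loss = ‖m(x) − y‖₂ + ‖Dt(m) + m · Dx(m) − ν · Dxx(m)‖₂,

with the derivatives approximated by central finite differences on pre-shifted copies of the input grid, built below.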
# ## pde + data
# input_ axes: 1) channel, 2) time, 3) space, 4) sample (initial condition a)
# central finite differences with step h = cbrt(eps(Float32)):
#   f'(x)  ≈ (f(x + h) - f(x - h)) / (2h)
#   f''(x) ≈ (f(x + h) + f(x - h) - 2f(x)) / h^2
_epsilon = one(Float32) / cbrt(eps(Float32)) # 1 / h
_epsilon2 = _epsilon / 2                     # 1 / (2h)
_epsilonpow = _epsilon^2                     # 1 / h^2
#time
# input_data = input_data |> gpu
input_data = input_data |> cpu
input_dtl = copy(input_data)
input_dtr = copy(input_data)
dt = cbrt(eps(Float32))
input_dtr[2, :, :, :] .= input_data[2, :, :, :] .+ dt
input_dtl[2, :, :, :] .= input_data[2, :, :, :] .- dt
# coordinate shifts
input_dxl = copy(input_data)
input_dxr = copy(input_data)
dx = cbrt(eps(Float32))
input_dxr[3, :, :, :] = input_data[3, :, :, :] .+ dx
input_dxl[3, :, :, :] = input_data[3, :, :, :] .- dx
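# Note (editorial): each shifted copy differs from input_data in a single channel,
# so the copies could also be built on the fly inside the loss to save memory.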
###
m = FourierNeuralOperator(ch=(3, 64, 64, 64, 64, 64, 128, 1), modes=(10, 16),
σ=gelu)
del = (m(input_dtr[:, :, :, 1:10]) .- m(input_dtl[:, :, :, 1:10])) .* _epsilon2
function derivative_dt(input_dtr, input_dtl)
    (m(input_dtr) .- m(input_dtl)) .* _epsilon2
end
function derivative_dx(input_dxr, input_dxl)
    (m(input_dxr) .- m(input_dxl)) .* _epsilon2
end
function derivative_dx2(input_data, input_dxr, input_dxl)
    (m(input_dxr) .+ m(input_dxl) .- 2 .* m(input_data)) .* _epsilonpow
end
@time derivative_dx2(input_data[:, :, :, 1:5],input_dxr[:, :, :, 1:5], input_dxl[:, :, :, 1:5]);
@time derivative_dx(input_dxr[:, :, :, 1:5], input_dxl[:, :, :, 1:5]);
@time derivative_dt(input_dtr[:, :, :, 1:5], input_dtl[:, :, :, 1:5]);
@time m(input_data[:, :, :, 1:5]) ;
size_ = 100
input_data_ = input_data[:, :, :, 1:size_];
output_data_ = output_data[:, :, :, 1:size_];
input_dxr_ = input_dxr[:, :, :, 1:size_];
input_dxl_ = input_dxl[:, :, :, 1:size_];
input_dtr_ = input_dtr[:, :, :, 1:size_];
input_dtl_ = input_dtl[:, :, :, 1:size_];
m = m |> gpu
CUDA.@time derivative_dx2(input_data[:, :, :, 1:1] |> gpu, input_dxr[:, :, :, 1:1] |> gpu, input_dxl[:, :, :, 1:1] |> gpu);
CUDA.@time derivative_dx(input_dxr[:, :, :, 1:1] |> gpu, input_dxl[:, :, :, 1:1] |> gpu);
CUDA.@time derivative_dt(input_dtr[:, :, :, 1:1] |> gpu, input_dtl[:, :, :, 1:1] |> gpu);
CUDA.@time m(input_data[:,:,:,1:1] |> gpu) ;
batchsize = 10
data_train = (input_data_,input_dxr_,input_dxl_,input_dtr_,input_dtl_, output_data_)
data = DataLoader(data_train |> gpu , batchsize=batchsize, shuffle=true)
cuda = true
η₀ = 1.0f-3
λ = 1.0f-4
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
xtrain = input_data[:, :, :, 1:1] |> gpu
ytrain = output_data[:, :, :, 1:1] |> gpu
dxltrain = input_dxl[:, :, :, 1:1] |> gpu
dxrtrain = input_dxr[:, :, :, 1:1] |> gpu
dtrtrain = input_dtr[:, :, :, 1:1] |> gpu
dtltrain = input_dtl[:,:,:,1:1] |> gpu
# unnormalized l₂ norm over the feature dims; used for the PDE residual,
# whose target is identically zero
function l₂loss2(𝐲)
    feature_dims = 2:(ndims(𝐲) - 1)
    loss = sum(.√(sum(abs2, 𝐲, dims = feature_dims)))
    # y_norm = sum(.√(sum(abs2, 𝐲, dims = feature_dims)))
    return loss # / y_norm
end
l₂loss2(m(xtrain))
l₂loss(m(xtrain), ytrain)
# Burgers' equation:
# eq = Dt(u(t, x)) + u(t, x) * Dx(u(t, x)) - ν * Dxx(u(t, x)) ~ 0
ν = 0.01f0 # Float32 to match the model and GPU arrays
derivative_operator(x, dxr, dxl, dtr, dtl) = derivative_dt(dtr, dtl) .+ m(x) .* derivative_dx(dxr, dxl) .- ν .* derivative_dx2(x, dxr, dxl)
pde_loss(x,dxr,dxl,dtr,dtl) = l₂loss2(derivative_operator(x, dxr, dxl, dtr, dtl))
data_loss(x, y) = l₂loss(m(x) , y)
lossfull(x, dxr, dxl, dtr, dtl, y) = data_loss(x, y) + pde_loss(x,dxr,dxl,dtr,dtl)
evalcb() = @show(lossfull(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain, ytrain))
l₂loss2(derivative_operator(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain))
pde_loss(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain)
data_loss(xtrain, ytrain)
lossfull(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain, ytrain)
evalcb()
# CUDA.devices()
CUDA.reclaim()
CUDA.memory_status()
epochs = 80
Flux.@epochs epochs Flux.train!(lossfull, params(m), data, optimiser, cb=evalcb)
model = m |> cpu
j = 1
res = model(input_data[:, :, :, j:j])[1, :, :, 1]
anim = @animate for i in 1:100
plot(output_data[1, i, :, j])
plot!(res[i,:])
end
gif(anim, fps=15)
Fine-tuning optimization with PINNs
Here, the neural operator already trained over the whole function space of initial conditions is used as the pre-trained state for a PINN solver, in order to achieve higher accuracy on a single instance of the Burgers' equation.
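Schematically, the data term is replaced by a distance to the frozen pre-trained operator:

loss = ‖Dt(m) + m · Dx(m) − ν · Dxx(m)‖₂ + ‖mtrained(x) − m(x)‖₂.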
# fine tuning with PINNS
m = FourierNeuralOperator(ch=(3, 64, 64, 64, 64, 64, 128, 1), modes=(10, 16),σ=gelu)
j = 1 # index of one of the initial conditions from the dataset
input_data_ = input_data[:, :, :, j:j] |> gpu
output_data_ = output_data[:, :, :, j:j] |> gpu
input_dxr_ = input_dxr[:, :, :, j:j] |> gpu
input_dxl_ = input_dxl[:, :, :, j:j] |> gpu
input_dtr_ = input_dtr[:, :, :, j:j] |> gpu
input_dtl_ = input_dtl[:, :, :, j:j] |> gpu
batchsize = 1
data_train = (input_data_,input_dxr_,input_dxl_,input_dtr_,input_dtl_, output_data_)
data = DataLoader(data_train |> gpu , batchsize=batchsize, shuffle=true)
ν = 0.01f0 # Float32 to match the model and GPU arrays
derivative_operator(x, dxr, dxl, dtr, dtl) = derivative_dt(dtr, dtl) .+ m(x) .* derivative_dx(dxr, dxl) .- ν .* derivative_dx2(x, dxr, dxl)
pde_loss(x,dxr,dxl,dtr,dtl) = l₂loss2(derivative_operator(x, dxr, dxl, dtr, dtl))
data_loss(x, y) = l₂loss(m(x) , y)
lossfull(x, dxr, dxl, dtr, dtl, y) = data_loss(x, y) + pde_loss(x,dxr,dxl,dtr,dtl)
#model saved here https://github.com/KirillZubov/pino_model/tree/main/burger/model
mtrained = BSON.load("/home/kirillzubov/model_burger_pino_full.bson")[:m]
j = 100
res = mtrained(input_data[:, :, :, j:j])[1, :, :, 1]
anim = @animate for i in 1:100
plot(output_data[1, i, :, j])
plot!(res[i,:])
end
gif(anim, "burger_neural_operator.gif", fps=15)
mtrained = mtrained |> gpu
m = m |> gpu
op_loss(x) = l₂loss(mtrained(x) , m(x))
derivative_operator(x, dxr, dxl, dtr, dtl) = derivative_dt(dtr, dtl) .+ m(x) .* derivative_dx(dxr, dxl) .- ν .* derivative_dx2(x, dxr, dxl)
pde_loss(x,dxr,dxl,dtr,dtl) = l₂loss2(derivative_operator(x, dxr, dxl, dtr, dtl))
lossfull(x, dxr, dxl, dtr, dtl, y) = pde_loss(x,dxr,dxl,dtr,dtl) + op_loss(x) #data_loss(x, y)
evalcb() = @show(lossfull(input_data_, input_dxr_, input_dxl_, input_dtr_, input_dtl_, output_data_))
mtrained(xtrain)
m(xtrain)
op_loss(xtrain)
pde_loss(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain)
lossfull(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain, ytrain)
evalcb()
CUDA.reclaim()
CUDA.memory_status()
epochs = 1000
η₀ = 1.0f-4
λ = 1.0f-5
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
Flux.@epochs epochs Flux.train!(lossfull, params(m), data, optimiser, cb=evalcb)
model = m |> cpu
mtrainedc = mtrained |> cpu
j = 1
res = model(input_data[:, :, :, j:j])[1, :, :, 1]
res2 = mtrainedc(input_data[:, :, :, j:j])[1, :, :, 1]
anim = @animate for i in 1:100
plot(output_data[1, i, :, j])
plot!(res[i,:])
plot!(res2[i,:])
end
gif(anim, fps=15)
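# A hedged post-training check (editorial addition): relative L2 error of the
# fine-tuned model (res) and the pre-trained operator (res2) against the data.
rel_err(pred, ref) = sqrt(sum(abs2, pred .- ref)) / sqrt(sum(abs2, ref))
@show rel_err(res, output_data[1, :, :, j])
@show rel_err(res2, output_data[1, :, :, j])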