taichi
taichi copied to clipboard
Why does the code take much more time in the first iteration than in the other iterations?
import taichi as ti
import numpy as np
import math
import time
# Run Taichi kernels on the CUDA backend.
ti.init(arch=ti.cuda)

# Element counts used for the (nely, nelx) density field below.
nely = 40
nelx = 80
n_node = (nelx + 1) * (nely + 1)  # number of nodes
ndof = 2 * n_node  # nodal degrees of freedom (two per node)

# Dense global stiffness matrix and the 8x8 element stiffness matrix.
K = ti.field(ti.f64, shape=(ndof, ndof))
Ke = ti.field(ti.f64, shape=(8, 8))

fixed_dofs = list(range(0, 2 * (nely + 1)))  # fixed dof
all_dofs = list(range(0, 2 * (nelx + 1) * (nely + 1)))
# np.setdiff1d returns the difference as a sorted array, so the free-dof
# ordering no longer depends on the iteration order of a Python set.
free_dofs = np.setdiff1d(all_dofs, fixed_dofs)  # free dof
n_free_dof = len(free_dofs)

# NOTE: free_dofs_vec is only allocated here (zero-initialised); it must be
# filled from free_dofs before assemble_k() reads it.
free_dofs_vec = ti.field(ti.i32, shape=n_free_dof)
K_freedof = ti.field(ti.f64, shape=(n_free_dof, n_free_dof))

E = 1.  # Young's modulus
nu = 0.3  # Poisson's ratio
volfrac = 0.5  # Volume fraction
penalty = 3  # Penalty exponent applied to the element density
xe = ti.field(ti.f64, shape=(nely, nelx))  # per-element density
# Kernel: set every entry of the element density field `xe` to 1.
@ti.kernel
def initialize():
    # Struct-for over the 2D field visits each (row, column) entry once.
    for ely, elx in xe:
        xe[ely, elx] = 1
# Kernel: rebuild the global matrix K from the element matrix Ke and the
# densities xe, then gather the rows/columns listed in free_dofs_vec into
# K_freedof.
@ti.kernel
def assemble_k():
    # 1. Clear K so repeated calls do not accumulate on old values.
    for row, col in K:
        K[row, col] = 0.

    # 2. Scatter every element's scaled Ke into K.
    # NOTE(review): neighbouring elements add into the same K entries; this
    # presumably relies on Taichi treating `+=` on fields as atomic - confirm.
    for ely, elx in ti.ndrange(nely, nelx):
        n1 = (nely + 1) * elx + ely + 1
        n2 = (nely + 1) * (elx + 1) + ely + 1
        # The eight global dof indices touched by this element.
        edof = ti.Vector([
            2 * n1 - 2, 2 * n1 - 1,
            2 * n2 - 2, 2 * n2 - 1,
            2 * n2, 2 * n2 + 1,
            2 * n1, 2 * n1 + 1,
        ])
        # Penalised density; the same factor applies to all 64 entries.
        density_factor = xe[ely, elx] ** penalty
        for a, b in ti.static(ti.ndrange(8, 8)):
            K[edof[a], edof[b]] += density_factor * Ke[a, b]

    # 3. Gather the sub-matrix indexed by free_dofs_vec.
    for r, c in ti.ndrange(n_free_dof, n_free_dof):
        K_freedof[r, c] = K[free_dofs_vec[r], free_dofs_vec[c]]
def get_ke():
    """Fill the Taichi field ``Ke`` with the 8x8 element stiffness matrix.

    The matrix is built with NumPy from the module-level ``E`` and ``nu``
    and then copied into ``Ke``. Nothing is returned.
    """
    # The eight distinct coefficients that appear in the matrix.
    coeffs = np.array([
        1 / 2 - nu / 6,
        1 / 8 + nu / 8,
        -1 / 4 - nu / 12,
        -1 / 8 + 3 * nu / 8,
        -1 / 4 + nu / 12,
        -1 / 8 - nu / 8,
        nu / 6,
        1 / 8 - 3 * nu / 8,
    ])
    # layout[r, c] is the index into `coeffs` of matrix entry (r, c).
    layout = np.array([
        [0, 1, 2, 3, 4, 5, 6, 7],
        [1, 0, 7, 6, 5, 4, 3, 2],
        [2, 7, 0, 5, 6, 3, 4, 1],
        [3, 6, 5, 0, 7, 2, 1, 4],
        [4, 5, 6, 7, 0, 1, 2, 3],
        [5, 4, 3, 2, 1, 0, 7, 6],
        [6, 3, 4, 1, 2, 7, 0, 5],
        [7, 2, 1, 4, 3, 6, 5, 0],
    ])
    Ke_ = E / (1. - nu ** 2) * coeffs[layout]
    Ke.from_numpy(Ke_)
if __name__ == '__main__':
    # Populate the fields the assembly kernel reads.
    initialize()
    get_ke()
    # free_dofs_vec is allocated zero-filled; without this copy assemble_k()
    # would gather only K[0, 0] into every entry of K_freedof.
    free_dofs_vec.from_numpy(free_dofs.astype(np.int32))

    # Drain the work launched above so it is not charged to the first timing.
    ti.sync()

    # Time three assemblies. The first call includes the JIT compilation of
    # assemble_k; later calls reuse the compiled kernel.
    for _ in range(3):
        start_time = time.perf_counter()
        assemble_k()
        # GPU kernel launches can return before the work is finished; wait
        # for completion so the measurement covers the actual execution.
        ti.sync()
        elapsed_time = time.perf_counter() - start_time
        print(elapsed_time)
Because Taichi is a JIT compiler, it compiles Taichi kernels while the code is running. In your loop, the kernel assemble_k
is compiled during the first iteration; the remaining iterations do not need to compile it again. The initial compilation can take a significant amount of time.