taichi icon indicating copy to clipboard operation
taichi copied to clipboard

Why does the code take much more time in the first iteration than in the other iterations?

Open qmdxcube opened this issue 1 year ago • 1 comments

import taichi as ti
import numpy as np
import math
import time

ti.init(arch=ti.cuda)

# Mesh size: number of elements along each axis.
nely = 40
nelx = 80
n_node = (nelx + 1) * (nely + 1)  # number of nodes
ndof = 2 * n_node  # nodal degrees of freedom (two per node)

# Dense global stiffness matrix and the 8x8 element stiffness matrix.
K = ti.field(ti.f64, shape=(ndof, ndof))
Ke = ti.field(ti.f64, shape=(8, 8))

fixed_dofs = list(range(0, 2 * (nely + 1)))  # fixed dofs: the first 2 * (nely + 1) indices
all_dofs = list(range(0, ndof))
# np.setdiff1d returns the remaining dofs sorted in ascending order. Iterating
# a Python set has no guaranteed order, and this order decides which row and
# column of K_freedof each free dof ends up in.
free_dofs = np.setdiff1d(all_dofs, fixed_dofs)  # free dofs
n_free_dof = len(free_dofs)

# Device-side copy of the free dof indices and the K submatrix restricted to them.
free_dofs_vec = ti.field(ti.i32, shape=n_free_dof)
K_freedof = ti.field(ti.f64, shape=(n_free_dof, n_free_dof))

E = 1.  # Young's modulus
nu = 0.3  # Poisson's ratio
volfrac = 0.5  # Volume fraction
penalty = 3  # Penalty exponent applied to xe in assemble_k

# One value per element; raised to `penalty` when assembling K.
xe = ti.field(ti.f64, shape=(nely, nelx))

@ti.kernel
def initialize():
    # Set every entry of the per-element field xe to 1.
    for row, col in xe:
        xe[row, col] = 1

@ti.kernel
def assemble_k():
    # Rebuild the dense global matrix K from the element matrix Ke, then copy
    # the rows/columns listed in free_dofs_vec into K_freedof.

    # Clear whatever the previous assembly left in K.
    for row, col in K:
        K[row, col] = 0.

    # Scatter each element's scaled 8x8 matrix into K.
    for ely, elx in ti.ndrange(nely, nelx):
        # First dof index of the node at (elx, ely) and of the node one
        # column over at (elx + 1, ely); nodes are numbered with nely + 1
        # nodes per column and two dofs per node.
        left = 2 * ((nely + 1) * elx + ely)
        right = 2 * ((nely + 1) * (elx + 1) + ely)
        edof = ti.Vector([left, left + 1, right, right + 1,
                          right + 2, right + 3, left + 2, left + 3])
        # The penalised element value is the same for all 64 entries.
        scale = xe[ely, elx] ** penalty
        # ti.static unrolls the 8x8 local loop at compile time.
        for a in ti.static(range(8)):
            for b in ti.static(range(8)):
                K[edof[a], edof[b]] += scale * Ke[a, b]

    # Extract the submatrix of K indexed by the free dofs.
    for r, c in ti.ndrange(n_free_dof, n_free_dof):
        K_freedof[r, c] = K[free_dofs_vec[r], free_dofs_vec[c]]

def get_ke():
    """Compute the 8x8 element stiffness matrix from E and nu and store it in Ke.

    The matrix contains only eight distinct coefficients; ``layout`` records
    which coefficient sits at each (row, column) position.
    """
    coeff = np.array([
        1 / 2 - nu / 6,
        1 / 8 + nu / 8,
        -1 / 4 - nu / 12,
        -1 / 8 + 3 * nu / 8,
        -1 / 4 + nu / 12,
        -1 / 8 - nu / 8,
        nu / 6,
        1 / 8 - 3 * nu / 8,
    ])
    layout = np.array([
        [0, 1, 2, 3, 4, 5, 6, 7],
        [1, 0, 7, 6, 5, 4, 3, 2],
        [2, 7, 0, 5, 6, 3, 4, 1],
        [3, 6, 5, 0, 7, 2, 1, 4],
        [4, 5, 6, 7, 0, 1, 2, 3],
        [5, 4, 3, 2, 1, 0, 7, 6],
        [6, 3, 4, 1, 2, 7, 0, 5],
        [7, 2, 1, 4, 3, 6, 5, 0],
    ])
    prefactor = E / (1. - nu ** 2)
    # Fancy indexing gathers the coefficients into the full 8x8 matrix.
    Ke.from_numpy(prefactor * coeff[layout])

if __name__ == '__main__':
    initialize()
    get_ke()
    # assemble_k reads the free dof indices from free_dofs_vec, so the field
    # must be filled first; left at zero, every K_freedof entry would be
    # K[0, 0]. The field is declared as i32, hence the explicit cast.
    free_dofs_vec.from_numpy(np.asarray(free_dofs, dtype=np.int32))

    # Time three assemblies. The first call also includes Taichi's JIT
    # compilation of assemble_k, so it is expected to be much slower.
    for _ in range(3):
        start_time = time.perf_counter()
        assemble_k()
        # Wait for the device to finish so the measurement covers the kernel
        # execution rather than just its launch.
        ti.sync()
        elapsed_time = time.perf_counter() - start_time
        print(elapsed_time)

qmdxcube avatar Aug 28 '23 10:08 qmdxcube

Because Taichi is a JIT compiler, it compiles Taichi kernels while the code is running. In your loop, the kernel assemble_k is compiled during the first iteration; the remaining iterations do not need to compile it again. The initial compilation can take a significant amount of time.

TH3CHARLie avatar Aug 29 '23 18:08 TH3CHARLie