devito nesting of time dimensions ("nested solves")

nesting of time dimensions ("nested solves")

Open FabioLuporini opened this issue 5 years ago • 7 comments

this code

from devito import *

grid = Grid(shape=(3, 3))
t = grid.stepping_dim
time = grid.time_dim
x, y = grid.dimensions

T = Dimension(name='T')
x1 = Dimension(name='x1')
x2 = Dimension(name='x2')

u0 = TimeFunction(name='u0', grid=grid)
u1 = TimeFunction(name='u1', shape=(2, 3, 3), dimensions=(T, x1, x2))

eqs = [Eq(u0.forward, u0 + u1[t, x, y] + 1),
       Eq(u1[(T + 1) % 2, x1, x2], u0[t + 1, x1, x2] + u1[T, x1, x2] + 2)]

op = Operator(eqs, dse='noop')
print(op)

produces code which is kinda OK on 2495e026281ae36c9cea15abc9ab77b79162b64c, but it's ugly. We should naturally support nesting of time dimensions

Sep 06 '18 18:09 FabioLuporini

@navjotk and I had a brief discussion about this and one possible solution is to implement Operator nesting.

op0 = Operator(...)
op1 = Operator([op0, eq...])

When op1 gets compiled, a function call to op0 will be generated

Oct 13 '18 22:10 FabioLuporini

Can we break this down further into TODOs? PS: I don't understand why I didn't get a notification for this tag.

Oct 25 '18 19:10 navjotk

This needs some thinking but I won't have time shortly , but if you start with a TODO draft I'm happy to follow up

Oct 25 '18 20:10 FabioLuporini

Just a follow up to this owing to the recent chat in #qestions.

So below is code for a lid driven cavity problem:

import numpy as np
from devito import Grid, Function, TimeFunction, Eq, Operator, solve
from devito import ConditionalDimension, Constant, first_derivative, second_derivative
import matplotlib.pyplot as plt
from matplotlib import cm

"""
Code for computing the final steady state of the lid driven cavity problem using devito.
Note that transient states are not computed accurately in this formulation.
"""

# physical parameters
rho = Constant(name='rho')
nu = Constant(name='nu')
ti = Constant(name='ti', dtype=np.int32)

rho.data = 1.
nu.data = 1./10.

# define spatial mesh
# Size of rectangular domain
Lx = 1
Ly = Lx

# Number of grid points in each direction, including boundary nodes
Nx = 51
Ny = Nx

# hence the mesh spacing
dx = Lx/(Nx-1)
dy = Ly/(Ny-1)

grid = Grid(shape=(Nx, Ny), extent=(Lx, Ly))
gridc = grid
time = grid.time_dim
t = grid.stepping_dim
x, y = grid.dimensions

# time stepping parameters
dt = 1e-3
t_end = 1.
ns = int(t_end/dt)

## Poisson pressure eq. tol
#ptol = 1e-5
#pnorm = 1.

# If we use a set number of pressure iterations.
nl = 5

u = TimeFunction(name='u', grid=grid, space_order=2, save=ns+1)
v = TimeFunction(name='v', grid=grid, space_order=2, save=ns+1)
udx = TimeFunction(name='udx', grid=grid, space_order=2, save=ns+1)
vdx = TimeFunction(name='vdx', grid=grid, space_order=2, save=ns+1)
udy = TimeFunction(name='udy', grid=grid, space_order=2, save=ns+1)
vdy = TimeFunction(name='vdy', grid=grid, space_order=2, save=ns+1)
pd = TimeFunction(name='pd', grid=grid, space_order=2)
p = TimeFunction(name='p', grid=grid, space_order=2, save=ns+1)

# Initialise fields
u.data[:] = 0.
v.data[:] = 0.
udx.data[:] = 0.
vdx.data[:] = 0.
udy.data[:] = 0.
vdy.data[:] = 0.
pd.data[:] = 0.
p.data[:] = 0.

# RHS of Poisson Pressure eq.
RHS = Function(name='RHS', grid=grid)
RHS.data[:] = 0.

# Main equations
eq_RHS = Eq(RHS, rho*(1./dt*(udx[ti,x,y]+vdy[ti,x,y])- \
                       (udx[ti,x,y]*udx[ti,x,y]+ \
                        vdy[ti,x,y]*vdy[ti,x,y]+2*udy[ti,x,y]*vdx[ti,x,y])))
eq_p = Eq(pd.laplace, RHS)
eq_u = Eq(u.dt + u*u.dx + v*u.dy - nu*u.laplace, -1./rho*p.dx)
eq_v = Eq(v.dt + u*v.dx + v*v.dy - nu*v.laplace, -1./rho*p.dy)

# SymPy stencils
stencil_p = solve(eq_p, pd)
stencil_u = solve(eq_u, u.forward)
stencil_v = solve(eq_v, v.forward)
update_p = Eq(pd.forward, stencil_p)
update_u = Eq(u.forward, stencil_u)
update_v = Eq(v.forward, stencil_v)

# Create Dirichlet BC expressions for velocity
bc_u = [Eq(u[time+1, x, Ny-1], 1.)]  # top
bc_u += [Eq(u[time+1, 0, y], 0.)]  # left
bc_u += [Eq(u[time+1, Nx-1, y], 0.)]  # right
bc_u += [Eq(u[time+1, x, 0], 0.)]  # bottom
bc_v = [Eq(v[time+1, 0, y], 0.)]  # left
bc_v += [Eq(v[time+1, Nx-1, y], 0.)]  # right
bc_v += [Eq(v[time+1, x, Ny-1], 0.)]  # top
bc_v += [Eq(v[time+1, x, 0], 0.)]  # bottom

## Neumann BCs for pressure
bc_p = [Eq(pd[t+1, 0, y], pd[t+1, 1, y])]  # left
bc_p += [Eq(pd[t+1, Nx-1, y], pd[t+1, Nx-2, y])]  # right
bc_p += [Eq(pd[t+1, x, Ny-1], pd[t+1, x, Ny-2])]  # top
bc_p += [Eq(pd[t+1, x, 0], pd[t+1, x, 1])]  # bottom

# Pressure only known up to a constant so set to zero at origin
bc_p += [Eq(pd[t+1, 0, 0], 0.)]  # bottom

# Create the operators
opRHS = Operator([eq_RHS])
opp = Operator([update_p] + bc_p)
opv = Operator([update_u, update_v, Eq(udx, u.dx), Eq(udy, u.dy), \
                Eq(vdx, v.dx), Eq(vdy, v.dy), \
                ]+ bc_u + bc_v)

# Run the simulation
ctime = 0
#nit = 0

# Loop in which the error in the pressure equation is computed (slow).
#for j in range(0,ns):
    #if j>0:
        #ti.data = j-1
        #nit = 0
        #pnorm = 1.
        #opRHS.apply()
        #pd.data[0] = p.data[j-1]
        #while pnorm>ptol:
            #pd.data[0] = pd.data[1]
            #opp.apply(time_m=0, time_M=0, RHS=RHS)
            #pnorm = np.sum(np.abs(pd.data[1] - pd.data[0]))/ \
                    #np.maximum(1.0e-10,np.sum(np.abs(pd.data[1])))
            #nit+=1
    #p.data[j] = pd.data[1]
    #opv.apply(time_m=j, time_M=j, dt=dt)
    #ctime+=dt
    #print(ctime,nit)

# Faster method.
for j in range(0,ns):
    if j>0:
        ti.data = j-1
        opRHS.apply()
        opp.apply(t_M=nl, RHS=RHS)
    p.data[j] = pd.data[0]
    opv.apply(time_m=j, time_M=j, dt=dt, p=p)
    ctime+=dt
    print(ctime)

# Check results
Marchi_Re10_u = np.array([[0.0625, -3.85425800e-2],
                          [0.125,  -6.96238561e-2],
                          [0.1875, -9.6983962e-2],
                          [0.25,  -1.22721979e-1],
                          [0.3125, -1.47636199e-1],
                          [0.375,  -1.71260757e-1],
                          [0.4375, -1.91677043e-1],
                          [0.5,    -2.05164738e-1],
                          [0.5625, -2.05770198e-1],
                          [0.625,  -1.84928116e-1],
                          [0.6875, -1.313892353e-1],
                          [0.75,   -3.1879308e-2],
                          [0.8125,  1.26912095e-1],
                          [0.875,   3.54430364e-1],
                          [0.9375,  6.50529292e-1]])

Marchi_Re10_v = np.array([[0.0625, 9.2970121e-2],
                          [0.125, 1.52547843e-1],
                          [0.1875, 1.78781456e-1],
                          [0.25, 1.76415100e-1],
                          [0.3125, 1.52055820e-1],
                          [0.375, 1.121477612e-1],
                          [0.4375, 6.21048147e-2],
                          [0.5, 6.3603620e-3],
                          [0.5625, -5.10417285e-2],
                          [0.625, -1.056157259e-1],
                          [0.6875, -1.51622101e-1],
                          [0.75, -1.81633561e-1],
                          [0.8125, -1.87021651e-1],
                          [0.875, -1.59898186e-1],
                          [0.9375, -9.6409942e-2]])

x_coord = np.linspace(0, Lx, grid.shape[0])
y_coord = np.linspace(0, Ly, grid.shape[1])

fig = plt.figure(figsize=(12, 6))
ax1 = fig.add_subplot(121)
ax1.plot(u.data[-1,int(grid.shape[0]/2),:],y_coord[:])
ax1.plot(Marchi_Re10_u[:,1],Marchi_Re10_u[:,0],'ro')
ax1.set_xlabel('$u$')
ax1.set_ylabel('$y$')
ax1 = fig.add_subplot(122)
ax1.plot(x_coord[:],v.data[-1,:,int(grid.shape[0]/2)])
ax1.plot(Marchi_Re10_v[:,0],Marchi_Re10_v[:,1],'ro')
ax1.set_xlabel('$x$')
ax1.set_ylabel('$v$')

plt.show()

The results produced are 'fine' but the method for obtaining them is not satisfactory. Ideally the commented out operator loop should be used but this is impractical owing to the amount of calls to operator required (pressure solves can take up to 50,000 iterations).

So this is just to illustrate the type of problem it would be nice to support natively via code generation at some point in the future.

Feb 22 '19 15:02 rhodrin

Note: For the above script to now work with master the following changes must be made:

eq_u = Eq(u.dt + u*u.dx + v*u.dy - nu*u.laplace, -1./rho*p.dx)
eq_v = Eq(v.dt + u*v.dx + v*v.dy - nu*v.laplace, -1./rho*p.dy)
opv = Operator([update_u, update_v, Eq(udx, u.dx), Eq(udy, u.dy), \
                Eq(vdx, v.dx), Eq(vdy, v.dy), \
                ]+ bc_u + bc_v)

------->>

eq_u = Eq(u.dt + u*u.dxc + v*u.dyc - nu*u.laplace, -1./rho*p.dxc)
eq_v = Eq(v.dt + u*v.dxc + v*v.dyc - nu*v.laplace, -1./rho*p.dyc)
opv = Operator([update_u, update_v, Eq(udx, u.dxc), Eq(udy, u.dyc), \
                Eq(vdx, v.dxc), Eq(vdy, v.dyc), \
                ]+ bc_u + bc_v)

Mar 11 '20 14:03 rhodrin

Just a follow up to this owing to the recent chat in #questions.

The results produced are 'fine' but the method for obtaining them is not satisfactory. Ideally the commented out operator loop should be used but this is impractical owing to the amount of calls to operator required (pressure solves can take up to 50,000 iterations).

So this is just to illustrate the type of problem it would be nice to support natively via code generation at some point in the future.

If #1139 gets resolved, the following code should allow the calculation for the error of the pressure equation in C instead of in Python. I could not even fully execute the "slow code" in my computer to compare with the updated version:

import numpy as np
from devito import Grid, Function, TimeFunction, Eq, Operator, solve
from devito import ConditionalDimension, Constant, first_derivative, second_derivative
import matplotlib.pyplot as plt
from matplotlib import cm

"""
Code for computing the final steady state of the lid driven cavity problem using devito.
Note that transient states are not computed accurately in this formulation.
"""

# physical parameters
rho = Constant(name='rho')
nu = Constant(name='nu')
ti = Constant(name='ti', dtype=np.int32)

rho.data = 1.
nu.data = 1./10.

# define spatial mesh
# Size of rectangular domain
Lx = 1
Ly = Lx

# Number of grid points in each direction, including boundary nodes
Nx = 51
Ny = Nx

# hence the mesh spacing
dx = Lx/(Nx-1)
dy = Ly/(Ny-1)

grid = Grid(shape=(Nx, Ny), extent=(Lx, Ly))
gridc = grid
time = grid.time_dim
t = grid.stepping_dim
x, y = grid.dimensions

# time stepping parameters
dt = 1e-3
t_end = 1.
ns = int(t_end/dt)

## Poisson pressure eq. tol
ptol = 1e-5

# If we use a set number of pressure iterations.
nl = 100000 # maximum number of iterations

u = TimeFunction(name='u', grid=grid, space_order=2, save=ns+1)
v = TimeFunction(name='v', grid=grid, space_order=2, save=ns+1)
udx = TimeFunction(name='udx', grid=grid, space_order=2, save=ns+1)
vdx = TimeFunction(name='vdx', grid=grid, space_order=2, save=ns+1)
udy = TimeFunction(name='udy', grid=grid, space_order=2, save=ns+1)
vdy = TimeFunction(name='vdy', grid=grid, space_order=2, save=ns+1)
pd = TimeFunction(name='pd', grid=grid, space_order=2)
p = TimeFunction(name='p', grid=grid, space_order=2, save=ns+1)

# Initialise fields
u.data[:] = 0.
v.data[:] = 0.
udx.data[:] = 0.
vdx.data[:] = 0.
udy.data[:] = 0.
vdy.data[:] = 0.
pd.data[:] = 0.
p.data[:] = 0.

# RHS of Poisson Pressure eq.
RHS = Function(name='RHS', grid=grid)
RHS.data[:] = 0.

# Main equations
eq_RHS = Eq(RHS, rho*(1./dt*(udx[ti,x,y]+vdy[ti,x,y])- \
                       (udx[ti,x,y]*udx[ti,x,y]+ \
                        vdy[ti,x,y]*vdy[ti,x,y]+2*udy[ti,x,y]*vdx[ti,x,y])))
eq_p = Eq(pd.laplace, RHS)
eq_u = Eq(u.dt + u*u.dxc + v*u.dyc - nu*u.laplace, -1./rho*p.dxc)
eq_v = Eq(v.dt + u*v.dxc + v*v.dyc - nu*v.laplace, -1./rho*p.dyc)

# SymPy stencils
stencil_p = solve(eq_p, pd)
stencil_u = solve(eq_u, u.forward)
stencil_v = solve(eq_v, v.forward)
update_p = Eq(pd.forward, stencil_p)
update_u = Eq(u.forward, stencil_u)
update_v = Eq(v.forward, stencil_v)

# Create Dirichlet BC expressions for velocity
bc_u = [Eq(u[time+1, x, Ny-1], 1.)]  # top
bc_u += [Eq(u[time+1, 0, y], 0.)]  # left
bc_u += [Eq(u[time+1, Nx-1, y], 0.)]  # right
bc_u += [Eq(u[time+1, x, 0], 0.)]  # bottom
bc_v = [Eq(v[time+1, 0, y], 0.)]  # left
bc_v += [Eq(v[time+1, Nx-1, y], 0.)]  # right
bc_v += [Eq(v[time+1, x, Ny-1], 0.)]  # top
bc_v += [Eq(v[time+1, x, 0], 0.)]  # bottom

## Neumann BCs for pressure
bc_p = [Eq(pd[t+1, 0, y], pd[t+1, 1, y])]  # left
bc_p += [Eq(pd[t+1, Nx-1, y], pd[t+1, Nx-2, y])]  # right
bc_p += [Eq(pd[t+1, x, Ny-1], pd[t+1, x, Ny-2])]  # top
bc_p += [Eq(pd[t+1, x, 0], pd[t+1, x, 1])]  # bottom

# Pressure only known up to a constant so set to zero at origin
bc_p += [Eq(pd[t+1, 0, 0], 0.)]  # bottom

#############################################
from devito import Break, DefaultDimension, Dimension, Inc
from sympy import Abs

xtemp = DefaultDimension(name = 'xt', default_value = pd.shape[1])
ytemp = DefaultDimension(name = 'yt', default_value = pd.shape[2])
xy_to_xtyt = [(x, xtemp), (y, ytemp)]

residual = Function(name = 'residual', shape = (1,), dimensions = (Dimension(name = 'residual_dim'),))
pnorm = Function(name = 'pnorm', shape = (1,), dimensions = (Dimension(name = 'pnorm_dim'),))
niter = Function(name = 'niter', shape = (1,), dimensions = (Dimension(name = 'niter_dim'),),
                 dtype = np.int32)

norm_p = Inc(pnorm[0], Abs(pd[t+1, x, y]).subs(xy_to_xtyt))
res_p = Inc(residual[0], Abs(pd[t+1, x, y] - pd[t, x, y]).subs(xy_to_xtyt))
brk_p = Break(residual[0] < ptol * pnorm[0], [Eq(pnorm[0], 1e-10), Eq(residual[0], 0)], implicit_dims = [time])
cnt_p = Eq(niter[0], time + 1)

opp = Operator([cnt_p, update_p] + bc_p + [res_p, norm_p, brk_p])
##############################################

# Create the operators
opRHS = Operator([eq_RHS])
opv = Operator([update_u, update_v, Eq(udx, u.dxc), Eq(udy, u.dyc), \
                Eq(vdx, v.dxc), Eq(vdy, v.dyc), \
                ]+ bc_u + bc_v)

# Run the simulation
ctime = 0
#nit = 0

for j in range(0,ns):
    if j>0:
        ti.data = j-1
        opRHS.apply()
        residual.data[0] = 0
        pnorm.data[0] = 1e-10
        opp.apply(t_M=nl, RHS=RHS)
        pd.data[0] = pd.data[niter.data[0] % 2]
    p.data[j] = pd.data[0]
    opv.apply(time_m=j, time_M=j, dt=dt)
    ctime+=dt
    print("Time: %f    Iterations: %d     Residual: %f" % (ctime,niter.data[0],
                                                           residual.data[0]))

# Check results
Marchi_Re10_u = np.array([[0.0625, -3.85425800e-2],
                          [0.125,  -6.96238561e-2],
                          [0.1875, -9.6983962e-2],
                          [0.25,  -1.22721979e-1],
                          [0.3125, -1.47636199e-1],
                          [0.375,  -1.71260757e-1],
                          [0.4375, -1.91677043e-1],
                          [0.5,    -2.05164738e-1],
                          [0.5625, -2.05770198e-1],
                          [0.625,  -1.84928116e-1],
                          [0.6875, -1.313892353e-1],
                          [0.75,   -3.1879308e-2],
                          [0.8125,  1.26912095e-1],
                          [0.875,   3.54430364e-1],
                          [0.9375,  6.50529292e-1]])

Marchi_Re10_v = np.array([[0.0625, 9.2970121e-2],
                          [0.125, 1.52547843e-1],
                          [0.1875, 1.78781456e-1],
                          [0.25, 1.76415100e-1],
                          [0.3125, 1.52055820e-1],
                          [0.375, 1.121477612e-1],
                          [0.4375, 6.21048147e-2],
                          [0.5, 6.3603620e-3],
                          [0.5625, -5.10417285e-2],
                          [0.625, -1.056157259e-1],
                          [0.6875, -1.51622101e-1],
                          [0.75, -1.81633561e-1],
                          [0.8125, -1.87021651e-1],
                          [0.875, -1.59898186e-1],
                          [0.9375, -9.6409942e-2]])

x_coord = np.linspace(0, Lx, grid.shape[0])
y_coord = np.linspace(0, Ly, grid.shape[1])

fig = plt.figure(figsize=(12, 6))
ax1 = fig.add_subplot(121)
ax1.plot(u.data[-1,int(grid.shape[0]/2),:],y_coord[:])
ax1.plot(Marchi_Re10_u[:,1],Marchi_Re10_u[:,0],'ro')
ax1.set_xlabel('$u$')
ax1.set_ylabel('$y$')
ax1 = fig.add_subplot(122)
ax1.plot(x_coord[:],v.data[-1,:,int(grid.shape[0]/2)])
ax1.plot(Marchi_Re10_v[:,0],Marchi_Re10_v[:,1],'ro')
ax1.set_xlabel('$x$')
ax1.set_ylabel('$v$')

plt.show()

Mar 13 '20 15:03 BritishPiper

Note: There's a also a native solution to the lid-driven cavity problem on branch break_devito

c-code produced is:

#define _POSIX_C_SOURCE 200809L
#include "stdlib.h"
#include "math.h"
#include "sys/time.h"
#include "xmmintrin.h"
#include "pmmintrin.h"
struct dataobj
{
  void *restrict data;
  int * size;
  int * npsize;
  int * dsize;
  int * hsize;
  int * hofs;
  int * oofs;
} ;
struct profiler
{
  double section0;
  double section1;
  double section2;
  double section3;
  double section4;
  double section5;
} ;
int Kernel(const float dt, const float h_x, const float h_y, const float nu, struct dataobj *restrict p_vec, struct dataobj *restrict pi_vec, const float rho, struct dataobj *restrict u_vec, struct dataobj *restrict v_vec, const int i_M, const int i_m, const int time_M, const int time_m, struct profiler * timers, const int x_M, const int x_m, const int y_M, const int y_m)
{
  float (*restrict p)[p_vec->size[1]][p_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[p_vec->size[1]][p_vec->size[2]]) p_vec->data;
  float (*restrict pi)[pi_vec->size[1]][pi_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[pi_vec->size[1]][pi_vec->size[2]]) pi_vec->data;
  float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;
  float (*restrict v)[v_vec->size[1]][v_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[v_vec->size[1]][v_vec->size[2]]) v_vec->data;
  /* Flush denormal numbers to zero in hardware */
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  float norm = 0;
  for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2))
  {
    for (int i = i_m, i_space0 = (i)%(2), i_space1 = (i + 1)%(2); i <= i_M; i += 1, i_space0 = (i)%(2), i_space1 = (i + 1)%(2))
    {
      struct timeval start_section0, end_section0;
      gettimeofday(&start_section0, NULL);
      /* Begin section0 */
      for (int x = x_m; x <= x_M; x += 1)
      {
        #pragma omp simd aligned(pi,u,v:32)
        for (int y = y_m; y <= y_M; y += 1)
        {
          float r26 = 1.0/h_x;
          float r25 = 1.0/h_y;
          float r24 = 1.0/(h_y*h_y);
          float r23 = 1.0/(h_x*h_x);
          float r22 = -r25*v[t0][x + 2][y + 1] + r25*v[t0][x + 2][y + 3];
          float r21 = -r26*u[t0][x + 1][y + 2] + r26*u[t0][x + 3][y + 2];
          pi[i_space1][x + 2][y + 2] = (r23*(-(pi[i_space0][x + 1][y + 2] + pi[i_space0][x + 3][y + 2])) + r24*(-(pi[i_space0][x + 2][y + 1] + pi[i_space0][x + 2][y + 3])) + rho*(-2.5e-1F*r21*r21 - 2.5e-1F*r22*r22 + r25*(5.0e+2F*(-v[t0][x + 2][y + 1] + v[t0][x + 2][y + 3])) + r26*(-5.0e-1F*r25*(-u[t0][x + 2][y + 1] + u[t0][x + 2][y + 3])*(-v[t0][x + 1][y + 2] + v[t0][x + 3][y + 2]) + 5.0e+2F*(-u[t0][x + 1][y + 2] + u[t0][x + 3][y + 2]))))/(-2.0F*r23 - 2.0F*r24);
        }
      }
      for (int x = x_M; x >= x_m; x -= 1)
      {
        for (int y = y_m; y <= y_M; y += 1)
        {
          pi[i_space1][2][y + 2] = pi[i_space1][3][y + 2];
        }
      }
      for (int x = x_m; x <= x_M; x += 1)
      {
        for (int y = y_m; y <= y_M; y += 1)
        {
          pi[i_space1][52][y + 2] = pi[i_space1][51][y + 2];
        }
      }
      for (int x = x_m; x <= x_M; x += 1)
      {
        for (int y = y_m; y <= y_M; y += 1)
        {
          pi[i_space1][x + 2][52] = pi[i_space1][x + 2][51];
        }
        for (int y = y_M; y >= y_m; y -= 1)
        {
          pi[i_space1][x + 2][2] = pi[i_space1][x + 2][3];
        }
      }
      for (int x = x_m; x <= x_M; x += 1)
      {
        #pragma omp simd aligned(pi:32)
        for (int y = y_M; y >= y_m; y -= 1)
        {
          pi[i_space1][2][2] = 0.0F;
        }
      }
      for (int x = x_m; x <= x_M; x += 1)
      {
        for (int y = y_m; y <= y_M; y += 1)
        {
          norm += (1.0F/2601.0F)*fabs(pi[i_space0][x + 2][y + 2] - pi[i_space1][x + 2][y + 2]);
          p[t1][x + 2][y + 2] = pi[i_space1][x + 2][y + 2];
        }
      }
      /* End section0 */
      gettimeofday(&end_section0, NULL);
      timers->section0 += (double)(end_section0.tv_sec-start_section0.tv_sec)+(double)(end_section0.tv_usec-start_section0.tv_usec)/1000000;
      if (norm >= 1.0e-3F)
      {
        norm += -norm;
      }
      else
      {
        break;
      }
    }
    struct timeval start_section1, end_section1;
    gettimeofday(&start_section1, NULL);
    /* Begin section1 */
    for (int x = x_m; x <= x_M; x += 1)
    {
      #pragma omp simd aligned(p,u,v:32)
      for (int y = y_m; y <= y_M; y += 1)
      {
        float r34 = -2.0F*v[t0][x + 2][y + 2];
        float r33 = -2.0F*u[t0][x + 2][y + 2];
        float r32 = 1.0/rho;
        float r31 = 1.0/dt;
        float r30 = 1.0/h_y;
        float r29 = 1.0/h_x;
        float r28 = 1.0/(h_y*h_y);
        float r27 = 1.0/(h_x*h_x);
        u[t1][x + 2][y + 2] = dt*(r31*u[t0][x + 2][y + 2] + r32*(-5.0e-1F*r29*(-p[t1][x + 1][y + 2] + p[t1][x + 3][y + 2])) + nu*(r27*(r33 + u[t0][x + 1][y + 2] + u[t0][x + 3][y + 2]) + r28*(r33 + u[t0][x + 2][y + 1] + u[t0][x + 2][y + 3])) + 5.0e-1F*(-r29*(-u[t0][x + 1][y + 2] + u[t0][x + 3][y + 2])*u[t0][x + 2][y + 2] - r30*(-u[t0][x + 2][y + 1] + u[t0][x + 2][y + 3])*v[t0][x + 2][y + 2]));
        v[t1][x + 2][y + 2] = dt*(r31*v[t0][x + 2][y + 2] + r32*(-5.0e-1F*r30*(-p[t1][x + 2][y + 1] + p[t1][x + 2][y + 3])) + nu*(r27*(r34 + v[t0][x + 1][y + 2] + v[t0][x + 3][y + 2]) + r28*(r34 + v[t0][x + 2][y + 1] + v[t0][x + 2][y + 3])) + 5.0e-1F*(-r29*(-v[t0][x + 1][y + 2] + v[t0][x + 3][y + 2])*u[t0][x + 2][y + 2] - r30*(-v[t0][x + 2][y + 1] + v[t0][x + 2][y + 3])*v[t0][x + 2][y + 2]));
      }
      u[t1][x + 2][52] = 1.00000000000000F;
    }
    /* End section1 */
    gettimeofday(&end_section1, NULL);
    timers->section1 += (double)(end_section1.tv_sec-start_section1.tv_sec)+(double)(end_section1.tv_usec-start_section1.tv_usec)/1000000;
    struct timeval start_section2, end_section2;
    gettimeofday(&start_section2, NULL);
    /* Begin section2 */
    for (int y = y_m; y <= y_M; y += 1)
    {
      u[t1][2][y + 2] = 0.0F;
      u[t1][52][y + 2] = 0.0F;
    }
    /* End section2 */
    gettimeofday(&end_section2, NULL);
    timers->section2 += (double)(end_section2.tv_sec-start_section2.tv_sec)+(double)(end_section2.tv_usec-start_section2.tv_usec)/1000000;
    struct timeval start_section3, end_section3;
    gettimeofday(&start_section3, NULL);
    /* Begin section3 */
    for (int x = x_m; x <= x_M; x += 1)
    {
      u[t1][x + 2][2] = 0.0F;
    }
    /* End section3 */
    gettimeofday(&end_section3, NULL);
    timers->section3 += (double)(end_section3.tv_sec-start_section3.tv_sec)+(double)(end_section3.tv_usec-start_section3.tv_usec)/1000000;
    struct timeval start_section4, end_section4;
    gettimeofday(&start_section4, NULL);
    /* Begin section4 */
    for (int y = y_m; y <= y_M; y += 1)
    {
      v[t1][2][y + 2] = 0.0F;
      v[t1][52][y + 2] = 0.0F;
    }
    /* End section4 */
    gettimeofday(&end_section4, NULL);
    timers->section4 += (double)(end_section4.tv_sec-start_section4.tv_sec)+(double)(end_section4.tv_usec-start_section4.tv_usec)/1000000;
    struct timeval start_section5, end_section5;
    gettimeofday(&start_section5, NULL);
    /* Begin section5 */
    for (int x = x_m; x <= x_M; x += 1)
    {
      v[t1][x + 2][52] = 0.0F;
      v[t1][x + 2][2] = 0.0F;
    }
    /* End section5 */
    gettimeofday(&end_section5, NULL);
    timers->section5 += (double)(end_section5.tv_sec-start_section5.tv_sec)+(double)(end_section5.tv_usec-start_section5.tv_usec)/1000000;
  }
  return 0;
}

Jun 24 '20 11:06 rhodrin

devito devito copied to clipboard

nesting of time dimensions ("nested solves")

devito
devito copied to clipboard