numba-dpex
numba-dpex copied to clipboard
`fastmath` option is not supported
The Numba keyword argument `fastmath` is not supported in `@dppy.kernel`.
The lack of this option can cause performance degradation in some cases.
import dpctl
import numba_dppy
import numpy as np
import numpy.random as rnd
import dpctl.tensor as dpt
# Element type used for the generated input data and for the output buffer.
work_dtype = np.float64
# Fixed RNG seeds so the test and train sets are reproducible between runs.
SEED_TEST = 777777
SEED_TRAIN = 0
def __gen_data(ip_size, dtype, seed, dim):
    """Return a reproducible ``(ip_size, dim)`` array of uniform samples.

    The global NumPy random generator is re-seeded with ``seed`` before
    sampling, so repeated calls with the same arguments give the same data.
    The samples are drawn with ``rand`` and then cast to ``dtype``.
    """
    rnd.seed(seed)
    return rnd.rand(ip_size, dim).astype(dtype)
@numba_dppy.func
def get_dist(arr1, arr2, dims):
    """Device function: sum of squared element-wise differences.

    Accumulates ``(arr1[i] - arr2[i]) ** 2`` over the first ``dims``
    elements of both inputs and returns the total (no square root taken).
    """
    acc = 0
    for idx in range(dims):
        delta = arr1[idx] - arr2[idx]
        acc += delta * delta
    return acc
# NOTE: ``fastmath=True`` is intentionally kept. It is the option this
# reproducer exercises; ``numba_dppy.kernel`` rejects the keyword with a
# TypeError instead of accepting it.
@numba_dppy.kernel(fastmath=True)
def dist_kernel(test_data, test_size,
                train_data, train_size,
                distance_m,
                dim):
    """Write the squared distance between one test row and one train row.

    Each work item is identified by a 2-D global id: axis 0 selects the test
    row and axis 1 selects the train row. The sum of squared differences over
    the first ``dim`` components is stored in the flat buffer ``distance_m``
    at index ``test_id * train_size + train_id``.

    Parameters:
        test_data:  indexable by row, then by component
                    (assumes shape ``(test_size, dim)`` -- TODO confirm).
        test_size:  number of valid test rows; ids at or beyond it are skipped.
        train_data: indexable by row, then by component
                    (assumes shape ``(train_size, dim)`` -- TODO confirm).
        train_size: number of valid train rows; also the row stride of
                    ``distance_m``.
        distance_m: flat output buffer, written in place.
        dim:        number of components accumulated per pair.
    """
    g_test_id = numba_dppy.get_global_id(0)
    g_train_id = numba_dppy.get_global_id(1)
    # Guard against work items that fall outside the requested ranges.
    if g_train_id < train_size and g_test_id < test_size:
        test_v = test_data[g_test_id]
        train_v = train_data[g_train_id]
        dist = 0
        for i in range(dim):
            diff = train_v[i] - test_v[i]
            dist += diff * diff
        distance_m[g_test_id * train_size + g_train_id] = dist
def run_kernel(
    train_data,
    test_data,
    ntrain,
    ntest,
    dim,
):
    """Launch ``dist_kernel`` on the OpenCL GPU device and return its output.

    Allocates a flat ``ntest * ntrain`` buffer of ``work_dtype``, runs the
    kernel over an ``[ntest, ntrain]`` global range inside a
    ``dpctl.device_context("opencl:gpu")`` block, and returns the buffer.

    Parameters:
        train_data: train points passed through to the kernel.
        test_data:  test points passed through to the kernel.
        ntrain:     number of train points (second global dimension).
        ntest:      number of test points (first global dimension).
        dim:        number of components per point.

    Returns:
        The flat distance buffer; the kernel writes the entry for test row
        ``t`` and train row ``r`` at index ``t * ntrain + r``.
    """
    dist_matrix = np.empty(ntest * ntrain, dtype=work_dtype)
    gws = [ntest, ntrain]
    # NOTE(review): fixed 2x2 work-group size; presumably ntest and ntrain
    # must be multiples of 2 for the launch to be valid -- confirm.
    lws = [2, 2]
    # The queue object yielded by the context manager is not needed here.
    with dpctl.device_context("opencl:gpu"):
        dist_kernel[gws, lws](test_data, ntest,
                              train_data, ntrain,
                              dist_matrix,
                              dim)
    # Hand the result back to the caller instead of discarding it.
    return dist_matrix
def main():
    """Generate small test/train sets and run the distance kernel on them."""
    dim, ntrain, ntest = 16, 32, 8
    test_points = __gen_data(ntest, work_dtype, SEED_TEST, dim)
    train_points = __gen_data(ntrain, work_dtype, SEED_TRAIN, dim)
    run_kernel(train_points, test_points, ntrain, ntest, dim)


if __name__ == "__main__":
    main()
Running the code fails with the following error:
TypeError: kernel() got an unexpected keyword argument 'fastmath'