sdf icon indicating copy to clipboard operation
sdf copied to clipboard

How to use numba for hardware acceleration

Open KnIfER opened this issue 5 months ago • 0 comments

@sdf3
def custom_shape():
    @cuda.jit(nopython=True)
    def f(p):
        distances = np.zeros(len(p), dtype=np.float32)
        for i in range(len(p)):
            x, y, z = p[i][0], p[i][1], p[i][2]
            distances[i] = real_sdf_func(x, y, z)
        return distances
    return f

It complains :

Kernel launch configuration was not specified

But I can pass this simple performance test :

import numpy as np
import numba
from numba import cuda, float32
from numba import jit

import time

# Regular Python function
def sum_array_python(arr):
    result = 0
    for i in range(arr.size):
        result += arr[i]
    return result

# Numba-accelerated function
@jit(nopython=True)  # Numba decorator to enable just-in-time compilation
def sum_array_numba(arr):
    result = 0
    distances = []
    for i in range(arr.size):
        result += arr[i]
        distances.append(result)
    return result

# Generate a large amount of test data
array_size = 10_000_000  # 10 million data points
test_array = np.random.rand(array_size)

# Test the performance of the regular Python function
start_time = time.time()
python_result = sum_array_python(test_array)
python_time = time.time() - start_time

# Test the performance of the Numba function (first run includes compilation time)
start_time = time.time()
numba_result = sum_array_numba(test_array)
first_run_time = time.time() - start_time

# Test the performance of the Numba function (excluding compilation time)
start_time = time.time()
numba_result = sum_array_numba(test_array)
optimized_time = time.time() - start_time

# Verify the consistency of the results
assert np.isclose(python_result, numba_result), "Calculation results do not match!"

# Output the performance comparison
print(f"Regular Python function result: {python_result:.6f}, Time taken: {python_time:.6f} seconds")
print(f"Numba function first run result: {numba_result:.6f}, Time taken: {first_run_time:.6f} seconds")
print(f"Numba function optimized time: {optimized_time:.6f} seconds")
print(f"Numba speedup ratio (compared to pure Python): {python_time/optimized_time:.2f}x")

Regular Python function result: 4999941.395946, Time taken: 1.143203 seconds Numba function first run result: 4999941.395946, Time taken: 0.600171 seconds Numba function optimized time: 0.218762 seconds Numba speedup ratio (compared to pure Python): 5.23x

KnIfER avatar Jul 19 '25 14:07 KnIfER