tinygrad
tinygrad copied to clipboard
Some new tests
If you look at the output of these two tests, you can see that constant folding does not work properly for zero, but it does for 1.0:
DEBUG=4 GPU=1 python3 test/test_speed_v_torch.py TestSpeed.test_constant_zero_sub
// Kernel shown as the DEBUG=4 output of TestSpeed.test_constant_zero_sub.
// Each work-item writes data0[i] = data1[0] - data2[i], where i is its
// global id in dimension 0. The first operand is still passed in as a
// buffer (data1) and loaded from index 0 by every work-item; it has not
// been replaced by a literal in the expression.
__kernel void ew_S4096_4096( __global float * data0, __global float * data1, __global float * data2 ) {
// Sampler is declared here but not referenced anywhere else in this kernel.
const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
// Global id along dimension 0 selects the element this work-item handles.
int idx0 = get_global_id(0);
// Output index: follows the work-item id.
int bufi0 = (0 + idx0);
// data1 index is fixed at 0, so every work-item reads the same element.
int bufi1 = (0);
// data2 index: follows the work-item id.
int bufi2 = (0 + idx0);
// Subtraction with the first operand fetched from the data1 buffer.
data0[bufi0] = (data1[bufi1]-data2[bufi2]);
}
vs DEBUG=4 GPU=1 python3 test/test_speed_v_torch.py TestSpeed.test_constant_sub
// Kernel shown as the DEBUG=4 output of TestSpeed.test_constant_sub.
// Each work-item writes data0[i] = 1.0 - data2[i], where i is its global
// id in dimension 0. The first operand appears as the literal (1.0) in the
// expression, and the kernel takes no data1 buffer argument.
__kernel void ew_S4096_4096( __global float * data0, __global float * data2 ) {
// Sampler is declared here but not referenced anywhere else in this kernel.
const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
// Global id along dimension 0 selects the element this work-item handles.
int idx0 = get_global_id(0);
// Output index: follows the work-item id.
int bufi0 = (0 + idx0);
// Still emitted, but unused: no data1 access remains in this kernel.
int bufi1 = (0);
// data2 index: follows the work-item id.
int bufi2 = (0 + idx0);
// Subtraction with the constant operand inlined as a literal.
data0[bufi0] = ((1.0)-data2[bufi2]);
}
Can you merge into master instead of newgpu? The newgpu branch isn't close to ready.