heterocl
heterocl copied to clipboard
Support of Constant Arrays
Currently, HeteroCL cannot infer constant arrays from the source code. For example, the weights (F) of the convolution layer are read-only and should be declared as constants in the generated C code.
def conv1():
    """Run a 3x3 convolution in which the weights ``F`` are a kernel input.

    ``A`` (6x6) and ``F`` (3x3) are both declared as placeholders and handed
    to the schedule as inputs, so ``F`` reaches the built function as an
    argument. The 4x4 result stage is named ``"B"`` and its second axis is
    pipelined. The design is built for the ``zc706`` platform with
    ``compile="vivado_hls"`` / ``mode="csim"`` and then invoked once on
    random integer data in ``[0, 10)``.
    """
    A = hcl.placeholder((6, 6), "A")
    F = hcl.placeholder((3, 3), "F")

    def kernel(A, F):
        # Reduction axes spanning the 3x3 window (rows first, then columns).
        r = hcl.reduce_axis(0, 3)
        c = hcl.reduce_axis(0, 3)

        def window_sum(y, x):
            # Sum of products of the input window at (y, x) with the weights.
            return hcl.sum(A[y+r, x+c] * F[r, c], axis=[r, c])

        return hcl.compute((4, 4), window_sum, "B")

    schedule = hcl.create_schedule([A, F], kernel)
    stage_b = kernel.B
    schedule[stage_b].pipeline(stage_b.axis[1])

    platform = hcl.platform.zc706
    platform.config(compile="vivado_hls", mode="csim", project="conv1")
    run = hcl.build(schedule, target=platform)

    # Random values are drawn for A first, then F, matching the input order.
    a_data = hcl.asarray(np.random.randint(0, 10, A.shape))
    f_data = hcl.asarray(np.random.randint(0, 10, F.shape))
    b_data = hcl.asarray(np.zeros((4, 4)))
    run(a_data, f_data, b_data)
def conv2():
    """Run a 3x3 convolution whose weights ``F`` are created inside the kernel.

    Only ``A`` (6x6) is a placeholder input. The 3x3 weights are produced
    inside the kernel by ``hcl.copy`` from a random NumPy integer array in
    ``[0, 10)``. The 4x4 result stage is named ``"B"`` and its second axis is
    pipelined. The design is built for the ``zc706`` platform with
    ``compile="vivado_hls"`` / ``mode="csim"`` and then invoked once on
    random integer data.
    """
    A = hcl.placeholder((6, 6), "A")

    def kernel(A):
        # Reduction axes spanning the 3x3 window (rows first, then columns).
        r = hcl.reduce_axis(0, 3)
        c = hcl.reduce_axis(0, 3)
        # Weights are copied in from a NumPy array instead of being an input.
        weights = np.random.randint(0,10,(3,3))
        F = hcl.copy(weights, "F")

        def window_sum(y, x):
            # Sum of products of the input window at (y, x) with the weights.
            return hcl.sum(A[y+r, x+c] * F[r, c], axis=[r, c])

        return hcl.compute((4, 4), window_sum, "B")

    schedule = hcl.create_schedule([A], kernel)
    stage_b = kernel.B
    schedule[stage_b].pipeline(stage_b.axis[1])

    platform = hcl.platform.zc706
    platform.config(compile="vivado_hls", mode="csim", project="conv2")
    run = hcl.build(schedule, target=platform)

    a_data = hcl.asarray(np.random.randint(0, 10, A.shape))
    b_data = hcl.asarray(np.zeros((4, 4)))
    run(a_data, b_data)
However, neither method produces a constant array: the first (conv1) passes F as a function argument, while the second (conv2), which uses hcl.copy, declares F as a local array and initializes it with one assignment per element.
// conv1
// Generated HLS code for conv1: 3x3 convolution of the 6x6 input A with the
// 3x3 weights F, written to the 4x4 output B.
// Note that F is a plain (non-const) function argument here.
// NOTE(review): bit32 is defined outside this listing; presumably a 32-bit
// integer typedef -- confirm against the generated header.
void test(bit32 A[6][6], bit32 F[3][3], bit32 B[4][4]) {
// Iterate over every output position (y, x).
B_y: for (bit32 y = 0; y < 4; ++y) {
B_x: for (bit32 x = 0; x < 4; ++x) {
// Pipeline pragma placed on the x loop body.
#pragma HLS pipeline
bit32 sum;
sum = 0;
// Accumulate over the 3x3 window (ra2 = window row, ra3 = window column).
B_ra2: for (bit32 ra2 = 0; ra2 < 3; ++ra2) {
B_ra3: for (bit32 ra3 = 0; ra3 < 3; ++ra3) {
// Operands are widened to ap_int<64> for the multiply, the add is done in
// ap_int<65>, and the result is cast back to bit32.
sum = ((bit32)(((ap_int<65>)(((ap_int<64>)A[(y + ra2)][(x + ra3)]) * ((ap_int<64>)F[ra2][ra3]))) + ((ap_int<65>)sum)));
}
}
B[y][x] = sum;
}
}
}
// conv2
// Generated HLS code for conv2: 3x3 convolution of the 6x6 input A with
// weights F held in a local array, written to the 4x4 output B.
// NOTE(review): bit32 is defined outside this listing; presumably a 32-bit
// integer typedef -- confirm against the generated header.
void test(bit32 A[6][6], bit32 B[4][4]) {
// F is a local, non-const array filled by nine separate element assignments
// rather than a constant initializer.
bit32 F[3][3];
F[0][0] = 1;
F[0][1] = 5;
F[0][2] = 9;
F[1][0] = 0;
F[1][1] = 5;
F[1][2] = 9;
F[2][0] = 8;
F[2][1] = 1;
F[2][2] = 1;
// Iterate over every output position (y, x).
B_y: for (bit32 y = 0; y < 4; ++y) {
B_x: for (bit32 x = 0; x < 4; ++x) {
// Pipeline pragma placed on the x loop body.
#pragma HLS pipeline
bit32 sum;
sum = 0;
// Accumulate over the 3x3 window (ra0 = window row, ra1 = window column).
B_ra0: for (bit32 ra0 = 0; ra0 < 3; ++ra0) {
B_ra1: for (bit32 ra1 = 0; ra1 < 3; ++ra1) {
// Operands are widened to ap_int<64> for the multiply, the add is done in
// ap_int<65>, and the result is cast back to bit32.
sum = ((bit32)(((ap_int<65>)(((ap_int<64>)A[(y + ra0)][(x + ra1)]) * ((ap_int<64>)F[ra0][ra1]))) + ((ap_int<65>)sum)));
}
}
B[y][x] = sum;
}
}
}
@seanlatias we also need to test the const struct array.
@chhzh123 can you test it with the HLS compiler and see whether F is synthesized as a const array or whether its values are simply inlined? You can check the resource report for that. If it is synthesized as const, please increase the array size and push it to the limit to see when the HLS tool fails. Thanks.
NVM, I'll do that.