tiramisu
tiramisu copied to clipboard
Bug with tag_unroll_level?
In the following code : https://github.com/abdouskamel/tiramisu/blob/master/benchmarks/DNN/blocks/Resize-Conv/resize_conv_generator_tiramisu.cpp, I tag the loop level fin
of the computation resize
to be unrolled (line 79). But when I generate the code, it's the loop level fin
of the computation init_resized_input
that gets unrolled. Here is the generated code :
parallel (c1, 0, 32) {
for (c3, 0, 226) {
for (c5, 0, 226) {
unrolled (c7, 0, 3) { // Loop level that gets unrolled
input_resized_buf[(((c7 + int32((int64(c5)*(int64)3))) + int32((int64(c3)*(int64)678))) + int32((int64(c1)*(int64)153228)))] = 0.000000f
}
}
}
for (c3, 0, 224) {
for (c5, 0, 28) {
vectorized (c7, 0, 8) {
for (c9, 0, 3) { // Loop level that should be unrolled
let t27.s = int32(floor_f32(((float32(((c5*8) + c7))*2.678571f) + 0.839286f)))
let t26.s = int32(floor_f32(((float32(c3)*1.785714f) + 0.392857f)))
let t25.s = t27.s
let t24 = t26.s
let t23 = t25.s
let t22.s = t24
let t21 = t25.s
let t20 = t24
input_resized_buf[(((c9 + int32(((int64(((c5*8) + c7))*(int64)3) + (int64)3))) + int32(((int64(c3)*(int64)678) + (int64)678))) + int32((int64(c1)*(int64)153228)))] = ((((_c_input_b0[(((c9 + (t25.s*3)) + (t24*1800)) + (c1*720000))]*((floor_f32(((float32(c3)*1.785714f) + 0.392857f)) - (float32(c3)*1.785714f)) + 0.607143f)) + (_c_input_b0[((((c9 + (t25.s*3)) + (t24*1800)) + (c1*720000)) + 1800)]*(((float32(c3)*1.785714f) - floor_f32(((float32(c3)*1.785714f) + 0.392857f))) + 0.392857f)))*((floor_f32(((float32(((c5*8) + c7))*2.678571f) + 0.839286f)) - (float32(((c5*8) + c7))*2.678571f)) + 0.160714f)) + (((_c_input_b0[((((c9 + (t25.s*3)) + (t24*1800)) + (c1*720000)) + 3)]*((floor_f32(((float32(c3)*1.785714f) + 0.392857f)) - (float32(c3)*1.785714f)) + 0.607143f)) + (_c_input_b0[((((c9 + (t25.s*3)) + (t24*1800)) + (c1*720000)) + 1803)]*(((float32(c3)*1.785714f) - floor_f32(((float32(c3)*1.785714f) + 0.392857f))) + 0.392857f)))*(((float32(((c5*8) + c7))*2.678571f) - floor_f32(((float32(((c5*8) + c7))*2.678571f) + 0.839286f))) + 0.839286f)))
}
}
}
}
for (c3, 0, 4) {
for (c5, 0, 224) {
for (c7, 0, 224) {
for (c9, 0, 8) {
output_buf[((((c9 + int32((int64(c7)*(int64)8))) + int32((int64(c5)*(int64)1792))) + int32((int64(c3)*(int64)401408))) + int32((int64(c1)*(int64)1605632)))] = _conv_bias_b2[(c9 + (c3*8))]
}
}
for (c7, 0, 3) {
for (c9, 0, 3) {
for (c11, 0, 224) {
for (c13, 0, 3) {
vectorized (c15, 0, 8) {
output_buf[((((c15 + int32((int64(c11)*(int64)8))) + int32((int64(c5)*(int64)1792))) + int32((int64(c3)*(int64)401408))) + int32((int64(c1)*(int64)1605632)))] = (output_buf[((((c15 + int32((int64(c11)*(int64)8))) + int32((int64(c5)*(int64)1792))) + int32((int64(c3)*(int64)401408))) + int32((int64(c1)*(int64)1605632)))] + (input_resized_buf[(((c13 + int32((int64((c9 + c11))*(int64)3))) + int32((int64((c5 + c7))*(int64)678))) + int32((int64(c1)*(int64)153228)))]*_conv_filter_b1[((((c15 + (c13*8)) + (c9*24)) + (c7*72)) + (c3*216))]))
}
}
}
}
}
}
}
}
I don't know if it's a bug or if there's an error in my code.
This looks like a bug. Thanks for reporting it!