BinaryOp pow with a scalar exponent computes wrong results on SSE2
commit b4ba207c18d3103d6df890c0e3a97b469b196b26 (tag: 20220729) Author: nihui [email protected] Date: Thu Jul 28 17:47:27 2022 +0800
more strict compiler rvv checks, drop rvv-071 support (#4094)
Tested on an x86 CPU with g++ (GCC) 12.2.0 and g++ (GCC) 4.8.5; both give the same result.
test.param is as follows:
7767517
3 3
Input x 0 1 x
BinaryOp Pow_0 1 1 x yyy 0=6 1=1 2=2.000000e+00
Convolution Conv_0 1 1 x 222 0=16 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=432
test.cc is as follows:
#include <stdio.h>
#include <string.h>

#include <net.h>
#include <datareader.h>
// DataReader stub used to reproduce the issue without a real model file:
// every read() fills the caller's buffer with a deterministic float ramp.
class DataReaderFromEmpty : public ncnn::DataReader {
public:
    // Formatted scanning is not provided by this stub; it always returns 0.
    virtual int scan(const char* format, void* p) const { return 0; }

    // Fills buf with the float sequence 0, 1, 2, ... and reports the whole
    // request as read by returning size.
    virtual size_t read(void* buf, size_t size) const {
        // Zero first so trailing bytes that do not form a whole float
        // (size not a multiple of sizeof(float)) are still initialized.
        memset(buf, 0, size);
        float* p = static_cast<float*>(buf);
        // Use size_t for the index: size is unsigned, and comparing it with a
        // signed int would overflow for very large requests.
        const size_t count = size / sizeof(float);
        for (size_t i = 0; i < count; i++) {
            p[i] = static_cast<float>(i);
        }
        return size;
    }
};
// Reproduction driver: loads ../test.param, fills the weights from
// DataReaderFromEmpty, feeds 24 floats into blob "x" and prints the
// contents of blob "yyy" (the output of the BinaryOp pow layer).
// Returns 0 on success and -1 if any ncnn call reports an error.
int main() {
    ncnn::Net net;
    // NOTE(review): presumably disabled so intermediate blobs are kept
    // around for extraction - confirm against the ncnn Option docs.
    net.opt.lightmode = false;

    // ncnn calls return non-zero on failure; stop early instead of printing
    // output from a network that was never set up properly.
    if (net.load_param("../test.param") != 0) {
        fprintf(stderr, "failed to load ../test.param\n");
        return -1;
    }

    DataReaderFromEmpty dr;
    if (net.load_model(dr) != 0) {
        fprintf(stderr, "failed to load model data\n");
        return -1;
    }

    // The input mixes positive and negative values; the negative ones are
    // the entries that come out wrong in the reported output.
    float data[] = {0.196577f, 0.503749f, 1.292102f, 1.728930f,
                    -0.041557f, -1.013573f, -1.430609f, 0.529204f,
                    -1.100880f, -1.505001f, 0.196577f, 0.503749f,
                    1.292102f, 1.728930f, -0.041557f, -1.013573f,
                    -1.430609f, 0.529204f, -1.100880f, -1.505001f,
                    0.196577f, 0.503749f, 1.292102f, 1.728930f};
    // Wraps the stack array as a 24 x 1 Mat without copying.
    ncnn::Mat x(24, 1, data);

    ncnn::Extractor ex = net.create_extractor();
    if (ex.input("x", x) != 0) {
        fprintf(stderr, "failed to set input blob x\n");
        return -1;
    }

    ncnn::Mat y;
    if (ex.extract("yyy", y) != 0) {
        fprintf(stderr, "failed to extract blob yyy\n");
        return -1;
    }

    const float* ptr = y.channel(0);
    for (int i = 0; i < y.w; i++) {
        printf("%f, ", ptr[i]);
    }
    printf("\n");
    return 0;
}
The output is as follows:
0.038643, 0.253763, 1.669528, 2.989199, 240614362739678911180052727245270155264.000000, 240614362739678911180052727245270155264.000000, 240614362739678911180052727245270155264.000000, 0.280057, 240614362739678911180052727245270155264.000000, 240614362739678911180052727245270155264.000000, 0.038643, 0.253763, 1.669528, 2.989199, 240614362739678911180052727245270155264.000000, 240614362739678911180052727245270155264.000000, 240614362739678911180052727245270155264.000000, 0.280057, 240614362739678911180052727245270155264.000000, 240614362739678911180052727245270155264.000000, 0.038643, 0.253763, 1.669528, 2.989199,
When I add set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-sse2") to my project, I get the correct result.
BinaryOp pow only handles a positive base; this is a known limitation. Try UnaryOp square or BinaryOp mul to compute x^2 instead.
Thanks
The latest pnnx converts pow(x, 2) to square(x) automatically. :D