ncnn BinaryOp pow with a scalar exponent computes wrong results on SSE2

commit b4ba207c18d3103d6df890c0e3a97b469b196b26 (tag: 20220729) Author: nihui [email protected] Date: Thu Jul 28 17:47:27 2022 +0800

more strict compiler rvv checks, drop rvv-071 support (#4094)

Tested on an x86 CPU with g++ (GCC) 12.2.0 and g++ (GCC) 4.8.5; both give the same result.

test.param is as follows:

7767517
3 3
Input            x            0 1 x
BinaryOp         Pow_0        1 1 x yyy 0=6 1=1 2=2.000000e+00
Convolution      Conv_0       1 1 x 222 0=16 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=432

test.cc is as follows:

#include <stdio.h>
#include <string.h>

#include <datareader.h>
#include <net.h>

// Synthetic ncnn::DataReader that generates data instead of reading a file.
// main() passes it to Net::load_model(), so every read request is answered
// with deterministic generated values and no weights file is required.
class DataReaderFromEmpty : public ncnn::DataReader {
public:
    // Text-scan hook; not needed by this reproduction, always returns 0.
    virtual int scan(const char* /*format*/, void* /*p*/) const { return 0; }

    // Fills `buf` with the float sequence 0, 1, 2, ... and reports that all
    // `size` bytes were provided.
    virtual size_t read(void* buf, size_t size) const {
        // Zero the buffer first so any trailing bytes (when `size` is not a
        // multiple of sizeof(float)) hold a defined value.
        memset(buf, 0, size);
        float* p = (float*)buf;
        // Use an unsigned counter: `size` is a size_t, so an `int` index would
        // mix signedness in the comparison and could overflow on huge buffers.
        const size_t count = size / sizeof(float);
        for (size_t i = 0; i < count; i++) {
            p[i] = (float)i;
        }
        return size;
    }
};

// Reproduction driver: loads ../test.param with synthetic weights, feeds a
// fixed 24-element input (containing negative values) into blob "x", extracts
// blob "yyy" (the output of the BinaryOp pow layer) and prints it.
// Returns 0 on success, 1 if any ncnn call reports a failure.
int main() {
    ncnn::Net net;
    net.opt.lightmode = false;

    // Check load results so a missing/invalid param file is reported instead
    // of silently producing meaningless output.
    if (net.load_param("../test.param") != 0) {
        fprintf(stderr, "failed to load ../test.param\n");
        return 1;
    }
    DataReaderFromEmpty dr;
    if (net.load_model(dr) != 0) {
        fprintf(stderr, "failed to load model data\n");
        return 1;
    }

    float data[] = {0.196577f, 0.503749f, 1.292102f, 1.728930f,
                    -0.041557f, -1.013573f, -1.430609f, 0.529204f,
                    -1.100880f, -1.505001f, 0.196577f, 0.503749f,
                    1.292102f, 1.728930f,-0.041557f, -1.013573f,
                    -1.430609f, 0.529204f, -1.100880f, -1.505001f,
                    0.196577f, 0.503749f, 1.292102f, 1.728930f};
    // Derive the width from the array so the two cannot drift apart (24 here).
    const int count = (int)(sizeof(data) / sizeof(data[0]));
    // Wraps the stack array without copying; `data` must outlive `x`.
    ncnn::Mat x(count, 1, (void*)data);

    ncnn::Extractor ex = net.create_extractor();
    if (ex.input("x", x) != 0) {
        fprintf(stderr, "failed to set input blob x\n");
        return 1;
    }
    ncnn::Mat y;
    if (ex.extract("yyy", y) != 0) {
        fprintf(stderr, "failed to extract blob yyy\n");
        return 1;
    }

    // Print the first row of the result as comma-separated floats.
    const float* ptr = y.channel(0);
    for (int i = 0; i < y.w; i++) {
        printf("%f, ", ptr[i]);
    }
    printf("\n");
    return 0;
}

The output is as follows:

0.038643, 0.253763, 1.669528, 2.989199, 240614362739678911180052727245270155264.000000, 240614362739678911180052727245270155264.000000, 240614362739678911180052727245270155264.000000, 0.280057, 240614362739678911180052727245270155264.000000, 240614362739678911180052727245270155264.000000, 0.038643, 0.253763, 1.669528, 2.989199, 240614362739678911180052727245270155264.000000, 240614362739678911180052727245270155264.000000, 240614362739678911180052727245270155264.000000, 0.280057, 240614362739678911180052727245270155264.000000, 240614362739678911180052727245270155264.000000, 0.038643, 0.253763, 1.669528, 2.989199,

When I add set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-sse2") to my project's CMake configuration, I get the correct result.

Sep 14 '22 11:09 zzzzzsad

BinaryOp pow only handles a positive base; this is a known limitation. Try UnaryOp square or BinaryOp mul to compute x^2.

Sep 15 '22 09:09 nihui

Thanks

Sep 19 '22 05:09 zzzzzsad

The latest pnnx converts pow(x, 2) to square(x) automatically. :D

Feb 14 '23 07:02 nihui