OpenBLAS icon indicating copy to clipboard operation
OpenBLAS copied to clipboard

cblas_sgemm returns a wrong result on Apple M4

Open kon0740 opened this issue 2 months ago • 1 comments

With OpenBLAS version 0.3.30, cblas_sgemm() seems to always output an all-zero result. The following program reproduces the problem:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include <math.h>

/*
 * Fill a rows x cols matrix, stored contiguously row by row, with
 * pseudo-random values in the closed interval [-1, 1].
 *
 * Values are drawn from rand(), one call per element in storage order, so
 * the result is determined by the current srand() seed.
 */
void generate_random_matrix(float *matrix, int rows, int cols) {
    float *cell = matrix;  /* next element to write, advancing in storage order */

    for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < cols; ++c) {
            /* Scale rand() to [0, 1], then stretch and shift to [-1, 1]. */
            *cell++ = (float)rand() / RAND_MAX * 2.0f - 1.0f;
        }
    }
}

/*
 * Write a labelled matrix to stdout.
 *
 * Prints a header line "<name> (<rows>x<cols>):", then one text line per
 * matrix row with each element formatted as %8.4f, then a blank line.
 * The matrix is read as rows * cols contiguous floats, row by row.
 */
void print_matrix(const char *name, float *matrix, int rows, int cols) {
    const float *cursor = matrix;  /* next element to print */
    int r = 0;

    printf("%s (%dx%d):\n", name, rows, cols);

    while (r < rows) {
        int c = 0;
        while (c < cols) {
            printf("%8.4f ", *cursor++);
            ++c;
        }
        printf("\n");
        ++r;
    }

    printf("\n");
}

/*
 * Minimal reproduction: computes C(MxN) = A(MxK) * B(KxN) with cblas_sgemm
 * on small random row-major matrices and prints the inputs and the result.
 *
 * Returns 0 on success, or EXIT_FAILURE if a matrix buffer cannot be
 * allocated.
 */
int main(void) {
    const int M = 3;  // out_channels: rows of A and C
    const int N = 4;  // tiles: columns of B and C
    const int K = 2;  // channels: columns of A, rows of B

    printf("Matrix dimensions: M=%d, N=%d, K=%d\n", M, N, K);
    printf("Computing: C(MxN) = A(MxK) * B(KxN)\n\n");

    float *A = malloc((size_t)M * K * sizeof *A);
    float *B = malloc((size_t)K * N * sizeof *B);
    float *C_cblas = malloc((size_t)M * N * sizeof *C_cblas);
    if (A == NULL || B == NULL || C_cblas == NULL) {
        fprintf(stderr, "Failed to allocate matrix buffers\n");
        free(A);  // free(NULL) is a no-op, so no per-pointer guard is needed
        free(B);
        free(C_cblas);
        return EXIT_FAILURE;
    }

    srand(42);  // fixed seed so every run uses the same input matrices
    generate_random_matrix(A, M, K);
    generate_random_matrix(B, K, N);

    // Clear C up front so the printed result never shows stale heap contents.
    memset(C_cblas, 0, (size_t)M * N * sizeof *C_cblas);

    print_matrix("Matrix A", A, M, K);
    print_matrix("Matrix B", B, K, N);

    // Row-major, non-transposed operands: each leading dimension is the
    // number of columns in the corresponding matrix.
    const int lda = K;
    const int ldb = N;
    const int ldc = N;

    printf("Leading dimensions: lda=%d, ldb=%d, ldc=%d\n", lda, ldb, ldc);

    printf("Calling cblas_sgemm...\n");
    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                M, N, K,                    // M, N, K
                1.0f,                       // alpha
                A, lda,                     // A, lda
                B, ldb,                     // B, ldb
                0.0f,                       // beta
                C_cblas, ldc);              // C, ldc

    print_matrix("CBLAS Result", C_cblas, M, N);

    free(A);
    free(B);
    free(C_cblas);

    return 0;
}

Running it produces the following output:

Matrix dimensions: M=3, N=4, K=2
Computing: C(MxN) = A(MxK) * B(KxN)

Matrix A (3x2):
 -0.9993   0.0492 
  0.4708  -0.4734 
 -0.2476  -0.6074 

Matrix B (2x4):
  0.9517   0.0246   0.0609  -0.4858 
 -0.7858   0.6310   0.8011  -0.0959 

Leading dimensions: lda=2, ldb=4, ldc=4
Calling cblas_sgemm...
CBLAS Result (3x4):
  0.0000   0.0000   0.0000   0.0000 
  0.0000   0.0000   0.0000   0.0000 
  0.0000   0.0000   0.0000   0.0000 

kon0740 avatar Nov 05 '25 15:11 kon0740

This is a duplicate of #5414 (the SME kernel is inadvertently being called while only its empty substitute is compiled in).

martin-frbg avatar Nov 22 '25 18:11 martin-frbg