ruapu icon indicating copy to clipboard operation
ruapu copied to clipboard

detect rvv1.0 and its extensions

Open dtcxzyw opened this issue 9 months ago • 13 comments

This patch adds supports for rvv1p0 and its extensions (zvbb/zvbc/zvfh/zvfhmin/zvfbfmin/zvfbfwma/zvkb/zvl*b). Doc: https://dtcxzyw.github.io/riscv-isa-manual-host/unpriv-isa-asciidoc.html#vector

The machine code is generated with the llvm toolchain:

// llvm-mc -filetype=obj -triple=riscv64 -mattr=+v,+zvbb,+zvbc,+zvfh,+experimental-zvfbfmin,+experimental-zvfbfwma < tmp.S | llvm-objdump --mattr=+v,+zvbb,+zvbc,+zvfh,+experimental-zvfbfmin,+experimental-zvfbfwma -d -r -
add a0,a0,a0
unimp
vclz.v v4, v8 # zvbb
vclmul.vv v4, v8, v8 # zvbc
vfadd.vv v4, v8, v8 # zvfh
vfncvt.f.f.w v4, v8 # zvfhmin
vfncvtbf16.f.f.w v4, v8 # zvfbfmin
vfwmaccbf16.vf v4, fa0, v8 # zvfbfwma
vrol.vv v4, v8, v12 # zvkb
vaaddu.vv v4, v8, v8 # v
vsetvl a0, zero, a0 # vsetvl
csrr a0, vlenb # for zvl*
vsetvli a0, zero, e16, m1, ta, ma # for zvfh
csrr a0, vcsr # for rvv1p0
Disassembly of section .text:

0000000000000000 <.text>:
       0: 33 05 a5 00   add     a0, a0, a0
       4: 73 10 00 c0   unimp
       8: 57 22 86 4a   vclz.v  v4, v8
       c: 57 22 84 32   vclmul.vv       v4, v8, v8
      10: 57 12 84 02   vfadd.vv        v4, v8, v8
      14: 57 12 8a 4a   vfncvt.f.f.w    v4, v8
      18: 57 92 8e 4a   vfncvtbf16.f.f.w        v4, v8
      1c: 57 52 85 ee   vfwmaccbf16.vf  v4, fa0, v8
      20: 57 02 86 56   vrol.vv v4, v8, v12
      24: 57 22 84 22   vaaddu.vv       v4, v8, v8
      28: 57 75 a0 80   vsetvl  a0, zero, a0
      2c: 73 25 20 c2   csrr    a0, vlenb
      30: 57 75 80 0c   vsetvli a0, zero, e16, m1, ta, ma
      34: 73 25 f0 00   csrr    a0, vcsr

Tested with qemu 9.0.50 (v9.0.0-519-ge116b92d01)

riscv64-linux-gnu-gcc main.c
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./a.out | grep "v = 0"
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true ./a.out | grep "v = 1"
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true ./a.out | grep "zvl32b = 1"
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true ./a.out | grep "zvl64b = 1"
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true ./a.out | grep "zvl128b = 1"
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true,vlen=256 ./a.out | grep "zvl256b = 1"
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true,vlen=512 ./a.out | grep "zvl512b = 1"
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true,vlen=1024 ./a.out | grep "zvl1024b = 1"
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true,zvbb=true ./a.out | grep "zvbb = 1"
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true,zvbb=true ./a.out | grep "zvkb = 1" # zvbb implies zvkb
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true,zvkb=true ./a.out | grep "zvkb = 1" # FIXME: it doesn't work.
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true,zfhmin=true,zvfh=true ./a.out | grep "zvfh = 1"
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true,zvfbfmin=true ./a.out | grep "zvfbfmin = 1"
qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true,zvfbfmin=true,zvfbfwma=true ./a.out | grep "zvfbfwma = 1"

dtcxzyw avatar May 08 '24 18:05 dtcxzyw

Have you ever tested this on xtheadvector machine with vector enabled? I think we should not probe xtheadvector machines as rvv1.0.

cyyself avatar May 09 '24 05:05 cyyself

Have you ever tested this on xtheadvector machine with vector enabled?

No.

I think we should not probe xtheadvector machines as rvv1.0.

It would be better to use vaaddu instead (which is not defined in rvv 0.7.1).

dtcxzyw avatar May 09 '24 05:05 dtcxzyw

I can help you take a test on a vector-enabled xtheadvector machine to see if these instructions will be trapped, maybe 6 hours later.

cyyself avatar May 09 '24 05:05 cyyself

I have tested your patch. However, it traps on T-Head Vendor Kernel for TH1520 v5.10 on Lichee Pi 4A:

[ 2374.078809] a.out[2442]: unhandled signal 11 code 0x1 at 0xfffffffffffffd38 in libc.so.6[3fd4e30000+13c000]
[ 2374.088736] CPU: 1 PID: 2442 Comm: a.out Tainted: G         C        5.10.113-lpi4a #2023.12.08.03.26+b8c5d3546
[ 2374.098943] epc: 0000003fd4ea0cd2 ra : 0000003fd4e66bf2 sp : 0000003fffe39a60
[ 2374.106168]  gp : 0000002ab2c93800 tp : 0000000000000000 t0 : 0000003fd4fb22c0
[ 2374.113530]  t1 : 0000002ab2c8ffbc t2 : 0000000000000003 s0 : ffffffffffffffff
[ 2374.120891]  s1 : 0000002ab2c934e8 a0 : 0000002ab2c934e8 a1 : 0000003fffe39a90
[ 2374.128211]  a2 : 0000003fffe39b60 a3 : 0000000000000000 a4 : 0000000000000001
[ 2374.135518]  a5 : 0000002ab2c934e0 a6 : 0000003fd4fb0d40 a7 : 0000000000000087
[ 2374.142839]  s2 : 0000000000000000 s3 : 0000002ab2c92e18 s4 : 0000002ab2c906ec
[ 2374.150138]  s5 : 0000003fffe3a498 s6 : 0000002ab2c92e18 s7 : 0000003fd4fb0d48
[ 2374.157433]  s8 : 0000003fd4fb1050 s9 : 0000000000000000 s10: 0000002ad3236a4c
[ 2374.164704]  s11: 0000002ad32369c0 t3 : 0000003fd4e66be2 t4 : 0000000000036be2
[ 2374.171958]  t5 : 000000000001e028 t6 : 000000000181cc90
[ 2374.177306] status: 8000000201804020 badaddr: fffffffffffffd38 cause: 000000000000000d

And the gdb shows:

┌───────────────────────────────────────────────────────────────────────────────────────────────────────┐
│    0x2aaaaab39c <ruapu_rvv_assert+2>       sd      s0,24(sp)                                          │
│    0x2aaaaab39e <ruapu_rvv_assert+4>       add     s0,sp,32                                           │
│    0x2aaaaab3a0 <ruapu_rvv_assert+6>       mv      a5,a0                                              │
│    0x2aaaaab3a2 <ruapu_rvv_assert+8>       sw      a5,-20(s0)                                         │
│    0x2aaaaab3a6 <ruapu_rvv_assert+12>      lw      a5,-20(s0)                                         │
│    0x2aaaaab3aa <ruapu_rvv_assert+16>      sext.w  a5,a5                                              │
│    0x2aaaaab3ac <ruapu_rvv_assert+18>      bnez    a5,0x2aaaaab3b4 <ruapu_rvv_assert+26>              │
│    0x2aaaaab3ae <ruapu_rvv_assert+20>      nop                                                        │
│  > 0x2aaaaab3b0 <ruapu_rvv_assert+22>      unimp                                                      │
│    0x2aaaaab3b4 <ruapu_rvv_assert+26>      nop                                                        │
│    0x2aaaaab3b6 <ruapu_rvv_assert+28>      ld      s0,24(sp)                                          │
│    0x2aaaaab3b8 <ruapu_rvv_assert+30>      add     sp,sp,32                                           │
│    0x2aaaaab3ba <ruapu_rvv_assert+32>      ret                                                        │
│    0x2aaaaab3bc <ruapu_rvv_vsetvl>         .4byte  0x80a07557                                         │
│    0x2aaaaab3c0 <ruapu_rvv_vsetvl+4>       ret                                                        │
│    0x2aaaaab3c2 <ruapu_rvv_vsetvl+6>       nop                                                        │
│    0x2aaaaab3c4 <ruapu_rvv_vsetvl+8>       mv      a0,a5                                              │
└───────────────────────────────────────────────────────────────────────────────────────────────────────┘
multi-thre Thread 0x3ff7fcbc40 In: ruapu_rvv_assert                               L341  PC: 0x2aaaaab3b0
0x0000002aaaaab3b0 in ruapu_rvv_assert (cond=0) at /home/cyy/ruapu/ruapu.h:341
(gdb) bt
#0  0x0000002aaaaab3b0 in ruapu_rvv_assert (cond=0) at /home/cyy/ruapu/ruapu.h:341
#1  0x0000002aaaaab3f6 in ruapu_rvv_vsetvl_safe (vtype=24) at /home/cyy/ruapu/ruapu.h:353
#2  0x0000002aaaaab5de in ruapu_some_v () at /home/cyy/ruapu/ruapu.h:373
#3  0x0000002aaaaab162 in ruapu_detect_isa (some_inst=0x2aaaaab5d0 <ruapu_some_v>)
    at /home/cyy/ruapu/ruapu.h:110
#4  0x0000002aaaaab616 in ruapu_init () at /home/cyy/ruapu/ruapu.h:585
#5  0x0000002aaaaab6f8 in main () at main.c:14
(gdb)

As you can see, it failed during the vector probe.

And after reviewing your code. You should first probe if rvv (but not xtheadvector) exists. Rather than assume it may support and do all the rest of the vector probe on xtheadvector machines without checks. I came up with a new idea in #106 , which uses vcsr. As both vcsr and vlenb do not exist in RISC-V Vector 0.7.1, T-Head implemented vlenb but no vcsr. We may also use this feature to detect xtheadvector if we have checked all the T-Head typed-out SoCs.

cyyself avatar May 09 '24 16:05 cyyself

qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true,zvkb=true ./a.out | grep "zvkb = 1" # FIXME: it doesn't work.

This is because qemu will not read the zvkb argument; you can also check the dts generated by the qemu-system.

cyyself avatar May 09 '24 16:05 cyyself

However, it traps on T-Head Vendor Kernel for TH1520 v5.10 on Lichee Pi 4A:

This behavior is expected. This patch only checks rvv1.0 support.

dtcxzyw avatar May 09 '24 16:05 dtcxzyw

However, it traps on T-Head Vendor Kernel for TH1520 v5.10 on Lichee Pi 4A:

This behavior is expected. This patch only checks rvv1.0 support.

However, it traps on vl value assert after vsetvl rather than the vector instruction itself.

cyyself avatar May 09 '24 16:05 cyyself

qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64,v=true,zvkb=true ./a.out | grep "zvkb = 1" # FIXME: it doesn't work.

This is because qemu will not read the zvkb argument; you can also check the dts generated by the qemu-system.

See https://gitlab.com/qemu-project/qemu/-/blob/master/target/riscv/cpu.c?ref_type=heads#L1538. It is just a typo :)

dtcxzyw avatar May 09 '24 16:05 dtcxzyw

However, it traps on T-Head Vendor Kernel for TH1520 v5.10 on Lichee Pi 4A:

This behavior is expected. This patch only checks rvv1.0 support.

However, it traps on vl value assert after vsetvl rather than the vector instruction itself.

See https://dtcxzyw.github.io/riscv-isa-manual-host/unpriv-isa-asciidoc.html#_v_vector_extension_for_application_processors

The V extension supports EEW of 8, 16, and 32, and 64.

IIUC the V extension should support all vector fixed-point arithmetic instructions with EEW=64.

dtcxzyw avatar May 09 '24 16:05 dtcxzyw

For the VLEN probe, why do we not just probe in csr vlenb?

cyyself avatar May 09 '24 16:05 cyyself

For the VLEN probe, why do we not just probe in csr vlenb?

Oh, it is my bad. I misread vlenb as vl/8 :( Will fix it later.

dtcxzyw avatar May 09 '24 17:05 dtcxzyw

I found the root cause of it fault on xtheadvector is the zvbc probe:

cyy@lpi4a:~/archroot/home/cyy/ruapu$ git diff
diff --git a/main.c b/main.c
index 4b61ba7..84aead8 100644
--- a/main.c
+++ b/main.c
@@ -143,7 +143,7 @@ int main()
     PRINT_ISA_SUPPORT(zicsr)
     PRINT_ISA_SUPPORT(zifencei)
     PRINT_ISA_SUPPORT(zvbb)
-    PRINT_ISA_SUPPORT(zvbc)
+    // PRINT_ISA_SUPPORT(zvbc)
     PRINT_ISA_SUPPORT(zvfh)
     PRINT_ISA_SUPPORT(zvfhmin)
     PRINT_ISA_SUPPORT(zvfbfmin)
diff --git a/ruapu.h b/ruapu.h
index a5ae4bd..464ec49 100644
--- a/ruapu.h
+++ b/ruapu.h
@@ -517,7 +517,7 @@ RUAPU_ISAENTRY(zicsr)
 RUAPU_ISAENTRY(zifencei)
 RUAPU_ISAENTRY(zmmul)
 RUAPU_ISAENTRY(zvbb)
-RUAPU_ISAENTRY(zvbc)
+// RUAPU_ISAENTRY(zvbc)
 RUAPU_ISAENTRY(zvfh)
 RUAPU_ISAENTRY(zvfhmin)
 RUAPU_ISAENTRY(zvfbfmin)
cyy@lpi4a:~/archroot/home/cyy/ruapu$ gcc main.c -g
cyy@lpi4a:~/archroot/home/cyy/ruapu$ ./a.out
i = 1
m = 1
a = 1
f = 1
d = 1
c = 1
v = 0
zba = 0
zbb = 0
zbc = 0
zbs = 0
zbkb = 0
zbkc = 0
zbkx = 0
zfa = 0
zfbfmin = 0
zfh = 1
zfhmin = 1
zicond = 0
zicsr = 1
zifencei = 1
zvbb = 0
zvfh = 1
zvfhmin = 0
zvfbfmin = 0
zvfbfwma = 0
zvkb = 0
zvl32b = 1
zvl64b = 1
zvl128b = 1
zvl256b = 0
zvl512b = 0
zvl1024b = 0
xtheadba = 1
xtheadbb = 1
xtheadbs = 1
xtheadcondmov = 1
xtheadfmemidx = 1
xtheadfmv = 0
xtheadmac = 1
xtheadmemidx = 1
xtheadmempair = 1
xtheadsync = 1
xtheadvdot = 0
i
m
a
f
d
c
zfh
zfhmin
zicsr
zifencei
zmmul
zvfh
zvl32b
zvl64b
zvl128b
xtheadba
xtheadbb
xtheadbs
xtheadcondmov
xtheadfmemidx
xtheadmac
xtheadmemidx
xtheadmempair
xtheadsync
cyy@lpi4a:~/archroot/home/cyy/ruapu$

However, we should also refuse to add zvl*b for xtheadvector. IIRC, all xtheadvector CPU has only 128 bits of VLEN. So, the following change this PR needs to make is to check if the vector extension exists first. I would rather use vcsr because it's safe. There is no reason not to implement it on rvv 1.0. So we will not need to make any changes if someday v is divided into small extensions like m and zmmul today.

cyyself avatar May 09 '24 17:05 cyyself

I found the root cause of it fault on xtheadvector is the zvbc probe:

cyy@lpi4a:~/archroot/home/cyy/ruapu$ git diff
diff --git a/main.c b/main.c
index 4b61ba7..84aead8 100644
--- a/main.c
+++ b/main.c
@@ -143,7 +143,7 @@ int main()
     PRINT_ISA_SUPPORT(zicsr)
     PRINT_ISA_SUPPORT(zifencei)
     PRINT_ISA_SUPPORT(zvbb)
-    PRINT_ISA_SUPPORT(zvbc)
+    // PRINT_ISA_SUPPORT(zvbc)
     PRINT_ISA_SUPPORT(zvfh)
     PRINT_ISA_SUPPORT(zvfhmin)
     PRINT_ISA_SUPPORT(zvfbfmin)
diff --git a/ruapu.h b/ruapu.h
index a5ae4bd..464ec49 100644
--- a/ruapu.h
+++ b/ruapu.h
@@ -517,7 +517,7 @@ RUAPU_ISAENTRY(zicsr)
 RUAPU_ISAENTRY(zifencei)
 RUAPU_ISAENTRY(zmmul)
 RUAPU_ISAENTRY(zvbb)
-RUAPU_ISAENTRY(zvbc)
+// RUAPU_ISAENTRY(zvbc)
 RUAPU_ISAENTRY(zvfh)
 RUAPU_ISAENTRY(zvfhmin)
 RUAPU_ISAENTRY(zvfbfmin)
cyy@lpi4a:~/archroot/home/cyy/ruapu$ gcc main.c -g
cyy@lpi4a:~/archroot/home/cyy/ruapu$ ./a.out
i = 1
m = 1
a = 1
f = 1
d = 1
c = 1
v = 0
zba = 0
zbb = 0
zbc = 0
zbs = 0
zbkb = 0
zbkc = 0
zbkx = 0
zfa = 0
zfbfmin = 0
zfh = 1
zfhmin = 1
zicond = 0
zicsr = 1
zifencei = 1
zvbb = 0
zvfh = 1
zvfhmin = 0
zvfbfmin = 0
zvfbfwma = 0
zvkb = 0
zvl32b = 1
zvl64b = 1
zvl128b = 1
zvl256b = 0
zvl512b = 0
zvl1024b = 0
xtheadba = 1
xtheadbb = 1
xtheadbs = 1
xtheadcondmov = 1
xtheadfmemidx = 1
xtheadfmv = 0
xtheadmac = 1
xtheadmemidx = 1
xtheadmempair = 1
xtheadsync = 1
xtheadvdot = 0
i
m
a
f
d
c
zfh
zfhmin
zicsr
zifencei
zmmul
zvfh
zvl32b
zvl64b
zvl128b
xtheadba
xtheadbb
xtheadbs
xtheadcondmov
xtheadfmemidx
xtheadmac
xtheadmemidx
xtheadmempair
xtheadsync
cyy@lpi4a:~/archroot/home/cyy/ruapu$

However, we should also refuse to add zvl*b for xtheadvector. IIRC, all xtheadvector CPU has only 128 bits of VLEN. So, the following change this PR needs to make is to check if the vector extension exists first. I would rather use vcsr because it's safe. There is no reason not to implement it on rvv 1.0. So we will not need to make any changes if someday v is divided into small extensions like m and zmmul today.

Done!

dtcxzyw avatar May 09 '24 17:05 dtcxzyw

risc-v is so complicated ⌓‿⌓

nihui avatar May 15 '24 03:05 nihui