FStar
Performance blow up
Consider the following (repetitive) module:
module Test
#set-options "--fuel 0 --ifuel 1 --z3rlimit 15"
open FStar.Mul
assume new val u32: Type0
assume new val u16: Type0
assume new val i16: Type0
assume new val i32: Type0
assume new val (<>.) (#t: eqtype) (a b: t): bool
assume new val (^.) (a b: u32): u32
assume new val ( &. ) #t: t -> t -> t
assume new val ( <<! ) #t (x: t) (s: i32): t
assume new val cast (x: u16): u32
assume new val mk_u32: int -> u32
assume new val mk_i16: int -> i16
assume new val mk_i32: int -> i32
assume new val mk_u16: int -> u16
let poly_mul (a b: u16) : u32 =
  let (v: u32):u32 = mk_u32 0 in
  let me:u32 = cast (a <: u16) <: u32 in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 0 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 0 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 1 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 1 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 2 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 2 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 3 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 3 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 4 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 4 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 5 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 5 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 6 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 6 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 7 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 7 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 8 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 8 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 9 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 9 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 10 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 10 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 11 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 11 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 12 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 12 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 13 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 13 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 14 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 14 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 15 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 15 <: u32) in
      v
    else v
  in
  v
Here, F*'s typechecking time blows up as the number of `let v:u32 = ... in` blocks grows: beyond about 10 of them, each additional block roughly doubles the typechecking time.
On my laptop (an M4 Mac), I get the following numbers:
| number of `let v:u32 = ... in` | TC time | lax time |
|---|---|---|
| 1 | 0.08s | 0.510 |
| 2 | 0.08s | 0.503 |
| 3 | 0.09s | 0.510 |
| 4 | 0.10s | 0.510 |
| 5 | 0.11s | 0.516 |
| 6 | 0.14s | 0.520 |
| 7 | 0.18s | 0.543 |
| 8 | 0.30s | 0.569 |
| 9 | 0.52s | 0.577 |
| 10 | 0.99s | 0.657 |
| 11 | 2.06s | 0.768 |
| 12 | 4.20s | 0.993 |
| 13 | 8.78s | 1.441 |
| 14 | 18.17s | 2.371 |
| 15 | 38.70s | 4.227 |
Or graphically: [plot of typechecking time against the number of lets, omitted here].
Thanks for this report.
I have started looking into it and there are some extra substitutions when sequentially composing VCs (in TypeChecker.Util.bind) that end up making VCs exponentially large.
Removing those does improve things substantially (though there is still some exponential behavior elsewhere that I have yet to track down), but removing those substitutions will likely affect how much some VCs can be normalized.
Will continue investigating a possible patch.
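For intuition about the shape of this blowup (a schematic reading of the explanation above, not the exact formula F* builds): if the VC of `let x = e in k` is obtained by substituting `e` into the VC of `k`, then in `poly_mul`, where each intermediate `v` occurs in both branches of the next conditional, every additional binding roughly doubles the size of the substituted formula:

$$
|VC_n| \approx 2\,|VC_{n-1}| + c \quad\Rightarrow\quad |VC_n| \in O(2^n),
$$

which is consistent with the typechecking time roughly doubling per extra `let` in the table above.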
Cool! We have been seeing similar performance issues in a bunch of Pure code, but this is the first time we have a succinct repro. Hopefully, any fixes you come up with will improve performance for our code across the board.
In branch _nik_3800, I have a patch to the VC generator: rather than inlining pure terms into the VC of the continuation, I distinguish let-bound pure terms from other pure terms. For a let-bound term, I introduce a name in the VC, mirroring the structure of the source term and avoiding the excessive blowup in VC size.
It helps somewhat in this example, but there is still some exponential behavior. After about 17 lets, one starts to notice verification time doubling with each additional let. Still, it is considerably faster than the default behavior, which for me starts to run out of memory much sooner.
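Schematically (my reading of the description above, not the literal VC the patch produces), instead of a formula in which each bound term is inlined into the next one,

$$
\varphi\big(e_{15}[\,e_{14}[\cdots e_0/v_0\cdots]/v_{14}\,]\big),
$$

the patched generator mirrors the source's let structure with one fresh name per binding,

$$
\forall v_0.\; v_0 = e_0 \Rightarrow \forall v_1.\; v_1 = e_1 \Rightarrow \cdots \Rightarrow \varphi(v_{15}),
$$

which keeps the VC roughly linear in the number of lets.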
The downside is that there are cases where you really want to inline a let in the continuation, e.g.,
let test () =
  let x = [0;1;2;3;4;5;6;7;8;9] in
  assert_norm (List.Tot.length x == 10)
In this case, one needs to inline `x` into the assertion, otherwise the normalizer cannot reduce it. So, for such cases, one needs to write:
let test () =
  [@@inline_let]
  let x = [0;1;2;3;4;5;6;7;8;9] in
  assert_norm (List.Tot.length x == 10)
In the F* repo, there were perhaps 20-30 proofs that needed such inline_let annotations --- you can see them in my branch.
So, the question is: is this kind of strategic inline_let annotation palatable? It's certainly one more thing a user has to think about ... so not ideal. But, with this user hint, VC generation can be much faster.