FStar
Performance blow up
Consider the following (repetitive) module:
module Test
#set-options "--fuel 0 --ifuel 1 --z3rlimit 15"
open FStar.Mul
assume new val u32: Type0
assume new val u16: Type0
assume new val i16: Type0
assume new val i32: Type0
assume new val (<>.) (#t: eqtype) (a b: t): bool
assume new val (^.) (a b: u32): u32
assume new val ( &. ) #t: t -> t -> t
assume new val ( <<! ) #t (x: t) (s: i32): t
assume new val cast (x: u16): u32
assume new val mk_u32: int -> u32
assume new val mk_i16: int -> i16
assume new val mk_i32: int -> i32
assume new val mk_u16: int -> u16
let poly_mul (a b: u16) : u32 =
  let (v: u32):u32 = mk_u32 0 in
  let me:u32 = cast (a <: u16) <: u32 in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 0 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 0 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 1 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 1 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 2 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 2 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 3 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 3 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 4 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 4 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 5 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 5 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 6 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 6 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 7 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 7 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 8 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 8 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 9 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 9 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 10 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 10 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 11 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 11 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 12 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 12 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 13 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 13 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 14 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 14 <: u32) in
      v
    else v
  in
  let v:u32 =
    if mk_u16 0 <>. (b &. (mk_u16 1 <<! mk_i32 15 <: u16) <: u16)
    then
      let v:u32 = v ^. (me <<! mk_i32 15 <: u32) in
      v
    else v
  in
  v
Here, F*'s typechecking time blows up as the number of `let v:u32 = ... in` blocks grows: beyond about 10 of them, each additional block roughly doubles the typechecking time.
On my laptop (an M4 Mac), I get the following numbers:
| number of `let v:u32 = ... in` | TC time | lax time |
|---|---|---|
| 1 | 0.08s | 0.510 |
| 2 | 0.08s | 0.503 |
| 3 | 0.09s | 0.510 |
| 4 | 0.10s | 0.510 |
| 5 | 0.11s | 0.516 |
| 6 | 0.14s | 0.520 |
| 7 | 0.18s | 0.543 |
| 8 | 0.30s | 0.569 |
| 9 | 0.52s | 0.577 |
| 10 | 0.99s | 0.657 |
| 11 | 2.06s | 0.768 |
| 12 | 4.20s | 0.993 |
| 13 | 8.78s | 1.441 |
| 14 | 18.17s | 2.371 |
| 15 | 38.70s | 4.227 |
Or graphically: [plot of typechecking time against the number of lets, omitted here].
Thanks for this report.
I have started looking into it and there are some extra substitutions when sequentially composing VCs (in TypeChecker.Util.bind) that end up making VCs exponentially large.
Removing those does improve things substantially (though there is still some exponential behavior elsewhere that I have yet to track down), but removing those substitutions will likely affect how much some VCs can be normalized.
Will continue investigating a possible patch.
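For intuition about the shape of this blowup (a schematic reading of the explanation above, not the exact formula F* builds): if the VC of `let x = e in k` is obtained by substituting `e` into the VC of `k`, then in `poly_mul`, where each intermediate `v` occurs in both branches of the next conditional, every additional binding roughly doubles the size of the substituted formula:

$$
|VC_n| \approx 2\,|VC_{n-1}| + c \quad\Rightarrow\quad |VC_n| \in O(2^n),
$$

which is consistent with the typechecking time roughly doubling per extra `let` in the table above.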
Cool! We have been seeing similar performance issues in a bunch of Pure code, but this is the first time we have a succinct repro. Hopefully, any fixes you come up with will improve performance for our code across the board.
In branch _nik_3800, I have a patch to the VC generator: rather than inlining pure terms into the VC of the continuation, I distinguish let-bound pure terms from other pure terms. For a let-bound term, I introduce a name in the VC, mirroring the structure of the source term and avoiding the excessive blowup in VC size.
It helps somewhat in this example, but there is still some exponential behavior. After about 17 lets, one starts to notice verification time doubling with each additional let. Still, it is considerably faster than the default behavior, which for me starts to run out of memory much sooner.
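Schematically (my reading of the description above, not the literal VC the patch produces), instead of a formula in which each bound term is inlined into the next one,

$$
\varphi\big(e_{15}[\,e_{14}[\cdots e_0/v_0\cdots]/v_{14}\,]\big),
$$

the patched generator mirrors the source's let structure with one fresh name per binding,

$$
\forall v_0.\; v_0 = e_0 \Rightarrow \forall v_1.\; v_1 = e_1 \Rightarrow \cdots \Rightarrow \varphi(v_{15}),
$$

which keeps the VC roughly linear in the number of lets.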
The downside is that there are cases where you really want to inline a let in the continuation, e.g.,
let test () =
  let x = [0;1;2;3;4;5;6;7;8;9] in
  assert_norm (List.Tot.length x == 10)
In this case, one needs to inline `x` into the assertion, otherwise the normalizer cannot reduce it. So, for such cases, one needs to write:
let test () =
  [@@inline_let]
  let x = [0;1;2;3;4;5;6;7;8;9] in
  assert_norm (List.Tot.length x == 10)
In the F* repo, there were perhaps 20-30 proofs that needed such inline_let annotations --- you can see them in my branch.
So, the question is: is this kind of strategic inline_let annotation palatable? It's certainly one more thing a user has to think about ... so not ideal. But, with this user hint, VC generation can be much faster.