perf: optimize compile.ml Arr.toBlob/ofBlob/Blob.compare

Open crusso opened this issue 2 years ago • 1 comments

All three do bounds-checked arithmetic and explicit indexing. A while loop bumping a pointer would be shorter and faster.

This might just be a matter of using the new `Arr.iterate`, which bumps a pointer to each element, and introducing a new, similar `Blob.iterate`.

Mar 01 '23 22:03 crusso

let ofBlob env =
    (* Emits the shared helper "Arr.ofBlob": it takes one I32 argument ("blob")
       and leaves one I32 result, the value held in local "r" that was obtained
       from [alloc]. Each payload byte of the blob is copied into the
       corresponding slot of the result, one index at a time. *)
    Func.share_code1 env "Arr.ofBlob" ("blob", I32Type) [I32Type] (fun env get_src ->
      let (set_count, get_count) = new_local env "len" in
      let (set_dst, get_dst) = new_local env "r" in

      (* Loop body, instantiated by [from_0_to_n] with the current index. *)
      let copy_byte get_pos =
        (* Destination slot address.
           PERF: [idx] performs a bounds check on every iteration. *)
        get_dst ^^ get_pos ^^ idx env ^^
        (* Source byte address.
           PERF: payload pointer + index is recomputed on every iteration
           instead of bumping a pointer. *)
        get_src ^^ Blob.payload_ptr_unskewed ^^
        get_pos ^^ G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^
        (* Zero-extending 8-bit load of the source byte. *)
        G.i (Load {ty = I32Type; align = 0; offset = 0l; sz = Some Wasm.Types.(Pack8, ZX)}) ^^
        (* Presumably converts the raw byte to the Nat8 value representation
           -- TODO confirm against TaggedSmallWord. *)
        TaggedSmallWord.msb_adjust Type.Nat8 ^^
        store_ptr
      in

      (* len := length of the blob *)
      get_src ^^ Blob.len env ^^ set_count ^^
      (* r := alloc len *)
      get_count ^^ alloc env ^^ set_dst ^^
      (* run the copy for indices 0 .. len *)
      get_count ^^ from_0_to_n env copy_byte ^^
      (* result *)
      get_dst
    )
  let toBlob env =
    (* Emits the shared helper "Arr.toBlob": it takes one I32 argument ("array")
       and leaves one I32 result, the value held in local "r" that was obtained
       from [Blob.alloc]. Each element of the array is written as a single byte
       into the blob payload, one index at a time. *)
    Func.share_code1 env "Arr.toBlob" ("array", I32Type) [I32Type] (fun env get_src ->
      let (set_count, get_count) = new_local env "len" in
      let (set_dst, get_dst) = new_local env "r" in

      (* Loop body, instantiated by [from_0_to_n] with the current index. *)
      let copy_elem get_pos =
        (* Destination byte address.
           PERF: payload pointer + index is recomputed on every iteration
           instead of bumping a pointer. *)
        get_dst ^^ Blob.payload_ptr_unskewed ^^
        get_pos ^^ G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^
        (* Source element address.
           PERF: [idx] performs a bounds check on every iteration. *)
        get_src ^^ get_pos ^^ idx env ^^
        load_ptr ^^
        (* Presumably converts the Nat8 value representation back to a raw
           byte -- TODO confirm against TaggedSmallWord. *)
        TaggedSmallWord.lsb_adjust Type.Nat8 ^^
        (* 8-bit store of the value. *)
        G.i (Store {ty = I32Type; align = 0; offset = 0l; sz = Some Wasm.Types.Pack8})
      in

      (* len := the array's length field *)
      get_src ^^ Heap.load_field len_field ^^ set_count ^^
      (* r := Blob.alloc len *)
      get_count ^^ Blob.alloc env ^^ set_dst ^^
      (* run the copy for indices 0 .. len *)
      get_count ^^ from_0_to_n env copy_elem ^^
      (* result *)
      get_dst
    )

Mar 01 '23 22:03 crusso