rescript-compiler icon indicating copy to clipboard operation
rescript-compiler copied to clipboard

Switch sometimes (non-deterministically?) results in redundant conditional block

Open mediremi opened this issue 7 months ago • 4 comments

ReScript version: v12.0.0-alpha.13

The following code (playground link)

type actionType1 =
  | WithoutPayload1
  | WithPayload1({x: int})

type actionType2 =
  | WithoutPayload2
  | WithPayload2({y: int})
  | WithPayload3({y: int})

type actionType3 =
  | WithPayload4({x: int})
  | WithPayload5({y: int})
  | WithPayload6({x: int})
  | WithPayload7({y: int})
  | WithPayload8({x: int})

type action =
  | ...actionType1
  | ...actionType2
  | ...actionType3

let f = (action: action) => {
  switch action {
  | ...actionType3 => Console.log("hello")
  | _ => ()
  }
  42
}

sometimes gets compiled to

function f(action) {
  if (typeof action !== "object") {
    action === "WithoutPayload1";
  } else {
    switch (action.TAG) {
      case "WithPayload1" :
      case "WithPayload2" :
      case "WithPayload3" :
        break;
      default:
        console.log("hello");
    }
  }
  return 42;
}

I say 'sometimes' as by rerunning the compiler you can get the correct output of

function f(action) {
  if (typeof action === "object") {
    switch (action.TAG) {
      case "WithPayload4" :
      case "WithPayload5" :
      case "WithPayload6" :
      case "WithPayload7" :
      case "WithPayload8" :
        console.log("hello");
        break;
    }
  }
  return 42;
}

See this video:

https://github.com/user-attachments/assets/29a56151-2083-402b-a005-f7451cac7ceb

mediremi avatar May 12 '25 18:05 mediremi

Repro from command line below. Since it was happening only in the playground, and not from the command line, it seems to suggest that it might have to do with global state, as the playground keeps on compiling with the same process. So here's a repro from the command line where the same code is type checked twice:

module M1 = {
type action =
  | WithoutPayload1
  | WithoutPayload2
  | WithPayload2({y: int})
  | WithPayload3({y: int})
  | WithPayload5({y: int})
  | WithPayload6({x: int})
  | WithPayload7({y: int})
  | WithPayload8({x: int})

let f = (action: action) => {
  switch action {
  | WithPayload5(_)
  | WithPayload6(_)
  | WithPayload7(_)
  | WithPayload8(_) => Console.log("hello")
  | _ => ()
  }
  42
}
}

module M2 = {
type action =
  | WithoutPayload1
  | WithoutPayload2
  | WithPayload2({y: int})
  | WithPayload3({y: int})
  | WithPayload5({y: int})
  | WithPayload6({x: int})
  | WithPayload7({y: int})
  | WithPayload8({x: int})

let f = (action: action) => {
  switch action {
  | WithPayload5(_)
  | WithPayload6(_)
  | WithPayload7(_)
  | WithPayload8(_) => Console.log("hello")
  | _ => ()
  }
  42
}
}

Result:

// Generated by ReScript, PLEASE EDIT WITH CARE
'use strict';


function f(action) {
  if (typeof action !== "object") {
    action === "WithoutPayload1";
  } else {
    switch (action.TAG) {
      case "WithPayload2" :
      case "WithPayload3" :
        break;
      default:
        console.log("hello");
    }
  }
  return 42;
}

let M1 = {
  f: f
};

function f$1(action) {
  if (typeof action === "object") {
    switch (action.TAG) {
      case "WithPayload5" :
      case "WithPayload6" :
      case "WithPayload7" :
      case "WithPayload8" :
        console.log("hello");
        break;
    }
  }
  return 42;
}

let M2 = {
  f: f$1
};

exports.M1 = M1;
exports.M2 = M2;
/* No side effect */

cristianoc avatar Jun 13 '25 12:06 cristianoc

Another observation - it's not related to the new pattern match variant type spread functionality. The same code but without the variant type spread also has the same behavior: https://rescript-lang.org/try?version=v12.0.0-alpha.10&module=esmodule&code=C4TwDgpgBAhgxsAlgewHYBVwQIxQLwBQUUAPlAOqLAAWyArsAAowgA2yMAJtkaRVdWZsO3ABQBvAB4AuKIlTAAvgEoCBUJFgIUGLACZ8vMpRr0mLdlz1H+NIZc56JIWfKWrixgfZEBmZ64KKmoa0PBIaJiQvoaetoIWIgAsEjJyQR58JgnCXACsAenuNtk+XABsqYHFcaWJXADshW7Btd71nAAcVUXB6lhaEaixfAB04+E6UTg246OTkfqzE9qL0WqsEMBQAGb4UKILqLJHyvgAfFDivADOAO5UcNSDOlcl7bmcKQD6mV52HQKv3eAM+lWBbVBDiaEKyHwc3V+F14xAAwmgbshNqN2ABzUQAImoEFY7AJfyg3wuB0yil4SWsdLUBCAA

Not in the example, but more observations:

  • Happens all the way back to v9, so this has existed for a long while
  • Not related to either pattern matching variant type spreads or variant type definition spreads

Same example in v9 failing in the same way: https://rescript-lang.org/try?version=v9.1.2&module=esmodule&code=C4TwDgpgBAhgxsAlgewHYBVwQIxQLwBQUUAPlAOqLAAWyArsAAowgA2yMAJtkaRVdWZsO3ABQBvAB4AuKIlTAAvgEoCBUJFgIUGLACZ8vMpRr0mLdlz1H+NIZc56JIWfKWrixgfZEBmZ64KKmoa0PBIaJiQvoaetoIWIgAsEjJyQR58JgnCXACsAenuNtk+XABsqYHFcaWJXADshW7Btd71nAAcVUXB6lhaEaixWQJmZdwl7bliUtWto6YME9Ztdh1O4i69mV7rM-5b87vxEylzO1P7DgVHl2s5DpUXLSd1M013r1ePIt0vGTUrAgwCgADN8FBROEdLIYWhlPgAHxQcS8ADOAHcqHBqIMdKifmdRAB9N7TG6k8nXESVMlEjpNekPCbdMnI3jEABS6IAdOwAOaiABE1AgrHYwpOJORUMyil4SWsCrUqrV6rUQA

zth avatar Jun 13 '25 12:06 zth

Seems due to calling a global increment next_raise_count().

cristianoc avatar Jun 13 '25 12:06 cristianoc

To summarize this (and please @cristianoc say if you think differently):

  1. The 2 different code snippets produced are functionally equivalent, so there's no bug in the sense that something isn't working as intended.
  2. The behavior is actually deterministic because the compiler's state is reset between files, so compiling a file will always yield the same result (even if identical code in the file produces 2 different outputs).

Correct?

zth avatar Jun 15 '25 17:06 zth

To summarize this (and please @cristianoc say if you think differently):

  1. The 2 different code snippets produced are functionally equivalent, so there's no bug in the sense that something isn't working as intended.
  2. The behavior is actually deterministic because the compiler's state is reset between files, so compiling a file will always yield the same result (even if identical code in the file produces 2 different outputs).

Correct?

That is correct.

cristianoc avatar Jun 17 '25 09:06 cristianoc

Going down the rabbit hole for a while, after confirming that the initial value of the global counter for the switch exit handlers determines which of the two possible forms of the output is generated. The rabbit hole is finding out why.

Why the old code was non-deterministic

reintroduce_fail optimises a switch by picking one exit action (Lstaticraise id) as the default of the switch when that same exit occurs at least three times in its branches.

  1. It counts how many times each exit id appears:
let t = Hashtbl.create 17 in
List.iter seen sw.sw_consts;
List.iter seen sw.sw_blocks;
  1. It then walks that hash-table to pick the exit with the largest multiplicity:
let i_max = ref (-1) and max = ref (-1) in
Hashtbl.iter
  (fun i c ->
     if c > !max then (      (* strictly “>” *)
       i_max := i;
       max   := c))
  t;

If two exits appear the same number of times (this is the case for your match: the “constant” branch and the “block” branch each occur twice) the test c > !max never triggers for the second one, so the winner is whichever entry the hash table happens to enumerate first. Hashtbl.iter does not visit bindings in numerical order; its order depends on the bucket i mod 17.
Because the two exits are assigned successive numbers (… , n+1, n+2 ,…) the bucket visited first flips according to the absolute value of the global counter, hence the code generator sometimes chooses the constant exit as default, sometimes the block exit, producing the two different JavaScript layouts you observed.

The one-line fix

Replace the “strictly larger” test by a deterministic tie-breaker:

- if c > !max then (
+ if c > !max || (c = !max && i < !i_max) then (

c > !max – keep the behaviour when a multiplicity is larger.
c = !max && i < !i_max – when the counts are tied, keep the smallest numeric id.

Why this makes the result stable

Now, no matter in which order Hashtbl.iter yields (id, count) pairs,

  1. every pair with a larger count replaces the current winner;
  2. for pairs with the same count the one with the smaller id replaces the current winner.

The final i_max is therefore uniquely determined (the smallest id among those with maximal multiplicity), independent of the hash-table’s iteration order and thus independent of how many exceptions had already been allocated before the match was compiled.
Consequently both of your functions (f1 and f2) are now compiled into identical JavaScript, whatever the initial value of the global raise_count.

cristianoc avatar Jun 17 '25 10:06 cristianoc