repl LeanRepl terminates trying to run large amounts of commands on a single LeanRepl instance

When I run the following python script

import os
import subprocess
import json
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
import sys

HOME_DIR = os.path.expanduser('~')
DEFAULT_LAKE_PATH = f'{HOME_DIR}/.elan/bin/lake'
DEFAULT_LEAN_WORKSPACE = 'TestLean'

def process_command(process, command):

    print(command)
        
    json_input = json.dumps(command, ensure_ascii=False) + "\n\n"
    process.stdin.write(json_input)
    process.stdin.flush()
    
    
    output_lines = []
    while True:
        line = process.stdout.readline().strip()
        if not line:
            break
        output_lines.append(line)

    print(output_lines)

    return "\n".join(output_lines)

def process_commands(commands):

    process = subprocess.Popen(
            [DEFAULT_LAKE_PATH, "exe", "repl"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=DEFAULT_LEAN_WORKSPACE,
            text=True,
            bufsize=1,
            env=os.environ,
            preexec_fn=os.setsid if sys.platform != 'win32' else None,
        )
    
    for command in commands:
        process_command(process, command)
    
    print("closing repl ... ")

    process.stdin.close()
    process.wait()

start_time = time.time()

commands = [{"cmd": f"theorem womp{i} (a{i} b c : Nat) : (a{i} + b) + c = c + a{i} + b := by sorry"} for i in range(1000)]

process_commands(commands)

end_time = time.time()



# Print results if needed
# print("\n".join(results))
print(f"Execution Time: {end_time - start_time}")

I get the following error.

...
{'cmd': 'theorem womp161 (a161 b c : Nat) : (a161 + b) + c = c + a161 + b := by sorry'}
['{"sorries":', '[{"proofState": 161,', '"pos": {"line": 1, "column": 71},', '"goal": "a161 b c : Nat\\nâŠ¢ a161 + b + c = c + a161 + b",', '"endPos": {"line": 1, "column": 76}}],', '"messages":', '[{"severity": "warning",', '"pos": {"line": 1, "column": 8},', '"endPos": {"line": 1, "column": 15},', '"data": "declaration uses \'sorry\'"}],', '"env": 161}']
{'cmd': 'theorem womp162 (a162 b c : Nat) : (a162 + b) + c = c + a162 + b := by sorry'}
['{"sorries":', '[{"proofState": 162,', '"pos": {"line": 1, "column": 71},', '"goal": "a162 b c : Nat\\nâŠ¢ a162 + b + c = c + a162 + b",', '"endPos": {"line": 1, "column": 76}}],', '"messages":', '[{"severity": "warning",', '"pos": {"line": 1, "column": 8},', '"endPos": {"line": 1, "column": 15},', '"data": "declaration uses \'sorry\'"}],', '"env": 162}']
{'cmd': 'theorem womp163 (a163 b c : Nat) : (a163 + b) + c = c + a163 + b := by sorry'}
[]
{'cmd': 'theorem womp164 (a164 b c : Nat) : (a164 + b) + c = c + a164 + b := by sorry'}
Traceback (most recent call last):
  File "C:\Users\pufre\Downloads\CodingProjects\TinyProof\solver\batch_verifier1.py", line 58, in <module>
    process_commands(commands)
  File "C:\Users\pufre\Downloads\CodingProjects\TinyProof\solver\batch_verifier1.py", line 47, in process_commands
    process_command(process, command)
  File "C:\Users\pufre\Downloads\CodingProjects\TinyProof\solver\batch_verifier1.py", line 17, in process_command
    process.stdin.write(json_input)
OSError: [Errno 22] Invalid argument

Even when I modify the runCommands function to not save snapshots it still gives the same error.

Mar 29 '25 22:03 FrederickPu

A note about the memory requirements:

In this case, each cmd consumes about 113MB of RAM, so you'd need at least 113GB of memory to initialize a thousand theorems in one instance. 😢

Apr 01 '25 09:04 RexWzh

But I wait for each cmd to finish before running the next one. Is there any way to clear away the memory allocation once the command is done being run?

Apr 01 '25 14:04 FrederickPu

also is there any way to check the current memeory usage from within leanrepl?

Apr 01 '25 15:04 FrederickPu

I made my own version of runCommand that doesn't save any enviroment data and I still get the same error:

import REPL

open Lean Elab

namespace REPL

def runCommand' (s : Command) : M IO (CommandResponse ⊕ Error) := do
  -- let (cmdSnapshot?, notFound) ← do match s.env with
  -- | none => pure (none, false)
  -- | some i => do match (← get).cmdStates[i]? with
  --   | some env => pure (some env, false)
  --   | none => pure (none, true)
  -- if notFound then
  --   return .inr ⟨"Unknown environment."⟩
  -- let initialCmdState? := cmdSnapshot?.map fun c => c.cmdState
  let (_, messages, _) ← try
    IO.processInput s.cmd none
  catch ex =>
    return .inr ⟨ex.toString⟩
  let messages ← messages.mapM fun m => Message.of m
  -- For debugging purposes, sometimes we print out the trees here:
  -- trees.forM fun t => do IO.println (← t.format)
  -- let sorries ← sorries trees (initialCmdState?.map (·.env))
  -- let tactics ← match s.allTactics with
  -- | some true => tactics trees
  -- | _ => pure []
  -- let jsonTrees := match s.infotree with
  -- | some "full" => trees
  -- | some "tactics" => trees.flatMap InfoTree.retainTacticInfo
  -- | some "original" => trees.flatMap InfoTree.retainTacticInfo |>.flatMap InfoTree.retainOriginal
  -- | some "substantive" => trees.flatMap InfoTree.retainTacticInfo |>.flatMap InfoTree.retainSubstantive
  -- | _ => []
  -- let infotree ← if jsonTrees.isEmpty then
  --   pure none
  -- else
  --   pure <| some <| Json.arr (← jsonTrees.toArray.mapM fun t => t.toJson none)
  return .inl
    { env := 0,
      messages,
      sorries := [], -- sorries,
      tactics := []-- tactics
      infotree := none } -- infotree }


/-- Parse a user input string to an input command. -/
def parseCmd (query : String) : IO Command := do
  let json := Json.parse query
  match json with
  | .error e => throw <| IO.userError <| toString <| toJson <|
      (⟨"Could not parse JSON:\n" ++ e⟩ : Error)
  | .ok j => match fromJson? j with
    | .ok (r : REPL.Command) => return r
    | .error e => throw <| IO.userError <| toString <| toJson <|
        (⟨"Could not parse as a valid JSON command:\n" ++ e⟩ : Error)


/-- Read-eval-print loop for Lean. -/
unsafe def verify : IO Unit :=
  StateT.run' loop {}
where loop : M IO Unit := do
  let query ← getLines
  if query = "" then
    return ()
  if query.startsWith "#" || query.startsWith "--" then loop else
    IO.println <| toString <| toJson (← runCommand' (← parseCmd query))
  printFlush "\n" -- easier to parse the output if there are blank lines
  loop

Apr 01 '25 15:04 FrederickPu

This is a growing memory issue. I encountered it as well multiple times. It seems that the memory usage is increasing faster than through classical Lean usage in the IDE. I didn't find a proper fix so far (didn't try much though). Instead, I automatically restart the repl once the memory is above a certain threshold (>= 80% by default). I implemented that in LeanInteract to handle this. The package is still experimental, but the following code should run smoothly (after a pip install lean-interact):

from lean_interact import AutoLeanServer, LeanREPLConfig

config = LeanREPLConfig()
server = AutoLeanServer(config)

for i in range(1000):
    cmd = f"theorem womp{i} (a{i} b c : Nat) : (a{i} + b) + c = c + a{i} + b := by sorry"
    print(f"Running command: {cmd}")
    server.run_code(cmd)

Apr 01 '25 17:04 augustepoiroux

I wonder if we can piggy back off of some Lake's scheduling system to do this more efficiently.

Apr 01 '25 17:04 FrederickPu

It seems that this memory issue happens because all commands are run without a previous environment state. This triggers this part of code processing headers, adding runtime and memory overhead. But we only need to do this once in the example you showed. So, one can run a dummy first command and then run subsequent commands with the environment state. Taking this into account, the following code only takes 10s to run (and 5s without prints):

# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "lean-interact==0.3.2",
# ]
# ///
from lean_interact import Command, LeanREPLConfig, LeanServer
from lean_interact.interface import CommandResponse

config = LeanREPLConfig()
server = LeanServer(config)

init_env = server.run(Command(cmd="#eval 1"))
assert isinstance(init_env, CommandResponse)
for i in range(1000):
    cmd = Command(cmd=f"theorem womp{i} (a{i} b c : Nat) : (a{i} + b) + c = c + a{i} + b := by sorry", env=init_env.env)
    print(f"Running command {i}: {cmd}")
    server.run(cmd)

In your code, you can achieve this by adding process_command(process, {"cmd": "#eval 1"}) in your process_commands method, and then adding the entry "env": 0 in each of your commands.

Apr 03 '25 11:04 augustepoiroux

so is the idea that the headers are being generated faster than they can be garbage collected or smth?

Apr 03 '25 23:04 FrederickPu

The memory of unused variable will not release anyway in repl, that's the biggest problem!

A more direct way to see what happes in repl:

Modify ./REPL/Main.lean as follows:

/-- Do not record `CommandSnapshot` into the REPL state anymore, always return 0 -/
def recordCommandSnapshot (state : CommandSnapshot) : M m Nat := do
  let id := (← get).cmdStates.size
  return id

/--
Check the memory change before/after applying cmd
-/
def runCommand (s : Command) : M IO (CommandResponse ⊕ Error) := do

...

  let initialCmdState? := cmdSnapshot?.map fun c => c.cmdState

  -- print memory consumption before applying cmd
  let pid ← IO.Process.getPID
  let output ← IO.Process.output {
    cmd := "ps"
    args := #["-p", toString pid, "-o", "rss="]
  }
  let memUsage := output.stdout.trim
  IO.println s!"Memory before applying command: {memUsage} KB"

  let (cmdState, messages, trees) ← try
    IO.processInput s.cmd initialCmdState?
  catch ex =>
    return .inr ⟨ex.toString⟩

  -- print memory consumption after applying cmd
  let output ← IO.Process.output {
    cmd := "ps"
    args := #["-p", toString pid, "-o", "rss="]
  }
  let memUsage2 := output.stdout.trim
  IO.println s!": Memory after applying command: {memUsage2} KB"
  let messages ← messages.mapM fun m => Message.of m

...

 -- print memory consumption before return 
 let output ← IO.Process.output {
    cmd := "ps"
    args := #["-p", toString pid, "-o", "rss="]
  }
  let memUsage3 := output.stdout.trim
  IO.println s!": Memory before return: {memUsage3} KB"
  let messages ← messages.mapM fun m => Message.of m

Now launch repl in terminal and test 3 proof sketch sequentially:

$ lake exe repl
⚠ [20/22] Built REPL.Main
warning: ././././REPL/Main.lean:86:27: unused variable `state`
note: this linter can be disabled with `set_option linter.unusedVariables false`
{"cmd": "import Mathlib"}

Memory before applying command: 67576 KB
Memory after applying command: 4793208 KB
Memory before return: 4793404 KB
{"env": 0}

{"cmd": "def a := 1"}

Memory before applying command: 4793404 KB
Memory after applying command: 5001144 KB
Memory before return: 5001144 KB
{"env": 0}

{"cmd": "abdjiusf := 1"}

Memory before applying command: 5001144 KB
Memory after applying command: 5203448 KB
Memory before return: 5203448 KB
{"messages":
 [{"severity": "error",
   "pos": {"line": 1, "column": 0},
   "endPos": {"line": 1, "column": 8},
   "data": "unexpected identifier; expected command"}],
 "env": 0}

You can see that even the variable cmdState will not be used anymore after calling runCommand function, but the memory allocated to cmdState or other unused variables doesn't release.

So I wonder if there is any garbage collection method we can use in Lean 4?

Apr 10 '25 14:04 ohyeat

what if you wait for 2 minutes or something. Does it still never get released?

Apr 10 '25 14:04 FrederickPu

The memory of unused variable will not release anyway in repl, that's the biggest problem!

Indeed. I attempted to address this by adding a gc option that prevents recording cmdState to cmdStates, but it only marginally slowed the memory growth rate. You can see my attempt here: https://github.com/leanprover-community/repl/pull/83

Apr 10 '25 20:04 RexWzh

can we escalate this to a lean language issue?

Apr 10 '25 20:04 FrederickPu

can we escalate this to a lean language issue?

No, I don't think so. Since Lean is a general-purpose programming language, it should have some mechanism for garbage-collecting unused variables.

Apr 10 '25 20:04 RexWzh

so is the issue that it's just garbage collecting them too slowly

Apr 10 '25 21:04 FrederickPu

Lean doesn't have a garbage collector, it is reference counted. Everything is collected immediately at last use so if you have a memory issue it means you are still holding on to live references.

Apr 11 '25 00:04 digama0

do you have any idea which variable remains in scope beyond a single runCommand function?

Apr 11 '25 00:04 FrederickPu

what if you wait for 2 minutes or something. Does it still never get released?

No, the memory wont release as long as the process is still alive

Apr 11 '25 02:04 ohyeat

I just found a demonstration in functional programming in Lean tutorial and saw the following:

Reference Counting and In-Place Updates Rather than using a tracing garbage collector, as is done in Java, C#, and most JavaScript implementations, Lean uses reference counting for memory management. This means that each value in memory contains a field that tracks how many other values refer to it, and the run-time system maintains these counts as references appear or disappear. Reference counting is also used in Python, PHP, and Swift.

When asked to allocate a fresh object, Lean's run-time system is able to recycle existing objects whose reference counts are falling to zero. Additionally, array operations such as Array.set and Array.swap will mutate an array if its reference count is one, rather than allocating a modified copy. If Array.swap holds the only reference to an array, then no other part of the program can tell that it was mutated rather than copied.

Writing efficient code in Lean requires the use of tail recursion and being careful to ensure that large arrays are used uniquely. While tail calls can be identified by inspecting the function's definition, understanding whether a value is referred to uniquely may require reading the whole program. The debugging helper dbgTraceIfShared can be used at key locations in the program to check that a value is not shared.

So I guess we just implemented the loop function runComand using inappropriate syntax, so that we met this memory recycle issue. But I'm not familiar with RC technique, so I wonder if there is an explicit way to tell program to recycle these unused memory? Thanks! @digama0

Apr 11 '25 03:04 ohyeat

if we want tail recursion, can't we just put loop at the start

Apr 11 '25 04:04 FrederickPu

Hi all,

I find the way to implement tail recursion to replace repl loop, just modify two function definitions (recordCommandSnapshot, repl) in Main.lean as follows:

/-- Only record the first `CommandSnapshot` into the REPL state, returning its index for future use. -/
def recordCommandSnapshot (state : CommandSnapshot) : M m Nat := do
  let id := (← get).cmdStates.size
  if id == 0 then
    modify fun s => { s with cmdStates := s.cmdStates.push state }
  return id


/-- Read-eval-print loop for Lean, using tail recursion for saving memory. -/
unsafe def repl : IO Unit := do
  let rec loop (s : State) : IO Unit := do
    let query ← getLines
    if query = "" then
      return ()
    else if query.startsWith "#" || query.startsWith "--" then
      loop s
    else
      let (result, s') ← StateT.run (do
        match ← parse query with
        | .command r => pure <| toJson (← runCommand r)
        | .file r => pure <| toJson (← processFile r)
        | .proofStep r => pure <| toJson (← runProofStep r)
        | .pickleEnvironment r => pure <| toJson (← pickleCommandSnapshot r)
        | .unpickleEnvironment r => pure <| toJson (← unpickleCommandSnapshot r)
        | .pickleProofSnapshot r => pure <| toJson (← pickleProofSnapshot r)
        | .unpickleProofSnapshot r => pure <| toJson (← unpickleProofSnapshot r)
      ) s
      IO.println (toString result)
      printFlush "\n" -- easier to parse the output if there are blank lines
      loop s'
  loop {}

Modification of recordCommandSnapshot is for importing headers like import Mathlib, the rest commands always set "env": 0

Modified repl still suffers memory increasing issue, but it is much better than before: it runs faster, and we can observe memory recycle when REPL handle proof sketch sequentially.

We can set a loose threshold on memory increase to trigger relaunching REPL in python to avoid OOM crash further, but it will lower the speed.

Apr 12 '25 04:04 ohyeat

The memory of unused variable will not release anyway in repl, that's the biggest problem!

A more direct way to see what happes in repl:

Modify ./REPL/Main.lean as follows:

/-- Do not record CommandSnapshot into the REPL state anymore, always return 0 -/ def recordCommandSnapshot (state : CommandSnapshot) : M m Nat := do let id := (← get).cmdStates.size return id

/-- Check the memory change before/after applying cmd -/ def runCommand (s : Command) : M IO (CommandResponse ⊕ Error) := do

...

let initialCmdState? := cmdSnapshot?.map fun c => c.cmdState

-- print memory consumption before applying cmd let pid ← IO.Process.getPID let output ← IO.Process.output { cmd := "ps" args := #["-p", toString pid, "-o", "rss="] } let memUsage := output.stdout.trim IO.println s!"Memory before applying command: {memUsage} KB"

let (cmdState, messages, trees) ← try IO.processInput s.cmd initialCmdState? catch ex => return .inr ⟨ex.toString⟩

-- print memory consumption after applying cmd let output ← IO.Process.output { cmd := "ps" args := #["-p", toString pid, "-o", "rss="] } let memUsage2 := output.stdout.trim IO.println s!": Memory after applying command: {memUsage2} KB" let messages ← messages.mapM fun m => Message.of m

...

-- print memory consumption before return let output ← IO.Process.output { cmd := "ps" args := #["-p", toString pid, "-o", "rss="] } let memUsage3 := output.stdout.trim IO.println s!": Memory before return: {memUsage3} KB" let messages ← messages.mapM fun m => Message.of m

Now launch repl in terminal and test 3 proof sketch sequentially:

$ lake exe repl ⚠ [20/22] Built REPL.Main warning: ././././REPL/Main.lean:86:27: unused variable state note: this linter can be disabled with set_option linter.unusedVariables false {"cmd": "import Mathlib"}

Memory before applying command: 67576 KB Memory after applying command: 4793208 KB Memory before return: 4793404 KB {"env": 0}

{"cmd": "def a := 1"}

Memory before applying command: 4793404 KB Memory after applying command: 5001144 KB Memory before return: 5001144 KB {"env": 0}

{"cmd": "abdjiusf := 1"}

Memory before applying command: 5001144 KB Memory after applying command: 5203448 KB Memory before return: 5203448 KB {"messages": [{"severity": "error", "pos": {"line": 1, "column": 0}, "endPos": {"line": 1, "column": 8}, "data": "unexpected identifier; expected command"}], "env": 0} You can see that even the variable cmdState will not be used anymore after calling runCommand function, but the memory allocated to cmdState or other unused variables doesn't release.

So I wonder if there is any garbage collection method we can use in Lean 4?

Isn't the above memory benchmark wrong since you're doing the memory measurement inside of the runCommand function. Meaning that the commandState wouldn't have been reference counted at this point.

When I run the following benchmark:

from datasets import load_dataset
import json
import subprocess
import time
import psutil

header = "import Mathlib\nimport Aesop\n\nset_option maxHeartbeats 0\n\nopen BigOperators Real Nat Topology Rat\n\n"
# Login using e.g. `huggingface-cli login` to access this dataset
ds = load_dataset("Goedel-LM/Lean-workbook-proofs")

proofs = []

for data in ds["train"].select(range(8)):
    proof = data["full_proof"].split(header)[1]
    proofs.append(proof)
    # print(header, proof)

print("done loading")

start_repl_time = time.time()

process = subprocess.Popen(
    ["lake", "env", "../../.lake/build/bin/repl"],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,   # Makes it work with strings
    encoding="utf-8"
)

p = psutil.Process(process.pid)

process.stdin.write(json.dumps({"cmd": header}) + "\n\n")
process.stdin.flush()
while True:
    line = process.stdout.readline().strip()
    if not line:
        break

print("loadded header")

for i in range(20):
    start = time.time()

    # # Write input directly to the process
    process.stdin.write(json.dumps({"env": 0, "cmd": proofs[0]}) + "\n\n")
    process.stdin.flush()

    output_lines = []
    while True:
        line = process.stdout.readline().strip()
        if not line:
            break
        output_lines.append(line)

    stdout = "\n".join(output_lines)
    # print(stdout)

    end = time.time()

    # Monitor memory in MB
    mem_info = p.memory_info()
    memory_mb = mem_info.rss / (1024 ** 2)

    print(f"[{i}] Time Cmd: {end - start:.2f}s, Memory: {memory_mb:.2f} MB")

    # ----------------------------------

    start = time.time()

    # # Write input directly to the process
    process.stdin.write(json.dumps({"env": 0, "proofs": proofs, "mode": "naive"}) + "\n\n")
    process.stdin.flush()

    output_lines = []
    while True:
        line = process.stdout.readline().strip()
        if not line:
            break
        output_lines.append(line)

    stdout = "\n".join(output_lines)
    # print(stdout)

    end = time.time()

    # Monitor memory in MB
    mem_info = p.memory_info()
    memory_mb = mem_info.rss / (1024 ** 2)

    print(f"[{i}] Time Batch: {end - start:.2f}s, Memory: {memory_mb:.2f} MB")

process.stdin.close()
process.wait()

I get the following output:

# single cmd vs batch command with 7 items (in naive mode)

done loading
loadded header
[0] Time Cmd: 12.56s, Memory: 5.84 MB
[0] Time Batch: 11.94s, Memory: 5.84 MB
[1] Time Cmd: 12.21s, Memory: 5.84 MB
[1] Time Batch: 12.36s, Memory: 5.84 MB
[2] Time Cmd: 12.31s, Memory: 5.84 MB
[2] Time Batch: 12.16s, Memory: 5.84 MB
[3] Time Cmd: 16.09s, Memory: 5.84 MB
[3] Time Batch: 20.35s, Memory: 4.17 MB
[4] Time Cmd: 19.22s, Memory: 2.19 MB
[4] Time Batch: 22.80s, Memory: 0.45 MB
[5] Time Cmd: 19.09s, Memory: 0.45 MB
[5] Time Batch: 19.34s, Memory: 0.41 MB
[6] Time Cmd: 11.88s, Memory: 0.41 MB
[6] Time Batch: 11.79s, Memory: 0.41 MB
[7] Time Cmd: 15.72s, Memory: 0.41 MB
[7] Time Batch: 13.86s, Memory: 0.31 MB
[8] Time Cmd: 11.00s, Memory: 0.25 MB
[8] Time Batch: 11.92s, Memory: 1.43 MB
[9] Time Cmd: 11.34s, Memory: 1.43 MB
[9] Time Batch: 11.30s, Memory: 1.43 MB
[10] Time Cmd: 11.10s, Memory: 1.43 MB
[10] Time Batch: 11.33s, Memory: 1.43 MB
[11] Time Cmd: 10.84s, Memory: 1.43 MB
[11] Time Batch: 12.03s, Memory: 1.43 MB
[12] Time Cmd: 16.42s, Memory: 1.42 MB
[12] Time Batch: 11.26s, Memory: 1.48 MB
[13] Time Cmd: 10.94s, Memory: 1.48 MB
[13] Time Batch: 13.73s, Memory: 1.47 MB
[14] Time Cmd: 19.77s, Memory: 1.47 MB
[14] Time Batch: 16.88s, Memory: 1.47 MB
[15] Time Cmd: 12.80s, Memory: 1.47 MB
[15] Time Batch: 13.76s, Memory: 1.47 MB
[16] Time Cmd: 11.82s, Memory: 1.47 MB
[16] Time Batch: 16.25s, Memory: 1.45 MB
[17] Time Cmd: 15.12s, Memory: 1.45 MB
[17] Time Batch: 20.97s, Memory: 1.45 MB
[18] Time Cmd: 18.38s, Memory: 1.45 MB
[18] Time Batch: 20.46s, Memory: 1.45 MB
[19] Time Cmd: 18.88s, Memory: 1.51 MB
[19] Time Batch: 19.58s, Memory: 1.51 MB

see repo

Apr 12 '25 15:04 FrederickPu

Hi all,

I find the way to implement tail recursion to replace repl loop, just modify two function definitions (recordCommandSnapshot, repl) in Main.lean as follows:

/-- Only record the first CommandSnapshot into the REPL state, returning its index for future use. -/ def recordCommandSnapshot (state : CommandSnapshot) : M m Nat := do let id := (← get).cmdStates.size if id == 0 then modify fun s => { s with cmdStates := s.cmdStates.push state } return id

/-- Read-eval-print loop for Lean, using tail recursion for saving memory. -/ unsafe def repl : IO Unit := do let rec loop (s : State) : IO Unit := do let query ← getLines if query = "" then return () else if query.startsWith "#" || query.startsWith "--" then loop s else let (result, s') ← StateT.run (do match ← parse query with | .command r => pure <| toJson (← runCommand r) | .file r => pure <| toJson (← processFile r) | .proofStep r => pure <| toJson (← runProofStep r) | .pickleEnvironment r => pure <| toJson (← pickleCommandSnapshot r) | .unpickleEnvironment r => pure <| toJson (← unpickleCommandSnapshot r) | .pickleProofSnapshot r => pure <| toJson (← pickleProofSnapshot r) | .unpickleProofSnapshot r => pure <| toJson (← unpickleProofSnapshot r) ) s IO.println (toString result) printFlush "\n" -- easier to parse the output if there are blank lines loop s' loop {} Modification of recordCommandSnapshot is for importing headers like import Mathlib, the rest commands always set "env": 0

Modified repl still suffers memory increasing issue, but it is much better than before: it runs faster, and we can observe memory recycle when REPL handle proof sketch sequentially.

We can set a loose threshold on memory increase to trigger relaunching REPL in python to avoid OOM crash further, but it will lower the speed.

do you have a before and after comparison you can show?

Apr 12 '25 15:04 FrederickPu

@FrederickPu This is the most interesting(or maybe annoying) part: the subprocess you create in python is not the exact the process executing repl, it actually executing lake (recall the launching command lake exe repl), its memory usage will remains unchanged till dead; It has one child process, which is running repl in fact.

Here is the evidence:

# run repl in your terminal
lake exe repl

# open another terminal, watch all process related to `repl`
ps aux | grep repl

You will see 3 running process:

grep --color=auto repl
.elan/toolchains/leanprover--lean4---v4.18.0/bin/lake exe repl
././.lake/build/bin/repl.

The subprocess you created in python is the 2nd one, and the memory measurement I conducted inside of the runCommand function is from the 3rd one. In your python script, you should watch p.children instead p it self.

Apr 13 '25 10:04 ohyeat

@ohyeat I use the following codes to track the memory:

from psutil import Process, NoSuchProcess
from typing import Union

def get_process_memory(pid:Union[int, Process]):
    """Get the memory usage of a process in GB."""
    try:
        process = Process(pid) if isinstance(pid, int) else pid
        mem_info = process.memory_info()
        return mem_info.rss / 1024 / 1024 / 1024  # Convert to GB
    except NoSuchProcess:
        return 0

def get_process_children(pid:Union[int, Process]):
    try:
        parent = Process(pid) if isinstance(pid, int) else pid
        children = parent.children(recursive=True)
        return children
    except NoSuchProcess:
        return []

lake_pid = <subprocess instance>.pid
child = get_process_children(lake_pid)[-1] # try after few seconds, make sure child.name == 'repl'

Apr 13 '25 16:04 RexWzh

@ohyeat I use the following codes to track the memory:

from psutil import Process, NoSuchProcess from typing import Union

def get_process_memory(pid:Union[int, Process]): """Get the memory usage of a process in GB.""" try: process = Process(pid) if isinstance(pid, int) else pid mem_info = process.memory_info() return mem_info.rss / 1024 / 1024 / 1024 # Convert to GB except NoSuchProcess: return 0

def get_process_children(pid:Union[int, Process]): try: parent = Process(pid) if isinstance(pid, int) else pid children = parent.children(recursive=True) return children except NoSuchProcess: return []

lake_pid = .pid child = get_process_children(lake_pid)[-1] # try after few seconds, make sure child.name == 'repl'

You can try it in your test script to track the memory of repl, plotting the result can be more clear.

Apr 14 '25 05:04 ohyeat

@ohyeat I've incorporated your code into this PR https://github.com/leanprover-community/repl/pull/83 , hope that's okay with you.

I've also modified the following code snippet to be more flexible:

if id == 0 then
    modify fun s => { s with cmdStates := s.cmdStates.push state }
    return id

Apr 14 '25 11:04 RexWzh

I'm okay with that, but you should test on other mode like tactic mode to see if this modification can work well on all modes.

Apr 15 '25 10:04 ohyeat

The garbage collection approach so far only helps in limited cases and cannot prevent memory growth.

For example, import commands cannot be garbage collected:

{"cmd": "import Mathlib", "record": false}

Additionally, memory increases caused by complex proofs also cannot be garbage collected:

{"cmd": "import Mathlib\nimport Aesop\n\nopen BigOperators Real Nat Topology Rat\n\nset_option maxHeartbeats 0\nset_option maxRecDepth 100000\nset_option tactic.hygienic false"}

{"cmd": "theorem mathd_algebra_392 (n : ℕ) (h₀ : Even n)\n    (h₁ : (↑n - 2) ^ 2 + ↑n ^ 2 + (↑n + 2) ^ 2 = (12296 : ℕ)) :\n    (↑n - 2) _↑n_ (↑n + 2) / 8 = (32736 : ℕ) := by\n    have h₂ : n ≤ 12296 := by nlinarith [h₀, h₁]\n    interval_cases n <;> simp_all",
"env": 0, "record": false}

Apr 22 '25 20:04 RexWzh

what if we run the IO.processCommands on a different thread using IO.asTask?

Apr 22 '25 20:04 FrederickPu