Pluto.jl icon indicating copy to clipboard operation
Pluto.jl copied to clipboard

A documented way to synchronize the Manifests of a notebook collection

Open mmikhasenko opened this issue 4 months ago • 3 comments

Migrated from Slack (as an important project to do)

Problem

by @FelixWechsler

I have a few Pluto notebooks and I want to synchronize their Manifests so that they are all the same (reducing loading times). Is there an automatic script or another way to do that?

Solution from Claude
# Pluto's on-disk format (used as literals below):
#   "# ╔═╡ <id>"          starts a cell; the code follows on the next lines
#   "# ╔═╡ Cell order:"   starts the trailing cell-order section
#   "# ╠═<id>" / "# ╟─<id>"  cell-order entries (visible / folded code)

# Fallback for anything that is not an `Expr` (symbols, literals, line numbers).
_pluto_has_using(::Any) = false

# Return `true` when the parsed expression contains a `using` statement anywhere.
function _pluto_has_using(ex::Expr)
    ex.head === :using && return true
    return any(_pluto_has_using, ex.args)
end

# Byte range of the cell `id` in `content`: from its "# ╔═╡ <id>" delimiter up to the
# character before the newline that precedes the next delimiter (or the end of the
# text). Returns `nothing` when the cell is absent.
function _pluto_cell_range(content::AbstractString, id::AbstractString)
    marker = findfirst("# ╔═╡ " * id, content)
    isnothing(marker) && return nothing
    next_cell = findnext("\n# ╔═╡", content, nextind(content, last(marker)))
    # Strings are indexed by byte, so use lastindex/prevind rather than length/`- 1`.
    stop = isnothing(next_cell) ? lastindex(content) : prevind(content, first(next_cell))
    return first(marker):stop
end

# Replace cell `id` in `content` with `block`, or insert `block` right before the
# "Cell order" section when the cell does not exist yet. Returns `(text, status)` with
# status `:replaced`, `:inserted`, or `:missing_order` (text unchanged).
function _pluto_upsert_cell(content::AbstractString, id::AbstractString,
                            block::AbstractString)
    body = rstrip(block, '\n')  # normalise so repeated runs do not accumulate newlines
    range = _pluto_cell_range(content, id)
    if !isnothing(range)
        head = content[1:prevind(content, first(range))]
        tail = content[nextind(content, last(range)):end]
        return string(head, body, '\n', tail), :replaced
    end
    order = findfirst("# ╔═╡ Cell order:", content)
    isnothing(order) && return String(content), :missing_order
    at = first(order)
    # Pluto separates cells with exactly one blank line ("\n\n").
    return string(content[1:prevind(content, at)], body, "\n\n", content[at:end]),
           :inserted
end

# Collect every cell of `source` whose code contains a real `using` statement
# (comments and string contents are ignored because the code is parsed, not grepped).
# Cells whose id is in `skip_ids` are never returned.
function _pluto_using_cells(source::AbstractString, skip_ids)
    cells = NamedTuple{(:hash, :content),Tuple{String,String}}[]
    # Lazy body + lookahead stops at the next delimiter; `#` is allowed in the body so
    # cells that carry comments are not skipped.
    cell_pattern = r"# ╔═╡ ([a-f0-9\-]+)\n(.*?)(?=\n# ╔═╡|\z)"s
    for m in eachmatch(cell_pattern, source)
        id = String(m.captures[1])
        code = String(m.captures[2])
        id in skip_ids && continue
        _pluto_has_using(Meta.parseall(code)) || continue
        push!(cells, (hash = id, content = "# ╔═╡ $id\n$code"))
    end
    return cells
end

# Rewrite the "Cell order" section so that `leading_ids` come first (visible),
# followed by the remaining existing entries in their original order, followed by the
# `trailing_ids` that actually exist as cells (folded). Returns `content` unchanged
# when there is no "Cell order" section.
function _pluto_rebuild_cell_order(content::AbstractString, leading_ids, trailing_ids)
    order = findfirst("# ╔═╡ Cell order:", content)
    isnothing(order) && return content
    at = first(order)
    lines = split(content[at:end], '\n')
    pinned = vcat(leading_ids, trailing_ids)
    kept = filter(lines[2:end]) do line
        m = match(r"([a-f0-9\-]+)", line)
        return !isnothing(m) && !(m.captures[1] in pinned)
    end

    new_order = String[lines[1]]  # the "# ╔═╡ Cell order:" header
    append!(new_order, ("# ╠═" * id for id in leading_ids))
    append!(new_order, kept)
    for id in trailing_ids
        # Never reference a cell that is not present in the notebook body.
        isnothing(_pluto_cell_range(content, id)) || push!(new_order, "# ╟─" * id)
    end
    return string(content[1:prevind(content, at)], join(new_order, '\n'), '\n')
end

"""
    update_pluto_blocks_with_using(source_file::String, target_file::String)

Synchronise the package environment of the Pluto notebook `target_file` with the one
embedded in the Pluto notebook `source_file`, and overwrite `target_file` in place.

Three things are copied from the source into the target:

1. every cell that contains a `using` statement (replaced when a cell with the same id
   already exists in the target, otherwise inserted before the "Cell order" section);
2. the `PLUTO_PROJECT_TOML_CONTENTS` cell;
3. the `PLUTO_MANIFEST_TOML_CONTENTS` cell.

Afterwards the target's "Cell order" section is rewritten so that the copied `using`
cells come first, the target's other cells keep their relative order, and the two TOML
cells come last.

# Arguments
- `source_file`: path of the notebook to copy the `using` cells and TOML blocks from.
- `target_file`: path of the notebook to update; it is overwritten.

# Returns
`nothing`. Progress is reported through `@info` / `@warn`.

# Throws
- `ErrorException` if a TOML block has to be inserted but the target has no
  "Cell order" section.
- Whatever `read` / `write` throw for unreadable or unwritable paths.
"""
function update_pluto_blocks_with_using(source_file::String, target_file::String)
    source_content = read(source_file, String)
    updated_content = read(target_file, String)

    # Fixed cell ids Pluto uses for the embedded environment.
    blocks = [
        (hash = "00000000-0000-0000-0000-000000000001",
         name = "PLUTO_PROJECT_TOML_CONTENTS"),
        (hash = "00000000-0000-0000-0000-000000000002",
         name = "PLUTO_MANIFEST_TOML_CONTENTS"),
    ]
    toml_hashes = [block.hash for block in blocks]

    # 1. Copy the `using` cells.
    using_cells = _pluto_using_cells(source_content, toml_hashes)
    for cell in using_cells
        @info "Found using statement in cell: $(cell.hash)"
        updated_content, status =
            _pluto_upsert_cell(updated_content, cell.hash, cell.content)
        if status === :inserted
            @info "Inserted using cell: $(cell.hash)"
        elseif status === :replaced
            @info "Updated existing using cell: $(cell.hash)"
        else
            @warn "Target has no Cell order section; skipped using cell: $(cell.hash)"
        end
    end

    # 2. Copy the embedded Project.toml / Manifest.toml cells.
    for block in blocks
        range = _pluto_cell_range(source_content, block.hash)
        if isnothing(range)
            @warn "Could not find $(block.name) block in source file"
            continue
        end
        updated_content, status =
            _pluto_upsert_cell(updated_content, block.hash, source_content[range])
        status === :missing_order && error("Could not find Cell order section")
        if status === :replaced
            @info "Replaced existing $(block.name) block"
        else
            @info "Inserted new $(block.name) block"
        end
    end

    # 3. Put the `using` cells first and the TOML cells last in the cell order.
    updated_content = _pluto_rebuild_cell_order(
        updated_content, [cell.hash for cell in using_cells], toml_hashes
    )

    write(target_file, updated_content)
    @info "Successfully updated $target_file"
    return nothing
end

# Usage: "a.jl" is the source notebook that is read, "b.jl" is the target notebook
# that gets overwritten with the updated content.
update_pluto_blocks_with_using("a.jl", "b.jl")

Related

  • some scripts

https://github.com/mmikhasenko/ReactionAmplitudeExamples.jl/blob/main/services/update_notebooks.jl to be specific https://github.com/mmikhasenko/ReactionAmplitudeExamples.jl/blob/main/services/update_notebooks.jl#L30C3-L59

  • Suggestion from @fonsp

I would really like a script that takes a collection of notebooks and:

  • Creates a Pkg environment that contains all packages imported in all notebooks
  • For each notebook, use a copy of that mega-env, with unused packages removed. I think this should minimize load times and caches can be reused :slightly_smiling_face:

mmikhasenko avatar Aug 07 '25 09:08 mmikhasenko

I've wanted an easy way to do this for a long time as well!

This would improve the experience for students in a Pluto-based course dramatically. I have courses with 10+ notebooks, using many packages. If I could easily synchronize Manifests, my students would save a lot of time.

greimel avatar Aug 20 '25 09:08 greimel

GitHub is advertising their agents. Shall we try?

For outsourcing it to AI, we need a feedback loop - that is the only chance to let it do something useful.

What could be a test, hm, given a collection of notebooks, normalize them and check what?

  • versions (Pkg.status())
  • load time?
  • something else

mmikhasenko avatar Aug 21 '25 15:08 mmikhasenko

What do you think, guys?

Requirements

  • Notebooks must remain independent, openable one by one.
  • No extra environment (env/Mega) should exist; only notebooks are updated.
  • Handling of dependencies must stay consistent with regular Pluto notebooks.

Approach

  • Ephemeral union resolve: Collect all using/import across notebooks, resolve once in a temporary Project/Manifest (not saved).
  • Per-notebook envs: From the resolved Manifest, generate a Project/Manifest subset for each notebook with only its direct deps; embed into PLUTO_PROJECT_TOML_CONTENTS and PLUTO_MANIFEST_TOML_CONTENTS.
  • Cache reuse: Works via the standard shared depot; versions across notebooks align, so precompiles/artifacts are reused.
  • No permanent Mega env: The union Manifest exists only in memory during sync.

API Sketch

collect_deps(notebooks) -> Dict{notebook => Vector{String}}
resolve_union_ephemeral(depmap) -> MegaManifest   # in memory only
derive_notebook_env(notebook, MegaManifest) -> (proj, mani)
embed_env!(notebook, proj, mani)
validate_idempotency!(notebooks)

Behavior

  • Opening any notebook works standalone (embedded env present).
  • If a larger-env notebook is opened after a smaller one, Pkg installs the missing packages once; subsequent opens are no-op instantiates.
  • Embedded Project lists only direct deps; Manifest is pruned subset of the resolved union.
  • No artifacts, files, or extra envs left behind.

Validation Checklist

  1. No new env files: Only notebooks are modified.
  2. Subset consistency: Each notebook’s Manifest matches a subset of the union Manifest.
  3. Idempotency: Re-running sync produces no diffs.
  4. Independence: Each notebook opens cleanly in Pluto with its embedded env.
  5. Order tolerance: Opening in any order never triggers downgrades/re-resolves; only one-time installs for supersets.
  6. Pruning respected: Projects contain only direct deps, Manifests handle transitive deps.
  7. Robust parsing: Ignore comments/strings; support import Foo as Bar or warn clearly.

mmikhasenko avatar Aug 21 '25 16:08 mmikhasenko