A documented way to synchronize the Manifests of a notebook collection
Migrated from Slack (as an important project to do)
Problem
by @FelixWechsler
I have a few Pluto notebooks and I want to synchronize their Manifests to be all the same (reducing loading times). Is there an automatic script or way to do that?
Solution from Claude
function update_pluto_blocks_with_using(source_file::String, target_file::String)
source_content = read(source_file, String)
target_content = read(target_file, String)
# Define the blocks we need to handle
blocks = [
(hash = "00000000-0000-0000-0000-000000000001", name = "PLUTO_PROJECT_TOML_CONTENTS"),
(hash = "00000000-0000-0000-0000-000000000002", name = "PLUTO_MANIFEST_TOML_CONTENTS")
]
updated_content = target_content
# First, find and extract all cells with 'using' statements from source
using_cells = []
cell_pattern = r"# βββ‘ ([a-f0-9\-]+)\n([^#]*?)(?=\n# βββ‘|\z)"s
for match in eachmatch(cell_pattern, source_content)
cell_hash = match.captures[1]
cell_content = match.captures[2]
# Check if this cell contains a 'using' statement
if contains(cell_content, "using ")
push!(using_cells, (hash = cell_hash, content = "# βββ‘ $cell_hash\n$cell_content"))
println("Found using statement in cell: $cell_hash")
end
end
# Add using cells to target if they don't exist
for cell in using_cells
if !contains(updated_content, cell.hash)
# Insert before Cell order
cell_order_idx = findfirst("# βββ‘ Cell order:", updated_content)
if !isnothing(cell_order_idx)
insert_pos = first(cell_order_idx)
updated_content = updated_content[1:insert_pos-1] *
cell.content * "\n\n" *
updated_content[insert_pos:end]
println("Inserted using cell: $(cell.hash)")
end
else
# Replace existing cell with the one from source
start_marker = "# βββ‘ $(cell.hash)"
cell_start = findfirst(start_marker, updated_content)
if !isnothing(cell_start)
next_cell_pattern = r"\n# βββ‘"
search_start = last(cell_start) + 1
next_cell_match = findnext(next_cell_pattern, updated_content, search_start)
cell_end = isnothing(next_cell_match) ? length(updated_content) : first(next_cell_match) - 1
updated_content = updated_content[1:first(cell_start)-1] *
cell.content *
updated_content[cell_end+1:end]
println("Updated existing using cell: $(cell.hash)")
end
end
end
# Process TOML blocks
for block in blocks
# Find the block in source file
start_marker = "# βββ‘ $(block.hash)"
start_idx = findfirst(start_marker, source_content)
if isnothing(start_idx)
println("Warning: Could not find $(block.name) block in source file")
continue
end
# Find the end of the block
next_cell_pattern = r"\n# βββ‘"
search_start = last(start_idx) + 1
next_cell_match = findnext(next_cell_pattern, source_content, search_start)
end_idx = isnothing(next_cell_match) ? length(source_content) : first(next_cell_match) - 1
# Extract the block
source_block = source_content[first(start_idx):end_idx]
# Check if block exists in target
if contains(updated_content, start_marker)
# Replace existing block
target_start = findfirst(start_marker, updated_content)
target_search_start = last(target_start) + 1
target_next_cell = findnext(next_cell_pattern, updated_content, target_search_start)
target_end = isnothing(target_next_cell) ? length(updated_content) : first(target_next_cell) - 1
updated_content = updated_content[1:first(target_start)-1] *
source_block *
updated_content[target_end+1:end]
println("Replaced existing $(block.name) block")
else
# Insert before Cell order
cell_order_idx = findfirst("# βββ‘ Cell order:", updated_content)
if !isnothing(cell_order_idx)
insert_pos = first(cell_order_idx)
updated_content = updated_content[1:insert_pos-1] *
source_block * "\n\n" *
updated_content[insert_pos:end]
println("Inserted new $(block.name) block")
else
error("Could not find Cell order section")
end
end
end
# Update cell order to ensure using cells are at the top
cell_order_idx = findfirst("# βββ‘ Cell order:", updated_content)
if !isnothing(cell_order_idx)
cell_order_start = first(cell_order_idx)
cell_order_end = length(updated_content)
# Extract cell order lines
cell_order_content = updated_content[cell_order_start:cell_order_end]
lines = split(cell_order_content, '\n')
# Separate the header and cell references
header_line = lines[1] # "# βββ‘ Cell order:"
cell_lines = lines[2:end]
# Collect all unique cell hashes
using_hashes = [cell.hash for cell in using_cells]
toml_hashes = [block.hash for block in blocks]
# Filter out using cells and TOML blocks from existing order
other_cells = filter(line -> begin
if isempty(strip(line))
return false
end
# Extract hash from line
hash_match = match(r"([a-f0-9\-]+)", line)
if isnothing(hash_match)
return false
end
hash = hash_match.captures[1]
return !(hash in using_hashes || hash in toml_hashes)
end, cell_lines)
# Reconstruct cell order with using cells at top
new_order = [header_line]
# Add using cells first
for cell in using_cells
push!(new_order, "# β β$(cell.hash)")
end
# Add other cells
append!(new_order, other_cells)
# Add TOML blocks at the end
for block in blocks
push!(new_order, "# ββ$(block.hash)")
end
# Reconstruct the content
updated_content = updated_content[1:cell_order_start-1] * join(new_order, '\n')
end
# Write the updated content back to the target file
write(target_file, updated_content)
println("Successfully updated $target_file")
end
# Usage
# Example: copy the `using` cells and the embedded Project/Manifest TOML cells
# from a.jl into b.jl (b.jl is modified in place).
update_pluto_blocks_with_using("a.jl", "b.jl")
Related
- some scripts
https://github.com/mmikhasenko/ReactionAmplitudeExamples.jl/blob/main/services/update_notebooks.jl to be specific https://github.com/mmikhasenko/ReactionAmplitudeExamples.jl/blob/main/services/update_notebooks.jl#L30C3-L59
- Suggestion from @fonsp
I would really like a script that takes a collection of notebooks and:
- Creates a Pkg environment that contains all packages imported in all notebooks
- For each notebook, use a copy of that mega-env, with unused packages removed. I think this should minimize load times and caches can be reused :slightly_smiling_face:
I've wanted an easy way to do this for a long time as well!
This would improve the experience for students in a Pluto-based course dramatically. I have courses with 10+ notebooks, using many packages. If I could easily synchronize Manifests, my students would save a lot of time.
GitHub is advertising their agents. Shall we try?
For outsourcing it to AI, we need a feedback loop - that is the only chance to let it do something useful.
What could be a test, hm, given a collection of notebooks, normalize them and check what?
- versions (Pkg.status())
- load time?
- something else
What do you think, guys?
Requirements
- Notebooks must remain independent, openable one by one.
- No extra environment (`env/Mega`) should exist; only notebooks are updated.
- Handling of dependencies must stay consistent with regular Pluto notebooks.
Approach
- Ephemeral union resolve: Collect all `using`/`import` statements across notebooks, resolve once in a temporary Project/Manifest (not saved).
- Per-notebook envs: From the resolved Manifest, generate a Project/Manifest subset for each notebook with only its direct deps; embed into `PLUTO_PROJECT_TOML_CONTENTS` and `PLUTO_MANIFEST_TOML_CONTENTS`.
- Cache reuse: Works via the standard shared depot; versions across notebooks align, so precompiles/artifacts are reused.
- No permanent Mega env: The union Manifest exists only in memory during sync.
API Sketch
collect_deps(notebooks) -> Dict{notebook => Vector{String}}
resolve_union_ephemeral(depmap) -> MegaManifest # in memory only
derive_notebook_env(notebook, MegaManifest) -> (proj, mani)
embed_env!(notebook, proj, mani)
validate_idempotency!(notebooks)
Behavior
- Opening any notebook works standalone (embedded env present).
- If a larger-env notebook is opened after a smaller one, Pkg installs the missing packages once; subsequent opens are no-op instantiates.
- Embedded Project lists only direct deps; Manifest is pruned subset of the resolved union.
- No artifacts, files, or extra envs left behind.
Validation Checklist
- No new env files: Only notebooks are modified.
- Subset consistency: Each notebookβs Manifest matches a subset of the union Manifest.
- Idempotency: Re-running sync produces no diffs.
- Independence: Each notebook opens cleanly in Pluto with its embedded env.
- Order tolerance: Opening in any order never triggers downgrades/re-resolves; only one-time installs for supersets.
- Pruning respected: Projects contain only direct deps, Manifests handle transitive deps.
- Robust parsing: Ignore comments/strings; support `import Foo as Bar` or warn clearly.