atomate2
atomate2 copied to clipboard
CONTCAR -> POSCAR check
Sometimes the CONTCAR file gets created by VASP but is empty. This is not a big problem, since we copy the file first, but a helpful error message would be useful when something goes wrong. We could validate the CONTCAR before copying it and raise an error if it is invalid. https://github.com/materialsproject/atomate2/blob/d21bdac763595372c04db999fcaa71b155dae28c/src/atomate2/vasp/files.py#L101
UPDATE
For long-running jobs using FireWorks, the following CLI script can be called from the cluster to restart jobs from a fizzled state, simply copying over the structure from their CONTCAR files.
#!/usr/bin/env python3
"""Restart fizzled jobs."""
import json
import logging
from pathlib import Path
import click
from fireworks import LaunchPad
from pymatgen.io.vasp.inputs import Poscar
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# LaunchPad handle shared by the helpers in this script; it is created once,
# at import time.
# NOTE(review): auto_load() presumably reads the default FireWorks
# configuration of the current environment -- confirm.
LPAD = LaunchPad.auto_load()
def _get_fizzled(query=None):
    """Look up the ids of fizzled VASP fireworks.

    The base filter selects fireworks in the ``FIZZLED`` state whose first
    task calls ``BaseVaspMaker.make`` with either a ``Structure`` or an
    ``OutputReference`` as its first positional argument.

    Args:
        query: optional mapping merged on top of the base filter; keys it
            shares with the base filter replace the base values.

    Returns:
        list: ids of the matching fireworks, as returned by
            ``LPAD.get_fw_ids``
    """
    first_arg_class = "spec._tasks.0.job.function_args.0.@class"
    selector = {
        "spec._tasks.0.job.function.@callable": "BaseVaspMaker.make",
        "state": "FIZZLED",
        "$or": [
            {first_arg_class: class_name}
            for class_name in ("Structure", "OutputReference")
        ],
    }
    # Only merge when the caller actually supplied extra criteria.
    if query:
        selector.update(query)
    return LPAD.get_fw_ids(query=selector)
def _get_last_launch_dir(fw_id):
    """Return the launch directory of a firework's most recent launch.

    Args:
        fw_id: fireworks id

    Returns:
        The ``launch_dir`` entry of the last element of the firework's
        ``launches`` list.
    """
    fw_info = LPAD.get_fw_dict_by_id(fw_id)
    # The last entry of "launches" is taken as the most recent launch.
    latest_launch = fw_info["launches"][-1]
    return latest_launch["launch_dir"]
def _read_contcar(dir_name):
"""Read the CONTCAR currently in the directory.
Args:
dir_name: directory to read CONTCAR from
Returns:
Structure: structure from CONTCAR
"""
try:
contcar = Poscar.from_file(Path(dir_name) / "CONTCAR")
except Exception:
contcar = None
return contcar
def _update_fw_db(fw_id, structure):
    """Replace the first job argument of a firework with a new structure.

    Args:
        fw_id: fireworks id
        structure: structure to update to; its ``as_dict()`` output is what
            gets stored
    """
    # Only the stored document needs to change, so no local copy of the
    # firework is fetched or edited here.
    # NOTE(review): the key is written relative to the firework spec, relying
    # on LaunchPad.update_spec to address spec fields -- confirm against the
    # FireWorks documentation.
    update_dict = {"_tasks.0.job.function_args.0": structure.as_dict()}
    LPAD.update_spec([fw_id], update_dict)
@click.command()
@click.option(
    "-q",
    "--query",
    default="{}",
    help="Extra query criteria as a JSON object, merged into the base query.",
)
@click.option(
    "-d",
    "--dry-run",
    is_flag=True,
    help="Only report which CONTCAR files can be parsed; change nothing.",
)
def main(query, dry_run):
    """Rerun fizzled VASP fireworks starting from their last CONTCAR.

    For every matching firework the CONTCAR in its last launch directory is
    read. If it can be parsed, the firework is rerun and its input structure
    is replaced by the parsed one. With --dry-run nothing is modified.
    """
    # The option default is the JSON text "{}" (not a dict) because the value
    # is always passed through json.loads.
    try:
        extra_query = json.loads(query)
    except json.JSONDecodeError as err:
        raise click.BadParameter(
            f"not valid JSON: {err}", param_hint="--query"
        ) from err
    # Falsy values (null, {}, ...) simply mean "no extra criteria"; anything
    # else has to be an object to be merged into the base query.
    if extra_query and not isinstance(extra_query, dict):
        raise click.BadParameter("must be a JSON object", param_hint="--query")

    for fw_id in _get_fizzled(extra_query):
        dir_name = _get_last_launch_dir(fw_id)
        print(f"Reading CONTCAR for {fw_id} from {dir_name}")
        contcar = _read_contcar(dir_name)
        if contcar is None:
            # Nothing usable to restart from; leave this firework untouched.
            continue
        print(">>>>CONTCAR PARSED")
        if dry_run:
            continue
        # Start the rerun first, then update the db (stated intent: this way
        # the data from the failed calculation is kept).
        print(f"Rerun -- {fw_id}")
        LPAD.rerun_fw(fw_id)
        _update_fw_db(fw_id, contcar)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Thanks for this code, @jmmshn. What do you think about adding this as part of the command-line utility for atomate2? The command would be something like:
atm fireworks-continue ...
The only question I have is whether we can rely on the structure always being the first positional argument.
I think this could easily be modified to enable people to run VASP jobs on preemptible queues as well. I'll work on that and report back if I get it functioning.