SwanLab
SwanLab copied to clipboard
[BUG] concurrent experiment: database is locked
🐛 Bug description
同时提交多组并行实验时，出现 SQLite 并发读写报错（database is locked）：
swanlab.log(metrics, step)
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/swanlab/data/sdk.py", line 184, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/swanlab/data/sdk.py", line 208, in log
ll = run.log(data, step)
^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/swanlab/data/run/main.py", line 325, in log
metric_info = self.__exp.add(key=k, data=v, step=step)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/swanlab/data/run/exp.py", line 76, in add
self.__operator.on_column_create(column_info)
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/swanlab/data/run/helper.py", line 106, in on_column_create
return self.__run_all("on_column_create", column_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/swanlab/data/run/helper.py", line 54, in __run_all
return {name: getattr(callback, method)(*args, **kwargs) for name, callback in self.callbacks.items()}
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/swanboard/callback.py", line 87, in on_column_create
n = Namespace.create(name=namespace, experiment_id=self.exp.id, sort=column_info.sort)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/swanboard/db/models/namespaces.py", line 155, in create
return super().create(
^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/swanboard/db/model.py", line 119, in create
return super().create(**query)
^^^^^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/peewee.py", line 6741, in create
inst.save(force_insert=True)
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/swanboard/db/model.py", line 103, in save
super().save(*args, **kwargs)
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/peewee.py", line 6951, in save
pk = self.insert(**field_dict).execute()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/peewee.py", line 2036, in inner
return method(self, database, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/peewee.py", line 2107, in execute
return self._execute(database)
^^^^^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/peewee.py", line 2912, in _execute
return super(Insert, self)._execute(database)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/peewee.py", line 2625, in _execute
cursor = database.execute(self)
^^^^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/peewee.py", line 3330, in execute
return self.execute_sql(sql, params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/peewee.py", line 3320, in execute_sql
with __exception_wrapper__:
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/peewee.py", line 3088, in __exit__
reraise(new_type, new_type(exc_value, *exc_args), traceback)
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/peewee.py", line 196, in reraise
raise value.with_traceback(tb)
File "/public/opt/conda/envs/py312/lib/python3.12/site-packages/peewee.py", line 3322, in execute_sql
cursor.execute(sql, params or ())
peewee.OperationalError: database is locked
🧑‍💻 Steps to reproduce
快速复现:用 process pool executor 模拟并发状态
import swanlab
import json
import os
import yaml
import tqdm
from tqdm.contrib.concurrent import process_map
def load_metrics(folder: str) -> None:
    """Replay one run's recorded metrics into a local SwanLab experiment.

    Loads ``{folder}/config.yaml`` as the experiment config, starts a
    local-mode SwanLab run named after the folder's basename, then logs every
    record of ``{folder}/training_metrics.json`` at the step given by that
    record's ``"iteration"`` entry.

    Args:
        folder: Run directory containing ``config.yaml`` and
            ``training_metrics.json``.

    Raises:
        KeyError: If a record has no ``"iteration"`` entry.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Use a context manager so the config file handle is closed promptly
    # instead of being leaked until garbage collection.
    with open(f"{folder}/config.yaml") as cfg_file:
        cfg = yaml.load(cfg_file, Loader=yaml.FullLoader)

    swanlab.init(
        project="dinov2",
        logdir="./logs",
        mode="local",
        experiment_name=os.path.basename(folder),
        config=cfg,
    )

    with open(f"{folder}/training_metrics.json") as f:
        # Each line contains one dict; read them all up front so the
        # progress bar knows the total.
        lines = f.readlines()

    for line in tqdm.tqdm(lines):
        if not line.strip():
            # A blank line (e.g. a trailing newline) carries no record.
            continue
        # NOTE(review): this replaces eval(line). eval executes arbitrary
        # expressions and rejects JSON literals such as true/null/NaN.
        # Assumes each line is a JSON object, as the file name suggests --
        # confirm against whatever writes training_metrics.json.
        metrics = json.loads(line)
        step = metrics.pop("iteration")
        swanlab.log(metrics, step)
if __name__ == "__main__":
    # One task per entry under "outputs"; process_map runs load_metrics on
    # them in parallel worker processes, which is what triggers the
    # concurrent SQLite writes.
    run_folders = [f"outputs/{entry}" for entry in os.listdir("outputs")]
    process_map(load_metrics, run_folders)
👾 Expected result
妥善处理 SQLite 并发读写问题：写入遇到锁时等待锁释放（wait for lock release），而不是直接报错。