FedVision
FedVision copied to clipboard
发现了compile error
机器为 Ubuntu 20.04,有一块 NVIDIA 3090 显卡,Python 版本为 3.8,其余包的版本均按照 README 和 requirements.txt 中的要求安装
但是在运行Run examples部分的
sh FedVision/examples/paddle_mnist/run.sh 127.0.0.1:10002
语句时遇到如下错误:
Traceback (most recent call last):
File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
│ │ └ {'__name__': '__main__', '__doc__': None, '__package__': 'fedvision.framework.cli', '__loader__': <_frozen_importlib_external...
│ └ <code object <module> at 0x7f769b3b7240, file "/home/lkhpc/projects/fed_sub_machine/FedVision/fedvision/framework/cli/master....
└ <function _run_code at 0x7f769b3ff160>
File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
│ └ {'__name__': '__main__', '__doc__': None, '__package__': 'fedvision.framework.cli', '__loader__': <_frozen_importlib_external...
└ <code object <module> at 0x7f769b3b7240, file "/home/lkhpc/projects/fed_sub_machine/FedVision/fedvision/framework/cli/master....
File "/home/lkhpc/projects/fed_sub_machine/FedVision/fedvision/framework/cli/master.py", line 58, in <module>
start_master()
└ <Command start-master>
File "/home/lkhpc/projects/fed_sub_machine/venv/lib/python3.8/site-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
│ │ │ └ {}
│ │ └ ()
│ └ <function BaseCommand.main at 0x7f769aca4a60>
└ <Command start-master>
File "/home/lkhpc/projects/fed_sub_machine/venv/lib/python3.8/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
│ │ └ <click.core.Context object at 0x7f769b446ca0>
│ └ <function Command.invoke at 0x7f769ac97430>
└ <Command start-master>
File "/home/lkhpc/projects/fed_sub_machine/venv/lib/python3.8/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
│ │ │ │ │ └ {'submitter_port': 10002, 'party_id': 'master1', 'cluster_address': '127.0.0.1:10001', 'coordinator_address': '127.0.0.1:10000'}
│ │ │ │ └ <click.core.Context object at 0x7f769b446ca0>
│ │ │ └ <function start_master at 0x7f769ac9e820>
│ │ └ <Command start-master>
│ └ <function Context.invoke at 0x7f769aca4550>
└ <click.core.Context object at 0x7f769b446ca0>
File "/home/lkhpc/projects/fed_sub_machine/venv/lib/python3.8/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
│ │ └ {'submitter_port': 10002, 'party_id': 'master1', 'cluster_address': '127.0.0.1:10001', 'coordinator_address': '127.0.0.1:10000'}
│ └ ()
└ <function start_master at 0x7f769ac9e820>
File "/home/lkhpc/projects/fed_sub_machine/FedVision/fedvision/framework/cli/master.py", line 48, in start_master
loop.run_forever()
│ └ <function BaseEventLoop.run_forever at 0x7f769ad45280>
└ <_UnixSelectorEventLoop running=True closed=False debug=False>
File "/usr/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x7f769ad47dc0>
└ <_UnixSelectorEventLoop running=True closed=False debug=False>
File "/usr/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
handle._run()
│ └ <function Handle._run at 0x7f769ae33b80>
└ <Handle <TaskWakeupMethWrapper object at 0x7f769408bb20>(<Future finished result=1>)>
File "/usr/lib/python3.8/asyncio/events.py", line 81, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle <TaskWakeupMethWrapper object at 0x7f769408bb20>(<Future finished result=1>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle <TaskWakeupMethWrapper object at 0x7f769408bb20>(<Future finished result=1>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle <TaskWakeupMethWrapper object at 0x7f769408bb20>(<Future finished result=1>)>
> File "/home/lkhpc/projects/fed_sub_machine/FedVision/fedvision/framework/master/master.py", line 470, in _co_handler
await job.compile()
│ └ <function PaddleFLJob.compile at 0x7f769891af70>
└ <fedvision.paddle_fl.job.PaddleFLJob object at 0x7f769408b820>
File "/home/lkhpc/projects/fed_sub_machine/FedVision/fedvision/paddle_fl/job.py", line 95, in compile
raise FedvisionJobCompileException("compile error")
└ <class 'fedvision.framework.utils.exception.FedvisionJobCompileException'>
fedvision.framework.utils.exception.FedvisionJobCompileException: compile error
请到该 job 目录下的 master 目录中查看 stderr;这里的报错嵌套层级较多,不容易从中直接找到实际的错误。