reframe
reframe copied to clipboard
auto-detection, use existing module if told to
The auto-detection could perhaps be made to use an existing module on the remote system instead of bootstrapping itself. It would make the process a lot faster. A config option per site and/or partition would suffice.
Related to #2690.
Something like this works (tested):
diff --git a/reframe/frontend/autodetect.py b/reframe/frontend/autodetect.py
index d8c07c36..5d7be1c3 100644
--- a/reframe/frontend/autodetect.py
+++ b/reframe/frontend/autodetect.py
@@ -38,6 +38,22 @@ def _log_contents(filename):
f'--- {filename} ---')
+class _ake_reframe:
+ def __init__(self, prefix):
+ self._prefix = prefix
+ self._workdir = None
+
+ def __enter__(self):
+ self._workdir = os.path.abspath(
+ tempfile.mkdtemp(prefix='rfm.', dir=self._prefix)
+ )
+
+ return self._workdir
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ osext.rmtree(self._workdir)
+
+
class _copy_reframe:
def __init__(self, prefix):
self._prefix = prefix
@@ -132,11 +148,46 @@ def _remote_detect(part):
]
job.prepare(commands, env, trap_errors=True)
+ def _emit_module_script(job, env):
+ launcher_cmd = job.launcher.run_command(job)
+ commands = [
+ f'module load {module}',
+ f'{launcher_cmd} reframe --detect-host-topology=topo.json'
+ ]
+ job.prepare(commands, env, trap_errors=True)
+
getlogger().info(
f'Detecting topology of remote partition {part.fullname!r}: '
f'this may take some time...'
)
topo_info = {}
+
+ module = runtime.runtime().get_option('general/0/reframe_module')
+ if module:
+ try:
+ prefix = runtime.runtime().get_option('general/0/remote_workdir')
+ with _ake_reframe(prefix) as dirname:
+ with osext.change_dir(dirname):
+ job = Job.create(part.scheduler,
+ part.launcher_type(),
+ name='rfm-detect-job',
+ sched_access=part.access)
+ _emit_module_script(job, [part.local_env])
+ getlogger().debug('submitting detection script')
+ _log_contents(job.script_filename)
+ job.submit()
+ job.wait()
+ getlogger().debug('job finished')
+ _log_contents(job.stdout)
+ _log_contents(job.stderr)
+ topo_info = json.loads(_contents('topo.json'))
+ except Exception as e:
+ getlogger().warning(f'failed to retrieve remote processor info using ReFrame module: {e}')
+ getlogger().debug(traceback.format_exc())
+ else:
+ getlogger().warning(f'reframe module not set')
+
+
try:
prefix = runtime.runtime().get_option('general/0/remote_workdir')
with _copy_reframe(prefix) as dirname:
diff --git a/reframe/schemas/config.json b/reframe/schemas/config.json
index d9108b36..b735b448 100644
--- a/reframe/schemas/config.json
+++ b/reframe/schemas/config.json
@@ -486,6 +486,7 @@
"perf_info_level": {"$ref": "#/defs/loglevel"},
"pipeline_timeout": {"type": ["number", "null"]},
"purge_environment": {"type": "boolean"},
+ "reframe_module": {"type": "string"},
"remote_detect": {"type": "boolean"},
"remote_workdir": {"type": "string"},
"report_file": {"type": "string"},
But the reframe_module option should probably take a list of strings. One could also add a "reframe_already_in_path" boolean, which would just run reframe on the assumption that whatever version gets picked up is the correct one. Responsibility for that would of course be on the user side :-)