flux-sched
flux-sched copied to clipboard
sharness based scheduler performance study
I was able to create a reproducer for some kind of Fluxion performance problem via the following sharness test, which creates 16K fake resources split into 3 different sets via properties, drains 1000 of them (ranks 1-1000), then submits various sets of mock execution jobs to them.
In my testing, Fluxion appears to take a long time to initialize from the configured fake resources. Then the first set of jobs gets only 0.4 job/s throughput. After that, performance seems to improve. This may be nothing, or I may have made some kind of setup mistake, but I thought I'd share just in case.
I dropped this test into t/t8888-scheduling-test.t
(results below)
#!/bin/sh
#
# Sharness test that loads Fluxion on a large set of fake resources and
# submits several batches of mock-execution jobs to observe scheduler
# initialization time and job throughput.
test_description='Fluxion scheduling performance study on 16K fake nodes'

# Load the sharness framework that lives next to this script. Quote the
# path so a checkout directory containing spaces does not break sourcing.
. "$(dirname "$0")"/sharness.sh

# These must be exported before test_under_flux is called below.
# NOTE(review): presumably QUORUM=1 lets the instance come up with only
# one broker online, and START_MODE=leader starts only the leader rank,
# so the 16384 "ranks" are not real brokers -- confirm against
# flux-sharness.sh.
export TEST_UNDER_FLUX_QUORUM=1
export TEST_UNDER_FLUX_START_MODE=leader
# rpc TOPIC
#
# Send an RPC with no payload to TOPIC through the Flux Python bindings
# and print the response payload string on stdout.
#
# The topic is handed to Python as a command-line argument (sys.argv[1])
# rather than being pasted into the Python source text, so a topic that
# contains quotes or backslashes cannot corrupt the one-liner.
rpc() {
    flux python -c \
        'import sys, flux; print(flux.Flux().rpc(sys.argv[1]).get_str())' \
        "$1"
}
# Run the rest of this script under a Flux test instance of size 16384,
# which matches the number of fake hosts (fake[0-16383]) configured in
# the 'update configuration' test.
# NOTE(review): "system" presumably selects the system-instance
# personality provided by flux-sharness.sh -- confirm.
test_under_flux 16384 system
# Remove the sched-simple module so that the Fluxion modules can be
# loaded in a later test; sched-simple is loaded again by the final test.
test_expect_success 'unload sched-simple' '
flux module remove -f sched-simple
'
# Load the instance configuration used for the study:
#   - 16384 fake hosts (fake[0-16383]), each with cores 0-63 and gpus 0-3
#   - property "compute" on fake[0-9999], "test" on fake[10000-16000],
#     and "debug" on fake[16001-16383]
#   - sched-fluxion-qmanager and sched-fluxion-resource settings
#
# The here-document delimiter is written as plain EOF. The test body is
# already inside single quotes, so writing 'EOF' here would only close
# and reopen the outer quote and still yield an unquoted EOF; spelling
# it this way says what the shell actually sees. The document contains
# nothing the shell would expand.
test_expect_success 'update configuration' '
flux config load <<-EOF
[[resource.config]]
hosts = "fake[0-16383]"
cores = "0-63"
gpus = "0-3"
[[resource.config]]
hosts = "fake[0-9999]"
properties = ["compute"]
[[resource.config]]
hosts = "fake[10000-16000]"
properties = ["test"]
[[resource.config]]
hosts = "fake[16001-16383]"
properties = ["debug"]
[sched-fluxion-qmanager]
queue-policy = "easy"
[sched-fluxion-resource]
match-policy = "firstnodex"
prune-filters = "ALL:core,ALL:gpu,cluster:node,rack:node"
match-format = "rv1_nosched"
EOF
'
# Reload the resource module after the configuration change.
# NOTE(review): presumably "noverify" skips checking the fake resources
# against the real hardware and "monitor-force-up" marks every rank as
# up even though no broker is running there -- confirm against the
# flux-core resource module documentation.
test_expect_success 'reload resource with monitor-force-up' '
flux module reload -f resource noverify monitor-force-up
'
# Drain targets 1-1000; the remaining words ("test with drained nodes")
# are presumably taken as the drain reason.
# NOTE(review): the test title says "a few", but this drains 1000 nodes.
test_expect_success 'drain a few nodes' '
flux resource drain 1-1000 test with drained nodes
'
# Load the two Fluxion modules: the resource module first, then the
# queue manager. The && chain makes the test fail if either load fails.
test_expect_success 'load fluxion modules' '
flux module load sched-fluxion-resource &&
flux module load sched-fluxion-qmanager
'
# Time a sched.resource-status RPC; presumably the response is not
# returned until the scheduler has finished initializing, so the elapsed
# time approximates Fluxion's startup cost. stderr of the RPC helper is
# discarded.
# NOTE(review): `time` is applied to the shell function rpc, which only
# works in a shell where `time` is a keyword (e.g. bash). Under a
# /bin/sh without that keyword, an external time(1) would be looked up
# and could not run the function -- confirm the intended shell.
test_expect_success 'wait for fluxion to be ready' '
time rpc sched.resource-status 2>/dev/null
'
# Submit 100 copies of a job (--cc=1-100) constrained to nodes with the
# "compute" property and wait for them to finish (--wait), leaving 100
# inactive jobs behind. Each job uses mock execution with a 0.01s run
# duration. --progress --jps display a progress bar with a jobs-per-
# second figure.
test_expect_success 'create a set of 100 inactive jobs' '
flux submit --cc=1-100 --quiet --wait \
--requires="compute" \
--progress --jps \
--setattr=exec.test.run_duration=0.01s \
hostname
'
# Submit 64 single-node (-N1) jobs on "compute" nodes and return once
# each has posted its start event. The mock run duration is 5m, so these
# jobs are presumably still running while the remaining tests execute.
test_expect_success 'create a set of 64 running jobs' '
flux submit --progress --jps --quiet --cc=1-64 --wait-event=start -N1 \
--requires=compute \
--setattr=exec.test.run_duration=5m hostname
'
# Submit a 10-node job constrained to hosts fake[10-19], all of which
# fall inside the drained range 1-1000. The command only waits for the
# depend event, so it returns without the job being allocated; the job
# presumably stays pending in the queue for the rest of the test.
test_expect_success 'submit a job that requires a drained node' '
flux submit --quiet --wait-event=depend -N10 \
--requires="host:fake[10-19] and compute" \
hostname
'
# Run a 16-node job with a 1s mock run duration in the foreground.
# -vvv makes flux run print the job and exec eventlog entries with
# timestamps, which shows how long the job waited for its allocation.
test_expect_success 'run a simple job' '
flux run -vvv --setattr=exec.test.run_duration=1s -N16 hostname
'
# Fetch Fluxion's statistics via the sched-fluxion-resource.stat RPC and
# pretty-print the JSON response.
#
# jq is given an explicit identity filter ("."): some jq versions refuse
# to run without a filter when stdout is not a terminal, which is the
# case when the test is run non-verbosely or with output captured.
# Note that the test status is that of jq, the last pipeline stage.
test_expect_success 'get match stats' '
rpc sched-fluxion-resource.stat | jq .
'
# Tear down: remove the Fluxion modules in the reverse order they were
# loaded (queue manager first, then resource), then load sched-simple,
# which the first test removed.
test_expect_success 'unload fluxion' '
flux module remove sched-fluxion-qmanager &&
flux module remove sched-fluxion-resource &&
flux module load sched-simple
'
# Sharness epilogue.
test_done
$ ./t8888-scheduling-test.t -d -v
sharness: loading extensions from /g/g0/grondo/git/flux-sched/t/sharness.d/flux-sharness.sh
sharness: loading extensions from /g/g0/grondo/git/flux-sched/t/sharness.d/sched-sharness.sh
sharness: loading extensions from /g/g0/grondo/git/flux-sched/t/sharness.d/flux-sharness.sh
sharness: loading extensions from /g/g0/grondo/git/flux-sched/t/sharness.d/sched-sharness.sh
expecting success:
flux module remove -f sched-simple
ok 1 - unload sched-simple
expecting success:
flux config load <<-EOF
[[resource.config]]
hosts = "fake[0-16383]"
cores = "0-63"
gpus = "0-3"
[[resource.config]]
hosts = "fake[0-9999]"
properties = ["compute"]
[[resource.config]]
hosts = "fake[10000-16000]"
properties = ["test"]
[[resource.config]]
hosts = "fake[16001-16383]"
properties = ["debug"]
[sched-fluxion-qmanager]
queue-policy = "easy"
[sched-fluxion-resource]
match-policy = "firstnodex"
prune-filters = "ALL:core,ALL:gpu,cluster:node,rack:node"
match-format = "rv1_nosched"
EOF
ok 2 - update configuration
expecting success:
flux module reload -f resource noverify monitor-force-up
ok 3 - reload resource with monitor-force-up
expecting success:
flux resource drain 1-1000 test with drained nodes
ok 4 - drain a few nodes
expecting success:
flux module load sched-fluxion-resource &&
flux module load sched-fluxion-qmanager
ok 5 - load fluxion modules
expecting success:
time rpc sched.resource-status 2>/dev/null
{"all":{"version":1,"execution":{"R_lite":[{"rank":"0-16383","children":{"core":"0-63","gpu":"0-3"}}],"nodelist":["fake[0-16383]"],"properties":{"compute":"0-9999","debug":"16001-16383","test":"10000-16000"},"starttime":0,"expiration":0}},"down":{"version":1,"execution":{"R_lite":[{"rank":"1-1000","children":{"core":"0-63","gpu":"0-3"}}],"nodelist":["fake[1-1000]"],"properties":{"compute":"1-1000"},"starttime":0,"expiration":0}},"allocated":null}
real 2m22.924s
user 0m0.142s
sys 0m0.027s
ok 6 - wait for fluxion to be ready
expecting success:
flux submit --cc=1-100 --quiet --wait \
--requires="compute" \
--progress --jps \
--setattr=exec.test.run_duration=0.01s \
hostname
PD:0 R:0 CD:100 F:0 │███████████████████████████████████│100.0% 0.4 job/s
ok 7 - create a set of 100 inactive jobs
expecting success:
flux submit --progress --jps --quiet --cc=1-64 --wait-event=start -N1 \
--requires=compute \
--setattr=exec.test.run_duration=5m hostname
PD:0 R:64 CD:0 F:0 │███████████████████████████████████│100.0% 6.1 job/s
ok 8 - create a set of 64 running jobs
expecting success:
flux submit --quiet --wait-event=depend -N10 \
--requires="host:fake[10-19] and compute" \
hostname
ok 9 - submit a job that requires a drained node
expecting success:
flux run -vvv --setattr=exec.test.run_duration=1s -N16 hostname
jobid: f3zuRJpBy
0.000s: job.submit {"userid":6885,"urgency":16,"flags":0,"version":1}
0.012s: job.validate
0.023s: job.depend
0.023s: job.priority {"priority":16}
1.590s: job.alloc
1.590s: job.prolog-start {"description":"cray-pals-port-distributor"}
1.591s: job.cray_port_distribution {"ports":[11999,11998],"random_integer":1935304580792628789}
1.591s: job.prolog-finish {"description":"cray-pals-port-distributor","status":0}
1.593s: job.start
1.592s: exec.init
1.593s: exec.starting
2.594s: exec.complete {"status":0}
2.594s: exec.done
flux-job: No job output found
2.594s: job.finish {"status":0}
ok 10 - run a simple job
expecting success:
rpc sched-fluxion-resource.stat | jq
{
"V": 1130497,
"E": 2260992,
"by_rank": {
"[0-16383]": 69
},
"load-time": 44.955565206000003,
"njobs": 165,
"min-match": 0.098841462000000005,
"max-match": 2.72908858,
"avg-match": 1.4591527333454548
}
ok 11 - get match stats
expecting success:
flux module remove sched-fluxion-qmanager &&
flux module remove sched-fluxion-resource &&
flux module load sched-simple
ok 12 - unload fluxion
# passed all 12 test(s)
1..12