Admin-mandated adaptive scaling
For those of us in public sector research groups, where cost is always a concern, the ability to run a cluster that mandates adaptive scaling for key user groups would be a fantastic feature and would enable more unrestricted usage. Something like the configuration below comes to mind. The idea is that it would "disable" the ability to "fix" the size of a cluster, so that it always runs in adaptive mode.
```yaml
dask-gateway:
  gateway:
    extraConfig:
      optionHandler: |
        from dask_gateway_server.options import Options, Integer, Float, String

        def cluster_options(user):
            def option_handler(options):
                if ":" not in options.image:
                    raise ValueError("When specifying an image you must also provide a tag")
                extra_labels = {
                    "hub.jupyter.org/username": user.name,
                    "dask/username": user.name,
                }
                if "dask-high-compute-users" in user.groups:
                    # Trusted group: fixed-size clusters remain allowed
                    return {
                        "worker_cores": options.worker_cores,
                        "worker_memory": int(options.worker_memory * 2 ** 30),
                        "image": options.image,
                        "scheduler_extra_pod_labels": extra_labels,
                        "worker_extra_pod_labels": extra_labels,
                        "cluster_max_workers": 32,
                    }
                else:
                    # Everyone else: the proposed options below would force
                    # the cluster to always run in adaptive mode
                    return {
                        "worker_cores": options.worker_cores,
                        "worker_memory": int(options.worker_memory * 2 ** 30),
                        "image": options.image,
                        "scheduler_extra_pod_labels": extra_labels,
                        "worker_extra_pod_labels": extra_labels,
                        "cluster_max_workers": 32,
                        "adaptive": True,
                        "adaptive_min_workers": 0,
                    }
            return Options(
                Float("worker_cores", default=0.8, min=0.8, max=0.8, label="Worker Cores"),
                Float("worker_memory", default=3.3, min=3.3, max=3.3, label="Worker Memory (GiB)"),
                String("image", default="pangeo/base-notebook:2021.05.04", label="Image"),
                handler=option_handler,
            )

        c.Backend.cluster_options = cluster_options
```
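
For context, here is a minimal sketch of the client-side behaviour the option would govern, using the existing dask_gateway client API. The server-side enforcement itself is the feature being requested; the `adaptive` and `adaptive_min_workers` keys above do not exist today.

```python
from dask_gateway import Gateway

gateway = Gateway()
cluster = gateway.new_cluster()

# Today a user can pin a fixed cluster size...
cluster.scale(16)

# ...or opt in to adaptive scaling between bounds. The proposed
# "adaptive" option would enforce this mode server-side, rejecting
# or ignoring fixed scale() requests for the affected user groups.
cluster.adapt(minimum=0, maximum=32)
```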