googleComputeEngineR
googleComputeEngineR copied to clipboard
Implement instanceGroups
Easier launching of multiple VMs https://github.com/cloudyr/googleComputeEngineR/issues/61
API docs https://cloud.google.com/compute/docs/instance-templates
I think the instance groups with the instance templates help to cut down on the overhead a decent amount:
# Bring in library
library(googleComputeEngineR)
library(data.table)
library(future)
# Name instance template and instance group before beginning.
# Both names are prefixed with the value of the GCE_SSH_USER environment
# variable.
templateName <- sprintf("%s-rstudio-template2", Sys.getenv("GCE_SSH_USER"))
groupName <- sprintf("%s-rstudio-group2", Sys.getenv("GCE_SSH_USER"))
# Go to Google Console and create an instance-template
# Setup public key for quick accessing later.
# Read the key with readLines() instead of shelling out to `cat`, so this
# step does not depend on a Unix shell being available.
publicKey <- readLines("~/.ssh/id_rsa.pub", warn = FALSE)
# Prefix the key with the user fields before it is written out.
# NOTE(review): the "mgahan:mgahan:" prefix is hard-coded to one user --
# confirm it matches the account you will SSH in as.
publicKey <- paste0("mgahan:mgahan:", publicKey)
# Write the key to a temporary file in the working directory; the template
# creation step reads it via --metadata-from-file ssh-keys=tmp.pub.
cat(publicKey, file = "tmp.pub")
# Echo what was written so it can be checked by eye (the file has no
# trailing newline, hence warn = FALSE).
cat(readLines("tmp.pub", warn = FALSE), sep = "\n")
# Create template
# Build the gcloud command line one flag per argument; paste() joins the
# pieces with single spaces before the command is handed to the shell.
createInstanceTemplate <- paste(
  "gcloud compute instance-templates create",
  templateName,
  "--image-project mike-modeling",
  "--image mike-rstudio",
  "--machine-type n1-highmem-2",
  "--boot-disk-size 50GB",
  "--network default",
  "--region us-west1",
  "--service-account mgahan@fndsknk,ds.iam.gserviceaccount.com",
  "--metadata-from-file ssh-keys=tmp.pub",
  "--project mike-modeling",
  "--preemptible --quiet",
  sep = " "
)
system(createInstanceTemplate)
# The key file was only needed as input to gcloud, so remove it again.
file.remove("tmp.pub")
# Authenticate to create instance-group
# activateCommand <- paste0("gcloud auth activate-service-account --key-file ",Sys.getenv("GCE_AUTH_FILE"))
# system(activateCommand)
# Launch instance group based on instance-template.
# The group is deleted later with `--zone GCE_DEFAULT_ZONE`, so create it in
# that same zone; otherwise the delete targets a zone the group is not in.
# Falls back to the previously hard-coded us-west1-a when the variable is
# unset or empty.
# NOTE(review): gce_list_instances() presumably also looks in the default
# zone -- confirm.
groupZone <- Sys.getenv("GCE_DEFAULT_ZONE")
if (!nzchar(groupZone)) {
  groupZone <- "us-west1-a"
}
part1 <- paste0("gcloud compute instance-groups managed create ", groupName, " ")
part2 <- paste0("--zone ", groupZone, " --template ", templateName, " ")
# --size 3 asks for three VMs in the group.
part3 <- paste0(
  "--size 3 --project=", Sys.getenv("GCE_DEFAULT_PROJECT_ID"),
  " --account ", Sys.getenv("GCE_SERVICE_ACCOUNT")
)
groupCreateCommand <- paste0(part1, part2, part3)
system(groupCreateCommand)
# What instances do we have?
instanceNames <- gce_list_instances()$items$name
# Keep only the instances whose name contains the group name. fixed = TRUE
# matches the name literally, so characters such as "." in the user name are
# not treated as regex metacharacters.
instanceNames <- instanceNames[grepl(groupName, instanceNames, fixed = TRUE)]
# Grab instances
VM_List <- lapply(instanceNames, gce_get_instance)
## add any ssh details, username etc.
# The user name must be *read* with Sys.getenv(); Sys.setenv() with an
# unnamed argument raises an error. Arguments are passed by name so they
# cannot land in the wrong positional slot of gce_ssh_setup().
VM_List <- lapply(
  VM_List,
  gce_ssh_setup,
  key.pub = "~/.ssh/id_rsa.pub",
  key.private = "~/.ssh/id_rsa",
  username = Sys.getenv("GCE_SSH_USER")
)
## once all launched, add to cluster
# Each worker starts Rscript inside the docker image using host networking
# and a 13G shared-memory segment.
plan(
  cluster,
  workers = as.cluster(
    VM_List,
    docker_image = "gcr.io/mike-modeling/mike-rstudio",
    rscript = c(
      "docker", "run", "--net=host", "--shm-size=13G",
      "gcr.io/mike-modeling/mike-rstudio", "Rscript"
    )
  )
)
# Test function
# Placeholder for the per-worker computation: takes one element `x` of the
# input passed to future_lapply() and, having no body yet, returns NULL.
my_single_function <- function(x) {
  # complicated function
  NULL
}
# Collect results
# Run the function once per VM index. seq_along() yields an empty sequence
# when VM_List is empty, whereas 1:length(VM_List) would yield c(1, 0) and
# call the function twice with bogus indices.
all_results <- future_lapply(seq_along(VM_List), my_single_function)
all_results
## Delete Group
# Assemble the gcloud delete command in one call; -q suppresses the
# confirmation prompt. Zone, project and account come from the environment.
deleteGroup <- paste0(
  "gcloud compute instance-groups managed delete -q ", groupName, " ",
  "--zone ", Sys.getenv("GCE_DEFAULT_ZONE"),
  " --project=", Sys.getenv("GCE_DEFAULT_PROJECT_ID"),
  " --account ", Sys.getenv("GCE_SERVICE_ACCOUNT")
)
system(deleteGroup)
## Delete Instance Template
# Assemble the gcloud delete command in one call; -q suppresses the
# confirmation prompt. Project and account come from the environment.
deleteTemplate <- paste0(
  "gcloud compute instance-templates delete -q ", templateName, " ",
  "--project=", Sys.getenv("GCE_DEFAULT_PROJECT_ID"),
  " --account ", Sys.getenv("GCE_SERVICE_ACCOUNT")
)
system(deleteTemplate)
Thanks for this! My job then is to replace your system calls with an R native function.
At the very least though, you can also dive back to using `gcloud` for fringe cases and use this library for the R helper stuff. You may want to look at doing this in an RMarkdown document, as you can specify `bash` snippets for `gcloud` and normal `R` for `googleComputeEngineR`, and it will all be in one easy-to-read document.