semaphore
semaphore copied to clipboard
Remote runner hangs
Observed Behavior
The task hangs at
10:08:49 AM Task 2147483583 added to queue
10:09:03 AM Started: 2147483583
10:09:03 AM Run TaskRunner with template: swarm library
I don't see any errors that are likely related in either the runner or server. Below are the server logs from the relevant time frame followed by the complete runner logs.
...
semaphore_server.1.9h68p1n6f7et@cricket | time="2023-10-03T14:08:49Z" level=info msg="Task 2147483583 added to queue"
semaphore_server.1.9h68p1n6f7et@cricket | time="2023-10-03T14:08:52Z" level=info msg="Set resource locker with TaskRunner 2147483583"
semaphore_server.1.9h68p1n6f7et@cricket | time="2023-10-03T14:08:52Z" level=info msg="Task 2147483583 removed from queue"
semaphore_server.1.9h68p1n6f7et@cricket |
semaphore_server.1.9h68p1n6f7et@cricket | time="2023-10-03T14:12:03Z" level=error msg="websocket: close sent" level=Error
semaphore_server.1.9h68p1n6f7et@cricket | time="2023-10-03T14:12:03Z" level=error msg="close tcp 172.21.0.48:3000->172.21.0.60:33320: use of closed network connection" level=Error
Oct 03 10:08:06 runner systemd[1]: Started semaphore-runner.service - Semaphore Ansible Runner.
Oct 03 10:08:07 runner semaphore[42668]: Loading config
Oct 03 10:08:07 runner semaphore[42668]: Validating config
Oct 03 10:08:07 runner semaphore[42668]: time="2023-10-03T10:08:07-04:00" level=info msg="Trying to register on server"
Oct 03 10:08:07 runner semaphore[42668]: time="2023-10-03T10:08:07-04:00" level=info msg="Runner registered on server"
Expected Behavior
I expect the task to execute exactly as it would on the semaphore server itself, except with the execution happening on the runner. The same task template runs fine if I disable the remote runner in the server config.
Steps to Reproduce
- Install the server as a docker service.
- Install semaphore on a VM (Debian 12)
- Configure and start the runner (including server config).
- Setup all the bits and pieces on the server needed for a task template.
- Run the configured task template.
Extra info
- I tried this with the current stable release (v2.9.37) and v2.9.39-beta with the server and runner having the same version.
- I installed semaphore on the VM via the .deb package for both versions.
- semaphore server config
{
"mysql": {
"host": "",
"user": "",
"pass": "",
"name": "",
"options": null
},
"bolt": {
"host": "/var/lib/semaphore/database.boltdb",
"user": "",
"pass": "",
"name": "",
"options": null
},
"postgres": {
"host": "",
"user": "",
"pass": "",
"name": "",
"options": null
},
"dialect": "bolt",
"port": "",
"interface": "",
"tmp_path": "/tmp/semaphore",
"ssh_config_path": "",
"git_client": "",
"web_host": "",
"cookie_hash": "-redacted-",
"cookie_encryption": "-redacted-",
"access_key_encryption": "-redacted-",
"email_alert": true,
"email_sender": "semaphore <-redacted->",
"email_host": "-redacted-",
"email_port": "-redacted-",
"email_username": "-redacted-",
"email_password": "-redacted-",
"email_secure": true,
"ldap_enable": false,
"ldap_binddn": "",
"ldap_bindpassword": "",
"ldap_server": "",
"ldap_searchdn": "",
"ldap_searchfilter": "",
"ldap_mappings": {
"dn": "",
"mail": "",
"uid": "",
"cn": ""
},
"ldap_needtls": false,
"telegram_alert": false,
"telegram_chat": "",
"telegram_token": "",
"slack_alert": false,
"slack_url": "",
"oidc_providers": null,
"max_parallel_tasks": 0,
"runner_registration_token": "",
"password_login_disable": false,
"non_admin_can_create_project": false,
"use_remote_runner": true,
"runner": {
"api_url": "",
"registration_token": "",
"config_file": "",
"one_off": false,
"webhook": "",
"max_parallel_tasks": 0
},
"billing_enabled": false
}
- semaphore runner config
{
"mysql": {
"host": "",
"user": "",
"pass": "",
"name": "",
"options": null
},
"bolt": {
"host": "/home/semaphore/.cache/semaphore/database.boltdb",
"user": "",
"pass": "",
"name": "",
"options": null
},
"postgres": {
"host": "",
"user": "",
"pass": "",
"name": "",
"options": null
},
"dialect": "bolt",
"port": "",
"interface": "",
"tmp_path": "/tmp/semaphore",
"ssh_config_path": "",
"git_client": "",
"web_host": "",
"cookie_hash": "-redacted-",
"cookie_encryption": "-redacted-",
"access_key_encryption": "-redacted-",
"email_alert": false,
"email_sender": "",
"email_host": "",
"email_port": "",
"email_username": "",
"email_password": "",
"email_secure": false,
"ldap_enable": false,
"ldap_binddn": "",
"ldap_bindpassword": "",
"ldap_server": "",
"ldap_searchdn": "",
"ldap_searchfilter": "",
"ldap_mappings": {
"dn": "",
"mail": "",
"uid": "",
"cn": ""
},
"ldap_needtls": false,
"telegram_alert": false,
"telegram_chat": "",
"telegram_token": "",
"slack_alert": false,
"slack_url": "",
"oidc_providers": null,
"max_parallel_tasks": 0,
"runner_registration_token": "",
"password_login_disable": false,
"non_admin_can_create_project": false,
"use_remote_runner": false,
"runner": {
"api_url": "https://semaphore.local/api",
"registration_token": "-redacted-",
"config_file": "/home/semaphore/.config/semaphore-runner.json",
"one_off": false,
"webhook": "",
"max_parallel_tasks": 20
},
"billing_enabled": false
}
- semaphore-runner.service
[Unit]
Description=Semaphore Ansible Runner
Documentation=https://github.com/ansible-semaphore/semaphore
Wants=network-online.target
After=network-online.target
[Service]
Type=simple
User=semaphore
Group=semaphore
ExecReload=/bin/kill -HUP $MAINPID
ExecStart=/usr/bin/semaphore runner --config=/home/semaphore/.config/semaphore-runner.json
SyslogIdentifier=semaphore
Restart=always
RestartSec=10s
[Install]
WantedBy=multi-user.target
- I also tried v2.9.39-beta with a docker runner with the same result. Here is the docker stack file I used. It's a jinja template and I left the things that I would have had to redact as they appear in the template. To make the runner section, I just copied and pasted the server section as the runner section, removed the ports section, and changed the image to the runner image.
version: "3.9"
configs:
server_config:
file: "./config.json"
runner_config:
file: "./runner.json"
services:
server:
image: semaphoreui/semaphore:v2.9.39-beta
ports:
- "33000:3000"
networks:
- swarm-net
configs:
- source: server_config
target: /etc/semaphore/config.json
volumes:
# I condensed all the files into one directory on the server
- server_data:/var/lib/semaphore
- server_data:/home/semaphore
environment:
SEMAPHORE_DB_DIALECT: bolt
SEMAPHORE_ADMIN_PASSWORD: "{{ semaphore_admin_password }}"
SEMAPHORE_ADMIN_NAME: "{{ semaphore_admin_name }}"
SEMAPHORE_ADMIN_EMAIL: "{{ semaphore_admin_email }}"
SEMAPHORE_ADMIN: "{{ semaphore_admin_username }}"
runner:
image: semaphoreui/runner:v2.9.39-beta
networks:
- swarm-net
configs:
- source: runner_config
target: /etc/semaphore/config.json
volumes:
- runner_data:/var/lib/semaphore
environment:
SEMAPHORE_DB_DIALECT: bolt
SEMAPHORE_ADMIN_PASSWORD: "{{ semaphore_admin_password }}"
SEMAPHORE_ADMIN_NAME: "{{ semaphore_admin_name }}"
SEMAPHORE_ADMIN_EMAIL: "{{ semaphore_admin_email }}"
SEMAPHORE_ADMIN: "{{ semaphore_admin_username }}"
networks:
swarm-net:
external: true
volumes:
# These point to glusterfs volumes in reality but it's a mess of template variables that I didn't want to translate and it's not relevant
server_data: {}
runner_data: {}
I can confirm this; I too get a hang when running the task. I suspect it's the main semaphore server that hangs, as a run using the remote runner errored because it couldn't get any data from the database.
I've created a custom image for both server and runner for simpler deployment into a Kubernetes cluster. In the process of testing and iterating on these, I've found that exiting an existing runner container seems to cause these hangs. The temporary solution I found is to delete the deployment and statefulset (basically remove the database and semaphore containers) and start from scratch.
My assumption is that the failed or exited runners are not being cleared out of the list of runners on the server end. I went poking around into the API docs and it looks like there's not a ton of info about how to interact with that part of the API.
For me it's a single runner on another server, and the runner is still up; it's just the task that hangs.
I can confirm that problem and here is a temporary workaround:
1: empty runner sql table:
mariadb -u semaphore -p
use semaphore;
delete from runner;
2: remove runner.cfg (or whatever it is called in your config file)
3: re-register runner: semaphore runner --config ./config.json
This worked for me. Good Luck ! :)
Somehow no runners are in the SQL table. However, the logs on the runner say "Runner registered on server". How can we check where the runner is registered?
Does runner need the DB connectivity in the config.json? I was of the opinion that the semaphore server will broker all the connections to DB including the ones from the runner.
Somehow the doc is severely lacking in this regard.
Thought I should leave this here for anyone who faces the same issue as me.
I faced the same issue with my deployment in a Kubernetes cluster some days ago where I have one deployment for Semaphore UI and one Stateful Set for the runners.
Like @thyseus mentioned, deleting all records from the database and restarting the runner solves the problem temporarily.
After investigating the issue with the multiple records in the runner table, I noticed that each runner container created a new config_file on startup (as mentioned in the documentation) containing the runner_id and the token. This caused it to re-register itself with the server, ultimately creating multiple records in the runner table (I guess the runner doesn't find the config and tries to re-register itself with the server).
As a solution, I configured my runner containers to place the config_file in a persistent volume so that the pod can find it on startup and avoid re-registering itself with the server, which is what causes the multiple records in the runner table.
{
"runner": {
"registration_token": "***",
"config_file": "path/to/the/file/where/runner/saves/service/information", <-- THIS IN A PERSISTENT VOLUME
"api_url": "http://<semaphore_host>:<semaphore_port>/api",
"max_parallel_tasks": 10
}
}
I guess the same solution can be applied to docker deployments (just place the runner config in a volume) to avoid multiple records in the runner table after every restart of a runner container.
Hey @dmitronat
How did you get the startup config into your runner container? I can't seem to make the runner work at all in docker.
Never mind, I got it working. If you need any help, below are the configs I used.
compose file
---
version: '3.5'
# docker stack deploy --compose-file runner.yml semaphore
volumes:
semaphore_config:
configs:
runner_config:
file: ./runner_config.conf
services:
semaphore-runner:
image: semaphoreui/runner:v2.9.56
configs:
- source: runner_config
target: /etc/semaphore/config.json
volumes:
- semaphore_config:/etc/semaphore
deploy:
update_config:
order: start-first
mode: global
labels:
- traefik.enable=false
config.json
{
"mysql":{
"host":"mysql:3306",
"user":"SECRETCODE",
"pass":"SECRETCODE",
"name":"SECRETCODE",
"options":null
},
"bolt":{
"host":"",
"user":"",
"pass":"",
"name":"",
"options":null
},
"postgres":{
"host":"",
"user":"",
"pass":"",
"name":"",
"options":null
},
"dialect":"mysql",
"port": "",
"interface": "",
"tmp_path": "/tmp/semaphore",
"ssh_config_path": "",
"git_client": "",
"web_host": "",
"cookie_hash":"SECRETCODE",
"cookie_encryption":"SECRETCODE",
"access_key_encryption":"SECRETCODE",
"email_alert": false,
"email_sender": "",
"email_host": "",
"email_port": "",
"email_username": "",
"email_password": "",
"email_secure": false,
"ldap_enable": false,
"ldap_binddn": "",
"ldap_bindpassword": "",
"ldap_server": "",
"ldap_searchdn": "",
"ldap_searchfilter": "",
"ldap_mappings": {
"dn": "",
"mail": "",
"uid": "",
"cn": ""
},
"ldap_needtls": false,
"telegram_alert": false,
"telegram_chat": "",
"telegram_token": "",
"slack_alert": false,
"slack_url": "",
"oidc_providers": null,
"max_parallel_tasks": 0,
"runner_registration_token": "",
"password_login_disable": false,
"non_admin_can_create_project": false,
"use_remote_runner": false,
"runner": {
"api_url": "https://semaphore/api",
"registration_token": "SECRETCODE",
"config_file": "/etc/semaphore/runner.json",
"one_off": false,
"webhook": "",
"max_parallel_tasks": 20
},
"billing_enabled": false
}