centreon-plugins icon indicating copy to clipboard operation
centreon-plugins copied to clipboard

Docker - add health check to a container

Open Coolgeek789 opened this issue 4 years ago • 5 comments

Hi.

I need the health of containers to be checked. The 'service-status' mode only applies to Swarm.

According to the Docker API documentation (https://docs.docker.com/engine/api/v1.40/#operation/ContainerInspect), this information is only available through the REST API at /containers/{container_id}/json (in {State}->{Health}->{Status}).

We should have a filter on the container's name or ID, and thresholds on health, state, or both.

Can you add this mode?

Thanks

Coolgeek789 avatar Nov 05 '20 14:11 Coolgeek789

Could you provide an example curl result?

garnier-quentin avatar Nov 05 '20 14:11 garnier-quentin

sure :

{ "Id": "3522a9e293d37a1df3272ab62e047bb40627a9eb1ec06a831911c0872e2a212d", "Created": "2020-10-28T14:24:35.717953187Z", "Path": "/bin/sh", "Args": [ "-c", "/some/path/script.sh start" ], "State": { "Status": "running", "Running": true, "Paused": false, "Restarting": false, "OOMKilled": false, "Dead": false, "Pid": 117444, "ExitCode": 0, "Error": "", "StartedAt": "2020-10-28T14:24:36.484580732Z", "FinishedAt": "0001-01-01T00:00:00Z", "Health": { "Status": "healthy", "FailingStreak": 0, "Log": [ { "Start": "2020-11-05T15:39:06.606206456+01:00", "End": "2020-11-05T15:39:06.863069714+01:00", "ExitCode": 0, "Output": "" }, { "Start": "2020-11-05T15:39:16.866406257+01:00", "End": "2020-11-05T15:39:17.14531162+01:00", "ExitCode": 0, "Output": "" }, { "Start": "2020-11-05T15:39:27.148129501+01:00", "End": "2020-11-05T15:39:27.415558112+01:00", "ExitCode": 0, "Output": "" }, { "Start": "2020-11-05T15:39:37.418586056+01:00", "End": "2020-11-05T15:39:37.683389795+01:00", "ExitCode": 0, "Output": "" }, { "Start": "2020-11-05T15:39:47.686343716+01:00", "End": "2020-11-05T15:39:47.945127004+01:00", "ExitCode": 0, "Output": "" } ] } }, "Image": "sha256:eb5b4c32dc14e42cb71fc125da61a4c3405c5297e28845ce80d140706f8669d2", "ResolvConfPath": "/var/lib/docker/containers/3522a9e293d37a1df3272ab62e047bb40627a9eb1ec06a831911c0872e2a212d/resolv.conf", "HostnamePath": "/var/lib/docker/containers/3522a9e293d37a1df3272ab62e047bb40627a9eb1ec06a831911c0872e2a212d/hostname", "HostsPath": "/var/lib/docker/containers/3522a9e293d37a1df3272ab62e047bb40627a9eb1ec06a831911c0872e2a212d/hosts", "LogPath": "/var/lib/docker/containers/3522a9e293d37a1df3272ab62e047bb40627a9eb1ec06a831911c0872e2a212d/3522a9e293d37a1df3272ab62e047bb40627a9eb1ec06a831911c0872e2a212d-json.log", "Name": "/wldauto_someapp_1", "RestartCount": 0, "Driver": "overlay2", "Platform": "linux", "MountLabel": "", "ProcessLabel": "", "AppArmorProfile": "", "ExecIDs": null, "HostConfig": { "Binds": [ ], "ContainerIDFile": "", "LogConfig": 
{ "Type": "json-file", "Config": {} }, "NetworkMode": "reverse-proxy", "PortBindings": { "port1/tcp": [ { "HostIp": "", "HostPort": "port1" } ], "port2/tcp": [ { "HostIp": "", "HostPort": "port2" } ], "port3/tcp": [ { "HostIp": "", "HostPort": "port3" } ], "port4/tcp": [ { "HostIp": "", "HostPort": "port4" } ], "port5/tcp": [ { "HostIp": "", "HostPort": "port5" } ], "port6/tcp": [ { "HostIp": "", "HostPort": "port6" } ], "port7/tcp": [ { "HostIp": "", "HostPort": "port7" } ] }, "RestartPolicy": { "Name": "always", "MaximumRetryCount": 0 }, "AutoRemove": false, "VolumeDriver": "", "VolumesFrom": [], "CapAdd": null, "CapDrop": null, "Capabilities": null, "Dns": null, "DnsOptions": null, "DnsSearch": null, "ExtraHosts": null, "GroupAdd": null, "IpcMode": "shareable", "Cgroup": "", "Links": null, "OomScoreAdj": 0, "PidMode": "", "Privileged": false, "PublishAllPorts": false, "ReadonlyRootfs": false, "SecurityOpt": null, "UTSMode": "", "UsernsMode": "", "ShmSize": 67108864, "Runtime": "runc", "ConsoleSize": [ 0, 0 ], "Isolation": "", "CpuShares": 0, "Memory": 0, "NanoCpus": 0, "CgroupParent": "", "BlkioWeight": 0, "BlkioWeightDevice": null, "BlkioDeviceReadBps": null, "BlkioDeviceWriteBps": null, "BlkioDeviceReadIOps": null, "BlkioDeviceWriteIOps": null, "CpuPeriod": 0, "CpuQuota": 0, "CpuRealtimePeriod": 0, "CpuRealtimeRuntime": 0, "CpusetCpus": "", "CpusetMems": "", "Devices": null, "DeviceCgroupRules": null, "DeviceRequests": null, "KernelMemory": 0, "KernelMemoryTCP": 0, "MemoryReservation": 0, "MemorySwap": 0, "MemorySwappiness": null, "OomKillDisable": false, "PidsLimit": null, "Ulimits": [ { "Name": "fsize", "Hard": 6144000000, "Soft": 4096000000 }, { "Name": "core", "Hard": 0, "Soft": 0 }, { "Name": "nofile", "Hard": 4096, "Soft": 1024 }, { "Name": "nproc", "Hard": 24576, "Soft": 6144 } ], "CpuCount": 0, "CpuPercent": 0, "IOMaximumIOps": 0, "IOMaximumBandwidth": 0, "Mounts": [ ], "MaskedPaths": [ "/proc/asound", "/proc/acpi", "/proc/kcore", "/proc/keys", 
"/proc/latency_stats", "/proc/timer_list", "/proc/timer_stats", "/proc/sched_debug", "/proc/scsi", "/sys/firmware" ], "ReadonlyPaths": [ "/proc/bus", "/proc/fs", "/proc/irq", "/proc/sys", "/proc/sysrq-trigger" ] }, "GraphDriver": { "Data": { "LowerDir": "/var/lib/docker/overlay2/380759dee298a5c312806458f3add7d55c2c170d75dc8d0e005784ae4534ea13-init/diff:/var/lib/docker/overlay2/21541537954de57f7fee09b7639d5583076ff769a11a3d8ec4e83c0f3f23be39/diff:/var/lib/docker/overlay2/587cc4058542d4f4a168a32819457edf1e3ce3068f3b02bf52511e7cb407de55/diff:/var/lib/docker/overlay2/2ef910285fa5347ff32363e2948f74bd28a0b343ddb2ba04a5c8b30e0d30d60d/diff:/var/lib/docker/overlay2/18f99c1e388914a7cc2161d8b7defc138896e7ae6f8eb8fcf23d6af930744391/diff:/var/lib/docker/overlay2/c0fb68fad85d6ea88f2854cb0b2001c83250aafb54a99c1c616f29f529da0538/diff:/var/lib/docker/overlay2/55e3b226d90709dd3030a911b52898bb313fa05cb2249f862ab5b257f54d21fa/diff:/var/lib/docker/overlay2/88a640d0e23a5786691e3397f14661bc7af3b5c4691366df26574c521fa3db52/diff:/var/lib/docker/overlay2/d47ee602ce6a7634ea0b4f700540d32c01588e6f5f1f16bd255c3641433e9e17/diff:/var/lib/docker/overlay2/f4d9befff3abe419234bd7540f186c11b18f22e7ca6261d7a74f7afe2e9046b6/diff:/var/lib/docker/overlay2/a672bf31b3a746c2b06e8d4d3c8e8287fadf33ab962bfb6d9b68bba0955dfcf1/diff:/var/lib/docker/overlay2/c0f1b15ab9ea7f5159b2da994cee01e64e26b92a16db36614726dd06018350ed/diff:/var/lib/docker/overlay2/1e68f444f7c7a610ea40d3439d28acdde618f9d98ffef462b796253389587711/diff:/var/lib/docker/overlay2/f89242252065e1a174f4e0fa0f0dff04f5c4b01d50f979409b2a299cc6cd1aae/diff:/var/lib/docker/overlay2/1bfa55ba3e1d203ad88ccbdc52cd8df4a3b1ef3c007b803a2d90ba331c3ad8ea/diff", "MergedDir": "/var/lib/docker/overlay2/380759dee298a5c312806458f3add7d55c2c170d75dc8d0e005784ae4534ea13/merged", "UpperDir": "/var/lib/docker/overlay2/380759dee298a5c312806458f3add7d55c2c170d75dc8d0e005784ae4534ea13/diff", "WorkDir": 
"/var/lib/docker/overlay2/380759dee298a5c312806458f3add7d55c2c170d75dc8d0e005784ae4534ea13/work" }, "Name": "overlay2" }, "Mounts": [ ], "Config": { "Hostname": "hostname-someapp", "Domainname": "", "User": "someuser", "AttachStdin": false, "AttachStdout": false, "AttachStderr": false, "ExposedPorts": { "port1/tcp": {}, "port2/tcp": {}, "port3/tcp": {}, "port4/tcp": {}, "port5/tcp": {}, "port6/tcp": {}, "port7/tcp": {} }, "Tty": false, "OpenStdin": false, "StdinOnce": false, "Env": [ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" ], "Cmd": [ "/bin/sh", "-c", "/some/path/script.sh start" ], "Healthcheck": { "Test": [ "CMD-SHELL", "docker-healthchech.sh" ], "Interval": 10000000000, "Timeout": 2000000000, "StartPeriod": 5000000000, "Retries": 3 }, "Image": "centos7/someapp:6.10.11b0174", "Volumes": { "/etc/localtime": {} }, "WorkingDir": "/home/someuser", "Entrypoint": null, "OnBuild": null, "Labels": { "com.docker.compose.config-hash": "8a18f5788cfe03e7afaebdbcfa79f20ba95e3e4b052dbb455be924c6b5bb4d34", "com.docker.compose.container-number": "1", "com.docker.compose.oneoff": "False", "com.docker.compose.project": "wldauto", "com.docker.compose.service": "someapp", "com.docker.compose.version": "1.24.0", "org.label-schema.build-date": "20191001", "org.label-schema.license": "GPLv2", "org.label-schema.name": "CentOS Base Image", "org.label-schema.schema-version": "1.0", "org.label-schema.vendor": "CentOS" } }, "NetworkSettings": { "Bridge": "", "SandboxID": "20aa7010430860cb6bb7b8b7ddd3a3f0c710bd76454c99d7d03605bb5d7498fa", "HairpinMode": false, "LinkLocalIPv6Address": "", "LinkLocalIPv6PrefixLen": 0, "Ports": { "port1/tcp": [ { "HostIp": "0.0.0.0", "HostPort": "port1" } ], "port2/tcp": [ { "HostIp": "0.0.0.0", "HostPort": "port2" } ], "port3/tcp": [ { "HostIp": "0.0.0.0", "HostPort": "port3" } ], "port4/tcp": [ { "HostIp": "0.0.0.0", "HostPort": "port4" } ], "port5/tcp": [ { "HostIp": "0.0.0.0", "HostPort": "port5" } ], "port6/tcp": [ { "HostIp": 
"0.0.0.0", "HostPort": "port6" } ], "port7/tcp": [ { "HostIp": "0.0.0.0", "HostPort": "port7" } ] }, "SandboxKey": "/var/run/docker/netns/20aa70104308", "SecondaryIPAddresses": null, "SecondaryIPv6Addresses": null, "EndpointID": "", "Gateway": "", "GlobalIPv6Address": "", "GlobalIPv6PrefixLen": 0, "IPAddress": "", "IPPrefixLen": 0, "IPv6Gateway": "", "MacAddress": "", "Networks": { "reverse-proxy": { "IPAMConfig": null, "Links": null, "Aliases": [ "someapp", "3522a9e293d3" ], "NetworkID": "54ad4826aaa5cde575bd64ba57e5adad85c667cfcfa4c76983d2c4f7e2427c4b", "EndpointID": "641373f60c5a4266eda6fa43a1612156d7dcc90b4b7732f6801dd486aaa72b24", "Gateway": "XX.XX.XX.XX", "IPAddress": "YY.YY.YY.YY", "IPPrefixLen": 16, "IPv6Gateway": "", "GlobalIPv6Address": "", "GlobalIPv6PrefixLen": 0, "MacAddress": "MM:MM:MM:MM:MM:MM", "DriverOpts": null }, "wldauto_default": { "IPAMConfig": null, "Links": null, "Aliases": [ "someapp", "3522a9e293d3" ], "NetworkID": "bc64bb5570df622f76595608055ecf6d4613d16d80a333e921e667128925f092", "EndpointID": "951f045c0b9cdeb7b8d18ad38a299f7b92ca14e78cbb380f22f067ef7f236f65", "Gateway": "XX.XX.XX.XX", "IPAddress": "YY.YY.YY.YY", "IPPrefixLen": 16, "IPv6Gateway": "", "GlobalIPv6Address": "", "GlobalIPv6PrefixLen": 0, "MacAddress": "MM:MM:MM:MM:MM:MM", "DriverOpts": null } } } }

Coolgeek789 avatar Nov 05 '20 14:11 Coolgeek789

I understand. You want to get health_status (only available via the inspect endpoint):

"State":{
      "Status":"running",
      "Running":true,
....
      "Health":{
         "Status":"healthy"

I can add it to the mode container-usage. Could you provide the JSON response of the following endpoint: /containers/json

I could also try the new one-shot=true parameter to improve the performance of stats collection (available since Docker Engine API v1.41). If you have that version, could you provide the JSON response of the following endpoint (for one example container): /containers/{id}/stats?stream=false&one-shot=true

garnier-quentin avatar Dec 30 '20 14:12 garnier-quentin

There you go : containers.json.txt stats.json.txt

Coolgeek789 avatar Dec 31 '20 16:12 Coolgeek789

The CT status is not working: I have tested it with multiple values (Stopped, Exited, Dead), but it sends neither warning nor critical alerts.

iM33d avatar Feb 02 '21 16:02 iM33d

New option --add-health: https://github.com/centreon/centreon-plugins/pull/3906

garnier-quentin avatar Sep 21 '22 09:09 garnier-quentin