nomad icon indicating copy to clipboard operation
nomad copied to clipboard

Ingress gateways return 503 when forwarding traffic to service where host and container ports don't match

Open brian-athinkingape opened this issue 1 year ago • 1 comment

Nomad version

Nomad v1.5.10 BuildDate 2023-10-30T13:26:22Z Revision 3d7f65f481c5b263d6c82f03862c27447cf1794b

Consul version

Consul v1.14.11 Revision c0c5688c Build Date 2023-10-31T13:58:53Z Protocol 2 spoken by default, understands 2 to 3 (agent will automatically use protocol >2 when speaking to compatible agents)

Docker version

Docker version 26.1.1, build 4cf5afa

Operating system and Environment details

Ubuntu 22.04.4 LTS (fresh install using AWS image) AWS c6a.xlarge

Issue

I'm running a setup with Nomad + Consul Connect (I'm providing a simplified test case of the problems we're encountering in our actual systems). I'm trying to set up a service running in Docker, listening on some port (let's say that we can't customize it for some reason; in this example it's Flask listening at port 5000). I also want to set up an ingress gateway to forward requests to that service (and have it listen on port 5555).

If I set up the Flask container with a port where only to = 5000 is set (so Nomad allocates a dynamic host port), then requests through my ingress gateway fail even though the container is running. If I also set static = 5000 on the Flask port, then everything works fine. However, I can't set that in production, since there will be multiple copies of the container running on a server.

Reproduction steps

Run the two job files specified below. My server is running at 10.16.0.151. When I run curl http://10.16.0.151:<dynamic port allocated by Nomad to the Flask container>, I get a 200 response with a body of Hello, World! as expected. However, running curl against the static ingress port (5555) does not give me the correct behaviour.

Expected Result

When I run curl http://10.16.0.151:5555 I should also get a 200 response with a body of Hello, World!.

Actual Result

When I run curl http://10.16.0.151:5555 I get a 503 response with a body of upstream connect error or disconnect/reset before headers. reset reason: connection failure, transport failure reason: delayed connect error: 111.

However, if I uncomment the # static = 5000 line in the Flask file, then the host port and container port match (both = 5000), and curling the ingress container returns the expected 200 response.

Job file (if appropriate)

Nomad config

# nomad 151
# Nomad agent configuration for the single test host 10.16.0.151.
# The same agent has both the server and client blocks enabled.
name = "testing151"

datacenter = "testing"
region = "testing"

log_file = "/var/log/nomad.log"

data_dir = "/opt/nomad/data"

bind_addr = "0.0.0.0"

# All three advertised endpoints use the host's 10.16.0.151 address.
advertise {
    http = "10.16.0.151"
    rpc = "10.16.0.151"
    serf = "10.16.0.151"
}

server {
    enabled = true

    raft_protocol = 3

    # The only join target is this host's own advertised address.
    server_join {
        retry_max = 3
        retry_interval = "15s"
        retry_join = [ "10.16.0.151" ]
    }

    default_scheduler_config {
        scheduler_algorithm = "spread"
        # NOTE(review): this value is a quoted string rather than a bare
        # boolean; confirm it is parsed as intended.
        memory_oversubscription_enabled = "true"
    }

    # A single server is expected, matching the one-host setup.
    bootstrap_expect = 1
}

client {
    enabled = true

    reserved {
        memory = 2048
    }

    # Named host networks. Both job files in this report select "default"
    # (10.16.0.0/16, which contains 10.16.0.151) via host_network.
    host_network "default" {
        cidr = "10.16.0.0/16"
    }

    host_network "loopback" {
        cidr = "127.0.0.1/32"
    }

    host_network "docker" {
        cidr = "172.17.0.1/32"
    }

    max_kill_timeout = "60s"

    # The Connect sidecar and gateway images are pinned to the same Envoy build.
    meta {
        connect.sidecar_image = "envoyproxy/envoy:v1.24.12"
        connect.gateway_image = "envoyproxy/envoy:v1.24.12"
    }

    servers = [ "10.16.0.151" ]
}

# Consul is addressed over loopback; the Consul config in this report lists
# 127.0.0.1 in its client_addr.
consul {
    address = "127.0.0.1:8500"
}

log_rotate_duration = "24h"
log_rotate_max_files = 14
log_rotate_bytes = 10485760
log_level = "WARN"

Consul config

# consul 151
# Consul agent configuration for the same test host (10.16.0.151),
# running as a single server with Connect enabled.
node_name = "testing151"

datacenter = "testing"

log_file = "/var/log/consul.log"
log_rotate_duration = "24h"
log_rotate_max_files = 14
log_rotate_bytes = 10485760
log_level = "WARN"

data_dir = "/opt/consul/data"

bind_addr = "0.0.0.0"
advertise_addr = "10.16.0.151"

# Two client addresses in one space-separated string: the host address and
# loopback. The Nomad config in this report uses 127.0.0.1:8500.
client_addr = "10.16.0.151 127.0.0.1"

# A single server is expected; the only join target is this host itself.
bootstrap_expect = 1
retry_join = [ "10.16.0.151" ]

server = true

enable_local_script_checks = true

connect {
    enabled = true
}

ports {
    grpc = 8502
    # NOTE(review): presumably -1 disables the TLS gRPC listener; confirm
    # against the Consul ports documentation for this version.
    grpc_tls = -1
}

# Bootstraps a "global" proxy-defaults config entry when the agent starts.
config_entries {
    bootstrap {
        kind = "proxy-defaults"
        name = "global"

        config {
            # Same protocol as the ingress listener in the flask-ingress job.
            protocol = "http"
            # NOTE(review): presumably 0 means no local request timeout; confirm.
            local_request_timeout_ms = 0
        }
    }
}

ui_config {
    enabled = true
}

Flask job

# Reproduction job: one Flask container in a bridge-mode group, registered
# as the Connect service "flask" with a sidecar proxy.
job "flask" {
    region = "testing"
    datacenters = ["testing"]
    group "flask" {
        count = 1
        network {
            mode = "bridge"
            # With static commented out, only the container-side port (5000)
            # is fixed; the report states Nomad allocates the host port
            # dynamically in this case.
            port "fivethousand" {
                # static = 5000 # If I set this port then everything works as expected
                to = 5000
                host_network = "default"
            }
        }
        service {
            name = "flask"
            # NOTE(review): this is the port label, not the literal "5000".
            # Presumably the label resolves to the dynamic host port, so the
            # sidecar may be pointed at a port nothing listens on inside the
            # network namespace; confirm against the Nomad service `port` docs.
            port = "fivethousand"
            connect {
                sidecar_service {}
            }
            # HTTP check against "/" every 10s with a 2s timeout.
            check {
                name = "Healthcheck"
                type = "http"
                path = "/"
                interval = "10s"
                timeout = "2s"
            }
        }
        task "service" {
            driver = "docker"
            config {
                # Python process listens on port 5000
                # Using this demo container:
                # https://github.com/do-community/k8s-intro-meetup-kit/tree/master/app
                image = "digitalocean/flask-helloworld"
            }
            resources {
                cpu = 500
                memory = 256
            }
            leader = true
        }
    }
}

Ingress job

# Reproduction job: a system-type job whose only group defines a Connect
# ingress gateway service and no task of its own.
job "flask-ingress" {
    region = "testing"
    datacenters = ["testing"]
    type = "system"
    group "flask-ingress" {
        network {
            mode = "bridge"
            # Fixed host port 5555 on the "default" host network; this is the
            # port curled in the report.
            port "default" {
                static = 5555
                host_network = "default"
            }
        }
        service {
            name = "flask-ingress"
            # A literal port number (as a string), not the "default" label;
            # it matches the listener port below.
            port = "5555"
            connect {
                gateway {
                    ingress {
                        # One HTTP listener on 5555 that routes every host
                        # ("*") to the "flask" service.
                        listener {
                            port = 5555
                            protocol = "http"
                            service {
                                name = "flask"
                                hosts = ["*"]
                            }
                        }
                    }
                }
            }
        }
    }
}

brian-athinkingape avatar May 07 '24 22:05 brian-athinkingape