datadog, dependency-api-errorrate, DatadogMonitor (datadoghq.com) has changed:
# Source: flow-generic/templates/ddmonitor-errorrate.yaml
apiVersion: datadoghq.com/v1alpha1
kind: DatadogMonitor
metadata:
name: dependency-api-errorrate
namespace: datadog
spec:
options:
thresholds:
warning: "0.001"
critical: "0.005"
- query: 'sum(last_10m):(sum:trace.akka_http.request.errors{env:live,service:dependency-api }.as_count() / sum:trace.akka_http.request.hits{env:live,service:dependency-api }.as_count()) > 0.005'
+ query: 'max(last_10m):median_3((sum:trace.akka_http.request.errors{env:live,service:dependency-api }.as_count() / sum:trace.akka_http.request.hits{env:live,service:dependency-api }.as_count())) > 0.005'
type: "query alert"
name: "[generated] Service dependency-api has a high error rate on env:live"
message: |
dependency-api error rate is too high.
Comment:
- \{\{#is_warning\}\}
+ {{#is_warning}}
@slack-team-foundation-notifications
- \{\{/is_warning\}\}
+ {{/is_warning}}
- \{\{#is_alert\}\}
+ {{#is_alert}}
@pagerduty-foundation
- \{\{/is_alert\}\}
+ {{/is_alert}}
- \{\{#is_alert_recovery\}\}
+ {{#is_alert_recovery}}
@pagerduty-foundation
- \{\{/is_alert_recovery\}\}
+ {{/is_alert_recovery}}
- \{\{#is_recovery\}\}
+ {{#is_recovery}}
@pagerduty-foundation
- \{\{/is_recovery\}\}
+ {{/is_recovery}}
tags:
- "service:dependency-api"
- "env:live"
- "team:foundation"
datadog, dependency-api-p95responsetime, DatadogMonitor (datadoghq.com) has changed:
# Source: flow-generic/templates/ddmonitor-p95ResponseTime.yaml
apiVersion: datadoghq.com/v1alpha1
kind: DatadogMonitor
metadata:
name: dependency-api-p95responsetime
namespace: datadog
spec:
options:
thresholds:
warning: "0.8"
critical: "1"
query: 'min(last_5m):p95:trace.akka_http.request{env:live,service:dependency-api } > 1'
type: "query alert"
name: "[generated] Service dependency-api has a high 95th percentile latency on env:live"
message: |
dependency-api 95th percentile latency is too high.
Comment:
- \{\{#is_warning\}\}
+ {{#is_warning}}
@slack-team-foundation-notifications
- \{\{/is_warning\}\}
+ {{/is_warning}}
- \{\{#is_alert\}\}
+ {{#is_alert}}
@pagerduty-foundation
- \{\{/is_alert\}\}
+ {{/is_alert}}
- \{\{#is_alert_recovery\}\}
+ {{#is_alert_recovery}}
@pagerduty-foundation
- \{\{/is_alert_recovery\}\}
+ {{/is_alert_recovery}}
- \{\{#is_recovery\}\}
+ {{#is_recovery}}
@pagerduty-foundation
- \{\{/is_recovery\}\}
+ {{/is_recovery}}
tags:
- "service:dependency-api"
- "env:live"
- "team:foundation"
production, dependency, Service (v1) has changed:
# Source: flow-generic/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
name: dependency
labels:
- helm.sh/chart: flow-generic-1.3.33
+ helm.sh/chart: flow-generic-1.3.36
app.kubernetes.io/instance: dependency-api
app.kubernetes.io/managed-by: Tiller
app.kubernetes.io/name: dependency-api
app: dependency-api
spec:
ports:
- port: 80
targetPort: http
protocol: TCP
name: http
selector:
app.kubernetes.io/name: dependency-api
app.kubernetes.io/instance: dependency-api
production, dependency-api-live, Deployment (apps) has changed:
# Source: flow-generic/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: dependency-api-live
labels:
- helm.sh/chart: flow-generic-1.3.33
+ helm.sh/chart: flow-generic-1.3.36
app.kubernetes.io/instance: dependency-api
app.kubernetes.io/managed-by: Tiller
app.kubernetes.io/name: dependency-api
app.kubernetes.io/stage: live
app: dependency-api
tags.datadoghq.com/service: dependency-api
tags.datadoghq.com/env: live
- tags.datadoghq.com/version: 0.9.23
- flow.io/version: 0.9.23
+ tags.datadoghq.com/version: 0.9.23-1-gbbe0ce9
+ flow.io/version: 0.9.23-1-gbbe0ce9
flow.io/team: foundation
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: dependency-api
app.kubernetes.io/instance: dependency-api
app.kubernetes.io/stage: live
template:
metadata:
labels:
app.kubernetes.io/name: dependency-api
app.kubernetes.io/instance: dependency-api
app.kubernetes.io/stage: live
app: dependency-api
tags.datadoghq.com/service: dependency-api
tags.datadoghq.com/env: live
- tags.datadoghq.com/version: 0.9.23
- flow.io/version: 0.9.23
+ tags.datadoghq.com/version: 0.9.23-1-gbbe0ce9
+ flow.io/version: 0.9.23-1-gbbe0ce9
flow.io/team: foundation
annotations:
iam.amazonaws.com/role: arn:aws:iam::479720515435:role/ecsInstanceRole
sumologic.com/sourceCategory: dependency-api
sidecar.istio.io/logLevel: info
proxy.istio.io/config: '{"terminationDrainDuration": 30s}'
spec:
terminationGracePeriodSeconds: 60
containers:
- name: dependency-api
- image: "flowcommerce/dependency-api:0.9.23"
+ image: "flowcommerce/dependency-api:0.9.23-1-gbbe0ce9"
imagePullPolicy: IfNotPresent
env:
- name: JAVA_OPTS
value: "-Xms3000m -Xmx3000m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/opt -XX:+UseG1GC -XX:+UseStringDeduplication -XX:FlightRecorderOptions=stackdepth=256"
- name: FLOW_KUBERNETES_NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: FLOW_KUBERNETES_NODE_IP
valueFrom:
fieldRef:
fieldPath: status.hostIP
- name: FLOW_KUBERNETES_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: FLOW_KUBERNETES_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: FLOW_KUBERNETES_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
- name: FLOW_KUBERNETES_POD_UID
valueFrom:
fieldRef:
fieldPath: metadata.uid
- name: DD_SERVICE
value: dependency-api
- name: DD_ENV
value: live
- name: DD_VERSION
- value: 0.9.23
+ value: 0.9.23-1-gbbe0ce9
- name: DD_JMXFETCH_STATSD_HOST
value: 'unix:///var/run/datadog/dsd.socket'
- name: DD_DOGSTATSD_URL
value: 'unix:///var/run/datadog/dsd.socket'
- name: DD_TRACE_AGENT_URL
value: 'unix:///var/run/datadog/apm.socket'
- name: DD_SERVICE_MAPPING
value: "postgresql:dependency-api-postgresql,java-aws-sdk:dependency-api-aws-sdk"
- name: DD_TAGS
value: "team:foundation"
args: ["production"]
ports:
- name: http
containerPort: 9000
protocol: TCP
volumeMounts:
- name: dsdsocket
mountPath: /var/run/datadog
startupProbe:
httpGet:
path: /_internal_/healthcheck
port: http
failureThreshold: 30
periodSeconds: 10
livenessProbe:
httpGet:
path: /_internal_/healthcheck
port: http
failureThreshold: 6
periodSeconds: 10
timeoutSeconds: 5
readinessProbe:
httpGet:
path: /_internal_/healthcheck
port: http
failureThreshold: 3
periodSeconds: 10
timeoutSeconds: 5
resources:
limits:
cpu: 1
memory: 3400Mi
requests:
cpu: 0.05
memory: 3400Mi
nodeSelector:
role: workers-lg
imagePullSecrets:
- name: flow-docker-hub
topologySpreadConstraints:
- maxSkew: 1
topologyKey: topology.kubernetes.io/zone
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: dependency-api
tolerations:
- effect: NoSchedule
key: role
operator: Equal
value: workers-lg
dnsConfig:
options:
- name: ndots
value: "1"
volumes:
- name: dsdsocket
hostPath:
path: /var/run/datadog/
type: DirectoryOrCreate
strategy:
type: RollingUpdate
production, dependency-api-live, VirtualService (networking.istio.io) has changed:
# Source: flow-generic/templates/istio.yaml
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
name: dependency-api-live
labels:
app.kubernetes.io/name: dependency-api
- helm.sh/chart: flow-generic-1.3.33
+ helm.sh/chart: flow-generic-1.3.36
app.kubernetes.io/instance: dependency-api
app.kubernetes.io/managed-by: Tiller
app: dependency-api
spec:
hosts:
- dependency
- dependency.api.flow.io
gateways:
- mesh
- dependency-flow-io
http:
- route:
- destination:
host: dependency
port:
number: 80
subset: live
production, dependency-flow-io, Gateway (networking.istio.io) has changed:
# Source: flow-generic/templates/ingress-defaults.yaml
apiVersion: networking.istio.io/v1alpha3
kind: Gateway
metadata:
name: dependency-flow-io
labels:
app.kubernetes.io/name: dependency-api
- helm.sh/chart: flow-generic-1.3.33
+ helm.sh/chart: flow-generic-1.3.36
app.kubernetes.io/instance: dependency-api
app.kubernetes.io/managed-by: Tiller
app: dependency-api
annotations:
kubernetes.io/ingress.class: "ingressgateway"
kubernetes.io/ingress.tld: "api.flow.io"
external-dns.alpha.kubernetes.io/ttl: "120"
spec:
selector:
istio: ingressgateway
servers:
- port:
number: 80
name: http
protocol: HTTP2
hosts:
- dependency.api.flow.io
tls:
httpsRedirect: true
- port:
number: 443
name: https
protocol: HTTP2
hosts:
- dependency.api.flow.io
datadog, dependency-api-httpaveragelatency, DatadogMonitor (datadoghq.com) has been added:
+ # Source: flow-generic/templates/ddmonitor-http-averagelatency.yaml
+ apiVersion: datadoghq.com/v1alpha1
+ kind: DatadogMonitor
+ metadata:
+ name: dependency-api-httpaveragelatency
+ namespace: datadog
+ spec:
+ options:
+ thresholds:
+ warning: "0.3"
+ critical: "20"
+ query: 'avg(last_10m):(default_zero(avg:trace.akka_http.request{env:live,service:dependency-api })) > 20'
+ type: "query alert"
+ name: "[generated] Service dependency-api has a high average latency on env:live"
+ message: |
+ dependency-api average latency is too high.
+
+ Comment:
+
+
+ {{#is_warning}}
+ @slack-team-foundation-notifications
+ {{/is_warning}}
+
+ {{#is_alert}}
+ @pagerduty-foundation
+ {{/is_alert}}
+
+ {{#is_alert_recovery}}
+ @pagerduty-foundation
+ {{/is_alert_recovery}}
+
+ {{#is_recovery}}
+ @pagerduty-foundation
+ {{/is_recovery}}
+ tags:
+ - "service:dependency-api"
+ - "env:live"
+ - "team:foundation"