dependency icon indicating copy to clipboard operation
dependency copied to clipboard

Updating monitors

Open — arias-m opened this issue 2 years ago • 1 comment

arias-m avatar Feb 10 '23 09:02 arias-m

datadog, dependency-api-errorrate, DatadogMonitor (datadoghq.com) has changed:
  # Source: flow-generic/templates/ddmonitor-errorrate.yaml
  apiVersion: datadoghq.com/v1alpha1
  kind: DatadogMonitor
  metadata:
    name: dependency-api-errorrate
    namespace: datadog
  spec:
    options:
      thresholds:
        warning: "0.001"
        critical: "0.005"
-   query: 'sum(last_10m):(sum:trace.akka_http.request.errors{env:live,service:dependency-api }.as_count() / sum:trace.akka_http.request.hits{env:live,service:dependency-api }.as_count()) > 0.005'
+   query: 'max(last_10m):median_3((sum:trace.akka_http.request.errors{env:live,service:dependency-api }.as_count() / sum:trace.akka_http.request.hits{env:live,service:dependency-api }.as_count())) > 0.005'
    type: "query alert"
    name: "[generated] Service dependency-api has a high error rate on env:live"
    message: |
      dependency-api error rate is too high.

      Comment: 


-     \{\{#is_warning\}\}
+     {{#is_warning}}
        @slack-team-foundation-notifications
-     \{\{/is_warning\}\}
+     {{/is_warning}}

-     \{\{#is_alert\}\}
+     {{#is_alert}}
        @pagerduty-foundation
-     \{\{/is_alert\}\} 
+     {{/is_alert}}
            
-     \{\{#is_alert_recovery\}\}
+     {{#is_alert_recovery}}
        @pagerduty-foundation
-     \{\{/is_alert_recovery\}\} 
+     {{/is_alert_recovery}}
      
-     \{\{#is_recovery\}\}
+     {{#is_recovery}}
        @pagerduty-foundation
-     \{\{/is_recovery\}\}
+     {{/is_recovery}}

    tags:
      - "service:dependency-api"
      - "env:live"
      - "team:foundation"
datadog, dependency-api-p95responsetime, DatadogMonitor (datadoghq.com) has changed:
  # Source: flow-generic/templates/ddmonitor-p95ResponseTime.yaml
  apiVersion: datadoghq.com/v1alpha1
  kind: DatadogMonitor
  metadata:
    name: dependency-api-p95responsetime
    namespace: datadog
  spec:
    options:
      thresholds:
        warning: "0.8"
        critical: "1"
    query: 'min(last_5m):p95:trace.akka_http.request{env:live,service:dependency-api } > 1'
    type: "query alert"
    name: "[generated] Service dependency-api has a high 95th percentile latency on env:live"
    message: |
      dependency-api 95th percentile latency is too high.

      Comment: 


-     \{\{#is_warning\}\}
+     {{#is_warning}}
        @slack-team-foundation-notifications
-     \{\{/is_warning\}\}
+     {{/is_warning}}

-     \{\{#is_alert\}\}
+     {{#is_alert}}
        @pagerduty-foundation
-     \{\{/is_alert\}\} 
+     {{/is_alert}}
            
-     \{\{#is_alert_recovery\}\}
+     {{#is_alert_recovery}}
        @pagerduty-foundation
-     \{\{/is_alert_recovery\}\} 
+     {{/is_alert_recovery}}
      
-     \{\{#is_recovery\}\}
+     {{#is_recovery}}
        @pagerduty-foundation
-     \{\{/is_recovery\}\}
+     {{/is_recovery}}

    tags:
      - "service:dependency-api"
      - "env:live"
      - "team:foundation"
production, dependency, Service (v1) has changed:
  # Source: flow-generic/templates/service.yaml
  apiVersion: v1
  kind: Service
  metadata:
    name: dependency
    labels:
-     helm.sh/chart: flow-generic-1.3.33
+     helm.sh/chart: flow-generic-1.3.36
      app.kubernetes.io/instance: dependency-api
      app.kubernetes.io/managed-by: Tiller
      app.kubernetes.io/name: dependency-api
      app: dependency-api
  spec:
    ports:
      - port: 80
        targetPort: http
        protocol: TCP
        name: http
    selector:
      app.kubernetes.io/name: dependency-api
      app.kubernetes.io/instance: dependency-api
production, dependency-api-live, Deployment (apps) has changed:
  # Source: flow-generic/templates/deployment.yaml
  apiVersion: apps/v1
  kind: Deployment
  metadata:
    name: dependency-api-live
    labels:
-     helm.sh/chart: flow-generic-1.3.33
+     helm.sh/chart: flow-generic-1.3.36
      app.kubernetes.io/instance: dependency-api
      app.kubernetes.io/managed-by: Tiller
      app.kubernetes.io/name: dependency-api
      app.kubernetes.io/stage: live
      app: dependency-api
      tags.datadoghq.com/service: dependency-api
      tags.datadoghq.com/env: live
-     tags.datadoghq.com/version: 0.9.23
-     flow.io/version: 0.9.23
+     tags.datadoghq.com/version: 0.9.23-1-gbbe0ce9
+     flow.io/version: 0.9.23-1-gbbe0ce9
      flow.io/team: foundation
  spec:
    replicas: 1
    
    selector:
      matchLabels:
        app.kubernetes.io/name: dependency-api
        app.kubernetes.io/instance: dependency-api
        app.kubernetes.io/stage: live
    template:
      metadata:
        labels:
          app.kubernetes.io/name: dependency-api
          app.kubernetes.io/instance: dependency-api
          app.kubernetes.io/stage: live
          app: dependency-api
          tags.datadoghq.com/service: dependency-api
          tags.datadoghq.com/env: live
-         tags.datadoghq.com/version: 0.9.23
-         flow.io/version: 0.9.23
+         tags.datadoghq.com/version: 0.9.23-1-gbbe0ce9
+         flow.io/version: 0.9.23-1-gbbe0ce9
          flow.io/team: foundation
        annotations:
          iam.amazonaws.com/role: arn:aws:iam::479720515435:role/ecsInstanceRole
          sumologic.com/sourceCategory: dependency-api
          sidecar.istio.io/logLevel: info
          proxy.istio.io/config: '{"terminationDrainDuration": 30s}'
      spec:
        terminationGracePeriodSeconds: 60
        containers:
          - name: dependency-api
-           image: "flowcommerce/dependency-api:0.9.23"
+           image: "flowcommerce/dependency-api:0.9.23-1-gbbe0ce9"
            imagePullPolicy: IfNotPresent
            env:
              - name: JAVA_OPTS
                value: "-Xms3000m -Xmx3000m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/opt -XX:+UseG1GC -XX:+UseStringDeduplication -XX:FlightRecorderOptions=stackdepth=256"
              - name: FLOW_KUBERNETES_NODE_NAME
                valueFrom:
                  fieldRef:
                    fieldPath: spec.nodeName
              - name: FLOW_KUBERNETES_NODE_IP
                valueFrom:
                  fieldRef:
                    fieldPath: status.hostIP
              - name: FLOW_KUBERNETES_POD_NAME
                valueFrom:
                  fieldRef:
                    fieldPath: metadata.name
              - name: FLOW_KUBERNETES_POD_NAMESPACE
                valueFrom:
                  fieldRef:
                    fieldPath: metadata.namespace
              - name: FLOW_KUBERNETES_POD_IP
                valueFrom:
                  fieldRef:
                    fieldPath: status.podIP
              - name: FLOW_KUBERNETES_POD_UID
                valueFrom:
                  fieldRef:
                    fieldPath: metadata.uid
              - name: DD_SERVICE
                value: dependency-api
              - name: DD_ENV
                value: live
              - name: DD_VERSION
-               value: 0.9.23
+               value: 0.9.23-1-gbbe0ce9
              - name: DD_JMXFETCH_STATSD_HOST
                value: 'unix:///var/run/datadog/dsd.socket'
              - name: DD_DOGSTATSD_URL
                value: 'unix:///var/run/datadog/dsd.socket'
              - name: DD_TRACE_AGENT_URL
                value: 'unix:///var/run/datadog/apm.socket'
              - name: DD_SERVICE_MAPPING
                value: "postgresql:dependency-api-postgresql,java-aws-sdk:dependency-api-aws-sdk"
              - name: DD_TAGS
                value: "team:foundation"
            args: ["production"]
            ports:
              - name: http
                containerPort: 9000
                protocol: TCP
            volumeMounts:
              - name: dsdsocket
                mountPath: /var/run/datadog
            startupProbe:
              httpGet:
                path: /_internal_/healthcheck
                port: http
              failureThreshold: 30
              periodSeconds: 10
            livenessProbe:
              httpGet:
                path: /_internal_/healthcheck
                port: http
              failureThreshold: 6
              periodSeconds: 10
              timeoutSeconds: 5
            readinessProbe:
              httpGet:
                path: /_internal_/healthcheck
                port: http
              failureThreshold: 3
              periodSeconds: 10
              timeoutSeconds: 5
            resources:
              limits:
                cpu: 1
                memory: 3400Mi
              requests:
                cpu: 0.05
                memory: 3400Mi
              
        nodeSelector:
          role: workers-lg
          
        imagePullSecrets:
          - name: flow-docker-hub
          
        topologySpreadConstraints:
          - maxSkew: 1
            topologyKey: topology.kubernetes.io/zone
            whenUnsatisfiable: ScheduleAnyway
            labelSelector:
              matchLabels:
                app: dependency-api
        tolerations:
          - effect: NoSchedule
            key: role
            operator: Equal
            value: workers-lg
          
        dnsConfig:
          options:
            - name: ndots
              value: "1"
        volumes:
          - name: dsdsocket
            hostPath:
              path: /var/run/datadog/
              type: DirectoryOrCreate
    strategy:
      type: RollingUpdate
production, dependency-api-live, VirtualService (networking.istio.io) has changed:
  # Source: flow-generic/templates/istio.yaml
  apiVersion: networking.istio.io/v1alpha3
  kind: VirtualService
  metadata:
    name: dependency-api-live
    labels:
      app.kubernetes.io/name: dependency-api
-     helm.sh/chart: flow-generic-1.3.33
+     helm.sh/chart: flow-generic-1.3.36
      app.kubernetes.io/instance: dependency-api
      app.kubernetes.io/managed-by: Tiller
      app: dependency-api
  spec:
    hosts:
      - dependency
      - dependency.api.flow.io
    gateways:
      - mesh
      - dependency-flow-io
    http:
    - route:
      - destination:
          host: dependency
          port:
            number: 80
          subset:  live
production, dependency-flow-io, Gateway (networking.istio.io) has changed:
  # Source: flow-generic/templates/ingress-defaults.yaml
  apiVersion: networking.istio.io/v1alpha3
  kind: Gateway
  metadata:
    name: dependency-flow-io
    labels:
      app.kubernetes.io/name: dependency-api
-     helm.sh/chart: flow-generic-1.3.33
+     helm.sh/chart: flow-generic-1.3.36
      app.kubernetes.io/instance: dependency-api
      app.kubernetes.io/managed-by: Tiller
      app: dependency-api
    annotations:
      kubernetes.io/ingress.class: "ingressgateway"
      kubernetes.io/ingress.tld: "api.flow.io"
      external-dns.alpha.kubernetes.io/ttl: "120"
  spec:
    selector:
      istio: ingressgateway
    servers:
      - port:
          number: 80
          name: http
          protocol: HTTP2
        hosts:
          - dependency.api.flow.io
        tls:
          httpsRedirect: true
      - port:
          number: 443
          name: https
          protocol: HTTP2
        hosts:
          - dependency.api.flow.io
datadog, dependency-api-httpaveragelatency, DatadogMonitor (datadoghq.com) has been added:
+ # Source: flow-generic/templates/ddmonitor-http-averagelatency.yaml
+ apiVersion: datadoghq.com/v1alpha1
+ kind: DatadogMonitor
+ metadata:
+   name: dependency-api-httpaveragelatency
+   namespace: datadog
+ spec:
+   options:
+     thresholds:
+       warning: "0.3"
+       critical: "20"
+   query: 'avg(last_10m):(default_zero(avg:trace.akka_http.request{env:live,service:dependency-api })) > 20'
+   type: "query alert"
+   name: "[generated] Service dependency-api has a high average latency on env:live"
+   message: |
+     dependency-api average latency is too high.
+ 
+     Comment: 
+ 
+ 
+     {{#is_warning}}
+       @slack-team-foundation-notifications
+     {{/is_warning}}
+ 
+     {{#is_alert}}
+       @pagerduty-foundation
+     {{/is_alert}}
+           
+     {{#is_alert_recovery}}
+       @pagerduty-foundation
+     {{/is_alert_recovery}}
+     
+     {{#is_recovery}}
+       @pagerduty-foundation
+     {{/is_recovery}}
+ 
+   tags:
+     - "service:dependency-api"
+     - "env:live"
+     - "team:foundation"

flow-tech avatar Feb 10 '23 09:02 flow-tech