diff --git a/gateway/server.go b/gateway/server.go index a1b96a16ea587be75095d665600e1826337afbf3..ef39e65d0ac559527f21a7bbfb386952a9fe37e8 100644 --- a/gateway/server.go +++ b/gateway/server.go @@ -32,12 +32,21 @@ func scaleService(req requests.PrometheusAlert, c *client.Client) error { } else { return err } - } else { - replicas = *service.Spec.Mode.Replicated.Replicas - uint64(5) - if replicas <= 0 { + } else { // Resolved event. + // Previously decremented by 5, but the resolved event only fires once, so scale straight down to 1 replica. + if *service.Spec.Mode.Replicated.Replicas > 1 { + // replicas = *service.Spec.Mode.Replicated.Replicas - uint64(5) + // if replicas < 1 { + // replicas = 1 + // } + // return nil + replicas = 1 + } else { + return nil } } + log.Printf("Scaling %s to %d replicas.\n", serviceName, replicas) service.Spec.Mode.Replicated.Replicas = &replicas diff --git a/prometheus/alertmanager.yml b/prometheus/alertmanager.yml index 8f647258d5e33a4fe28a14fb3d52477393a92432..93282307f9e0b94ced9371a2250c7b0d762ce6e3 100644 --- a/prometheus/alertmanager.yml +++ b/prometheus/alertmanager.yml @@ -25,15 +25,15 @@ route: # This way ensures that you get multiple alerts for the same group that start # firing shortly after another are batched together on the first # notification. - group_wait: 30s + group_wait: 5s # When the first notification was sent, wait 'group_interval' to send a batch # of new alerts that started firing for that group. - group_interval: 5m + group_interval: 10s # If an alert has successfully been sent, wait 'repeat_interval' to # resend them. - repeat_interval: 3h + repeat_interval: 30s # A default receiver receiver: scale-up @@ -66,3 +66,4 @@ receivers: webhook_configs: - url: http://gateway:8080/system/alert send_resolved: true +