Skip to content
Snippets Groups Projects
Commit 117707df authored by Alex Ellis (VMware), committed by Alex Ellis
Browse files

Enable backoff/retries on scaling up


- this change is needed for Docker Swarm which may give an error
when several concurrent requests come in to scale a deployment.

Tested on Docker Swarm before/after with the hey tool and figlet
scaled down to zero replicas.

Signed-off-by: Alex Ellis (VMware) <alexellis2@gmail.com>
parent 446c8672
No related branches found
No related tags found
No related merge requests found
......@@ -57,12 +57,31 @@ func (f *FunctionScaler) Scale(functionName string) FunctionScaleResult {
minReplicas = queryResponse.MinReplicas
}
log.Printf("[Scale] function=%s 0 => %d requested", functionName, minReplicas)
scaleResult := backoff(func(attempt int) error {
queryResponse, err := f.Config.ServiceQuery.GetReplicas(functionName)
if err != nil {
return err
}
f.Cache.Set(functionName, queryResponse)
if queryResponse.Replicas > 0 {
return nil
}
log.Printf("[Scale %d] function=%s 0 => %d requested", attempt, functionName, minReplicas)
setScaleErr := f.Config.ServiceQuery.SetReplicas(functionName, minReplicas)
if setScaleErr != nil {
return fmt.Errorf("unable to scale function [%s], err: %s", functionName, setScaleErr)
}
setScaleErr := f.Config.ServiceQuery.SetReplicas(functionName, minReplicas)
if setScaleErr != nil {
return nil
}, int(f.Config.SetScaleRetries), f.Config.FunctionPollInterval)
if scaleResult != nil {
return FunctionScaleResult{
Error: fmt.Errorf("unable to scale function [%s], err: %s", functionName, err),
Error: scaleResult,
Available: false,
Found: true,
Duration: time.Since(start),
......@@ -106,3 +125,23 @@ func (f *FunctionScaler) Scale(functionName string) FunctionScaleResult {
Duration: time.Since(start),
}
}
// routine is a unit of work that backoff can retry. It receives the
// zero-based attempt number and returns nil on success, or an error to
// request another attempt.
type routine func(attempt int) error

// backoff calls r up to attempts times, pausing for interval between
// consecutive attempts, and stops as soon as r returns nil.
//
// It returns nil on the first successful attempt; otherwise it returns the
// error produced by the last attempt. Every failed attempt is logged.
// If attempts is zero or negative, r is never called and nil is returned.
func backoff(r routine, attempts int, interval time.Duration) error {
	var err error

	for i := 0; i < attempts; i++ {
		if err = r(i); err == nil {
			return nil
		}
		log.Printf("Attempt: %d, had error: %s\n", i, err)

		// Only wait when another attempt will follow; sleeping after the
		// final failure would just delay reporting the error to the caller.
		if i < attempts-1 {
			time.Sleep(interval)
		}
	}

	return err
}
......@@ -9,7 +9,8 @@ type ScalingConfig struct {
// MaxPollCount attempts to query a function before giving up
MaxPollCount uint
// FunctionPollInterval delay or interval between polling a function's readiness status
// FunctionPollInterval delay or interval between polling a function's
// readiness status
FunctionPollInterval time.Duration
// CacheExpiry life-time for a cache entry before considering invalid
......@@ -17,4 +18,8 @@ type ScalingConfig struct {
// ServiceQuery queries available/ready replicas for function
ServiceQuery ServiceQuery
// SetScaleRetries is the number of times to try scaling a function before
// giving up due to errors
SetScaleRetries uint
}
......@@ -137,7 +137,8 @@ func main() {
if config.ScaleFromZero {
scalingConfig := scaling.ScalingConfig{
MaxPollCount: uint(1000),
FunctionPollInterval: time.Millisecond * 10,
SetScaleRetries: uint(20),
FunctionPollInterval: time.Millisecond * 50,
CacheExpiry: time.Second * 5, // freshness of replica values before going stale
ServiceQuery: alertHandler,
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment