From b3b6f49872bfb222da0b515511de56295418e18b Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sun, 2 Jul 2017 23:32:00 +0200 Subject: [PATCH 01/12] circuit breaker: consecutive and experimental rate breaker --- circuit/binarysampler.go | 65 ++++++ circuit/binarysampler_test.go | 77 +++++++ circuit/breaker.go | 138 ++++++++++++ circuit/breaker_test.go | 184 +++++++++++++++ circuit/gobreaker.go | 34 +++ circuit/list.go | 77 +++++++ circuit/list_test.go | 147 ++++++++++++ circuit/registry.go | 103 +++++++++ circuit/registry_test.go | 408 ++++++++++++++++++++++++++++++++++ proxy/breaker_test.go | 303 +++++++++++++++++++++++++ proxy/context.go | 7 + proxy/failingbackend_test.go | 208 +++++++++++++++++ proxy/proxy.go | 115 +++++++--- proxy/proxy_test.go | 22 +- 14 files changed, 1844 insertions(+), 44 deletions(-) create mode 100644 circuit/binarysampler.go create mode 100644 circuit/binarysampler_test.go create mode 100644 circuit/breaker.go create mode 100644 circuit/breaker_test.go create mode 100644 circuit/gobreaker.go create mode 100644 circuit/list.go create mode 100644 circuit/list_test.go create mode 100644 circuit/registry.go create mode 100644 circuit/registry_test.go create mode 100644 proxy/breaker_test.go create mode 100644 proxy/failingbackend_test.go diff --git a/circuit/binarysampler.go b/circuit/binarysampler.go new file mode 100644 index 0000000000..8a4d1677ad --- /dev/null +++ b/circuit/binarysampler.go @@ -0,0 +1,65 @@ +package circuit + +// TODO: apply a TTL for stale windows. Try to calculate it from the existing +// input arguments + +// contains a series of events with 0 or 1 values, e.g. errors or successes, +// within a limited window. +// count contains the number of events with the value of 1 in the window. +// compresses the event storage by 64. +type binarySampler struct { + size int + filled int + frames []uint64 + pad uint64 + count int +} + +func newBinarySampler(size int) *binarySampler { + if size <= 0 { + size = 1 + } + + return &binarySampler{ + size: size, + pad: 64 - uint64(size)%64, + } +} + +func highestSet(frame, pad uint64) bool { + return frame&(1<<(63-pad)) != 0 +} + +func shift(frames []uint64) { + highestFrame := len(frames) - 1 + for i := highestFrame; i >= 0; i-- { + h := highestSet(frames[i], 0) + frames[i] = frames[i] << 1 + if h && i < highestFrame { + frames[i+1] |= 1 + } + } +} + +func (s *binarySampler) tick(set bool) { + filled := s.filled == s.size + + if filled && highestSet(s.frames[len(s.frames)-1], s.pad) { + s.count-- + } + + if !filled { + if len(s.frames) <= s.filled/64 { + s.frames = append(s.frames, 0) + } + + s.filled++ + } + + shift(s.frames) + + if set { + s.count++ + s.frames[0] |= 1 + } +} diff --git a/circuit/binarysampler_test.go b/circuit/binarysampler_test.go new file mode 100644 index 0000000000..1474632db6 --- /dev/null +++ b/circuit/binarysampler_test.go @@ -0,0 +1,77 @@ +package circuit + +import "testing" + +func TestBinarySampler(t *testing.T) { + expectCount := func(t *testing.T, s *binarySampler, c int) { + if s.count != c { + t.Errorf("unexpected count, got: %d, expected: %d", s.count, c) + } + } + + t.Run("wrong init arg defaults to 1", func(t *testing.T) { + s := newBinarySampler(-3) + expectCount(t, s, 0) + s.tick(true) + expectCount(t, s, 1) + s.tick(true) + expectCount(t, s, 1) + }) + + t.Run("returns right count when not filled", func(t *testing.T) { + s := newBinarySampler(6) + s.tick(true) + s.tick(false) + s.tick(true) + expectCount(t, s, 2) + }) + + t.Run("returns right count after filled", func(t *testing.T) { + s := newBinarySampler(3) + s.tick(false) + s.tick(true) + s.tick(false) + s.tick(true) + expectCount(t, s, 2) + }) + + t.Run("shifts the reservoir when filled", func(t *testing.T) { + s := newBinarySampler(3) + s.tick(true) + s.tick(false) + s.tick(true) + s.tick(false) + expectCount(t, s, 1) + }) + + t.Run("shifts through multiple frames", func(t *testing.T) { + const size = 314 + s := newBinarySampler(size) + + for i := 0; i < size+size/2; i++ { + s.tick(true) + } + + expectCount(t, s, size) + }) + + t.Run("uses the right 'amount of memory'", func(t *testing.T) { + const size = 314 + s := newBinarySampler(size) + for i := 0; i < size+size/2; i++ { + s.tick(true) + } + + expectedFrames := size / 64 + if size%64 > 0 { + expectedFrames++ + } + + if len(s.frames) != expectedFrames { + t.Errorf( + "unexpected number of frames, got: %d, expected: %d", + len(s.frames), expectedFrames, + ) + } + }) +} diff --git a/circuit/breaker.go b/circuit/breaker.go new file mode 100644 index 0000000000..429d29955b --- /dev/null +++ b/circuit/breaker.go @@ -0,0 +1,138 @@ +package circuit + +import ( + "sync" + "time" +) + +type BreakerType int + +const ( + BreakerNone BreakerType = iota + ConsecutiveFailures + FailureRate +) + +type BreakerSettings struct { + Type BreakerType + Host string + Window, Failures int + Timeout time.Duration + HalfOpenRequests int + disabled bool +} + +type breakerImplementation interface { + Allow() (func(bool), bool) + Closed() bool +} + +// represents a single circuit breaker +type Breaker struct { + settings BreakerSettings + ts time.Time + prev, next *Breaker + impl breakerImplementation + mx *sync.Mutex + sampler *binarySampler +} + +func applySettings(to, from BreakerSettings) BreakerSettings { + if to.Type == BreakerNone { + to.Type = from.Type + + if from.Type == ConsecutiveFailures { + to.Failures = from.Failures + } + + if from.Type == FailureRate { + to.Window = from.Window + to.Failures = from.Failures + } + } + + if to.Timeout == 0 { + to.Timeout = from.Timeout + } + + if to.HalfOpenRequests == 0 { + to.HalfOpenRequests = from.HalfOpenRequests + } + + return to +} + +func newBreaker(s BreakerSettings) *Breaker { + b := &Breaker{ + settings: s, + mx: &sync.Mutex{}, + } + b.impl = newGobreaker(s, b.readyToTrip) + return b +} + +func (b *Breaker) rateReadyToTrip() bool { + b.mx.Lock() + defer b.mx.Unlock() + + if b.sampler == nil { + return false + } + + ready := b.sampler.count >= b.settings.Failures + if ready { + b.sampler = nil + } + + return ready +} + +func (b *Breaker) readyToTrip(failures int) bool { + switch b.settings.Type { + case ConsecutiveFailures: + return failures >= b.settings.Failures + case FailureRate: + return b.rateReadyToTrip() + default: + return false + } +} + +func (b *Breaker) tick(success bool) { + b.mx.Lock() + defer b.mx.Unlock() + + if b.sampler == nil { + if !b.impl.Closed() { + return + } + + b.sampler = newBinarySampler(b.settings.Window) + } + + // count the failures in closed state + b.sampler.tick(!success) +} + +func (b *Breaker) rateAllow() (func(bool), bool) { + done, ok := b.impl.Allow() + if !ok { + return nil, false + } + + return func(success bool) { + b.tick(success) + done(success) + }, true +} + +func (b *Breaker) Allow() (func(bool), bool) { + switch b.settings.Type { + case ConsecutiveFailures: + return b.impl.Allow() + case FailureRate: + return b.rateAllow() + default: + return nil, false + } +} diff --git a/circuit/breaker_test.go b/circuit/breaker_test.go new file mode 100644 index 0000000000..eddbfea79d --- /dev/null +++ b/circuit/breaker_test.go @@ -0,0 +1,184 @@ +package circuit + +import ( + "math/rand" + "testing" + "time" +) + +func times(n int, f func()) { + for n > 0 { + f() + n-- + } +} + +func createDone(t *testing.T, success bool, b *Breaker) func() { + return func() { + if t.Failed() { + return + } + + done, ok := b.Allow() + if !ok { + t.Error("breaker is unexpectedly open") + return + } + + done(success) + } +} + +func succeed(t *testing.T, b *Breaker) func() { return createDone(t, true, b) } +func fail(t *testing.T, b *Breaker) func() { return createDone(t, false, b) } +func failOnce(t *testing.T, b *Breaker) { fail(t, b)() } + +func checkClosed(t *testing.T, b *Breaker) { + if _, ok := b.Allow(); !ok { + t.Error("breaker is not closed") + } +} + +func checkOpen(t *testing.T, b *Breaker) { + if _, ok := b.Allow(); ok { + t.Error("breaker is not open") + } +} + +func TestConsecutiveFailures(t *testing.T) { + s := BreakerSettings{ + Type: ConsecutiveFailures, + Failures: 3, + HalfOpenRequests: 3, + Timeout: 3 * time.Millisecond, + } + + waitTimeout := func() { + time.Sleep(s.Timeout) + } + + t.Run("new breaker closed", func(t *testing.T) { + b := newBreaker(s) + checkClosed(t, b) + }) + + t.Run("does not open on not enough failures", func(t *testing.T) { + b := newBreaker(s) + times(s.Failures-1, fail(t, b)) + checkClosed(t, b) + }) + + t.Run("open on failures", func(t *testing.T) { + b := newBreaker(s) + times(s.Failures, fail(t, b)) + checkOpen(t, b) + }) + + t.Run("go half open, close after required successes", func(t *testing.T) { + b := newBreaker(s) + times(s.Failures, fail(t, b)) + waitTimeout() + times(s.HalfOpenRequests, succeed(t, b)) + checkClosed(t, b) + }) + + t.Run("go half open, reopen after a fail within the required successes", func(t *testing.T) { + b := newBreaker(s) + times(s.Failures, fail(t, b)) + waitTimeout() + times(s.HalfOpenRequests-1, succeed(t, b)) + failOnce(t, b) + checkOpen(t, b) + }) +} + +func TestRateBreaker(t *testing.T) { + s := BreakerSettings{ + Type: FailureRate, + Window: 6, + Failures: 3, + HalfOpenRequests: 3, + Timeout: 3 * time.Millisecond, + } + + t.Run("new breaker closed", func(t *testing.T) { + b := newBreaker(s) + checkClosed(t, b) + }) + + t.Run("doesn't open if failure count is not within a window", func(t *testing.T) { + b := newBreaker(s) + times(1, fail(t, b)) + times(2, succeed(t, b)) + checkClosed(t, b) + times(1, fail(t, b)) + times(2, succeed(t, b)) + checkClosed(t, b) + times(1, fail(t, b)) + times(2, succeed(t, b)) + checkClosed(t, b) + }) + + t.Run("opens on reaching the rate", func(t *testing.T) { + b := newBreaker(s) + times(s.Window, succeed(t, b)) + times(s.Failures, fail(t, b)) + checkOpen(t, b) + }) +} + +func TestRateBreakerFuzzy(t *testing.T) { + if testing.Short() { + t.Skip() + } + + const ( + concurrentRequests = 64 + requestDuration = 6 * time.Microsecond + requestDelay = 6 * time.Microsecond + duration = 3 * time.Second + ) + + s := BreakerSettings{ + Type: FailureRate, + Window: 300, + Failures: 120, + HalfOpenRequests: 12, + Timeout: 3 * time.Millisecond, + } + + b := newBreaker(s) + + stop := make(chan struct{}) + + successChance := func() bool { + return rand.Intn(s.Window) > s.Failures + } + + runAgent := func() { + for { + select { + case <-stop: + default: + } + + done, ok := b.Allow() + time.Sleep(requestDuration) + if ok { + done(successChance()) + } + + time.Sleep(requestDelay) + } + } + + time.AfterFunc(duration, func() { + close(stop) + }) + + for i := 0; i < concurrentRequests; i++ { + go runAgent() + } + + <-stop +} diff --git a/circuit/gobreaker.go b/circuit/gobreaker.go new file mode 100644 index 0000000000..487726f294 --- /dev/null +++ b/circuit/gobreaker.go @@ -0,0 +1,34 @@ +package circuit + +import "github.com/sony/gobreaker" + +// wrapper for changing interface: +type gobreakerWrap struct { + gb *gobreaker.TwoStepCircuitBreaker +} + +func newGobreaker(s BreakerSettings, readyToTrip func(int) bool) gobreakerWrap { + return gobreakerWrap{gobreaker.NewTwoStepCircuitBreaker(gobreaker.Settings{ + Name: s.Host, + MaxRequests: uint32(s.HalfOpenRequests), + Timeout: s.Timeout, + ReadyToTrip: func(c gobreaker.Counts) bool { + return readyToTrip(int(c.ConsecutiveFailures)) + }, + })} +} + +func (w gobreakerWrap) Allow() (func(bool), bool) { + done, err := w.gb.Allow() + + // this error can only indicate that the breaker is not closed + if err != nil { + return nil, false + } + + return done, true +} + +func (w gobreakerWrap) Closed() bool { + return w.gb.State() == gobreaker.StateClosed +} diff --git a/circuit/list.go b/circuit/list.go new file mode 100644 index 0000000000..8f3a6045c0 --- /dev/null +++ b/circuit/list.go @@ -0,0 +1,77 @@ +package circuit + +// a simple linked list to keep accessed breakers sorted in +// constant time. It can return the first consecutive items +// that match a condition (e.g. the ones that were inactive +// for a while). +type list struct { + first, last *Breaker +} + +func (l *list) remove(from, to *Breaker) { + if from == nil || l.first == nil { + return + } + + if from == l.first { + l.first = to.next + } else if from.prev != nil { + from.prev.next = to.next + } + + if to == l.last { + l.last = from.prev + } else if to.next != nil { + to.next.prev = from.prev + } + + from.prev = nil + to.next = nil +} + +func (l *list) append(from, to *Breaker) { + if from == nil { + return + } + + if l.last == nil { + l.first = from + l.last = to + return + } + + l.last.next = from + from.prev = l.last + l.last = to +} + +// appends an item to the end of the list. If the list already +// contains the item, moves it to the end. +func (l *list) appendLast(b *Breaker) { + l.remove(b, b) + l.append(b, b) +} + +// returns the first consecutive items that match the predicate +func (l *list) getMatchingHead(predicate func(*Breaker) bool) (first, last *Breaker) { + current := l.first + for { + if current == nil || !predicate(current) { + return + } + + if first == nil { + first = current + } + + last, current = current, current.next + } +} + +// takes the first consecutive items that match a predicate, +// removes them from the list, and returns them. +func (l *list) dropHeadIf(predicate func(*Breaker) bool) (from, to *Breaker) { + from, to = l.getMatchingHead(predicate) + l.remove(from, to) + return +} diff --git a/circuit/list_test.go b/circuit/list_test.go new file mode 100644 index 0000000000..caea3c13f2 --- /dev/null +++ b/circuit/list_test.go @@ -0,0 +1,147 @@ +package circuit + +import "testing" + +func checkListDirection(t *testing.T, first *Breaker, reverse bool, expected ...*Breaker) { +} + +func checkList(t *testing.T, first *Breaker, expected ...*Breaker) { + current := first + for i, expected := range expected { + if current == nil { + t.Error("less items in the list than expected") + return + } + + if i == 0 && current.prev != nil { + t.Error("damaged list") + return + } + + if expected != current { + t.Error("invalid order") + return + } + + current = current.next + } + + if current != nil { + t.Error("more items in the list than expected") + } +} + +func appendAll(l *list, items ...*Breaker) { + for _, i := range items { + l.appendLast(i) + } +} + +func TestListAppend(t *testing.T) { + t.Run("append", func(t *testing.T) { + l := &list{} + + b0 := newBreaker(BreakerSettings{}) + b1 := newBreaker(BreakerSettings{}) + b2 := newBreaker(BreakerSettings{}) + appendAll(l, b0, b1, b2) + checkList(t, l.first, b0, b1, b2) + }) + + t.Run("reappend", func(t *testing.T) { + l := &list{} + + b0 := newBreaker(BreakerSettings{}) + b1 := newBreaker(BreakerSettings{}) + b2 := newBreaker(BreakerSettings{}) + appendAll(l, b0, b1, b2) + + l.appendLast(b1) + + checkList(t, l.first, b0, b2, b1) + }) + + t.Run("reappend first", func(t *testing.T) { + l := &list{} + + b0 := newBreaker(BreakerSettings{}) + b1 := newBreaker(BreakerSettings{}) + b2 := newBreaker(BreakerSettings{}) + appendAll(l, b0, b1, b2) + + l.appendLast(b0) + + checkList(t, l.first, b1, b2, b0) + }) + + t.Run("reappend last", func(t *testing.T) { + l := &list{} + + b0 := newBreaker(BreakerSettings{}) + b1 := newBreaker(BreakerSettings{}) + b2 := newBreaker(BreakerSettings{}) + appendAll(l, b0, b1, b2) + + l.appendLast(b2) + + checkList(t, l.first, b0, b1, b2) + }) +} + +func TestDropHead(t *testing.T) { + createToDrop := func() *Breaker { return newBreaker(BreakerSettings{Host: "drop"}) } + createNotToDrop := func() *Breaker { return newBreaker(BreakerSettings{Host: "no-drop"}) } + predicate := func(item *Breaker) bool { return item.settings.Host == "drop" } + + t.Run("from empty", func(t *testing.T) { + l := &list{} + drop, _ := l.dropHeadIf(predicate) + checkList(t, l.first) + checkList(t, drop) + }) + + t.Run("drop matching", func(t *testing.T) { + l := &list{} + + b0 := createToDrop() + b1 := createToDrop() + b2 := createNotToDrop() + b3 := createToDrop() + b4 := createNotToDrop() + appendAll(l, b0, b1, b2, b3, b4) + + drop, _ := l.dropHeadIf(predicate) + checkList(t, l.first, b2, b3, b4) + checkList(t, drop, b0, b1) + }) + + t.Run("none match", func(t *testing.T) { + l := &list{} + + b0 := createNotToDrop() + b1 := createToDrop() + b2 := createNotToDrop() + b3 := createToDrop() + b4 := createNotToDrop() + appendAll(l, b0, b1, b2, b3, b4) + + drop, _ := l.dropHeadIf(predicate) + checkList(t, l.first, b0, b1, b2, b3, b4) + checkList(t, drop) + }) + + t.Run("all match", func(t *testing.T) { + l := &list{} + + b0 := createToDrop() + b1 := createToDrop() + b2 := createToDrop() + b3 := createToDrop() + b4 := createToDrop() + appendAll(l, b0, b1, b2, b3, b4) + + drop, _ := l.dropHeadIf(predicate) + checkList(t, l.first) + checkList(t, drop, b0, b1, b2, b3, b4) + }) +} diff --git a/circuit/registry.go b/circuit/registry.go new file mode 100644 index 0000000000..9188327998 --- /dev/null +++ b/circuit/registry.go @@ -0,0 +1,103 @@ +package circuit + +import "time" + +type Options struct { + Defaults BreakerSettings + HostSettings []BreakerSettings + IdleTTL time.Duration +} + +type Registry struct { + defaults BreakerSettings + hostSettings map[string]BreakerSettings + idleTTL time.Duration + lookup map[BreakerSettings]*Breaker + access *list + sync chan *Registry +} + +func NewRegistry(o Options) *Registry { + hs := make(map[string]BreakerSettings) + for _, s := range o.HostSettings { + hs[s.Host] = applySettings(s, o.Defaults) + } + + if o.IdleTTL <= 0 { + o.IdleTTL = time.Hour + } + + r := &Registry{ + defaults: o.Defaults, + hostSettings: hs, + idleTTL: o.IdleTTL, + lookup: make(map[BreakerSettings]*Breaker), + access: &list{}, + sync: make(chan *Registry, 1), + } + + r.sync <- r + return r +} + +func (r *Registry) synced(f func()) { + r = <-r.sync + f() + r.sync <- r +} + +func (r *Registry) applySettings(s BreakerSettings) BreakerSettings { + config, ok := r.hostSettings[s.Host] + if !ok { + config = r.defaults + } + + return applySettings(s, config) +} + +func (r *Registry) dropLookup(b *Breaker) { + for b != nil { + delete(r.lookup, b.settings) + b = b.next + } +} + +func (r *Registry) Get(s BreakerSettings) *Breaker { + // we check for host, because we don't want to use shared global breakers + if s.disabled || s.Host == "" { + return nil + } + + // apply host specific and global defaults when not set in the request + s = r.applySettings(s) + if s.Type == BreakerNone { + return nil + } + + var b *Breaker + r.synced(func() { + now := time.Now() + + var ok bool + b, ok = r.lookup[s] + if !ok { + // if the breaker doesn't exist with the requested settings, + // check if there is any to evict, evict if yet, and create + // a new one + + drop, _ := r.access.dropHeadIf(func(b *Breaker) bool { + return now.Sub(b.ts) > r.idleTTL + }) + + r.dropLookup(drop) + b = newBreaker(s) + r.lookup[s] = b + } + + // append/move the breaker to the last position of the access history + b.ts = now + r.access.appendLast(b) + }) + + return b +} diff --git a/circuit/registry_test.go b/circuit/registry_test.go new file mode 100644 index 0000000000..ad773bc9f7 --- /dev/null +++ b/circuit/registry_test.go @@ -0,0 +1,408 @@ +package circuit + +import ( + "math/rand" + "testing" + "time" +) + +func TestRegistry(t *testing.T) { + createSettings := func(cf int) BreakerSettings { + return BreakerSettings{ + Type: ConsecutiveFailures, + Failures: cf, + } + } + + createHostSettings := func(h string, cf int) BreakerSettings { + s := createSettings(cf) + s.Host = h + return s + } + + createDisabledSettings := func() BreakerSettings { + return BreakerSettings{disabled: true} + } + + checkNil := func(t *testing.T, b *Breaker) { + if b != nil { + t.Error("unexpected breaker") + } + } + + checkNotNil := func(t *testing.T, b *Breaker) { + if b == nil { + t.Error("failed to receive a breaker") + } + } + + checkSettings := func(t *testing.T, left, right BreakerSettings) { + if left != right { + t.Error("failed to receive breaker with the right settings") + t.Log(left) + t.Log(right) + } + } + + checkWithoutHost := func(t *testing.T, b *Breaker, s BreakerSettings) { + checkNotNil(t, b) + sb := b.settings + sb.Host = "" + checkSettings(t, sb, s) + } + + checkWithHost := func(t *testing.T, b *Breaker, s BreakerSettings) { + checkNotNil(t, b) + checkSettings(t, b.settings, s) + } + + t.Run("no settings", func(t *testing.T) { + r := NewRegistry(Options{}) + + b := r.Get(BreakerSettings{Host: "foo"}) + checkNil(t, b) + }) + + t.Run("only default settings", func(t *testing.T) { + d := createSettings(5) + r := NewRegistry(Options{Defaults: d}) + + b := r.Get(BreakerSettings{Host: "foo"}) + checkWithoutHost(t, b, r.defaults) + }) + + t.Run("only host settings", func(t *testing.T) { + h0 := createHostSettings("foo", 5) + h1 := createHostSettings("bar", 5) + r := NewRegistry(Options{HostSettings: []BreakerSettings{h0, h1}}) + + b := r.Get(BreakerSettings{Host: "foo"}) + checkWithHost(t, b, h0) + + b = r.Get(BreakerSettings{Host: "bar"}) + checkWithHost(t, b, h1) + + b = r.Get(BreakerSettings{Host: "baz"}) + checkNil(t, b) + }) + + t.Run("default and host settings", func(t *testing.T) { + d := createSettings(5) + h0 := createHostSettings("foo", 5) + h1 := createHostSettings("bar", 5) + r := NewRegistry(Options{ + Defaults: d, + HostSettings: []BreakerSettings{h0, h1}, + }) + + b := r.Get(BreakerSettings{Host: "foo"}) + checkWithHost(t, b, h0) + + b = r.Get(BreakerSettings{Host: "bar"}) + checkWithHost(t, b, h1) + + b = r.Get(BreakerSettings{Host: "baz"}) + checkWithoutHost(t, b, d) + }) + + t.Run("only custom settings", func(t *testing.T) { + r := NewRegistry(Options{}) + + cs := createHostSettings("foo", 15) + b := r.Get(cs) + checkWithHost(t, b, cs) + }) + + t.Run("only default settings, with custom", func(t *testing.T) { + d := createSettings(5) + r := NewRegistry(Options{Defaults: d}) + + cs := createHostSettings("foo", 15) + b := r.Get(cs) + checkWithHost(t, b, cs) + }) + + t.Run("only host settings, with custom", func(t *testing.T) { + h0 := createHostSettings("foo", 5) + h1 := createHostSettings("bar", 5) + r := NewRegistry(Options{HostSettings: []BreakerSettings{h0, h1}}) + + cs := createHostSettings("foo", 15) + b := r.Get(cs) + checkWithHost(t, b, cs) + + cs = createHostSettings("bar", 15) + b = r.Get(cs) + checkWithHost(t, b, cs) + + cs = createHostSettings("baz", 15) + b = r.Get(cs) + checkWithHost(t, b, cs) + }) + + t.Run("default and host settings, with custom", func(t *testing.T) { + d := createSettings(5) + h0 := createHostSettings("foo", 5) + h1 := createHostSettings("bar", 5) + r := NewRegistry(Options{ + Defaults: d, + HostSettings: []BreakerSettings{h0, h1}, + }) + + cs := createHostSettings("foo", 15) + b := r.Get(cs) + checkWithHost(t, b, cs) + + cs = createHostSettings("bar", 15) + b = r.Get(cs) + checkWithHost(t, b, cs) + + cs = createHostSettings("baz", 15) + b = r.Get(cs) + checkWithHost(t, b, cs) + }) + + t.Run("no settings and disabled", func(t *testing.T) { + r := NewRegistry(Options{}) + + b := r.Get(createDisabledSettings()) + checkNil(t, b) + }) + + t.Run("only default settings, disabled", func(t *testing.T) { + d := createSettings(5) + r := NewRegistry(Options{Defaults: d}) + + b := r.Get(createDisabledSettings()) + checkNil(t, b) + }) + + t.Run("only host settings, disabled", func(t *testing.T) { + h0 := createHostSettings("foo", 5) + h1 := createHostSettings("bar", 5) + r := NewRegistry(Options{HostSettings: []BreakerSettings{h0, h1}}) + + b := r.Get(createDisabledSettings()) + checkNil(t, b) + }) + + t.Run("default and host settings, disabled", func(t *testing.T) { + d := createSettings(5) + h0 := createHostSettings("foo", 5) + h1 := createHostSettings("bar", 5) + r := NewRegistry(Options{ + Defaults: d, + HostSettings: []BreakerSettings{h0, h1}, + }) + + b := r.Get(createDisabledSettings()) + checkNil(t, b) + }) +} + +func TestRegistryEvictIdle(t *testing.T) { + if testing.Short() { + t.Skip() + } + + options := Options{ + HostSettings: []BreakerSettings{{ + Host: "foo", + Type: ConsecutiveFailures, + Failures: 4, + }, { + Host: "bar", + Type: ConsecutiveFailures, + Failures: 5, + }, { + Host: "baz", + Type: ConsecutiveFailures, + Failures: 6, + }, { + Host: "qux", + Type: ConsecutiveFailures, + Failures: 7, + }}, + IdleTTL: 15 * time.Millisecond, + } + toEvict := options.HostSettings[2] + r := NewRegistry(options) + + get := func(host string) { + b := r.Get(BreakerSettings{Host: host}) + if b == nil { + t.Error("failed to retrieve breaker") + } + } + + get("foo") + get("bar") + get("baz") + + time.Sleep(2 * options.IdleTTL / 3) + + get("foo") + get("bar") + + time.Sleep(2 * options.IdleTTL / 3) + + get("qux") + + if len(r.lookup) != 3 || r.lookup[toEvict] != nil { + t.Error("failed to evict breaker from lookup") + return + } + + current := r.access.first + for current != nil { + if current.settings.Host == "baz" { + t.Error("failed to evict idle breaker") + return + } + + current = current.next + } +} + +func TestRegistryFuzzy(t *testing.T) { + if testing.Short() { + t.Skip() + } + + const ( + hostCount = 1200 + customSettingsCount = 120 + concurrentRequests = 2048 + requestDurationMean = 120 * time.Microsecond + requestDurationDeviation = 60 * time.Microsecond + idleTTL = time.Second + duration = 3 * time.Second + ) + + genHost := func() string { + const ( + minHostLength = 12 + maxHostLength = 36 + ) + + h := make([]byte, minHostLength+rand.Intn(maxHostLength-minHostLength)) + for i := range h { + h[i] = 'a' + byte(rand.Intn(int('z'+1-'a'))) + } + + return string(h) + } + + hosts := make([]string, hostCount) + for i := range hosts { + hosts[i] = genHost() + } + + options := Options{IdleTTL: idleTTL} + + settings := make(map[string]BreakerSettings) + for _, h := range hosts { + s := BreakerSettings{Host: h, Type: ConsecutiveFailures, Failures: 5} + options.HostSettings = append(options.HostSettings, s) + settings[h] = s + } + + r := NewRegistry(options) + + // the first customSettingsCount hosts can have corresponding custom settings + customSettings := make(map[string]BreakerSettings) + for _, h := range hosts[:customSettingsCount] { + s := settings[h] + s.Failures = 15 + customSettings[h] = s + } + + var syncToken struct{} + sync := make(chan struct{}, 1) + sync <- syncToken + synced := func(f func()) { + t := <-sync + f() + sync <- t + } + + replaceHostSettings := func(settings map[string]BreakerSettings, old, nu string) { + if s, ok := settings[old]; ok { + delete(settings, old) + s.Host = nu + settings[nu] = s + } + } + + replaceHost := func() { + synced(func() { + i := rand.Intn(len(hosts)) + old := hosts[i] + nu := genHost() + hosts[i] = nu + replaceHostSettings(settings, old, nu) + replaceHostSettings(customSettings, old, nu) + }) + } + + stop := make(chan struct{}) + + getSettings := func(useCustom bool) BreakerSettings { + var s BreakerSettings + synced(func() { + if useCustom { + s = customSettings[hosts[rand.Intn(customSettingsCount)]] + return + } + + s = settings[hosts[rand.Intn(hostCount)]] + }) + + return s + } + + requestDuration := func() time.Duration { + mean := float64(requestDurationMean) + deviation := float64(requestDurationDeviation) + return time.Duration(rand.NormFloat64()*deviation + mean) + } + + makeRequest := func(useCustom bool) { + s := getSettings(useCustom) + b := r.Get(s) + if b.settings != s { + t.Error("invalid breaker received") + close(stop) + } + + time.Sleep(requestDuration()) + } + + runAgent := func() { + for { + select { + case <-stop: + return + default: + } + + // 1% percent chance for getting a host replaced: + if rand.Intn(100) == 0 { + replaceHost() + } + + // 3% percent of the requests is custom: + makeRequest(rand.Intn(100) < 3) + } + } + + time.AfterFunc(duration, func() { + close(stop) + }) + + for i := 0; i < concurrentRequests; i++ { + go runAgent() + } + + <-stop +} diff --git a/proxy/breaker_test.go b/proxy/breaker_test.go new file mode 100644 index 0000000000..5e0c9b0292 --- /dev/null +++ b/proxy/breaker_test.go @@ -0,0 +1,303 @@ +package proxy_test + +import ( + "fmt" + "net/http" + "testing" + "time" + + "github.com/zalando/skipper/circuit" + "github.com/zalando/skipper/eskip" + "github.com/zalando/skipper/filters/builtin" + "github.com/zalando/skipper/proxy" + "github.com/zalando/skipper/proxy/proxytest" +) + +type breakerTestContext struct { + t *testing.T + proxy *proxytest.TestProxy + backend *failingBackend +} + +type scenarioStep func(*breakerTestContext) + +type breakerScenario struct { + title string + settings []circuit.BreakerSettings + steps []scenarioStep +} + +const ( + testConsecutiveFailureCount = 5 + testBreakerTimeout = 3 * time.Millisecond + testHalfOpenRequests = 3 + testRateWindow = 10 + testRateFailures = 4 +) + +func newBreakerProxy(backendURL string, settings []circuit.BreakerSettings) *proxytest.TestProxy { + r, err := eskip.Parse(fmt.Sprintf(`* -> "%s"`, backendURL)) + if err != nil { + panic(err) + } + + params := proxy.Params{ + CloseIdleConnsPeriod: -1, + } + + if len(settings) > 0 { + var breakerOptions circuit.Options + for _, si := range settings { + if si.Host == "" { + breakerOptions.Defaults = si + } + + breakerOptions.HostSettings = append(breakerOptions.HostSettings, si) + } + + params.CircuitBreakers = circuit.NewRegistry(breakerOptions) + } + + fr := builtin.MakeRegistry() + return proxytest.WithParams(fr, params, r...) +} + +func testBreaker(t *testing.T, s breakerScenario) { + b := newFailingBackend() + defer b.close() + + p := newBreakerProxy(b.url, s.settings) + defer p.Close() + + steps := s.steps + c := &breakerTestContext{ + t: t, + proxy: p, + backend: b, + } + + for !t.Failed() && len(steps) > 0 { + steps[0](c) + steps = steps[1:] + } +} + +func setBackendSucceed(c *breakerTestContext) { + c.backend.succeed() +} + +func setBackendFail(c *breakerTestContext) { + c.backend.fail() +} + +func setBackendDown(c *breakerTestContext) { + c.backend.down() +} + +func proxyRequest(c *breakerTestContext) (*http.Response, error) { + rsp, err := http.Get(c.proxy.URL) + if err != nil { + return nil, err + } + + rsp.Body.Close() + return rsp, nil +} + +func checkStatus(c *breakerTestContext, rsp *http.Response, expected int) { + if rsp.StatusCode != expected { + c.t.Errorf( + "wrong response status: %d instead of %d", + rsp.StatusCode, + expected, + ) + } +} + +func request(expectedStatus int) scenarioStep { + return func(c *breakerTestContext) { + rsp, err := proxyRequest(c) + if err != nil { + c.t.Error(err) + return + } + + checkStatus(c, rsp, expectedStatus) + } +} + +func requestOpen(c *breakerTestContext) { + rsp, err := proxyRequest(c) + if err != nil { + c.t.Error(err) + return + } + + checkStatus(c, rsp, 503) + if rsp.Header.Get("X-Circuit-Open") != "true" { + c.t.Error("failed to set circuit open header") + } +} + +func checkBackendCounter(count int) scenarioStep { + return func(c *breakerTestContext) { + if c.backend.counter() != count { + c.t.Error("invalid number of requests on the backend") + } + + c.backend.resetCounter() + } +} + +// as in scenario step N times +func times(n int, s scenarioStep) scenarioStep { + return func(c *breakerTestContext) { + for !c.t.Failed() && n > 0 { + s(c) + n-- + } + } +} + +func wait(d time.Duration) scenarioStep { + return func(*breakerTestContext) { + time.Sleep(d) + } +} + +func trace(msg string) scenarioStep { + return func(*breakerTestContext) { + println(msg) + } +} + +func TestBreakerConsecutive(t *testing.T) { + for _, s := range []breakerScenario{{ + title: "no breaker", + steps: []scenarioStep{ + request(200), + checkBackendCounter(1), + setBackendFail, + times(testConsecutiveFailureCount, request(500)), + checkBackendCounter(testConsecutiveFailureCount), + request(500), + checkBackendCounter(1), + }, + }, { + title: "open", + settings: []circuit.BreakerSettings{{ + Type: circuit.ConsecutiveFailures, + Failures: testConsecutiveFailureCount, + }}, + steps: []scenarioStep{ + request(200), + checkBackendCounter(1), + setBackendFail, + times(testConsecutiveFailureCount, request(500)), + checkBackendCounter(testConsecutiveFailureCount), + requestOpen, + checkBackendCounter(0), + }, + }, { + title: "open, fail to close", + settings: []circuit.BreakerSettings{{ + Type: circuit.ConsecutiveFailures, + Failures: testConsecutiveFailureCount, + Timeout: testBreakerTimeout, + HalfOpenRequests: testHalfOpenRequests, + }}, + steps: []scenarioStep{ + request(200), + checkBackendCounter(1), + setBackendFail, + times(testConsecutiveFailureCount, request(500)), + checkBackendCounter(testConsecutiveFailureCount), + requestOpen, + checkBackendCounter(0), + wait(2 * testBreakerTimeout), + request(500), + checkBackendCounter(1), + requestOpen, + checkBackendCounter(0), + }, + }, { + title: "open, fixed during timeout", + settings: []circuit.BreakerSettings{{ + Type: circuit.ConsecutiveFailures, + Failures: testConsecutiveFailureCount, + Timeout: testBreakerTimeout, + HalfOpenRequests: testHalfOpenRequests, + }}, + steps: []scenarioStep{ + request(200), + checkBackendCounter(1), + setBackendFail, + times(testConsecutiveFailureCount, request(500)), + checkBackendCounter(testConsecutiveFailureCount), + requestOpen, + checkBackendCounter(0), + wait(2 * testBreakerTimeout), + setBackendSucceed, + times(testHalfOpenRequests+1, request(200)), + checkBackendCounter(testHalfOpenRequests + 1), + }, + }, { + title: "open, fail again during half open", + settings: []circuit.BreakerSettings{{ + Type: circuit.ConsecutiveFailures, + Failures: testConsecutiveFailureCount, + Timeout: testBreakerTimeout, + HalfOpenRequests: testHalfOpenRequests, + }}, + steps: []scenarioStep{ + request(200), + checkBackendCounter(1), + setBackendFail, + times(testConsecutiveFailureCount, request(500)), + checkBackendCounter(testConsecutiveFailureCount), + requestOpen, + checkBackendCounter(0), + wait(2 * testBreakerTimeout), + setBackendSucceed, + times(1, request(200)), + checkBackendCounter(1), + setBackendFail, + times(1, request(500)), + checkBackendCounter(1), + requestOpen, + checkBackendCounter(0), + }, + }, { + title: "open due to backend being down", + settings: []circuit.BreakerSettings{{ + Type: circuit.ConsecutiveFailures, + Failures: testConsecutiveFailureCount, + }}, + steps: []scenarioStep{ + request(200), + checkBackendCounter(1), + setBackendDown, + times(testConsecutiveFailureCount, request(503)), + checkBackendCounter(0), + requestOpen, + }, + }} { + t.Run(s.title, func(t *testing.T) { + testBreaker(t, s) + }) + } +} + +func TestBreakerRate(t *testing.T) { + for _, s := range []breakerScenario{{ + title: "open", + }} { + t.Run(s.title, func(t *testing.T) { + testBreaker(t, s) + }) + } +} + +// rate breaker +// different settings per host +// route settings diff --git a/proxy/context.go b/proxy/context.go index 71b441912b..09f80974d2 100644 --- a/proxy/context.go +++ b/proxy/context.go @@ -8,6 +8,7 @@ import ( "net/url" "time" + "github.com/zalando/skipper/circuit" "github.com/zalando/skipper/routing" ) @@ -158,6 +159,12 @@ func (c *context) setResponse(r *http.Response, preserveOriginal bool) { } } +func (c *context) breakerSettings() circuit.BreakerSettings { + return circuit.BreakerSettings{ + Host: c.outgoingHost, + } +} + func (c *context) ResponseWriter() http.ResponseWriter { return c.responseWriter } func (c *context) Request() *http.Request { return c.request } func (c *context) Response() *http.Response { return c.response } diff --git a/proxy/failingbackend_test.go b/proxy/failingbackend_test.go new file mode 100644 index 0000000000..39993b041f --- /dev/null +++ b/proxy/failingbackend_test.go @@ -0,0 +1,208 @@ +package proxy_test + +import ( + "errors" + "net" + "net/http" + "testing" +) + +type failingBackend struct { + c chan *failingBackend + up bool + healthy bool + address string + url string + server *http.Server + count int +} + +func freeAddress() string { + l, err := net.Listen("tcp", ":0") + if err != nil { + panic(err) + } + + defer l.Close() + return l.Addr().String() +} + +func newFailingBackend() *failingBackend { + address := freeAddress() + b := &failingBackend{ + c: make(chan *failingBackend, 1), + healthy: true, + address: address, + url: "http://" + address, + } + + b.startSynced() + b.c <- b + return b +} + +func (b *failingBackend) synced(f func()) { + b = <-b.c + f() + b.c <- b +} + +func (b *failingBackend) succeed() { + b.synced(func() { + if b.healthy { + return + } + + if !b.up { + b.startSynced() + } + + b.healthy = true + }) +} + +func (b *failingBackend) fail() { + b.synced(func() { + b.healthy = false + }) +} + +func (b *failingBackend) counter() int { + var count int + b.synced(func() { + count = b.count + }) + + return count +} + +func (b *failingBackend) resetCounter() { + b.synced(func() { + b.count = 0 + }) +} + +func (b *failingBackend) startSynced() { + if b.up { + return + } + + l, err := net.Listen("tcp", b.address) + if err != nil { + panic(err) + } + + b.server = &http.Server{Handler: b} + + b.up = true + go func(s *http.Server, l net.Listener) { + err := s.Serve(l) + if err != nil && err != http.ErrServerClosed { + panic(err) + } + }(b.server, l) +} + +func (b *failingBackend) start() { + b.synced(b.startSynced) +} + +func (b *failingBackend) closeSynced() { + if !b.up { + return + } + + b.server.Close() + b.up = false +} + +func (b *failingBackend) close() { + b.synced(b.closeSynced) +} + +func (b *failingBackend) down() { b.close() } + +func (b *failingBackend) reset() { + b.synced(func() { + b.closeSynced() + b.count = 0 + b.startSynced() + }) +} + +func (b *failingBackend) ServeHTTP(w http.ResponseWriter, r *http.Request) { + b.synced(func() { + b.count++ + if !b.healthy { + w.WriteHeader(http.StatusInternalServerError) + } + }) +} + +func TestFailingBackend(t *testing.T) { + b := newFailingBackend() + defer b.close() + + req := func(fail, down bool) error { + rsp, err := http.Get(b.url) + if down { + if err == nil { + return errors.New("failed to fail") + } + + return nil + } else if err != nil { + return err + } + + defer rsp.Body.Close() + + if fail && rsp.StatusCode != http.StatusInternalServerError || + !fail && rsp.StatusCode != http.StatusOK { + t.Error("invalid status", rsp.StatusCode) + } + + return nil + } + + if err := req(false, false); err != nil { + t.Error(err) + return + } + + b.fail() + if err := req(true, false); err != nil { + t.Error(err) + return + } + + b.succeed() + if err := req(false, false); err != nil { + t.Error(err) + return + } + + b.fail() + if err := req(true, false); err != nil { + t.Error(err) + return + } + + b.down() + if err := req(false, true); err != nil { + t.Error(err) + return + } + + b.start() + if err := req(true, false); err != nil { + t.Error(err) + return + } + + b.succeed() + if err := req(false, false); err != nil { + t.Error(err) + return + } +} diff --git a/proxy/proxy.go b/proxy/proxy.go index 4737c6136b..0882b56dd6 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -13,6 +13,7 @@ import ( "time" log "github.com/sirupsen/logrus" + "github.com/zalando/skipper/circuit" "github.com/zalando/skipper/eskip" "github.com/zalando/skipper/metrics" "github.com/zalando/skipper/routing" @@ -110,9 +111,17 @@ type Params struct { // -1. Note, that disabling looping by this option, may result // wrong routing depending on the current configuration. MaxLoopbacks int + + CircuitBreakers *circuit.Registry } -var errMaxLoopbacksReached = errors.New("max loopbacks reached") +var ( + errMaxLoopbacksReached = errors.New("max loopbacks reached") + errCircuitBreakerOpen = &proxyError{ + err: errors.New("circuit breaker open"), + code: http.StatusServiceUnavailable, + } +) // When set, the proxy will skip the TLS verification on outgoing requests. func (f Flags) Insecure() bool { return f&Insecure != 0 } @@ -155,6 +164,7 @@ type Proxy struct { flushInterval time.Duration experimentalUpgrade bool maxLoops int + breakers *circuit.Registry } // proxyError is used to wrap errors during proxying and to indicate @@ -313,6 +323,7 @@ func WithParams(p Params) *Proxy { flushInterval: p.FlushInterval, experimentalUpgrade: p.ExperimentalUpgrade, maxLoops: p.MaxLoopbacks, + breakers: p.CircuitBreakers, } } @@ -470,6 +481,19 @@ func (p *Proxy) makeBackendRequest(ctx *context) (*http.Response, error) { return response, nil } +func (p *Proxy) checkBreaker(c *context) (func(bool), bool) { + if p.breakers == nil { + return nil, true + } + + b := p.breakers.Get(c.breakerSettings()) + if b == nil { + return nil, true + } + + return b.Allow() +} + func (p *Proxy) do(ctx *context) error { if ctx.loopCounter > p.maxLoops { return errMaxLoopbacksReached @@ -515,13 +539,26 @@ func (p *Proxy) do(ctx *context) error { ctx.outgoingDebugRequest = debugReq ctx.setResponse(&http.Response{Header: make(http.Header)}, p.flags.PreserveOriginal()) } else { + done, allow := p.checkBreaker(ctx) + if !allow { + return errCircuitBreakerOpen + } + backendStart := time.Now() rsp, err := p.makeBackendRequest(ctx) if err != nil { + if done != nil { + done(false) + } + p.metrics.IncErrorsBackend(ctx.route.Id) return err } + if done != nil { + done(rsp.StatusCode < http.StatusInternalServerError) + } + ctx.setResponse(rsp, p.flags.PreserveOriginal()) p.metrics.MeasureBackend(ctx.route.Id, backendStart) p.metrics.MeasureBackendHost(ctx.route.Host, backendStart) @@ -557,6 +594,49 @@ func (p *Proxy) serveResponse(ctx *context) { } } +func (p *Proxy) errorResponse(ctx *context, err error) { + perr, ok := err.(*proxyError) + if ok && perr.handled { + return + } + + id := unknownRouteID + if ctx.route != nil { + id = ctx.route.Id + } + + code := http.StatusInternalServerError + if ok && perr.code != 0 { + code = perr.code + } + + if p.flags.Debug() { + di := &debugInfo{ + incoming: ctx.originalRequest, + outgoing: ctx.outgoingDebugRequest, + response: ctx.response, + err: err, + filterPanics: ctx.debugFilterPanics, + } + + if ctx.route != nil { + di.route = &ctx.route.Route + } + + dbgResponse(ctx.responseWriter, di) + return + } + + switch { + case err == errCircuitBreakerOpen: + ctx.responseWriter.Header().Set("X-Circuit-Open", "true") + default: + log.Errorf("error while proxying, route %s, status code %d: %v", id, code, err) + } + + p.sendError(ctx, id, code) +} + // http.Handler implementation func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) { ctx := newContext(w, r, p.flags.PreserveOriginal()) @@ -573,38 +653,7 @@ func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) { err := p.do(ctx) if err != nil { - if perr, ok := err.(*proxyError); !ok || !perr.handled { - id := unknownRouteID - if ctx.route != nil { - id = ctx.route.Id - } - - code := http.StatusInternalServerError - if ok && perr.code != 0 { - code = perr.code - } - - if p.flags.Debug() { - di := &debugInfo{ - incoming: ctx.originalRequest, - outgoing: ctx.outgoingDebugRequest, - response: ctx.response, - err: err, - filterPanics: ctx.debugFilterPanics, - } - - if ctx.route != nil { - di.route = &ctx.route.Route - } - - dbgResponse(w, di) - return - } - - p.sendError(ctx, id, code) - log.Errorf("error while proxying, route %s, status code %d: %v", id, code, err) - } - + p.errorResponse(ctx, err) return } diff --git a/proxy/proxy_test.go b/proxy/proxy_test.go index 7c5bae3c2d..ddf0f14c4b 100644 --- a/proxy/proxy_test.go +++ b/proxy/proxy_test.go @@ -480,14 +480,14 @@ func TestAppliesFilters(t *testing.T) { } } -type breaker struct { +type shunter struct { resp *http.Response } -func (b *breaker) Request(c filters.FilterContext) { c.Serve(b.resp) } -func (*breaker) Response(filters.FilterContext) {} -func (b *breaker) CreateFilter(fc []interface{}) (filters.Filter, error) { return b, nil } -func (*breaker) Name() string { return "breaker" } +func (b *shunter) Request(c filters.FilterContext) { c.Serve(b.resp) } +func (*shunter) Response(filters.FilterContext) {} +func (b *shunter) CreateFilter(fc []interface{}) (filters.Filter, error) { return b, nil } +func (*shunter) Name() string { return "shunter" } func TestBreakFilterChain(t *testing.T) { s := startTestServer([]byte("Hello World!"), 0, func(r *http.Request) { @@ -503,14 +503,14 @@ func TestBreakFilterChain(t *testing.T) { StatusCode: http.StatusUnauthorized, Status: "Impossible body", } - fr.Register(&breaker{resp1}) + fr.Register(&shunter{resp1}) fr.Register(builtin.NewResponseHeader()) doc := fmt.Sprintf(`breakerDemo: - Path("/breaker") -> + Path("/shunter") -> requestHeader("X-Expected", "request header") -> responseHeader("X-Expected", "response header") -> - breaker() -> + shunter() -> requestHeader("X-Unexpected", "foo") -> responseHeader("X-Unexpected", "bar") -> "%s"`, s.URL) @@ -522,7 +522,7 @@ func TestBreakFilterChain(t *testing.T) { defer tp.close() - r, _ := http.NewRequest("GET", "https://www.example.org/breaker", nil) + r, _ := http.NewRequest("GET", "https://www.example.org/shunter", nil) w := httptest.NewRecorder() tp.proxy.ServeHTTP(w, r) @@ -535,11 +535,11 @@ func TestBreakFilterChain(t *testing.T) { } if _, has := r.Header["X-Unexpected"]; has { - t.Error("Request has an unexpected header from a filter after the breaker in the chain") + t.Error("Request has an unexpected header from a filter after the shunter in the chain") } if _, has := w.Header()["X-Unexpected"]; has { - t.Error("Response has an unexpected header from a filter after the breaker in the chain") + t.Error("Response has an unexpected header from a filter after the shunter in the chain") } if w.Code != http.StatusUnauthorized && w.Body.String() != "Impossible body" { From 3cbe423d46f99c03c1bfb69a8e8839fd512e1c35 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Mon, 3 Jul 2017 01:03:07 +0200 Subject: [PATCH 02/12] route specific breaker settings --- circuit/breaker.go | 6 +- circuit/breaker_test.go | 1 + circuit/registry.go | 4 +- circuit/registry_test.go | 2 +- filters/circuit/breaker.go | 160 ++++++++++++++++++++++++++++++++ filters/circuit/breaker_test.go | 128 +++++++++++++++++++++++++ proxy/context.go | 7 -- proxy/proxy.go | 5 +- 8 files changed, 300 insertions(+), 13 deletions(-) create mode 100644 filters/circuit/breaker.go create mode 100644 filters/circuit/breaker_test.go diff --git a/circuit/breaker.go b/circuit/breaker.go index 429d29955b..3949426081 100644 --- a/circuit/breaker.go +++ b/circuit/breaker.go @@ -19,7 +19,7 @@ type BreakerSettings struct { Window, Failures int Timeout time.Duration HalfOpenRequests int - disabled bool + Disabled bool } type breakerImplementation interface { @@ -98,7 +98,7 @@ func (b *Breaker) readyToTrip(failures int) bool { } } -func (b *Breaker) tick(success bool) { +func (b *Breaker) countRate(success bool) { b.mx.Lock() defer b.mx.Unlock() @@ -121,7 +121,7 @@ func (b *Breaker) rateAllow() (func(bool), bool) { } return func(success bool) { - b.tick(success) + b.countRate(success) done(success) }, true } diff --git a/circuit/breaker_test.go b/circuit/breaker_test.go index eddbfea79d..19d03ae3a7 100644 --- a/circuit/breaker_test.go +++ b/circuit/breaker_test.go @@ -127,6 +127,7 @@ func TestRateBreaker(t *testing.T) { }) } +// no checks, used for race detector func TestRateBreakerFuzzy(t *testing.T) { if testing.Short() { t.Skip() diff --git a/circuit/registry.go b/circuit/registry.go index 9188327998..d641944e32 100644 --- a/circuit/registry.go +++ b/circuit/registry.go @@ -2,6 +2,8 @@ package circuit import "time" +const RouteSettingsKey = "#circuitbreakersettings" + type Options struct { Defaults BreakerSettings HostSettings []BreakerSettings @@ -64,7 +66,7 @@ func (r *Registry) dropLookup(b *Breaker) { func (r *Registry) Get(s BreakerSettings) *Breaker { // we check for host, because we don't want to use shared global breakers - if s.disabled || s.Host == "" { + if s.Disabled || s.Host == "" { return nil } diff --git a/circuit/registry_test.go b/circuit/registry_test.go index ad773bc9f7..0540cbbbb0 100644 --- a/circuit/registry_test.go +++ b/circuit/registry_test.go @@ -21,7 +21,7 @@ func TestRegistry(t *testing.T) { } createDisabledSettings := func() BreakerSettings { - return BreakerSettings{disabled: true} + return BreakerSettings{Disabled: true} } checkNil := func(t *testing.T, b *Breaker) { diff --git a/filters/circuit/breaker.go b/filters/circuit/breaker.go new file mode 100644 index 0000000000..c67123da9f --- /dev/null +++ b/filters/circuit/breaker.go @@ -0,0 +1,160 @@ +package circuit + +import ( + "time" + + "github.com/zalando/skipper/circuit" + "github.com/zalando/skipper/filters" +) + +const ConsecutiveBreakerName = "consecutiveBreaker" + +type spec struct { + typ circuit.BreakerType +} + +type filter struct { + settings circuit.BreakerSettings +} + +func getIntArg(a interface{}) (int, error) { + if i, ok := a.(int); ok { + return i, nil + } + + if f, ok := a.(float64); ok { + return int(f), nil + } + + return 0, filters.ErrInvalidFilterParameters +} + +func getDurationArg(a interface{}) (time.Duration, error) { + if s, ok := a.(string); ok { + return time.ParseDuration(s) + } + + i, err := getIntArg(a) + return time.Duration(i) * time.Millisecond, err +} + +func NewConsecutiveBreaker() filters.Spec { + return &spec{typ: circuit.ConsecutiveFailures} +} + +func NewRateBreaker() filters.Spec { + return &spec{typ: circuit.FailureRate} +} + +func NewDisableBreaker() filters.Spec { + return &spec{} +} + +func (s *spec) Name() string { return ConsecutiveBreakerName } + +func consecutiveFilter(args []interface{}) (filters.Filter, error) { + if len(args) == 0 || len(args) > 3 { + return nil, filters.ErrInvalidFilterParameters + } + + failures, err := getIntArg(args[0]) + if err != nil { + return nil, err + } + + var timeout time.Duration + if len(args) > 1 { + timeout, err = getDurationArg(args[1]) + if err != nil { + return nil, err + } + } + + var halfOpenRequests int + if len(args) > 2 { + halfOpenRequests, err = getIntArg(args[2]) + if err != nil { + return nil, err + } + } + + return &filter{ + settings: circuit.BreakerSettings{ + Type: circuit.ConsecutiveFailures, + Failures: failures, + Timeout: timeout, + HalfOpenRequests: halfOpenRequests, + }, + }, nil +} + +func rateFilter(args []interface{}) (filters.Filter, error) { + if len(args) < 2 || len(args) > 4 { + return nil, filters.ErrInvalidFilterParameters + } + + failures, err := getIntArg(args[0]) + if err != nil { + return nil, err + } + + window, err := getIntArg(args[1]) + if err != nil { + return nil, err + } + + var timeout time.Duration + if len(args) > 2 { + timeout, err = getDurationArg(args[2]) + if err != nil { + return nil, err + } + } + + var halfOpenRequests int + if len(args) > 3 { + halfOpenRequests, err = getIntArg(args[3]) + if err != nil { + return nil, err + } + } + + return &filter{ + settings: circuit.BreakerSettings{ + Type: circuit.FailureRate, + Failures: failures, + Window: window, + Timeout: timeout, + HalfOpenRequests: halfOpenRequests, + }, + }, nil +} + +func disableFilter(args []interface{}) (filters.Filter, error) { + if len(args) != 0 { + return nil, filters.ErrInvalidFilterParameters + } + + return &filter{ + settings: circuit.BreakerSettings{ + Disabled: true, + }, + }, nil +} + +func (s *spec) CreateFilter(args []interface{}) (filters.Filter, error) { + switch s.typ { + case circuit.ConsecutiveFailures: + return consecutiveFilter(args) + case circuit.FailureRate: + return rateFilter(args) + default: + return disableFilter(args) + } +} + +func (f *filter) Request(ctx filters.FilterContext) { + ctx.StateBag()[circuit.RouteSettingsKey] = f.settings +} + +func (f *filter) Response(filters.FilterContext) {} diff --git a/filters/circuit/breaker_test.go b/filters/circuit/breaker_test.go new file mode 100644 index 0000000000..09d2ff8b2a --- /dev/null +++ b/filters/circuit/breaker_test.go @@ -0,0 +1,128 @@ +package circuit + +import ( + "testing" + "time" + + "github.com/zalando/skipper/circuit" + "github.com/zalando/skipper/filters" + "github.com/zalando/skipper/filters/filtertest" +) + +func TestArgs(t *testing.T) { + test := func(s filters.Spec, fail bool, args ...interface{}) func(*testing.T) { + return func(t *testing.T) { + if _, err := s.CreateFilter(args); fail && err == nil { + t.Error("failed to fail") + } else if !fail && err != nil { + t.Error(err) + } + } + } + + testOK := func(s filters.Spec, args ...interface{}) func(*testing.T) { return test(s, false, args...) } + testErr := func(s filters.Spec, args ...interface{}) func(*testing.T) { return test(s, true, args...) } + + t.Run("consecutive", func(t *testing.T) { + s := NewConsecutiveBreaker() + t.Run("missing", testErr(s, nil)) + t.Run("too many", testErr(s, 6, "1m", 12, 42)) + t.Run("wrong failure count", testErr(s, "6", "1m", 12)) + t.Run("wrong timeout", testErr(s, 6, "foo", 12)) + t.Run("wrong half-open requests", testErr(s, 6, "1m", "foo")) + t.Run("only failure count", testOK(s, 6)) + t.Run("only failure count and timeout", testOK(s, 6, "1m")) + t.Run("full", testOK(s, 6, "1m", 12)) + t.Run("timeout as milliseconds", testOK(s, 6, 60000, 12)) + }) + + t.Run("rate", func(t *testing.T) { + s := NewRateBreaker() + t.Run("missing both", testErr(s, nil)) + t.Run("missing window", testErr(s, 30)) + t.Run("too many", testErr(s, 30, 300, "1m", 45, "foo")) + t.Run("wrong failure count", testErr(s, "30", 300, "1m", 45)) + t.Run("wrong window", testErr(s, 30, "300", "1m", 45)) + t.Run("wrong timeout", testErr(s, 30, "300", "foo", 45)) + t.Run("wrong half-open requests", testErr(s, 30, "300", "1m", "foo")) + t.Run("only failures and window", testOK(s, 30, 300)) + t.Run("only failures, window and timeout", testOK(s, 30, 300, "1m")) + t.Run("full", testOK(s, 30, 300, "1m", 45)) + t.Run("timeout as milliseconds", testOK(s, 30, 300, 60000, 45)) + }) + + t.Run("disable", func(t *testing.T) { + s := NewDisableBreaker() + t.Run("with args fail", testErr(s, 6)) + t.Run("no args, ok", testOK(s)) + }) +} + +func TestBreaker(t *testing.T) { + test := func( + s func() filters.Spec, + expect circuit.BreakerSettings, + args ...interface{}, + ) func(*testing.T) { + return func(t *testing.T) { + s := s() + + f, err := s.CreateFilter(args) + if err != nil { + t.Error(err) + } + + ctx := &filtertest.Context{ + FStateBag: make(map[string]interface{}), + } + + f.Request(ctx) + + settings, ok := ctx.StateBag()[circuit.RouteSettingsKey] + if !ok { + t.Error("failed to set the breaker settings") + } + + if settings != expect { + t.Error("invalid settings") + t.Log("got", settings) + t.Log("expected", expect) + } + } + } + + t.Run("consecutive breaker", test( + NewConsecutiveBreaker, + circuit.BreakerSettings{ + Type: circuit.ConsecutiveFailures, + Failures: 6, + Timeout: time.Minute, + HalfOpenRequests: 12, + }, + 6, + "1m", + 12, + )) + + t.Run("rate breaker", test( + NewRateBreaker, + circuit.BreakerSettings{ + Type: circuit.FailureRate, + Failures: 30, + Window: 300, + Timeout: time.Minute, + HalfOpenRequests: 12, + }, + 30, + 300, + "1m", + 12, + )) + + t.Run("disable breaker", test( + NewDisableBreaker, + circuit.BreakerSettings{ + Disabled: true, + }, + )) +} diff --git a/proxy/context.go b/proxy/context.go index 09f80974d2..71b441912b 100644 --- a/proxy/context.go +++ b/proxy/context.go @@ -8,7 +8,6 @@ import ( "net/url" "time" - "github.com/zalando/skipper/circuit" "github.com/zalando/skipper/routing" ) @@ -159,12 +158,6 @@ func (c *context) setResponse(r *http.Response, preserveOriginal bool) { } } -func (c *context) breakerSettings() circuit.BreakerSettings { - return circuit.BreakerSettings{ - Host: c.outgoingHost, - } -} - func (c *context) ResponseWriter() http.ResponseWriter { return c.responseWriter } func (c *context) Request() *http.Request { return c.request } func (c *context) Response() *http.Response { return c.response } diff --git a/proxy/proxy.go b/proxy/proxy.go index 0882b56dd6..6bf1cf49b6 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -486,7 +486,10 @@ func (p *Proxy) checkBreaker(c *context) (func(bool), bool) { return nil, true } - b := p.breakers.Get(c.breakerSettings()) + settings, _ := c.stateBag[circuit.RouteSettingsKey].(circuit.BreakerSettings) + settings.Host = c.outgoingHost + + b := p.breakers.Get(settings) if b == nil { return nil, true } From 403270a616372d641a9633de39af7cff92f3145b Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Mon, 3 Jul 2017 12:23:36 +0200 Subject: [PATCH 03/12] WIP notes --- circuit/binarysampler.go | 3 --- circuit/breaker.go | 12 ++++++++---- circuit/list.go | 8 ++++---- circuit/registry_test.go | 1 + 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/circuit/binarysampler.go b/circuit/binarysampler.go index 8a4d1677ad..1874f3ba9e 100644 --- a/circuit/binarysampler.go +++ b/circuit/binarysampler.go @@ -1,8 +1,5 @@ package circuit -// TODO: apply a TTL for stale windows. Try to calculate it from the existing -// input arguments - // contains a series of events with 0 or 1 values, e.g. errors or successes, // within a limited window. // count contains the number of events with the value of 1 in the window. diff --git a/circuit/breaker.go b/circuit/breaker.go index 3949426081..6bb8c74028 100644 --- a/circuit/breaker.go +++ b/circuit/breaker.go @@ -5,6 +5,10 @@ import ( "time" ) +// TODO: +// - in case of the rate breaker, there are unnecessary synchronization steps due to the 3rd party gobreaker +// - introduce a TTL in the rate breaker for the stale samplers + type BreakerType int const ( @@ -99,14 +103,14 @@ func (b *Breaker) readyToTrip(failures int) bool { } func (b *Breaker) countRate(success bool) { + if !b.impl.Closed() { + return + } + b.mx.Lock() defer b.mx.Unlock() if b.sampler == nil { - if !b.impl.Closed() { - return - } - b.sampler = newBinarySampler(b.settings.Window) } diff --git a/circuit/list.go b/circuit/list.go index 8f3a6045c0..7b71542943 100644 --- a/circuit/list.go +++ b/circuit/list.go @@ -1,9 +1,9 @@ package circuit -// a simple linked list to keep accessed breakers sorted in -// constant time. It can return the first consecutive items -// that match a condition (e.g. the ones that were inactive -// for a while). +// a simple list to keep breakers sorted by access order. +// It can return the first consecutive items that match a +// condition (e.g. the ones that were inactive for a +// while). type list struct { first, last *Breaker } diff --git a/circuit/registry_test.go b/circuit/registry_test.go index 0540cbbbb0..1c33ff4b89 100644 --- a/circuit/registry_test.go +++ b/circuit/registry_test.go @@ -6,6 +6,7 @@ import ( "time" ) +// no checks, used for race detector func TestRegistry(t *testing.T) { createSettings := func(cf int) BreakerSettings { return BreakerSettings{ From 0dea7598e2aae58e942ee20639f5528d459e0e76 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Fri, 7 Jul 2017 17:01:48 +0200 Subject: [PATCH 04/12] refactor the circuit breaker package --- circuit/binarysampler.go | 8 +-- circuit/breaker.go | 86 ++++++--------------------------- circuit/consecutivebreaker.go | 31 ++++++++++++ circuit/gobreaker.go | 34 ------------- circuit/list.go | 2 +- circuit/ratebreaker.go | 72 +++++++++++++++++++++++++++ circuit/registry.go | 91 +++++++++++++++++------------------ 7 files changed, 167 insertions(+), 157 deletions(-) create mode 100644 circuit/consecutivebreaker.go delete mode 100644 circuit/gobreaker.go create mode 100644 circuit/ratebreaker.go diff --git a/circuit/binarysampler.go b/circuit/binarysampler.go index 1874f3ba9e..7c040211c2 100644 --- a/circuit/binarysampler.go +++ b/circuit/binarysampler.go @@ -1,9 +1,9 @@ package circuit -// contains a series of events with 0 or 1 values, e.g. errors or successes, -// within a limited window. -// count contains the number of events with the value of 1 in the window. -// compresses the event storage by 64. +// binarysampler contains a series of events as 0 or 1 values, e.g. errors or successes, +// within a limited, sliding window. +// count contains the actual number of events with the value of 1 within the window. +// it compresses the event storage by 64. type binarySampler struct { size int filled int diff --git a/circuit/breaker.go b/circuit/breaker.go index 6bb8c74028..d7c5623c1a 100644 --- a/circuit/breaker.go +++ b/circuit/breaker.go @@ -1,9 +1,6 @@ package circuit -import ( - "sync" - "time" -) +import "time" // TODO: // - in case of the rate breaker, there are unnecessary synchronization steps due to the 3rd party gobreaker @@ -28,20 +25,19 @@ type BreakerSettings struct { type breakerImplementation interface { Allow() (func(bool), bool) - Closed() bool } +type voidBreaker struct{} + // represents a single circuit breaker type Breaker struct { settings BreakerSettings ts time.Time prev, next *Breaker impl breakerImplementation - mx *sync.Mutex - sampler *binarySampler } -func applySettings(to, from BreakerSettings) BreakerSettings { +func (to BreakerSettings) mergeSettings(from BreakerSettings) BreakerSettings { if to.Type == BreakerNone { to.Type = from.Type @@ -66,77 +62,27 @@ func applySettings(to, from BreakerSettings) BreakerSettings { return to } -func newBreaker(s BreakerSettings) *Breaker { - b := &Breaker{ - settings: s, - mx: &sync.Mutex{}, - } - b.impl = newGobreaker(s, b.readyToTrip) - return b -} - -func (b *Breaker) rateReadyToTrip() bool { - b.mx.Lock() - defer b.mx.Unlock() - - if b.sampler == nil { - return false - } - - ready := b.sampler.count >= b.settings.Failures - if ready { - b.sampler = nil - } - - return ready +func (b voidBreaker) Allow() (func(bool), bool) { + return func(bool) {}, true } -func (b *Breaker) readyToTrip(failures int) bool { - switch b.settings.Type { +func newBreaker(s BreakerSettings) *Breaker { + var impl breakerImplementation + switch s.Type { case ConsecutiveFailures: - return failures >= b.settings.Failures + impl = newConsecutive(s) case FailureRate: - return b.rateReadyToTrip() + impl = newRate(s) default: - return false + impl = voidBreaker{} } -} -func (b *Breaker) countRate(success bool) { - if !b.impl.Closed() { - return - } - - b.mx.Lock() - defer b.mx.Unlock() - - if b.sampler == nil { - b.sampler = newBinarySampler(b.settings.Window) - } - - // count the failures in closed state - b.sampler.tick(!success) -} - -func (b *Breaker) rateAllow() (func(bool), bool) { - done, ok := b.impl.Allow() - if !ok { - return nil, false + return &Breaker{ + settings: s, + impl: impl, } - - return func(success bool) { - b.countRate(success) - done(success) - }, true } func (b *Breaker) Allow() (func(bool), bool) { - switch b.settings.Type { - case ConsecutiveFailures: - return b.impl.Allow() - case FailureRate: - return b.rateAllow() - default: - return nil, false - } + return b.impl.Allow() } diff --git a/circuit/consecutivebreaker.go b/circuit/consecutivebreaker.go new file mode 100644 index 0000000000..757a0d2676 --- /dev/null +++ b/circuit/consecutivebreaker.go @@ -0,0 +1,31 @@ +package circuit + +import "github.com/sony/gobreaker" + +type consecutiveBreaker struct { + gb *gobreaker.TwoStepCircuitBreaker +} + +func newConsecutive(s BreakerSettings) *consecutiveBreaker { + return &consecutiveBreaker{ + gb: gobreaker.NewTwoStepCircuitBreaker(gobreaker.Settings{ + Name: s.Host, + MaxRequests: uint32(s.HalfOpenRequests), + Timeout: s.Timeout, + ReadyToTrip: func(c gobreaker.Counts) bool { + return int(c.ConsecutiveFailures) >= s.Failures + }, + }), + } +} + +func (b *consecutiveBreaker) Allow() (func(bool), bool) { + done, err := b.gb.Allow() + + // this error can only indicate that the breaker is not closed + if err != nil { + return nil, false + } + + return done, true +} diff --git a/circuit/gobreaker.go b/circuit/gobreaker.go deleted file mode 100644 index 487726f294..0000000000 --- a/circuit/gobreaker.go +++ /dev/null @@ -1,34 +0,0 @@ -package circuit - -import "github.com/sony/gobreaker" - -// wrapper for changing interface: -type gobreakerWrap struct { - gb *gobreaker.TwoStepCircuitBreaker -} - -func newGobreaker(s BreakerSettings, readyToTrip func(int) bool) gobreakerWrap { - return gobreakerWrap{gobreaker.NewTwoStepCircuitBreaker(gobreaker.Settings{ - Name: s.Host, - MaxRequests: uint32(s.HalfOpenRequests), - Timeout: s.Timeout, - ReadyToTrip: func(c gobreaker.Counts) bool { - return readyToTrip(int(c.ConsecutiveFailures)) - }, - })} -} - -func (w gobreakerWrap) Allow() (func(bool), bool) { - done, err := w.gb.Allow() - - // this error can only indicate that the breaker is not closed - if err != nil { - return nil, false - } - - return done, true -} - -func (w gobreakerWrap) Closed() bool { - return w.gb.State() == gobreaker.StateClosed -} diff --git a/circuit/list.go b/circuit/list.go index 7b71542943..34cf1a7e3f 100644 --- a/circuit/list.go +++ b/circuit/list.go @@ -2,7 +2,7 @@ package circuit // a simple list to keep breakers sorted by access order. // It can return the first consecutive items that match a -// condition (e.g. the ones that were inactive for a +// condition (in our case, the ones that were inactive for a // while). type list struct { first, last *Breaker diff --git a/circuit/ratebreaker.go b/circuit/ratebreaker.go new file mode 100644 index 0000000000..71d0ecca96 --- /dev/null +++ b/circuit/ratebreaker.go @@ -0,0 +1,72 @@ +package circuit + +import ( + "sync" + + "github.com/sony/gobreaker" +) + +type rateBreaker struct { + settings BreakerSettings + mx *sync.Mutex + sampler *binarySampler + gb *gobreaker.TwoStepCircuitBreaker +} + +func newRate(s BreakerSettings) *rateBreaker { + b := &rateBreaker{ + settings: s, + mx: &sync.Mutex{}, + } + + b.gb = gobreaker.NewTwoStepCircuitBreaker(gobreaker.Settings{ + Name: s.Host, + MaxRequests: uint32(s.HalfOpenRequests), + Timeout: s.Timeout, + ReadyToTrip: func(gobreaker.Counts) bool { return b.readyToTrip() }, + }) + + return b +} + +func (b *rateBreaker) readyToTrip() bool { + b.mx.Lock() + defer b.mx.Unlock() + + if b.sampler == nil { + return false + } + + ready := b.sampler.count >= b.settings.Failures + if ready { + b.sampler = nil + } + + return ready +} + +// count the failures in closed and half-open state +func (b *rateBreaker) countRate(success bool) { + b.mx.Lock() + defer b.mx.Unlock() + + if b.sampler == nil { + b.sampler = newBinarySampler(b.settings.Window) + } + + b.sampler.tick(!success) +} + +func (b *rateBreaker) Allow() (func(bool), bool) { + done, err := b.gb.Allow() + + // this error can only indicate that the breaker is not closed + if err != nil { + return nil, false + } + + return func(success bool) { + b.countRate(success) + done(success) + }, true +} diff --git a/circuit/registry.go b/circuit/registry.go index d641944e32..bd0859f8ac 100644 --- a/circuit/registry.go +++ b/circuit/registry.go @@ -1,6 +1,9 @@ package circuit -import "time" +import ( + "sync" + "time" +) const RouteSettingsKey = "#circuitbreakersettings" @@ -16,52 +19,70 @@ type Registry struct { idleTTL time.Duration lookup map[BreakerSettings]*Breaker access *list - sync chan *Registry + mx *sync.Mutex } func NewRegistry(o Options) *Registry { hs := make(map[string]BreakerSettings) for _, s := range o.HostSettings { - hs[s.Host] = applySettings(s, o.Defaults) + hs[s.Host] = s.mergeSettings(o.Defaults) } if o.IdleTTL <= 0 { o.IdleTTL = time.Hour } - r := &Registry{ + return &Registry{ defaults: o.Defaults, hostSettings: hs, idleTTL: o.IdleTTL, lookup: make(map[BreakerSettings]*Breaker), access: &list{}, - sync: make(chan *Registry, 1), + mx: &sync.Mutex{}, } - - r.sync <- r - return r -} - -func (r *Registry) synced(f func()) { - r = <-r.sync - f() - r.sync <- r } -func (r *Registry) applySettings(s BreakerSettings) BreakerSettings { +func (r *Registry) mergeDefaults(s BreakerSettings) BreakerSettings { config, ok := r.hostSettings[s.Host] if !ok { config = r.defaults } - return applySettings(s, config) + return s.mergeSettings(config) } -func (r *Registry) dropLookup(b *Breaker) { - for b != nil { - delete(r.lookup, b.settings) - b = b.next +func (r *Registry) dropIdle(now time.Time) { + drop, _ := r.access.dropHeadIf(func(b *Breaker) bool { + return now.Sub(b.ts) > r.idleTTL + }) + + for drop != nil { + delete(r.lookup, drop.settings) + drop = drop.next + } +} + +func (r *Registry) get(s BreakerSettings) *Breaker { + r.mx.Lock() + defer r.mx.Unlock() + + now := time.Now() + + b, ok := r.lookup[s] + if !ok { + // check if there is any to evict, evict if yes + r.dropIdle(now) + + // create a new one + b = newBreaker(s) + r.lookup[s] = b } + + // append/move the breaker to the last position of the access history + b.ts = now + r.access.appendLast(b) + + return b } func (r *Registry) Get(s BreakerSettings) *Breaker { @@ -70,36 +91,10 @@ func (r *Registry) Get(s BreakerSettings) *Breaker { return nil } - // apply host specific and global defaults when not set in the request - s = r.applySettings(s) + s = r.mergeDefaults(s) if s.Type == BreakerNone { return nil } - var b *Breaker - r.synced(func() { - now := time.Now() - - var ok bool - b, ok = r.lookup[s] - if !ok { - // if the breaker doesn't exist with the requested settings, - // check if there is any to evict, evict if yet, and create - // a new one - - drop, _ := r.access.dropHeadIf(func(b *Breaker) bool { - return now.Sub(b.ts) > r.idleTTL - }) - - r.dropLookup(drop) - b = newBreaker(s) - r.lookup[s] = b - } - - // append/move the breaker to the last position of the access history - b.ts = now - r.access.appendLast(b) - }) - - return b + return r.get(s) } From 5cbca8a846ffef40879afe07138a5f6246af7101 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Fri, 7 Jul 2017 19:07:38 +0200 Subject: [PATCH 05/12] optional individual ttl for each breaker --- circuit/breaker.go | 9 ++++ circuit/registry.go | 24 +++++---- circuit/registry_test.go | 95 +++++++++++++++++++++++++++++++-- filters/circuit/breaker.go | 39 ++++++++++++-- filters/circuit/breaker_test.go | 6 ++- 5 files changed, 151 insertions(+), 22 deletions(-) diff --git a/circuit/breaker.go b/circuit/breaker.go index d7c5623c1a..a247a85c85 100644 --- a/circuit/breaker.go +++ b/circuit/breaker.go @@ -21,6 +21,7 @@ type BreakerSettings struct { Timeout time.Duration HalfOpenRequests int Disabled bool + IdleTTL time.Duration } type breakerImplementation interface { @@ -59,6 +60,10 @@ func (to BreakerSettings) mergeSettings(from BreakerSettings) BreakerSettings { to.HalfOpenRequests = from.HalfOpenRequests } + if to.IdleTTL == 0 { + to.IdleTTL = from.IdleTTL + } + return to } @@ -86,3 +91,7 @@ func newBreaker(s BreakerSettings) *Breaker { func (b *Breaker) Allow() (func(bool), bool) { return b.impl.Allow() } + +func (b *Breaker) idle(now time.Time) bool { + return now.Sub(b.ts) > b.settings.IdleTTL +} diff --git a/circuit/registry.go b/circuit/registry.go index bd0859f8ac..9659bb5668 100644 --- a/circuit/registry.go +++ b/circuit/registry.go @@ -5,18 +5,19 @@ import ( "time" ) -const RouteSettingsKey = "#circuitbreakersettings" +const ( + DefaultIdleTTL = time.Hour + RouteSettingsKey = "#circuitbreakersettings" +) type Options struct { Defaults BreakerSettings HostSettings []BreakerSettings - IdleTTL time.Duration } type Registry struct { defaults BreakerSettings hostSettings map[string]BreakerSettings - idleTTL time.Duration lookup map[BreakerSettings]*Breaker access *list mx *sync.Mutex @@ -28,14 +29,13 @@ func NewRegistry(o Options) *Registry { hs[s.Host] = s.mergeSettings(o.Defaults) } - if o.IdleTTL <= 0 { - o.IdleTTL = time.Hour + if o.Defaults.IdleTTL <= 0 { + o.Defaults.IdleTTL = DefaultIdleTTL } return &Registry{ defaults: o.Defaults, hostSettings: hs, - idleTTL: o.IdleTTL, lookup: make(map[BreakerSettings]*Breaker), access: &list{}, mx: &sync.Mutex{}, @@ -43,17 +43,17 @@ func NewRegistry(o Options) *Registry { } func (r *Registry) mergeDefaults(s BreakerSettings) BreakerSettings { - config, ok := r.hostSettings[s.Host] + defaults, ok := r.hostSettings[s.Host] if !ok { - config = r.defaults + defaults = r.defaults } - return s.mergeSettings(config) + return s.mergeSettings(defaults) } func (r *Registry) dropIdle(now time.Time) { drop, _ := r.access.dropHeadIf(func(b *Breaker) bool { - return now.Sub(b.ts) > r.idleTTL + return b.idle(now) }) for drop != nil { @@ -69,7 +69,9 @@ func (r *Registry) get(s BreakerSettings) *Breaker { now := time.Now() b, ok := r.lookup[s] - if !ok { + if !ok || b.idle(now) { + // if doesn't exist or idle, cleanup and create a new one + // check if there is any to evict, evict if yes r.dropIdle(now) diff --git a/circuit/registry_test.go b/circuit/registry_test.go index 1c33ff4b89..1d099bad03 100644 --- a/circuit/registry_test.go +++ b/circuit/registry_test.go @@ -12,6 +12,7 @@ func TestRegistry(t *testing.T) { return BreakerSettings{ Type: ConsecutiveFailures, Failures: cf, + IdleTTL: DefaultIdleTTL, } } @@ -207,6 +208,9 @@ func TestRegistryEvictIdle(t *testing.T) { } options := Options{ + Defaults: BreakerSettings{ + IdleTTL: 15 * time.Millisecond, + }, HostSettings: []BreakerSettings{{ Host: "foo", Type: ConsecutiveFailures, @@ -224,7 +228,6 @@ func TestRegistryEvictIdle(t *testing.T) { Type: ConsecutiveFailures, Failures: 7, }}, - IdleTTL: 15 * time.Millisecond, } toEvict := options.HostSettings[2] r := NewRegistry(options) @@ -240,12 +243,12 @@ func TestRegistryEvictIdle(t *testing.T) { get("bar") get("baz") - time.Sleep(2 * options.IdleTTL / 3) + time.Sleep(2 * options.Defaults.IdleTTL / 3) get("foo") get("bar") - time.Sleep(2 * options.IdleTTL / 3) + time.Sleep(2 * options.Defaults.IdleTTL / 3) get("qux") @@ -265,6 +268,81 @@ func TestRegistryEvictIdle(t *testing.T) { } } +func TestIndividualIdle(t *testing.T) { + if testing.Short() { + t.Skip() + } + + // create with default and host specific + // + // check for both: + // - fail n - 1 + // - wait idle + // - fail + // - stays closed + + const ( + consecutiveFailures = 5 + idleTimeout = 15 * time.Millisecond + hostIdleTimeout = 6 * time.Millisecond + ) + + r := NewRegistry(Options{ + Defaults: BreakerSettings{ + Type: ConsecutiveFailures, + Failures: consecutiveFailures, + IdleTTL: idleTimeout, + }, + HostSettings: []BreakerSettings{{ + Host: "foo", + IdleTTL: hostIdleTimeout, + }}, + }) + + shouldBeClosed := func(t *testing.T, host string) func(bool) { + b := r.Get(BreakerSettings{Host: host}) + if b == nil { + t.Error("failed get breaker") + return nil + } + + done, ok := b.Allow() + if !ok { + t.Error("breaker unexpectedly open") + return nil + } + + return done + } + + fail := func(t *testing.T, host string) { + done := shouldBeClosed(t, host) + if done != nil { + done(false) + } + } + + mkfail := func(t *testing.T, host string) func() { + return func() { + fail(t, host) + } + } + + t.Run("default", func(t *testing.T) { + times(consecutiveFailures-1, mkfail(t, "bar")) + time.Sleep(idleTimeout) + fail(t, "bar") + shouldBeClosed(t, "bar") + }) + + t.Run("host", func(t *testing.T) { + times(consecutiveFailures-1, mkfail(t, "foo")) + time.Sleep(hostIdleTimeout) + fail(t, "foo") + shouldBeClosed(t, "foo") + }) +} + func TestRegistryFuzzy(t *testing.T) { if testing.Short() { t.Skip() @@ -299,11 +377,16 @@ func TestRegistryFuzzy(t *testing.T) { hosts[i] = genHost() } - options := Options{IdleTTL: idleTTL} + options := Options{Defaults: BreakerSettings{IdleTTL: idleTTL}} settings := make(map[string]BreakerSettings) for _, h := range hosts { - s := BreakerSettings{Host: h, Type: ConsecutiveFailures, Failures: 5} + s := BreakerSettings{ + Host: h, + Type: ConsecutiveFailures, + Failures: 5, + IdleTTL: idleTTL, + } options.HostSettings = append(options.HostSettings, s) settings[h] = s } @@ -315,6 +398,7 @@ func TestRegistryFuzzy(t *testing.T) { for _, h := range hosts[:customSettingsCount] { s := settings[h] s.Failures = 15 + s.IdleTTL = idleTTL customSettings[h] = s } @@ -373,6 +457,7 @@ func TestRegistryFuzzy(t *testing.T) { b := r.Get(s) if b.settings != s { t.Error("invalid breaker received") + t.Log(b.settings, s) close(stop) } diff --git a/filters/circuit/breaker.go b/filters/circuit/breaker.go index c67123da9f..0436a62c43 100644 --- a/filters/circuit/breaker.go +++ b/filters/circuit/breaker.go @@ -7,7 +7,11 @@ import ( "github.com/zalando/skipper/filters" ) -const ConsecutiveBreakerName = "consecutiveBreaker" +const ( + ConsecutiveBreakerName = "consecutiveBreaker" + RateBreakerName = "rateBreaker" + DisableBreakerName = "disableBreaker" +) type spec struct { typ circuit.BreakerType @@ -50,10 +54,19 @@ func NewDisableBreaker() filters.Spec { return &spec{} } -func (s *spec) Name() string { return ConsecutiveBreakerName } +func (s *spec) Name() string { + switch s.typ { + case circuit.ConsecutiveFailures: + return ConsecutiveBreakerName + case circuit.FailureRate: + return RateBreakerName + default: + return DisableBreakerName + } +} func consecutiveFilter(args []interface{}) (filters.Filter, error) { - if len(args) == 0 || len(args) > 3 { + if len(args) == 0 || len(args) > 4 { return nil, filters.ErrInvalidFilterParameters } @@ -78,18 +91,27 @@ func consecutiveFilter(args []interface{}) (filters.Filter, error) { } } + var idleTTL time.Duration + if len(args) > 3 { + idleTTL, err = getDurationArg(args[3]) + if err != nil { + return nil, err + } + } + return &filter{ settings: circuit.BreakerSettings{ Type: circuit.ConsecutiveFailures, Failures: failures, Timeout: timeout, HalfOpenRequests: halfOpenRequests, + IdleTTL: idleTTL, }, }, nil } func rateFilter(args []interface{}) (filters.Filter, error) { - if len(args) < 2 || len(args) > 4 { + if len(args) < 2 || len(args) > 5 { return nil, filters.ErrInvalidFilterParameters } @@ -119,6 +141,14 @@ func rateFilter(args []interface{}) (filters.Filter, error) { } } + var idleTTL time.Duration + if len(args) > 4 { + idleTTL, err = getDurationArg(args[4]) + if err != nil { + return nil, err + } + } + return &filter{ settings: circuit.BreakerSettings{ Type: circuit.FailureRate, @@ -126,6 +156,7 @@ func rateFilter(args []interface{}) (filters.Filter, error) { Window: window, Timeout: timeout, HalfOpenRequests: halfOpenRequests, + IdleTTL: idleTTL, }, }, nil } diff --git a/filters/circuit/breaker_test.go b/filters/circuit/breaker_test.go index 09d2ff8b2a..1d68fcee21 100644 --- a/filters/circuit/breaker_test.go +++ b/filters/circuit/breaker_test.go @@ -26,7 +26,7 @@ func TestArgs(t *testing.T) { t.Run("consecutive", func(t *testing.T) { s := NewConsecutiveBreaker() t.Run("missing", testErr(s, nil)) - t.Run("too many", testErr(s, 6, "1m", 12, 42)) + t.Run("too many", testErr(s, 6, "1m", 12, "30m", 42)) t.Run("wrong failure count", testErr(s, "6", "1m", 12)) t.Run("wrong timeout", testErr(s, 6, "foo", 12)) t.Run("wrong half-open requests", testErr(s, 6, "1m", "foo")) @@ -34,13 +34,14 @@ func TestArgs(t *testing.T) { t.Run("only failure count and timeout", testOK(s, 6, "1m")) t.Run("full", testOK(s, 6, "1m", 12)) t.Run("timeout as milliseconds", testOK(s, 6, 60000, 12)) + t.Run("with idle ttl", testOK(s, 6, 60000, 12, "30m")) }) t.Run("rate", func(t *testing.T) { s := NewRateBreaker() t.Run("missing both", testErr(s, nil)) t.Run("missing window", testErr(s, 30)) - t.Run("too many", testErr(s, 30, 300, "1m", 45, "foo")) + t.Run("too many", testErr(s, 30, 300, "1m", 45, "30m", 42)) t.Run("wrong failure count", testErr(s, "30", 300, "1m", 45)) t.Run("wrong window", testErr(s, 30, "300", "1m", 45)) t.Run("wrong timeout", testErr(s, 30, "300", "foo", 45)) @@ -49,6 +50,7 @@ func TestArgs(t *testing.T) { t.Run("only failures, window and timeout", testOK(s, 30, 300, "1m")) t.Run("full", testOK(s, 30, 300, "1m", 45)) t.Run("timeout as milliseconds", testOK(s, 30, 300, 60000, 45)) + t.Run("with idle ttl", testOK(s, 30, 300, 60000, 12, "30m")) }) t.Run("disable", func(t *testing.T) { From b3571a2d6f1ecdf7c7da9cb1680066c1e91c7905 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sun, 9 Jul 2017 19:30:01 +0200 Subject: [PATCH 06/12] test host and route specific settings --- circuit/breaker.go | 2 +- circuit/registry.go | 13 +- circuit/registry_test.go | 2 +- filters/builtin/builtin.go | 4 + filters/circuit/breaker.go | 2 +- filters/circuit/breaker_test.go | 2 +- proxy/breaker_test.go | 416 ++++++++++++++++++++++++++++---- proxy/proxy.go | 3 +- 8 files changed, 386 insertions(+), 58 deletions(-) diff --git a/circuit/breaker.go b/circuit/breaker.go index a247a85c85..e7911dbf1e 100644 --- a/circuit/breaker.go +++ b/circuit/breaker.go @@ -12,6 +12,7 @@ const ( BreakerNone BreakerType = iota ConsecutiveFailures FailureRate + BreakerDisabled ) type BreakerSettings struct { @@ -20,7 +21,6 @@ type BreakerSettings struct { Window, Failures int Timeout time.Duration HalfOpenRequests int - Disabled bool IdleTTL time.Duration } diff --git a/circuit/registry.go b/circuit/registry.go index 9659bb5668..15b0ab884c 100644 --- a/circuit/registry.go +++ b/circuit/registry.go @@ -24,15 +24,15 @@ type Registry struct { } func NewRegistry(o Options) *Registry { + if o.Defaults.IdleTTL <= 0 { + o.Defaults.IdleTTL = DefaultIdleTTL + } + hs := make(map[string]BreakerSettings) for _, s := range o.HostSettings { hs[s.Host] = s.mergeSettings(o.Defaults) } - if o.Defaults.IdleTTL <= 0 { - o.Defaults.IdleTTL = DefaultIdleTTL - } - return &Registry{ defaults: o.Defaults, hostSettings: hs, @@ -71,8 +71,9 @@ func (r *Registry) get(s BreakerSettings) *Breaker { b, ok := r.lookup[s] if !ok || b.idle(now) { // if doesn't exist or idle, cleanup and create a new one + r.access.remove(b, b) - // check if there is any to evict, evict if yes + // check if there is any other to evict, evict if yes r.dropIdle(now) // create a new one @@ -89,7 +90,7 @@ func (r *Registry) get(s BreakerSettings) *Breaker { func (r *Registry) Get(s BreakerSettings) *Breaker { // we check for host, because we don't want to use shared global breakers - if s.Disabled || s.Host == "" { + if s.Type == BreakerDisabled || s.Host == "" { return nil } diff --git a/circuit/registry_test.go b/circuit/registry_test.go index 1d099bad03..ac52b4f644 100644 --- a/circuit/registry_test.go +++ b/circuit/registry_test.go @@ -23,7 +23,7 @@ func TestRegistry(t *testing.T) { } createDisabledSettings := func() BreakerSettings { - return BreakerSettings{Disabled: true} + return BreakerSettings{Type: BreakerDisabled} } checkNil := func(t *testing.T, b *Breaker) { diff --git a/filters/builtin/builtin.go b/filters/builtin/builtin.go index 1e236c3dba..b35169b3e4 100644 --- a/filters/builtin/builtin.go +++ b/filters/builtin/builtin.go @@ -6,6 +6,7 @@ package builtin import ( "github.com/zalando/skipper/filters" "github.com/zalando/skipper/filters/auth" + "github.com/zalando/skipper/filters/circuit" "github.com/zalando/skipper/filters/cookie" "github.com/zalando/skipper/filters/diag" "github.com/zalando/skipper/filters/flowid" @@ -83,6 +84,9 @@ func MakeRegistry() filters.Registry { cookie.NewRequestCookie(), cookie.NewResponseCookie(), cookie.NewJSCookie(), + circuit.NewConsecutiveBreaker(), + circuit.NewRateBreaker(), + circuit.NewDisableBreaker(), } { r.Register(s) } diff --git a/filters/circuit/breaker.go b/filters/circuit/breaker.go index 0436a62c43..3e5feb6cbc 100644 --- a/filters/circuit/breaker.go +++ b/filters/circuit/breaker.go @@ -168,7 +168,7 @@ func disableFilter(args []interface{}) (filters.Filter, error) { return &filter{ settings: circuit.BreakerSettings{ - Disabled: true, + Type: circuit.BreakerDisabled, }, }, nil } diff --git a/filters/circuit/breaker_test.go b/filters/circuit/breaker_test.go index 1d68fcee21..ab4e344f0b 100644 --- a/filters/circuit/breaker_test.go +++ b/filters/circuit/breaker_test.go @@ -124,7 +124,7 @@ func TestBreaker(t *testing.T) { t.Run("disable breaker", test( NewDisableBreaker, circuit.BreakerSettings{ - Disabled: true, + Type: circuit.BreakerDisabled, }, )) } diff --git a/proxy/breaker_test.go b/proxy/breaker_test.go index 5e0c9b0292..f999d7eadf 100644 --- a/proxy/breaker_test.go +++ b/proxy/breaker_test.go @@ -3,20 +3,22 @@ package proxy_test import ( "fmt" "net/http" + "strings" "testing" "time" "github.com/zalando/skipper/circuit" "github.com/zalando/skipper/eskip" "github.com/zalando/skipper/filters/builtin" + circuitfilters "github.com/zalando/skipper/filters/circuit" "github.com/zalando/skipper/proxy" "github.com/zalando/skipper/proxy/proxytest" ) type breakerTestContext struct { - t *testing.T - proxy *proxytest.TestProxy - backend *failingBackend + t *testing.T + proxy *proxytest.TestProxy + backends map[string]*failingBackend } type scenarioStep func(*breakerTestContext) @@ -24,6 +26,7 @@ type scenarioStep func(*breakerTestContext) type breakerScenario struct { title string settings []circuit.BreakerSettings + filters map[string][]*eskip.Filter steps []scenarioStep } @@ -33,29 +36,54 @@ const ( testHalfOpenRequests = 3 testRateWindow = 10 testRateFailures = 4 + defaultHost = "default" ) -func newBreakerProxy(backendURL string, settings []circuit.BreakerSettings) *proxytest.TestProxy { - r, err := eskip.Parse(fmt.Sprintf(`* -> "%s"`, backendURL)) - if err != nil { - panic(err) - } +func urlHostNerr(u string) string { + return strings.Split(u, "//")[1] +} +func newBreakerProxy( + backends map[string]*failingBackend, + settings []circuit.BreakerSettings, + filters map[string][]*eskip.Filter, +) *proxytest.TestProxy { params := proxy.Params{ CloseIdleConnsPeriod: -1, } + // for testing, mapping the configured backend hosts to the random generated host + var r []*eskip.Route if len(settings) > 0 { var breakerOptions circuit.Options - for _, si := range settings { - if si.Host == "" { - breakerOptions.Defaults = si + for _, s := range settings { + b := backends[s.Host] + if b == nil { + r = append(r, &eskip.Route{ + Id: defaultHost, + HostRegexps: []string{fmt.Sprintf("^%s$", defaultHost)}, + Filters: filters[defaultHost], + Backend: backends[defaultHost].url, + }) + s.Host = urlHostNerr(backends[defaultHost].url) + breakerOptions.Defaults = s + } else { + r = append(r, &eskip.Route{ + Id: s.Host, + HostRegexps: []string{fmt.Sprintf("^%s$", s.Host)}, + Filters: filters[s.Host], + Backend: backends[s.Host].url, + }) + s.Host = urlHostNerr(backends[s.Host].url) + breakerOptions.HostSettings = append(breakerOptions.HostSettings, s) } - - breakerOptions.HostSettings = append(breakerOptions.HostSettings, si) } params.CircuitBreakers = circuit.NewRegistry(breakerOptions) + } else { + r = append(r, &eskip.Route{ + Backend: backends[defaultHost].url, + }) } fr := builtin.MakeRegistry() @@ -63,17 +91,30 @@ func newBreakerProxy(backendURL string, settings []circuit.BreakerSettings) *pro } func testBreaker(t *testing.T, s breakerScenario) { - b := newFailingBackend() - defer b.close() + backends := make(map[string]*failingBackend) + for _, si := range s.settings { + h := si.Host + if h == "" { + h = defaultHost + } + + backends[h] = newFailingBackend() + defer backends[h].close() + } + + if len(backends) == 0 { + backends[defaultHost] = newFailingBackend() + defer backends[defaultHost].close() + } - p := newBreakerProxy(b.url, s.settings) + p := newBreakerProxy(backends, s.settings, s.filters) defer p.Close() steps := s.steps c := &breakerTestContext{ - t: t, - proxy: p, - backend: b, + t: t, + proxy: p, + backends: backends, } for !t.Failed() && len(steps) > 0 { @@ -82,20 +123,45 @@ func testBreaker(t *testing.T, s breakerScenario) { } } +func setBackendHostSucceed(c *breakerTestContext, host string) { + c.backends[host].succeed() +} + func setBackendSucceed(c *breakerTestContext) { - c.backend.succeed() + setBackendHostSucceed(c, defaultHost) +} + +func setBackendFailForHost(c *breakerTestContext, host string) { + c.backends[host].fail() +} + +func setBackendHostFail(host string) scenarioStep { + return func(c *breakerTestContext) { + setBackendFailForHost(c, host) + } } func setBackendFail(c *breakerTestContext) { - c.backend.fail() + setBackendFailForHost(c, defaultHost) +} + +func setBackendHostDown(c *breakerTestContext, host string) { + c.backends[host].down() } func setBackendDown(c *breakerTestContext) { - c.backend.down() + setBackendHostDown(c, defaultHost) } -func proxyRequest(c *breakerTestContext) (*http.Response, error) { - rsp, err := http.Get(c.proxy.URL) +func proxyRequestHost(c *breakerTestContext, host string) (*http.Response, error) { + req, err := http.NewRequest("GET", c.proxy.URL, nil) + if err != nil { + return nil, err + } + + req.Host = host + + rsp, err := (&http.Client{}).Do(req) if err != nil { return nil, err } @@ -104,6 +170,10 @@ func proxyRequest(c *breakerTestContext) (*http.Response, error) { return rsp, nil } +func proxyRequest(c *breakerTestContext) (*http.Response, error) { + return proxyRequestHost(c, defaultHost) +} + func checkStatus(c *breakerTestContext, rsp *http.Response, expected int) { if rsp.StatusCode != expected { c.t.Errorf( @@ -114,46 +184,78 @@ func checkStatus(c *breakerTestContext, rsp *http.Response, expected int) { } } -func request(expectedStatus int) scenarioStep { +func requestHostForStatus(c *breakerTestContext, host string, expectedStatus int) *http.Response { + rsp, err := proxyRequestHost(c, host) + if err != nil { + c.t.Error(err) + return nil + } + + checkStatus(c, rsp, expectedStatus) + return rsp +} + +func requestHost(host string, expectedStatus int) scenarioStep { return func(c *breakerTestContext) { - rsp, err := proxyRequest(c) - if err != nil { - c.t.Error(err) - return - } + requestHostForStatus(c, host, expectedStatus) + } +} - checkStatus(c, rsp, expectedStatus) +func request(expectedStatus int) scenarioStep { + return func(c *breakerTestContext) { + requestHostForStatus(c, defaultHost, expectedStatus) } } -func requestOpen(c *breakerTestContext) { - rsp, err := proxyRequest(c) - if err != nil { - c.t.Error(err) +func requestOpenForHost(c *breakerTestContext, host string) { + rsp := requestHostForStatus(c, host, 503) + if c.t.Failed() { return } - checkStatus(c, rsp, 503) if rsp.Header.Get("X-Circuit-Open") != "true" { c.t.Error("failed to set circuit open header") } } -func checkBackendCounter(count int) scenarioStep { +func requestHostOpen(host string) scenarioStep { return func(c *breakerTestContext) { - if c.backend.counter() != count { - c.t.Error("invalid number of requests on the backend") - } + requestOpenForHost(c, host) + } +} - c.backend.resetCounter() +func requestOpen(c *breakerTestContext) { + requestOpenForHost(c, defaultHost) +} + +func checkBackendForCounter(c *breakerTestContext, host string, count int) { + if c.backends[host].counter() != count { + c.t.Error("invalid number of requests on the backend") + } + + c.backends[host].resetCounter() +} + +func checkBackendHostCounter(host string, count int) scenarioStep { + return func(c *breakerTestContext) { + checkBackendForCounter(c, host, count) + } +} + +func checkBackendCounter(count int) scenarioStep { + return func(c *breakerTestContext) { + checkBackendForCounter(c, defaultHost, count) } } // as in scenario step N times -func times(n int, s scenarioStep) scenarioStep { +func times(n int, s ...scenarioStep) scenarioStep { return func(c *breakerTestContext) { for !c.t.Failed() && n > 0 { - s(c) + for _, si := range s { + si(c) + } + n-- } } @@ -196,7 +298,7 @@ func TestBreakerConsecutive(t *testing.T) { times(testConsecutiveFailureCount, request(500)), checkBackendCounter(testConsecutiveFailureCount), requestOpen, - checkBackendCounter(0), + // checkBackendCounter(0), }, }, { title: "open, fail to close", @@ -291,6 +393,107 @@ func TestBreakerConsecutive(t *testing.T) { func TestBreakerRate(t *testing.T) { for _, s := range []breakerScenario{{ title: "open", + settings: []circuit.BreakerSettings{{ + Type: circuit.FailureRate, + Failures: testRateFailures, + Window: testRateWindow, + }}, + steps: []scenarioStep{ + times(testRateWindow, request(200)), + checkBackendCounter(testRateWindow), + setBackendFail, + times(testRateFailures, request(500)), + checkBackendCounter(testRateFailures), + requestOpen, + checkBackendCounter(0), + }, + }, { + title: "open, fail to close", + settings: []circuit.BreakerSettings{{ + Type: circuit.FailureRate, + Failures: testRateFailures, + Window: testRateWindow, + Timeout: testBreakerTimeout, + HalfOpenRequests: testHalfOpenRequests, + }}, + steps: []scenarioStep{ + times(testRateWindow, request(200)), + checkBackendCounter(testRateWindow), + setBackendFail, + times(testRateFailures, request(500)), + checkBackendCounter(testRateFailures), + requestOpen, + checkBackendCounter(0), + wait(2 * testBreakerTimeout), + request(500), + checkBackendCounter(1), + requestOpen, + checkBackendCounter(0), + }, + }, { + title: "open, fixed during timeout", + settings: []circuit.BreakerSettings{{ + Type: circuit.FailureRate, + Failures: testRateFailures, + Window: testRateWindow, + Timeout: testBreakerTimeout, + HalfOpenRequests: testHalfOpenRequests, + }}, + steps: []scenarioStep{ + times(testRateWindow, request(200)), + checkBackendCounter(testRateWindow), + setBackendFail, + times(testRateFailures, request(500)), + checkBackendCounter(testRateFailures), + requestOpen, + checkBackendCounter(0), + wait(2 * testBreakerTimeout), + setBackendSucceed, + times(testHalfOpenRequests+1, request(200)), + checkBackendCounter(testHalfOpenRequests + 1), + }, + }, { + title: "open, fail again during half open", + settings: []circuit.BreakerSettings{{ + Type: circuit.FailureRate, + Failures: testRateFailures, + Window: testRateWindow, + Timeout: testBreakerTimeout, + HalfOpenRequests: testHalfOpenRequests, + }}, + steps: []scenarioStep{ + times(testRateWindow, request(200)), + checkBackendCounter(testRateWindow), + setBackendFail, + times(testRateFailures, request(500)), + checkBackendCounter(testRateFailures), + requestOpen, + checkBackendCounter(0), + wait(2 * testBreakerTimeout), + setBackendSucceed, + times(1, request(200)), + checkBackendCounter(1), + setBackendFail, + times(1, request(500)), + checkBackendCounter(1), + requestOpen, + checkBackendCounter(0), + }, + }, { + title: "open due to backend being down", + settings: []circuit.BreakerSettings{{ + Type: circuit.FailureRate, + Failures: testRateFailures, + Window: testRateWindow, + }}, + steps: []scenarioStep{ + times(testRateWindow, request(200)), + checkBackendCounter(testRateWindow), + setBackendDown, + times(testRateFailures, request(503)), + checkBackendCounter(0), + requestOpen, + }, }} { t.Run(s.title, func(t *testing.T) { testBreaker(t, s) @@ -298,6 +501,125 @@ func TestBreakerRate(t *testing.T) { } } -// rate breaker -// different settings per host -// route settings +func TestBreakerMultipleHosts(t *testing.T) { + testBreaker(t, breakerScenario{ + settings: []circuit.BreakerSettings{{ + Type: circuit.FailureRate, + Failures: testRateFailures + 2, + Window: testRateWindow, + }, { + Host: "foo", + Type: circuit.BreakerDisabled, + }, { + Host: "bar", + Type: circuit.FailureRate, + Failures: testRateFailures, + Window: testRateWindow, + }}, + steps: []scenarioStep{ + times( + testRateWindow, + // request(200), + requestHost("foo", 200), + // requestHost("bar", 200), + ), + // checkBackendCounter(testRateWindow), + checkBackendHostCounter("foo", testRateWindow), + // checkBackendHostCounter("bar", testRateWindow), + // setBackendFail, + trace("setting fail"), + setBackendHostFail("foo"), + // setBackendHostFail("bar"), + times(testRateFailures, + // request(500), + requestHost("foo", 500), + // requestHost("bar", 500), + ), + // checkBackendCounter(testRateFailures), + checkBackendHostCounter("foo", testRateFailures), + // checkBackendHostCounter("bar", testRateFailures), + // request(500), + requestHost("foo", 500), + // requestHostOpen("bar"), + // checkBackendCounter(1), + checkBackendHostCounter("foo", 1), + // checkBackendHostCounter("bar", 0), + // request(500), + requestHost("foo", 500), + // checkBackendCounter(1), + checkBackendHostCounter("foo", 1), + // requestOpen, + requestHost("foo", 500), + // checkBackendCounter(0), + checkBackendHostCounter("foo", 1), + }, + }) +} + +func TestBreakerMultipleHostsAndRouteBasedSettings(t *testing.T) { + testBreaker(t, breakerScenario{ + settings: []circuit.BreakerSettings{{ + Type: circuit.FailureRate, + Failures: testRateFailures + 2, + Window: testRateWindow, + }, { + Host: "foo", + Type: circuit.FailureRate, + Failures: testRateFailures + 1, + Window: testRateWindow, + }, { + Host: "bar", + Type: circuit.FailureRate, + Failures: testRateFailures + 1, + Window: testRateWindow, + }}, + filters: map[string][]*eskip.Filter{ + "foo": {{ + Name: circuitfilters.DisableBreakerName, + }}, + "bar": {{ + Name: circuitfilters.RateBreakerName, + Args: []interface{}{ + testRateFailures, + testRateWindow, + }, + }}, + }, + steps: []scenarioStep{ + times( + testRateWindow, + request(200), + requestHost("foo", 200), + requestHost("bar", 200), + ), + checkBackendCounter(testRateWindow), + checkBackendHostCounter("foo", testRateWindow), + checkBackendHostCounter("bar", testRateWindow), + setBackendFail, + setBackendHostFail("foo"), + setBackendHostFail("bar"), + times(testRateFailures, + request(500), + requestHost("foo", 500), + requestHost("bar", 500), + ), + checkBackendCounter(testRateFailures), + checkBackendHostCounter("foo", testRateFailures), + checkBackendHostCounter("bar", testRateFailures), + request(500), + requestHost("foo", 500), + requestHostOpen("bar"), + checkBackendCounter(1), + checkBackendHostCounter("foo", 1), + checkBackendHostCounter("bar", 0), + request(500), + requestHost("foo", 500), + checkBackendCounter(1), + checkBackendHostCounter("foo", 1), + requestOpen, + requestHost("foo", 500), + checkBackendCounter(0), + checkBackendHostCounter("foo", 1), + }, + }) +} diff --git a/proxy/proxy.go b/proxy/proxy.go index 6bf1cf49b6..ad536153d0 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -494,7 +494,8 @@ func (p *Proxy) checkBreaker(c *context) (func(bool), bool) { return nil, true } - return b.Allow() + done, ok := b.Allow() + return done, ok } func (p *Proxy) do(ctx *context) error { From 60ce060aca74995a25e51e29c771176e09af9875 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sun, 9 Jul 2017 21:15:19 +0200 Subject: [PATCH 07/12] breaker configuration from command line --- circuit/breaker.go | 47 +++++++++++++++- circuit/breaker_test.go | 20 +++++++ circuit/registry.go | 35 ++++++++---- circuit/registry_test.go | 108 ++++++++++++++++-------------------- cmd/skipper/breakerflags.go | 104 ++++++++++++++++++++++++++++++++++ cmd/skipper/main.go | 3 + proxy/breaker_test.go | 54 +++++++++--------- skipper.go | 7 +++ 8 files changed, 276 insertions(+), 102 deletions(-) create mode 100644 cmd/skipper/breakerflags.go diff --git a/circuit/breaker.go b/circuit/breaker.go index e7911dbf1e..c8685b196e 100644 --- a/circuit/breaker.go +++ b/circuit/breaker.go @@ -1,6 +1,10 @@ package circuit -import "time" +import ( + "strconv" + "strings" + "time" +) // TODO: // - in case of the rate breaker, there are unnecessary synchronization steps due to the 3rd party gobreaker @@ -67,6 +71,47 @@ func (to BreakerSettings) mergeSettings(from BreakerSettings) BreakerSettings { return to } +func (s BreakerSettings) String() string { + var ss []string + + switch s.Type { + case ConsecutiveFailures: + ss = append(ss, "type=consecutive") + case FailureRate: + ss = append(ss, "type=rate") + case BreakerDisabled: + return "disabled" + default: + return "none" + } + + if s.Host != "" { + ss = append(ss, "host="+s.Host) + } + + if s.Type == FailureRate && s.Window > 0 { + ss = append(ss, "window="+strconv.Itoa(s.Window)) + } + + if s.Failures > 0 { + ss = append(ss, "failures="+strconv.Itoa(s.Failures)) + } + + if s.Timeout > 0 { + ss = append(ss, "timeout="+s.Timeout.String()) + } + + if s.HalfOpenRequests > 0 { + ss = append(ss, "half-open-requests="+strconv.Itoa(s.HalfOpenRequests)) + } + + if s.IdleTTL > 0 { + ss = append(ss, "idle-ttl="+s.IdleTTL.String()) + } + + return strings.Join(ss, ",") +} + func (b voidBreaker) Allow() (func(bool), bool) { return func(bool) {}, true } diff --git a/circuit/breaker_test.go b/circuit/breaker_test.go index 19d03ae3a7..f8bc2693d0 100644 --- a/circuit/breaker_test.go +++ b/circuit/breaker_test.go @@ -183,3 +183,23 @@ func TestRateBreakerFuzzy(t *testing.T) { <-stop } + +func TestSettingsString(t *testing.T) { + s := BreakerSettings{ + Type: FailureRate, + Host: "www.example.org", + Failures: 30, + Window: 300, + Timeout: time.Minute, + HalfOpenRequests: 15, + IdleTTL: time.Hour, + } + + ss := s.String() + expect := "type=rate,host=www.example.org,window=300,failures=30,timeout=1m0s,half-open-requests=15,idle-ttl=1h0m0s" + if ss != expect { + t.Error("invalid breaker settings string") + t.Logf("got : %s", ss) + t.Logf("expected: %s", expect) + } +} diff --git a/circuit/registry.go b/circuit/registry.go index 15b0ab884c..f681804a47 100644 --- a/circuit/registry.go +++ b/circuit/registry.go @@ -10,11 +10,6 @@ const ( RouteSettingsKey = "#circuitbreakersettings" ) -type Options struct { - Defaults BreakerSettings - HostSettings []BreakerSettings -} - type Registry struct { defaults BreakerSettings hostSettings map[string]BreakerSettings @@ -23,18 +18,32 @@ type Registry struct { mx *sync.Mutex } -func NewRegistry(o Options) *Registry { - if o.Defaults.IdleTTL <= 0 { - o.Defaults.IdleTTL = DefaultIdleTTL +func NewRegistry(settings ...BreakerSettings) *Registry { + var ( + defaults BreakerSettings + hostSettings []BreakerSettings + ) + + for _, s := range settings { + if s.Host == "" { + defaults = s + continue + } + + hostSettings = append(hostSettings, s) + } + + if defaults.IdleTTL <= 0 { + defaults.IdleTTL = DefaultIdleTTL } hs := make(map[string]BreakerSettings) - for _, s := range o.HostSettings { - hs[s.Host] = s.mergeSettings(o.Defaults) + for _, s := range settings { + hs[s.Host] = s.mergeSettings(defaults) } return &Registry{ - defaults: o.Defaults, + defaults: defaults, hostSettings: hs, lookup: make(map[BreakerSettings]*Breaker), access: &list{}, @@ -71,7 +80,9 @@ func (r *Registry) get(s BreakerSettings) *Breaker { b, ok := r.lookup[s] if !ok || b.idle(now) { // if doesn't exist or idle, cleanup and create a new one - r.access.remove(b, b) + if b != nil { + r.access.remove(b, b) + } // check if there is any other to evict, evict if yes r.dropIdle(now) diff --git a/circuit/registry_test.go b/circuit/registry_test.go index ac52b4f644..cab0468615 100644 --- a/circuit/registry_test.go +++ b/circuit/registry_test.go @@ -59,7 +59,7 @@ func TestRegistry(t *testing.T) { } t.Run("no settings", func(t *testing.T) { - r := NewRegistry(Options{}) + r := NewRegistry() b := r.Get(BreakerSettings{Host: "foo"}) checkNil(t, b) @@ -67,7 +67,7 @@ func TestRegistry(t *testing.T) { t.Run("only default settings", func(t *testing.T) { d := createSettings(5) - r := NewRegistry(Options{Defaults: d}) + r := NewRegistry(d) b := r.Get(BreakerSettings{Host: "foo"}) checkWithoutHost(t, b, r.defaults) @@ -76,7 +76,7 @@ func TestRegistry(t *testing.T) { t.Run("only host settings", func(t *testing.T) { h0 := createHostSettings("foo", 5) h1 := createHostSettings("bar", 5) - r := NewRegistry(Options{HostSettings: []BreakerSettings{h0, h1}}) + r := NewRegistry(h0, h1) b := r.Get(BreakerSettings{Host: "foo"}) checkWithHost(t, b, h0) @@ -92,10 +92,7 @@ func TestRegistry(t *testing.T) { d := createSettings(5) h0 := createHostSettings("foo", 5) h1 := createHostSettings("bar", 5) - r := NewRegistry(Options{ - Defaults: d, - HostSettings: []BreakerSettings{h0, h1}, - }) + r := NewRegistry(d, h0, h1) b := r.Get(BreakerSettings{Host: "foo"}) checkWithHost(t, b, h0) @@ -108,7 +105,7 @@ func TestRegistry(t *testing.T) { }) t.Run("only custom settings", func(t *testing.T) { - r := NewRegistry(Options{}) + r := NewRegistry() cs := createHostSettings("foo", 15) b := r.Get(cs) @@ -117,7 +114,7 @@ func TestRegistry(t *testing.T) { t.Run("only default settings, with custom", func(t *testing.T) { d := createSettings(5) - r := NewRegistry(Options{Defaults: d}) + r := NewRegistry(d) cs := createHostSettings("foo", 15) b := r.Get(cs) @@ -127,7 +124,7 @@ func TestRegistry(t *testing.T) { t.Run("only host settings, with custom", func(t *testing.T) { h0 := createHostSettings("foo", 5) h1 := createHostSettings("bar", 5) - r := NewRegistry(Options{HostSettings: []BreakerSettings{h0, h1}}) + r := NewRegistry(h0, h1) cs := createHostSettings("foo", 15) b := r.Get(cs) @@ -146,10 +143,7 @@ func TestRegistry(t *testing.T) { d := createSettings(5) h0 := createHostSettings("foo", 5) h1 := createHostSettings("bar", 5) - r := NewRegistry(Options{ - Defaults: d, - HostSettings: []BreakerSettings{h0, h1}, - }) + r := NewRegistry(d, h0, h1) cs := createHostSettings("foo", 15) b := r.Get(cs) @@ -165,7 +159,7 @@ func TestRegistry(t *testing.T) { }) t.Run("no settings and disabled", func(t *testing.T) { - r := NewRegistry(Options{}) + r := NewRegistry() b := r.Get(createDisabledSettings()) checkNil(t, b) @@ -173,7 +167,7 @@ func TestRegistry(t *testing.T) { t.Run("only default settings, disabled", func(t *testing.T) { d := createSettings(5) - r := NewRegistry(Options{Defaults: d}) + r := NewRegistry(d) b := r.Get(createDisabledSettings()) checkNil(t, b) @@ -182,7 +176,7 @@ func TestRegistry(t *testing.T) { t.Run("only host settings, disabled", func(t *testing.T) { h0 := createHostSettings("foo", 5) h1 := createHostSettings("bar", 5) - r := NewRegistry(Options{HostSettings: []BreakerSettings{h0, h1}}) + r := NewRegistry(h0, h1) b := r.Get(createDisabledSettings()) checkNil(t, b) @@ -192,10 +186,7 @@ func TestRegistry(t *testing.T) { d := createSettings(5) h0 := createHostSettings("foo", 5) h1 := createHostSettings("bar", 5) - r := NewRegistry(Options{ - Defaults: d, - HostSettings: []BreakerSettings{h0, h1}, - }) + r := NewRegistry(d, h0, h1) b := r.Get(createDisabledSettings()) checkNil(t, b) @@ -207,30 +198,27 @@ func TestRegistryEvictIdle(t *testing.T) { t.Skip() } - options := Options{ - Defaults: BreakerSettings{ - IdleTTL: 15 * time.Millisecond, - }, - HostSettings: []BreakerSettings{{ - Host: "foo", - Type: ConsecutiveFailures, - Failures: 4, - }, { - Host: "bar", - Type: ConsecutiveFailures, - Failures: 5, - }, { - Host: "baz", - Type: ConsecutiveFailures, - Failures: 6, - }, { - Host: "qux", - Type: ConsecutiveFailures, - Failures: 7, - }}, - } - toEvict := options.HostSettings[2] - r := NewRegistry(options) + settings := []BreakerSettings{{ + IdleTTL: 15 * time.Millisecond, + }, { + Host: "foo", + Type: ConsecutiveFailures, + Failures: 4, + }, { + Host: "bar", + Type: ConsecutiveFailures, + Failures: 5, + }, { + Host: "baz", + Type: ConsecutiveFailures, + Failures: 6, + }, { + Host: "qux", + Type: ConsecutiveFailures, + Failures: 7, + }} + toEvict := settings[3] + r := NewRegistry(settings...) get := func(host string) { b := r.Get(BreakerSettings{Host: host}) @@ -243,12 +231,12 @@ func TestRegistryEvictIdle(t *testing.T) { get("bar") get("baz") - time.Sleep(2 * options.Defaults.IdleTTL / 3) + time.Sleep(2 * settings[0].IdleTTL / 3) get("foo") get("bar") - time.Sleep(2 * options.Defaults.IdleTTL / 3) + time.Sleep(2 * settings[0].IdleTTL / 3) get("qux") @@ -287,17 +275,17 @@ func TestIndividualIdle(t *testing.T) { hostIdleTimeout = 6 * time.Millisecond ) - r := NewRegistry(Options{ - Defaults: BreakerSettings{ + r := NewRegistry( + BreakerSettings{ Type: ConsecutiveFailures, Failures: consecutiveFailures, IdleTTL: idleTimeout, }, - HostSettings: []BreakerSettings{{ + BreakerSettings{ Host: "foo", IdleTTL: hostIdleTimeout, - }}, - }) + }, + ) shouldBeClosed := func(t *testing.T, host string) func(bool) { b := r.Get(BreakerSettings{Host: host}) @@ -377,9 +365,9 @@ func TestRegistryFuzzy(t *testing.T) { hosts[i] = genHost() } - options := Options{Defaults: BreakerSettings{IdleTTL: idleTTL}} + settings := []BreakerSettings{{IdleTTL: idleTTL}} - settings := make(map[string]BreakerSettings) + settingsMap := make(map[string]BreakerSettings) for _, h := range hosts { s := BreakerSettings{ Host: h, @@ -387,16 +375,16 @@ func TestRegistryFuzzy(t *testing.T) { Failures: 5, IdleTTL: idleTTL, } - options.HostSettings = append(options.HostSettings, s) - settings[h] = s + settings = append(settings, s) + settingsMap[h] = s } - r := NewRegistry(options) + r := NewRegistry(settings...) // the first customSettingsCount hosts can have corresponding custom settings customSettings := make(map[string]BreakerSettings) for _, h := range hosts[:customSettingsCount] { - s := settings[h] + s := settingsMap[h] s.Failures = 15 s.IdleTTL = idleTTL customSettings[h] = s @@ -425,7 +413,7 @@ func TestRegistryFuzzy(t *testing.T) { old := hosts[i] nu := genHost() hosts[i] = nu - replaceHostSettings(settings, old, nu) + replaceHostSettings(settingsMap, old, nu) replaceHostSettings(customSettings, old, nu) }) } @@ -440,7 +428,7 @@ func TestRegistryFuzzy(t *testing.T) { return } - s = settings[hosts[rand.Intn(hostCount)]] + s = settingsMap[hosts[rand.Intn(hostCount)]] }) return s diff --git a/cmd/skipper/breakerflags.go b/cmd/skipper/breakerflags.go new file mode 100644 index 0000000000..a01e6a09ff --- /dev/null +++ b/cmd/skipper/breakerflags.go @@ -0,0 +1,104 @@ +package main + +import ( + "errors" + "strconv" + "strings" + + "github.com/zalando/skipper/circuit" +) + +const breakerUsage = `set global or host specific circuit breakers, e.g. -breaker type=rate,host=www.example.org,window=300,failures=30 + possible breaker properties: + type: consecutive/rate/disabled (defaults to consecutive) + host: a host name that overrides the global for a host + failures: the number of failures for consecutive or rate breakers + window: the size of the sliding window for the rate breaker + timeout: duration string or milliseconds while the breaker stays open + half-open-requests: the number of requests in half-open state to succeed before getting closed again + idle-ttl: duration string or milliseconds after the breaker is considered idle and reset` + +type breakerFlags []circuit.BreakerSettings + +var errInvalidBreakerConfig = errors.New("invalid breaker config") + +func (b *breakerFlags) String() string { + s := make([]string, len(*b)) + for i, bi := range *b { + s[i] = bi.String() + } + + return strings.Join(s, "\n") +} + +func (b *breakerFlags) Set(value string) error { + var s circuit.BreakerSettings + + vs := strings.Split(value, ",") + for _, vi := range vs { + kv := strings.Split(vi, "=") + if len(kv) != 2 { + return errInvalidBreakerConfig + } + + switch kv[0] { + case "type": + switch kv[1] { + case "consecutive": + s.Type = circuit.ConsecutiveFailures + case "rate": + s.Type = circuit.FailureRate + case "disabled": + s.Type = circuit.BreakerDisabled + default: + return errInvalidBreakerConfig + } + case "host": + s.Host = kv[1] + case "window": + i, err := strconv.Atoi(kv[1]) + if err != nil { + return err + } + + s.Window = i + case "failures": + i, err := strconv.Atoi(kv[1]) + if err != nil { + return err + } + + s.Failures = i + case "timeout": + d, err := parseDurationFlag(kv[1]) + if err != nil { + return err + } + + s.Timeout = d + case "half-open-requests": + i, err := strconv.Atoi(kv[1]) + if err != nil { + return err + } + + s.HalfOpenRequests = i + case "idle-ttl": + d, err := parseDurationFlag(kv[1]) + if err != nil { + return err + } + + s.IdleTTL = d + default: + return errInvalidBreakerConfig + } + } + + if s.Type == circuit.BreakerNone { + s.Type = circuit.ConsecutiveFailures + } + + *b = append(*b, s) + return nil +} diff --git a/cmd/skipper/main.go b/cmd/skipper/main.go index 3a71494d47..617d51c124 100644 --- a/cmd/skipper/main.go +++ b/cmd/skipper/main.go @@ -135,6 +135,7 @@ var ( experimentalUpgrade bool printVersion bool maxLoopbacks int + breakers breakerFlags ) func init() { @@ -182,6 +183,7 @@ func init() { flag.BoolVar(&experimentalUpgrade, "experimental-upgrade", defaultExperimentalUpgrade, experimentalUpgradeUsage) flag.BoolVar(&printVersion, "version", false, versionUsage) flag.IntVar(&maxLoopbacks, "max-loopbacks", proxy.DefaultMaxLoopbacks, maxLoopbacksUsage) + flag.Var(&breakers, "breaker", breakerUsage) flag.Parse() } @@ -268,6 +270,7 @@ func main() { BackendFlushInterval: backendFlushInterval, ExperimentalUpgrade: experimentalUpgrade, MaxLoopbacks: maxLoopbacks, + BreakerSettings: breakers, } if insecure { diff --git a/proxy/breaker_test.go b/proxy/breaker_test.go index f999d7eadf..85136a966b 100644 --- a/proxy/breaker_test.go +++ b/proxy/breaker_test.go @@ -55,9 +55,8 @@ func newBreakerProxy( // for testing, mapping the configured backend hosts to the random generated host var r []*eskip.Route if len(settings) > 0 { - var breakerOptions circuit.Options - for _, s := range settings { - b := backends[s.Host] + for i := range settings { + b := backends[settings[i].Host] if b == nil { r = append(r, &eskip.Route{ Id: defaultHost, @@ -65,21 +64,18 @@ func newBreakerProxy( Filters: filters[defaultHost], Backend: backends[defaultHost].url, }) - s.Host = urlHostNerr(backends[defaultHost].url) - breakerOptions.Defaults = s } else { r = append(r, &eskip.Route{ - Id: s.Host, - HostRegexps: []string{fmt.Sprintf("^%s$", s.Host)}, - Filters: filters[s.Host], - Backend: backends[s.Host].url, + Id: settings[i].Host, + HostRegexps: []string{fmt.Sprintf("^%s$", settings[i].Host)}, + Filters: filters[settings[i].Host], + Backend: backends[settings[i].Host].url, }) - s.Host = urlHostNerr(backends[s.Host].url) - breakerOptions.HostSettings = append(breakerOptions.HostSettings, s) + settings[i].Host = urlHostNerr(backends[settings[i].Host].url) } } - params.CircuitBreakers = circuit.NewRegistry(breakerOptions) + params.CircuitBreakers = circuit.NewRegistry(settings...) } else { r = append(r, &eskip.Route{ Backend: backends[defaultHost].url, @@ -519,36 +515,36 @@ func TestBreakerMultipleHosts(t *testing.T) { steps: []scenarioStep{ times( testRateWindow, - // request(200), + request(200), requestHost("foo", 200), - // requestHost("bar", 200), + requestHost("bar", 200), ), - // checkBackendCounter(testRateWindow), + checkBackendCounter(testRateWindow), checkBackendHostCounter("foo", testRateWindow), - // checkBackendHostCounter("bar", testRateWindow), - // setBackendFail, + checkBackendHostCounter("bar", testRateWindow), + setBackendFail, trace("setting fail"), setBackendHostFail("foo"), - // setBackendHostFail("bar"), + setBackendHostFail("bar"), times(testRateFailures, - // request(500), + request(500), requestHost("foo", 500), - // requestHost("bar", 500), + requestHost("bar", 500), ), - // checkBackendCounter(testRateFailures), + checkBackendCounter(testRateFailures), checkBackendHostCounter("foo", testRateFailures), - // checkBackendHostCounter("bar", testRateFailures), - // request(500), + checkBackendHostCounter("bar", testRateFailures), + request(500), requestHost("foo", 500), - // requestHostOpen("bar"), - // checkBackendCounter(1), + requestHostOpen("bar"), + checkBackendCounter(1), checkBackendHostCounter("foo", 1), - // checkBackendHostCounter("bar", 0), - // request(500), + checkBackendHostCounter("bar", 0), + request(500), requestHost("foo", 500), - // checkBackendCounter(1), + checkBackendCounter(1), checkBackendHostCounter("foo", 1), - // requestOpen, + requestOpen, requestHost("foo", 500), // checkBackendCounter(0), checkBackendHostCounter("foo", 1), diff --git a/skipper.go b/skipper.go index 86aa868af2..df16f155a7 100644 --- a/skipper.go +++ b/skipper.go @@ -8,6 +8,7 @@ import ( "time" log "github.com/sirupsen/logrus" + "github.com/zalando/skipper/circuit" "github.com/zalando/skipper/dataclients/kubernetes" "github.com/zalando/skipper/eskipfile" "github.com/zalando/skipper/etcd" @@ -227,6 +228,8 @@ type Options struct { ExperimentalUpgrade bool MaxLoopbacks int + + BreakerSettings []circuit.BreakerSettings } func createDataClients(o Options, auth innkeeper.Authentication) ([]routing.DataClient, error) { @@ -448,6 +451,10 @@ func Run(o Options) error { MaxLoopbacks: o.MaxLoopbacks, } + if len(o.BreakerSettings) > 0 { + proxyParams.CircuitBreakers = circuit.NewRegistry(o.BreakerSettings...) + } + if o.DebugListener != "" { do := proxyParams do.Flags |= proxy.Debug From 427bd2e79f0d9e90c76cfcec100fb2ec30500dc6 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Mon, 10 Jul 2017 00:42:56 +0200 Subject: [PATCH 08/12] add documentation --- circuit/breaker.go | 12 ++- circuit/doc.go | 149 ++++++++++++++++++++++++++++++++ circuit/registry.go | 32 ++++--- cmd/skipper/breakerflags.go | 3 +- doc.go | 12 ++- eskip/doc.go | 92 +++++++++++--------- filters/circuit/breaker.go | 27 +++++- filters/circuit/breaker_test.go | 2 +- proxy/doc.go | 14 ++- proxy/proxy.go | 6 +- skipper.go | 3 + 11 files changed, 292 insertions(+), 60 deletions(-) create mode 100644 circuit/doc.go diff --git a/circuit/breaker.go b/circuit/breaker.go index c8685b196e..f91f5b2da7 100644 --- a/circuit/breaker.go +++ b/circuit/breaker.go @@ -10,6 +10,7 @@ import ( // - in case of the rate breaker, there are unnecessary synchronization steps due to the 3rd party gobreaker // - introduce a TTL in the rate breaker for the stale samplers +// BreakerType defines the type of the used breaker: consecutive, rate or disabled. type BreakerType int const ( @@ -19,6 +20,10 @@ const ( BreakerDisabled ) +// BreakerSettings contains the settings for individual circuit breakers. +// +// See the package docs for the detailed merging/overriding rules of the settings and for the meaning of the +// individual fields. type BreakerSettings struct { Type BreakerType Host string @@ -34,7 +39,9 @@ type breakerImplementation interface { type voidBreaker struct{} -// represents a single circuit breaker +// Breaker represents a single circuit breaker for a particular set of settings. +// +// Use the Get() method of the Registry to request fully initialized breakers. type Breaker struct { settings BreakerSettings ts time.Time @@ -71,6 +78,7 @@ func (to BreakerSettings) mergeSettings(from BreakerSettings) BreakerSettings { return to } +// String returns the string representation of a particular set of circuit breaker settings. func (s BreakerSettings) String() string { var ss []string @@ -133,6 +141,8 @@ func newBreaker(s BreakerSettings) *Breaker { } } +// Allow returns true if the breaker is in the closed state and a callback function to report the outcome of the +// operation, where true means success. It may not return a callback function when the state is not closed. func (b *Breaker) Allow() (func(bool), bool) { return b.impl.Allow() } diff --git a/circuit/doc.go b/circuit/doc.go new file mode 100644 index 0000000000..2fa91fbf32 --- /dev/null +++ b/circuit/doc.go @@ -0,0 +1,149 @@ +/* +Package circuit implements circuit breaker functionality for the proxy. + +It provides two types of circuit breakers: consecutive and failure rate based. The circuit breakers can be +configured globally, or based on hosts and individual routes. The registry object ensures synchronized access to +the active breakers and releases the idle ones. + +The circuit breakers are always assigned to backend hosts, so that the outcome of requests to one host never +affects the circuit breaker behavior of another host. Besides hosts, individual routes can have separate circuit +breakers, too. + +Breaker Type - Consecutive Failures + +This breaker opens when the proxy couldn't connect to a backend or received a >=500 status code at least N times +in a row, where N is the configuration of the breaker. When open, the proxy returns 503 - Service Unavailable +response during the configured timeout. After this timeout, the breaker goes into half-open state, where it +expects that M number of requests succeed. The requests in the half-open state are accepted concurrently. If any +of the requests during the half-open state fails, the breaker goes back to open state. If all succeed, it goes +to closed state again. + +Breaker Type - Failure Rate + +The "rate breaker" works similar to the "consecutive breaker", but instead of considering N consecutive failures +for going open, it opens when the failure reaches a rate of N out of M, where M is a sliding window, N consecutiveBreaker(9) + -> "https://foo.backend.net"; + + backendHealthcheck: Path("/healthcheck") + -> disableBreaker() + -> "https://foo.backend.net"; + +The breaker settings can be defined in the following levels: global, based on the backend host, based on +individual route settings. The values are merged in the same order so, that the global settings serve as +defaults for the host settings, and the result of the global and host settings serve as defaults for the route +settings. Setting global values happens the same way as setting host values, but leaving the Host field empty. +Setting route based values happens with filters in the route definitions. +(https://godoc.org/github.com/zalando/skipper/filters/circuit) + +Settings - Type + +It can be ConsecutiveFailures, FailureRate or Disabled, where the first two values select which breaker to use, +while the Disabled value can override a global or host configuration disabling the circuit breaker for the +specific host or route. + +Command line name: type. Possible command line values: consecutive, rate, disabled. + +Settings - Host + +The Host field indicates to which backend host should the current set of settings be applied. Leaving it empty +indicates global settings. + +Command line name: host. + +Settings - Window + +The window value sets the size of the sliding counter window of the failure rate breaker. + +Command line name: window. Possible command line values: any positive integer. + +Settings - Failures + +The failures value sets the max failure count for both the "consecutive" and "rate" breakers. + +Command line name: failures. Possible command line values: any positive integer. + +Settings - Timeout + +With the timeout we can set how long the breaker should stay open, before becoming half-open. + +Command line name: timeout. Possible command line values: any positive integer as milliseconds or duration +string, e.g. 15m30s. + +Settings - Half-Open Requests + +Defines the number of requests expected to succeed while in the circuit breaker is in the half-open state. + +Command line name: half-open-requests. Possible command line values: any positive integer. + +Settings - Idle TTL + +Defines the idle timeout after which a circuit breaker gets recycled, if it wasn't used. + +Command line name: idle-ttl. Possible command line values: any positive integer as milliseconds or duration +string, e.g. 15m30s. + +Filters + +The following circuit breaker filters are supported: consecutiveBreaker(), rateBreaker() and disableBreaker(). + +The consecutiveBreaker filter expects one mandatory parameter: the number of consecutive failures to open. It +accepts the following optional arguments: timeout, half-open requests, idle-ttl, whose meaning is the same as in +case of the command line values. + + consecutiveBreaker(5, "1m", 12, "30m") + +The rateBreaker filter expects two mandatory parameters: the number of consecutive failures to open and the size +of the sliding window. It accepts the following optional arguments: timeout, half-open requests, idle-ttl, whose +meaning is the same as in case of the command line values. + + rateBreaker(30, 300, "1m", 12, "30m") + +The disableBreaker filter doesn't expect any arguments, and it disables the circuit breaker, if any, for the +route that it appears in. + + disableBreaker() + +Proxy Usage + +The proxy, when circuit breakers are configured, uses them for backend connections. When it fails to establish a +connection to a backend host, or receives a status code >=500, then it reports to the breaker as a failure. If +the breaker decides to go to the open state, the proxy doesn't try to make any backend requests, returns 503 +status code, and appends a header to the response: + + X-Circuit-Open: true + +Registry + +The active circuit breakers are stored in a registry. They are created on-demand, for the requested settings. +The registry synchronizes access to the shared circuit breakers. When the registry detects that a circuit +breaker is idle, it resets it, this way avoiding that an old series of failures would cause the circuit breaker +go open for an unreasonably low number of failures. The registry also makes sure to cleanup idle circuit +breakers that are not requested anymore, passively, whenever a new circuit breaker is created. This way it +prevents that with a continously changing route configuration, circuit breakers for inaccessible backend hosts +would be stored infinitely. +*/ +package circuit diff --git a/circuit/registry.go b/circuit/registry.go index f681804a47..11e8748d39 100644 --- a/circuit/registry.go +++ b/circuit/registry.go @@ -5,11 +5,12 @@ import ( "time" ) -const ( - DefaultIdleTTL = time.Hour - RouteSettingsKey = "#circuitbreakersettings" -) +// DefaultIdleTTL is used to recycle those unused circuit breakers that don't have this value configured and it +// is not set globally. +const DefaultIdleTTL = time.Hour +// Registry objects hold the active circuit breakers, ensure synchronized access to them, apply default settings +// and recycle the idle ones. type Registry struct { defaults BreakerSettings hostSettings map[string]BreakerSettings @@ -18,6 +19,8 @@ type Registry struct { mx *sync.Mutex } +// NewRegistry initializes a registry with the provided default settings. Settings with the same host value are +// merged together, and settings with an empty host field are merged into each. func NewRegistry(settings ...BreakerSettings) *Registry { var ( defaults BreakerSettings @@ -26,7 +29,7 @@ func NewRegistry(settings ...BreakerSettings) *Registry { for _, s := range settings { if s.Host == "" { - defaults = s + defaults = defaults.mergeSettings(s) continue } @@ -38,8 +41,12 @@ func NewRegistry(settings ...BreakerSettings) *Registry { } hs := make(map[string]BreakerSettings) - for _, s := range settings { - hs[s.Host] = s.mergeSettings(defaults) + for _, s := range hostSettings { + if sh, ok := hs[s.Host]; ok { + hs[s.Host] = s.mergeSettings(sh) + } else { + hs[s.Host] = s.mergeSettings(defaults) + } } return &Registry{ @@ -80,9 +87,7 @@ func (r *Registry) get(s BreakerSettings) *Breaker { b, ok := r.lookup[s] if !ok || b.idle(now) { // if doesn't exist or idle, cleanup and create a new one - if b != nil { - r.access.remove(b, b) - } + r.access.remove(b, b) // check if there is any other to evict, evict if yes r.dropIdle(now) @@ -99,6 +104,13 @@ func (r *Registry) get(s BreakerSettings) *Breaker { return b } +// Get returns a circuit breaker for the provided settings. The BreakerSettings object is used here as a key, +// but typically it is enough to just set its Host field: +// +// r.Get(BreakerSettings{Host: backendHost}) +// +// The key will be filled up with the defaults and the matching circuit breaker will be returned if it exists or +// a new one will be created. func (r *Registry) Get(s BreakerSettings) *Breaker { // we check for host, because we don't want to use shared global breakers if s.Type == BreakerDisabled || s.Host == "" { diff --git a/cmd/skipper/breakerflags.go b/cmd/skipper/breakerflags.go index a01e6a09ff..a087899f71 100644 --- a/cmd/skipper/breakerflags.go +++ b/cmd/skipper/breakerflags.go @@ -16,7 +16,8 @@ const breakerUsage = `set global or host specific circuit breakers, e.g. -breake window: the size of the sliding window for the rate breaker timeout: duration string or milliseconds while the breaker stays open half-open-requests: the number of requests in half-open state to succeed before getting closed again - idle-ttl: duration string or milliseconds after the breaker is considered idle and reset` + idle-ttl: duration string or milliseconds after the breaker is considered idle and reset + (see also: https://godoc.org/github.com/zalando/skipper/circuit)` type breakerFlags []circuit.BreakerSettings diff --git a/doc.go b/doc.go index 4a537eb25e..40809868d9 100644 --- a/doc.go +++ b/doc.go @@ -6,7 +6,8 @@ Skipper works as an HTTP reverse proxy that is responsible for mapping incoming requests to multiple HTTP backend services, based on routes that are selected by the request attributes. At the same time, both the requests and the responses can be augmented by a filter chain that is -specifically defined for each route. +specifically defined for each route. Optionally, it can provide circuit +breaker mechanism individually for each backend host. Skipper can load and update the route definitions from multiple data sources without being restarted. @@ -184,6 +185,15 @@ Skipper can use additional data sources, provided by extensions. Sources must implement the DataClient interface in the routing package. +Circuit Breaker + +Skipper provides circuit breakers, configured globally, based on backend +hosts or based on individual routes. It supports two types of circuit +breaker behavior: open on N consecutive failures, or open on N failures +out of M requests. For details, see: +https://godoc.org/github.com/zalando/skipper/circuit. + + Running Skipper Skipper can be started with the default executable command 'skipper', or diff --git a/eskip/doc.go b/eskip/doc.go index f70ce2f2bf..b242283025 100644 --- a/eskip/doc.go +++ b/eskip/doc.go @@ -12,14 +12,14 @@ expression prefixed by its id and a ':'. A routing table example: - catalog: Path("/*category") -> "https://catalog.example.org"; - productPage: Path("/products/:id") -> "https://products.example.org"; - userAccount: Path("/user/:id/*userpage") -> "https://users.example.org"; + catalog: Path("/*category") -> "https://catalog.example.org"; + productPage: Path("/products/:id") -> "https://products.example.org"; + userAccount: Path("/user/:id/*userpage") -> "https://users.example.org"; - // 404 - notfound: * -> - modPath(/.+/, "/notfound.html") -> static("/", "/var/www") -> - + // 404 + notfound: * -> + modPath(/.+/, "/notfound.html") -> static("/", "/var/www") -> + A route expression always contains a match expression and a backend expression, and it can contain optional filter expressions. The match @@ -28,9 +28,9 @@ filters take place between the matcher and the backend. A route expression example: - Path("/api/*resource") && Header("Accept", "application/json") -> - modPath("^/api", "") -> requestHeader("X-Type", "external") -> - "https://api.example.org" + Path("/api/*resource") && Header("Accept", "application/json") -> + modPath("^/api", "") -> requestHeader("X-Type", "external") -> + "https://api.example.org" Match Expressions - Predicates @@ -41,11 +41,11 @@ separated by '&&'. A match expression example: - Path("/api/*resource") && Header("Accept", "application/json") + Path("/api/*resource") && Header("Accept", "application/json") The following predicate expressions are recognized: - Path("/some/path") + Path("/some/path") The path predicate accepts a single argument, that can be a fixed path like "/some/path", or it can contain wildcards in place of one or more @@ -56,7 +56,7 @@ supports the glob standard, e.g. "/some/path/**" will work as expected. The arguments are available to the filters while processing the matched requests. - PathSubtree("/some/path") + PathSubtree("/some/path") The path subtree predicate behaves similar to the path predicate, but it matches the exact path in the definition and any sub path below it. @@ -65,37 +65,37 @@ name "*". If a free wildcard is appended to the definition, e.g. PathSubtree("/some/path/*rest"), the free wildcard name is used instead of "*". The simple wildcards behave similar to the Path predicate. - PathRegexp(/regular-expression/) + PathRegexp(/regular-expression/) The regexp path predicate accepts a regular expression as a single argument that needs to be matched by the request path. The regular expression can be surrounded by '/' or '"'. - Host(/host-regular-expression/) + Host(/host-regular-expression/) The host predicate accepts a regular expression as a single argument that needs to be matched by the host header in the request. - Method("HEAD") + Method("HEAD") The method predicate is used to match the http request method. - Header("Accept", "application/json") + Header("Accept", "application/json") The header predicate is used to match the http headers in the request. It accepts two arguments, the name of the header field and the exact header value to match. - HeaderRegexp("Accept", /\Wapplication\/json\W/) + HeaderRegexp("Accept", /\Wapplication\/json\W/) The header regexp predicate works similar to the header expression, but the value to be matched is a regular expression. - * + * Catch all predicate. - Any() + Any() Former, deprecated form of the catch all predicate. @@ -106,7 +106,7 @@ Eskip supports custom route matching predicates, that can be implemented in extensions. The notation of custom predicates is the same as of the built-in route matching expressions: - Foo(3.14, "bar") + Foo(3.14, "bar") During parsing, custom predicates may define any arbitrary list of arguments of types number, string or regular expression, and it is the @@ -125,42 +125,48 @@ filter. The arguments can be of type string ("a string"), number A filter example: - setResponseHeader("max-age", "86400") -> static("/", "/var/www/public") + setResponseHeader("max-age", "86400") -> static("/", "/var/www/public") The default Skipper implementation provides the following built-in filters: - setRequestHeader("header-name", "header-value") + setRequestHeader("header-name", "header-value") - setResponseHeader("header-name", "header-value") + setResponseHeader("header-name", "header-value") - appendRequestHeader("header-name", "header-value") + appendRequestHeader("header-name", "header-value") - appendResponseHeader("header-name", "header-value") + appendResponseHeader("header-name", "header-value") - dropRequestHeader("header-name") + dropRequestHeader("header-name") - dropResponseHeader("header-name") + dropResponseHeader("header-name") - modPath(/regular-expression/, "replacement") + modPath(/regular-expression/, "replacement") - setPath("/replacement") + setPath("/replacement") - redirectTo(302, "https://ui.example.org") + redirectTo(302, "https://ui.example.org") - flowId("reuse", 64) + flowId("reuse", 64) - healthcheck() + healthcheck() - static("/images", "/var/www/images") + static("/images", "/var/www/images") - stripQuery("true") + stripQuery("true") - preserveHost() + preserveHost() - status(418) + status(418) - tee("https://audit-logging.example.org") + tee("https://audit-logging.example.org") + + consecutiveBreaker() + + rateBreaker() + + disableBreaker() For details about the built-in filters, please, refer to the documentation of the skipper/filters package. Skipper is designed to be @@ -183,7 +189,7 @@ a loopback. A network endpoint address example: - "http://internal.example.org:9090" + "http://internal.example.org:9090" An endpoint address backend is surrounded by '"'. It contains the scheme and the hostname of the endpoint, and optionally the port number that is @@ -191,7 +197,7 @@ inferred from the scheme if not specified. A shunt backend: - + The shunt backend means that the route will not forward the request to a network endpoint, but the router will handle the request itself. By @@ -219,9 +225,9 @@ character. Example with comments: - // forwards to the API endpoint - route1: Path("/api") -> "https://api.example.org"; - route2: * -> // everything else 404 + // forwards to the API endpoint + route1: Path("/api") -> "https://api.example.org"; + route2: * -> // everything else 404 Regular expressions diff --git a/filters/circuit/breaker.go b/filters/circuit/breaker.go index 3e5feb6cbc..01fb6b8b5d 100644 --- a/filters/circuit/breaker.go +++ b/filters/circuit/breaker.go @@ -1,3 +1,8 @@ +/* +Package circuit provides filters to control the circuit breaker settings on the route level. + +For detailed documentation of the circuit breakers, see https://godoc.org/github.com/zalando/skipper/circuit. +*/ package circuit import ( @@ -11,6 +16,7 @@ const ( ConsecutiveBreakerName = "consecutiveBreaker" RateBreakerName = "rateBreaker" DisableBreakerName = "disableBreaker" + RouteSettingsKey = "#circuitbreakersettings" ) type spec struct { @@ -42,14 +48,33 @@ func getDurationArg(a interface{}) (time.Duration, error) { return time.Duration(i) * time.Millisecond, err } +// NewConsecutiveBreaker creates a filter specification to instantiate consecutiveBreaker() filters. +// +// These filters set a breaker for the current route that open if the backend failures for the route reach a +// value of N, where N is a mandatory argument of the filter: +// +// consecutiveBreaker(15) +// +// The filter accepts the following optional arguments: timeout (milliseconds or duration string), +// half-open-requests (integer), idle-ttl (milliseconds or duration string). func NewConsecutiveBreaker() filters.Spec { return &spec{typ: circuit.ConsecutiveFailures} } +// NewRateBreaker creates a filter specification to instantiate rateBreaker() filters. +// +// These filters set a breaker for the current route that open if the backend failures for the route reach a +// value of N withing a window of the last M requests, where N and M are mandatory arguments of the filter: +// +// rateBreaker(30, 300) +// +// The filter accepts the following optional arguments: timeout (milliseconds or duration string), +// half-open-requests (integer), idle-ttl (milliseconds or duration string). func NewRateBreaker() filters.Spec { return &spec{typ: circuit.FailureRate} } +// NewDisableBreaker disables the circuit breaker for a route. It doesn't accept any arguments. func NewDisableBreaker() filters.Spec { return &spec{} } @@ -185,7 +210,7 @@ func (s *spec) CreateFilter(args []interface{}) (filters.Filter, error) { } func (f *filter) Request(ctx filters.FilterContext) { - ctx.StateBag()[circuit.RouteSettingsKey] = f.settings + ctx.StateBag()[RouteSettingsKey] = f.settings } func (f *filter) Response(filters.FilterContext) {} diff --git a/filters/circuit/breaker_test.go b/filters/circuit/breaker_test.go index ab4e344f0b..e8025256a3 100644 --- a/filters/circuit/breaker_test.go +++ b/filters/circuit/breaker_test.go @@ -80,7 +80,7 @@ func TestBreaker(t *testing.T) { f.Request(ctx) - settings, ok := ctx.StateBag()[circuit.RouteSettingsKey] + settings, ok := ctx.StateBag()[RouteSettingsKey] if !ok { t.Error("failed to set the breaker settings") } diff --git a/proxy/doc.go b/proxy/doc.go index 13e65a9ef4..60660395b8 100644 --- a/proxy/doc.go +++ b/proxy/doc.go @@ -103,7 +103,7 @@ from the external data sources, and are tested against the requests before the general routing tree. -Handling the 'Host' header +Handling the Host header The default behavior regarding the 'Host' header of the proxy requests is that the proxy ignores the value set in the incoming request. This @@ -123,6 +123,18 @@ and `SetOutgoingHost()` methods of the `FilterContext` need to be used instead of the `Request.Header` map. +Circuit Breakers + +When configured, skipper can use circuit breakers for the backend +requests. It asks the registry for a matching circuit breaker for +every request, and if there is any, then checks if it is closed +before sending out the backend request. On connection errors and +responses with a status code higher than 499, it reports a failure +to the current breaker, otherwise a success. + +For details, see: https://godoc.org/github.com/zalando/skipper/circuit. + + Proxy Example The below example demonstrates creating a routing proxy as a standard diff --git a/proxy/proxy.go b/proxy/proxy.go index ad536153d0..9431ab3335 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -15,6 +15,7 @@ import ( log "github.com/sirupsen/logrus" "github.com/zalando/skipper/circuit" "github.com/zalando/skipper/eskip" + circuitfilters "github.com/zalando/skipper/filters/circuit" "github.com/zalando/skipper/metrics" "github.com/zalando/skipper/routing" ) @@ -112,6 +113,9 @@ type Params struct { // wrong routing depending on the current configuration. MaxLoopbacks int + // CircuitBreakes provides a registry that skipper can use to + // find the matching circuit breaker for backend requests. If not + // set, no circuit breakers are used. CircuitBreakers *circuit.Registry } @@ -486,7 +490,7 @@ func (p *Proxy) checkBreaker(c *context) (func(bool), bool) { return nil, true } - settings, _ := c.stateBag[circuit.RouteSettingsKey].(circuit.BreakerSettings) + settings, _ := c.stateBag[circuitfilters.RouteSettingsKey].(circuit.BreakerSettings) settings.Host = c.outgoingHost b := p.breakers.Get(settings) diff --git a/skipper.go b/skipper.go index df16f155a7..f11a68a81d 100644 --- a/skipper.go +++ b/skipper.go @@ -227,8 +227,11 @@ type Options struct { // Experimental feature to handle protocol Upgrades for Websockets, SPDY, etc. ExperimentalUpgrade bool + // MaxLoopbacks defines the maximum number of loops that the proxy can execute when the routing table + // contains loop backends (). MaxLoopbacks int + // BreakerSettings contain global and host specific settings for the circuit breakers. BreakerSettings []circuit.BreakerSettings } From fe2f78658694f44ddc6b5f1c898dacf95638ecff Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Mon, 10 Jul 2017 12:45:31 +0200 Subject: [PATCH 09/12] docs reviewed --- circuit/breaker.go | 13 +++---- circuit/doc.go | 69 ++++++++++++++++++++------------------ circuit/ratebreaker.go | 5 +++ circuit/registry.go | 12 +++---- doc.go | 8 ++--- filters/circuit/breaker.go | 2 +- proxy/doc.go | 8 ++--- proxy/proxy.go | 2 +- readme.md | 1 + 9 files changed, 62 insertions(+), 58 deletions(-) diff --git a/circuit/breaker.go b/circuit/breaker.go index f91f5b2da7..1d3cee93ae 100644 --- a/circuit/breaker.go +++ b/circuit/breaker.go @@ -6,10 +6,6 @@ import ( "time" ) -// TODO: -// - in case of the rate breaker, there are unnecessary synchronization steps due to the 3rd party gobreaker -// - introduce a TTL in the rate breaker for the stale samplers - // BreakerType defines the type of the used breaker: consecutive, rate or disabled. type BreakerType int @@ -22,7 +18,7 @@ const ( // BreakerSettings contains the settings for individual circuit breakers. // -// See the package docs for the detailed merging/overriding rules of the settings and for the meaning of the +// See the package overview for the detailed merging/overriding rules of the settings and for the meaning of the // individual fields. type BreakerSettings struct { Type BreakerType @@ -78,7 +74,7 @@ func (to BreakerSettings) mergeSettings(from BreakerSettings) BreakerSettings { return to } -// String returns the string representation of a particular set of circuit breaker settings. +// String returns the string representation of a particular set of settings. func (s BreakerSettings) String() string { var ss []string @@ -141,8 +137,9 @@ func newBreaker(s BreakerSettings) *Breaker { } } -// Allow returns true if the breaker is in the closed state and a callback function to report the outcome of the -// operation, where true means success. It may not return a callback function when the state is not closed. +// Allow returns true if the breaker is in the closed state and a callback function for reporting the outcome of +// the operation. The callback expects true values if the outcome of the request was successful. Allow may not +// return a callback function when the state is open. func (b *Breaker) Allow() (func(bool), bool) { return b.impl.Allow() } diff --git a/circuit/doc.go b/circuit/doc.go index 2fa91fbf32..70c37a4965 100644 --- a/circuit/doc.go +++ b/circuit/doc.go @@ -2,8 +2,8 @@ Package circuit implements circuit breaker functionality for the proxy. It provides two types of circuit breakers: consecutive and failure rate based. The circuit breakers can be -configured globally, or based on hosts and individual routes. The registry object ensures synchronized access to -the active breakers and releases the idle ones. +configured either globally, based on hosts or individual routes. The registry ensures synchronized access to the +active breakers and the recycling of the idle ones. The circuit breakers are always assigned to backend hosts, so that the outcome of requests to one host never affects the circuit breaker behavior of another host. Besides hosts, individual routes can have separate circuit @@ -12,24 +12,23 @@ breakers, too. Breaker Type - Consecutive Failures This breaker opens when the proxy couldn't connect to a backend or received a >=500 status code at least N times -in a row, where N is the configuration of the breaker. When open, the proxy returns 503 - Service Unavailable -response during the configured timeout. After this timeout, the breaker goes into half-open state, where it -expects that M number of requests succeed. The requests in the half-open state are accepted concurrently. If any -of the requests during the half-open state fails, the breaker goes back to open state. If all succeed, it goes -to closed state again. +in a row. When open, the proxy returns 503 - Service Unavailable response during the breaker timeout. After this +timeout, the breaker goes into half-open state, in which it expects that M number of requests succeed. The +requests in the half-open state are accepted concurrently. If any of the requests during the half-open state +fails, the breaker goes back to open state. If all succeed, it goes to closed state again. Breaker Type - Failure Rate The "rate breaker" works similar to the "consecutive breaker", but instead of considering N consecutive failures -for going open, it opens when the failure reaches a rate of N out of M, where M is a sliding window, N "https://foo.backend.net"; The breaker settings can be defined in the following levels: global, based on the backend host, based on -individual route settings. The values are merged in the same order so, that the global settings serve as -defaults for the host settings, and the result of the global and host settings serve as defaults for the route -settings. Setting global values happens the same way as setting host values, but leaving the Host field empty. -Setting route based values happens with filters in the route definitions. +individual route settings. The values are merged in the same order, so the global settings serve as defaults for +the host settings, and the result of the global and host settings serve as defaults for the route settings. +Setting global values happens the same way as setting host values, but leaving the Host field empty. Setting +route based values happens with filters in the route definitions. (https://godoc.org/github.com/zalando/skipper/filters/circuit) Settings - Type @@ -90,20 +94,20 @@ Settings - Timeout With the timeout we can set how long the breaker should stay open, before becoming half-open. -Command line name: timeout. Possible command line values: any positive integer as milliseconds or duration +Command line name: timeout. Possible command line values: any positive integer as milliseconds or a duration string, e.g. 15m30s. Settings - Half-Open Requests -Defines the number of requests expected to succeed while in the circuit breaker is in the half-open state. +Defines the number of requests expected to succeed while the circuit breaker is in the half-open state. Command line name: half-open-requests. Possible command line values: any positive integer. Settings - Idle TTL -Defines the idle timeout after which a circuit breaker gets recycled, if it wasn't used. +Defines the idle timeout after which a circuit breaker gets recycled, if it hasn't been used. -Command line name: idle-ttl. Possible command line values: any positive integer as milliseconds or duration +Command line name: idle-ttl. Possible command line values: any positive integer as milliseconds or a duration string, e.g. 15m30s. Filters @@ -111,14 +115,12 @@ Filters The following circuit breaker filters are supported: consecutiveBreaker(), rateBreaker() and disableBreaker(). The consecutiveBreaker filter expects one mandatory parameter: the number of consecutive failures to open. It -accepts the following optional arguments: timeout, half-open requests, idle-ttl, whose meaning is the same as in -case of the command line values. +accepts the following optional arguments: timeout, half-open requests, idle-ttl. consecutiveBreaker(5, "1m", 12, "30m") The rateBreaker filter expects two mandatory parameters: the number of consecutive failures to open and the size -of the sliding window. It accepts the following optional arguments: timeout, half-open requests, idle-ttl, whose -meaning is the same as in case of the command line values. +of the sliding window. It accepts the following optional arguments: timeout, half-open requests, idle-ttl. rateBreaker(30, 300, "1m", 12, "30m") @@ -129,10 +131,11 @@ route that it appears in. Proxy Usage -The proxy, when circuit breakers are configured, uses them for backend connections. When it fails to establish a -connection to a backend host, or receives a status code >=500, then it reports to the breaker as a failure. If -the breaker decides to go to the open state, the proxy doesn't try to make any backend requests, returns 503 -status code, and appends a header to the response: +The proxy, when circuit breakers are configured, uses them for backend connections. It checks the breaker for +the current backend host if it's closed before making backend requests. It reports the outcome of the request to +the breaker, considering connection failures and backend responses with status code >=500 as failures. When the +breaker is open, the proxy doesn't try to make backend requests, and returns a response with a status code of +503 and appending a header to the response: X-Circuit-Open: true @@ -141,9 +144,9 @@ Registry The active circuit breakers are stored in a registry. They are created on-demand, for the requested settings. The registry synchronizes access to the shared circuit breakers. When the registry detects that a circuit breaker is idle, it resets it, this way avoiding that an old series of failures would cause the circuit breaker -go open for an unreasonably low number of failures. The registry also makes sure to cleanup idle circuit -breakers that are not requested anymore, passively, whenever a new circuit breaker is created. This way it -prevents that with a continously changing route configuration, circuit breakers for inaccessible backend hosts -would be stored infinitely. +go open after an unreasonably low number of recent failures. The registry also makes sure to cleanup idle +circuit breakers that are not requested anymore by the proxy. This happens in a passive way, whenever a new +circuit breaker is created. The cleanup prevents storing circuit breakers for inaccessible backend hosts +infinitely in those scenarios where the route configuration is continuously changing. */ package circuit diff --git a/circuit/ratebreaker.go b/circuit/ratebreaker.go index 71d0ecca96..4e71774579 100644 --- a/circuit/ratebreaker.go +++ b/circuit/ratebreaker.go @@ -6,6 +6,11 @@ import ( "github.com/sony/gobreaker" ) +// TODO: +// in case of the rate breaker, there are unnecessary synchronization steps due to the 3rd party gobreaker. If +// the sliding window was part of the implementation of the individual breakers, this additional syncrhonization +// would not be required. + type rateBreaker struct { settings BreakerSettings mx *sync.Mutex diff --git a/circuit/registry.go b/circuit/registry.go index 11e8748d39..a49b03b1a0 100644 --- a/circuit/registry.go +++ b/circuit/registry.go @@ -5,12 +5,10 @@ import ( "time" ) -// DefaultIdleTTL is used to recycle those unused circuit breakers that don't have this value configured and it -// is not set globally. const DefaultIdleTTL = time.Hour // Registry objects hold the active circuit breakers, ensure synchronized access to them, apply default settings -// and recycle the idle ones. +// and recycle the idle breakers. type Registry struct { defaults BreakerSettings hostSettings map[string]BreakerSettings @@ -19,8 +17,8 @@ type Registry struct { mx *sync.Mutex } -// NewRegistry initializes a registry with the provided default settings. Settings with the same host value are -// merged together, and settings with an empty host field are merged into each. +// NewRegistry initializes a registry with the provided default settings. Settings with an empty Host field are +// considered as defaults. Settings with the same Host field are merged together. func NewRegistry(settings ...BreakerSettings) *Registry { var ( defaults BreakerSettings @@ -109,8 +107,8 @@ func (r *Registry) get(s BreakerSettings) *Breaker { // // r.Get(BreakerSettings{Host: backendHost}) // -// The key will be filled up with the defaults and the matching circuit breaker will be returned if it exists or -// a new one will be created. +// The key will be filled up with the defaults and the matching circuit breaker will be returned if it exists, +// or a new one will be created if not. func (r *Registry) Get(s BreakerSettings) *Breaker { // we check for host, because we don't want to use shared global breakers if s.Type == BreakerDisabled || s.Host == "" { diff --git a/doc.go b/doc.go index 40809868d9..8e33804ea3 100644 --- a/doc.go +++ b/doc.go @@ -187,10 +187,10 @@ must implement the DataClient interface in the routing package. Circuit Breaker -Skipper provides circuit breakers, configured globally, based on backend -hosts or based on individual routes. It supports two types of circuit -breaker behavior: open on N consecutive failures, or open on N failures -out of M requests. For details, see: +Skipper provides circuit breakers, configured either globally, based on +backend hosts or based on individual routes. It supports two types of +circuit breaker behavior: open on N consecutive failures, or open on N +failures out of M requests. For details, see: https://godoc.org/github.com/zalando/skipper/circuit. diff --git a/filters/circuit/breaker.go b/filters/circuit/breaker.go index 01fb6b8b5d..3c54a3d1b5 100644 --- a/filters/circuit/breaker.go +++ b/filters/circuit/breaker.go @@ -64,7 +64,7 @@ func NewConsecutiveBreaker() filters.Spec { // NewRateBreaker creates a filter specification to instantiate rateBreaker() filters. // // These filters set a breaker for the current route that open if the backend failures for the route reach a -// value of N withing a window of the last M requests, where N and M are mandatory arguments of the filter: +// value of N within a window of the last M requests, where N and M are mandatory arguments of the filter: // // rateBreaker(30, 300) // diff --git a/proxy/doc.go b/proxy/doc.go index 60660395b8..2afe921e23 100644 --- a/proxy/doc.go +++ b/proxy/doc.go @@ -127,10 +127,10 @@ Circuit Breakers When configured, skipper can use circuit breakers for the backend requests. It asks the registry for a matching circuit breaker for -every request, and if there is any, then checks if it is closed -before sending out the backend request. On connection errors and -responses with a status code higher than 499, it reports a failure -to the current breaker, otherwise a success. +every request, and if there is any, checks if it is closed before +sending out the backend request. On connection errors and responses +with a status code higher than 499, it reports a failure to the +current breaker, otherwise it reports a success. For details, see: https://godoc.org/github.com/zalando/skipper/circuit. diff --git a/proxy/proxy.go b/proxy/proxy.go index 9431ab3335..d784864c67 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -113,7 +113,7 @@ type Params struct { // wrong routing depending on the current configuration. MaxLoopbacks int - // CircuitBreakes provides a registry that skipper can use to + // CircuitBreakers provides a registry that skipper can use to // find the matching circuit breaker for backend requests. If not // set, no circuit breakers are used. CircuitBreakers *circuit.Registry diff --git a/readme.md b/readme.md index e469798fa3..9e1952abfb 100644 --- a/readme.md +++ b/readme.md @@ -90,6 +90,7 @@ Skipper's [godoc](https://godoc.org/github.com/zalando/skipper) page includes de - Service Backends - Route Definitions - Data Sources +- Circuit Breakers - Extending It with Customized Predicates, Filters, and Builds - Proxy Packages - Logging and Metrics From a451c1a1255e9344c6e2c766c62a9226c338f9de Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Tue, 11 Jul 2017 17:18:40 +0200 Subject: [PATCH 10/12] remove sorted access list --- circuit/list.go | 77 -------------------- circuit/list_test.go | 147 --------------------------------------- circuit/registry.go | 19 ++--- circuit/registry_test.go | 7 +- 4 files changed, 7 insertions(+), 243 deletions(-) delete mode 100644 circuit/list.go delete mode 100644 circuit/list_test.go diff --git a/circuit/list.go b/circuit/list.go deleted file mode 100644 index 34cf1a7e3f..0000000000 --- a/circuit/list.go +++ /dev/null @@ -1,77 +0,0 @@ -package circuit - -// a simple list to keep breakers sorted by access order. -// It can return the first consecutive items that match a -// condition (in our case, the ones that were inactive for a -// while). -type list struct { - first, last *Breaker -} - -func (l *list) remove(from, to *Breaker) { - if from == nil || l.first == nil { - return - } - - if from == l.first { - l.first = to.next - } else if from.prev != nil { - from.prev.next = to.next - } - - if to == l.last { - l.last = from.prev - } else if to.next != nil { - to.next.prev = from.prev - } - - from.prev = nil - to.next = nil -} - -func (l *list) append(from, to *Breaker) { - if from == nil { - return - } - - if l.last == nil { - l.first = from - l.last = to - return - } - - l.last.next = from - from.prev = l.last - l.last = to -} - -// appends an item to the end of the list. If the list already -// contains the item, moves it to the end. -func (l *list) appendLast(b *Breaker) { - l.remove(b, b) - l.append(b, b) -} - -// returns the first consecutive items that match the predicate -func (l *list) getMatchingHead(predicate func(*Breaker) bool) (first, last *Breaker) { - current := l.first - for { - if current == nil || !predicate(current) { - return - } - - if first == nil { - first = current - } - - last, current = current, current.next - } -} - -// takes the first consecutive items that match a predicate, -// removes them from the list, and returns them. -func (l *list) dropHeadIf(predicate func(*Breaker) bool) (from, to *Breaker) { - from, to = l.getMatchingHead(predicate) - l.remove(from, to) - return -} diff --git a/circuit/list_test.go b/circuit/list_test.go deleted file mode 100644 index caea3c13f2..0000000000 --- a/circuit/list_test.go +++ /dev/null @@ -1,147 +0,0 @@ -package circuit - -import "testing" - -func checkListDirection(t *testing.T, first *Breaker, reverse bool, expected ...*Breaker) { -} - -func checkList(t *testing.T, first *Breaker, expected ...*Breaker) { - current := first - for i, expected := range expected { - if current == nil { - t.Error("less items in the list than expected") - return - } - - if i == 0 && current.prev != nil { - t.Error("damaged list") - return - } - - if expected != current { - t.Error("invalid order") - return - } - - current = current.next - } - - if current != nil { - t.Error("more items in the list than expected") - } -} - -func appendAll(l *list, items ...*Breaker) { - for _, i := range items { - l.appendLast(i) - } -} - -func TestListAppend(t *testing.T) { - t.Run("append", func(t *testing.T) { - l := &list{} - - b0 := newBreaker(BreakerSettings{}) - b1 := newBreaker(BreakerSettings{}) - b2 := newBreaker(BreakerSettings{}) - appendAll(l, b0, b1, b2) - checkList(t, l.first, b0, b1, b2) - }) - - t.Run("reappend", func(t *testing.T) { - l := &list{} - - b0 := newBreaker(BreakerSettings{}) - b1 := newBreaker(BreakerSettings{}) - b2 := newBreaker(BreakerSettings{}) - appendAll(l, b0, b1, b2) - - l.appendLast(b1) - - checkList(t, l.first, b0, b2, b1) - }) - - t.Run("reappend first", func(t *testing.T) { - l := &list{} - - b0 := newBreaker(BreakerSettings{}) - b1 := newBreaker(BreakerSettings{}) - b2 := newBreaker(BreakerSettings{}) - appendAll(l, b0, b1, b2) - - l.appendLast(b0) - - checkList(t, l.first, b1, b2, b0) - }) - - t.Run("reappend last", func(t *testing.T) { - l := &list{} - - b0 := newBreaker(BreakerSettings{}) - b1 := newBreaker(BreakerSettings{}) - b2 := newBreaker(BreakerSettings{}) - appendAll(l, b0, b1, b2) - - l.appendLast(b2) - - checkList(t, l.first, b0, b1, b2) - }) -} - -func TestDropHead(t *testing.T) { - createToDrop := func() *Breaker { return newBreaker(BreakerSettings{Host: "drop"}) } - createNotToDrop := func() *Breaker { return newBreaker(BreakerSettings{Host: "no-drop"}) } - predicate := func(item *Breaker) bool { return item.settings.Host == "drop" } - - t.Run("from empty", func(t *testing.T) { - l := &list{} - drop, _ := l.dropHeadIf(predicate) - checkList(t, l.first) - checkList(t, drop) - }) - - t.Run("drop matching", func(t *testing.T) { - l := &list{} - - b0 := createToDrop() - b1 := createToDrop() - b2 := createNotToDrop() - b3 := createToDrop() - b4 := createNotToDrop() - appendAll(l, b0, b1, b2, b3, b4) - - drop, _ := l.dropHeadIf(predicate) - checkList(t, l.first, b2, b3, b4) - checkList(t, drop, b0, b1) - }) - - t.Run("none match", func(t *testing.T) { - l := &list{} - - b0 := createNotToDrop() - b1 := createToDrop() - b2 := createNotToDrop() - b3 := createToDrop() - b4 := createNotToDrop() - appendAll(l, b0, b1, b2, b3, b4) - - drop, _ := l.dropHeadIf(predicate) - checkList(t, l.first, b0, b1, b2, b3, b4) - checkList(t, drop) - }) - - t.Run("all match", func(t *testing.T) { - l := &list{} - - b0 := createToDrop() - b1 := createToDrop() - b2 := createToDrop() - b3 := createToDrop() - b4 := createToDrop() - appendAll(l, b0, b1, b2, b3, b4) - - drop, _ := l.dropHeadIf(predicate) - checkList(t, l.first) - checkList(t, drop, b0, b1, b2, b3, b4) - }) -} diff --git a/circuit/registry.go b/circuit/registry.go index a49b03b1a0..d403e31e54 100644 --- a/circuit/registry.go +++ b/circuit/registry.go @@ -13,7 +13,6 @@ type Registry struct { defaults BreakerSettings hostSettings map[string]BreakerSettings lookup map[BreakerSettings]*Breaker - access *list mx *sync.Mutex } @@ -51,7 +50,6 @@ func NewRegistry(settings ...BreakerSettings) *Registry { defaults: defaults, hostSettings: hs, lookup: make(map[BreakerSettings]*Breaker), - access: &list{}, mx: &sync.Mutex{}, } } @@ -66,13 +64,10 @@ func (r *Registry) mergeDefaults(s BreakerSettings) BreakerSettings { } func (r *Registry) dropIdle(now time.Time) { - drop, _ := r.access.dropHeadIf(func(b *Breaker) bool { - return b.idle(now) - }) - - for drop != nil { - delete(r.lookup, drop.settings) - drop = drop.next + for h, b := range r.lookup { + if b.idle(now) { + delete(r.lookup, h) + } } } @@ -84,9 +79,6 @@ func (r *Registry) get(s BreakerSettings) *Breaker { b, ok := r.lookup[s] if !ok || b.idle(now) { - // if doesn't exist or idle, cleanup and create a new one - r.access.remove(b, b) - // check if there is any other to evict, evict if yes r.dropIdle(now) @@ -95,9 +87,8 @@ func (r *Registry) get(s BreakerSettings) *Breaker { r.lookup[s] = b } - // append/move the breaker to the last position of the access history + // set the access timestamp b.ts = now - r.access.appendLast(b) return b } diff --git a/circuit/registry_test.go b/circuit/registry_test.go index cab0468615..15d6d61a6e 100644 --- a/circuit/registry_test.go +++ b/circuit/registry_test.go @@ -245,14 +245,11 @@ func TestRegistryEvictIdle(t *testing.T) { return } - current := r.access.first - for current != nil { - if current.settings.Host == "baz" { + for s := range r.lookup { + if s.Host == "baz" { t.Error("failed to evict idle breaker") return } - - current = current.next } } From 761d56999581967618f1b128f86fe6a26a35ece8 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Tue, 11 Jul 2017 17:54:00 +0200 Subject: [PATCH 11/12] log open and close events --- circuit/consecutivebreaker.go | 47 ++++++++++++++++++++++++++--------- circuit/ratebreaker.go | 18 +++++++++++--- circuit/registry_test.go | 8 ------ 3 files changed, 49 insertions(+), 24 deletions(-) diff --git a/circuit/consecutivebreaker.go b/circuit/consecutivebreaker.go index 757a0d2676..377265da33 100644 --- a/circuit/consecutivebreaker.go +++ b/circuit/consecutivebreaker.go @@ -1,31 +1,54 @@ package circuit -import "github.com/sony/gobreaker" +import ( + log "github.com/sirupsen/logrus" + "github.com/sony/gobreaker" +) type consecutiveBreaker struct { - gb *gobreaker.TwoStepCircuitBreaker + settings BreakerSettings + open bool + gb *gobreaker.TwoStepCircuitBreaker } func newConsecutive(s BreakerSettings) *consecutiveBreaker { - return &consecutiveBreaker{ - gb: gobreaker.NewTwoStepCircuitBreaker(gobreaker.Settings{ - Name: s.Host, - MaxRequests: uint32(s.HalfOpenRequests), - Timeout: s.Timeout, - ReadyToTrip: func(c gobreaker.Counts) bool { - return int(c.ConsecutiveFailures) >= s.Failures - }, - }), + b := &consecutiveBreaker{ + settings: s, } + + b.gb = gobreaker.NewTwoStepCircuitBreaker(gobreaker.Settings{ + Name: s.Host, + MaxRequests: uint32(s.HalfOpenRequests), + Timeout: s.Timeout, + ReadyToTrip: b.readyToTrip, + }) + + return b +} + +func (b *consecutiveBreaker) readyToTrip(c gobreaker.Counts) bool { + b.open = int(c.ConsecutiveFailures) >= b.settings.Failures + if b.open { + log.Infof("circuit breaker open: %v", b.settings) + } + + return b.open } func (b *consecutiveBreaker) Allow() (func(bool), bool) { done, err := b.gb.Allow() // this error can only indicate that the breaker is not closed - if err != nil { + closed := err == nil + + if !closed { return nil, false } + if b.open { + b.open = false + log.Infof("circuit breaker closed: %v", b.settings) + } + return done, true } diff --git a/circuit/ratebreaker.go b/circuit/ratebreaker.go index 4e71774579..321d399b69 100644 --- a/circuit/ratebreaker.go +++ b/circuit/ratebreaker.go @@ -1,6 +1,7 @@ package circuit import ( + log "github.com/sirupsen/logrus" "sync" "github.com/sony/gobreaker" @@ -13,6 +14,7 @@ import ( type rateBreaker struct { settings BreakerSettings + open bool mx *sync.Mutex sampler *binarySampler gb *gobreaker.TwoStepCircuitBreaker @@ -42,12 +44,13 @@ func (b *rateBreaker) readyToTrip() bool { return false } - ready := b.sampler.count >= b.settings.Failures - if ready { + b.open = b.sampler.count >= b.settings.Failures + if b.open { + log.Infof("circuit breaker open: %v", b.settings) b.sampler = nil } - return ready + return b.open } // count the failures in closed and half-open state @@ -66,10 +69,17 @@ func (b *rateBreaker) Allow() (func(bool), bool) { done, err := b.gb.Allow() // this error can only indicate that the breaker is not closed - if err != nil { + closed := err == nil + + if !closed { return nil, false } + if b.open { + b.open = false + log.Infof("circuit breaker closed: %v", b.settings) + } + return func(success bool) { b.countRate(success) done(success) diff --git a/circuit/registry_test.go b/circuit/registry_test.go index 15d6d61a6e..d2888aa335 100644 --- a/circuit/registry_test.go +++ b/circuit/registry_test.go @@ -258,14 +258,6 @@ func TestIndividualIdle(t *testing.T) { t.Skip() } - // create with default and host specific - // - // check for both: - // - fail n - 1 - // - wait idle - // - fail - // - stays closed - const ( consecutiveFailures = 5 idleTimeout = 15 * time.Millisecond From 9da7acfaa78285220bda703740493362476e2cb0 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Wed, 12 Jul 2017 15:55:56 +0200 Subject: [PATCH 12/12] drop unused fields --- circuit/breaker.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/circuit/breaker.go b/circuit/breaker.go index 1d3cee93ae..1136c16bee 100644 --- a/circuit/breaker.go +++ b/circuit/breaker.go @@ -39,10 +39,9 @@ type voidBreaker struct{} // // Use the Get() method of the Registry to request fully initialized breakers. type Breaker struct { - settings BreakerSettings - ts time.Time - prev, next *Breaker - impl breakerImplementation + settings BreakerSettings + ts time.Time + impl breakerImplementation } func (to BreakerSettings) mergeSettings(from BreakerSettings) BreakerSettings {