-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
Copy pathbuffer.go
344 lines (301 loc) · 11.4 KB
/
buffer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
/*
Copyright 2019 The Vitess Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package buffer provides a buffer for MASTER traffic during failovers.
//
// Instead of returning an error to the application (when the vttablet master
// becomes unavailable), the buffer will automatically retry buffered requests
// after the end of the failover was detected.
//
// Buffering (stalling) requests will increase the number of requests in flight
// within vtgate and at upstream layers. Therefore, it is important to limit
// the size of the buffer and the buffering duration (window) per request.
// See the file flags.go for the available configuration and its defaults.
package buffer
import (
"fmt"
"strings"
"sync"
"time"
"context"
"vitess.io/vitess/go/sync2"
"vitess.io/vitess/go/vt/discovery"
"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/topo/topoproto"
"vitess.io/vitess/go/vt/vterrors"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
)
// Errors of the buffer. All of them are created with the code UNAVAILABLE.
// None of them is referenced in this file; presumably shardBuffer returns
// them to callers whose request could not be buffered until the end of the
// failover (verify there).
var (
// bufferFullError reports that the master buffer is full.
bufferFullError = vterrors.New(vtrpcpb.Code_UNAVAILABLE, "master buffer is full")
// entryEvictedError reports that a request was evicted from a full buffer
// in favor of a newer request.
entryEvictedError = vterrors.New(vtrpcpb.Code_UNAVAILABLE, "buffer full: request evicted for newer request")
// contextCanceledError reports that the context was canceled before the
// failover finished.
contextCanceledError = vterrors.New(vtrpcpb.Code_UNAVAILABLE, "context was canceled before failover finished")
)
// bufferMode specifies how the buffer is configured for a given shard.
// Buffer.mode computes the value for a keyspace/shard pair and
// getOrCreateBuffer passes it to newShardBuffer.
type bufferMode int
const (
// bufferDisabled will let all requests pass through and do nothing.
// It is the zero value of bufferMode.
bufferDisabled bufferMode = iota
// bufferEnabled means all requests should be buffered.
bufferEnabled
// bufferDryRun will track the failover, but not actually buffer requests.
bufferDryRun
)
// Buffer is used to track ongoing MASTER tablet failovers and buffer
// requests while the MASTER tablet is unavailable.
// Once the new MASTER starts accepting requests, buffering stops and requests
// queued so far will be automatically retried.
//
// There should be exactly one instance of this buffer. It keeps one
// shardBuffer per keyspace/shard, created on first use by getOrCreateBuffer.
type Buffer struct {
// Immutable configuration fields.
// Except for "now", they are parsed from command line flags.
// keyspaces has the same purpose as "shards" but applies to a whole keyspace.
// It is keyed by the keyspace name.
keyspaces map[string]bool
// shards is a set of keyspace/shard entries to which buffering is limited.
// It is keyed by the output of topoproto.KeyspaceShardString.
// If both keyspaces and shards are empty (and *enabled==true), buffering is
// enabled for all shards.
shards map[string]bool
// now returns the current time. Overridden in tests.
now func() time.Time
// bufferSizeSema limits how many requests can be buffered
// ("-buffer_size") and is shared by all shardBuffer instances.
// It is created in newWithNow from *size.
bufferSizeSema *sync2.Semaphore
// mu guards the fields below (buffers and stopped).
// In particular, it is used to serialize the following Go routines:
// - 1. Requests which may buffer (RLock, can be run in parallel)
// - 2. Request which starts buffering (based on the seen error)
// - 3. LegacyHealthCheck listener ("StatsUpdate") which stops buffering
// - 4. Timer which may stop buffering after -buffer_max_failover_duration
mu sync.RWMutex
// buffers holds a shardBuffer object per shard, even if no failover is in
// progress.
// Key Format: "<keyspace>/<shard>"
buffers map[string]*shardBuffer
// stopped is true after Shutdown() was run.
// Once set, getOrCreateBuffer returns nil.
stopped bool
}
// New returns a Buffer which reads the current time from time.Now.
// The actual construction (flag validation, logging of the configured mode)
// is done by newWithNow.
func New() *Buffer {
	clock := time.Now
	return newWithNow(clock)
}
// newWithNow creates a Buffer which uses "now" as its source of the current
// time. New passes time.Now.
//
// It first validates the buffer flags and reports a failure via log.Fatalf.
// It then sets bufferSize to the configured size, splits the configured
// keyspace/shard list into two sets, and logs which buffering mode is active.
// The returned Buffer starts with an empty shardBuffer map.
func newWithNow(now func() time.Time) *Buffer {
	if err := verifyFlags(); err != nil {
		log.Fatalf("Invalid buffer configuration: %v", err)
	}
	bufferSize.Set(int64(*size))
	// The right-hand side still refers to the package-level "shards" flag; the
	// local "shards" set only comes into scope after this statement.
	keyspaces, shards := keyspaceShardsToSets(*shards)

	if *enabledDryRun {
		log.Infof("vtgate buffer in dry-run mode enabled for all requests. Dry-run bufferings will log failovers but not buffer requests.")
	}

	if *enabled {
		log.Infof("vtgate buffer enabled. MASTER requests will be buffered during detected failovers.")

		// Log a second line if it's only enabled for some keyspaces or shards.
		header := "Buffering limited to configured "
		limited := ""
		if len(keyspaces) > 0 {
			limited += "keyspaces: " + setToString(keyspaces)
		}
		if len(shards) > 0 {
			// Join with " and " only when the keyspaces part precedes the
			// shards part. Otherwise the message would start with a dangling
			// " and " or glue both parts together without a separator.
			if limited != "" {
				limited += " and "
			}
			limited += "shards: " + setToString(shards)
		}
		if limited != "" {
			limited = header + limited
			dryRunOverride := ""
			if *enabledDryRun {
				dryRunOverride = " Dry-run mode is overridden for these entries and actual buffering will take place."
			}
			log.Infof("%v.%v", limited, dryRunOverride)
		}
	}

	if !*enabledDryRun && !*enabled {
		log.Infof("vtgate buffer not enabled.")
	}

	return &Buffer{
		keyspaces:      keyspaces,
		shards:         shards,
		now:            now,
		bufferSizeSema: sync2.NewSemaphore(*size, 0),
		buffers:        make(map[string]*shardBuffer),
	}
}
// mode returns the bufferMode which applies to the given keyspace and shard:
// actual buffering, dry-run buffering or no buffering at all.
//
// The cases are checked in this order, the first match wins:
//  1. Buffering is enabled and neither keyspaces nor shards were listed, i.e.
//     all shards should be buffered: bufferEnabled.
//  2. The keyspace is listed: bufferEnabled.
//  3. The keyspace/shard is listed: bufferEnabled.
//  4. Dry-run mode is enabled: bufferDryRun.
//  5. Otherwise: bufferDisabled.
func (b *Buffer) mode(keyspace, shard string) bufferMode {
	nothingListed := len(b.keyspaces) == 0 && len(b.shards) == 0

	switch {
	case *enabled && nothingListed:
		return bufferEnabled
	case b.keyspaces[keyspace]:
		return bufferEnabled
	case b.shards[topoproto.KeyspaceShardString(keyspace, shard)]:
		return bufferEnabled
	case *enabledDryRun:
		return bufferDryRun
	default:
		return bufferDisabled
	}
}
// RetryDoneFunc will be returned for each buffered request and must be called
// after the buffered request was retried.
// Without this signal, the buffer would not know how many buffered requests are
// currently retried.
// Its underlying type is context.CancelFunc, i.e. a func() without arguments
// or results.
type RetryDoneFunc context.CancelFunc
// WaitForFailoverEnd blocks until a pending buffering due to a failover for
// keyspace/shard is over.
//
// "err" is the error the caller has seen for its request, if any. A non-nil
// error which is not caused by a failover (see CausedByFailover) is never
// buffered: the call returns (nil, nil) right away. The same holds if the
// Buffer was shut down or buffering is disabled for the shard; both cases are
// counted in requestsSkipped.
//
// Otherwise the call is handed over to the shardBuffer. It returns an error
// if buffering failed (e.g. buffer full). If it does not return an error, it
// may return a RetryDoneFunc which must be called after the request was
// retried.
func (b *Buffer) WaitForFailoverEnd(ctx context.Context, keyspace, shard string, err error) (RetryDoneFunc, error) {
	if err != nil {
		// Requests with errors unrelated to a failover are never buffered.
		if !CausedByFailover(err) {
			return nil, nil
		}
	}

	sb := b.getOrCreateBuffer(keyspace, shard)
	switch {
	case sb == nil:
		// Buffer is shut down. Ignore all calls.
		requestsSkipped.Add([]string{keyspace, shard, skippedShutdown}, 1)
		return nil, nil
	case sb.disabled():
		requestsSkipped.Add([]string{keyspace, shard, skippedDisabled}, 1)
		return nil, nil
	}

	return sb.waitForFailoverEnd(ctx, keyspace, shard, err)
}
// ProcessMasterHealth notifies the buffer to record a new master
// and end any failover buffering that may be in progress.
//
// It panics if "th" does not describe a MASTER tablet. Health objects with a
// MasterTermStartTime of 0 are ignored, and so is every call after the Buffer
// was shut down.
func (b *Buffer) ProcessMasterHealth(th *discovery.TabletHealth) {
	if th.Target.TabletType != topodatapb.TabletType_MASTER {
		panic(fmt.Sprintf("BUG: non MASTER TabletHealth object must not be forwarded: %#v", th))
	}

	termStart := th.MasterTermStartTime
	if termStart == 0 {
		// Masters where TabletExternallyReparented was never called will return 0.
		// Ignore them.
		return
	}

	// A nil shardBuffer means that the Buffer is shut down: nothing to record.
	if sb := b.getOrCreateBuffer(th.Target.Keyspace, th.Target.Shard); sb != nil {
		sb.recordExternallyReparentedTimestamp(termStart, th.Tablet.Alias)
	}
}
// StatsUpdate keeps track of the "tablet_externally_reparented_timestamp" of
// each master. This way we can detect the end of a failover.
// It is part of the discovery.LegacyHealthCheckStatsListener interface.
//
// It panics if "ts" does not describe a MASTER tablet. Stats with a timestamp
// of 0 are ignored, and so is every call after the Buffer was shut down.
func (b *Buffer) StatsUpdate(ts *discovery.LegacyTabletStats) {
	if ts.Target.TabletType != topodatapb.TabletType_MASTER {
		panic(fmt.Sprintf("BUG: non MASTER LegacyTabletStats object must not be forwarded: %#v", ts))
	}

	reparentedAt := ts.TabletExternallyReparentedTimestamp
	if reparentedAt == 0 {
		// Masters where TabletExternallyReparented was never called will return 0.
		// Ignore them.
		return
	}

	// A nil shardBuffer means that the Buffer is shut down: nothing to record.
	if sb := b.getOrCreateBuffer(ts.Target.Keyspace, ts.Target.Shard); sb != nil {
		sb.recordExternallyReparentedTimestamp(reparentedAt, ts.Tablet.Alias)
	}
}
// CausedByFailover returns true if "err" was supposedly caused by a failover.
//
// Only errors with the code FAILED_PRECONDITION or INTERNAL are considered.
// For those, the error text is searched for a set of known messages. To
// simplify things, the messages of all supported MySQL flavors (MariaDB,
// MySQL, Google internal) are checked in one function.
func CausedByFailover(err error) bool {
	log.V(2).Infof("Checking error (type: %T) if it is caused by a failover. err: %v", err, err)

	// TODO(sougou): Remove the INTERNAL check after rollout.
	switch vterrors.Code(err) {
	case vtrpcpb.Code_FAILED_PRECONDITION, vtrpcpb.Code_INTERNAL:
		// Candidate for a failover error: inspect the message below.
	default:
		return false
	}

	failoverMarkers := [...]string{
		// All flavors.
		"operation not allowed in state NOT_SERVING",
		"operation not allowed in state SHUTTING_DOWN",
		// Match 1290 if -queryserver-config-terse-errors explicitly hid the error message
		// (which it does to avoid logging the original query including any PII).
		"(errno 1290) (sqlstate HY000) during query:",
		// MariaDB flavor.
		"The MariaDB server is running with the --read-only option so it cannot execute this statement (errno 1290) (sqlstate HY000)",
		// MySQL flavor.
		"The MySQL server is running with the --read-only option so it cannot execute this statement (errno 1290) (sqlstate HY000)",
		// Google internal flavor.
		"failover in progress (errno 1227) (sqlstate 42000)",
	}

	message := err.Error()
	for _, marker := range failoverMarkers {
		if strings.Contains(message, marker) {
			return true
		}
	}
	return false
}
// getOrCreateBuffer returns the shardBuffer for the given keyspace and shard.
// If none exists yet, it is created with the mode returned by b.mode and
// stored in b.buffers.
// It returns nil if Buffer is shut down and all calls should be ignored.
func (b *Buffer) getOrCreateBuffer(keyspace, shard string) *shardBuffer {
	key := topoproto.KeyspaceShardString(keyspace, shard)

	// Fast path: an existing entry only requires the read lock.
	b.mu.RLock()
	sb, ok := b.buffers[key]
	stopped := b.stopped
	b.mu.RUnlock()

	if stopped {
		return nil
	}
	if ok {
		return sb
	}

	b.mu.Lock()
	defer b.mu.Unlock()
	// The read lock was released above, so everything read under it may be
	// stale by now. Check "stopped" again: Shutdown() may have run in the
	// meantime, and a shardBuffer created now would never be shut down.
	if b.stopped {
		return nil
	}
	// Look it up again because it could have been created in the meantime.
	sb, ok = b.buffers[key]
	if !ok {
		sb = newShardBuffer(b.mode(keyspace, shard), keyspace, shard, b.now, b.bufferSizeSema)
		b.buffers[key] = sb
	}
	return sb
}
// Shutdown blocks until all pending shardBuffer objects are shut down.
// In particular, it guarantees that all launched Go routines are stopped after
// it returns.
func (b *Buffer) Shutdown() {
	// Step 1: shut down every shardBuffer and mark the Buffer as stopped.
	b.shutdown()
	// Step 2: wait until every shardBuffer has finished shutting down.
	b.waitForShutdown()
}
// shutdown calls shutdown() on every shardBuffer and then marks the Buffer as
// stopped. Both happen under the write lock of b.mu.
func (b *Buffer) shutdown() {
	b.mu.Lock()
	defer b.mu.Unlock()

	for key := range b.buffers {
		b.buffers[key].shutdown()
	}
	b.stopped = true
}
// waitForShutdown calls waitForShutdown() on every shardBuffer, one after
// another, while holding the read lock of b.mu.
func (b *Buffer) waitForShutdown() {
	b.mu.RLock()
	defer b.mu.RUnlock()

	for key := range b.buffers {
		b.buffers[key].waitForShutdown()
	}
}