-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
Copy pathtraffic_switcher.go
1994 lines (1853 loc) · 77.1 KB
/
traffic_switcher.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
Copyright 2021 The Vitess Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package workflow
import (
"context"
"encoding/json"
"errors"
"fmt"
"sort"
"strings"
"sync"
"time"
"golang.org/x/exp/maps"
"golang.org/x/sync/errgroup"
"vitess.io/vitess/go/json2"
"vitess.io/vitess/go/mysql/sqlerror"
"vitess.io/vitess/go/sqlescape"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/binlog/binlogplayer"
"vitess.io/vitess/go/vt/concurrency"
"vitess.io/vitess/go/vt/key"
"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/logutil"
"vitess.io/vitess/go/vt/mysqlctl/tmutils"
"vitess.io/vitess/go/vt/sqlparser"
"vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/topoproto"
"vitess.io/vitess/go/vt/topotools"
"vitess.io/vitess/go/vt/vterrors"
"vitess.io/vitess/go/vt/vtgate/vindexes"
"vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication"
"vitess.io/vitess/go/vt/vttablet/tmclient"
binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
querypb "vitess.io/vitess/go/vt/proto/query"
tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
vschemapb "vitess.io/vitess/go/vt/proto/vschema"
vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata"
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
)
const (
	// Frozen is the message value carried by frozen vreplication streams.
	Frozen = "FROZEN"

	// Running is the state value of a vreplication stream that is in the
	// replicating state.
	Running = "RUNNING"

	// shardTabletRefreshTimeout is how long to wait when refreshing the state
	// of each tablet in a shard. The tablets are refreshed in parallel,
	// non-topo errors are ignored (in the error handling), and the refresh may
	// end up being only partial. Because it can be unsafe to switch traffic
	// when some tablets did not refresh, callers may need to look for partial
	// results and produce an error (with the provided details of WHY) when
	// they see them.
	//
	// Side note: the default lock/lease TTL in etcd is 60s, so the default
	// tablet refresh timeout of 60s could also cause us to lose our keyspace
	// lock before the operation completes.
	shardTabletRefreshTimeout = 30 * time.Second

	// renameTableTemplate follows pt-osc's naming convention; this format also
	// ensures that vstreamer ignores such tables.
	renameTableTemplate = "_%.59s_old" // limit table name to 64 characters

	// SQL statement templates used by the traffic switcher.
	sqlDeleteWorkflow      = "delete from _vt.vreplication where db_name = %s and workflow = %s"
	sqlGetMaxSequenceVal   = "select max(%a) as maxval from %a.%a"
	sqlInitSequenceTable   = "insert into %a.%a (id, next_id, cache) values (0, %d, 1000) on duplicate key update next_id = if(next_id < %d, %d, next_id)"
	sqlCreateSequenceTable = "create table if not exists %a (id int, next_id bigint, cache bigint, primary key(id)) comment 'vitess_sequence'"
)
// accessType describes whether writes to a shard are allowed or disallowed.
type accessType int

// The possible accessType values.
const (
	// allowWrites permits writes on the shard.
	allowWrites accessType = iota
	// disallowWrites blocks writes on the shard.
	disallowWrites
)
// TrafficSwitchDirection specifies the switching direction.
type TrafficSwitchDirection int

// The following constants define the switching direction.
const (
	DirectionForward TrafficSwitchDirection = iota
	DirectionBackward
)

// String returns "forward" for DirectionForward and "backward" for every
// other value.
func (tsd TrafficSwitchDirection) String() string {
	switch tsd {
	case DirectionForward:
		return "forward"
	default:
		return "backward"
	}
}

// TableRemovalType specifies the way a table will be removed during a
// DropSource for a MoveTables workflow.
type TableRemovalType int

// The following consts define if DropSource will drop or rename the table.
const (
	DropTable TableRemovalType = iota
	RenameTable
)

var (
	// ErrNoStreams occurs when no target streams are found for a workflow in a
	// target keyspace.
	ErrNoStreams = errors.New("no streams found")

	// tableRemovalTypeStrs is indexed by TableRemovalType value.
	tableRemovalTypeStrs = []string{
		"DROP TABLE",
		"RENAME TABLE",
	}
)

// String returns a string representation of a TableRemovalType, or "Unknown"
// for values outside the DropTable..RenameTable range.
func (trt TableRemovalType) String() string {
	if trt >= DropTable && trt <= RenameTable {
		return tableRemovalTypeStrs[trt]
	}
	return "Unknown"
}
// ITrafficSwitcher is a hack to allow us to maintain the legacy wrangler
// package for vtctl/vtctlclient while migrating most of the TrafficSwitcher
// related code to the workflow package for vtctldclient usage.
//
// After moving TrafficSwitcher to this package and removing the implementation
// in wrangler, this type should be removed, and StreamMigrator should be updated
// to contain a field of type *TrafficSwitcher instead of ITrafficSwitcher.
//
// The *trafficSwitcher type in this file provides an implementation of each
// of these methods.
type ITrafficSwitcher interface {
/* Functions that expose types and behavior contained in *wrangler.Wrangler */
TopoServer() *topo.Server
TabletManagerClient() tmclient.TabletManagerClient
Logger() logutil.Logger
// VReplicationExec here is used when we want the (*wrangler.Wrangler)
// implementation, which does a topo lookup on the tablet alias before
// calling the underlying TabletManagerClient RPC.
VReplicationExec(ctx context.Context, alias *topodatapb.TabletAlias, query string) (*querypb.QueryResult, error)
/* Functions that expose fields on the *wrangler.trafficSwitcher */
ExternalTopo() *topo.Server
MigrationType() binlogdatapb.MigrationType
ReverseWorkflowName() string
SourceKeyspaceName() string
SourceKeyspaceSchema() *vindexes.KeyspaceSchema
Sources() map[string]*MigrationSource
Tables() []string
TargetKeyspaceName() string
Targets() map[string]*MigrationTarget
WorkflowName() string
SourceTimeZone() string
/* Functions that *wrangler.trafficSwitcher implements */
// ForAllSources and ForAllTargets call f for the migration sources/targets.
// The *trafficSwitcher implementation in this file runs f concurrently,
// once per entry, and returns the aggregated errors.
ForAllSources(f func(source *MigrationSource) error) error
ForAllTargets(f func(target *MigrationTarget) error) error
ForAllUIDs(f func(target *MigrationTarget, uid int32) error) error
// SourceShards and TargetShards return the shard info of each source/target.
SourceShards() []*topo.ShardInfo
TargetShards() []*topo.ShardInfo
}
// TargetInfo contains the metadata for a set of targets involved in a workflow.
type TargetInfo struct {
// Targets holds the migration targets.
// NOTE(review): presumably keyed by target shard name — confirm against the code that builds it.
Targets map[string]*MigrationTarget
// Frozen reports whether the workflow is frozen.
// NOTE(review): presumably derived from streams carrying the Frozen message — confirm.
Frozen bool
// OptCells and OptTabletTypes are the cells / tablet types options of the workflow.
OptCells string
OptTabletTypes string
// WorkflowType and WorkflowSubType classify the vreplication workflow.
WorkflowType binlogdatapb.VReplicationWorkflowType
WorkflowSubType binlogdatapb.VReplicationWorkflowSubType
// Options holds additional workflow options.
Options *vtctldatapb.WorkflowOptions
}
// MigrationSource contains the metadata for each migration source.
type MigrationSource struct {
// si is the source shard; exposed through GetShard.
si *topo.ShardInfo
// primary is the primary tablet of the source shard; exposed through GetPrimary.
primary *topo.TabletInfo
// Position is recorded as the LocalPosition of the journal created for
// this source in createJournals.
Position string
// Journaled marks a source for which createJournals skips creating a journal.
Journaled bool
}
// NewMigrationSource builds a MigrationSource for the given shard and its
// primary tablet. Position is left empty and Journaled is left false.
//
// (TODO|@ajm188): do we always want to start with (position:"", journaled:false)?
func NewMigrationSource(si *topo.ShardInfo, primary *topo.TabletInfo) *MigrationSource {
	source := new(MigrationSource)
	source.si, source.primary = si, primary
	return source
}
// GetShard returns the *topo.ShardInfo for the migration source, i.e. the
// value the source was constructed with.
func (source *MigrationSource) GetShard() *topo.ShardInfo {
return source.si
}
// GetPrimary returns the *topo.TabletInfo for the primary tablet of the
// migration source, i.e. the value the source was constructed with.
func (source *MigrationSource) GetPrimary() *topo.TabletInfo {
return source.primary
}
// trafficSwitcher contains the metadata for switching read and write traffic
// for vreplication streams.
type trafficSwitcher struct {
// ws is the workflow server; its topo server and tablet manager client are
// exposed through TopoServer and TabletManagerClient.
ws *Server
// logger is returned by Logger, which falls back to a console logger when
// this field is nil.
logger logutil.Logger
migrationType binlogdatapb.MigrationType
isPartialMigration bool // Is this on a subset of shards
workflow string
// Should we continue if we encounter some potentially non-fatal errors such
// as partial tablet refreshes?
force bool
// If frozen is true, the rest of the fields are not set.
frozen bool
reverseWorkflow string
// id is used as the Id of the journals written by createJournals.
id int64
// sources and targets hold the migration sources/targets that ForAllSources
// and ForAllTargets iterate over.
sources map[string]*MigrationSource
targets map[string]*MigrationTarget
// NOTE(review): SourceKeyspaceName reads the name from sourceKSSchema, not
// from sourceKeyspace — confirm the two are always kept in agreement.
sourceKeyspace string
targetKeyspace string
tables []string
keepRoutingRules bool
sourceKSSchema *vindexes.KeyspaceSchema
optCells string // cells option passed to MoveTables/Reshard Create
optTabletTypes string // tabletTypes option passed to MoveTables/Reshard Create
externalCluster string
externalTopo *topo.Server
sourceTimeZone string
targetTimeZone string
workflowType binlogdatapb.VReplicationWorkflowType
workflowSubType binlogdatapb.VReplicationWorkflowSubType
options *vtctldatapb.WorkflowOptions
}
// TopoServer returns the topo server of the underlying workflow server.
func (ts *trafficSwitcher) TopoServer() *topo.Server { return ts.ws.ts }
// TabletManagerClient returns the tablet manager client of the underlying
// workflow server.
func (ts *trafficSwitcher) TabletManagerClient() tmclient.TabletManagerClient { return ts.ws.tmc }
// Logger returns the traffic switcher's logger. When none has been set yet, a
// console logger is created, stored on the traffic switcher, and returned.
func (ts *trafficSwitcher) Logger() logutil.Logger {
	if ts.logger != nil {
		return ts.logger
	}
	// Use the default system logger
	ts.logger = logutil.NewConsoleLogger()
	return ts.logger
}
// VReplicationExec runs the given query through the workflow server's
// VReplicationExec for the tablet identified by alias.
func (ts *trafficSwitcher) VReplicationExec(ctx context.Context, alias *topodatapb.TabletAlias, query string) (*querypb.QueryResult, error) {
return ts.ws.VReplicationExec(ctx, alias, query)
}
// The accessors below expose the corresponding trafficSwitcher fields.
func (ts *trafficSwitcher) ExternalTopo() *topo.Server { return ts.externalTopo }
func (ts *trafficSwitcher) MigrationType() binlogdatapb.MigrationType { return ts.migrationType }
func (ts *trafficSwitcher) IsPartialMigration() bool { return ts.isPartialMigration }
func (ts *trafficSwitcher) ReverseWorkflowName() string { return ts.reverseWorkflow }
// SourceKeyspaceName returns the keyspace name recorded in the source keyspace
// schema (not the sourceKeyspace field); it dereferences ts.sourceKSSchema, so
// that field must be set before this is called.
func (ts *trafficSwitcher) SourceKeyspaceName() string { return ts.sourceKSSchema.Keyspace.Name }
func (ts *trafficSwitcher) SourceKeyspaceSchema() *vindexes.KeyspaceSchema { return ts.sourceKSSchema }
// Sources, Tables and Targets return the underlying map/slice itself, not a copy.
func (ts *trafficSwitcher) Sources() map[string]*MigrationSource { return ts.sources }
func (ts *trafficSwitcher) Tables() []string { return ts.tables }
func (ts *trafficSwitcher) TargetKeyspaceName() string { return ts.targetKeyspace }
func (ts *trafficSwitcher) Targets() map[string]*MigrationTarget { return ts.targets }
func (ts *trafficSwitcher) WorkflowName() string { return ts.workflow }
func (ts *trafficSwitcher) SourceTimeZone() string { return ts.sourceTimeZone }
func (ts *trafficSwitcher) TargetTimeZone() string { return ts.targetTimeZone }
// ForAllSources runs f concurrently, once per migration source, waits for all
// invocations to finish, and returns the aggregate of every non-nil error.
func (ts *trafficSwitcher) ForAllSources(f func(source *MigrationSource) error) error {
	errs := &concurrency.AllErrorRecorder{}
	var inFlight sync.WaitGroup
	inFlight.Add(len(ts.sources))
	for _, src := range ts.sources {
		go func(src *MigrationSource) {
			defer inFlight.Done()
			err := f(src)
			if err == nil {
				return
			}
			errs.RecordError(err)
		}(src)
	}
	inFlight.Wait()
	return errs.AggrError(vterrors.Aggregate)
}
// ForAllTargets runs f concurrently, once per migration target, waits for all
// invocations to finish, and returns the aggregate of every non-nil error.
func (ts *trafficSwitcher) ForAllTargets(f func(target *MigrationTarget) error) error {
	errs := &concurrency.AllErrorRecorder{}
	var inFlight sync.WaitGroup
	inFlight.Add(len(ts.targets))
	for _, tgt := range ts.targets {
		go func(tgt *MigrationTarget) {
			defer inFlight.Done()
			err := f(tgt)
			if err == nil {
				return
			}
			errs.RecordError(err)
		}(tgt)
	}
	inFlight.Wait()
	return errs.AggrError(vterrors.Aggregate)
}
// MigrationTarget contains the metadata for each migration target.
type MigrationTarget struct {
// si is the target shard; exposed through GetShard.
si *topo.ShardInfo
// primary is the primary tablet of the target shard; exposed through GetPrimary.
primary *topo.TabletInfo
// Sources holds the binlog sources of this target. createJournals uses the
// Shard of each entry to collect the journal participants.
// NOTE(review): the int32 key is presumably the vreplication stream id — confirm.
Sources map[int32]*binlogdatapb.BinlogSource
// Position is recorded as the Gtid of this target's ShardGtid entry in the
// journals written by createJournals.
Position string
}
// GetShard returns the *topo.ShardInfo for the migration target (the si field).
func (target *MigrationTarget) GetShard() *topo.ShardInfo {
return target.si
}
// GetPrimary returns the *topo.TabletInfo for the primary tablet of the
// migration target (the primary field).
func (target *MigrationTarget) GetPrimary() *topo.TabletInfo {
return target.primary
}
// SourceShards returns the shard info of every migration source. The order
// follows map iteration and is therefore unspecified.
func (ts *trafficSwitcher) SourceShards() []*topo.ShardInfo {
	sources := ts.Sources()
	result := make([]*topo.ShardInfo, 0, len(sources))
	for _, src := range sources {
		result = append(result, src.GetShard())
	}
	return result
}
// TargetShards returns the shard info of every migration target. The order
// follows map iteration and is therefore unspecified.
func (ts *trafficSwitcher) TargetShards() []*topo.ShardInfo {
	targets := ts.Targets()
	result := make([]*topo.ShardInfo, 0, len(targets))
	for _, tgt := range targets {
		result = append(result, tgt.GetShard())
	}
	return result
}
// getSourceAndTargetShardsNames returns the shard names of all source shards
// followed by the shard names of all target shards. A side with no shards
// yields a nil slice.
func (ts *trafficSwitcher) getSourceAndTargetShardsNames() ([]string, []string) {
	namesOf := func(shards []*topo.ShardInfo) []string {
		var names []string
		for _, shard := range shards {
			names = append(names, shard.ShardName())
		}
		return names
	}
	sourceNames := namesOf(ts.SourceShards())
	targetNames := namesOf(ts.TargetShards())
	return sourceNames, targetNames
}
// isPartialMoveTables reports whether the workflow is a MoveTables workflow
// whose source and target have the same number of shards, do not cover the
// entire shard range, and span equal key ranges (one-to-one shards in source
// and target).
func (ts *trafficSwitcher) isPartialMoveTables(sourceShards, targetShards []string) (bool, error) {
	if ts.MigrationType() != binlogdatapb.MigrationType_TABLES {
		return false, nil
	}

	sourceRange, targetRange, err := getSourceAndTargetKeyRanges(sourceShards, targetShards)
	if err != nil {
		return false, err
	}

	switch {
	case key.KeyRangeIsComplete(sourceRange),
		key.KeyRangeIsComplete(targetRange),
		len(sourceShards) != len(targetShards):
		// Either side covers the full range, or the shard counts differ.
		return false, nil
	}
	return key.KeyRangeEqual(sourceRange, targetRange), nil
}
// addParticipatingTablesToKeyspace updates the vschema of the given keyspace
// with the new tables that were created as part of the Migrate flow. It is
// called when the Migrate flow is Completed.
//
// When tableSpecs starts with "{" it is treated as a user supplied JSON
// vschema snippet whose table definitions are copied into the vschema.
// Otherwise every table of the workflow is added with an empty definition,
// which is refused for a sharded vschema.
func (ts *trafficSwitcher) addParticipatingTablesToKeyspace(ctx context.Context, keyspace, tableSpecs string) error {
	vschema, err := ts.TopoServer().GetVSchema(ctx, keyspace)
	switch {
	case err != nil:
		return err
	case vschema == nil:
		return fmt.Errorf("no vschema found for keyspace %s", keyspace)
	}
	if vschema.Tables == nil {
		vschema.Tables = make(map[string]*vschemapb.Table)
	}

	switch {
	case strings.HasPrefix(tableSpecs, "{"):
		// user defined the vschema snippet, typically for a sharded target
		parsed := &vschemapb.Keyspace{}
		wrapped := `{"tables": ` + tableSpecs + `}`
		if err := json2.UnmarshalPB([]byte(wrapped), parsed); err != nil {
			return err
		}
		for name, definition := range parsed.Tables {
			vschema.Tables[name] = definition
		}
	case vschema.Sharded:
		return errors.New("no sharded vschema was provided, so you will need to update the vschema of the target manually for the moved tables")
	default:
		for _, name := range ts.tables {
			vschema.Tables[name] = &vschemapb.Table{}
		}
	}
	return ts.TopoServer().SaveVSchema(ctx, keyspace, vschema)
}
// deleteRoutingRules removes, for every table of the workflow, the routing
// rules keyed by the bare table name and by the table name qualified with the
// target and source keyspaces — each with no suffix, "@replica" and "@rdonly"
// — and then saves the remaining rules.
func (ts *trafficSwitcher) deleteRoutingRules(ctx context.Context) error {
	rules, err := topotools.GetRoutingRules(ctx, ts.TopoServer())
	if err != nil {
		return err
	}
	tabletTypeSuffixes := [...]string{"", "@replica", "@rdonly"}
	for _, table := range ts.Tables() {
		for _, suffix := range tabletTypeSuffixes {
			delete(rules, table+suffix)
		}
		for _, suffix := range tabletTypeSuffixes {
			delete(rules, ts.TargetKeyspaceName()+"."+table+suffix)
		}
		for _, suffix := range tabletTypeSuffixes {
			delete(rules, ts.SourceKeyspaceName()+"."+table+suffix)
		}
	}
	return topotools.SaveRoutingRules(ctx, ts.TopoServer(), rules)
}
// deleteShardRoutingRules removes the shard routing rules keyed by
// "<target keyspace>.<shard>" for every target shard and saves the result.
// It is a no-op for non-partial migrations, and a missing rules node only
// produces a warning.
func (ts *trafficSwitcher) deleteShardRoutingRules(ctx context.Context) error {
	if !ts.isPartialMigration {
		return nil
	}
	shardRules, err := topotools.GetShardRoutingRules(ctx, ts.TopoServer())
	switch {
	case err == nil:
		// Rules loaded; continue below.
	case topo.IsErrType(err, topo.NoNode):
		ts.Logger().Warningf("No shard routing rules found when attempting to delete the ones for the %s keyspace", ts.targetKeyspace)
		return nil
	default:
		return err
	}
	for _, shard := range ts.TargetShards() {
		delete(shardRules, ts.targetKeyspace+"."+shard.ShardName())
	}
	return topotools.SaveShardRoutingRules(ctx, ts.TopoServer(), shardRules)
}
// deleteKeyspaceRoutingRules removes the keyspace routing rules keyed by the
// source keyspace name combined with each tablet type suffix. It is a no-op
// unless this is a multi-tenant migration.
func (ts *trafficSwitcher) deleteKeyspaceRoutingRules(ctx context.Context) error {
	if !ts.IsMultiTenantMigration() {
		return nil
	}
	ts.Logger().Infof("deleteKeyspaceRoutingRules: workflow %s.%s", ts.targetKeyspace, ts.workflow)

	reason := fmt.Sprintf("Deleting rules for %s", ts.SourceKeyspaceName())
	dropSourceRules := func(ctx context.Context, rules *map[string]string) error {
		for _, suffix := range tabletTypeSuffixes {
			delete(*rules, ts.SourceKeyspaceName()+suffix)
		}
		return nil
	}
	return topotools.UpdateKeyspaceRoutingRules(ctx, ts.TopoServer(), reason, dropSourceRules)
}
// dropSourceDeniedTables removes the workflow's tables from the PRIMARY denied
// tables of every source shard and then refreshes the tablets of that shard,
// bounded by shardTabletRefreshTimeout. A partial refresh is an error unless
// ts.force is set, in which case it is only logged as a warning.
func (ts *trafficSwitcher) dropSourceDeniedTables(ctx context.Context) error {
	return ts.ForAllSources(func(source *MigrationSource) error {
		_, err := ts.TopoServer().UpdateShardFields(ctx, ts.SourceKeyspaceName(), source.GetShard().ShardName(),
			func(si *topo.ShardInfo) error {
				return si.UpdateDeniedTables(ctx, topodatapb.TabletType_PRIMARY, nil, true, ts.Tables())
			})
		if err != nil {
			return err
		}

		rtbsCtx, cancel := context.WithTimeout(ctx, shardTabletRefreshTimeout)
		defer cancel()
		isPartial, partialDetails, err := topotools.RefreshTabletsByShard(rtbsCtx, ts.TopoServer(), ts.TabletManagerClient(), source.GetShard(), nil, ts.Logger())
		if !isPartial {
			return err
		}

		msg := fmt.Sprintf("failed to successfully refresh all tablets in the %s/%s source shard (%v):\n %v",
			source.GetShard().Keyspace(), source.GetShard().ShardName(), err, partialDetails)
		if !ts.force {
			return errors.New(msg)
		}
		log.Warning(msg)
		return nil
	})
}
// dropTargetDeniedTables removes the workflow's tables from the PRIMARY denied
// tables of every target shard and then refreshes the tablets of that shard,
// bounded by shardTabletRefreshTimeout. A partial refresh is an error unless
// ts.force is set, in which case it is only logged as a warning.
func (ts *trafficSwitcher) dropTargetDeniedTables(ctx context.Context) error {
	return ts.ForAllTargets(func(target *MigrationTarget) error {
		_, err := ts.TopoServer().UpdateShardFields(ctx, ts.TargetKeyspaceName(), target.GetShard().ShardName(),
			func(si *topo.ShardInfo) error {
				return si.UpdateDeniedTables(ctx, topodatapb.TabletType_PRIMARY, nil, true, ts.Tables())
			})
		if err != nil {
			return err
		}

		rtbsCtx, cancel := context.WithTimeout(ctx, shardTabletRefreshTimeout)
		defer cancel()
		isPartial, partialDetails, err := topotools.RefreshTabletsByShard(rtbsCtx, ts.TopoServer(), ts.TabletManagerClient(), target.GetShard(), nil, ts.Logger())
		if !isPartial {
			return err
		}

		msg := fmt.Sprintf("failed to successfully refresh all tablets in the %s/%s target shard (%v):\n %v",
			target.GetShard().Keyspace(), target.GetShard().ShardName(), err, partialDetails)
		if !ts.force {
			return errors.New(msg)
		}
		log.Warning(msg)
		return nil
	})
}
// validateWorkflowHasCompleted delegates to doValidateWorkflowHasCompleted,
// passing this traffic switcher, and returns its result.
func (ts *trafficSwitcher) validateWorkflowHasCompleted(ctx context.Context) error {
return doValidateWorkflowHasCompleted(ctx, ts)
}
// dropParticipatingTablesFromKeyspace removes the workflow's tables from the
// vschema of the given keyspace and saves it. Nothing is changed when the
// keyspace is the target keyspace and its vschema is sharded.
func (ts *trafficSwitcher) dropParticipatingTablesFromKeyspace(ctx context.Context, keyspace string) error {
	vschema, err := ts.TopoServer().GetVSchema(ctx, keyspace)
	if err != nil {
		return err
	}
	// VReplication does NOT create the vschema entries in SHARDED
	// TARGET keyspaces -- as we cannot know the proper vindex
	// definitions to use -- and we should not delete them either
	// (on workflow Cancel) as the user must create them separately
	// and they contain information about the vindex definitions, etc.
	isShardedTarget := vschema.Sharded && keyspace == ts.TargetKeyspaceName()
	if !isShardedTarget {
		for _, name := range ts.Tables() {
			delete(vschema.Tables, name)
		}
		return ts.TopoServer().SaveVSchema(ctx, keyspace, vschema)
	}
	return nil
}
// removeSourceTables removes every table of the workflow from the primary
// tablet of each source shard, and then removes those tables from the source
// keyspace's vschema.
//
// When removalType is DropTable the tables are dropped; for any other value
// they are renamed using getRenameFileName. A table that did not exist is
// logged as a warning and skipped; any other error aborts the work for that
// source and is returned.
func (ts *trafficSwitcher) removeSourceTables(ctx context.Context, removalType TableRemovalType) error {
	err := ts.ForAllSources(func(source *MigrationSource) error {
		for _, tableName := range ts.Tables() {
			primary := source.GetPrimary()
			alias := topoproto.TabletAliasString(primary.GetAlias())

			primaryDbName, err := sqlescape.EnsureEscaped(primary.DbName())
			if err != nil {
				return err
			}
			tableNameEscaped, err := sqlescape.EnsureEscaped(tableName)
			if err != nil {
				return err
			}

			query := fmt.Sprintf("drop table %s.%s", primaryDbName, tableNameEscaped)
			if removalType == DropTable {
				ts.Logger().Infof("%s: Dropping table %s.%s\n", alias, primary.DbName(), tableName)
			} else {
				renameName, err := sqlescape.EnsureEscaped(getRenameFileName(tableName))
				if err != nil {
					return err
				}
				ts.Logger().Infof("%s: Renaming table %s.%s to %s.%s\n",
					alias, primary.DbName(), tableName, primary.DbName(), renameName)
				query = fmt.Sprintf("rename table %s.%s TO %s.%s", primaryDbName, tableNameEscaped, primaryDbName, renameName)
			}

			_, err = ts.ws.tmc.ExecuteFetchAsDba(ctx, primary.Tablet, false, &tabletmanagerdatapb.ExecuteFetchAsDbaRequest{
				Query:                   []byte(query),
				MaxRows:                 1,
				ReloadSchema:            true,
				DisableForeignKeyChecks: true,
			})
			if err != nil {
				if !IsTableDidNotExistError(err) {
					ts.Logger().Errorf("%s: Error removing table %s: %v", alias, tableName, err)
					return err
				}
				ts.Logger().Warningf("%s: Table %s did not exist when attempting to remove it", alias, tableName)
				// Nothing was removed, so do not also report a successful
				// removal for this table.
				continue
			}
			ts.Logger().Infof("%s: Removed table %s.%s\n", alias, primary.DbName(), tableName)
		}
		return nil
	})
	if err != nil {
		return err
	}
	return ts.dropParticipatingTablesFromKeyspace(ctx, ts.SourceKeyspaceName())
}
// dropSourceShards deletes every source shard through the workflow server's
// DeleteShard, logging before and after each deletion.
//
// FIXME: even after dropSourceShards there are still entries in the topo, need to research and fix
func (ts *trafficSwitcher) dropSourceShards(ctx context.Context) error {
	deleteShard := func(source *MigrationSource) error {
		shard := source.GetShard()
		ts.Logger().Infof("Deleting shard %s.%s\n", shard.Keyspace(), shard.ShardName())
		if err := ts.ws.DeleteShard(ctx, shard.Keyspace(), shard.ShardName(), true, false); err != nil {
			ts.Logger().Errorf("Error deleting shard %s: %v", shard.ShardName(), err)
			return err
		}
		ts.Logger().Infof("Deleted shard %s.%s\n", shard.Keyspace(), shard.ShardName())
		return nil
	}
	return ts.ForAllSources(deleteShard)
}
// switchShardReads migrates the given served tablet types, in the given
// cells, between the source and target shards. For DirectionForward the
// types move from the source shards to the target shards; for any other
// direction they move from the target shards back to the source shards.
//
// The SrvKeyspace of the target keyspace is validated before and after the
// switch. For each served type the shard records of both sides are updated
// before the served type is migrated; the first error aborts the operation
// and is returned.
func (ts *trafficSwitcher) switchShardReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error {
	ts.Logger().Infof("switchShardReads: workflow: %s, direction: %s, cells: %v, tablet types: %v",
		ts.workflow, direction.String(), cells, servedTypes)

	var fromShards, toShards []*topo.ShardInfo
	if direction == DirectionForward {
		fromShards, toShards = ts.SourceShards(), ts.TargetShards()
	} else {
		fromShards, toShards = ts.TargetShards(), ts.SourceShards()
	}

	cellsStr := strings.Join(cells, ",")
	if err := ts.TopoServer().ValidateSrvKeyspace(ctx, ts.TargetKeyspaceName(), cellsStr); err != nil {
		err2 := vterrors.Wrapf(err, "Before switching shard reads, found SrvKeyspace for %s is corrupt in cell %s",
			ts.TargetKeyspaceName(), cellsStr)
		// %w is only understood by fmt.Errorf; the logger needs %v to print
		// the error text.
		ts.Logger().Errorf("%v", err2)
		return err2
	}

	for _, servedType := range servedTypes {
		if err := ts.ws.updateShardRecords(ctx, ts.SourceKeyspaceName(), fromShards, cells, servedType, true /* isFrom */, false /* clearSourceShards */, ts.Logger()); err != nil {
			return err
		}
		if err := ts.ws.updateShardRecords(ctx, ts.SourceKeyspaceName(), toShards, cells, servedType, false, false, ts.Logger()); err != nil {
			return err
		}
		err := ts.TopoServer().MigrateServedType(ctx, ts.SourceKeyspaceName(), toShards, fromShards, servedType, cells)
		if err != nil {
			return err
		}
	}

	if err := ts.TopoServer().ValidateSrvKeyspace(ctx, ts.TargetKeyspaceName(), cellsStr); err != nil {
		err2 := vterrors.Wrapf(err, "after switching shard reads, found SrvKeyspace for %s is corrupt in cell %s",
			ts.TargetKeyspaceName(), cellsStr)
		ts.Logger().Errorf("%v", err2)
		return err2
	}
	return nil
}
// switchTableReads points the tablet type specific routing rules of every
// table in the workflow at the target keyspace (DirectionForward) or at the
// source keyspace (any other direction), saves the rules, and optionally
// rebuilds the SrvVSchema for the given cells.
//
// Only REPLICA and RDONLY tablet types are accepted; any other type returns
// an INVALID_ARGUMENT error before anything is saved.
func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, rebuildSrvVSchema bool, direction TrafficSwitchDirection) error {
	ts.Logger().Infof("switchTableReads: workflow: %s, direction: %s, cells: %v, tablet types: %v",
		ts.workflow, direction.String(), cells, servedTypes)
	rules, err := topotools.GetRoutingRules(ctx, ts.TopoServer())
	if err != nil {
		return err
	}

	// We assume that the following rules were setup when the targets were created:
	// table -> sourceKeyspace.table
	// targetKeyspace.table -> sourceKeyspace.table
	// For forward migration, we add tablet type specific rules to redirect traffic to the target.
	// For backward, we redirect to source.
	for _, servedType := range servedTypes {
		switch servedType {
		case topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY:
			// Supported read tablet types.
		default:
			return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "invalid tablet type specified when switching reads: %v", servedType)
		}

		suffix := "@" + strings.ToLower(servedType.String())
		for _, table := range ts.Tables() {
			var destination []string
			if direction == DirectionForward {
				destination = []string{ts.TargetKeyspaceName() + "." + table}
			} else {
				destination = []string{ts.SourceKeyspaceName() + "." + table}
			}
			rules[table+suffix] = destination
			rules[ts.TargetKeyspaceName()+"."+table+suffix] = destination
			rules[ts.SourceKeyspaceName()+"."+table+suffix] = destination
		}
	}

	if err := topotools.SaveRoutingRules(ctx, ts.TopoServer(), rules); err != nil {
		return err
	}
	if !rebuildSrvVSchema {
		return nil
	}
	return ts.TopoServer().RebuildSrvVSchema(ctx, cells)
}
// startReverseVReplication sets the reverse workflow's vreplication rows on
// every source primary to the 'Running' state with an empty message.
func (ts *trafficSwitcher) startReverseVReplication(ctx context.Context) error {
	startOnSource := func(source *MigrationSource) error {
		primary := source.GetPrimary()
		dbName := encodeString(primary.DbName())
		workflow := encodeString(ts.ReverseWorkflowName())
		query := fmt.Sprintf("update _vt.vreplication set state='Running', message='' where db_name=%s and workflow=%s",
			dbName, workflow)
		_, err := ts.VReplicationExec(ctx, primary.GetAlias(), query)
		return err
	}
	return ts.ForAllSources(startOnSource)
}
// createJournals inserts a row into _vt.resharding_journal on the primary of
// every source that is not yet journaled. Each journal records the workflow
// id, migration type, tables, the source's position, the given source
// workflows, one ShardGtid per target, and the sorted set of shards that the
// targets stream from as participants.
func (ts *trafficSwitcher) createJournals(ctx context.Context, sourceWorkflows []string) error {
	ts.Logger().Infof("In createJournals for source workflows %+v", sourceWorkflows)
	return ts.ForAllSources(func(source *MigrationSource) error {
		if source.Journaled {
			return nil
		}

		journal := &binlogdatapb.Journal{
			Id:              ts.id,
			MigrationType:   ts.MigrationType(),
			Tables:          ts.Tables(),
			LocalPosition:   source.Position,
			Participants:    []*binlogdatapb.KeyspaceShard{},
			SourceWorkflows: sourceWorkflows,
		}

		// Collect the distinct shards the targets stream from, and record
		// each target's position.
		seenShards := make(map[string]bool)
		for targetShard, target := range ts.Targets() {
			for _, bls := range target.Sources {
				seenShards[bls.Shard] = true
			}
			shardGtid := &binlogdatapb.ShardGtid{
				Keyspace: ts.TargetKeyspaceName(),
				Shard:    targetShard,
				Gtid:     target.Position,
			}
			journal.ShardGtids = append(journal.ShardGtids, shardGtid)
		}

		shards := make([]string, 0, len(seenShards))
		for shard := range seenShards {
			shards = append(shards, shard)
		}
		sort.Sort(vreplication.ShardSorter(shards))
		for _, shard := range shards {
			participant := &binlogdatapb.KeyspaceShard{
				Keyspace: source.GetShard().Keyspace(),
				Shard:    shard,
			}
			journal.Participants = append(journal.Participants, participant)
		}

		ts.Logger().Infof("Creating journal: %v", journal)
		statement := fmt.Sprintf("insert into _vt.resharding_journal (id, db_name, val) values (%v, %v, %v)",
			ts.id, encodeString(source.GetPrimary().DbName()), encodeString(journal.String()))
		_, err := ts.TabletManagerClient().VReplicationExec(ctx, source.GetPrimary().Tablet, statement)
		return err
	})
}
// changeShardsAccess updates the PRIMARY query service flag of the given
// shards — disabled when access is disallowWrites, enabled otherwise — and
// then refreshes the primary tablets of those shards.
func (ts *trafficSwitcher) changeShardsAccess(ctx context.Context, keyspace string, shards []*topo.ShardInfo, access accessType) error {
	disableQueryService := access == disallowWrites
	err := ts.TopoServer().UpdateDisableQueryService(ctx, keyspace, shards, topodatapb.TabletType_PRIMARY, nil, disableQueryService)
	if err != nil {
		return err
	}
	return ts.ws.refreshPrimaryTablets(ctx, shards, ts.force)
}
// allowTargetWrites enables writes on the target: table migrations switch the
// denied tables, all other migration types allow writes on the target shards.
func (ts *trafficSwitcher) allowTargetWrites(ctx context.Context) error {
	switch ts.MigrationType() {
	case binlogdatapb.MigrationType_TABLES:
		return ts.switchDeniedTables(ctx)
	default:
		return ts.changeShardsAccess(ctx, ts.TargetKeyspaceName(), ts.TargetShards(), allowWrites)
	}
}
// changeRouting switches the routing: table migrations change the write
// route, all other migration types change the shard routing.
func (ts *trafficSwitcher) changeRouting(ctx context.Context) error {
	switch ts.MigrationType() {
	case binlogdatapb.MigrationType_TABLES:
		return ts.changeWriteRoute(ctx)
	default:
		return ts.changeShardRouting(ctx)
	}
}
// changeWriteRoute points write traffic at the target keyspace and then
// rebuilds the SrvVSchema. How the routing is changed depends on the kind of
// migration:
//   - multi-tenant: the keyspace routing rules for PRIMARY are changed from
//     the source to the target keyspace;
//   - partial: the shard routing rules of each source shard are moved from
//     the target keyspace key to a source keyspace key routed to the target;
//   - otherwise: the table routing rules of each table are pointed at the
//     target keyspace.
func (ts *trafficSwitcher) changeWriteRoute(ctx context.Context) error {
	switch {
	case ts.IsMultiTenantMigration():
		// For multi-tenant migrations, we can only move forward and not backwards.
		ts.Logger().Infof("Pointing keyspace routing rules for primary to %s for workflow %s", ts.TargetKeyspaceName(), ts.workflow)
		err := changeKeyspaceRouting(ctx, ts.TopoServer(), []topodatapb.TabletType{topodatapb.TabletType_PRIMARY},
			ts.SourceKeyspaceName() /* from */, ts.TargetKeyspaceName() /* to */, "SwitchWrites")
		if err != nil {
			return err
		}

	case ts.isPartialMigration:
		shardRules, err := topotools.GetShardRoutingRules(ctx, ts.TopoServer())
		if err != nil {
			return err
		}
		for _, shard := range ts.SourceShards() {
			delete(shardRules, ts.TargetKeyspaceName()+"."+shard.ShardName())
			ts.Logger().Infof("Deleted shard routing: %v:%v", ts.TargetKeyspaceName(), shard.ShardName())
			shardRules[ts.SourceKeyspaceName()+"."+shard.ShardName()] = ts.TargetKeyspaceName()
			ts.Logger().Infof("Added shard routing: %v:%v", ts.SourceKeyspaceName(), shard.ShardName())
		}
		if err := topotools.SaveShardRoutingRules(ctx, ts.TopoServer(), shardRules); err != nil {
			return err
		}

	default:
		tableRules, err := topotools.GetRoutingRules(ctx, ts.TopoServer())
		if err != nil {
			return err
		}
		for _, table := range ts.Tables() {
			targetKsTable := ts.TargetKeyspaceName() + "." + table
			sourceKsTable := ts.SourceKeyspaceName() + "." + table
			delete(tableRules, targetKsTable)
			ts.Logger().Infof("Deleted routing: %s", targetKsTable)
			tableRules[table] = []string{targetKsTable}
			tableRules[sourceKsTable] = []string{targetKsTable}
			ts.Logger().Infof("Added routing: %v %v", table, sourceKsTable)
		}
		if err := topotools.SaveRoutingRules(ctx, ts.TopoServer(), tableRules); err != nil {
			return err
		}
	}
	return ts.TopoServer().RebuildSrvVSchema(ctx, nil)
}
// changeShardRouting moves PRIMARY serving from the source shards to the
// target shards. It validates the target keyspace's SrvKeyspace, marks every
// source shard as not primary-serving and every target shard as
// primary-serving, migrates the PRIMARY served type, and validates the
// SrvKeyspace again. A validation failure is logged and returned wrapped with
// context; any other error is returned as-is.
func (ts *trafficSwitcher) changeShardRouting(ctx context.Context) error {
	if err := ts.TopoServer().ValidateSrvKeyspace(ctx, ts.TargetKeyspaceName(), ""); err != nil {
		err2 := vterrors.Wrapf(err, "Before changing shard routes, found SrvKeyspace for %s is corrupt", ts.TargetKeyspaceName())
		// %w is only understood by fmt.Errorf; assuming the logger formats with
		// fmt.Sprintf semantics, it would render as "%!w(...)". Use %v instead.
		ts.Logger().Errorf("%v", err2)
		return err2
	}

	// Stop primary serving on all source shards.
	err := ts.ForAllSources(func(source *MigrationSource) error {
		_, err := ts.TopoServer().UpdateShardFields(ctx, ts.SourceKeyspaceName(), source.GetShard().ShardName(), func(si *topo.ShardInfo) error {
			si.IsPrimaryServing = false
			return nil
		})
		return err
	})
	if err != nil {
		return err
	}

	// Start primary serving on all target shards.
	err = ts.ForAllTargets(func(target *MigrationTarget) error {
		_, err := ts.TopoServer().UpdateShardFields(ctx, ts.TargetKeyspaceName(), target.GetShard().ShardName(), func(si *topo.ShardInfo) error {
			si.IsPrimaryServing = true
			return nil
		})
		return err
	})
	if err != nil {
		return err
	}

	err = ts.TopoServer().MigrateServedType(ctx, ts.TargetKeyspaceName(), ts.TargetShards(), ts.SourceShards(), topodatapb.TabletType_PRIMARY, nil)
	if err != nil {
		return err
	}

	if err := ts.TopoServer().ValidateSrvKeyspace(ctx, ts.TargetKeyspaceName(), ""); err != nil {
		err2 := vterrors.Wrapf(err, "after changing shard routes, found SrvKeyspace for %s is corrupt", ts.TargetKeyspaceName())
		// See above: %v rather than %w for a non-fmt.Errorf formatter.
		ts.Logger().Errorf("%v", err2)
		return err2
	}
	return nil
}
// getReverseVReplicationUpdateQuery builds the statement that copies the
// cell, tablet_types and options settings onto the reverse workflow's
// _vt.vreplication rows for dbname. It returns "" when neither cells nor
// tablet types are set on the traffic switcher.
//
// Side effect: ts.optCells may be rewritten (see below).
//
// NOTE(review): all values are interpolated into the statement between single
// quotes without escaping; confirm callers never pass values containing quotes.
func (ts *trafficSwitcher) getReverseVReplicationUpdateQuery(targetCell string, sourceCell string, dbname string, options string) string {
	// We try to be clever about what the user intends: if the target's cell
	// is present in cells but the source's cell is not, we replace it with
	// the source's cell.
	if ts.optCells != "" && targetCell != sourceCell {
		cellList := ts.optCells + ","
		listsTarget := strings.Contains(cellList, targetCell+",")
		listsSource := strings.Contains(cellList, sourceCell+",")
		if listsTarget && !listsSource {
			ts.optCells = strings.Replace(ts.optCells, targetCell, sourceCell, 1)
		}
	}

	if ts.optCells == "" && ts.optTabletTypes == "" {
		return ""
	}
	return fmt.Sprintf("update _vt.vreplication set cell = '%s', tablet_types = '%s', options = '%s' where workflow = '%s' and db_name = '%s'",
		ts.optCells, ts.optTabletTypes, options, ts.ReverseWorkflowName(), dbname)
}
// deleteReverseVReplication removes the reverse workflow's vreplication rows
// from every source primary, then deletes the workflow's VDiff data and
// optimizes the copy state table on that tablet. The first failing delete
// statement for a source is returned as that source's error.
func (ts *trafficSwitcher) deleteReverseVReplication(ctx context.Context) error {
	deleteOnSource := func(source *MigrationSource) error {
		dbName := encodeString(source.GetPrimary().DbName())
		workflowName := encodeString(ts.reverseWorkflow)
		deleteStmt := fmt.Sprintf(sqlDeleteWorkflow, dbName, workflowName)

		_, err := ts.TabletManagerClient().VReplicationExec(ctx, source.GetPrimary().Tablet, deleteStmt)
		if err != nil {
			// vreplication.exec returns no error on delete if the rows do not exist.
			return err
		}

		ts.ws.deleteWorkflowVDiffData(ctx, source.GetPrimary().Tablet, ts.reverseWorkflow)
		ts.ws.optimizeCopyStateTable(source.GetPrimary().Tablet)
		return nil
	}
	return ts.ForAllSources(deleteOnSource)
}
// ForAllUIDs invokes f once for every (target, uid) pair, where uid ranges
// over the keys of target.Sources for each target returned by ts.Targets().
// Each invocation runs in its own goroutine; ForAllUIDs waits for all of them
// to finish and returns the recorded errors aggregated with
// vterrors.Aggregate.
func (ts *trafficSwitcher) ForAllUIDs(f func(target *MigrationTarget, uid int32) error) error {
	var (
		pending sync.WaitGroup
		errs    = &concurrency.AllErrorRecorder{}
	)

	// worker receives its pair as arguments so that each goroutine operates
	// on the values that were current when it was started.
	worker := func(target *MigrationTarget, uid int32) {
		defer pending.Done()
		if err := f(target, uid); err != nil {
			errs.RecordError(err)
		}
	}

	for _, target := range ts.Targets() {
		for uid := range target.Sources {
			pending.Add(1)
			go worker(target, uid)
		}
	}

	pending.Wait()
	return errs.AggrError(vterrors.Aggregate)
}
// createReverseVReplication (re)creates the reverse workflow's vreplication
// streams on the source primaries. It first deletes any existing reverse
// workflow rows and then, for every (target, stream uid) pair, builds a
// BinlogSource that reads from the target shard, inserts it as a stream in the
// Stopped state starting at target.Position, and finally applies the
// cell/tablet_types/options update when getReverseVReplicationUpdateQuery
// returns a non-empty statement.
//
// The per-stream callback is run through ForAllUIDs, so the returned error is
// whatever ForAllUIDs aggregates from the individual callbacks.
func (ts *trafficSwitcher) createReverseVReplication(ctx context.Context) error {
if err := ts.deleteReverseVReplication(ctx); err != nil {
return err
}
err := ts.ForAllUIDs(func(target *MigrationTarget, uid int32) error {
// bls is the forward stream's definition; source is the migration source
// looked up by the shard that forward stream reads from.
bls := target.Sources[uid]
source := ts.Sources()[bls.Shard]
// The reverse stream reads from the target keyspace/shard. Source and target
// time zones are swapped relative to the forward stream.
reverseBls := &binlogdatapb.BinlogSource{
Keyspace: ts.TargetKeyspaceName(),
Shard: target.GetShard().ShardName(),
TabletType: bls.TabletType,
Filter: &binlogdatapb.Filter{},
OnDdl: bls.OnDdl,
SourceTimeZone: bls.TargetTimeZone,
TargetTimeZone: bls.SourceTimeZone,
}
var err error
// Translate each forward filter rule into its reverse counterpart.
for _, rule := range bls.Filter.Rules {
// Exclusion rules are carried over unchanged.
if rule.Filter == "exclude" {
reverseBls.Filter.Rules = append(reverseBls.Filter.Rules, rule)
continue
}
var filter string
// Rules whose Match starts with "/" get only the source shard's keyrange as
// filter, and only when the source keyspace is sharded; otherwise the filter
// stays empty.
if strings.HasPrefix(rule.Match, "/") {
if ts.SourceKeyspaceSchema().Keyspace.Sharded {
filter = key.KeyRangeString(source.GetShard().KeyRange)
}
} else {
// For all other rules Match is looked up as a table name and the filter is a
// select statement, optionally restricted with in_keyrange().
var inKeyrange string
if ts.SourceKeyspaceSchema().Keyspace.Sharded {
vtable, ok := ts.SourceKeyspaceSchema().Tables[rule.Match]
if !ok {
return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "table %s not found in vschema", rule.Match)
}
// We currently assume the primary vindex is the best way to filter rows
// for the table, which may not always be true.
// TODO: handle more of these edge cases explicitly, e.g. sequence tables.
switch vtable.Type {
case vindexes.TypeReference:
// For reference tables there are no vindexes and thus no filter to apply.
default:
// For non-reference tables we return an error if there's no primary
// vindex as it's not clear what to do.
if len(vtable.ColumnVindexes) > 0 && len(vtable.ColumnVindexes[0].Columns) > 0 {
// Restrict rows to the source shard's keyrange using the first column of
// the first column vindex.
inKeyrange = fmt.Sprintf(" where in_keyrange(%s, '%s.%s', '%s')", sqlparser.String(vtable.ColumnVindexes[0].Columns[0]),
ts.SourceKeyspaceName(), vtable.ColumnVindexes[0].Name, key.KeyRangeString(source.GetShard().KeyRange))
} else {
return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary vindex found for the %s table in the %s keyspace",
vtable.Name.String(), ts.SourceKeyspaceName())
}
}
}
filter = fmt.Sprintf("select * from %s%s", sqlescape.EscapeID(rule.Match), inKeyrange)
// Multi-tenant migrations additionally pass the filter through addTenantFilter.
if ts.IsMultiTenantMigration() {
filter, err = ts.addTenantFilter(ctx, filter)
if err != nil {
return err
}
}
}
reverseBls.Filter.Rules = append(reverseBls.Filter.Rules, &binlogdatapb.Rule{
Match: rule.Match,
Filter: filter,
})
}
ts.Logger().Infof("Creating reverse workflow vreplication stream on tablet %s: workflow %s, startPos %s",
source.GetPrimary().GetAlias(), ts.ReverseWorkflowName(), target.Position)
// Insert the reverse stream on the source primary in the Stopped state,
// positioned at the target's position.
_, err = ts.VReplicationExec(ctx, source.GetPrimary().GetAlias(),
binlogplayer.CreateVReplicationState(ts.ReverseWorkflowName(), reverseBls, target.Position,
binlogdatapb.VReplicationWorkflowState_Stopped, source.GetPrimary().DbName(), ts.workflowType, ts.workflowSubType))
if err != nil {
return err
}
// if user has defined the cell/tablet_types parameters in the forward workflow, update the reverse workflow as well
optionsJSON, err := json.Marshal(ts.options)
if err != nil {
return err
}
// NOTE(review): getReverseVReplicationUpdateQuery may rewrite ts.optCells and
// this callback is invoked from the goroutines started by ForAllUIDs — confirm
// this cannot race when there is more than one stream.
updateQuery := ts.getReverseVReplicationUpdateQuery(target.GetPrimary().GetAlias().GetCell(),
source.GetPrimary().GetAlias().GetCell(), source.GetPrimary().DbName(), string(optionsJSON))
if updateQuery != "" {
ts.Logger().Infof("Updating vreplication stream entry on %s with: %s", source.GetPrimary().GetAlias(), updateQuery)
_, err = ts.VReplicationExec(ctx, source.GetPrimary().GetAlias(), updateQuery)
return err
}
return nil
})
return err
}
func (ts *trafficSwitcher) addTenantFilter(ctx context.Context, filter string) (string, error) {
parser := ts.ws.env.Parser()
tenantClause, err := ts.buildTenantPredicate(ctx)
if err != nil {
return "", err
}
stmt, err := parser.Parse(filter)
if err != nil {
return "", err
}