Skip to content

Commit

Permalink
fix(table-diff): Support PK changes (cloudquery#5638)
Browse files Browse the repository at this point in the history
<!-- 🎉 Thank you for making CloudQuery awesome by submitting a PR 🎉 -->

#### Summary

This PR fixes the table diff logic to support PK changes. For example
cloudquery#5636 (comment)
is wrong.



<!--
Use the following steps to ensure your PR is ready to be reviewed

- [ ] Read the [contribution guidelines](../blob/main/CONTRIBUTING.md)
🧑‍🎓
- [ ] Test locally on your own infrastructure
- [ ] Run `go fmt` to format your code 🖊
- [ ] Lint your changes via `golangci-lint run` 🚨 (install golangci-lint
[here](https://golangci-lint.run/usage/install/#local-installation))
- [ ] Update or add tests 🧪
- [ ] Ensure the status checks below are successful ✅
--->
  • Loading branch information
erezrokah authored Dec 19, 2022
1 parent 8e2663c commit 91ec60b
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 18 deletions.
58 changes: 41 additions & 17 deletions scripts/table_diff/changes/changes.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ type change struct {
Breaking bool `json:"breaking"`
}

type column struct {
dataType string
pk bool
}

func backtickStrings(strings ...string) []interface{} {
backticked := make([]interface{}, len(strings))
for i, s := range strings {
Expand All @@ -27,47 +32,66 @@ func backtickStrings(strings ...string) []interface{} {
return backticked
}

func parseColumnChange(line string) (name string, dataType string) {
func parseColumnChange(line string) (name string, dataType string, pk bool) {
match := columnRegex.FindStringSubmatch(line)
if match == nil {
return "", ""
return "", "", false
}
result := make(map[string]string)
for i, name := range columnRegex.SubexpNames() {
if i != 0 && name != "" {
result[name] = match[i]
}
}
return result["name"], result["dataType"]
cleanName := strings.TrimSuffix(result["name"], " (PK)")
return cleanName, result["dataType"], result["name"] != cleanName
}

func getColumnChanges(file *gitdiff.File, table string) (changes []change) {
addedColumns := make(map[string]string)
deletedColumns := make(map[string]string)
addedColumns := make(map[string]column)
deletedColumns := make(map[string]column)
for _, fragment := range file.TextFragments {
for _, line := range fragment.Lines {
name, dataType := parseColumnChange(line.Line)
name, dataType, pk := parseColumnChange(line.Line)
if name == "" || dataType == "" {
continue
}
column := column{dataType: dataType, pk: pk}
switch line.Op {
case gitdiff.OpAdd:
addedColumns[name] = dataType
addedColumns[name] = column
case gitdiff.OpDelete:
deletedColumns[name] = dataType
deletedColumns[name] = column
}
}
}
for deletedName, deletedDataType := range deletedColumns {
if addedDataType, ok := addedColumns[deletedName]; ok {
if addedDataType != deletedDataType {
for deletedName, deletedColumn := range deletedColumns {
if addedColumn, ok := addedColumns[deletedName]; ok {
if deletedColumn.dataType == addedColumn.dataType && deletedColumn.pk == addedColumn.pk {
changes = append(changes, change{
Text: fmt.Sprintf("Table %s: column type changed from %s to %s for %s", backtickStrings(table, deletedDataType, addedDataType, deletedName)...),
Breaking: true,
Text: fmt.Sprintf("Table %s: column order changed for %s", backtickStrings(table, deletedName)...),
Breaking: false,
})
} else {
continue
}

if addedColumn.dataType != deletedColumn.dataType {
changes = append(changes, change{
Text: fmt.Sprintf("Table %s: column order changed for %s", backtickStrings(table, deletedName)...),
Text: fmt.Sprintf("Table %s: column type changed from %s to %s for %s", backtickStrings(table, deletedColumn.dataType, addedColumn.dataType, deletedName)...),
Breaking: false,
})
}

if addedColumn.pk && !deletedColumn.pk {
changes = append(changes, change{
Text: fmt.Sprintf("Table %s: column primary key %s added", backtickStrings(table, deletedName)...),
Breaking: false,
})
}

if !addedColumn.pk && deletedColumn.pk {
changes = append(changes, change{
Text: fmt.Sprintf("Table %s: column primary key %s removed", backtickStrings(table, deletedName)...),
Breaking: false,
})
}
Expand All @@ -78,10 +102,10 @@ func getColumnChanges(file *gitdiff.File, table string) (changes []change) {
})
}
}
for addedName, addedDataType := range addedColumns {
for addedName, addedColumn := range addedColumns {
if _, ok := deletedColumns[addedName]; !ok {
changes = append(changes, change{
Text: fmt.Sprintf("Table %s: column added with name %s and type %s", backtickStrings(table, addedName, addedDataType)...),
Text: fmt.Sprintf("Table %s: column added with name %s and type %s", backtickStrings(table, addedName, addedColumn.dataType)...),
Breaking: false,
})
}
Expand Down
23 changes: 22 additions & 1 deletion scripts/table_diff/changes/changes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,18 @@ func Test_parseColumnChange(t *testing.T) {
args args
wantName string
wantDataType string
wantPk bool
}{
{name: "Should parse name and data type when change is a column", args: args{line: "|name|String|"}, wantName: "name", wantDataType: "String"},
{name: "Should parse name, pk and data type when a column is a primary key", args: args{line: "|name (PK)|String|"}, wantName: "name", wantDataType: "String", wantPk: true},
{name: "Should return empty strings when change is not a column", args: args{line: "# Table: azure_appservice_site_auth_settings"}, wantName: "", wantDataType: ""},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotName, gotDataType := parseColumnChange(tt.args.line)
gotName, gotDataType, pk := parseColumnChange(tt.args.line)
require.Equal(t, tt.wantName, gotName)
require.Equal(t, tt.wantDataType, gotDataType)
require.Equal(t, tt.wantPk, pk)
})
}
}
Expand Down Expand Up @@ -158,6 +161,24 @@ func Test_getChanges(t *testing.T) {
},
wantErr: false,
},
{
name: "Should handle PK changes",
diffDataFile: "testdata/pr_5636_diff.txt",
wantChanges: []change{
{
Text: "Table `gcp_resourcemanager_projects`: column primary key `_cq_id` removed",
Breaking: false,
},
{
Text: "Table `gcp_resourcemanager_projects`: column primary key `name` added",
Breaking: false,
},
{
Text: "Table `gcp_resourcemanager_projects`: column primary key `project_id` added",
Breaking: false,
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
27 changes: 27 additions & 0 deletions scripts/table_diff/changes/testdata/pr_5636_diff.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
diff --git a/plugins/source/gcp/docs/tables/gcp_resourcemanager_projects.md b/plugins/source/gcp/docs/tables/gcp_resourcemanager_projects.md
index a39e930ae27..5c8b8b5ea39 100644
--- a/plugins/source/gcp/docs/tables/gcp_resourcemanager_projects.md
+++ b/plugins/source/gcp/docs/tables/gcp_resourcemanager_projects.md
@@ -2,7 +2,7 @@



-The primary key for this table is **_cq_id**.
+The composite primary key for this table is (**project_id**, **name**).



@@ -11,10 +11,10 @@ The primary key for this table is **_cq_id**.
| ------------- | ------------- |
|_cq_source_name|String|
|_cq_sync_time|Timestamp|
-|_cq_id (PK)|UUID|
+|_cq_id|UUID|
|_cq_parent_id|UUID|
-|project_id|String|
-|name|String|
+|project_id (PK)|String|
+|name (PK)|String|
|parent|String|
|state|String|
|display_name|String|

0 comments on commit 91ec60b

Please sign in to comment.