databricks · denik · Feb 14, 2025 · Feb 13, 2025 · Feb 13, 2025 · Feb 13, 2025
@@ -56,6 +56,7 @@ const (
 	EntryPointScript = "script"
 	CleanupScript    = "script.cleanup"
 	PrepareScript    = "script.prepare"
+	ReplsFile        = "repls.json"
 	MaxFileSize      = 100_000
 )
 
@@ -65,6 +66,10 @@ var Scripts = map[string]bool{
 	PrepareScript:    true,
 }
 
+// Ignored lists file names that the unexpected-output check in runTest skips.
+// ReplsFile is listed because runTest writes it into the test's temporary
+// directory when config.SaveRepls is set.
+var Ignored = map[string]bool{
+	ReplsFile: true,
+}
+
 // TestAccept runs testAccept, forwarding InprocessMode and SingleTest.
 func TestAccept(t *testing.T) {
 	testAccept(t, InprocessMode, SingleTest)
 }
@@ -152,6 +157,8 @@ func testAccept(t *testing.T, InprocessMode bool, singleTest string) int {
 	testdiff.PrepareReplacementSdkVersion(t, &repls)
 	testdiff.PrepareReplacementsGoVersion(t, &repls)
 
+	repls.SetPath(cwd, "[TESTROOT]")
+
 	repls.Repls = append(repls.Repls, testdiff.Replacement{Old: regexp.MustCompile("dbapi[0-9a-f]+"), New: "[DATABRICKS_TOKEN]"})
 
 	testDirs := getTests(t)
@@ -310,6 +317,12 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
 	// User replacements come last:
 	repls.Repls = append(repls.Repls, config.Repls...)
 
+	if config.SaveRepls {
+		replsJson, err := json.MarshalIndent(repls.Repls, "", "  ")
+		require.NoError(t, err)
+		testutil.WriteFile(t, filepath.Join(tmpDir, ReplsFile), string(replsJson))
+	}
+
 	if coverDir != "" {
 		// Creating individual coverage directory for each test, because writing to the same one
 		// results in sporadic failures like this one (only if tests are running in parallel):
@@ -320,6 +333,10 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
 		cmd.Env = append(cmd.Env, "GOCOVERDIR="+coverDir)
 	}
 
+	absDir, err := filepath.Abs(dir)
+	require.NoError(t, err)
+	cmd.Env = append(cmd.Env, "TESTDIR="+absDir)
+
 	// Write combined output to a file
 	out, err := os.Create(filepath.Join(tmpDir, "output.txt"))
 	require.NoError(t, err)
@@ -368,6 +385,9 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
 		if _, ok := outputs[relPath]; ok {
 			continue
 		}
+		if _, ok := Ignored[relPath]; ok {
+			continue
+		}
 		unexpected = append(unexpected, relPath)
 		if strings.HasPrefix(relPath, "out") {
 			// We have a new file starting with "out"

@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+"""This script implements "diff -r -U2 dir1 dir2" but applies replacements first"""
+
+import sys
+import difflib
+import json
+import re
+from pathlib import Path
+
+
+def replaceAll(patterns, s):
+    """Return s after applying every (compiled regex, replacement) pair in order."""
+    result = s
+    for regex, replacement in patterns:
+        result = regex.sub(replacement, result)
+    return result
+
+
+def main():
+    """Print a recursive unified diff of two directories after applying replacements.
+
+    Expects exactly two command-line arguments (the directories to compare) and
+    a "repls.json" file in the current working directory containing a list of
+    objects with "Old" (regex source) and "New" (replacement) keys.
+    Files present on only one side are reported as "Only in ..."; files present
+    on both sides are compared line by line after the replacements are applied.
+    """
+    if len(sys.argv) != 3:
+        # Fail with a readable message instead of an unpacking traceback.
+        sys.exit(f"Usage: {sys.argv[0]} DIR1 DIR2")
+    d1, d2 = Path(sys.argv[1]), Path(sys.argv[2])
+
+    # Decode as UTF-8 explicitly so the result does not depend on the platform locale.
+    with open("repls.json", encoding="utf-8") as f:  # Must have 'SaveRepls = true' in test.toml
+        repls = json.load(f)
+
+    # NOTE(review): the patterns are produced by the Go test runner; anything
+    # Python's re module cannot compile is reported and skipped. Group-reference
+    # syntax in "New" may also differ between the two engines - confirm.
+    patterns = []
+    for r in repls:
+        try:
+            patterns.append((re.compile(r["Old"]), r["New"]))
+        except re.error as e:
+            print(f"Regex error for pattern {r}: {e}", file=sys.stderr)
+
+    set1 = {str(p.relative_to(d1)) for p in d1.rglob("*") if p.is_file()}
+    set2 = {str(p.relative_to(d2)) for p in d2.rglob("*") if p.is_file()}
+
+    for rel in sorted(set1 | set2):
+        if rel not in set2:
+            print(f"Only in {d1}: {rel}")
+            continue
+        if rel not in set1:
+            print(f"Only in {d2}: {rel}")
+            continue
+
+        p1 = d1 / rel
+        p2 = d2 / rel
+        a = [replaceAll(patterns, x) for x in p1.read_text(encoding="utf-8").splitlines(True)]
+        b = [replaceAll(patterns, x) for x in p2.read_text(encoding="utf-8").splitlines(True)]
+        if a == b:
+            continue
+
+        for line in difflib.unified_diff(a, b, p1.as_posix(), p2.as_posix(), "", "", 2):
+            # A final file line without a trailing newline would otherwise run
+            # into the next line of the diff output.
+            print(line, end="" if line.endswith("\n") else "\n")
+
+
+# Run the comparison only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/acceptance/config_test.go b/acceptance/config_test.go
@@ -27,6 +27,9 @@ type TestConfig struct {
 	// If true, do not run this test against cloud environment
 	LocalOnly bool
 
+	// If true, save file repls.json with all the replacements
+	SaveRepls bool
+
 	// List of additional replacements to apply on this test.
 	// Old is a regexp, New is a replacement expression.
 	Repls []testdiff.Replacement

@@ -0,0 +1,7 @@
+Hello!
+{
+    "id": "[USERID]",
+    "userName": "[USERNAME]"
+}
+
+Footer
@@ -0,0 +1,7 @@
+Hello!
+{
+    "id": "[UUID]",
+    "userName": "[USERNAME]"
+}
+
+Footer
@@ -0,0 +1,13 @@
+
+>>> diff.py out_dir_a out_dir_b
+Only in out_dir_a: only_in_a
+Only in out_dir_b: only_in_b
+--- out_dir_a/output.txt
++++ out_dir_b/output.txt
+@@ -1,5 +1,5 @@
+ Hello!
+ {
+-    "id": "[USERID]",
++    "id": "[UUID]",
+     "userName": "[USERNAME]"
+ }
@@ -0,0 +1,17 @@
+# Build two directories that each hold one unique file and an output.txt
+# differing in a single line.
+mkdir out_dir_a
+mkdir out_dir_b
+
+touch out_dir_a/only_in_a
+touch out_dir_b/only_in_b
+
+echo Hello! >> out_dir_a/output.txt
+echo Hello! >> out_dir_b/output.txt
+
+# Side a records the response from $DATABRICKS_HOST; side b is a literal with a raw UUID.
+# The URL is quoted so the host value is never word-split or glob-expanded.
+curl -s "$DATABRICKS_HOST/api/2.0/preview/scim/v2/Me" >> out_dir_a/output.txt
+printf "\n\nFooter" >> out_dir_a/output.txt
+printf '{\n    "id": "7d639bad-ac6d-4e6f-abd7-9522a86b0239",\n    "userName": "[USERNAME]"\n}\n\nFooter' >> out_dir_b/output.txt
+
+# Unlike regular diff, diff.py will apply replacements first before doing the comparison
+errcode trace diff.py out_dir_a out_dir_b
+
+rm out_dir_a/only_in_a out_dir_b/only_in_b
diff --git a/acceptance/selftest/diff/test.toml b/acceptance/selftest/diff/test.toml
@@ -0,0 +1 @@
+SaveRepls = true
@@ -0,0 +1 @@
+LocalOnly = true