Testing docker runtime inside the CI

Closes #224
symflower · Jul 4, 2024 · b9b3d2f · b9b3d2f
1 parent 7612dbd
commit b9b3d2f
Show file tree

Hide file tree

Showing 3 changed files with 162 additions and 12 deletions.
diff --git a/cmd/eval-dev-quality/cmd/command.go b/cmd/eval-dev-quality/cmd/command.go
@@ -31,6 +31,10 @@ func Execute(logger *log.Logger, arguments []string) {
 			c.SetLogger(logger)
 		}
 
+		if c, ok := command.(SetArguments); ok {
+			c.SetArguments(arguments)
+		}
+
 		return command.Execute(args)
 	}
 
@@ -51,3 +55,9 @@ type SetLogger interface {
 	// SetLogger sets the logger of the command.
 	SetLogger(logger *log.Logger)
 }
+
+// SetArguments defines a command that allows its arguments to be set.
+type SetArguments interface {
+	// SetArguments sets the command's arguments.
+	SetArguments(args []string)
+}
diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go
@@ -82,6 +82,8 @@ type Evaluate struct {
 	// Namespace is the namespace under which the Kubernetes resources should be created.
 	Namespace string `long:"namespace" description:"The Namespace which should be used for kubernetes resources." default:"eval-dev-quality"`
 
+	// args holds the passed arguments, filtered down to the command's available flags.
+	args []string
 	// logger holds the logger of the command.
 	logger *log.Logger
 	// timestamp holds the timestamp of the command execution.
@@ -95,6 +97,14 @@ func (command *Evaluate) SetLogger(logger *log.Logger) {
 	command.logger = logger
 }
 
+var _ SetArguments = (*Evaluate)(nil)
+
+// SetArguments stores the given arguments on the command, keeping only those that "util.FilterArgsKeep" retains for the flags returned by "util.Flags".
+func (command *Evaluate) SetArguments(args []string) {
+	availableFlags := util.Flags(command)
+	command.args = util.FilterArgsKeep(args, availableFlags)
+}
+
 // Initialize initializes the command according to the arguments.
 func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.Context, cleanup func()) {
 	// Ensure the cleanup always runs in case there is a panic.
@@ -172,15 +182,17 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
 
 	// Ensure the "testdata" path exists and make it absolute.
 	{
-		if err := osutil.DirExists(command.TestdataPath); err != nil {
-			command.logger.Panicf("ERROR: testdata path %q cannot be accessed: %s", command.TestdataPath, err)
-		}
-		testdataPath, err := filepath.Abs(command.TestdataPath)
-		if err != nil {
-			command.logger.Panicf("ERROR: could not resolve testdata path %q to an absolute path: %s", command.TestdataPath, err)
+		if command.Runtime == "local" { // Ignore testdata path during containerized execution.
+			if err := osutil.DirExists(command.TestdataPath); err != nil {
+				command.logger.Panicf("ERROR: testdata path %q cannot be accessed: %s", command.TestdataPath, err)
+			}
+			testdataPath, err := filepath.Abs(command.TestdataPath)
+			if err != nil {
+				command.logger.Panicf("ERROR: could not resolve testdata path %q to an absolute path: %s", command.TestdataPath, err)
+			}
+			command.TestdataPath = testdataPath
+			evaluationContext.TestdataPath = testdataPath
 		}
-		command.TestdataPath = testdataPath
-		evaluationContext.TestdataPath = testdataPath
 	}
 
 	// Setup evaluation result directory.
@@ -446,18 +458,15 @@ func (command *Evaluate) evaluateLocal(evaluationContext *evaluate.Context) (err
 
 // evaluateDocker executes the evaluation for each model inside a docker container.
 func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
-	availableFlags := util.Flags(command)
 	ignoredFlags := []string{
 		"model",
 		"parallel",
 		"result-path",
 		"runtime",
 	}
 
-	// Filter all the args to only contain flags which can be used.
-	args := util.FilterArgsKeep(os.Args[2:], availableFlags)
 	// Filter the args to remove all flags unsuited for running the container.
-	args = util.FilterArgsRemove(args, ignoredFlags)
+	args := util.FilterArgsRemove(command.args, ignoredFlags)
 
 	parallel := util.NewParallel(command.Parallel)
 

diff --git a/cmd/eval-dev-quality/cmd/evaluate_test.go b/cmd/eval-dev-quality/cmd/evaluate_test.go
@@ -768,6 +768,137 @@ func TestEvaluateExecute(t *testing.T) {
 		})
 	})
 
+	t.Run("Runtime", func(t *testing.T) {
+		// The GitHub runner for macOS does not have Docker installed and the image itself is not usable on Windows.
+		if osutil.IsDarwin() || osutil.IsWindows() {
+			t.Skip("Unsupported OS for runtime tests")
+		}
+
+		validate(t, &testCase{
+			Name: "Docker",
+
+			Arguments: []string{
+				"--runtime", "docker",
+				"--model", "symflower/symbolic-execution",
+				"--model", "symflower/symbolic-execution",
+				"--model", "symflower/symbolic-execution",
+				"--testdata", "testdata/", // We need to override the testdata path back to the default one because the actual evaluation is running inside the container.
+				"--repository", filepath.Join("golang", "plain"),
+				"--runs=1",
+				"--parallel=3",
+			},
+
+			ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) {
+				actualAssessments := validateMetrics(t, extractMetricsLogsMatch, output, []metrics.Assessments{
+					metrics.Assessments{
+						metrics.AssessmentKeyCoverage:         10,
+						metrics.AssessmentKeyFilesExecuted:    1,
+						metrics.AssessmentKeyResponseNoError:  1,
+						metrics.AssessmentKeyResponseNoExcess: 1,
+						metrics.AssessmentKeyResponseWithCode: 1,
+					},
+					metrics.Assessments{
+						metrics.AssessmentKeyCoverage:         10,
+						metrics.AssessmentKeyFilesExecuted:    1,
+						metrics.AssessmentKeyResponseNoError:  1,
+						metrics.AssessmentKeyResponseNoExcess: 1,
+						metrics.AssessmentKeyResponseWithCode: 1,
+					},
+					metrics.Assessments{
+						metrics.AssessmentKeyCoverage:         10,
+						metrics.AssessmentKeyFilesExecuted:    1,
+						metrics.AssessmentKeyResponseNoError:  1,
+						metrics.AssessmentKeyResponseNoExcess: 1,
+						metrics.AssessmentKeyResponseWithCode: 1,
+					},
+				}, []uint64{14, 14, 14})
+				// Assert non-deterministic behavior.
+				assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
+				assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254))
+				assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
+				assert.Equal(t, 3, strings.Count(output, "Evaluation score for"))
+			},
+			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
+				filepath.Join("result-directory", "evaluation.log"): nil,
+
+				// Parallel run 1
+				filepath.Join("result-directory", "symflower", "symbolic-execution", "categories.svg"): nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution", "evaluation.csv"): func(t *testing.T, filePath, data string) {
+					actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
+						metrics.Assessments{
+							metrics.AssessmentKeyCoverage:         10,
+							metrics.AssessmentKeyFilesExecuted:    1,
+							metrics.AssessmentKeyResponseNoError:  1,
+							metrics.AssessmentKeyResponseNoExcess: 1,
+							metrics.AssessmentKeyResponseWithCode: 1,
+						},
+					}, []uint64{14})
+					// Assert non-deterministic behavior.
+					assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
+					assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254))
+					assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
+				},
+				filepath.Join("result-directory", "symflower", "symbolic-execution", "evaluation.log"):    nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution", "golang-summed.csv"): nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution", "models-summed.csv"): nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution", "README.md"):         nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
+					assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
+				},
+
+				// Parallel run 2
+				filepath.Join("result-directory", "symflower", "symbolic-execution-0", "categories.svg"): nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution-0", "evaluation.csv"): func(t *testing.T, filePath, data string) {
+					actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
+						metrics.Assessments{
+							metrics.AssessmentKeyCoverage:         10,
+							metrics.AssessmentKeyFilesExecuted:    1,
+							metrics.AssessmentKeyResponseNoError:  1,
+							metrics.AssessmentKeyResponseNoExcess: 1,
+							metrics.AssessmentKeyResponseWithCode: 1,
+						},
+					}, []uint64{14})
+					// Assert non-deterministic behavior.
+					assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
+					assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254))
+					assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
+				},
+				filepath.Join("result-directory", "symflower", "symbolic-execution-0", "evaluation.log"):    nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution-0", "golang-summed.csv"): nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution-0", "models-summed.csv"): nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution-0", "README.md"):         nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution-0", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
+					assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
+				},
+
+				// Parallel run 3
+				filepath.Join("result-directory", "symflower", "symbolic-execution-1", "categories.svg"): nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution-1", "evaluation.csv"): func(t *testing.T, filePath, data string) {
+					actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
+						metrics.Assessments{
+							metrics.AssessmentKeyCoverage:         10,
+							metrics.AssessmentKeyFilesExecuted:    1,
+							metrics.AssessmentKeyResponseNoError:  1,
+							metrics.AssessmentKeyResponseNoExcess: 1,
+							metrics.AssessmentKeyResponseWithCode: 1,
+						},
+					}, []uint64{14})
+					// Assert non-deterministic behavior.
+					assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
+					assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254))
+					assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
+				},
+				filepath.Join("result-directory", "symflower", "symbolic-execution-1", "evaluation.log"):    nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution-1", "golang-summed.csv"): nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution-1", "models-summed.csv"): nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution-1", "README.md"):         nil,
+				filepath.Join("result-directory", "symflower", "symbolic-execution-1", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
+					assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
+				},
+			},
+		})
+	})
+
 	// This case checks a beautiful bug where the Markdown export crashed when the current working directory contained a README.md file. While this is not the case during the tests (as the current work directory is the directory of this file), it certainly caused problems when our binary was executed from the repository root (which of course contained a README.md). Therefore, we sadly have to modify the current work directory right within the tests of this case to reproduce the problem and fix it forever.
 	validate(t, &testCase{
 		Name: "Current work directory contains a README.md",