diff --git a/cmd/eval-dev-quality/cmd/evaluate_test.go b/cmd/eval-dev-quality/cmd/evaluate_test.go index d71185c6..48d9eb8d 100644 --- a/cmd/eval-dev-quality/cmd/evaluate_test.go +++ b/cmd/eval-dev-quality/cmd/evaluate_test.go @@ -93,9 +93,7 @@ func validateMetrics(t *testing.T, regex *regexp.Regexp, data string, expectedAs actualAssessments, actualScores := extractMetrics(t, regex, data) require.Equal(t, len(expectedAssessments), len(actualAssessments), "expected and actual assessment length") - for i := range actualAssessments { - metricstesting.AssertAssessmentsEqual(t, expectedAssessments[i], actualAssessments[i]) - } + assert.Equal(t, metricstesting.CleanSlice(expectedAssessments), metricstesting.CleanSlice(actualAssessments)) assert.Equal(t, expectedScores, actualScores) return actualAssessments @@ -217,18 +215,18 @@ func TestEvaluateExecute(t *testing.T) { ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) { actualAssessments := validateMetrics(t, extractMetricsLogsMatch, output, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 20, - metrics.AssessmentKeyFilesExecuted: 2, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 2, - metrics.AssessmentKeyResponseNoError: 2, - metrics.AssessmentKeyResponseNoExcess: 2, - metrics.AssessmentKeyResponseWithCode: 2, + metrics.AssessmentKeyCoverage: 20, + metrics.AssessmentKeyFilesExecuted: 2, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 2, + metrics.AssessmentKeyResponseNoError: 2, + metrics.AssessmentKeyResponseNoExcess: 2, + metrics.AssessmentKeyResponseWithCode: 2, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 508, + metrics.AssessmentKeyResponseCharacterCount: 508, }, }, []uint64{28}) // Assert non-deterministic behavior. assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508)) assert.Equal(t, 1, strings.Count(output, "Evaluation score for")) }, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ @@ -239,29 +237,29 @@ func TestEvaluateExecute(t *testing.T) { filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) { actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, }, []uint64{14, 14}) // Assert non-deterministic behavior. assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(254)) assert.Greater(t, actualAssessments[1][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254)) - assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254)) }, filepath.Join("result-directory", "evaluation.log"): nil, filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) { @@ -282,18 +280,18 @@ func TestEvaluateExecute(t *testing.T) { ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) { actualAssessments := validateMetrics(t, extractMetricsLogsMatch, output, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 40, - metrics.AssessmentKeyFilesExecuted: 4, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 4, - metrics.AssessmentKeyResponseNoError: 4, - metrics.AssessmentKeyResponseNoExcess: 4, - metrics.AssessmentKeyResponseWithCode: 4, + metrics.AssessmentKeyCoverage: 40, + metrics.AssessmentKeyFilesExecuted: 4, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 4, + metrics.AssessmentKeyResponseNoError: 4, + metrics.AssessmentKeyResponseNoExcess: 4, + metrics.AssessmentKeyResponseWithCode: 4, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 786, + metrics.AssessmentKeyResponseCharacterCount: 786, }, }, []uint64{56}) // Assert non-deterministic behavior. assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(786)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(786)) assert.Equal(t, 1, strings.Count(output, "Evaluation score for")) }, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ @@ -304,51 +302,51 @@ func TestEvaluateExecute(t *testing.T) { filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) { actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 139, + metrics.AssessmentKeyResponseCharacterCount: 139, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 139, + metrics.AssessmentKeyResponseCharacterCount: 139, }, }, []uint64{14, 14, 14, 14}) // Assert non-deterministic behavior. assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(254)) assert.Greater(t, actualAssessments[1][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254)) - assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254)) assert.Greater(t, actualAssessments[2][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[2][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(139)) - assert.Equal(t, actualAssessments[2][metrics.AssessmentKeyResponseCharacterCount], uint64(139)) assert.Greater(t, actualAssessments[3][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[3][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(139)) - assert.Equal(t, actualAssessments[3][metrics.AssessmentKeyResponseCharacterCount], uint64(139)) }, filepath.Join("result-directory", "evaluation.log"): nil, filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) { @@ -378,18 +376,18 @@ func TestEvaluateExecute(t *testing.T) { ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) { actualAssessments := validateMetrics(t, extractMetricsLogsMatch, output, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 20, - metrics.AssessmentKeyFilesExecuted: 2, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 2, - metrics.AssessmentKeyResponseNoError: 2, - metrics.AssessmentKeyResponseNoExcess: 2, - metrics.AssessmentKeyResponseWithCode: 2, + metrics.AssessmentKeyCoverage: 20, + metrics.AssessmentKeyFilesExecuted: 2, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 2, + metrics.AssessmentKeyResponseNoError: 2, + metrics.AssessmentKeyResponseNoExcess: 2, + metrics.AssessmentKeyResponseWithCode: 2, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 508, + metrics.AssessmentKeyResponseCharacterCount: 508, }, }, []uint64{28}) // Assert non-deterministic behavior. assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508)) assert.Equal(t, 1, strings.Count(output, "Evaluation score for")) }, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ @@ -400,29 +398,29 @@ func TestEvaluateExecute(t *testing.T) { filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) { actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, }, []uint64{14, 14}) // Assert non-deterministic behavior. assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(254)) assert.Greater(t, actualAssessments[1][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254)) - assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254)) }, filepath.Join("result-directory", "evaluation.log"): nil, filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) { @@ -451,29 +449,29 @@ func TestEvaluateExecute(t *testing.T) { filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) { actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, }, []uint64{14, 14}) // Assert non-deterministic behavior. assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(254)) assert.Greater(t, actualAssessments[1][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254)) - assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254)) }, filepath.Join("result-directory", "evaluation.log"): nil, filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) { @@ -625,18 +623,18 @@ func TestEvaluateExecute(t *testing.T) { ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) { actualAssessments := validateMetrics(t, extractMetricsLogsMatch, output, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 60, - metrics.AssessmentKeyFilesExecuted: 6, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 6, - metrics.AssessmentKeyResponseNoError: 6, - metrics.AssessmentKeyResponseNoExcess: 6, - metrics.AssessmentKeyResponseWithCode: 6, + metrics.AssessmentKeyCoverage: 60, + metrics.AssessmentKeyFilesExecuted: 6, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 6, + metrics.AssessmentKeyResponseNoError: 6, + metrics.AssessmentKeyResponseNoExcess: 6, + metrics.AssessmentKeyResponseWithCode: 6, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 1524, + metrics.AssessmentKeyResponseCharacterCount: 1524, }, }, []uint64{84}) // Assert non-deterministic behavior. assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(1524)) - assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(1524)) assert.Equal(t, 1, strings.Count(output, "Evaluation score for")) }, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ @@ -653,59 +651,69 @@ func TestEvaluateExecute(t *testing.T) { actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, }, []uint64{14, 14, 14, 14, 14, 14}) // Assert non-deterministic behavior. for _, assessment := range actualAssessments { assert.Greater(t, assessment[metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, assessment[metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(254)) - assert.Equal(t, assessment[metrics.AssessmentKeyResponseCharacterCount], uint64(254)) } }, filepath.Join("result-directory", "evaluation.log"): func(t *testing.T, filePath, data string) { @@ -792,34 +800,38 @@ func TestEvaluateExecute(t *testing.T) { ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) { actualAssessments := validateMetrics(t, extractMetricsLogsMatch, output, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 40, - metrics.AssessmentKeyFilesExecuted: 4, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 4, - metrics.AssessmentKeyResponseNoError: 4, - metrics.AssessmentKeyResponseNoExcess: 4, - metrics.AssessmentKeyResponseWithCode: 4, + metrics.AssessmentKeyCoverage: 40, + metrics.AssessmentKeyFilesExecuted: 4, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 4, + metrics.AssessmentKeyResponseNoError: 4, + metrics.AssessmentKeyResponseNoExcess: 4, + metrics.AssessmentKeyResponseWithCode: 4, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 786, + metrics.AssessmentKeyResponseCharacterCount: 786, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 40, - metrics.AssessmentKeyFilesExecuted: 4, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 4, - metrics.AssessmentKeyResponseNoError: 4, - metrics.AssessmentKeyResponseNoExcess: 4, - metrics.AssessmentKeyResponseWithCode: 4, + metrics.AssessmentKeyCoverage: 40, + metrics.AssessmentKeyFilesExecuted: 4, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 4, + metrics.AssessmentKeyResponseNoError: 4, + metrics.AssessmentKeyResponseNoExcess: 4, + metrics.AssessmentKeyResponseWithCode: 4, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 786, + metrics.AssessmentKeyResponseCharacterCount: 786, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 40, - metrics.AssessmentKeyFilesExecuted: 4, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 4, - metrics.AssessmentKeyResponseNoError: 4, - metrics.AssessmentKeyResponseNoExcess: 4, - metrics.AssessmentKeyResponseWithCode: 4, + metrics.AssessmentKeyCoverage: 40, + metrics.AssessmentKeyFilesExecuted: 4, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 4, + metrics.AssessmentKeyResponseNoError: 4, + metrics.AssessmentKeyResponseNoExcess: 4, + metrics.AssessmentKeyResponseWithCode: 4, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 786, + metrics.AssessmentKeyResponseCharacterCount: 786, }, }, []uint64{56, 56, 56}) // Assert non-deterministic behavior. assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, uint64(786), actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount]) - assert.Equal(t, uint64(786), actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount]) assert.Equal(t, 3, strings.Count(output, "Evaluation score for")) }, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ @@ -832,42 +844,48 @@ func TestEvaluateExecute(t *testing.T) { filepath.Join("result-directory", "symflower_symbolic-execution", "evaluation.csv"): func(t *testing.T, filePath, data string) { actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 139, + metrics.AssessmentKeyResponseCharacterCount: 139, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 139, + metrics.AssessmentKeyResponseCharacterCount: 139, }, }, []uint64{14, 14, 14, 14}) // Assert non-deterministic behavior. assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount]) - assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount]) }, filepath.Join("result-directory", "symflower_symbolic-execution", "evaluation.log"): nil, filepath.Join("result-directory", "symflower_symbolic-execution", "README.md"): nil, @@ -884,42 +902,48 @@ func TestEvaluateExecute(t *testing.T) { filepath.Join("result-directory", "symflower_symbolic-execution_1", "evaluation.csv"): func(t *testing.T, filePath, data string) { actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 139, + metrics.AssessmentKeyResponseCharacterCount: 139, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 139, + metrics.AssessmentKeyResponseCharacterCount: 139, }, }, []uint64{14, 14, 14, 14}) // Assert non-deterministic behavior. assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount]) - assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount]) }, filepath.Join("result-directory", "symflower_symbolic-execution_1", "evaluation.log"): nil, filepath.Join("result-directory", "symflower_symbolic-execution_1", "README.md"): nil, @@ -936,42 +960,48 @@ func TestEvaluateExecute(t *testing.T) { filepath.Join("result-directory", "symflower_symbolic-execution_2", "evaluation.csv"): func(t *testing.T, filePath, data string) { actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{ metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254, + metrics.AssessmentKeyResponseCharacterCount: 254, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 139, + metrics.AssessmentKeyResponseCharacterCount: 139, }, metrics.Assessments{ - metrics.AssessmentKeyCoverage: 10, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, - metrics.AssessmentKeyResponseNoError: 1, - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 139, + metrics.AssessmentKeyResponseCharacterCount: 139, }, }, []uint64{14, 14, 14, 14}) // Assert non-deterministic behavior. assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) - assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount]) - assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount]) }, filepath.Join("result-directory", "symflower_symbolic-execution_2", "evaluation.log"): nil, filepath.Join("result-directory", "symflower_symbolic-execution_2", "README.md"): nil, diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go index 536879b8..16dd99de 100644 --- a/evaluate/evaluate_test.go +++ b/evaluate/evaluate_test.go @@ -126,19 +126,12 @@ func TestEvaluate(t *testing.T) { var actualAssessments metricstesting.AssessmentTuples require.NoError(t, assessmentStore.Walk(func(m evalmodel.Model, l language.Language, r string, ti task.Identifier, a metrics.Assessments) error { - // Normalize assessments. - if v, ok := a[metrics.AssessmentKeyProcessingTime]; ok { - if assert.Greater(t, v, uint64(0)) { - delete(a, metrics.AssessmentKeyProcessingTime) - } - } - actualAssessments = append(actualAssessments, &metricstesting.AssessmentTuple{ Model: m, Language: l, RepositoryPath: r, Task: ti, - Assessment: a, + Assessment: metricstesting.Clean(a), }) return nil @@ -501,7 +494,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryNextPath, Task: evaluatetask.IdentifierWriteTests, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 1, metrics.AssessmentKeyFilesExecutedMaximumReachable: 2, metrics.AssessmentKeyResponseNoError: 1, @@ -513,7 +505,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryNextPath, Task: evaluatetask.IdentifierWriteTestsSymflowerFix, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 1, metrics.AssessmentKeyFilesExecutedMaximumReachable: 2, metrics.AssessmentKeyResponseNoError: 1, @@ -525,7 +516,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPlainPath, Task: evaluatetask.IdentifierWriteTests, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 2, metrics.AssessmentKeyFilesExecutedMaximumReachable: 2, metrics.AssessmentKeyResponseNoError: 2, @@ -537,7 +527,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPlainPath, Task: evaluatetask.IdentifierWriteTestsSymflowerFix, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 2, metrics.AssessmentKeyFilesExecutedMaximumReachable: 2, metrics.AssessmentKeyResponseNoError: 2, @@ -602,7 +591,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryNextPath, Task: evaluatetask.IdentifierWriteTests, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 2, metrics.AssessmentKeyFilesExecutedMaximumReachable: 2, metrics.AssessmentKeyResponseNoError: 2, @@ -614,7 +602,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryNextPath, Task: evaluatetask.IdentifierWriteTestsSymflowerFix, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 2, metrics.AssessmentKeyFilesExecutedMaximumReachable: 2, metrics.AssessmentKeyResponseNoError: 2, @@ -626,7 +613,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPlainPath, Task: evaluatetask.IdentifierWriteTests, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 1, metrics.AssessmentKeyFilesExecutedMaximumReachable: 2, metrics.AssessmentKeyResponseNoError: 1, @@ -638,7 +624,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPlainPath, Task: evaluatetask.IdentifierWriteTestsSymflowerFix, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 1, metrics.AssessmentKeyFilesExecutedMaximumReachable: 2, metrics.AssessmentKeyResponseNoError: 1, @@ -762,7 +747,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPath, Task: evaluatetask.IdentifierWriteTests, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 3, metrics.AssessmentKeyFilesExecutedMaximumReachable: 3, metrics.AssessmentKeyResponseNoError: 3, @@ -774,7 +758,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPath, Task: evaluatetask.IdentifierWriteTestsSymflowerFix, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 3, metrics.AssessmentKeyFilesExecutedMaximumReachable: 3, metrics.AssessmentKeyResponseNoError: 3, @@ -834,7 +817,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPath, Task: evaluatetask.IdentifierWriteTests, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 3, metrics.AssessmentKeyFilesExecutedMaximumReachable: 3, metrics.AssessmentKeyResponseNoError: 3, @@ -846,7 +828,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPath, Task: evaluatetask.IdentifierWriteTestsSymflowerFix, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 3, metrics.AssessmentKeyFilesExecutedMaximumReachable: 3, metrics.AssessmentKeyResponseNoError: 3, @@ -935,7 +916,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPath, Task: evaluatetask.IdentifierWriteTests, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 3, metrics.AssessmentKeyFilesExecutedMaximumReachable: 3, metrics.AssessmentKeyResponseNoError: 3, @@ -947,7 +927,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPath, Task: evaluatetask.IdentifierWriteTestsSymflowerFix, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 3, metrics.AssessmentKeyFilesExecutedMaximumReachable: 3, metrics.AssessmentKeyResponseNoError: 3, @@ -1019,7 +998,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPath, Task: evaluatetask.IdentifierWriteTests, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 3, metrics.AssessmentKeyFilesExecutedMaximumReachable: 3, metrics.AssessmentKeyResponseNoError: 3, @@ -1031,7 +1009,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPath, Task: evaluatetask.IdentifierWriteTestsSymflowerFix, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 3, metrics.AssessmentKeyFilesExecutedMaximumReachable: 3, metrics.AssessmentKeyResponseNoError: 3, @@ -1085,7 +1062,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPath, Task: evaluatetask.IdentifierWriteTests, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 1, metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, metrics.AssessmentKeyResponseNoError: 1, @@ -1097,7 +1073,6 @@ func TestEvaluate(t *testing.T) { RepositoryPath: repositoryPath, Task: evaluatetask.IdentifierWriteTestsSymflowerFix, Assessment: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, metrics.AssessmentKeyFilesExecuted: 1, metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, metrics.AssessmentKeyResponseNoError: 1, diff --git a/evaluate/metrics/testing/assessments.go b/evaluate/metrics/testing/assessments.go index 3f652f15..1435a125 100644 --- a/evaluate/metrics/testing/assessments.go +++ b/evaluate/metrics/testing/assessments.go @@ -1,12 +1,7 @@ package metricstesting import ( - "testing" - - "golang.org/x/exp/maps" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "maps" "github.com/symflower/eval-dev-quality/evaluate/metrics" "github.com/symflower/eval-dev-quality/language" @@ -14,44 +9,42 @@ import ( "github.com/symflower/eval-dev-quality/task" ) -// AssertAssessmentsEqual checks if the given assessments are equal ignoring default and nondeterministic values. -func AssertAssessmentsEqual(t *testing.T, expected metrics.Assessments, actual metrics.Assessments) { - expected = maps.Clone(expected) - actual = maps.Clone(actual) +// Clean deletes all empty and nondeterministic keys from the assessment. +func Clean(assessment metrics.Assessments) metrics.Assessments { + copy := metrics.Assessments{} + maps.Copy(copy, assessment) - clearNonDeterministicAssessmentValues(expected) - clearNonDeterministicAssessmentValues(actual) + delete(copy, metrics.AssessmentKeyProcessingTime) - assert.Truef(t, expected.Equal(actual), "expected:%s\nactual:%s", expected, actual) -} + for _, key := range metrics.AllAssessmentKeysStrings { + if copy[metrics.AssessmentKey(key)] == 0 { + delete(copy, metrics.AssessmentKey(key)) + } + } -// AssertTaskAssessmentsEqual checks if the given assessments per task are equal ignoring default and nondeterministic values. -func AssertTaskAssessmentsEqual(t *testing.T, expected map[task.Identifier]metrics.Assessments, actual map[task.Identifier]metrics.Assessments) { - expected = maps.Clone(expected) - actual = maps.Clone(actual) + return copy +} - // The expected and actual maps must have the same task identifiers. - require.ElementsMatch(t, maps.Keys(expected), maps.Keys(actual)) +// CleanSlice deletes all empty and nondeterministic keys from the assessments. +func CleanSlice(assessments []metrics.Assessments) []metrics.Assessments { + copy := make([]metrics.Assessments, len(assessments)) - // Ignore non-deterministic values. - for _, assessment := range expected { - clearNonDeterministicAssessmentValues(assessment) - } - for _, assessment := range actual { - clearNonDeterministicAssessmentValues(assessment) + for i, assessment := range assessments { + copy[i] = Clean(assessment) } - for task, expectedAssessment := range expected { - actualAssessment := actual[task] - assert.Truef(t, expectedAssessment.Equal(actualAssessment), "task:%s\nexpected:%s\nactual:%s", task, expected, actual) - } + return copy } -// clearNonDeterministicAssessmentValues ignores non-deterministic values such as processing time and response character count. -func clearNonDeterministicAssessmentValues(assessment metrics.Assessments) { - assessment[metrics.AssessmentKeyProcessingTime] = 0 - assessment[metrics.AssessmentKeyGenerateTestsForFileCharacterCount] = 0 - assessment[metrics.AssessmentKeyResponseCharacterCount] = 0 +// CleanMap deletes all empty and nondeterministic keys from the assessments. +func CleanMap[E comparable](assessments map[E]metrics.Assessments) map[E]metrics.Assessments { + copy := map[E]metrics.Assessments{} + + for key, assessment := range assessments { + copy[key] = Clean(assessment) + } + + return copy } // AssessmentsWithProcessingTime is an empty assessment collection with positive processing time. diff --git a/evaluate/report/collection_test.go b/evaluate/report/collection_test.go index 6d040672..02ece21c 100644 --- a/evaluate/report/collection_test.go +++ b/evaluate/report/collection_test.go @@ -34,16 +34,12 @@ func TestAssessmentPerModelPerLanguagePerRepositoryWalk(t *testing.T) { assert.NoError(t, assessmentStore.Walk(func(m model.Model, l language.Language, r string, ti task.Identifier, a metrics.Assessments) (err error) { actualOrder = append(actualOrder, a) - metricstesting.AssertAssessmentsEqual(t, assessmentLookup[m][l][r][ti], a) + assert.Equal(t, metricstesting.Clean(assessmentLookup[m][l][r][ti]), metricstesting.Clean(a)) return nil })) - if assert.Equal(t, len(tc.ExpectedOrder), len(actualOrder)) { - for i := range tc.ExpectedOrder { - metricstesting.AssertAssessmentsEqual(t, tc.ExpectedOrder[i], actualOrder[i]) - } - } + assert.Equal(t, metricstesting.CleanSlice(tc.ExpectedOrder), metricstesting.CleanSlice(actualOrder)) }) } diff --git a/evaluate/task/testing/task.go b/evaluate/task/testing/task.go index b3c049c3..f88e708f 100644 --- a/evaluate/task/testing/task.go +++ b/evaluate/task/testing/task.go @@ -59,7 +59,8 @@ func (tc *TestCaseTask) Validate(t *testing.T, createRepository createRepository } actualRepositoryAssessment, actualProblems, actualErr := tc.Task.Run(taskContext) - metricstesting.AssertTaskAssessmentsEqual(t, tc.ExpectedRepositoryAssessment, actualRepositoryAssessment) + assert.Equal(t, metricstesting.CleanMap(tc.ExpectedRepositoryAssessment), metricstesting.CleanMap(actualRepositoryAssessment)) + if assert.Equal(t, len(tc.ExpectedProblemContains), len(actualProblems), "problems count") { for i, expectedProblem := range tc.ExpectedProblemContains { actualProblem := actualProblems[i] diff --git a/model/llm/llm_test.go b/model/llm/llm_test.go index 0a3de5c4..6963282b 100644 --- a/model/llm/llm_test.go +++ b/model/llm/llm_test.go @@ -68,7 +68,8 @@ func TestModelGenerateTestsForFile(t *testing.T) { } actualAssessment, actualError := llm.WriteTests(ctx) assert.NoError(t, actualError) - metricstesting.AssertAssessmentsEqual(t, tc.ExpectedAssessment, actualAssessment) + + assert.Equal(t, metricstesting.Clean(tc.ExpectedAssessment), metricstesting.Clean(actualAssessment)) actualTestFileContent, err := os.ReadFile(filepath.Join(temporaryPath, tc.ExpectedTestFilePath)) assert.NoError(t, err) @@ -172,7 +173,8 @@ func TestModelRepairSourceCodeFile(t *testing.T) { } actualAssessment, actualError := llm.RepairCode(ctx) assert.NoError(t, actualError) - metricstesting.AssertAssessmentsEqual(t, tc.ExpectedAssessment, actualAssessment) + + assert.Equal(t, metricstesting.Clean(tc.ExpectedAssessment), metricstesting.Clean(actualAssessment)) actualSourceFileContent, err := os.ReadFile(filepath.Join(repositoryPath, tc.SourceFilePath)) assert.NoError(t, err) @@ -210,8 +212,10 @@ func TestModelRepairSourceCodeFile(t *testing.T) { }, ExpectedAssessment: metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 134, + metrics.AssessmentKeyResponseCharacterCount: 143, }, ExpectedSourceFileContent: ` package openingBracketMissing @@ -260,8 +264,10 @@ func TestModelRepairSourceCodeFile(t *testing.T) { }, ExpectedAssessment: metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 186, + metrics.AssessmentKeyResponseCharacterCount: 195, }, ExpectedSourceFileContent: ` package com.eval; @@ -514,7 +520,8 @@ func TestModelTranspile(t *testing.T) { actualAssessment, actualError := llm.Transpile(ctx) assert.NoError(t, actualError) - metricstesting.AssertAssessmentsEqual(t, tc.ExpectedAssessment, actualAssessment) + + assert.Equal(t, metricstesting.Clean(tc.ExpectedAssessment), metricstesting.Clean(actualAssessment)) actualStubFileContent, err := os.ReadFile(filepath.Join(repositoryPath, tc.StubFilePath)) assert.NoError(t, err) @@ -562,8 +569,10 @@ func TestModelTranspile(t *testing.T) { StubFilePath: filepath.Join("binarySearch.go"), ExpectedAssessment: metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 280, + metrics.AssessmentKeyResponseCharacterCount: 289, }, ExpectedStubFileContent: transpiledFileContent, }) @@ -610,8 +619,10 @@ func TestModelTranspile(t *testing.T) { StubFilePath: filepath.Join("src", "main", "java", "com", "eval", "BinarySearch.java"), ExpectedAssessment: metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 1, - metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 348, + metrics.AssessmentKeyResponseCharacterCount: 357, }, ExpectedStubFileContent: transpiledFileContent, }) diff --git a/model/llm/prompt/parse_test.go b/model/llm/prompt/parse_test.go index a017f7f3..a7009781 100644 --- a/model/llm/prompt/parse_test.go +++ b/model/llm/prompt/parse_test.go @@ -31,7 +31,7 @@ func TestParseResponse(t *testing.T) { assert.Error(t, err) } - metricstesting.AssertAssessmentsEqual(t, tc.ExpectedAssessment, actualAssessment) + assert.Equal(t, metricstesting.Clean(tc.ExpectedAssessment), metricstesting.Clean(actualAssessment)) assert.Equal(t, strings.TrimSpace(tc.ExpectedCode), actualCode) }) } diff --git a/model/symflower/symflower_test.go b/model/symflower/symflower_test.go index 857fa1eb..95980998 100644 --- a/model/symflower/symflower_test.go +++ b/model/symflower/symflower_test.go @@ -78,8 +78,7 @@ func TestModelGenerateTestsForFile(t *testing.T) { } else { require.NoError(t, actualError) - metricstesting.AssertAssessmentsEqual(t, tc.ExpectedAssessment, actualAssessment) - + assert.Equal(t, metricstesting.Clean(tc.ExpectedAssessment), metricstesting.Clean(actualAssessment)) actualTestResult, actualProblems, err := tc.Language.ExecuteTests(logger, repositoryPath) require.NoError(t, err) require.Empty(t, actualProblems)