Skip to content

Commit

Permalink
Apply "symflower fix" to a "write-test" result of a model when it err…
Browse files Browse the repository at this point in the history
…ors, so model responses can possibly be fixed

Closes #213
  • Loading branch information
ruiAzevedo19 authored and bauersimon committed Jul 2, 2024
1 parent fcf95c4 commit 0390ff0
Show file tree
Hide file tree
Showing 11 changed files with 593 additions and 129 deletions.
280 changes: 170 additions & 110 deletions cmd/eval-dev-quality/cmd/evaluate_test.go

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion evaluate/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,8 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
}
}
if isOnlyPlainRepositories {
totalScore = uint64(len(ctx.Languages)) * uint64(ctx.Runs)
// For each task, the `symflower fix` is also called so we multiply the total score by 2.
totalScore = 2 * uint64(len(ctx.Languages)) * uint64(ctx.Runs)
}

return assessments, totalScore
Expand Down
162 changes: 152 additions & 10 deletions evaluate/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func TestEvaluate(t *testing.T) {
return nil
}))

assert.Equal(t, tc.ExpectedAssessments, actualAssessments)
assert.ElementsMatch(t, tc.ExpectedAssessments, actualAssessments)
assert.Equal(t, tc.ExpectedTotalScore, actualTotalScore)

if tc.ExpectedOutputValidate != nil {
Expand Down Expand Up @@ -200,8 +200,15 @@ func TestEvaluate(t *testing.T) {
Task: evaluatetask.IdentifierWriteTests,
Assessment: metrics.Assessments{},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: metrics.Assessments{},
},
},
ExpectedTotalScore: 1,
ExpectedTotalScore: 2,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), mockedModel.ID(), "golang", "golang", "plain.log"): nil,
},
Expand Down Expand Up @@ -246,8 +253,15 @@ func TestEvaluate(t *testing.T) {
Task: evaluatetask.IdentifierWriteTests,
Assessment: metrics.Assessments{},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: metrics.Assessments{},
},
},
ExpectedTotalScore: 1,
ExpectedTotalScore: 2,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, ErrEmptyResponseFromModel.Error())
Expand Down Expand Up @@ -301,8 +315,19 @@ func TestEvaluate(t *testing.T) {
metrics.AssessmentKeyResponseNoError: 1,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: map[metrics.AssessmentKey]uint64{
metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 14,
metrics.AssessmentKeyResponseCharacterCount: 14,
metrics.AssessmentKeyResponseNoError: 1,
},
},
},
ExpectedTotalScore: 1,
ExpectedTotalScore: 2,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "Attempt 1/3: "+ErrEmptyResponseFromModel.Error())
Expand Down Expand Up @@ -355,8 +380,19 @@ func TestEvaluate(t *testing.T) {
metrics.AssessmentKeyResponseNoError: 1,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: map[metrics.AssessmentKey]uint64{
metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 14,
metrics.AssessmentKeyResponseCharacterCount: 14,
metrics.AssessmentKeyResponseNoError: 1,
},
},
},
ExpectedTotalScore: 1,
ExpectedTotalScore: 2,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "DONE 0 tests, 1 error")
Expand Down Expand Up @@ -443,6 +479,17 @@ func TestEvaluate(t *testing.T) {
metrics.AssessmentKeyResponseNoError: 1,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryNextPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: map[metrics.AssessmentKey]uint64{
metrics.AssessmentKeyCoverage: 0,
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 1,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
Expand All @@ -454,6 +501,17 @@ func TestEvaluate(t *testing.T) {
metrics.AssessmentKeyResponseNoError: 2,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryPlainPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: map[metrics.AssessmentKey]uint64{
metrics.AssessmentKeyCoverage: 0,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
},
},
},
ExpectedTotalScore: 0,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
Expand Down Expand Up @@ -516,6 +574,17 @@ func TestEvaluate(t *testing.T) {
metrics.AssessmentKeyResponseNoError: 2,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryNextPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: map[metrics.AssessmentKey]uint64{
metrics.AssessmentKeyCoverage: 0,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
Expand All @@ -527,6 +596,17 @@ func TestEvaluate(t *testing.T) {
metrics.AssessmentKeyResponseNoError: 1,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryPlainPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: map[metrics.AssessmentKey]uint64{
metrics.AssessmentKeyCoverage: 0,
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 1,
},
},
},
ExpectedTotalScore: 0,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
Expand Down Expand Up @@ -581,6 +661,13 @@ func TestEvaluate(t *testing.T) {
Task: evaluatetask.IdentifierWriteTests,
Assessment: map[metrics.AssessmentKey]uint64{},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryPlainPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: map[metrics.AssessmentKey]uint64{},
},
},
ExpectedTotalScore: 0,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
Expand Down Expand Up @@ -634,8 +721,19 @@ func TestEvaluate(t *testing.T) {
metrics.AssessmentKeyResponseNoError: 3,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: map[metrics.AssessmentKey]uint64{
metrics.AssessmentKeyCoverage: 0,
metrics.AssessmentKeyFilesExecuted: 3,
metrics.AssessmentKeyResponseNoError: 3,
},
},
},
ExpectedTotalScore: 3,
ExpectedTotalScore: 6,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
},
Expand Down Expand Up @@ -690,8 +788,19 @@ func TestEvaluate(t *testing.T) {
metrics.AssessmentKeyResponseNoError: 3,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: map[metrics.AssessmentKey]uint64{
metrics.AssessmentKeyCoverage: 0,
metrics.AssessmentKeyFilesExecuted: 3,
metrics.AssessmentKeyResponseNoError: 3,
},
},
},
ExpectedTotalScore: 3,
ExpectedTotalScore: 6,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
},
Expand Down Expand Up @@ -776,8 +885,19 @@ func TestEvaluate(t *testing.T) {
metrics.AssessmentKeyResponseNoError: 3,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: map[metrics.AssessmentKey]uint64{
metrics.AssessmentKeyCoverage: 0,
metrics.AssessmentKeyFilesExecuted: 3,
metrics.AssessmentKeyResponseNoError: 3,
},
},
},
ExpectedTotalScore: 3,
ExpectedTotalScore: 6,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
},
Expand Down Expand Up @@ -845,8 +965,19 @@ func TestEvaluate(t *testing.T) {
metrics.AssessmentKeyResponseNoError: 3,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: map[metrics.AssessmentKey]uint64{
metrics.AssessmentKeyCoverage: 0,
metrics.AssessmentKeyFilesExecuted: 3,
metrics.AssessmentKeyResponseNoError: 3,
},
},
},
ExpectedTotalScore: 3,
ExpectedTotalScore: 6,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
},
Expand Down Expand Up @@ -895,8 +1026,19 @@ func TestEvaluate(t *testing.T) {
metrics.AssessmentKeyResponseNoError: 1,
},
},
&metricstesting.AssessmentTuple{
Model: mockedModel,
Language: languageGolang,
RepositoryPath: repositoryPath,
Task: evaluatetask.IdentifierWriteTestsSymflowerFix,
Assessment: map[metrics.AssessmentKey]uint64{
metrics.AssessmentKeyCoverage: 0,
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 1,
},
},
},
ExpectedTotalScore: 1,
ExpectedTotalScore: 2,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
},
Expand Down
16 changes: 16 additions & 0 deletions evaluate/metrics/assessment.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,19 @@ func (a Assessments) StringCSV() (row []string) {

return row
}

// CombineWithSymflowerFixAssessments merges the assessments of a model response with the assessments obtained after running "symflower fix" on it.
// Coverage and the number of executed files are taken from "fixed", the processing time is the sum of both, and all response-related metrics are taken from "model".
// Keys that are not listed here are not carried over into the result.
func CombineWithSymflowerFixAssessments(model Assessments, fixed Assessments) (combined Assessments) {
	result := NewAssessments()

	// Both steps contribute to the overall processing time.
	totalProcessingTime := model[AssessmentKeyProcessingTime] + fixed[AssessmentKeyProcessingTime]
	result[AssessmentKeyProcessingTime] = totalProcessingTime

	// Execution results come from the run after "symflower fix".
	result[AssessmentKeyFilesExecuted] = fixed[AssessmentKeyFilesExecuted]
	result[AssessmentKeyCoverage] = fixed[AssessmentKeyCoverage]

	// Metrics about the response itself are copied from the model's assessments.
	result[AssessmentKeyResponseWithCode] = model[AssessmentKeyResponseWithCode]
	result[AssessmentKeyResponseNoExcess] = model[AssessmentKeyResponseNoExcess]
	result[AssessmentKeyResponseNoError] = model[AssessmentKeyResponseNoError]
	result[AssessmentKeyResponseCharacterCount] = model[AssessmentKeyResponseCharacterCount]
	result[AssessmentKeyGenerateTestsForFileCharacterCount] = model[AssessmentKeyGenerateTestsForFileCharacterCount]

	return result
}
51 changes: 51 additions & 0 deletions evaluate/metrics/assessment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,54 @@ func TestAssessmentsScore(t *testing.T) {
ExpectedScore: uint64(9),
})
}

// TestCombineModelAndSymflowerFixAssessments verifies how CombineWithSymflowerFixAssessments merges model assessments with "symflower fix" assessments.
func TestCombineModelAndSymflowerFixAssessments(t *testing.T) {
	type testCase struct {
		Name string

		ModelAssessment         Assessments
		SymflowerFixAssessments Assessments

		ExpectedAssessments Assessments
	}

	testCases := []*testCase{
		{
			Name: "Simple",

			ModelAssessment: Assessments{
				AssessmentKeyFilesExecuted:                      1,
				AssessmentKeyProcessingTime:                     uint64(200),
				AssessmentKeyCoverage:                           0,
				AssessmentKeyResponseCharacterCount:             100,
				AssessmentKeyGenerateTestsForFileCharacterCount: 50,
				AssessmentKeyResponseNoError:                    0,
				AssessmentKeyResponseWithCode:                   1,
				AssessmentKeyResponseNoExcess:                   1,
			},
			SymflowerFixAssessments: Assessments{
				AssessmentKeyFilesExecuted:   1,
				AssessmentKeyProcessingTime:  uint64(100),
				AssessmentKeyCoverage:        10,
				AssessmentKeyResponseNoError: 1,
			},

			// Coverage comes from the fix, processing times add up, and the response metrics stay those of the model.
			ExpectedAssessments: Assessments{
				AssessmentKeyFilesExecuted:                      1,
				AssessmentKeyProcessingTime:                     uint64(300),
				AssessmentKeyCoverage:                           10,
				AssessmentKeyResponseCharacterCount:             100,
				AssessmentKeyGenerateTestsForFileCharacterCount: 50,
				AssessmentKeyResponseNoError:                    0,
				AssessmentKeyResponseWithCode:                   1,
				AssessmentKeyResponseNoExcess:                   1,
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.Name, func(t *testing.T) {
			combined := CombineWithSymflowerFixAssessments(tc.ModelAssessment, tc.SymflowerFixAssessments)

			assert.Equal(t, tc.ExpectedAssessments, combined)
		})
	}
}
32 changes: 32 additions & 0 deletions evaluate/task/symflower-fix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package task

import (
"context"
"time"

pkgerrors "github.com/pkg/errors"
"github.com/symflower/eval-dev-quality/evaluate/metrics"
"github.com/symflower/eval-dev-quality/language"
"github.com/symflower/eval-dev-quality/log"
"github.com/symflower/eval-dev-quality/tools"
"github.com/symflower/eval-dev-quality/util"
)

// symflowerFix executes "symflower fix" for the given language inside the repository and reports how long the command took in milliseconds.
// If the command fails, the duration is zero and the error is returned with a stack trace attached.
// The "modelAssessment" argument is not read by this function.
func symflowerFix(logger *log.Logger, modelAssessment metrics.Assessments, repositoryPath string, language language.Language) (duration uint64, err error) {
	// The measurement starts before the command is assembled, so it covers everything up to the command's return.
	startedAt := time.Now()

	fixCommand := &util.Command{
		Command: []string{
			tools.SymflowerPath, "fix",
			"--language", language.ID(),
			"--workspace", repositoryPath,
		},

		Directory: repositoryPath,
	}
	if _, err = util.CommandWithResult(context.Background(), logger, fixCommand); err != nil {
		return 0, pkgerrors.WithStack(err)
	}

	elapsed := time.Since(startedAt)

	return uint64(elapsed.Milliseconds()), nil
}
Loading

0 comments on commit 0390ff0

Please sign in to comment.