Skip to content

Commit

Permalink
refactor, Move the code that runs the model with "symflower fix" to c…
Browse files Browse the repository at this point in the history
…ommon location, for reusability

Part of #375
  • Loading branch information
ruiAzevedo19 committed Dec 20, 2024
1 parent 4ef6a1a commit f1b7737
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 60 deletions.
47 changes: 47 additions & 0 deletions evaluate/task/symflower.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ import (
"time"

pkgerrors "github.com/pkg/errors"
"github.com/symflower/eval-dev-quality/evaluate/metrics"
"github.com/symflower/eval-dev-quality/language"
"github.com/symflower/eval-dev-quality/log"
"github.com/symflower/eval-dev-quality/model"
evaltask "github.com/symflower/eval-dev-quality/task"
"github.com/symflower/eval-dev-quality/tools"
"github.com/symflower/eval-dev-quality/util"
Expand Down Expand Up @@ -72,3 +74,48 @@ func ExecuteWithSymflowerFix(ctx evaltask.Context, logger *log.Logger, packagePa

return testResult, duration, problems, nil
}

// runModelAndSymflowerFix queries the model via "runModel", executes the tests of the repository and, if the language supports it, executes them again with "symflower fix" applied.
// The returned "modelAssessment" holds the result of the model alone, while "withSymflowerFixAssessment" is the very same assessment unless the run with "symflower fix" produced a valid test execution, in which case it is the combination of both.
// A failing model query is reported as a problem and yields no assessments, while a missing processing time measurement is returned as an error.
func runModelAndSymflowerFix(ctx evaltask.Context, modelCtx model.Context, runModel func(model.Context) (metrics.Assessments, error)) (modelAssessment metrics.Assessments, withSymflowerFixAssessment metrics.Assessments, problems []error, err error) {
	// isValidExecution checks test output against the execution validation of the repository, and is only evaluated on demand so the configuration is not touched when a test execution failed.
	isValidExecution := func(output string) bool {
		return ctx.Repository.Configuration().Validation.Execution.Validate(output)
	}

	modelAssessment = metrics.NewAssessments()
	// Until "symflower fix" has been applied successfully there is nothing to improve upon, so both results refer to the same assessment.
	withSymflowerFixAssessment = modelAssessment

	modelResult, err := runModel(modelCtx)
	switch {
	case err != nil:
		// A model failure is a problem of this case, not a reason to abort the caller.
		return nil, nil, append(problems, pkgerrors.WithMessage(err, modelCtx.FilePath)), nil
	case modelResult[metrics.AssessmentKeyProcessingTime] == 0:
		return nil, nil, problems, pkgerrors.Errorf("no model response time measurement present for %q at repository %q", ctx.Model.ID(), ctx.Repository.Name())
	}
	modelAssessment.Add(modelResult)
	modelAssessment.Award(metrics.AssessmentKeyResponseNoError)

	// Execute the tests as the model left them.
	executionResult, executionProblems, err := ctx.Language.ExecuteTests(modelCtx.Logger, modelCtx.RepositoryPath)
	problems = append(problems, executionProblems...)
	switch {
	case err != nil:
		problems = append(problems, pkgerrors.WithMessage(err, modelCtx.FilePath))
	case isValidExecution(executionResult.StdOut):
		modelCtx.Logger.Printf("Executes tests with %d coverage objects", executionResult.Coverage)
		modelAssessment.Award(metrics.AssessmentKeyFilesExecuted)
		modelAssessment.AwardPoints(metrics.AssessmentKeyCoverage, executionResult.Coverage)
	}

	if !ctx.Language.SupportsFix() {
		return modelAssessment, withSymflowerFixAssessment, problems, nil
	}

	// Execute the tests once more, this time with "symflower fix" applied.
	fixedResult, fixDuration, fixProblems, fixErr := ExecuteWithSymflowerFix(ctx, modelCtx.Logger, ctx.Repository.DataPath())
	problems = append(problems, fixProblems...)
	switch {
	case fixErr != nil:
		problems = append(problems, fixErr)
	case isValidExecution(fixedResult.StdOut):
		// NOTE(review): this message goes to "ctx.Logger" while everything else in here logs to "modelCtx.Logger" - confirm that this is intended.
		ctx.Logger.Printf("with symflower repair: Executes tests with %d coverage objects", fixedResult.Coverage)

		// The repaired run is valid, so record its results and merge them with what the model achieved.
		fixedAssessment := metrics.NewAssessments()
		fixedAssessment[metrics.AssessmentKeyProcessingTime] = fixDuration
		fixedAssessment.Award(metrics.AssessmentKeyFilesExecuted)
		fixedAssessment.AwardPoints(metrics.AssessmentKeyCoverage, fixedResult.Coverage)

		withSymflowerFixAssessment = metrics.CombineWithSymflowerFixAssessments(modelAssessment, fixedAssessment)
	}

	return modelAssessment, withSymflowerFixAssessment, problems, nil
}
75 changes: 15 additions & 60 deletions evaluate/task/write-test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,20 @@ func (t *WriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[evaltas
ctx.Logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
}

modelAssessmentFile, withSymflowerFixAssessmentFile, ps, err := runModelAndSymflowerFix(ctx, taskLogger, modelCapability, dataPath, filePath, &ArgumentsWriteTest{
arguments := &ArgumentsWriteTest{
TestFramework: testFramework,
})
}
modelContext := model.Context{
Language: ctx.Language,

RepositoryPath: dataPath,
FilePath: filePath,

Logger: taskLogger.Logger,

Arguments: arguments,
}
modelAssessmentFile, withSymflowerFixAssessmentFile, ps, err := runModelAndSymflowerFix(ctx, modelContext, modelCapability.WriteTests)
problems = append(problems, ps...)
if err != nil {
return nil, problems, err
Expand Down Expand Up @@ -121,10 +132,8 @@ func (t *WriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[evaltas
continue
}

modelTemplateAssessmentFile, templateWithSymflowerFixAssessmentFile, ps, err := runModelAndSymflowerFix(ctx, taskLogger, modelCapability, dataPath, filePath, &ArgumentsWriteTest{
Template: string(testTemplate),
TestFramework: testFramework,
})
arguments.Template = string(testTemplate)
modelTemplateAssessmentFile, templateWithSymflowerFixAssessmentFile, ps, err := runModelAndSymflowerFix(ctx, modelContext, modelCapability.WriteTests)
problems = append(problems, ps...)
if err != nil {
return nil, problems, err
Expand All @@ -149,60 +158,6 @@ func (t *WriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[evaltas
return repositoryAssessment, problems, nil
}

// runModelAndSymflowerFix lets the model write tests for "filePath", executes the tests in "dataPath" and, if the language supports it, executes them again with "symflower fix" applied.
// It returns the assessment of the model alone and the assessment with "symflower fix", where the latter is the same assessment as the former unless the fixed run produced a valid test execution.
// A failing model query is reported as a problem and yields no assessments, while a missing processing time measurement is returned as an error.
func runModelAndSymflowerFix(ctx evaltask.Context, taskLogger *taskLogger, modelCapability model.CapabilityWriteTests, dataPath string, filePath string, arguments *ArgumentsWriteTest) (modelAssessment metrics.Assessments, withSymflowerFixAssessment metrics.Assessments, problems []error, err error) {
modelAssessment = metrics.NewAssessments()
withSymflowerFixAssessment = modelAssessment // The symflower assessment tracks how the model result can be improved in case of a failure, so just link to the model assessment until we successfully applied "symflower fix".
// Assemble the context that is handed to the model for this file.
modelContext := model.Context{
Language: ctx.Language,

RepositoryPath: dataPath,
FilePath: filePath,

Logger: taskLogger.Logger,

Arguments: arguments,
}
assessments, err := modelCapability.WriteTests(modelContext)
if err != nil {
// A model failure is recorded as a problem of this file instead of being returned as an error.
return nil, nil, append(problems, pkgerrors.WithMessage(err, filePath)), nil
}
// A processing time of zero is treated as a missing measurement and aborts with an error.
if assessments[metrics.AssessmentKeyProcessingTime] == 0 {
return nil, nil, problems, pkgerrors.Errorf("no model response time measurement present for %q at repository %q", ctx.Model.ID(), ctx.Repository.Name())
}
modelAssessment.Add(assessments)
modelAssessment.Award(metrics.AssessmentKeyResponseNoError)

// Execute the tests as the model left them.
testResult, ps, err := ctx.Language.ExecuteTests(taskLogger.Logger, dataPath)
problems = append(problems, ps...)
if err != nil {
problems = append(problems, pkgerrors.WithMessage(err, filePath))
} else if ctx.Repository.Configuration().Validation.Execution.Validate(testResult.StdOut) {
// Execution and coverage are only awarded if the output passes the execution validation of the repository.
taskLogger.Printf("Executes tests with %d coverage objects", testResult.Coverage)
modelAssessment.Award(metrics.AssessmentKeyFilesExecuted)
modelAssessment.AwardPoints(metrics.AssessmentKeyCoverage, testResult.Coverage)
}

// Execute the tests once more with "symflower fix" applied, if the language supports it.
if ctx.Language.SupportsFix() {
// "ps" and "err" are new variables scoped to this block, all of their content ends up in "problems".
withSymflowerFixTestResult, processingTime, ps, err := ExecuteWithSymflowerFix(ctx, taskLogger.Logger, ctx.Repository.DataPath())
problems = append(problems, ps...)
if err != nil {
problems = append(problems, err)
} else if ctx.Repository.Configuration().Validation.Execution.Validate(withSymflowerFixTestResult.StdOut) {
// NOTE(review): this message goes to "ctx.Logger" while the rest of the function logs to "taskLogger" - confirm that this is intended.
ctx.Logger.Printf("with symflower repair: Executes tests with %d coverage objects", withSymflowerFixTestResult.Coverage)

// Symflower was able to fix a failure so now update the assessment with the improved results.
withSymflowerFix := metrics.NewAssessments()
withSymflowerFix[metrics.AssessmentKeyProcessingTime] = processingTime
withSymflowerFix.Award(metrics.AssessmentKeyFilesExecuted)
withSymflowerFix.AwardPoints(metrics.AssessmentKeyCoverage, withSymflowerFixTestResult.Coverage)

withSymflowerFixAssessment = metrics.CombineWithSymflowerFixAssessments(modelAssessment, withSymflowerFix)
}
}

return modelAssessment, withSymflowerFixAssessment, problems, nil
}

// validateWriteTestsRepository checks if the repository for the "write-tests" task is well-formed.
func validateWriteTestsRepository(logger *log.Logger, repositoryPath string, language language.Language) (err error) {
logger.Printf("validating repository %q", repositoryPath)
Expand Down

0 comments on commit f1b7737

Please sign in to comment.