Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move existing metrics to assessments #38

Merged
merged 1 commit into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,12 @@ func (command *Evaluate) Execute(args []string) (err error) {

// Check that models and languages can be evaluated by executing the "plain" repositories.
log.Printf("Checking that models and languages can be used for evaluation")
metricsPerModel := map[string]metrics.Metrics{}
metricsPerModel := map[string]metrics.Assessments{}
problemsPerModel := map[string][]error{}
{
// Ensure we report metrics for every model even if they are excluded.
for _, modelID := range command.Models {
metricsPerModel[modelID] = metrics.Metrics{}
metricsPerModel[modelID] = metrics.NewAssessments()
}

for _, languageID := range command.Languages {
Expand All @@ -114,7 +114,7 @@ func (command *Evaluate) Execute(args []string) (err error) {
language := language.Languages[languageID]

metrics, ps, err := evaluate.EvaluateRepository(model, language, filepath.Join(command.TestdataPath, language.ID(), "plain"))
metricsPerModel[modelID] = metricsPerModel[modelID].Add(metrics)
metricsPerModel[modelID].Add(metrics)
if err != nil {
ps = append(ps, err)
}
Expand Down Expand Up @@ -155,7 +155,7 @@ func (command *Evaluate) Execute(args []string) (err error) {
language := language.Languages[languageID]

metrics, ps, err := evaluate.EvaluateRepository(model, language, filepath.Join(languagePath, repository.Name()))
metricsPerModel[model.ID()] = metricsPerModel[model.ID()].Add(metrics)
metricsPerModel[model.ID()].Add(metrics)
problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...)
if err != nil {
log.Printf("ERROR: Model %q encountered a hard error for language %q, repository %q: %+v", modelID, languageID, repository.Name(), err)
Expand Down
74 changes: 73 additions & 1 deletion evaluate/metrics/assessment.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
package metrics

import (
"encoding/csv"
"fmt"
"sort"
"strings"

pkgerrors "github.com/pkg/errors"
"golang.org/x/exp/maps"
)

// AssessmentKey defines a key for a numerical key-value assessment pair.
// It is a distinct string type and serves as the map key of Assessments.
type AssessmentKey string

Expand All @@ -20,14 +30,22 @@ func RegisterAssessmentKey(key string) AssessmentKey {
}

// Assessment keys known to this package, each created through RegisterAssessmentKey.
// NOTE(review): the tests expect output columns in exactly this declaration order, so
// reordering these declarations presumably changes String and CSV output — confirm
// against RegisterAssessmentKey before moving entries.
var (
// AssessmentKeyFilesExecuted holds the successfully executed files.
AssessmentKeyFilesExecuted = RegisterAssessmentKey("files-executed")
// AssessmentKeyFilesProblems holds the files with problems.
AssessmentKeyFilesProblems = RegisterAssessmentKey("files-problems")

// AssessmentKeyCoverageStatement counts the cases where 100% coverage was reached.
AssessmentKeyCoverageStatement = RegisterAssessmentKey("coverage-statement")

// AssessmentKeyNoExcessResponse indicates that a model did not produce more content than requested.
AssessmentKeyNoExcessResponse = RegisterAssessmentKey("no-excess-response")
)

// Assessments holds a collection of numerical assessment metrics.
// It maps each AssessmentKey to an unsigned counter; a key that is absent reads as zero.
type Assessments map[AssessmentKey]uint

// NewAssessments creates a new, empty assessment collection that is ready to be
// written to.
func NewAssessments() Assessments {
	return Assessments{}
}
Expand All @@ -51,3 +69,57 @@ func Merge(a Assessments, b Assessments) (c Assessments) {

return c
}

// String returns a human-readable representation of the metrics: one
// "key=value" pair per entry of allAssessmentKeys, in that order, separated by
// ", ". Keys that are not present in the collection are printed with value 0.
func (a Assessments) String() string {
	if a == nil {
		a = NewAssessments()
	}

	var out strings.Builder
	for i, key := range allAssessmentKeys {
		if i > 0 {
			out.WriteString(", ")
		}
		fmt.Fprintf(&out, "%s=%d", key, a[key])
	}

	return out.String()
}

// StringCSV returns the metrics as the value cells of a CSV row: one decimal
// string per entry of allAssessmentKeys, in that order. Keys that are not
// present in the collection are rendered as "0".
func (a Assessments) StringCSV() (row []string) {
	if a == nil {
		a = NewAssessments()
	}

	row = make([]string, 0, len(allAssessmentKeys))
	for _, key := range allAssessmentKeys {
		row = append(row, fmt.Sprintf("%d", a[key]))
	}

	return row
}

// csvHeader returns the CSV header row: the "model" column followed by the
// entries of allAssessmentKeysStrings. A fresh slice is built on every call.
func csvHeader() []string {
	header := make([]string, 0, 1+len(allAssessmentKeysStrings))
	header = append(header, "model")

	return append(header, allAssessmentKeysStrings...)
}

// FormatStringCSV formats the given metrics as CSV.
//
// The output starts with the header row produced by csvHeader, followed by one
// row per model consisting of the model name and its StringCSV cells. Models
// are emitted in ascending order of their map keys so the result is
// deterministic despite Go's randomized map iteration order.
//
// On failure an empty string is returned together with the underlying error
// wrapped with a stack trace.
func FormatStringCSV(metricsPerModel map[string]Assessments) (string, error) {
	var out strings.Builder
	// Named "writer" so the local does not shadow the imported "csv" package.
	writer := csv.NewWriter(&out)

	if err := writer.Write(csvHeader()); err != nil {
		return "", pkgerrors.WithStack(err)
	}

	models := maps.Keys(metricsPerModel)
	sort.Strings(models)
	for _, model := range models {
		row := metricsPerModel[model].StringCSV()

		if err := writer.Write(append([]string{model}, row...)); err != nil {
			return "", pkgerrors.WithStack(err)
		}
	}

	// Flush itself returns nothing; an error from a buffered write only
	// surfaces through Error, so check it before trusting the output.
	writer.Flush()
	if err := writer.Error(); err != nil {
		return "", pkgerrors.WithStack(err)
	}

	return out.String(), nil
}
96 changes: 96 additions & 0 deletions evaluate/metrics/assessment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/zimmski/osutil/bytesutil"
)

func TestAssessmentsAdd(t *testing.T) {
Expand Down Expand Up @@ -114,3 +115,98 @@ func TestMerge(t *testing.T) {
},
})
}

// TestAssessmentString checks that Assessments.String prints every assessment
// key with its value, both for a freshly created collection and for one where
// all four keys carry a value.
func TestAssessmentString(t *testing.T) {
	testCases := []struct {
		Name string

		Assessment Assessments

		ExpectedString string
	}{
		{
			Name: "Initial Metrics",

			Assessment: NewAssessments(),

			ExpectedString: "files-executed=0, files-problems=0, coverage-statement=0, no-excess-response=0",
		},
		{
			// The name is kept verbatim even though this case is populated.
			Name: "Empty Metrics",

			Assessment: Assessments{
				AssessmentKeyCoverageStatement: 1,
				AssessmentKeyFilesExecuted:     2,
				AssessmentKeyFilesProblems:     3,
				AssessmentKeyNoExcessResponse:  4,
			},

			ExpectedString: "files-executed=2, files-problems=3, coverage-statement=1, no-excess-response=4",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.Name, func(t *testing.T) {
			assert.Equal(t, tc.ExpectedString, tc.Assessment.String())
		})
	}
}

// TestFormatStringCSV checks the CSV rendering of per-model assessments: the
// header row, one row per model, and the column order of the values.
func TestFormatStringCSV(t *testing.T) {
// testCase describes one input map and the CSV text expected for it.
type testCase struct {
Name string

AssessmentPerModel map[string]Assessments

ExpectedString string
}

// validate runs a single test case as a named subtest.
validate := func(t *testing.T, tc *testCase) {
t.Run(tc.Name, func(t *testing.T) {
actualString, err := FormatStringCSV(tc.AssessmentPerModel)
assert.NoError(t, err)

// The expectation is passed through StringTrimIndentations before comparing;
// presumably this strips the literal's leading newline and indentation — TODO confirm.
assert.Equal(t, bytesutil.StringTrimIndentations(tc.ExpectedString), actualString)
})
}

// A model with no recorded values must still produce a row of zeros.
validate(t, &testCase{
Name: "Single Empty Model",

AssessmentPerModel: map[string]Assessments{
"Model": Assessments{},
},

ExpectedString: `
model,files-executed,files-problems,coverage-statement,no-excess-response
Model,0,0,0,0
`,
})
// Two populated models: rows are expected in model-name order, and each row's
// values follow the header's column order rather than the literal's key order.
validate(t, &testCase{
Name: "Multiple Models",

AssessmentPerModel: map[string]Assessments{
"ModelA": Assessments{
AssessmentKeyCoverageStatement: 1,
AssessmentKeyFilesExecuted: 2,
AssessmentKeyFilesProblems: 3,
AssessmentKeyNoExcessResponse: 4,
},
"ModelB": Assessments{
AssessmentKeyCoverageStatement: 1,
AssessmentKeyFilesExecuted: 2,
AssessmentKeyFilesProblems: 3,
AssessmentKeyNoExcessResponse: 4,
},
},

ExpectedString: `
model,files-executed,files-problems,coverage-statement,no-excess-response
ModelA,2,3,1,4
ModelB,2,3,1,4
`,
})
}
117 changes: 0 additions & 117 deletions evaluate/metrics/metrics.go

This file was deleted.

Loading
Loading