Skip to content

Commit

Permalink
Extendable metrics collection
Browse files Browse the repository at this point in the history
  • Loading branch information
bauersimon committed Apr 3, 2024
1 parent f2a023f commit c944c1f
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 6 deletions.
6 changes: 5 additions & 1 deletion cmd/eval-symflower-codegen-testing/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,18 @@ func (command *Evaluate) Execute(args []string) (err error) {
// Check that models and languages can be evaluated by executing the "plain" repositories.
log.Printf("Checking that models and languages can used for evaluation")
problemsPerModel := map[string][]error{}
metricsPerModel := map[string]evaluate.Metrics{}
for _, languageID := range command.Languages {
for _, modelID := range command.Models {
model := model.Models[modelID]
language := language.Languages[languageID]

ps, err := evaluate.EvaluateRepository(model, language, filepath.Join(command.TestdataPath, language.ID(), "plain"))
metrics, ps, err := evaluate.EvaluateRepository(model, language, filepath.Join(command.TestdataPath, language.ID(), "plain"))
problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...)
if err != nil {
log.Fatalf("%+v", err)
}
metricsPerModel[model.ID()] = metricsPerModel[model.ID()].Add(metrics)
}
}

Expand All @@ -87,6 +89,8 @@ func (command *Evaluate) Execute(args []string) (err error) {
log.Printf("%+v:", p)
}
}

log.Printf("Evaluation score for %q: %s", modelID, metricsPerModel[modelID].Percentual())
}

return nil
Expand Down
72 changes: 72 additions & 0 deletions evaluate/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package evaluate

import (
"fmt"
)

// MetricKey is the name of a numerical metric.
// It is the key type of the "Metrics" and "MetricsPercentual" maps.
type MetricKey string

// allMetricKeys holds all metric keys in the order they were registered through "registerMetricKey".
// "Metrics.Add" and "Metrics.Percentual" iterate over this list, so entries with keys that are not registered are not carried over by them.
var allMetricKeys []MetricKey

// registerMetricKey converts the given name into a metric key, records it in "allMetricKeys" and returns it.
// No de-duplication is performed: registering the same name twice stores it twice.
func registerMetricKey(key string) MetricKey {
	registered := MetricKey(key)
	allMetricKeys = append(allMetricKeys, registered)

	return registered
}

// The metric keys known to this package.
// Each key is appended to "allMetricKeys" by "registerMetricKey" when these package-level variables are initialized.
var (
// MetricKeyTotal is the total number of benchmarking candidates.
// "Metrics.Percentual" uses the value of this key as the divisor for all other keys.
MetricKeyTotal = registerMetricKey("total")

// MetricKeyExecuted is the number of benchmarking candidates with successful execution.
MetricKeyExecuted = registerMetricKey("executed")
)

// Metrics holds numerical benchmarking metrics as unsigned counts per metric key.
// Reading a key that is not present yields zero.
type Metrics map[MetricKey]uint

// MetricsPercentual holds numerical percentage-based benchmarking metrics.
// Values are percentages as computed by "Metrics.Percentual".
type MetricsPercentual map[MetricKey]float64

// Add returns a new metrics object holding the per-key sum of both operands.
// Only keys registered in "allMetricKeys" are considered, missing keys count as zero, and neither operand is modified.
func (m Metrics) Add(o Metrics) Metrics {
	sum := make(Metrics, len(allMetricKeys))
	for _, key := range allMetricKeys {
		sum[key] = m[key] + o[key]
	}

	return sum
}

// Percentual converts the metrics into percentage-based metrics relative to the value of "MetricKeyTotal".
// If the total is zero, which includes the total key not being present, "nil" is returned instead.
// The total key itself is not part of the result.
func (m Metrics) Percentual() MetricsPercentual {
	total := m[MetricKeyTotal]
	if total == 0 {
		return nil
	}

	denominator := float64(total)
	percentages := MetricsPercentual{}
	for _, key := range allMetricKeys {
		if key != MetricKeyTotal {
			percentages[key] = (float64(m[key]) / denominator) * 100.0
		}
	}

	return percentages
}

// String formats the absolute metrics as "#executed=<executed>/<total>".
func (m Metrics) String() string {
	executed, total := m[MetricKeyExecuted], m[MetricKeyTotal]

	return fmt.Sprintf("#executed=%d/%d", executed, total)
}

// String formats the percentage of executed candidates as "#executed=<percent>%".
// The percentage is printed without decimal places and padded to a minimum width of three characters.
func (m MetricsPercentual) String() string {
	executed := m[MetricKeyExecuted]

	return fmt.Sprintf("#executed=%3.0f%%", executed)
}
13 changes: 8 additions & 5 deletions evaluate/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ import (
)

// EvaluateRepository evaluates a repository with the given model and language.
func EvaluateRepository(model model.Model, language language.Language, repositoryPath string) (problems []error, err error) {
func EvaluateRepository(model model.Model, language language.Language, repositoryPath string) (metrics Metrics, problems []error, err error) {
log.Printf("Evaluating model %q using language %q and repository %q", model.ID(), language.ID(), repositoryPath)
defer func() {
log.Printf("Evaluated model %q using language %q and repository %q: encountered %d problems", model.ID(), language.ID(), repositoryPath, len(problems))
}()

temporaryPath, err := os.MkdirTemp("", "eval-symflower-codegen-testing")
if err != nil {
return problems, pkgerrors.WithStack(err)
return nil, problems, pkgerrors.WithStack(err)
}
defer func() {
if e := os.RemoveAll(temporaryPath); e != nil {
Expand All @@ -35,15 +35,17 @@ func EvaluateRepository(model model.Model, language language.Language, repositor
}()
temporaryRepositoryPath := filepath.Join(temporaryPath, filepath.Base(repositoryPath))
if err := osutil.CopyTree(repositoryPath, temporaryRepositoryPath); err != nil {
return problems, pkgerrors.WithStack(err)
return nil, problems, pkgerrors.WithStack(err)
}

filePaths, err := language.Files(repositoryPath)
if err != nil {
return problems, pkgerrors.WithStack(err)
return nil, problems, pkgerrors.WithStack(err)
}

metrics = Metrics{}
for _, filePath := range filePaths {
metrics[MetricKeyTotal]++
if err := model.GenerateTestsForFile(temporaryRepositoryPath, filePath); err != nil {
problems = append(problems, pkgerrors.WithMessage(err, filePath))

Expand All @@ -55,7 +57,8 @@ func EvaluateRepository(model model.Model, language language.Language, repositor

continue
}
metrics[MetricKeyExecuted]++
}

return problems, nil
return metrics, problems, nil
}

0 comments on commit c944c1f

Please sign in to comment.