Skip to content

Commit

Permalink
Don't write LLM responses to separate files but instead associate request and response with UUIDs
Browse files Browse the repository at this point in the history
  • Loading branch information
bauersimon committed Jan 10, 2025
1 parent 1a039cc commit 0eb5b9e
Show file tree
Hide file tree
Showing 8 changed files with 7 additions and 74 deletions.
2 changes: 0 additions & 2 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Contains(t, data, "preloading model")
assert.Contains(t, data, "unloading model")
},
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain", "response-1.log"): nil,
},
ExpectedOutputValidate: func(t *testing.T, output, resultPath string) {
assert.Contains(t, output, "msg=\"starting services for provider\" provider=ollama")
Expand Down Expand Up @@ -578,7 +577,6 @@ func TestEvaluateExecute(t *testing.T) {
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain", "evaluation.log"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain", "response-1.log"): nil,
},
})
}
Expand Down
4 changes: 0 additions & 4 deletions evaluate/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,6 @@ func Evaluate(ctx *Context) {
logger.Info("starting run", "count", rl+1, "total", ctx.Runs)
}

logger := logger.With(log.AttributeKeyRun, rl+1)

for _, language := range ctx.Languages {
logger := logger.With(log.AttributeKeyLanguage, language.ID())

Expand Down Expand Up @@ -224,8 +222,6 @@ func Evaluate(ctx *Context) {
logger.Info("starting run", "count", rl+1, "total", ctx.Runs)
}

logger := logger.With(log.AttributeKeyRun, rl+1)

for _, language := range ctx.Languages {
languageID := language.ID()
logger := logger.With(log.AttributeKeyLanguage, languageID)
Expand Down
2 changes: 0 additions & 2 deletions evaluate/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,6 @@ func TestEvaluate(t *testing.T) {
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "\"msg\":\"query retry\",\"count\":1,\"total\":3,\"error\":\""+ErrEmptyResponseFromModel.Error())
},
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "response-1.log"): nil,
"evaluation.csv": nil,
},
})
Expand Down Expand Up @@ -489,7 +488,6 @@ func TestEvaluate(t *testing.T) {
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "DONE 0 tests, 1 error")
},
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "response-1.log"): nil,
"evaluation.csv": nil,
},
})
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ require (

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/uuid v1.6.0
github.com/kr/text v0.2.0 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/jessevdk/go-flags v1.5.1-0.20210607101731-3927b71304df h1:JTDw/M13b6dZmEJI/vfcCLENqcjUHi9UBry+R0pjh5Q=
github.com/jessevdk/go-flags v1.5.1-0.20210607101731-3927b71304df/go.mod h1:Fw0T6WPc1dYxT4mKEZRfG5kJhaTDP9pj1c2EWnYs/m4=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
Expand Down
32 changes: 0 additions & 32 deletions log/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ import (
type AttributeKey string

const (
// AttributeKeyArtifact holds the key for the "Artifact" attribute.
AttributeKeyArtifact = AttributeKey("Artifact")
// AttributeKeyLanguage holds the key for the "Language" attribute.
AttributeKeyLanguage = AttributeKey("Language")
// AttributeKeyModel holds the key for the "Model" attribute.
Expand All @@ -31,8 +29,6 @@ const (
AttributeKeyRepository = AttributeKey("Repository")
// AttributeKeyResultPath holds the key for the "ResultPath" attribute.
AttributeKeyResultPath = AttributeKey("ResultPath")
// AttributeKeyRun holds the key for the "Run" attribute.
AttributeKeyRun = AttributeKey("Run")
// AttributeKeyTask holds the key for the "Task" attribute.
AttributeKeyTask = AttributeKey("Task")
)
Expand Down Expand Up @@ -314,34 +310,6 @@ var defaultLogFileSpawners = []handlerSpawner{
return nil, err
}

return createFileHandlerForParent(parent, file), nil
},
},
handlerSpawner{
NeededAttributes: []AttributeKey{
AttributeKeyResultPath,

AttributeKeyArtifact,
AttributeKeyLanguage,
AttributeKeyModel,
AttributeKeyRepository,
AttributeKeyRun,
AttributeKeyTask,
},
Spawn: func(parent slog.Handler, attributes map[AttributeKey]string) (slog.Handler, error) {
resultPath := attributes[AttributeKeyResultPath]
modelID := attributes[AttributeKeyModel]
languageID := attributes[AttributeKeyLanguage]
repositoryName := attributes[AttributeKeyRepository]
taskIdentifier := attributes[AttributeKeyTask]
run := attributes[AttributeKeyRun]
artifact := attributes[AttributeKeyArtifact]

file, err := openLogFile(filepath.Join(resultPath, taskIdentifier, CleanModelNameForFileSystem(modelID), languageID, repositoryName, fmt.Sprintf("%s-%s.log", artifact, run)))
if err != nil {
return nil, err
}

return createFileHandlerForParent(parent, file), nil
},
},
Expand Down
32 changes: 0 additions & 32 deletions log/logger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,38 +149,6 @@ func TestLogger(t *testing.T) {
filepath.Join("taskA", "modelA", "languageA", "repositoryB", "evaluation.log"): nil,
},
})

t.Run("Artifacts", func(t *testing.T) {
validate(t, &testCase{
Name: "Response",

Do: func(logger *Logger, temporaryPath string) {
logger = logger.With(AttributeKeyResultPath, temporaryPath)
logger = logger.With(AttributeKeyLanguage, "languageA")
logger = logger.With(AttributeKeyModel, "modelA")
logger = logger.With(AttributeKeyRepository, "repositoryA")
logger = logger.With(AttributeKeyTask, "taskA")
logger = logger.With(AttributeKeyRun, "1")

logger.Info("artifact-content", Attribute(AttributeKeyArtifact, "response"))
logger.Info("no-artifact-content")
},

ExpectedLogOutputContains: []string{
"no-artifact-content",
"artifact-content",
},
ExpectedFilesContain: map[string][]string{
"evaluation.log": nil,
filepath.Join("taskA", "modelA", "languageA", "repositoryA", "evaluation.log"): []string{
"no-artifact-content",
},
filepath.Join("taskA", "modelA", "languageA", "repositoryA", "response-1.log"): []string{
"artifact-content",
},
},
})
})
})

t.Run("Text", func(t *testing.T) {
Expand Down
6 changes: 4 additions & 2 deletions model/llm/llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"time"

"github.com/avast/retry-go"
"github.com/google/uuid"
pkgerrors "github.com/pkg/errors"
"github.com/zimmski/osutil/bytesutil"

Expand Down Expand Up @@ -298,14 +299,15 @@ func (m *Model) WriteTests(ctx model.Context) (assessment metrics.Assessments, e
func (m *Model) query(logger *log.Logger, request string) (response string, duration time.Duration, err error) {
if err := retry.Do(
func() error {
logger.Info("querying model", "model", m.ID(), "prompt", string(bytesutil.PrefixLines([]byte(request), []byte("\t"))))
id := uuid.NewString()
logger.Info("querying model", "model", m.ID(), "id", id, "prompt", string(bytesutil.PrefixLines([]byte(request), []byte("\t"))))
start := time.Now()
response, err = m.provider.Query(context.Background(), m.model, request)
if err != nil {
return err
}
duration = time.Since(start)
logger.Info("model responded", "model", m.ID(), "duration", duration.Milliseconds(), "response", string(bytesutil.PrefixLines([]byte(response), []byte("\t"))), log.Attribute(log.AttributeKeyArtifact, "response"))
logger.Info("model responded", "model", m.ID(), "id", id, "duration", duration.Milliseconds(), "response", string(bytesutil.PrefixLines([]byte(response), []byte("\t"))))

return nil
},
Expand Down

0 comments on commit 0eb5b9e

Please sign in to comment.