Don't write LLM responses to separate files but instead associate request and response with UUIDs
symflower · Jan 10, 2025 · 0eb5b9e · 0eb5b9e
1 parent 1a039cc
commit 0eb5b9e
Show file tree

Hide file tree

Showing 8 changed files with 7 additions and 74 deletions.
diff --git a/cmd/eval-dev-quality/cmd/evaluate_test.go b/cmd/eval-dev-quality/cmd/evaluate_test.go
@@ -501,7 +501,6 @@ func TestEvaluateExecute(t *testing.T) {
 							assert.Contains(t, data, "preloading model")
 							assert.Contains(t, data, "unloading model")
 						},
-						filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain", "response-1.log"): nil,
 					},
 					ExpectedOutputValidate: func(t *testing.T, output, resultPath string) {
 						assert.Contains(t, output, "msg=\"starting services for provider\" provider=ollama")
@@ -578,7 +577,6 @@ func TestEvaluateExecute(t *testing.T) {
 						filepath.Join("result-directory", "evaluation.log"): nil,
 						filepath.Join("result-directory", "README.md"):      nil,
 						filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain", "evaluation.log"): nil,
-						filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain", "response-1.log"): nil,
 					},
 				})
 			}

diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go
@@ -111,8 +111,6 @@ func Evaluate(ctx *Context) {
 				logger.Info("starting run", "count", rl+1, "total", ctx.Runs)
 			}
 
-			logger := logger.With(log.AttributeKeyRun, rl+1)
-
 			for _, language := range ctx.Languages {
 				logger := logger.With(log.AttributeKeyLanguage, language.ID())
 
@@ -224,8 +222,6 @@ func Evaluate(ctx *Context) {
 			logger.Info("starting run", "count", rl+1, "total", ctx.Runs)
 		}
 
-		logger := logger.With(log.AttributeKeyRun, rl+1)
-
 		for _, language := range ctx.Languages {
 			languageID := language.ID()
 			logger := logger.With(log.AttributeKeyLanguage, languageID)

diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go
@@ -396,7 +396,6 @@ func TestEvaluate(t *testing.T) {
 					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): func(t *testing.T, filePath, data string) {
 						assert.Contains(t, data, "\"msg\":\"query retry\",\"count\":1,\"total\":3,\"error\":\""+ErrEmptyResponseFromModel.Error())
 					},
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "response-1.log"): nil,
 					"evaluation.csv": nil,
 				},
 			})
@@ -489,7 +488,6 @@ func TestEvaluate(t *testing.T) {
 					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): func(t *testing.T, filePath, data string) {
 						assert.Contains(t, data, "DONE 0 tests, 1 error")
 					},
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "response-1.log"): nil,
 					"evaluation.csv": nil,
 				},
 			})

diff --git a/go.mod b/go.mod
@@ -18,6 +18,7 @@ require (
 
 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/google/uuid v1.6.0
 	github.com/kr/text v0.2.0 // indirect
 	github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect

diff --git a/go.sum b/go.sum
@@ -4,6 +4,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/jessevdk/go-flags v1.5.1-0.20210607101731-3927b71304df h1:JTDw/M13b6dZmEJI/vfcCLENqcjUHi9UBry+R0pjh5Q=
 github.com/jessevdk/go-flags v1.5.1-0.20210607101731-3927b71304df/go.mod h1:Fw0T6WPc1dYxT4mKEZRfG5kJhaTDP9pj1c2EWnYs/m4=
 github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=

diff --git a/log/logger.go b/log/logger.go
@@ -21,8 +21,6 @@ import (
 type AttributeKey string
 
 const (
-	// AttributeKeyArtifact holds the key for the "Artifact" attribute.
-	AttributeKeyArtifact = AttributeKey("Artifact")
 	// AttributeKeyLanguage holds the key for the "Language" attribute.
 	AttributeKeyLanguage = AttributeKey("Language")
 	// AttributeKeyModel holds the key for the "Model" attribute.
@@ -31,8 +29,6 @@ const (
 	AttributeKeyRepository = AttributeKey("Repository")
 	// AttributeKeyResultPath holds the key for the "ResultPath" attribute.
 	AttributeKeyResultPath = AttributeKey("ResultPath")
-	// AttributeKeyRun holds the key for the "Run" attribute.
-	AttributeKeyRun = AttributeKey("Run")
 	// AttributeKeyTask holds the key for the "Task" attribute.
 	AttributeKeyTask = AttributeKey("Task")
 )
@@ -314,34 +310,6 @@ var defaultLogFileSpawners = []handlerSpawner{
 				return nil, err
 			}
 
-			return createFileHandlerForParent(parent, file), nil
-		},
-	},
-	handlerSpawner{
-		NeededAttributes: []AttributeKey{
-			AttributeKeyResultPath,
-
-			AttributeKeyArtifact,
-			AttributeKeyLanguage,
-			AttributeKeyModel,
-			AttributeKeyRepository,
-			AttributeKeyRun,
-			AttributeKeyTask,
-		},
-		Spawn: func(parent slog.Handler, attributes map[AttributeKey]string) (slog.Handler, error) {
-			resultPath := attributes[AttributeKeyResultPath]
-			modelID := attributes[AttributeKeyModel]
-			languageID := attributes[AttributeKeyLanguage]
-			repositoryName := attributes[AttributeKeyRepository]
-			taskIdentifier := attributes[AttributeKeyTask]
-			run := attributes[AttributeKeyRun]
-			artifact := attributes[AttributeKeyArtifact]
-
-			file, err := openLogFile(filepath.Join(resultPath, taskIdentifier, CleanModelNameForFileSystem(modelID), languageID, repositoryName, fmt.Sprintf("%s-%s.log", artifact, run)))
-			if err != nil {
-				return nil, err
-			}
-
 			return createFileHandlerForParent(parent, file), nil
 		},
 	},

diff --git a/log/logger_test.go b/log/logger_test.go
@@ -149,38 +149,6 @@ func TestLogger(t *testing.T) {
 				filepath.Join("taskA", "modelA", "languageA", "repositoryB", "evaluation.log"): nil,
 			},
 		})
-
-		t.Run("Artifacts", func(t *testing.T) {
-			validate(t, &testCase{
-				Name: "Response",
-
-				Do: func(logger *Logger, temporaryPath string) {
-					logger = logger.With(AttributeKeyResultPath, temporaryPath)
-					logger = logger.With(AttributeKeyLanguage, "languageA")
-					logger = logger.With(AttributeKeyModel, "modelA")
-					logger = logger.With(AttributeKeyRepository, "repositoryA")
-					logger = logger.With(AttributeKeyTask, "taskA")
-					logger = logger.With(AttributeKeyRun, "1")
-
-					logger.Info("artifact-content", Attribute(AttributeKeyArtifact, "response"))
-					logger.Info("no-artifact-content")
-				},
-
-				ExpectedLogOutputContains: []string{
-					"no-artifact-content",
-					"artifact-content",
-				},
-				ExpectedFilesContain: map[string][]string{
-					"evaluation.log": nil,
-					filepath.Join("taskA", "modelA", "languageA", "repositoryA", "evaluation.log"): []string{
-						"no-artifact-content",
-					},
-					filepath.Join("taskA", "modelA", "languageA", "repositoryA", "response-1.log"): []string{
-						"artifact-content",
-					},
-				},
-			})
-		})
 	})
 
 	t.Run("Text", func(t *testing.T) {

diff --git a/model/llm/llm.go b/model/llm/llm.go
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"github.com/avast/retry-go"
+	"github.com/google/uuid"
 	pkgerrors "github.com/pkg/errors"
 	"github.com/zimmski/osutil/bytesutil"
 
@@ -298,14 +299,15 @@ func (m *Model) WriteTests(ctx model.Context) (assessment metrics.Assessments, e
 func (m *Model) query(logger *log.Logger, request string) (response string, duration time.Duration, err error) {
 	if err := retry.Do(
 		func() error {
-			logger.Info("querying model", "model", m.ID(), "prompt", string(bytesutil.PrefixLines([]byte(request), []byte("\t"))))
+			id := uuid.NewString() // Fresh UUID per attempt, logged with both the request and the response so the two entries can be matched.
+			logger.Info("querying model", "model", m.ID(), "id", id, "prompt", string(bytesutil.PrefixLines([]byte(request), []byte("\t"))))
 			start := time.Now()
 			response, err = m.provider.Query(context.Background(), m.model, request)
 			if err != nil {
 				return err
 			}
 			duration = time.Since(start)
-			logger.Info("model responded", "model", m.ID(), "duration", duration.Milliseconds(), "response", string(bytesutil.PrefixLines([]byte(response), []byte("\t"))), log.Attribute(log.AttributeKeyArtifact, "response"))
+			logger.Info("model responded", "model", m.ID(), "id", id, "duration", duration.Milliseconds(), "response", string(bytesutil.PrefixLines([]byte(response), []byte("\t"))))
 
 			return nil
 		},