diff --git a/common/log/tag/tags.go b/common/log/tag/tags.go index 45ede18df6a..e63996fe165 100644 --- a/common/log/tag/tags.go +++ b/common/log/tag/tags.go @@ -568,6 +568,9 @@ func WorkerComponent(v interface{}) ZapTag { return NewStringTag("worker-component", fmt.Sprintf("%T", v)) } +// FailedAssertion is a tag for marking a message as a failed assertion. +var FailedAssertion = NewBoolTag("failed-assertion", true) + // history engine shard // ShardID returns tag for ShardID diff --git a/common/softassert/softassert.go b/common/softassert/softassert.go new file mode 100644 index 00000000000..7aa2fa5e26d --- /dev/null +++ b/common/softassert/softassert.go @@ -0,0 +1,48 @@ +// The MIT License +// +// Copyright (c) 2025 Temporal Technologies Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package softassert + +import ( + "go.temporal.io/server/common/log" + "go.temporal.io/server/common/log/tag" +) + +// That performs a soft assertion by logging an error if the given condition is false. +// It is meant to indicate a condition that are always expected to be true. +// Returns true if the condition is met, otherwise false. +// +// Example: +// softassert.That(logger, object.state == "ready", "object is not ready") +// +// Best practices: +// - Use it to check for programming errors and invariants. +// - Use it to communicate assumptions about the code. +// - Use it to abort or recover from an unexpected state. +// - Never use it as a substitute for regular error handling, validation, or control flow. +func That(logger log.Logger, condition bool, msg string) bool { + if !condition { + // By using the same prefix for all assertions, they can be reliably found in logs. + logger.Error("failed assertion: "+msg, tag.FailedAssertion) + } + return condition +} diff --git a/common/testing/testlogger/testlogger.go b/common/testing/testlogger/testlogger.go new file mode 100644 index 00000000000..90ff17d319c --- /dev/null +++ b/common/testing/testlogger/testlogger.go @@ -0,0 +1,511 @@ +// The MIT License +// +// Copyright (c) 2025 Temporal Technologies Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package testlogger + +import ( + "container/list" + "fmt" + "os" + "regexp" + "strings" + "sync" + "sync/atomic" + + "go.temporal.io/server/common/log" + "go.temporal.io/server/common/log/tag" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest" +) + +type Level string + +const ( + Debug Level = "debug" + Info Level = "info" + Warn Level = "warn" + // Panics, DPanics, and Fatal logs are considered errors + Error Level = "error" +) + +type Mode string + +const ( + // FailOnAnyUnexpectedError mode will fail if any unexpected error is encountered, + // like an allowlist. Use TestLogger.Expect to add an error to the allowlist. + FailOnAnyUnexpectedError Mode = "fail-on-unexpected-errors" + // FailOnExpectedErrorOnly mode will only fail if an expected error is encountered, + // like a blocklist. Use TestLogger.Expect to add an error to the blocklist. + FailOnExpectedErrorOnly = "fail-on-expected-errors" +) + +// This is a subset of the testing.T interface that we use in this package that is +// also shared with *rapid.T. +type TestingT interface { + zaptest.TestingT + Helper() + Name() string + Logf(format string, args ...any) + Log(args ...any) + Fatalf(format string, args ...any) + Fatal(args ...any) +} + +// CleanupCapable is an interface that allows a test to register cleanup functions. +// This is /not/ implemented by *rapid.T but is by *testing.T +type CleanupCapableT interface { + TestingT + Cleanup(func()) +} + +func (l Level) String() string { + return string(l) +} + +// Expectations represent errors we expect to happen in tests. +// Their only purpose is to allow un-expecting (hah!) an error we've +// marked as expected +type Expectation struct { + e *list.Element + testLogger *TestLogger + lvl Level +} + +// Forget removes a previously registered expectation. +// A forgotten expectation will no longer be evaluated when errors are encountered. +func (e *Expectation) Forget() { + e.testLogger.Forget(e) +} + +type matcher struct { + expectation *Expectation + msg *regexp.Regexp + tags map[string]*regexp.Regexp +} + +func newMatcher(msg string, tags []tag.Tag, e *Expectation) matcher { + var rgx *regexp.Regexp + if msg != "" { + rgx = regexp.MustCompile(msg) + } + m := matcher{ + expectation: e, + msg: rgx, + tags: map[string]*regexp.Regexp{}, + } + for _, t := range tags { + m.tags[t.Key()] = regexp.MustCompile(formatValue(t)) + } + return m +} + +func (m matcher) Matches(msg string, tags []tag.Tag) bool { + // A nil regexp (meaning original string "") means we're only matching based on tags. + if m.msg != nil && !m.msg.MatchString(msg) { + return false + } + + if len(m.tags) == 0 { + return true + } + + // We need to match all tags specified in the matcher, + // not all tags on the message + remainingMatches := len(m.tags) + for _, t := range tags { + pat, ok := m.tags[t.Key()] + if !ok { + continue + } + if pat.MatchString(formatValue(t)) { + remainingMatches-- + } + } + return remainingMatches == 0 +} + +type sharedTestLoggerState struct { + panicOnDPanic atomic.Bool + panicOnError atomic.Bool + t TestingT + mu struct { + sync.RWMutex + expectations map[Level]*list.List // Map[Level]List[matcher] + closed bool + failed bool + } + mode Mode + logExpectations bool + logCaller bool + level zapcore.Level +} + +// TestLogger is a log.Logger implementation that logs to the test's logger +// _but_ will fail the test if log levels _above_ Warn are present +type TestLogger struct { + wrapped log.Logger + state *sharedTestLoggerState + tags []tag.Tag +} + +type LoggerOption func(*TestLogger) + +func DontPanicOnError(t *TestLogger) *TestLogger { + t.state.panicOnError.Store(false) + return t +} + +func DontPanicOnDPanic(t *TestLogger) *TestLogger { + t.state.panicOnDPanic.Store(false) + return t +} + +func LogExpectations(t *TestLogger) *TestLogger { + t.state.logExpectations = true + return t +} + +// WithLogTags adds tags to the logs emitted by the underlying logger +func WithLogTags(tags ...tag.Tag) LoggerOption { + return func(t *TestLogger) { + t.tags = tags + } +} + +func WrapLogger(l log.Logger) LoggerOption { + return func(t *TestLogger) { + t.wrapped = l + } +} + +func LogLevel(level zapcore.Level) LoggerOption { + return func(t *TestLogger) { + t.state.level = level + } +} + +func WithoutCaller() LoggerOption { + return func(t *TestLogger) { + t.state.logCaller = false + } +} + +func LogCaller() LoggerOption { + return func(t *TestLogger) { + t.state.logCaller = false + } +} + +// SetLogLevel overrides the temporal test log level during this test. +func SetLogLevel(tt CleanupCapableT, level zapcore.Level) LoggerOption { + return func(t *TestLogger) { + oldLevel := os.Getenv("TEMPORAL_TEST_LOG_LEVEL") + _ = os.Setenv("TEMPORAL_TEST_LOG_LEVEL", level.String()) + tt.Cleanup(func() { + _ = os.Setenv("TEMPORAL_TEST_LOG_LEVEL", oldLevel) + }) + + t.state.level = level + } +} + +var _ log.Logger = (*TestLogger)(nil) + +// NewTestLogger creates a new TestLogger that logs to the provided testing.T. +// Mode controls the behavior of the logger for when an expected or unexpected error is encountered. +func NewTestLogger(t TestingT, mode Mode, opts ...LoggerOption) *TestLogger { + tl := &TestLogger{ + state: &sharedTestLoggerState{ + t: t, + logExpectations: false, + level: zapcore.DebugLevel, + logCaller: true, + mode: mode, + }, + } + tl.state.panicOnError.Store(true) + tl.state.panicOnDPanic.Store(true) + tl.state.mu.expectations = map[Level]*list.List{ + Debug: list.New(), + Info: list.New(), + Warn: list.New(), + Error: list.New(), + } + for _, opt := range opts { + opt(tl) + } + if tl.wrapped == nil { + ztl := zaptest.NewLogger(t, + zaptest.Level(tl.state.level), + zaptest.WrapOptions( + zap.AddStacktrace(zap.ErrorLevel), + zap.WithCaller(tl.state.logCaller), + ), + ) + tl.wrapped = log.NewZapLogger(ztl).Skip(1) + } + + // Only possible with a *testing.T until *rapid.T supports `Cleanup` + if ct, ok := t.(CleanupCapableT); ok { + // NOTE(tim): We don't care about anything logged after the test completes. Sure, this is racy, + // but it reduces the likelihood that we see stupid errors due to testing.T.Logf race conditions... + ct.Cleanup(tl.Close) + } + + return tl +} + +// ResetFailureStatus resets the failure state, returning the previous value. +// This is useful to verify that no unexpected errors were logged after the test +// completed (together with DontPanicOnError and/or DontPanicOnDPanic). +func (tl *TestLogger) ResetFailureStatus() bool { + tl.state.mu.Lock() + defer tl.state.mu.Unlock() + prevFailed := tl.state.mu.failed + tl.state.mu.failed = false + return prevFailed +} + +// Expect instructs the logger to expect certain errors, as specified by the msg and tag arguments. +// Depending on the Mode of the test logger, the expectation either acts as an entry in a +// blocklist (FailOnExpectedErrorOnly) or an allowlist (FailOnAnyUnexpectedError). +func (tl *TestLogger) Expect(level Level, msg string, tags ...tag.Tag) *Expectation { + tl.state.mu.Lock() + defer tl.state.mu.Unlock() + if tl.state.logExpectations { + tl.wrapped.Info(fmt.Sprintf("(%p) TestLogger::Expecting: '%s'\n", tl, msg)) + } + e := &Expectation{ + testLogger: tl, + lvl: level, + } + m := newMatcher(msg, tags, e) + e.e = tl.state.mu.expectations[level].PushBack(m) + return e +} + +// Forget removes a previously registered expectation. +// A forgotten expectation will no longer be evaluated when errors are encountered. +func (tl *TestLogger) Forget(e *Expectation) { + tl.state.mu.Lock() + defer tl.state.mu.Unlock() + tl.state.mu.expectations[e.lvl].Remove(e.e) +} + +func (tl *TestLogger) shouldFailTest(level Level, msg string, tags []tag.Tag) bool { + expectations := tl.state.mu.expectations[level] + for e := expectations.Front(); e != nil; e = e.Next() { + m, ok := e.Value.(matcher) + if !ok { + tl.state.t.Fatalf("Bug in TestLogger: invalid %T value in matcher list", e.Value) + } + if m.Matches(msg, tags) { + return tl.state.mode == FailOnExpectedErrorOnly + } + } + return tl.state.mode == FailOnAnyUnexpectedError +} + +// PanicOnError overrides the behavior of this logger. It returns the previous value +// so that it can be restored later. +func (tl *TestLogger) PanicOnError(v bool) bool { + return tl.state.panicOnError.Swap(v) +} + +// DPanic implements log.Logger. +func (tl *TestLogger) DPanic(msg string, tags ...tag.Tag) { + tl.state.mu.RLock() + defer tl.state.mu.RUnlock() + if tl.state.mu.closed { + return + } + if tl.tags != nil { + tags = append(tags, tl.tags...) + } + // note, actual panic'ing in wrapped is turned off so we can control. + tl.wrapped.DPanic(msg, tags...) + if tl.state.panicOnDPanic.Load() && tl.shouldFailTest(Error, msg, tags) { + tl.state.t.Helper() + panic(failureMessage("DPanic", msg, tags)) + } +} + +// Debug implements log.Logger. +func (tl *TestLogger) Debug(msg string, tags ...tag.Tag) { + tl.state.mu.RLock() + defer tl.state.mu.RUnlock() + if tl.state.mu.closed { + return + } + if tl.tags != nil { + tags = append(tags, tl.tags...) + } + tl.wrapped.Debug(msg, append(tags, tl.tags...)...) +} + +// Error implements log.Logger. +func (tl *TestLogger) Error(msg string, tags ...tag.Tag) { + tl.state.mu.RLock() + if tl.state.mu.closed { + tl.state.mu.RUnlock() + return + } + if tl.tags != nil { + tags = append(tags, tl.tags...) + } + if !tl.shouldFailTest(Error, msg, tags) { + tl.wrapped.Error(msg, tags...) + tl.state.mu.RUnlock() + return + } + tl.state.mu.RUnlock() + + if tl.state.panicOnError.Load() { + tl.state.t.Helper() + tl.wrapped.Error(msg, tags...) + panic(failureMessage("Error", msg, tags)) + } + + // Labeling the error as unexpected; so it can easily be identified later. + tl.wrapped.Error(errorMessage("Error", msg), tags...) + + // Not panic'ing, so marking the test as failed. + tl.state.mu.Lock() + tl.state.mu.failed = true + tl.state.mu.Unlock() +} + +// Fatal implements log.Logger. +func (tl *TestLogger) Fatal(msg string, tags ...tag.Tag) { + tl.state.mu.RLock() + defer tl.state.mu.RUnlock() + if tl.state.mu.closed { + return + } + if tl.tags != nil { + tags = append(tags, tl.tags...) + } + tl.state.t.Helper() + tl.state.t.Fatal(failureMessage("Fatal", msg, tags)) +} + +// Info implements log.Logger. +func (tl *TestLogger) Info(msg string, tags ...tag.Tag) { + tl.state.mu.RLock() + defer tl.state.mu.RUnlock() + if tl.state.mu.closed { + return + } + if tl.tags != nil { + tags = append(tags, tl.tags...) + } + tl.wrapped.Info(msg, tags...) +} + +// Panic implements log.Logger. +func (tl *TestLogger) Panic(msg string, tags ...tag.Tag) { + tl.state.mu.RLock() + defer tl.state.mu.RUnlock() + if tl.state.mu.closed { + return + } + if tl.tags != nil { + tags = append(tags, tl.tags...) + } + tl.state.t.Helper() + tl.state.t.Fatal(failureMessage("Panic", msg, tags)) +} + +// Warn implements log.Logger. +func (tl *TestLogger) Warn(msg string, tags ...tag.Tag) { + tl.state.mu.RLock() + defer tl.state.mu.RUnlock() + if tl.state.mu.closed { + return + } + if tl.tags != nil { + tags = append(tags, tl.tags...) + } + tl.wrapped.Warn(msg, tags...) +} + +// WithTags gives you a new logger, copying the tags of the source, appending the provided new Tags +func (tl *TestLogger) WithTags(tags ...tag.Tag) *TestLogger { + allTags := make([]tag.Tag, len(tl.tags)+len(tags)) + copy(allTags, tl.tags) + copy(allTags[len(tl.tags):], tags) + newtl := &TestLogger{ + wrapped: tl.wrapped, + state: tl.state, + tags: allTags, + } + return newtl +} + +// Close disallows any further logging, preventing the test framework from complaining about +// logging post-test. +func (tl *TestLogger) Close() { + // Taking the write lock ensures all in-progress log calls complete before we close. + // This prevents a race condition after the test has completed. + tl.state.mu.Lock() + tl.state.mu.closed = true + tl.state.mu.Unlock() +} + +func (tl *TestLogger) T() TestingT { + return tl.state.t +} + +// Format the log.Logger tags and such into a useful message +func failureMessage(level string, msg string, tags []tag.Tag) string { + var b strings.Builder + b.WriteString("FAIL: ") + b.WriteString(errorMessage(level, msg)) + for _, t := range tags { + b.WriteString(t.Key()) + b.WriteString("=") + b.WriteString(formatValue(t)) + b.WriteString(" ") + } + return b.String() +} + +func errorMessage(level string, msg string) string { + var b strings.Builder + b.WriteString("Unexpected ") + b.WriteString(level) + b.WriteString(" log encountered: ") + b.WriteString(msg) + return b.String() +} + +func formatValue(t tag.Tag) string { + switch v := t.Value().(type) { + case fmt.Stringer: + return v.String() + case string: + return v + default: + return fmt.Sprintf("%v", v) + } +} diff --git a/tests/deployment_test.go b/tests/deployment_test.go index 8298cba6f55..134ed1f98c5 100644 --- a/tests/deployment_test.go +++ b/tests/deployment_test.go @@ -120,6 +120,7 @@ func (s *DeploymentSuite) TearDownTest() { if s.sdkClient != nil { s.sdkClient.Close() } + s.FunctionalTestBase.TearDownTest() } // pollFromDeployment calls PollWorkflowTaskQueue to start deployment related workflows diff --git a/tests/schedule_test.go b/tests/schedule_test.go index c37632513b5..87090383abb 100644 --- a/tests/schedule_test.go +++ b/tests/schedule_test.go @@ -111,6 +111,7 @@ func (s *ScheduleFunctionalSuite) TearDownTest() { if s.sdkClient != nil { s.sdkClient.Close() } + s.FunctionalTestBase.TearDownTest() } func (s *ScheduleFunctionalSuite) TestBasics() { diff --git a/tests/testcore/functional_test_base.go b/tests/testcore/functional_test_base.go index 7979de9a70c..0fd678cfae2 100644 --- a/tests/testcore/functional_test_base.go +++ b/tests/testcore/functional_test_base.go @@ -60,6 +60,7 @@ import ( "go.temporal.io/server/common/testing/historyrequire" "go.temporal.io/server/common/testing/protorequire" "go.temporal.io/server/common/testing/testhooks" + "go.temporal.io/server/common/testing/testlogger" "go.temporal.io/server/common/testing/updateutils" "go.temporal.io/server/environment" "go.uber.org/fx" @@ -179,6 +180,13 @@ func (s *FunctionalTestBase) SetupSuite() { } func (s *FunctionalTestBase) TearDownSuite() { + // NOTE: We can't make s.Logger a testlogger.TestLogger because of AcquireShardSuiteBase. + if tl, ok := s.Logger.(*testlogger.TestLogger); ok { + // Before we tear down the cluster, we disable the test logger. + // This prevents cluster teardown errors from failing the test; and log spam. + tl.Close() + } + s.TearDownCluster() } @@ -191,9 +199,16 @@ func (s *FunctionalTestBase) SetupSuiteWithDefaultCluster(options ...TestCluster func (s *FunctionalTestBase) SetupSuiteWithCluster(clusterConfigFile string, options ...TestClusterOption) { params := ApplyTestClusterOptions(options) - // Logger might be already set by the test suite. + // NOTE: A suite might set its own logger. Example: AcquireShardSuiteBase. if s.Logger == nil { - s.Logger = log.NewTestLogger() + tl := testlogger.NewTestLogger(s.T(), testlogger.FailOnExpectedErrorOnly) + // Instead of panic'ing immediately, TearDownTest will check if the test logger failed + // after each test completed. This is better since otherwise is would fail inside + // the server and not the test, creating a lot of noise and possibly stuck tests. + testlogger.DontPanicOnError(tl) + // Fail test when an assertion fails (see `softassert` package). + tl.Expect(testlogger.Error, ".*", tag.FailedAssertion) + s.Logger = tl } // Setup test cluster. @@ -241,6 +256,7 @@ func (s *FunctionalTestBase) SetupTest() { } func (s *FunctionalTestBase) SetupSubTest() { + s.checkNoUnexpectedErrorLogs() // make sure the previous sub test was cleaned up properly s.initAssertions() } @@ -348,6 +364,25 @@ func (s *FunctionalTestBase) TearDownCluster() { } } +// **IMPORTANT**: When overridding this, make sure to invoke `s.FunctionalTestBase.TearDownTest()`. +func (s *FunctionalTestBase) TearDownTest() { + s.checkNoUnexpectedErrorLogs() +} + +// **IMPORTANT**: When overridding this, make sure to invoke `s.FunctionalTestBase.TearDownSubTest()`. +func (s *FunctionalTestBase) TearDownSubTest() { + s.checkNoUnexpectedErrorLogs() +} + +func (s *FunctionalTestBase) checkNoUnexpectedErrorLogs() { + if tl, ok := s.Logger.(*testlogger.TestLogger); ok { + if tl.ResetFailureStatus() { + s.Fail(`Failing test as unexpected error logs were found. +Look for 'Unexpected Error log encountered'.`) + } + } +} + // Register namespace using persistence API because: // 1. The Retention period is set to 0 for archival tests, and this can't be done through FE, // 2. Update search attributes would require an extra API call, diff --git a/tests/testcore/functional_test_sdk_suite.go b/tests/testcore/functional_test_sdk_suite.go index 8e9c213fac7..c0b43d64b05 100644 --- a/tests/testcore/functional_test_sdk_suite.go +++ b/tests/testcore/functional_test_sdk_suite.go @@ -124,6 +124,7 @@ func (s *FunctionalTestSdkSuite) TearDownTest() { if s.sdkClient != nil { s.sdkClient.Close() } + s.FunctionalTestBase.TearDownTest() } func (s *FunctionalTestSdkSuite) EventuallySucceeds(ctx context.Context, operationCtx backoff.OperationCtx) { diff --git a/tests/tls_test.go b/tests/tls_test.go index 9ab0742d774..41e5de7aa17 100644 --- a/tests/tls_test.go +++ b/tests/tls_test.go @@ -75,6 +75,7 @@ func (s *TLSFunctionalSuite) TearDownTest() { if s.sdkClient != nil { s.sdkClient.Close() } + s.FunctionalTestBase.TearDownTest() } func (s *TLSFunctionalSuite) TestGRPCMTLS() { diff --git a/tests/versioning_test.go b/tests/versioning_test.go index 7ec5055b07a..b17a547f82d 100644 --- a/tests/versioning_test.go +++ b/tests/versioning_test.go @@ -141,6 +141,7 @@ func (s *VersioningIntegSuite) TearDownTest() { if s.sdkClient != nil { s.sdkClient.Close() } + s.FunctionalTestBase.TearDownTest() } func (s *VersioningIntegSuite) TestVersionRuleConflictToken() { diff --git a/tests/worker_deployment_version_test.go b/tests/worker_deployment_version_test.go index f9ebbbf2552..e878f8230cf 100644 --- a/tests/worker_deployment_version_test.go +++ b/tests/worker_deployment_version_test.go @@ -113,6 +113,7 @@ func (s *DeploymentVersionSuite) TearDownTest() { if s.sdkClient != nil { s.sdkClient.Close() } + s.FunctionalTestBase.TearDownTest() } // pollFromDeployment calls PollWorkflowTaskQueue to start deployment related workflows