From 7774a4de17cebf74de7898a46d3ece7287e2c6f1 Mon Sep 17 00:00:00 2001
From: Andrew Chang
Date: Tue, 4 Feb 2025 12:45:09 -0800
Subject: [PATCH] Disable and re-enable error injection before secondary db verification (#13368)

Summary:
The crash tests are failing during secondary database verification due to a "truncated block read" error.

https://github.com/facebook/rocksdb/issues/13366 attempted to resolve the issue by checking for injected errors. However, that did not work. It turns out that sometimes faults are injected yet the return status is still "OK." See https://github.com/facebook/rocksdb/blob/main/utilities/fault_injection_fs.cc#L1407-L1414 for an example:

```cpp
} else if (Random::GetTLSInstance()->OneIn(8)) {
  assert(result);
  // For a small chance, set the failure to status but turn the
  // result to be empty, which is supposed to be caught for a check.
  *result = Slice();
  msg << "empty result";
  ctx->message = msg.str();
  ret_fault_injected = true;
```

My hypothesis is that this particular fault injection is the root cause of the "truncated block read" error. Because such a fault cannot be detected from the returned status, this change stops checking the status for injected errors and instead disables thread-local read and metadata-read error injection around the secondary `Get()` call, re-enabling it immediately afterwards.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/13368 Test Plan: Hopefully the recurring crash tests start passing consistently for secondary db verification Reviewed By: hx235 Differential Revision: D69132024 Pulled By: archang19 fbshipit-source-id: 941406165a2fd306f10048614457261cda99d762 --- db_stress_tool/no_batched_ops_stress.cc | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 6c8bd8c968a..321253b0c35 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -221,14 +221,22 @@ class NonBatchedOpsStressTest : public StressTest { const std::string key = Key(i); std::string from_db; + // Temporarily disable error injection to verify the secondary + if (fault_fs_guard) { + fault_fs_guard->DisableThreadLocalErrorInjection( + FaultInjectionIOType::kRead); + fault_fs_guard->DisableThreadLocalErrorInjection( + FaultInjectionIOType::kMetadataRead); + } + s = secondary_db_->Get(options, secondary_cfhs_[cf], key, &from_db); - if (!s.ok() && IsErrorInjectedAndRetryable(s)) { - fprintf( - stdout, - "Skipping secondary verification for key because error was " - "injected into read\n"); - continue; + // Re-enable error injection after verifying the secondary + if (fault_fs_guard) { + fault_fs_guard->EnableThreadLocalErrorInjection( + FaultInjectionIOType::kRead); + fault_fs_guard->EnableThreadLocalErrorInjection( + FaultInjectionIOType::kMetadataRead); } assert(!pre_read_expected_values.empty() &&