Skip to content

Commit

Permalink
Fix sanitizer issues [BMQ,MWC] (#373)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-e1off authored Aug 29, 2024
1 parent 0f3466d commit 4dd944b
Show file tree
Hide file tree
Showing 11 changed files with 62 additions and 122 deletions.
2 changes: 2 additions & 0 deletions etc/msansup.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
# MemorySanitizer suppressions file for BlazingMQ.

# Locking APIs from non-instrumented `libpthread` yields false-positives.
fun:_ZN11BloombergLP5bslmt9MutexImplINS0_8Platform12PosixThreadsEE*
26 changes: 12 additions & 14 deletions etc/tsansup.txt
Original file line number Diff line number Diff line change
@@ -1,34 +1,32 @@
# ThreadSanitizer suppressions file for BMQ
# ThreadSanitizer suppressions file for BlazingMQ.

# There's a lengthy comment in ObjectPool::getObject that explains why this
# isn't a race. I'm not smart enough to figure out if it's right, but I'll
# assume it's right
race:BloombergLP::bdlcc::ObjectPool<*>::getObject
race:BloombergLP::bdlcc::ObjectPool<*>::releaseObject

# Issue 176120121 is created for tracking.
race:BloombergLP::bdlma::ConcurrentPool::deallocate
race:BloombergLP::bdlma::ConcurrentPool::allocate
race:BloombergLP::bdlma::ConcurrentPool::deleteObject

# Similar to above, there is a lengthy comment in bcema_Pool::allocate which
# attempts to explain why there is no race.
race:BloombergLP::bcema_Pool<*>::allocate

# Not sure what the problem is here, but tsan can't show the other stack, so
# there's nothing to look into
race:__tsan_atomic32_fetch_add

# Don't warn about using cout from multiple threads
race:std::basic_ostream<char, *>& bsl::operator<< <*>(std::basic_ostream<*>&, bsl::basic_string<*> const&)

# Looks like ball::LoggerManager uses a plain ptr to store its singleton, and
# it makes tsan warn in some cases
race:BloombergLP::ball::LoggerManager::isInitialized()

# Suppress TSan report in a routine used in bmqimp::Brokersession test driver.
# It is a benign race in the test driver, but should be looked into at some
# point.
race:TestSession::waitForChannelClose
race:TestSession::arriveAtStepWithCfgs
# Suppress sporadically appearing data race in bmqimp::BrokerSession test driver.
# In TestSession::arriveAtStepWithCfgs() there is a call of queue->setOptions() method,
# at nearly same time in other thread bmqimp::BrokerSession::onConfigureQueueResponse() calls
# queue->options().suspendsOnBadHostHealth() method which is detected as data race.
# bmqt::QueueOptions and bmqimp::Queue classes are not thread safe by design,
# and bmqimp::BrokerSession::onConfigureQueueResponse() callback access them in
# not thread-safe manner, probably also by design, assuming that it will be called again
# if something is changed. Further investigation is required, suppress it for now.
race:BloombergLP::bmqt::QueueOptions::suspendsOnBadHostHealth

# Since we use mqbmock::Dispatcher in unit tests, this method does not get
# enqueued correctly back to cluster-dispatcher thread, causing potential
Expand Down
8 changes: 2 additions & 6 deletions etc/ubsansup.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,2 @@
# bmqp::Crc32c carries out misaligned access of Int64 in one of the internal
# routines, but only on x86/64. Misaligned accesses are handled gracefully by
# this hardware, unlike SPARC. So we simply suppress this warning. Other
# option is to update the code, which can be done by someone feeling
# adventurous.
alignment:crc32c1024SseInt
# UndefinedBehaviorSanitizer suppressions file for BlazingMQ.
# See details https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html#runtime-suppressions.
4 changes: 2 additions & 2 deletions src/groups/bmq/bmqimp/bmqimp_brokersession.t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2196,12 +2196,12 @@ bool TestSession::waitForChannelClose(const bsls::TimeInterval& timeout)
// Wait for the close to be called on the base channel
const bsls::TimeInterval expireAfter =
bsls::SystemTime::nowRealtimeClock() + timeout;
while (d_testChannel.closeCalls().empty() &&
while (d_testChannel.closeCallsEmpty() &&
bsls::SystemTime::nowRealtimeClock() < expireAfter) {
bslmt::ThreadUtil::microSleep(k_TIME_SOURCE_STEP.totalMicroseconds());
}

if (d_testChannel.closeCalls().empty()) {
if (d_testChannel.closeCallsEmpty()) {
return false; // RETURN
}

Expand Down
6 changes: 5 additions & 1 deletion src/groups/bmq/bmqp/bmqp_messageguidgenerator.t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,11 @@ static void test3_multithreadUseIP()

const int k_NUM_THREADS = 10;

#if defined(__has_feature)
#ifdef BSLS_PLATFORM_OS_SOLARIS
// This test case times out if 'k_NUM_GUIDS' is close to 1 million
// (it's unable to complete in 90 seconds).
const int k_NUM_GUIDS = 500000; // 500k
#elif defined(__has_feature)
// Avoid timeout under MemorySanitizer
const int k_NUM_GUIDS = __has_feature(memory_sanitizer) ? 500000 // 500k
: 1000000; // 1M
Expand Down
5 changes: 4 additions & 1 deletion src/groups/mqb/mqbs/mqbs_datastore.t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,10 @@ static void test2_defaultHashUniqueness()

mwctst::TestHelper::printTestName("DEFAULT HASH UNIQUENESS");

#if defined(__has_feature)
#ifdef BSLS_PLATFORM_OS_SOLARIS
// This test case times out if 'k_NUM_GUIDS' is close to 1 million
const bsls::Types::Int64 k_NUM_KEYS = 1000000; // 1M
#elif defined(__has_feature)
// Avoid timeout under MemorySanitizer
const bsls::Types::Int64 k_NUM_KEYS = __has_feature(memory_sanitizer)
? 1000000 // 1M
Expand Down
6 changes: 5 additions & 1 deletion src/groups/mqb/mqbu/mqbu_messageguidutil.t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,11 @@ static void test2_multithread()

const int k_NUM_THREADS = 10;

#if defined(__has_feature)
#ifdef BSLS_PLATFORM_OS_SOLARIS
// This test case times out if 'k_NUM_GUIDS' is close to 1 million
// (it's unable to complete in 90 seconds).
const int k_NUM_GUIDS = 500000; // 500k
#elif defined(__has_feature)
// Avoid timeout under MemorySanitizer
const int k_NUM_GUIDS = __has_feature(memory_sanitizer) ? 500000 // 500k
: 1000000; // 1M
Expand Down
102 changes: 12 additions & 90 deletions src/groups/mwc/mwcio/mwcio_ntcchannelfactory.t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,19 +98,22 @@ static const ChannelWatermarkType::Enum WAT_HIGH =
static const ChannelWatermarkType::Enum WAT_LOW =
ChannelWatermarkType::e_LOW_WATERMARK;

// Adjust attempt interval depending on the platform
#ifdef BSLS_PLATFORM_OS_SOLARIS
static const bool skipTest = true;
static const int k_ATTEMPT_INTERVAL_MS = 300;
#elif defined( \
__has_feature) // Clang-supported method for checking sanitizers.
static const bool skipTest = __has_feature(memory_sanitizer) ||
__has_feature(thread_sanitizer) ||
__has_feature(undefined_behavior_sanitizer);
static const int k_ATTEMPT_INTERVAL_MS =
(__has_feature(memory_sanitizer) || __has_feature(thread_sanitizer) ||
__has_feature(undefined_behavior_sanitizer))
? 400
: 1;
#elif defined(__SANITIZE_MEMORY__) || defined(__SANITIZE_THREAD__) || \
defined(__SANITIZE_UNDEFINED__)
// GCC-supported macros for checking MSAN, TSAN and UBSAN.
static const bool skipTest = true;
static const int k_ATTEMPT_INTERVAL_MS = 400;
#else
static const bool skipTest = false; // Default to running the test.
static const int k_ATTEMPT_INTERVAL_MS = 1;
#endif

// ========================
Expand Down Expand Up @@ -658,10 +661,9 @@ void Tester::connect(int line,

ConnectOptions reqOptions(options, s_allocator_p);

reqOptions.setAttemptInterval(
bsls::TimeInterval(0, 10 * bdlt::TimeUnitRatio::k_NS_PER_MS));

reqOptions.setAttemptInterval(bsls::TimeInterval(0, 1));
reqOptions.setAttemptInterval(bsls::TimeInterval(
0,
k_ATTEMPT_INTERVAL_MS * bdlt::TimeUnitRatio::k_NS_PER_MS));

HandleMap::iterator serverIter = d_handleMap.find(endpointOrServer);
if (serverIter != d_handleMap.end()) {
Expand Down Expand Up @@ -1055,22 +1057,6 @@ static void test6_preCreationCbTest()
{
mwctst::TestHelper::printTestName("Pre Creation Cb Test");

if (skipTest) {
// This test has been disabled for Solaris/MSan/TSan/UBSan build. This
// test relies on the timings of certain callbacks being fired before
// or after certain operations. Normally this timing is always
// observed, but in msan/tsan/ubsan enabled build, the timing gets
// changed, leading to test failure. Of course, the right fix is to
// not rely on these timings, which can be worked on if the test starts
// failing in non-instrumented builds. Additionally, we could try to
// enable this test in MSan/TSan/UBSan build once all MSan/TSan/UBSan
// reports have been fixed to see if that helps (see `msansup.txt`,
// `tsansup.txt` and `ubsansup.txt`).
bsl::cout << "Test skipped (running on Solaris or under sanitizer)"
<< bsl::endl;
return; // RETURN
}

Tester t(s_allocator_p);

// Concern 'a'
Expand Down Expand Up @@ -1106,22 +1092,6 @@ static void test5_visitChannelsTest()
{
mwctst::TestHelper::printTestName("Cancel Handle Test");

if (skipTest) {
// This test has been disabled for Solaris/MSan/TSan/UBSan build. This
// test relies on the timings of certain callbacks being fired before
// or after certain operations. Normally this timing is always
// observed, but in msan/tsan/ubsan enabled build, the timing gets
// changed, leading to test failure. Of course, the right fix is to
// not rely on these timings, which can be worked on if the test starts
// failing in non-instrumented builds. Additionally, we could try to
// enable this test in MSan/TSan/UBSan build once all MSan/TSan/UBSan
// reports have been fixed to see if that helps (see `msansup.txt`,
// `tsansup.txt` and `ubsansup.txt`).
bsl::cout << "Test skipped (running on Solaris or under sanitizer)"
<< bsl::endl;
return; // RETURN
}

Tester t(s_allocator_p);

// Concerns 'a'
Expand Down Expand Up @@ -1162,22 +1132,6 @@ static void test4_cancelHandleTest()
{
mwctst::TestHelper::printTestName("Cancel Handle Test");

if (skipTest) {
// This test has been disabled for Solaris/MSan/TSan/UBSan build. This
// test relies on the timings of certain callbacks being fired before
// or after certain operations. Normally this timing is always
// observed, but in msan/tsan/ubsan enabled build, the timing gets
// changed, leading to test failure. Of course, the right fix is to
// not rely on these timings, which can be worked on if the test starts
// failing in non-instrumented builds. Additionally, we could try to
// enable this test in MSan/TSan/UBSan build once all MSan/TSan/UBSan
// reports have been fixed to see if that helps (see `msansup.txt`,
// `tsansup.txt` and `ubsansup.txt`).
bsl::cout << "Test skipped (running on Solaris or under sanitizer)"
<< bsl::endl;
return; // RETURN
}

Tester t(s_allocator_p);

// Concerns 'a'
Expand Down Expand Up @@ -1232,22 +1186,6 @@ static void test3_watermarkTest()
{
mwctst::TestHelper::printTestName("Watermark Test");

if (skipTest) {
// This test has been disabled for Solaris/MSan/TSan/UBSan build. This
// test relies on the timings of certain callbacks being fired before
// or after certain operations. Normally this timing is always
// observed, but in msan/tsan/ubsan enabled build, the timing gets
// changed, leading to test failure. Of course, the right fix is to
// not rely on these timings, which can be worked on if the test starts
// failing in non-instrumented builds. Additionally, we could try to
// enable this test in MSan/TSan/UBSan build once all MSan/TSan/UBSan
// reports have been fixed to see if that helps (see `msansup.txt`,
// `tsansup.txt` and `ubsansup.txt`).
bsl::cout << "Test skipped (running on Solaris or under sanitizer)"
<< bsl::endl;
return; // RETURN
}

Tester t(s_allocator_p);

// Concern 'a'
Expand Down Expand Up @@ -1337,22 +1275,6 @@ static void test1_breathingTest()
{
mwctst::TestHelper::printTestName("Breathing Test");

if (skipTest) {
// This test has been disabled for Solaris/MSan/TSan/UBSan build. This
// test relies on the timings of certain callbacks being fired before
// or after certain operations. Normally this timing is always
// observed, but in msan/tsan/ubsan enabled build, the timing gets
// changed, leading to test failure. Of course, the right fix is to
// not rely on these timings, which can be worked on if the test starts
// failing in non-instrumented builds. Additionally, we could try to
// enable this test in MSan/TSan/UBSan build once all MSan/TSan/UBSan
// reports have been fixed to see if that helps (see `msansup.txt`,
// `tsansup.txt` and `ubsansup.txt`).
bsl::cout << "Test skipped (running on Solaris or under sanitizer)"
<< bsl::endl;
return; // RETURN
}

Tester t(s_allocator_p);
t.init(L_);

Expand Down
6 changes: 6 additions & 0 deletions src/groups/mwc/mwcio/mwcio_testchannel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,5 +293,11 @@ bool TestChannel::hasNoMoreWriteCalls() const
return d_hasNoMoreWriteCalls;
}

bool TestChannel::closeCallsEmpty() const
{
bslmt::LockGuard<bslmt::Mutex> guard(&d_mutex); // LOCK
return d_closeCalls.empty();
}

} // close package namespace
} // close enterprise namespace
6 changes: 5 additions & 1 deletion src/groups/mwc/mwcio/mwcio_testchannel.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ class TestChannel : public Channel {
bsl::deque<OnWatermarkCall> d_onWatermarkCalls;
mwct::PropertyBag d_properties;
bsl::string d_peerUri;
bslmt::Mutex d_mutex;
mutable bslmt::Mutex d_mutex;
bslmt::Condition d_condition;
bool d_isFinal;
bool d_hasNoMoreWriteCalls;
Expand Down Expand Up @@ -258,6 +258,10 @@ class TestChannel : public Channel {
const Status& writeStatus() const;
bool hasNoMoreWriteCalls() const;

/// Lock mutex and return `true` if d_closeCalls collection is empty,
/// `false` otherwise.
bool closeCallsEmpty() const;

// Channel
void read(Status* status,
int numBytes,
Expand Down
13 changes: 7 additions & 6 deletions src/groups/mwc/mwcma/mwcma_countingallocator.t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,23 +191,24 @@ static void test3_deallocate()
mwctst::TestHelper::printTestName("DEALLOCATE");

// This test twice-deallocates the same block of memory, to verify that
// such an operation fails. If we're running under MemorySanitizer or
// AddressSanitizer, we must skip this test to avoid detecting the issue
// and aborting.
// such an operation fails. If we're running under MemorySanitizer,
// AddressSanitizer or ThreadSanitizer, we must skip this test to avoid
// detecting the issue and aborting.
//
// Under MSan, we would instead try to explicitly "unpoison" the memory,
// but CountingAllocator keeps a hidden "header" block, which we have no
// good way of accessing to unpoison.
//
// Under ASan, we might be able to use the `no_sanitize` attribute, but
// GCC doesn't support it before versio 8.0 - so for now, better just to
// GCC doesn't support it before version 8.0 - so for now, better just to
// skip the testcase.
#if defined(__has_feature) // Clang-supported method for checking sanitizers.
const bool skipTestForSanitizers = __has_feature(address_sanitizer) ||
__has_feature(memory_sanitizer) ||
__has_feature(thread_sanitizer);
#elif defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_THREAD__)
// GCC-supported macros for checking ASAN and TSAN.
#elif defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_MEMORY__) || \
defined(__SANITIZE_THREAD__)
// GCC-supported macros for checking ASAN, MSAN and TSAN.
const bool skipTestForSanitizers = true;
#else
const bool skipTestForSanitizers = false; // Default to running the test.
Expand Down

0 comments on commit 4dd944b

Please sign in to comment.