Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a crash that happens when Firestore is being terminated when the last shared pointer to it is in a listener #7412

Closed
wants to merge 18 commits into from
4 changes: 4 additions & 0 deletions Firestore/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Unreleased
- [fixed] Fixed a crash that would happen when the app is being deleted and
immediately disposed of and there's an active listener (#6909).

# v7.5.0
- [changed] A write to a document that contains FieldValue transforms is no
longer split up into two separate operations. This reduces the number of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES"
enableAddressSanitizer = "YES"
enableASanStackUseAfterReturn = "YES">
<MacroExpansion>
<BuildableReference
Expand Down
24 changes: 24 additions & 0 deletions Firestore/Example/Tests/Integration/API/FIRDatabaseTests.mm
Original file line number Diff line number Diff line change
Expand Up @@ -1441,6 +1441,30 @@ - (void)testAppDeleteLeadsToFirestoreTermination {
XCTAssertTrue(firestore.wrapped->client()->is_terminated());
}

// Ensures b/172958106 doesn't regress.
- (void)testDeleteAppWorksWhenLastReferenceToFirestoreIsInListener {
FIRApp *app = testutil::AppForUnitTesting(util::MakeString([FSTIntegrationTestCase projectID]));
FIRFirestore *firestore = [FIRFirestore firestoreForApp:app];

FIRDocumentReference *doc = [firestore documentWithPath:@"abc/123"];
// Make sure there is a listener.
[doc addSnapshotListener:^(FIRDocumentSnapshot *, NSError *){
}];
NSDictionary<NSString *, id> *data =
@{@"owner" : @{@"name" : @"Jonny", @"email" : @"[email protected]"}};
// Make sure the client is initialized.
[self writeDocumentRef:doc data:data];

XCTestExpectation *expectation = [self expectationWithDescription:@"App is deleted"];
[app deleteApp:^(BOOL) {
[expectation fulfill];
}];
// Let go of the last app reference.
app = nil;

[self awaitExpectations];
}

- (void)testTerminateCanBeCalledMultipleTimes {
FIRApp *app = testutil::AppForUnitTesting(util::MakeString([FSTIntegrationTestCase projectID]));
FIRFirestore *firestore = [FIRFirestore firestoreForApp:app];
Expand Down
3 changes: 2 additions & 1 deletion Firestore/core/src/api/firestore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@ void Firestore::EnsureClientConfigured() {
HARD_ASSERT(worker_queue_, "Expected non-null worker queue");
client_ = FirestoreClient::Create(
MakeDatabaseInfo(), settings_, std::move(credentials_provider_),
user_executor_, worker_queue_, std::move(firebase_metadata_provider_));
user_executor_, worker_queue_, std::move(firebase_metadata_provider_),
shared_from_this());
}
}

Expand Down
53 changes: 35 additions & 18 deletions Firestore/core/src/core/firestore_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ namespace core {
using api::DocumentReference;
using api::DocumentSnapshot;
using api::DocumentSnapshotListener;
using api::Firestore;
using api::ListenerRegistration;
using api::QuerySnapshot;
using api::QuerySnapshotListener;
Expand Down Expand Up @@ -112,11 +113,13 @@ std::shared_ptr<FirestoreClient> FirestoreClient::Create(
std::shared_ptr<CredentialsProvider> credentials_provider,
std::shared_ptr<Executor> user_executor,
std::shared_ptr<AsyncQueue> worker_queue,
std::unique_ptr<FirebaseMetadataProvider> firebase_metadata_provider) {
std::unique_ptr<FirebaseMetadataProvider> firebase_metadata_provider,
std::shared_ptr<Firestore> firestore) {
// Have to use `new` because `make_shared` cannot access private constructor.
std::shared_ptr<FirestoreClient> shared_client(new FirestoreClient(
database_info, std::move(credentials_provider), std::move(user_executor),
std::move(worker_queue), std::move(firebase_metadata_provider)));
std::move(worker_queue), std::move(firebase_metadata_provider),
std::move(firestore)));

std::weak_ptr<FirestoreClient> weak_client(shared_client);
auto credential_change_listener = [weak_client, settings](User user) mutable {
Expand Down Expand Up @@ -158,12 +161,14 @@ FirestoreClient::FirestoreClient(
std::shared_ptr<CredentialsProvider> credentials_provider,
std::shared_ptr<Executor> user_executor,
std::shared_ptr<AsyncQueue> worker_queue,
std::unique_ptr<FirebaseMetadataProvider> firebase_metadata_provider)
std::unique_ptr<FirebaseMetadataProvider> firebase_metadata_provider,
std::shared_ptr<Firestore> firestore)
: database_info_(database_info),
credentials_provider_(std::move(credentials_provider)),
worker_queue_(std::move(worker_queue)),
user_executor_(std::move(user_executor)),
firebase_metadata_provider_(std::move(firebase_metadata_provider)) {
firebase_metadata_provider_(std::move(firebase_metadata_provider)),
weak_firestore_(std::move(firestore)) {
}

void FirestoreClient::Initialize(const User& user, const Settings& settings) {
Expand Down Expand Up @@ -231,21 +236,27 @@ FirestoreClient::~FirestoreClient() {
}

void FirestoreClient::Dispose() {
// Prevent new API invocations from enqueueing further work.
worker_queue_->EnterRestrictedMode();

// Clean up internal resources. It's possible that this can race with a call
// to `Firestore::ClearPersistence` or `Firestore::Terminate`, but that's OK
// because that operation does not rely on any state in this FirestoreClient.
std::promise<void> signal_disposing;
bool enqueued = worker_queue_->EnqueueEvenWhileRestricted([&, this] {
// Once this task has started running, AsyncQueue::Dispose will block on its
// completion. Signal as early as possible to lock out even restricted tasks
// as early as possible.
signal_disposing.set_value();

TerminateInternal();
});
bool enqueued = false;
// If `remote_store_` is null, it means termination has finished already. In
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no reference to remote_store_ in the code below, and remote_store_ isn't actually a good signal anyway, since it is reset asynchronously. I think you're referring to is_terminated(), right?

Second, phrasing this in terms of "the sequential order invariant" obscures what's going on. I think the issue is that EnqueueEvenWhileRestricted can't be called while already on the async queue. Restated that way it seems like we should just fix that rather than trying to introduce yet another guard on top of that.

// that case, enqueing termination is not only unnecessary, but also
// dangerous in the case when `Dispose` is invoked immediately after the
// termination, still on the worker queue -- it would break the sequential
// order invariant of the queue.
if (!is_terminated()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is racy. Two threads could observe !is_terminated() simultaneously and then both attempt the block below.

A better solution might be to have EnqueueEvenWhileRestricted do the right thing. This already has logic to deal with the race inherent in two callers attempting to dispose the instance. Perhaps we just need to make it EnqueueRelaxedEvenWhileRestricted?

// Prevent new API invocations from enqueueing further work.
worker_queue_->EnterRestrictedMode();

enqueued = worker_queue_->EnqueueEvenWhileRestricted([&, this] {
// Once this task has started running, AsyncQueue::Dispose will block on
// its completion. Signal as early as possible to lock out even restricted
// tasks as early as possible.
signal_disposing.set_value();

TerminateInternal();
});
}

// If we successfully enqueued the TerminateInternal task then wait for it to
// start.
Expand All @@ -263,7 +274,13 @@ void FirestoreClient::Dispose() {

void FirestoreClient::TerminateAsync(StatusCallback callback) {
worker_queue_->EnterRestrictedMode();
worker_queue_->EnqueueEvenWhileRestricted([this, callback] {
worker_queue_->EnqueueEvenWhileRestricted([&, this, callback] {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Default capture by reference in an asynchronously executing block is a recipe for disaster. It doesn't even seem like it's necessary. Remove?

// Make sure this `FirestoreClient` is not destroyed during
// `TerminateInternal`, so that `user_executor_` stays valid.
auto self = shared_from_this();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're depending on the shared_ptr keeping the instance alive, we should create it outside the callback. Otherwise, this can be destroyed while this lambda is awaiting execution.

It seems like both callbacks that call TerminateInternal should capture a shared_ptr<FirestoreClient> to ensure the instance lives past that point.

// Make sure that `api::Firestore` is not destroyed during the call to
// `TerminateInternal`.
auto firestore = weak_firestore_.lock();
TerminateInternal();

if (callback) {
Expand Down
9 changes: 7 additions & 2 deletions Firestore/core/src/core/firestore_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ class FirestoreClient : public std::enable_shared_from_this<FirestoreClient> {
std::shared_ptr<util::Executor> user_executor,
std::shared_ptr<util::AsyncQueue> worker_queue,
std::unique_ptr<remote::FirebaseMetadataProvider>
firebase_metadata_provider);
firebase_metadata_provider,
std::shared_ptr<api::Firestore> firestore);

~FirestoreClient();

Expand Down Expand Up @@ -185,7 +186,8 @@ class FirestoreClient : public std::enable_shared_from_this<FirestoreClient> {
std::shared_ptr<util::Executor> user_executor,
std::shared_ptr<util::AsyncQueue> worker_queue,
std::unique_ptr<remote::FirebaseMetadataProvider>
firebase_metadata_provider);
firebase_metadata_provider,
std::shared_ptr<api::Firestore> firestore);

void Initialize(const auth::User& user, const api::Settings& settings);

Expand Down Expand Up @@ -223,6 +225,9 @@ class FirestoreClient : public std::enable_shared_from_this<FirestoreClient> {
bool credentials_initialized_ = false;
local::LruDelegate* _Nullable lru_delegate_;
util::DelayedOperation lru_callback_;

// Used during shutdown to guarantee lifetimes.
std::weak_ptr<api::Firestore> weak_firestore_;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The type here doesn't actually matter since we don't need to manipulate this instance. Could this be std::weak_ptr<void>?

};

} // namespace core
Expand Down
1 change: 1 addition & 0 deletions Firestore/core/src/util/async_queue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ void AsyncQueue::ExecuteBlocking(const Operation& operation) {
"ExecuteBlocking may not be called "
"before the previous operation finishes executing");

auto self = shared_from_this();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we haven't done this before? is_operation_in_progress_ = false; may crash when api::Firestore (and, consequently, the worker queue) is destroyed on the queue (in FirestoreClient::TerminateAsync).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like an oversight.

I think the idea at the time was that FirestoreClient would keep the AsyncQueue alive as long as it needed to, but this doesn't account for the case where the Firestore destructor is running on the AsyncQueue.

is_operation_in_progress_ = true;
operation();
is_operation_in_progress_ = false;
Expand Down
4 changes: 2 additions & 2 deletions Firestore/core/test/unit/util/ordered_code_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ TEST(OrderedCodeInvalidEncodingsTest, NonCanonical) {

#if defined(NDEBUG)
EXPECT_ANY_THROW(TestRead<uint64_t>(INCREASING, non_minimal));
#else // defined(NDEBUG)
#else // defined(NDEBUG)
absl::string_view s(non_minimal);
EXPECT_ANY_THROW(OrderedCode::ReadNumIncreasing(&s, NULL));
#endif // defined(NDEBUG)
Expand All @@ -372,7 +372,7 @@ TEST(OrderedCodeInvalidEncodingsTest, NonCanonical) {

#if defined(NDEBUG)
EXPECT_ANY_THROW(TestRead<int64_t>(INCREASING, non_minimal));
#else // defined(NDEBUG)
#else // defined(NDEBUG)
absl::string_view s(non_minimal);
EXPECT_ANY_THROW(OrderedCode::ReadSignedNumIncreasing(&s, NULL));
s = non_minimal;
Expand Down