Skip to content

Commit

Permalink
ENG-3612 In TabletPeer, we should not access try to call log() before…
Browse files Browse the repository at this point in the history
… the log is initialized

Summary:
An attempt to fix the core dump where remote bootstrap session is starting but the tablet peer
does not have a log it. Adding an explicit check that the tablet peer is in the running state,
and also hardening log() with additional checks. Now log() will crash in case the log is not
initialized, and a couple of places where we allow it to be null are handled separately.

Also pall include directories known to CMake to the PostgreSQL build (fixes the readline issue).

Test Plan:
Jenkins
itest
ptest

Reviewers: hector, sergei, bharat

Reviewed By: sergei, bharat

Subscribers: ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D5162
  • Loading branch information
mbautin committed Jul 18, 2018
1 parent c48218f commit 7110b9a
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 18 deletions.
4 changes: 2 additions & 2 deletions java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@
<maven-jar-plugin.version>3.0.2</maven-jar-plugin.version>
<maven-enforcer-plugin.version>3.0.0-M1</maven-enforcer-plugin.version>
<protobuf-maven-plugin.version>0.5.1</protobuf-maven-plugin.version>
<maven-surefire-plugin.version>2.20.1</maven-surefire-plugin.version>
<maven-failsafe-plugin.version>2.20.1</maven-failsafe-plugin.version>
<maven-surefire-plugin.version>2.22.0</maven-surefire-plugin.version>
<maven-failsafe-plugin.version>2.22.0</maven-failsafe-plugin.version>
<schema-validator-maven-plugin.version>5.5.0-SNAPSHOT</schema-validator-maven-plugin.version>
<scala-maven-plugin.version>3.2.1</scala-maven-plugin.version>
<maven-javadoc-plugin.version>3.0.0</maven-javadoc-plugin.version>
Expand Down
4 changes: 3 additions & 1 deletion src/postgres/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@


set(POSTGRES_EXTRA_C_CXX_FLAGS "")
foreach(include_dir ${Boost_INCLUDE_DIRS})
get_property(yb_cmake_include_dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
PROPERTY INCLUDE_DIRECTORIES)
foreach(include_dir ${yb_cmake_include_dirs})
set(POSTGRES_EXTRA_C_CXX_FLAGS "${POSTGRES_EXTRA_C_CXX_FLAGS} -I${include_dir}")
endforeach(include_dir)

Expand Down
4 changes: 0 additions & 4 deletions src/yb/consensus/log.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,6 @@ class Log : public RefCountedThreadSafe<Log> {
max_segment_size_ = max_segment_size;
}

void DisableAsyncAllocationForTests() {
options_.async_preallocate_segments = false;
}

void DisableSync() {
sync_disabled_ = true;
}
Expand Down
26 changes: 24 additions & 2 deletions src/yb/tablet/tablet_peer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ Status TabletPeer::InitTabletPeer(const shared_ptr<TabletClass> &tablet,
clock_ = clock;
proxy_cache_ = proxy_cache;
log_ = log;
// "Publish" the log pointer so it can be retrieved using the log() accessor.
log_atomic_ = log.get();

tablet->SetMemTableFlushFilterFactory([log] {
auto index = log->GetLatestEntryOpId().index;
Expand Down Expand Up @@ -265,7 +267,8 @@ Status TabletPeer::InitTabletPeer(const shared_ptr<TabletClass> &tablet,
}

TRACE("TabletPeer::Init() finished");
VLOG(2) << "T " << tablet_id() << " P " << consensus_->peer_uuid() << ": Peer Initted";
VLOG_WITH_PREFIX(2) << "Peer Initted";

return Status::OK();
}

Expand All @@ -274,7 +277,7 @@ Status TabletPeer::Start(const ConsensusBootstrapInfo& bootstrap_info) {
std::lock_guard<simple_spinlock> l(state_change_lock_);
TRACE("Starting consensus");

VLOG(2) << "T " << tablet_id() << " P " << consensus_->peer_uuid() << ": Peer starting";
VLOG_WITH_PREFIX(2) << "Peer starting";

VLOG(2) << "RaftConfig before starting: " << consensus_->CommittedConfig().DebugString();

Expand Down Expand Up @@ -650,6 +653,20 @@ Status TabletPeer::GetGCableDataSize(int64_t* retention_size) const {
return Status::OK();
}

log::Log* TabletPeer::log() const {
Log* log = log_atomic_.load(std::memory_order_acquire);
LOG_IF_WITH_PREFIX(FATAL, !log) << "log() called before the log instance is initialized.";
return log;
}

yb::OpId TabletPeer::GetLatestLogEntryOpId() const {
Log* log = log_atomic_.load(std::memory_order_acquire);
if (log) {
return log->GetLatestEntryOpId();
}
return yb::OpId();
}

std::unique_ptr<Operation> TabletPeer::CreateOperation(consensus::ReplicateMsg* replicate_msg) {
switch (replicate_msg->op_type()) {
case consensus::WRITE_OP:
Expand Down Expand Up @@ -822,6 +839,11 @@ uint64_t TabletPeer::OnDiskSize() const {
return ret;
}

std::string TabletPeer::LogPrefix() const {
return Substitute("T $0 P $1 [state=$2]: ",
tablet_id_, cached_permanent_uuid_, TabletStatePB_Name(state()));
}

scoped_refptr<OperationDriver> TabletPeer::CreateOperationDriver() {
return scoped_refptr<OperationDriver>(new OperationDriver(
&operation_tracker_,
Expand Down
22 changes: 18 additions & 4 deletions src/yb/tablet/tablet_peer.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,12 +243,21 @@ class TabletPeer : public consensus::ReplicaOperationFactory,
// Returns a non-ok status if the tablet isn't running.
CHECKED_STATUS GetGCableDataSize(int64_t* retention_size) const;

// Return a pointer to the Log.
// TabletPeer keeps a reference to Log after Init().
log::Log* log() const {
return log_.get();
// Returns true if it is safe to retrieve the log pointer using the log() function from this
// tablet peer. Once the log pointer is initialized, it will stay valid for the lifetime of the
// TabletPeer.
bool log_available() const {
return log_atomic_.load(std::memory_order_acquire) != nullptr;
}

// Return a pointer to the Log. TabletPeer keeps a reference to Log after Init(). This function
// will crash if the log has not been initialized yet.
log::Log* log() const;

// Returns the OpId of the latest entry in the log, or a zero OpId if the log has not been
// initialized.
yb::OpId GetLatestLogEntryOpId() const;

server::Clock& clock() const override {
return *clock_;
}
Expand Down Expand Up @@ -307,6 +316,8 @@ class TabletPeer : public consensus::ReplicaOperationFactory,
// Caller should hold the lock_.
uint64_t OnDiskSize() const;

std::string LogPrefix() const;

protected:
friend class RefCountedThreadSafe<TabletPeer>;
friend class TabletPeerTest;
Expand Down Expand Up @@ -341,7 +352,10 @@ class TabletPeer : public consensus::ReplicaOperationFactory,

OperationTracker operation_tracker_;
OperationOrderVerifier operation_order_verifier_;

scoped_refptr<log::Log> log_;
std::atomic<log::Log*> log_atomic_{nullptr};

std::shared_ptr<TabletClass> tablet_;
rpc::ProxyCache* proxy_cache_;
std::shared_ptr<consensus::RaftConsensus> consensus_;
Expand Down
3 changes: 3 additions & 0 deletions src/yb/tserver/remote_bootstrap_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ void RemoteBootstrapServiceImpl::BeginRemoteBootstrapSession(
RPC_RETURN_NOT_OK(tablet_peer_lookup_->GetTabletPeer(tablet_id, &tablet_peer),
RemoteBootstrapErrorPB::TABLET_NOT_FOUND,
Substitute("Unable to find specified tablet: $0", tablet_id));
RPC_RETURN_NOT_OK(tablet_peer->CheckRunning(),
RemoteBootstrapErrorPB::TABLET_NOT_FOUND,
Substitute("Tablet is not running yet: $0", tablet_id));

scoped_refptr<RemoteBootstrapSessionClass> session;
{
Expand Down
5 changes: 4 additions & 1 deletion src/yb/tserver/remote_bootstrap_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,11 @@ Status RemoteBootstrapSession::Init() {
Substitute("Unable to access superblock for tablet $0",
tablet_id));

if (!tablet_peer_->log_available()) {
return STATUS(IllegalState, "Tablet is not running (log is uninitialized)");
}
// Get the latest opid in the log at this point in time so we can re-anchor.
auto last_logged_opid = tablet_peer_->log()->GetLatestEntryOpId();
auto last_logged_opid = tablet_peer_->GetLatestLogEntryOpId();

auto tablet = tablet_peer_->shared_tablet();
if (PREDICT_FALSE(!tablet)) {
Expand Down
5 changes: 1 addition & 4 deletions src/yb/tserver/ts_tablet_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -818,10 +818,7 @@ Status TSTabletManager::DeleteTablet(
scoped_refptr<TabletMetadata> meta = tablet_peer->tablet_metadata();
tablet_peer->Shutdown();

yb::OpId last_logged_opid;
if (tablet_peer->log()) {
last_logged_opid = tablet_peer->log()->GetLatestEntryOpId();
}
yb::OpId last_logged_opid = tablet_peer->GetLatestLogEntryOpId();

Status s = DeleteTabletData(meta,
delete_type,
Expand Down

0 comments on commit 7110b9a

Please sign in to comment.