Skip to content

Commit

Permalink
[#24658] YSQL: Switch pg_cron to normal SIGTERM
Browse files Browse the repository at this point in the history
Summary:
Switch pg_cron to normal using normal `SIGTERM` and `SIGQUIT` handlers.

In an attempt to fix stuck pg_cron workers the handler of `SIGTERM` was changed to `quickdie` 4b4c201/D37591. But in `quickdie` mode postgres does not invoke `ReportBackgroundWorkerExit` which signals the parent process when its child backend exits. This causes pg_cron launcher to get stuck in `WaitForBackgroundWorkerShutdown`.

With 111b65d/D39207 the issues with pg shutdown have been addressed and it is safe to now switch to the normal SIGTERM behavior which makes the background workers `die` in a clean manner.

#24706 tracks the issue where in yb `ReportBackgroundWorkerExit` is not called when the backend has a non graceful exit.

Fixes #24658
Jira: DB-13724

Test Plan: PgCronTest, DeactivateRunningJob

Reviewers: telgersma

Reviewed By: telgersma

Subscribers: ybase, yql

Differential Revision: https://phorge.dev.yugabyte.com/D39552
  • Loading branch information
hari90 committed Oct 30, 2024
1 parent 1850a32 commit d07daec
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 6 deletions.
8 changes: 2 additions & 6 deletions src/postgres/third-party-extensions/pg_cron/src/pg_cron.c
Original file line number Diff line number Diff line change
Expand Up @@ -595,9 +595,7 @@ PgCronLauncherMain(Datum arg)
/* Establish signal handlers before unblocking signals. */
pqsignal(SIGHUP, pg_cron_sighup);
pqsignal(SIGINT, SIG_IGN);
/* YB Note: Exit immediately. */
pqsignal(SIGTERM, quickdie);
pqsignal(SIGQUIT, quickdie);
pqsignal(SIGTERM, pg_cron_sigterm);

/* We're now ready to receive signals */
BackgroundWorkerUnblockSignals();
Expand Down Expand Up @@ -2157,9 +2155,7 @@ CronBackgroundWorker(Datum main_arg)
shm_mq_handle *responseq;

/* handle SIGTERM like regular backend */
pqsignal(SIGTERM, quickdie);
/* YB Note: Exit immediately. */
pqsignal(SIGQUIT, quickdie);
pqsignal(SIGTERM, die);
BackgroundWorkerUnblockSignals();

/* Set up a memory context and resource owner. */
Expand Down
40 changes: 40 additions & 0 deletions src/yb/integration-tests/pg_cron-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,15 @@ class PgCronTest : public MiniClusterTestWithClient<ExternalMiniCluster> {
return conn->FetchRow<pgwrapper::PGUint64>(Format("SELECT COUNT(*) FROM $0", table_name));
}

Status WaitForRowCountAbove(int64_t min_count) {
return WaitFor(
[this, &min_count]() -> Result<bool> {
const auto row_count = VERIFY_RESULT(GetRowCount());
return row_count > min_count;
},
kTimeout, Format("Wait for row count to be above $0", min_count));
}

Status WaitForDataInPgCronLeaderTable() {
auto table_name =
stateful_service::GetStatefulServiceTableName(StatefulServiceKind::PG_CRON_LEADER);
Expand Down Expand Up @@ -542,4 +551,35 @@ TEST_F(PgCronTest, FailBeforeStoringLastMinute) {
ASSERT_GT(row_count, 0);
}

TEST_F(PgCronTest, DeactivateRunningJob) {
ASSERT_OK(Schedule1SecInsertJob());
// Start a job that will run for a long time.
auto sleep_job_id = ASSERT_RESULT(ScheduleJob("Sleep Job", "1 second", "SELECT pg_sleep(1000)"));

ASSERT_OK(WaitForRowCountAbove(0));

auto is_sleep_job_running = [this, sleep_job_id]() -> Result<bool> {
return VERIFY_RESULT(conn_->FetchRow<pgwrapper::PGUint64>(Format(
"SELECT COUNT(*) FROM cron.job_run_details WHERE jobid = $0 AND status = 'running'",
sleep_job_id))) > 0;
};

{
const auto is_running = ASSERT_RESULT(is_sleep_job_running());
ASSERT_TRUE(is_running);
}

// Deactivating the sleep job should stop it immediately.
ASSERT_OK(conn_->FetchFormat("SELECT cron.alter_job($0, active:=false)", sleep_job_id));

// Wait for the change to get picked up and sleep job to get canceled.
ASSERT_OK(WaitFor(
[&is_sleep_job_running]() -> Result<bool> { return !VERIFY_RESULT(is_sleep_job_running()); },
kTimeout, "Wait for sleeping job to get killed"));

// Make sure the other job is still running.
const auto initial_row_count = ASSERT_RESULT(GetRowCount());
ASSERT_OK(WaitForRowCountAbove(initial_row_count));
}

} // namespace yb

0 comments on commit d07daec

Please sign in to comment.