Skip to content

Commit

Permalink
feat: Remove prover db from house keeper
Browse files Browse the repository at this point in the history
This PR is a follow-up to
#2666: it removes the
prover side from the house keeper.

This PR contains:
- remove all prover jobs from house keeper (now in PJM)
- move core metrics from prover jobs to l1 batch metrics reporter
- remove old configuration

With these changes, core and prover are fully decoupled. This will enable
removing unnecessary databases across all envs that don't run provers.
In addition, core and prover deployments become independent.
  • Loading branch information
EmilLuta committed Sep 3, 2024
1 parent b2dd9a5 commit 42e085d
Show file tree
Hide file tree
Showing 26 changed files with 68 additions and 1,406 deletions.
16 changes: 3 additions & 13 deletions core/bin/zksync_server/src/node_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ impl MainNodeBuilder {
let pools_layer = PoolsLayerBuilder::empty(config, secrets)
.with_master(true)
.with_replica(true)
.with_prover(true) // Used by house keeper.
.build();
self.node.add_layer(pools_layer);
Ok(self)
Expand Down Expand Up @@ -446,18 +445,9 @@ impl MainNodeBuilder {

/// Registers the house keeper layer on the node.
///
/// The required configs are loaded from `self.configs` with `try_load_config!`, passed to
/// `HouseKeeperLayer::new`, and the resulting layer is added through `self.node.add_layer`.
/// Returns the builder as `Ok(self)`.
fn add_house_keeper_layer(mut self) -> anyhow::Result<Self> {
let house_keeper_config = try_load_config!(self.configs.house_keeper_config);
let fri_prover_config = try_load_config!(self.configs.prover_config);
let fri_witness_generator_config = try_load_config!(self.configs.witness_generator_config);
let fri_prover_group_config = try_load_config!(self.configs.prover_group_config);
let fri_proof_compressor_config = try_load_config!(self.configs.proof_compressor_config);

self.node.add_layer(HouseKeeperLayer::new(
house_keeper_config,
fri_prover_config,
fri_witness_generator_config,
fri_prover_group_config,
fri_proof_compressor_config,
));

self.node
.add_layer(HouseKeeperLayer::new(house_keeper_config));

Ok(self)
}
Expand Down
25 changes: 0 additions & 25 deletions core/lib/config/src/configs/house_keeper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,4 @@ use serde::Deserialize;
/// House keeper configuration.
///
/// Fields ending in `_ms` are expressed in milliseconds and fields ending in `_secs` in
/// seconds. The four archiver fields are optional; they are read in pairs by
/// `prover_job_archiver_params` and `fri_gpu_prover_archiver_params`.
#[derive(Debug, Deserialize, Clone, PartialEq)]
pub struct HouseKeeperConfig {
pub l1_batch_metrics_reporting_interval_ms: u64,
pub gpu_prover_queue_reporting_interval_ms: u64,
pub prover_job_retrying_interval_ms: u64,
pub prover_stats_reporting_interval_ms: u64,
pub witness_job_moving_interval_ms: u64,
pub witness_generator_stats_reporting_interval_ms: u64,
pub witness_generator_job_retrying_interval_ms: u64,
pub prover_db_pool_size: u32,
pub proof_compressor_job_retrying_interval_ms: u64,
pub proof_compressor_stats_reporting_interval_ms: u64,
pub prover_job_archiver_archiving_interval_ms: Option<u64>,
pub prover_job_archiver_archive_after_secs: Option<u64>,
pub fri_gpu_prover_archiver_archiving_interval_ms: Option<u64>,
pub fri_gpu_prover_archiver_archive_after_secs: Option<u64>,
}

impl HouseKeeperConfig {
    /// Returns `(archiving_interval_ms, archive_after_secs)` for the prover job archiver.
    ///
    /// Yields `None` unless both values are configured.
    pub fn prover_job_archiver_params(&self) -> Option<(u64, u64)> {
        let archiving_interval_ms = self.prover_job_archiver_archiving_interval_ms?;
        let archive_after_secs = self.prover_job_archiver_archive_after_secs?;
        Some((archiving_interval_ms, archive_after_secs))
    }

    /// Returns `(archiving_interval_ms, archive_after_secs)` for the FRI GPU prover archiver.
    ///
    /// Yields `None` unless both values are configured.
    pub fn fri_gpu_prover_archiver_params(&self) -> Option<(u64, u64)> {
        match (
            self.fri_gpu_prover_archiver_archiving_interval_ms,
            self.fri_gpu_prover_archiver_archive_after_secs,
        ) {
            (Some(archiving_interval_ms), Some(archive_after_secs)) => {
                Some((archiving_interval_ms, archive_after_secs))
            }
            _ => None,
        }
    }
}
13 changes: 0 additions & 13 deletions core/lib/config/src/testonly.rs
Original file line number Diff line number Diff line change
Expand Up @@ -630,19 +630,6 @@ impl Distribution<configs::house_keeper::HouseKeeperConfig> for EncodeDist {
/// Builds a `HouseKeeperConfig` in which every field is filled with `self.sample(rng)`.
///
/// The fields are sampled in the order they are listed below, so reordering them changes
/// which draw from `rng` ends up in which field.
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> configs::house_keeper::HouseKeeperConfig {
configs::house_keeper::HouseKeeperConfig {
l1_batch_metrics_reporting_interval_ms: self.sample(rng),
gpu_prover_queue_reporting_interval_ms: self.sample(rng),
prover_job_retrying_interval_ms: self.sample(rng),
prover_stats_reporting_interval_ms: self.sample(rng),
witness_job_moving_interval_ms: self.sample(rng),
witness_generator_stats_reporting_interval_ms: self.sample(rng),
prover_db_pool_size: self.sample(rng),
witness_generator_job_retrying_interval_ms: self.sample(rng),
proof_compressor_job_retrying_interval_ms: self.sample(rng),
proof_compressor_stats_reporting_interval_ms: self.sample(rng),
prover_job_archiver_archiving_interval_ms: self.sample(rng),
prover_job_archiver_archive_after_secs: self.sample(rng),
fri_gpu_prover_archiver_archiving_interval_ms: self.sample(rng),
fri_gpu_prover_archiver_archive_after_secs: self.sample(rng),
}
}
}
Expand Down
31 changes: 0 additions & 31 deletions core/lib/env_config/src/house_keeper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,6 @@ mod tests {
/// Returns the `HouseKeeperConfig` fixture used by the tests in this module.
///
/// The values mirror the `HOUSE_KEEPER_*` environment variables set in the test below;
/// presumably the parsed config is compared against this value — confirm in the test body.
fn expected_config() -> HouseKeeperConfig {
HouseKeeperConfig {
l1_batch_metrics_reporting_interval_ms: 10_000,
gpu_prover_queue_reporting_interval_ms: 10_000,
prover_job_retrying_interval_ms: 10000,
prover_stats_reporting_interval_ms: 5_000,
witness_job_moving_interval_ms: 30_000,
witness_generator_stats_reporting_interval_ms: 10_000,
witness_generator_job_retrying_interval_ms: 30_000,
prover_db_pool_size: 2,
proof_compressor_job_retrying_interval_ms: 30_000,
proof_compressor_stats_reporting_interval_ms: 10_000,
// 30 minutes
prover_job_archiver_archiving_interval_ms: Some(1_800_000),
// 48 hours
prover_job_archiver_archive_after_secs: Some(172_800),
// 24 hours
fri_gpu_prover_archiver_archiving_interval_ms: Some(86_400_000),
// 48 hours
fri_gpu_prover_archiver_archive_after_secs: Some(172_800),
}
}

Expand All @@ -41,22 +26,6 @@ mod tests {
let mut lock = MUTEX.lock();
let config = r#"
HOUSE_KEEPER_L1_BATCH_METRICS_REPORTING_INTERVAL_MS="10000"
HOUSE_KEEPER_GPU_PROVER_QUEUE_REPORTING_INTERVAL_MS="10000"
HOUSE_KEEPER_PROVER_JOB_RETRYING_INTERVAL_MS="10000"
HOUSE_KEEPER_WITNESS_JOB_MOVING_INTERVAL_MS="30000"
HOUSE_KEEPER_WITNESS_GENERATOR_STATS_REPORTING_INTERVAL_MS="10000"
HOUSE_KEEPER_WITNESS_GENERATOR_JOB_RETRYING_INTERVAL_MS="30000"
HOUSE_KEEPER_FRI_WITNESS_JOB_MOVING_INTERVAL_MS="40000"
HOUSE_KEEPER_FRI_PROVER_JOB_RETRYING_INTERVAL_MS="30000"
HOUSE_KEEPER_FRI_WITNESS_GENERATOR_JOB_RETRYING_INTERVAL_MS="30000"
HOUSE_KEEPER_PROVER_DB_POOL_SIZE="2"
HOUSE_KEEPER_PROVER_STATS_REPORTING_INTERVAL_MS="5000"
HOUSE_KEEPER_PROOF_COMPRESSOR_STATS_REPORTING_INTERVAL_MS="10000"
HOUSE_KEEPER_PROOF_COMPRESSOR_JOB_RETRYING_INTERVAL_MS="30000"
HOUSE_KEEPER_PROVER_JOB_ARCHIVER_ARCHIVING_INTERVAL_MS="1800000"
HOUSE_KEEPER_PROVER_JOB_ARCHIVER_ARCHIVE_AFTER_SECS="172800"
HOUSE_KEEPER_FRI_GPU_PROVER_ARCHIVER_ARCHIVING_INTERVAL_MS="86400000"
HOUSE_KEEPER_FRI_GPU_PROVER_ARCHIVER_ARCHIVE_AFTER_SECS="172800"
"#;
lock.set_env(config);

Expand Down
63 changes: 0 additions & 63 deletions core/lib/protobuf_config/src/house_keeper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,43 +12,6 @@ impl ProtoRepr for proto::HouseKeeper {
&self.l1_batch_metrics_reporting_interval_ms,
)
.context("l1_batch_metrics_reporting_interval_ms")?,
gpu_prover_queue_reporting_interval_ms: *required(
&self.gpu_prover_queue_reporting_interval_ms,
)
.context("gpu_prover_queue_reporting_interval_ms")?,
prover_job_retrying_interval_ms: *required(&self.prover_job_retrying_interval_ms)
.context("prover_job_retrying_interval_ms")?,
prover_stats_reporting_interval_ms: *required(&self.prover_stats_reporting_interval_ms)
.context("prover_stats_reporting_interval_ms")?,
witness_job_moving_interval_ms: *required(&self.witness_job_moving_interval_ms)
.context("witness_job_moving_interval_ms")?,
witness_generator_stats_reporting_interval_ms: *required(
&self.witness_generator_stats_reporting_interval_ms,
)
.context("witness_generator_stats_reporting_interval_ms")?,
prover_db_pool_size: *required(&self.prover_db_pool_size)
.context("prover_db_pool_size")?,
proof_compressor_job_retrying_interval_ms: *required(
&self.proof_compressor_job_retrying_interval_ms,
)
.context("proof_compressor_job_retrying_interval_ms")?,
witness_generator_job_retrying_interval_ms: *required(
&self.witness_generator_job_retrying_interval_ms,
)
.context("witness_generator_job_retrying_interval_ms")?,
proof_compressor_stats_reporting_interval_ms: *required(
&self.proof_compressor_stats_reporting_interval_ms,
)
.context("proof_compressor_stats_reporting_interval_ms")?,

// TODO(PLA-862): Make these 2 variables required
prover_job_archiver_archiving_interval_ms: self
.prover_job_archiver_archiving_interval_ms,
prover_job_archiver_archive_after_secs: self.prover_job_archiver_archive_after_secs,
fri_gpu_prover_archiver_archiving_interval_ms: self
.fri_gpu_prover_archiver_archiving_interval_ms,
fri_gpu_prover_archiver_archive_after_secs: self
.fri_gpu_prover_archiver_archive_after_secs,
})
}

Expand All @@ -57,32 +20,6 @@ impl ProtoRepr for proto::HouseKeeper {
l1_batch_metrics_reporting_interval_ms: Some(
this.l1_batch_metrics_reporting_interval_ms,
),
gpu_prover_queue_reporting_interval_ms: Some(
this.gpu_prover_queue_reporting_interval_ms,
),
prover_job_retrying_interval_ms: Some(this.prover_job_retrying_interval_ms),
prover_stats_reporting_interval_ms: Some(this.prover_stats_reporting_interval_ms),
witness_job_moving_interval_ms: Some(this.witness_job_moving_interval_ms),
witness_generator_stats_reporting_interval_ms: Some(
this.witness_generator_stats_reporting_interval_ms,
),
witness_generator_job_retrying_interval_ms: Some(
this.witness_generator_job_retrying_interval_ms,
),
prover_db_pool_size: Some(this.prover_db_pool_size),
proof_compressor_job_retrying_interval_ms: Some(
this.proof_compressor_job_retrying_interval_ms,
),
proof_compressor_stats_reporting_interval_ms: Some(
this.proof_compressor_stats_reporting_interval_ms,
),
prover_job_archiver_archiving_interval_ms: this
.prover_job_archiver_archiving_interval_ms,
prover_job_archiver_archive_after_secs: this.prover_job_archiver_archive_after_secs,
fri_gpu_prover_archiver_archiving_interval_ms: this
.fri_gpu_prover_archiver_archiving_interval_ms,
fri_gpu_prover_archiver_archive_after_secs: this
.fri_gpu_prover_archiver_archive_after_secs,
}
}
}
26 changes: 13 additions & 13 deletions core/lib/protobuf_config/src/proto/config/house_keeper.proto
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@ package zksync.config.house_keeper;

// House keeper configuration.
// Trailing comments state whether a field is required and the unit of its value.
message HouseKeeper {
optional uint64 l1_batch_metrics_reporting_interval_ms = 1; // required; ms
optional uint64 gpu_prover_queue_reporting_interval_ms = 2; // required; ms
optional uint64 prover_job_retrying_interval_ms = 3; // required; ms
optional uint64 prover_stats_reporting_interval_ms = 4; // required; ms
optional uint64 witness_job_moving_interval_ms = 5; // required; ms
optional uint64 witness_generator_stats_reporting_interval_ms = 6; // required; ms
optional uint64 witness_generator_job_retrying_interval_ms = 9; // required; ms
optional uint32 prover_db_pool_size = 10; // required
optional uint64 proof_compressor_job_retrying_interval_ms = 12; // required; ms
optional uint64 proof_compressor_stats_reporting_interval_ms = 13; // required; ms
optional uint64 prover_job_archiver_archiving_interval_ms = 14; // optional; ms
optional uint64 prover_job_archiver_archive_after_secs = 15; // optional; seconds
optional uint64 fri_gpu_prover_archiver_archiving_interval_ms = 16; // optional; ms
optional uint64 fri_gpu_prover_archiver_archive_after_secs = 17; // optional; seconds
reserved 2; reserved "gpu_prover_queue_reporting_interval_ms";
reserved 3; reserved "prover_job_retrying_interval_ms";
reserved 4; reserved "prover_stats_reporting_interval_ms";
reserved 5; reserved "witness_job_moving_interval_ms";
reserved 6; reserved "witness_generator_stats_reporting_interval_ms";
reserved 9; reserved "witness_generator_job_retrying_interval_ms";
reserved 10; reserved "prover_db_pool_size";
reserved 12; reserved "proof_compressor_job_retrying_interval_ms";
reserved 13; reserved "proof_compressor_stats_reporting_interval_ms";
reserved 14; reserved "prover_job_archiver_archiving_interval_ms";
reserved 15; reserved "prover_job_archiver_archive_after_secs";
reserved 16; reserved "fri_gpu_prover_archiver_archiving_interval_ms";
reserved 17; reserved "fri_gpu_prover_archiver_archive_after_secs";
}
33 changes: 32 additions & 1 deletion core/node/house_keeper/src/blocks_state_reporter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use async_trait::async_trait;
use zksync_dal::{ConnectionPool, Core, CoreDal};
use zksync_shared_metrics::{BlockL1Stage, BlockStage, L1StageLatencyLabel, APP_METRICS};

use crate::periodic_job::PeriodicJob;
use crate::{metrics::FRI_PROVER_METRICS, periodic_job::PeriodicJob};

#[derive(Debug)]
pub struct L1BatchMetricsReporter {
Expand Down Expand Up @@ -88,6 +88,37 @@ impl L1BatchMetricsReporter {
APP_METRICS.blocks_state_block_eth_stage_latency[&L1StageLatencyLabel::UnexecutedBlock]
.set(now.saturating_sub(timestamp));
}

// Proof generation metrics.
//
// `oldest_unpicked_batch` reports the oldest batch that has not been picked yet. When there
// is no such batch, the sealed L1 batch number is reported instead.
let oldest_unpicked_batch = match conn
    .proof_generation_dal()
    .get_oldest_unpicked_batch()
    .await?
{
    Some(l1_batch_number) => Some(l1_batch_number.0 as u64),
    // Propagate database errors with `?`, like the other queries here, instead of
    // panicking inside the periodic job.
    None => conn
        .blocks_dal()
        .get_sealed_l1_batch_number()
        .await?
        .map(|sealed_batch_number| sealed_batch_number.0 as u64),
};
// With no sealed batch yet there is nothing meaningful to report, so the gauge is left
// untouched rather than panicking on the missing value.
if let Some(batch_number) = oldest_unpicked_batch {
    FRI_PROVER_METRICS.oldest_unpicked_batch.set(batch_number);
}

// `oldest_not_generated_batch` is only updated when such a batch exists.
if let Some(l1_batch_number) = conn
    .proof_generation_dal()
    .get_oldest_not_generated_batch()
    .await?
{
    FRI_PROVER_METRICS
        .oldest_not_generated_batch
        .set(l1_batch_number.0 as u64);
}
Ok(())
}
}
Expand Down
2 changes: 1 addition & 1 deletion core/node/house_keeper/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
pub mod blocks_state_reporter;
mod metrics;
pub mod periodic_job;
pub mod prover;
11 changes: 11 additions & 0 deletions core/node/house_keeper/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
use vise::{Gauge, Metrics};

// Gauges exported under the `fri_prover` metrics prefix.
// NOTE(review): plain `//` comments are used on purpose — `vise` may turn `///` doc comments
// into metric help text, which would change the exported metadata; confirm before converting.
#[derive(Debug, Metrics)]
#[metrics(prefix = "fri_prover")]
pub(crate) struct FriProverMetrics {
// Set by `L1BatchMetricsReporter` to the oldest unpicked batch number, falling back to the
// sealed L1 batch number when there is no unpicked batch.
pub oldest_unpicked_batch: Gauge<u64>,
// Set by `L1BatchMetricsReporter` to the oldest not-generated batch number, when one exists.
pub oldest_not_generated_batch: Gauge<u64>,
}

// Crate-wide `FriProverMetrics` instance, registered through `#[vise::register]`.
#[vise::register]
pub(crate) static FRI_PROVER_METRICS: vise::Global<FriProverMetrics> = vise::Global::new();

This file was deleted.

Loading

0 comments on commit 42e085d

Please sign in to comment.