From 1195ac70950997c18d78b66c9d76bd71a9cdc142 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Fri, 16 Dec 2022 11:03:07 +0100 Subject: [PATCH 01/61] init commit --- src/meta/src/rpc/elections.rs | 814 ++++++++++++++++++++++++++++++++++ src/meta/src/rpc/mod.rs | 1 + src/meta/src/rpc/server.rs | 330 ++++---------- 3 files changed, 911 insertions(+), 234 deletions(-) create mode 100644 src/meta/src/rpc/elections.rs diff --git a/src/meta/src/rpc/elections.rs b/src/meta/src/rpc/elections.rs new file mode 100644 index 0000000000000..8c6b4439d3b55 --- /dev/null +++ b/src/meta/src/rpc/elections.rs @@ -0,0 +1,814 @@ +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; +use std::sync::Arc; +use std::thread; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use prost::Message; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; +use risingwave_common::util::addr::HostAddr; +use risingwave_pb::meta::{MetaLeaderInfo, MetaLeaseInfo}; +use tokio::sync::oneshot::Sender; +use tokio::sync::watch::Receiver; +use tokio::task::JoinHandle; + +use crate::rpc::{META_CF_NAME, META_LEADER_KEY, META_LEASE_KEY}; +use crate::storage::{MetaStore, MetaStoreError, Transaction}; +use crate::{MetaError, MetaResult}; + +// get duration since epoch +fn since_epoch() -> Duration { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time went backwards") +} + +/// Contains the result of an election +/// Use this to get information about the current leader and yourself +struct ElectionResult { + pub meta_leader_info: MetaLeaderInfo, + pub _meta_lease_info: MetaLeaseInfo, + + // True if current node is leader. 
False if follower + pub is_leader: bool, +} + +impl ElectionResult { + pub fn get_leader_addr(&self) -> HostAddr { + self.meta_leader_info + .node_address + .parse::() + .expect("invalid leader addr") + } +} + +/// Runs for election in an attempt to become leader +/// +/// ## Returns +/// Returns `ElectionResult`, containing infos about the node who won the election or +/// `MetaError` if the election ran into an error +/// +/// ## Arguments +/// `meta_store`: The meta store which holds the lease, deciding about the election result +/// `addr`: Address of the node that runs for election +/// `lease_time_sec`: Amount of seconds that this lease will be valid +/// `next_lease_id`: If the node wins, the lease used until the next election will have this id +async fn campaign( + meta_store: &Arc, + addr: &String, + lease_time_sec: u64, + next_lease_id: u64, +) -> MetaResult { + tracing::info!("running for election with lease {}", next_lease_id); + + let campaign_leader_info = MetaLeaderInfo { + lease_id: next_lease_id, + node_address: addr.to_string(), + }; + + let now = since_epoch(); + let campaign_lease_info = MetaLeaseInfo { + leader: Some(campaign_leader_info.clone()), + lease_register_time: now.as_secs(), + lease_expire_time: now.as_secs() + lease_time_sec, + }; + + // get old leader info and lease + let get_infos_result = get_leader_lease_obj(meta_store).await; + let has_leader = get_infos_result.is_some(); + + // Delete leader info, if leader lease timed out + let lease_expired = if has_leader { + let some_time = lease_time_sec / 2; + let (_, lease_info) = get_infos_result.unwrap(); + lease_info.get_lease_expire_time() + some_time < since_epoch().as_secs() + } else { + false + }; + + // Leader is down + if !has_leader || lease_expired { + tracing::info!("We have no leader"); + + // cluster has no leader + if let Err(e) = meta_store + .put_cf( + META_CF_NAME, + META_LEADER_KEY.as_bytes().to_vec(), + campaign_leader_info.encode_to_vec(), + ) + .await + { + let msg = format!( + "new cluster put leader info failed, MetaStoreError: {:?}", + e + ); + tracing::warn!(msg); + return Err(MetaError::unavailable(msg)); + } + + // Check if new leader was elected in the meantime + return match renew_lease(&campaign_leader_info, lease_time_sec, meta_store).await { + Ok(is_leader) => { + if !is_leader { + return Err(MetaError::permission_denied( + "Node could not acquire/renew leader lease".into(), + )); + } + Ok(ElectionResult { + meta_leader_info: campaign_leader_info, + _meta_lease_info: campaign_lease_info, + is_leader: true, + }) + } + Err(e) => Err(e), + }; + } + + // follow-up elections: There have already been leaders before + let is_leader = match renew_lease(&campaign_leader_info, lease_time_sec, meta_store).await { + Err(e) => { + tracing::warn!("Encountered error when renewing lease {}", e); + return Err(e); + } + Ok(val) => val, + }; + + if is_leader { + // if is leader, return HostAddress to this node + return Ok(ElectionResult { + meta_leader_info: campaign_leader_info, + _meta_lease_info: campaign_lease_info, + is_leader, + }); + } + + // FIXME: This has to be done with a single transaction, not 2 + // if it is not leader, then get the current leaders HostAddress + // Ask Pin how to implement txn.get here + return match get_leader_lease_obj(meta_store).await { + None => Err(MetaError::unavailable( + "Meta information not stored in meta store".into(), + )), + Some(infos) => Ok(ElectionResult { + meta_leader_info: infos.0, + _meta_lease_info: infos.1, + is_leader, + }), + }; +} + +/// Try 
to renew/acquire the leader lease +/// +/// ## Returns +/// True if node was leader and was able to renew/acquire the lease. +/// False if node was follower and thus could not renew/acquire lease. +/// `MetaError` if operation ran into an error +/// +/// ## Arguments +/// `leader_info`: Info of the node that trie +/// `lease_time_sec`: Time in seconds that the lease is valid +/// `meta_store`: Store which holds the lease +async fn renew_lease( + leader_info: &MetaLeaderInfo, + lease_time_sec: u64, + meta_store: &Arc, +) -> MetaResult { + let now = since_epoch(); + let mut txn = Transaction::default(); + let lease_info = MetaLeaseInfo { + leader: Some(leader_info.clone()), + lease_register_time: now.as_secs(), + lease_expire_time: now.as_secs() + lease_time_sec, + }; + + txn.check_equal( + META_CF_NAME.to_string(), + META_LEADER_KEY.as_bytes().to_vec(), + leader_info.encode_to_vec(), + ); + txn.put( + META_CF_NAME.to_string(), + META_LEASE_KEY.as_bytes().to_vec(), + lease_info.encode_to_vec(), + ); + + let is_leader = match meta_store.txn(txn).await { + Err(e) => match e { + MetaStoreError::TransactionAbort() => false, + e => return Err(e.into()), + }, + Ok(_) => true, + }; + Ok(is_leader) +} + +/// Retrieve infos about the current leader +/// +/// ## Attributes: +/// `meta_store`: The store holding information about the leader +/// +/// ## Returns +/// None if leader OR lease is not present in store +/// else infos about the leader and lease +/// Panics if request against `meta_store` failed +async fn get_leader_lease_obj( + meta_store: &Arc, +) -> Option<(MetaLeaderInfo, MetaLeaseInfo)> { + let current_leader_info = MetaLeaderInfo::decode( + match meta_store + .get_cf(META_CF_NAME, META_LEADER_KEY.as_bytes()) + .await + { + Err(MetaStoreError::ItemNotFound(_)) => return None, + Err(e) => panic!("Meta Store Error when retrieving leader info {:?}", e), + Ok(v) => v, + } + .as_slice(), + ) + .unwrap(); + + let current_leader_lease = MetaLeaseInfo::decode( + match meta_store + .get_cf(META_CF_NAME, META_LEASE_KEY.as_bytes()) + .await + { + Err(MetaStoreError::ItemNotFound(_)) => return None, + Err(e) => panic!("Meta Store Error when retrieving lease info {:?}", e), + Ok(v) => v, + } + .as_slice(), + ) + .unwrap(); + + Some((current_leader_info, current_leader_lease)) +} + +fn gen_rand_lease_id(addr: &str) -> u64 { + let mut ds = DefaultHasher::new(); + addr.hash(&mut ds); + ds.finish() + // FIXME: We are unable to use a random lease at the moment + // During testing, meta gets killed, new meta starts + // meta detects that lease is still there, with same addr, but diff ID + // meta believes that leader is out there and becomes follower + // IMHO we can only use random lease id, if we have at least 2 meta nodes + // https://github.com/risingwavelabs/risingwave/issues/6844 + // rand::thread_rng().gen_range(0..std::u64::MAX) +} + +/// Used to manage single leader setup. `run_elections` will continuously run elections to determine +/// which nodes is the **leader** and which are **followers**. +/// +/// To become a leader a **follower** node **campaigns**. A follower only ever campaigns if it +/// detects that the current leader is down. The follower becomes a leader by acquiring a lease +/// from the **meta store**. After getting elected the new node will start its **term** as a leader. +/// A term lasts until the current leader crashes. +/// +/// ## Arguments +/// `addr`: Address of the current leader, e.g. "127.0.0.1:5690". +/// `meta_store`: Holds information about the leader. 
+/// `lease_time_sec`: Time in seconds that a lease will be valid for. +/// A large value reduces the meta store traffic. A small value reduces the downtime during failover +/// +/// ## Returns: +/// `MetaLeaderInfo` containing the leader who got elected initially. +/// `JoinHandle` running all future elections concurrently. +/// `Sender` for signaling a shutdown. +/// `Receiver` receiving true if this node got elected as leader and false if it is a follower. +pub async fn run_elections( + addr: String, + meta_store: Arc, + lease_time_sec: u64, +) -> MetaResult<( + MetaLeaderInfo, + JoinHandle<()>, + Sender<()>, + Receiver<(HostAddr, bool)>, +)> { + // Randomize interval to reduce mitigate likelihood of simultaneous requests + let mut rng: StdRng = SeedableRng::from_entropy(); + + // runs the initial election, determining who the first leader is + 'initial_election: loop { + // every lease gets a random ID to differentiate between leases/leaders + let mut initial_election = true; + + // run the initial election + let election_result = campaign( + &meta_store, + &addr, + lease_time_sec, + gen_rand_lease_id(addr.as_str()), + ) + .await; + let (leader_addr, initial_leader, is_initial_leader) = match election_result { + Ok(elect_result) => { + tracing::info!("initial election finished"); + ( + elect_result.get_leader_addr(), + elect_result.meta_leader_info, + elect_result.is_leader, + ) + } + Err(_) => { + tracing::info!("initial election failed. Repeating election"); + thread::sleep(std::time::Duration::from_millis(500)); + continue 'initial_election; + } + }; + if is_initial_leader { + tracing::info!( + "Initial leader with address '{}' elected. New lease id is {}", + initial_leader.node_address, + initial_leader.lease_id + ); + } + + let initial_leader_clone = initial_leader.clone(); + + // define all follow up elections and terms in handle + let (shutdown_tx, mut shutdown_rx) = tokio::sync::oneshot::channel(); + let (leader_tx, leader_rx) = tokio::sync::watch::channel((leader_addr, is_initial_leader)); + let handle = tokio::spawn(async move { + // runs all followup elections + let mut ticker = tokio::time::interval( + Duration::from_secs(lease_time_sec / 2) + + Duration::from_millis(rng.gen_range(0..500)), + ); + ticker.reset(); + + let mut is_leader = is_initial_leader; + let mut leader_info = initial_leader.clone(); + let n_addr = initial_leader.node_address.as_str(); + let mut leader_addr = n_addr.parse::().unwrap(); + 'election: loop { + // Do not elect new leader directly after running the initial election + if !initial_election { + let (leader_addr_, leader_info_, is_leader_) = match campaign( + &meta_store, + &addr, + lease_time_sec, + gen_rand_lease_id(addr.as_str()), + ) + .await + { + Err(_) => { + tracing::info!("election failed. Repeating election"); + _ = ticker.tick().await; + continue 'election; + } + Ok(elect_result) => { + tracing::info!("election finished"); + ( + elect_result.get_leader_addr(), + elect_result.meta_leader_info, + elect_result.is_leader, + ) + } + }; + + if is_leader_ { + tracing::info!( + "Leader with address '{}' elected. New lease id is {}", + leader_info_.node_address, + leader_info_.lease_id + ); + } + leader_info = leader_info_; + is_leader = is_leader_; + leader_addr = leader_addr_; + } + initial_election = false; + + // signal to observers if there is a change in leadership + leader_tx.send((leader_addr.clone(), is_leader)).unwrap(); + + // election done. 
Enter the term of the current leader + // Leader stays in power until leader crashes + '_term: loop { + // sleep OR abort if shutdown + tokio::select! { + _ = &mut shutdown_rx => { + tracing::info!("Register leader info is stopped"); + return; + } + _ = ticker.tick() => {}, + } + + if let Ok(leader_alive) = + manage_term(is_leader, &leader_info, lease_time_sec, &meta_store).await && !leader_alive { + // leader failed. Elect new leader + continue 'election; + } + } + } + }); + return Ok((initial_leader_clone, handle, shutdown_tx, leader_rx)); + } +} + +/// Acts on the current leaders term +/// Leaders will try to extend the term +/// Followers will check if the leader is still alive +/// +/// ## Arguments: +/// `is_leader`: True if this node currently is a leader +/// `leader_info`: Info about the last observed leader +/// `lease_time_sec`: Time in seconds that a lease is valid +/// `meta_store`: Holds lease and leader data +/// +/// ## Returns +/// True if the leader defined in `leader_info` is still in power. +/// False if the old leader failed, there is no leader, or there a new leader got elected +/// `MetaError` if there was an error. +async fn manage_term( + is_leader: bool, + leader_info: &MetaLeaderInfo, + lease_time_sec: u64, + meta_store: &Arc, +) -> MetaResult { + // try to renew/acquire the lease if this node is a leader + if is_leader { + return Ok(renew_lease(leader_info, lease_time_sec, meta_store) + .await + .unwrap_or(false)); + }; + + // get leader info + let leader_lease_result = get_leader_lease_obj(meta_store).await; + let has_leader = leader_lease_result.is_some(); + + if !has_leader { + // ETCD does not have leader lease. Elect new leader + tracing::info!("ETCD does not have leader lease. Running new election"); + return Ok(false); + } + + match leader_changed(leader_info, meta_store).await { + Err(e) => { + tracing::warn!("Error when observing leader change {}", e); + return Err(e); + } + Ok(has_new_leader) => { + if has_new_leader { + return Ok(false); + } + } + } + + // delete lease and run new election if lease is expired for some time + let some_time = lease_time_sec / 2; + let (_, lease_info) = leader_lease_result.unwrap(); + if lease_info.get_lease_expire_time() + some_time < since_epoch().as_secs() { + tracing::warn!("Detected that leader is down"); + let mut txn = Transaction::default(); + // FIXME: No deletion here, directly write new key + txn.delete( + META_CF_NAME.to_string(), + META_LEADER_KEY.as_bytes().to_vec(), + ); + txn.delete(META_CF_NAME.to_string(), META_LEASE_KEY.as_bytes().to_vec()); + match meta_store.txn(txn).await { + Err(e) => tracing::warn!("Unable to update lease. 
Error {:?}", e), + Ok(_) => tracing::info!("Deleted leader and lease"), + } + return Ok(false); + } + + // lease exists and the same leader continues term + Ok(true) +} + +/// True if leader changed +/// False if leader is still the leader defined in `leader_info` +/// `MetaError` on error +async fn leader_changed( + leader_info: &MetaLeaderInfo, + meta_store: &Arc, +) -> MetaResult { + let mut txn = Transaction::default(); + txn.check_equal( + META_CF_NAME.to_string(), + META_LEADER_KEY.as_bytes().to_vec(), + leader_info.encode_to_vec(), + ); + + return match meta_store.txn(txn).await { + Err(e) => match e { + MetaStoreError::TransactionAbort() => Ok(true), + e => return Err(e.into()), + }, + Ok(_) => Ok(false), + }; +} + +#[cfg(test)] +mod tests { + + use super::*; + use crate::storage::MemStore; + + #[tokio::test] + async fn test_get_leader_lease_obj() { + // no impfo present should give empty results or default objects + let mock_meta_store = Arc::new(MemStore::new()); + let res = get_leader_lease_obj(&mock_meta_store).await; + assert!(res.is_none()); + + // get_info should retrieve old leader info + let test_leader = MetaLeaderInfo { + node_address: "some_address".into(), + lease_id: 123, + }; + let res = mock_meta_store + .put_cf( + META_CF_NAME, + META_LEADER_KEY.as_bytes().to_vec(), + test_leader.encode_to_vec(), + ) + .await; + assert!(res.is_ok(), "unable to send leader info to mock store"); + let info_res = get_leader_lease_obj(&mock_meta_store).await; + assert!( + info_res.is_none(), + "get infos about leader should be none if either leader or lease is not set" + ); + let res = mock_meta_store + .put_cf( + META_CF_NAME, + META_LEASE_KEY.as_bytes().to_vec(), + MetaLeaseInfo { + leader: Some(test_leader.clone()), + lease_register_time: since_epoch().as_secs(), + lease_expire_time: since_epoch().as_secs() + 1, + } + .encode_to_vec(), + ) + .await; + assert!(res.is_ok(), "unable to send lease info to mock store"); + let (leader, _) = get_leader_lease_obj(&mock_meta_store).await.unwrap(); + assert_eq!( + leader, test_leader, + "leader_info retrieved != leader_info send" + ); + } + + async fn put_lease_info(lease: &MetaLeaseInfo, meta_store: &Arc) { + let mut txn = Transaction::default(); + txn.put( + META_CF_NAME.to_string(), + META_LEASE_KEY.as_bytes().to_vec(), + lease.encode_to_vec(), + ); + meta_store + .txn(txn) + .await + .expect("Putting test lease failed"); + } + + async fn put_leader_info(leader: &MetaLeaderInfo, meta_store: &Arc) { + let mut txn = Transaction::default(); + txn.put( + META_CF_NAME.to_string(), + META_LEADER_KEY.as_bytes().to_vec(), + leader.encode_to_vec(), + ); + meta_store + .txn(txn) + .await + .expect("Putting test lease failed"); + } + + async fn put_leader_lease( + leader: &MetaLeaderInfo, + lease: &MetaLeaseInfo, + meta_store: &Arc, + ) { + put_leader_info(leader, meta_store).await; + put_lease_info(lease, meta_store).await; + } + + /// Default setup + /// ## Returns: + /// lease timeout, meta store, leader info, lease info, lease registration time + async fn default_setup() -> (u64, Arc, MetaLeaderInfo, MetaLeaseInfo, Duration) { + let lease_timeout = 10; + let mock_meta_store = Arc::new(MemStore::new()); + let leader_info = MetaLeaderInfo { + node_address: "localhost:1234".into(), + lease_id: 123, + }; + let now = since_epoch(); + let lease_info = MetaLeaseInfo { + leader: Some(leader_info.clone()), + lease_register_time: now.as_secs(), + lease_expire_time: now.as_secs() + lease_timeout, + }; + put_leader_lease(&leader_info, &lease_info, 
&mock_meta_store).await; + (lease_timeout, mock_meta_store, leader_info, lease_info, now) + } + + #[tokio::test] + async fn test_manage_term() { + let mock_meta_store = Arc::new(MemStore::new()); + let lease_timeout = 10; + + // Leader: If nobody was elected leader renewing lease fails and leader is marked as failed + let leader_info = MetaLeaderInfo { + node_address: "localhost:1234".into(), + lease_id: 123, + }; + assert!( + !manage_term(true, &leader_info, lease_timeout, &mock_meta_store) + .await + .unwrap() + ); + + // Follower: If nobody was elected leader renewing lease also fails + assert!( + !manage_term(false, &leader_info, lease_timeout, &mock_meta_store) + .await + .unwrap() + ); + } + + #[tokio::test] + async fn leader_should_renew_lease() { + // if node is leader lease should be renewed + let (lease_timeout, mock_meta_store, leader_info, _, _) = default_setup().await; + let now = since_epoch(); + let lease_info = MetaLeaseInfo { + leader: Some(leader_info.clone()), + lease_register_time: now.as_secs(), + lease_expire_time: now.as_secs() + lease_timeout, + }; + put_leader_lease(&leader_info, &lease_info, &mock_meta_store).await; + assert!( + manage_term(true, &leader_info, lease_timeout, &mock_meta_store) + .await + .unwrap(), + "Leader should still be in power after updating lease" + ); + let (_, new_lease_info) = get_leader_lease_obj(&mock_meta_store).await.unwrap(); + assert_eq!( + now.as_secs() + lease_timeout, + new_lease_info.get_lease_expire_time(), + "Lease was not extended by {}s, but by {}s", + lease_timeout, + new_lease_info.get_lease_expire_time() - lease_info.get_lease_expire_time() + ); + } + + #[tokio::test] + async fn follower_cannot_renew_lease() { + // If node is follower, lease should not be renewed + let (lease_timeout, mock_meta_store, leader_info, _, _) = default_setup().await; + let now = since_epoch(); + let lease_info = MetaLeaseInfo { + leader: Some(leader_info.clone()), + lease_register_time: now.as_secs(), + lease_expire_time: now.as_secs() + lease_timeout, + }; + put_leader_lease(&leader_info, &lease_info, &mock_meta_store).await; + assert!( + manage_term(false, &leader_info, lease_timeout, &mock_meta_store) + .await + .unwrap(), + "Leader should still be in power if follower fails to renew lease" + ); + let (_, new_lease_info) = get_leader_lease_obj(&mock_meta_store).await.unwrap(); + assert_eq!( + lease_info.get_lease_expire_time(), + new_lease_info.get_lease_expire_time(), + "Lease should not be extended by follower, but was extended by by {}s", + new_lease_info.get_lease_expire_time() - lease_info.get_lease_expire_time() + ); + } + + #[tokio::test] + async fn not_renew_lease() { + let (lease_timeout, mock_meta_store, ..) 
= default_setup().await; + // Leader: If new leader was elected old leader should NOT renew lease + let other_leader_info = MetaLeaderInfo { + node_address: "other:1234".into(), + lease_id: 456, + }; + assert!( + !manage_term(true, &other_leader_info, lease_timeout, &mock_meta_store) + .await + .unwrap(), + "Leader: If new leader was elected old leader should NOT renew lease" + ); + // Follower: If new leader was, start election cycle + assert!( + !manage_term(false, &other_leader_info, lease_timeout, &mock_meta_store) + .await + .unwrap(), + "Follower: If new leader was elected, follower should enter election cycle" + ); + } + + #[tokio::test] + async fn lease_outdated() { + // Follower: If lease is outdated, follower should delete leader and lease + let lease_timeout = 10; + let mock_meta_store = Arc::new(MemStore::new()); + let leader_info = MetaLeaderInfo { + node_address: "localhost:1234".into(), + lease_id: 123, + }; + let now = since_epoch(); + // lease is expired + let lease_info = MetaLeaseInfo { + leader: Some(leader_info.clone()), + lease_register_time: now.as_secs() - 2 * lease_timeout, + lease_expire_time: now.as_secs() - lease_timeout, + }; + put_leader_lease(&leader_info, &lease_info, &mock_meta_store).await; + assert!( + !manage_term(false, &leader_info, lease_timeout, &mock_meta_store) + .await + .unwrap(), + "Should have determined that new election is needed if lease is no longer valid" + ); + let res = get_leader_lease_obj(&mock_meta_store).await; + assert!( + res.is_none(), + "Expected that leader and lease were deleted after lease expired. {:?}", + res + ) + } + + #[tokio::test] + async fn test_leader_not_changed() { + // leader_changed should return false, if leader did not change. Independent of lease + // changes + let (lease_timeout, mock_meta_store, leader_info, _, old_lease_reg_time) = + default_setup().await; + assert!( + !leader_changed(&leader_info, &mock_meta_store) + .await + .unwrap(), + "Leader not changed and lease not changed" + ); + let new_lease = MetaLeaseInfo { + leader: Some(leader_info.clone()), + lease_register_time: old_lease_reg_time.as_secs() + lease_timeout / 2, + lease_expire_time: old_lease_reg_time.as_secs() + lease_timeout / 2 + lease_timeout, + }; + put_lease_info(&new_lease, &mock_meta_store).await; + assert!( + !leader_changed(&leader_info, &mock_meta_store) + .await + .unwrap(), + "Leader not changed" + ); + } + + #[tokio::test] + async fn test_leader_changed() { + // leader_changed should return true, if leader did change. 
Independent of if lease changed + let (lease_timeout, mock_meta_store, leader_info, _, old_lease_reg_time) = + default_setup().await; + + let new_lease = MetaLeaseInfo { + leader: Some(leader_info.clone()), + lease_register_time: old_lease_reg_time.as_secs() + lease_timeout / 2, + lease_expire_time: old_lease_reg_time.as_secs() + lease_timeout / 2 + lease_timeout, + }; + let new_leader = MetaLeaderInfo { + node_address: "other:789".to_owned(), + lease_id: gen_rand_lease_id("other:789"), + }; + put_leader_info(&new_leader, &mock_meta_store).await; + assert!( + leader_changed(&leader_info, &mock_meta_store) + .await + .unwrap(), + "Leader changed and lease not changed" + ); + put_lease_info(&new_lease, &mock_meta_store).await; + assert!( + leader_changed(&leader_info, &mock_meta_store) + .await + .unwrap(), + "Leader changed and lease changed" + ); + } +} diff --git a/src/meta/src/rpc/mod.rs b/src/meta/src/rpc/mod.rs index 16d1c8e9d6589..d600358677938 100644 --- a/src/meta/src/rpc/mod.rs +++ b/src/meta/src/rpc/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod elections; mod intercept; pub mod metrics; pub mod server; diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 1ac7822ff2dd9..34433ed087e7e 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -14,12 +14,10 @@ use std::net::SocketAddr; use std::sync::Arc; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::time::Duration; use etcd_client::ConnectOptions; -use prost::Message; use risingwave_backup::storage::ObjectStoreMetaSnapshotStorage; -use risingwave_common::bail; use risingwave_common::monitor::process_linux::monitor_process; use risingwave_common_service::metrics_manager::MetricsManager; use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; @@ -33,11 +31,11 @@ use risingwave_pb::meta::heartbeat_service_server::HeartbeatServiceServer; use risingwave_pb::meta::notification_service_server::NotificationServiceServer; use risingwave_pb::meta::scale_service_server::ScaleServiceServer; use risingwave_pb::meta::stream_manager_service_server::StreamManagerServiceServer; -use risingwave_pb::meta::{MetaLeaderInfo, MetaLeaseInfo}; use risingwave_pb::user::user_service_server::UserServiceServer; use tokio::sync::oneshot::Sender; use tokio::task::JoinHandle; +use super::elections::run_elections; use super::intercept::MetricsMiddlewareLayer; use super::service::health_service::HealthServiceImpl; use super::service::notification_service::NotificationServiceImpl; @@ -56,11 +54,7 @@ use crate::rpc::service::heartbeat_service::HeartbeatServiceImpl; use crate::rpc::service::hummock_service::HummockServiceImpl; use crate::rpc::service::stream_service::StreamServiceImpl; use crate::rpc::service::user_service::UserServiceImpl; -use crate::rpc::{META_CF_NAME, META_LEADER_KEY, META_LEASE_KEY}; -use crate::storage::{ - EtcdMetaStore, MemStore, MetaStore, MetaStoreError, Transaction, - WrappedEtcdClient as EtcdClient, -}; +use crate::storage::{EtcdMetaStore, MemStore, MetaStore, WrappedEtcdClient as EtcdClient}; use crate::stream::{GlobalStreamManager, SourceManager}; use crate::{hummock, MetaResult}; @@ -138,170 +132,6 @@ pub async fn rpc_serve( } } -pub async fn register_leader_for_meta( - addr: String, - meta_store: Arc, - lease_time: u64, -) -> MetaResult<(MetaLeaderInfo, JoinHandle<()>, Sender<()>)> { - let mut tick_interval = tokio::time::interval(Duration::from_secs(lease_time / 2)); - loop { - 
tick_interval.tick().await; - let old_leader_info = match meta_store - .get_cf(META_CF_NAME, META_LEADER_KEY.as_bytes()) - .await - { - Err(MetaStoreError::ItemNotFound(_)) => vec![], - Ok(v) => v, - _ => { - continue; - } - }; - let old_leader_lease = match meta_store - .get_cf(META_CF_NAME, META_LEASE_KEY.as_bytes()) - .await - { - Err(MetaStoreError::ItemNotFound(_)) => vec![], - Ok(v) => v, - _ => { - continue; - } - }; - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("Time went backwards"); - if !old_leader_lease.is_empty() { - let lease_info = MetaLeaseInfo::decode(&mut old_leader_lease.as_slice()).unwrap(); - - if lease_info.lease_expire_time > now.as_secs() - && lease_info.leader.as_ref().unwrap().node_address != addr - { - let err_info = format!( - "the lease {:?} does not expire, now time: {}", - lease_info, - now.as_secs(), - ); - tracing::error!("{}", err_info); - bail!(err_info); - } - } - let lease_id = if !old_leader_info.is_empty() { - let leader_info = MetaLeaderInfo::decode(&mut old_leader_info.as_slice()).unwrap(); - leader_info.lease_id + 1 - } else { - 0 - }; - let mut txn = Transaction::default(); - let leader_info = MetaLeaderInfo { - lease_id, - node_address: addr.to_string(), - }; - let lease_info = MetaLeaseInfo { - leader: Some(leader_info.clone()), - lease_register_time: now.as_secs(), - lease_expire_time: now.as_secs() + lease_time, - }; - - if !old_leader_info.is_empty() { - txn.check_equal( - META_CF_NAME.to_string(), - META_LEADER_KEY.as_bytes().to_vec(), - old_leader_info, - ); - txn.put( - META_CF_NAME.to_string(), - META_LEADER_KEY.as_bytes().to_vec(), - leader_info.encode_to_vec(), - ); - } else { - if let Err(e) = meta_store - .put_cf( - META_CF_NAME, - META_LEADER_KEY.as_bytes().to_vec(), - leader_info.encode_to_vec(), - ) - .await - { - tracing::warn!( - "new cluster put leader info failed, MetaStoreError: {:?}", - e - ); - continue; - } - txn.check_equal( - META_CF_NAME.to_string(), - META_LEADER_KEY.as_bytes().to_vec(), - leader_info.encode_to_vec(), - ); - } - txn.put( - META_CF_NAME.to_string(), - META_LEASE_KEY.as_bytes().to_vec(), - lease_info.encode_to_vec(), - ); - if let Err(e) = meta_store.txn(txn).await { - tracing::warn!( - "add leader info failed, MetaStoreError: {:?}, try again later", - e - ); - continue; - } - let leader = leader_info.clone(); - let (shutdown_tx, mut shutdown_rx) = tokio::sync::oneshot::channel(); - let handle = tokio::spawn(async move { - let mut ticker = tokio::time::interval(Duration::from_secs(lease_time / 2)); - loop { - let mut txn = Transaction::default(); - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("Time went backwards"); - let lease_info = MetaLeaseInfo { - leader: Some(leader_info.clone()), - lease_register_time: now.as_secs(), - lease_expire_time: now.as_secs() + lease_time, - }; - txn.check_equal( - META_CF_NAME.to_string(), - META_LEADER_KEY.as_bytes().to_vec(), - leader_info.encode_to_vec(), - ); - txn.put( - META_CF_NAME.to_string(), - META_LEASE_KEY.as_bytes().to_vec(), - lease_info.encode_to_vec(), - ); - if let Err(e) = meta_store.txn(txn).await { - match e { - MetaStoreError::TransactionAbort() => { - tracing::error!( - "keep lease failed, another node has become new leader" - ); - futures::future::pending::<()>().await; - } - MetaStoreError::Internal(e) => { - tracing::warn!( - "keep lease failed, try again later, MetaStoreError: {:?}", - e - ); - } - MetaStoreError::ItemNotFound(e) => { - tracing::warn!("keep lease failed, MetaStoreError: {:?}", e); - 
} - } - } - tokio::select! { - _ = &mut shutdown_rx => { - tracing::info!("Register leader info is stopped"); - return; - } - // Wait for the minimal interval, - _ = ticker.tick() => {}, - } - } - }); - return Ok((leader, handle, shutdown_tx)); - } -} - pub async fn rpc_serve_with_store( meta_store: Arc, address_info: AddressInfo, @@ -309,30 +139,113 @@ pub async fn rpc_serve_with_store( lease_interval_secs: u64, opts: MetaOpts, ) -> MetaResult<(JoinHandle<()>, Sender<()>)> { - // Initialize managers. - let (info, lease_handle, lease_shutdown) = register_leader_for_meta( - address_info.addr.clone(), + let (current_leader_info, election_handle, election_shutdown, mut leader_rx) = run_elections( + address_info.listen_addr.clone().to_string(), meta_store.clone(), lease_interval_secs, ) .await?; + let prometheus_endpoint = opts.prometheus_endpoint.clone(); - let env = MetaSrvEnv::::new(opts, meta_store.clone(), info).await; + + // wait until initial election is done + if leader_rx.changed().await.is_err() { + panic!("Issue receiving leader value from channel"); + } + + // print current leader/follower status of this node + let mut note_status_leader_rx = leader_rx.clone(); + tokio::spawn(async move { + let span = tracing::span!(tracing::Level::INFO, "node_status"); + let _enter = span.enter(); + loop { + let (leader_addr, is_leader) = note_status_leader_rx.borrow().clone(); + + tracing::info!( + "This node currently is a {} at {}:{}", + if is_leader { + "leader. Serving" + } else { + "follower. Leader serving" + }, + leader_addr.host, + leader_addr.port + ); + + if note_status_leader_rx.changed().await.is_err() { + panic!("Issue receiving leader value from channel"); + } + } + }); + + // FIXME: Start leader services if follower becomes leader + // failover logic + + // Start follower services + if !leader_rx.borrow().1 { + tracing::info!("Node initially elected as follower"); + tokio::spawn(async move { + let span = tracing::span!(tracing::Level::INFO, "services"); + let _enter = span.enter(); + + let health_srv = HealthServiceImpl::new(); + // run follower services until node becomes leader + tracing::info!("Starting follower services"); + tokio::spawn(async move { + tonic::transport::Server::builder() + .layer(MetricsMiddlewareLayer::new(Arc::new(MetaMetrics::new()))) + .add_service(HealthServer::new(health_srv)) + .serve(address_info.listen_addr) + .await + .unwrap(); + }); + }); + + let shutdown_election = async move { + if election_shutdown.send(()).is_err() { + tracing::warn!("election service already shut down"); + } else if let Err(err) = election_handle.await { + tracing::warn!("Failed to join shutdown: {:?}", err); + } + }; + + let (svc_shutdown_tx, mut svc_shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + + let join_handle = tokio::spawn(async move { + tokio::select! 
{ + _ = tokio::signal::ctrl_c() => {} + _ = &mut svc_shutdown_rx => { + shutdown_election.await; + } + } + }); + + // FIXME: Avoid using join_handler and just pass around shutdown_sender + // https://github.com/risingwavelabs/risingwave/pull/6771 + return Ok((join_handle, svc_shutdown_tx)); + } + + tracing::info!("Node initially elected as leader"); + + let env = MetaSrvEnv::::new(opts, meta_store.clone(), current_leader_info).await; let fragment_manager = Arc::new(FragmentManager::new(env.clone()).await.unwrap()); let meta_metrics = Arc::new(MetaMetrics::new()); let registry = meta_metrics.registry(); monitor_process(registry).unwrap(); - let compactor_manager = Arc::new( - hummock::CompactorManager::with_meta(env.clone(), max_heartbeat_interval.as_secs()) + + let cluster_manager = Arc::new( + ClusterManager::new(env.clone(), max_heartbeat_interval) .await .unwrap(), ); + let heartbeat_srv = HeartbeatServiceImpl::new(cluster_manager.clone()); - let cluster_manager = Arc::new( - ClusterManager::new(env.clone(), max_heartbeat_interval) + let compactor_manager = Arc::new( + hummock::CompactorManager::with_meta(env.clone(), max_heartbeat_interval.as_secs()) .await .unwrap(), ); + let hummock_manager = hummock::HummockManager::new( env.clone(), cluster_manager.clone(), @@ -443,7 +356,6 @@ pub async fn rpc_serve_with_store( compactor_manager.clone(), )); - let heartbeat_srv = HeartbeatServiceImpl::new(cluster_manager.clone()); let ddl_srv = DdlServiceImpl::::new( env.clone(), catalog_manager.clone(), @@ -512,7 +424,7 @@ pub async fn rpc_serve_with_store( .await, ); sub_tasks.push(HummockManager::start_compaction_heartbeat(hummock_manager).await); - sub_tasks.push((lease_handle, lease_shutdown)); + sub_tasks.push((election_handle, election_shutdown)); if cfg!(not(test)) { sub_tasks.push( ClusterManager::start_heartbeat_checker(cluster_manager, Duration::from_secs(1)).await, @@ -539,6 +451,7 @@ pub async fn rpc_serve_with_store( let (svc_shutdown_tx, svc_shutdown_rx) = tokio::sync::oneshot::channel::<()>(); // Start services. 
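// --- Illustrative sketch, not part of the patch ---------------------------------
// Shows how the `(JoinHandle<()>, Sender<()>)` pair returned by `rpc_serve_with_store`
// might be consumed by the embedding binary. `run_meta_node` and the in-memory meta
// store are assumptions made only for this example.
async fn run_meta_node(address_info: AddressInfo, opts: MetaOpts) -> MetaResult<()> {
    let meta_store = Arc::new(MemStore::default());
    let (join_handle, shutdown_tx) = rpc_serve_with_store(
        meta_store,
        address_info,
        Duration::from_secs(60), // max heartbeat interval, arbitrary for the sketch
        2,                       // lease_interval_secs, arbitrary for the sketch
        opts,
    )
    .await?;
    // ... later, e.g. on SIGTERM or at test teardown:
    let _ = shutdown_tx.send(()); // ignore the error if the server already stopped
    join_handle.await.expect("meta server task failed");
    Ok(())
}
// ---------------------------------------------------------------------------------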
+ tracing::info!("Starting leader services"); let join_handle = tokio::spawn(async move { tonic::transport::Server::builder() .layer(MetricsMiddlewareLayer::new(meta_metrics.clone())) @@ -569,54 +482,3 @@ pub async fn rpc_serve_with_store( Ok((join_handle, svc_shutdown_tx)) } - -#[cfg(test)] -mod tests { - use tokio::time::sleep; - - use super::*; - - #[tokio::test] - async fn test_leader_lease() { - let info = AddressInfo { - addr: "node1".to_string(), - ..Default::default() - }; - let meta_store = Arc::new(MemStore::default()); - let (handle, closer) = rpc_serve_with_store( - meta_store.clone(), - info, - Duration::from_secs(10), - 2, - MetaOpts::test(false), - ) - .await - .unwrap(); - sleep(Duration::from_secs(4)).await; - let info2 = AddressInfo { - addr: "node2".to_string(), - ..Default::default() - }; - let ret = rpc_serve_with_store( - meta_store.clone(), - info2.clone(), - Duration::from_secs(10), - 2, - MetaOpts::test(false), - ) - .await; - assert!(ret.is_err()); - closer.send(()).unwrap(); - handle.await.unwrap(); - sleep(Duration::from_secs(3)).await; - rpc_serve_with_store( - meta_store.clone(), - info2, - Duration::from_secs(10), - 2, - MetaOpts::test(false), - ) - .await - .unwrap(); - } -} From d0c2ee4f28580f4efad28cb8bcb53acf3bce8c7b Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Fri, 16 Dec 2022 11:17:47 +0100 Subject: [PATCH 02/61] Failover from follower to leader --- src/meta/src/rpc/server.rs | 549 ++++++++++++++++++++----------------- 1 file changed, 300 insertions(+), 249 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 34433ed087e7e..a76be13d45d77 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -32,7 +32,6 @@ use risingwave_pb::meta::notification_service_server::NotificationServiceServer; use risingwave_pb::meta::scale_service_server::ScaleServiceServer; use risingwave_pb::meta::stream_manager_service_server::StreamManagerServiceServer; use risingwave_pb::user::user_service_server::UserServiceServer; -use tokio::sync::oneshot::Sender; use tokio::task::JoinHandle; use super::elections::run_elections; @@ -88,13 +87,16 @@ impl Default for AddressInfo { } } +// TODO: in imports do something like tokio::sync::watch::Sender<()> as watchSender +// and oneshot::sender as oneshotSender + pub async fn rpc_serve( address_info: AddressInfo, meta_store_backend: MetaStoreBackend, max_heartbeat_interval: Duration, lease_interval_secs: u64, opts: MetaOpts, -) -> MetaResult<(JoinHandle<()>, Sender<()>)> { +) -> MetaResult<(JoinHandle<()>, tokio::sync::watch::Sender<()>)> { match meta_store_backend { MetaStoreBackend::Etcd { endpoints, @@ -138,27 +140,32 @@ pub async fn rpc_serve_with_store( max_heartbeat_interval: Duration, lease_interval_secs: u64, opts: MetaOpts, -) -> MetaResult<(JoinHandle<()>, Sender<()>)> { - let (current_leader_info, election_handle, election_shutdown, mut leader_rx) = run_elections( +) -> MetaResult<(JoinHandle<()>, tokio::sync::watch::Sender<()>)> { + // Contains address info with port + // address_info.listen_addr; + + // Initialize managers. 
+ let (info, lease_handle, lease_shutdown, leader_rx) = run_elections( address_info.listen_addr.clone().to_string(), meta_store.clone(), lease_interval_secs, ) .await?; - let prometheus_endpoint = opts.prometheus_endpoint.clone(); + let mut services_leader_rx = leader_rx.clone(); + let mut note_status_leader_rx = leader_rx; - // wait until initial election is done - if leader_rx.changed().await.is_err() { - panic!("Issue receiving leader value from channel"); - } + // FIXME: add fencing mechanism + // https://github.com/risingwavelabs/risingwave/issues/6786 // print current leader/follower status of this node - let mut note_status_leader_rx = leader_rx.clone(); tokio::spawn(async move { let span = tracing::span!(tracing::Level::INFO, "node_status"); let _enter = span.enter(); loop { + if note_status_leader_rx.changed().await.is_err() { + panic!("Leader sender dropped"); + } let (leader_addr, is_leader) = note_status_leader_rx.borrow().clone(); tracing::info!( @@ -171,288 +178,327 @@ pub async fn rpc_serve_with_store( leader_addr.host, leader_addr.port ); - - if note_status_leader_rx.changed().await.is_err() { - panic!("Issue receiving leader value from channel"); - } } }); - // FIXME: Start leader services if follower becomes leader - // failover logic + // TODO: maybe do not use a channel here + // What I need is basically a oneshot channel with one producer and multiple consumers + let (svc_shutdown_tx, mut svc_shutdown_rx) = tokio::sync::watch::channel(()); - // Start follower services - if !leader_rx.borrow().1 { - tracing::info!("Node initially elected as follower"); - tokio::spawn(async move { - let span = tracing::span!(tracing::Level::INFO, "services"); - let _enter = span.enter(); + let join_handle = tokio::spawn(async move { + let span = tracing::span!(tracing::Level::INFO, "services"); + let _enter = span.enter(); + + // failover logic + if services_leader_rx.changed().await.is_err() { + panic!("Leader sender dropped"); + } + + let is_leader = services_leader_rx.borrow().clone().1; + let was_follower = !is_leader; - let health_srv = HealthServiceImpl::new(); - // run follower services until node becomes leader + // FIXME: Add service discovery for follower + // https://github.com/risingwavelabs/risingwave/issues/6755 + + // run follower services until node becomes leader + let mut svc_shutdown_rx_clone = svc_shutdown_rx.clone(); + let (follower_shutdown_tx, follower_shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + let (follower_finished_tx, follower_finished_rx) = tokio::sync::oneshot::channel::<()>(); + if !is_leader { tracing::info!("Starting follower services"); tokio::spawn(async move { + let health_srv = HealthServiceImpl::new(); + // TODO: Use prometheus endpoint in follower? tonic::transport::Server::builder() .layer(MetricsMiddlewareLayer::new(Arc::new(MetaMetrics::new()))) .add_service(HealthServer::new(health_srv)) - .serve(address_info.listen_addr) + .serve_with_shutdown(address_info.listen_addr, async move { + tokio::select! 
{ + _ = tokio::signal::ctrl_c() => {}, + // shutdown service if all services should be shut down + res = svc_shutdown_rx_clone.changed() => { + if res.is_err() { + tracing::error!("service shutdown sender dropped"); + } + }, + // shutdown service if follower becomes leader + res = follower_shutdown_rx => { + if res.is_err() { + tracing::error!("follower service shutdown sender dropped"); + } + }, + } + }) .await .unwrap(); + match follower_finished_tx.send(()) { + Ok(_) => tracing::info!("Signaling that follower service is down"), + Err(_) => tracing::error!( + "Error when signaling that follower service is down. Receiver dropped" + ), + } }); - }); + } - let shutdown_election = async move { - if election_shutdown.send(()).is_err() { - tracing::warn!("election service already shut down"); - } else if let Err(err) = election_handle.await { - tracing::warn!("Failed to join shutdown: {:?}", err); + // wait until this node becomes a leader + while !services_leader_rx.borrow().clone().1 { + if services_leader_rx.changed().await.is_err() { + panic!("Leader sender dropped"); } - }; - - let (svc_shutdown_tx, mut svc_shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + } - let join_handle = tokio::spawn(async move { - tokio::select! { - _ = tokio::signal::ctrl_c() => {} - _ = &mut svc_shutdown_rx => { - shutdown_election.await; - } + // shut down follower svc if node used to be follower + if was_follower { + match follower_shutdown_tx.send(()) { + // TODO: Do I always use this error format? + // Receiver, sender dropped? + Ok(_) => tracing::info!("Shutting down follower services"), + Err(_) => tracing::error!("Follower service receiver dropped"), } - }); + // Wait until follower service is down + match follower_finished_rx.await { + Ok(_) => tracing::info!("Follower services shut down"), + Err(_) => tracing::error!("Follower service shutdown finished sender dropped"), + }; + } - // FIXME: Avoid using join_handler and just pass around shutdown_sender - // https://github.com/risingwavelabs/risingwave/pull/6771 - return Ok((join_handle, svc_shutdown_tx)); - } + // leader services defined below + tracing::info!("Starting leader services"); + + // TODO: put leader service definition in separate function or maybe even separate file + let prometheus_endpoint = opts.prometheus_endpoint.clone(); + let env = MetaSrvEnv::::new(opts, meta_store.clone(), info).await; + let fragment_manager = Arc::new(FragmentManager::new(env.clone()).await.unwrap()); + let meta_metrics = Arc::new(MetaMetrics::new()); + let registry = meta_metrics.registry(); + monitor_process(registry).unwrap(); + let compactor_manager = Arc::new( + hummock::CompactorManager::with_meta(env.clone(), max_heartbeat_interval.as_secs()) + .await + .unwrap(), + ); - tracing::info!("Node initially elected as leader"); + let cluster_manager = Arc::new( + ClusterManager::new(env.clone(), max_heartbeat_interval) + .await + .unwrap(), + ); + let hummock_manager = hummock::HummockManager::new( + env.clone(), + cluster_manager.clone(), + meta_metrics.clone(), + compactor_manager.clone(), + ) + .await + .unwrap(); - let env = MetaSrvEnv::::new(opts, meta_store.clone(), current_leader_info).await; - let fragment_manager = Arc::new(FragmentManager::new(env.clone()).await.unwrap()); - let meta_metrics = Arc::new(MetaMetrics::new()); - let registry = meta_metrics.registry(); - monitor_process(registry).unwrap(); + #[cfg(not(madsim))] + if let Some(ref dashboard_addr) = address_info.dashboard_addr { + let dashboard_service = crate::dashboard::DashboardService 
{ + dashboard_addr: *dashboard_addr, + cluster_manager: cluster_manager.clone(), + fragment_manager: fragment_manager.clone(), + meta_store: env.meta_store_ref(), + prometheus_endpoint: prometheus_endpoint.clone(), + prometheus_client: prometheus_endpoint.as_ref().map(|x| { + use std::str::FromStr; + prometheus_http_query::Client::from_str(x).unwrap() + }), + }; + // TODO: join dashboard service back to local thread. + tokio::spawn(dashboard_service.serve(address_info.ui_path)); + } - let cluster_manager = Arc::new( - ClusterManager::new(env.clone(), max_heartbeat_interval) + let catalog_manager = Arc::new(CatalogManager::new(env.clone()).await.unwrap()); + + let (barrier_scheduler, scheduled_barriers) = + BarrierScheduler::new_pair(hummock_manager.clone(), env.opts.checkpoint_frequency); + + let source_manager = Arc::new( + SourceManager::new( + barrier_scheduler.clone(), + catalog_manager.clone(), + fragment_manager.clone(), + ) .await .unwrap(), - ); - let heartbeat_srv = HeartbeatServiceImpl::new(cluster_manager.clone()); + ); - let compactor_manager = Arc::new( - hummock::CompactorManager::with_meta(env.clone(), max_heartbeat_interval.as_secs()) - .await + let barrier_manager = Arc::new(GlobalBarrierManager::new( + scheduled_barriers, + env.clone(), + cluster_manager.clone(), + catalog_manager.clone(), + fragment_manager.clone(), + hummock_manager.clone(), + source_manager.clone(), + meta_metrics.clone(), + )); + + { + let source_manager = source_manager.clone(); + tokio::spawn(async move { + source_manager.run().await.unwrap(); + }); + } + + let stream_manager = Arc::new( + GlobalStreamManager::new( + env.clone(), + fragment_manager.clone(), + barrier_scheduler.clone(), + cluster_manager.clone(), + source_manager.clone(), + hummock_manager.clone(), + ) .unwrap(), - ); + ); - let hummock_manager = hummock::HummockManager::new( - env.clone(), - cluster_manager.clone(), - meta_metrics.clone(), - compactor_manager.clone(), - ) - .await - .unwrap(); - - #[cfg(not(madsim))] - if let Some(ref dashboard_addr) = address_info.dashboard_addr { - let dashboard_service = crate::dashboard::DashboardService { - dashboard_addr: *dashboard_addr, - cluster_manager: cluster_manager.clone(), - fragment_manager: fragment_manager.clone(), - meta_store: env.meta_store_ref(), - prometheus_endpoint: prometheus_endpoint.clone(), - prometheus_client: prometheus_endpoint.as_ref().map(|x| { - use std::str::FromStr; - prometheus_http_query::Client::from_str(x).unwrap() - }), - }; - // TODO: join dashboard service back to local thread. - tokio::spawn(dashboard_service.serve(address_info.ui_path)); - } + hummock_manager + .purge_stale( + &fragment_manager + .list_table_fragments() + .await + .expect("list_table_fragments"), + ) + .await + .unwrap(); - let catalog_manager = Arc::new(CatalogManager::new(env.clone()).await.unwrap()); + // Initialize services. 
+ let backup_object_store = Arc::new( + parse_remote_object_store( + &env.opts.backup_storage_url, + Arc::new(ObjectStoreMetrics::unused()), + true, + ) + .await, + ); + let backup_storage = Arc::new( + ObjectStoreMetaSnapshotStorage::new( + &env.opts.backup_storage_directory, + backup_object_store, + ) + .await + .unwrap(), + // FIXME: Do not use unwrap here + ); + let backup_manager = Arc::new(BackupManager::new( + env.clone(), + hummock_manager.clone(), + backup_storage, + meta_metrics.registry().clone(), + )); + let vacuum_manager = Arc::new(hummock::VacuumManager::new( + env.clone(), + hummock_manager.clone(), + backup_manager.clone(), + compactor_manager.clone(), + )); + + let heartbeat_srv = HeartbeatServiceImpl::new(cluster_manager.clone()); + let ddl_srv = DdlServiceImpl::::new( + env.clone(), + catalog_manager.clone(), + stream_manager.clone(), + source_manager.clone(), + cluster_manager.clone(), + fragment_manager.clone(), + barrier_manager.clone(), + ); - let (barrier_scheduler, scheduled_barriers) = - BarrierScheduler::new_pair(hummock_manager.clone(), env.opts.checkpoint_frequency); + let user_srv = UserServiceImpl::::new(env.clone(), catalog_manager.clone()); - let source_manager = Arc::new( - SourceManager::new( + let scale_srv = ScaleServiceImpl::::new( barrier_scheduler.clone(), - catalog_manager.clone(), fragment_manager.clone(), - ) - .await - .unwrap(), - ); - - let barrier_manager = Arc::new(GlobalBarrierManager::new( - scheduled_barriers, - env.clone(), - cluster_manager.clone(), - catalog_manager.clone(), - fragment_manager.clone(), - hummock_manager.clone(), - source_manager.clone(), - meta_metrics.clone(), - )); - - { - let source_manager = source_manager.clone(); - tokio::spawn(async move { - source_manager.run().await.unwrap(); - }); - } + cluster_manager.clone(), + source_manager, + catalog_manager.clone(), + stream_manager.clone(), + ); - let stream_manager = Arc::new( - GlobalStreamManager::new( + let cluster_srv = ClusterServiceImpl::::new(cluster_manager.clone()); + let stream_srv = StreamServiceImpl::::new( env.clone(), - fragment_manager.clone(), barrier_scheduler.clone(), + fragment_manager.clone(), + ); + let hummock_srv = HummockServiceImpl::new( + hummock_manager.clone(), + compactor_manager.clone(), + vacuum_manager.clone(), + fragment_manager.clone(), + ); + let notification_srv = NotificationServiceImpl::new( + env.clone(), + catalog_manager, cluster_manager.clone(), - source_manager.clone(), hummock_manager.clone(), - ) - .unwrap(), - ); - - hummock_manager - .purge_stale( - &fragment_manager - .list_table_fragments() - .await - .expect("list_table_fragments"), - ) - .await - .unwrap(); + fragment_manager.clone(), + ); + let health_srv = HealthServiceImpl::new(); + let backup_srv = BackupServiceImpl::new(backup_manager); - // Initialize services. 
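// Note on the `FIXME: Do not use unwrap here` above: the pre-refactor code propagated
// this error with `.await?` (see the removed lines below). Inside the spawned
// `async move` block the enclosing future returns `()`, so `?` no longer compiles at
// this point, which is presumably why the unwrap was left in as a stopgap.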
- let backup_object_store = Arc::new( - parse_remote_object_store( - &env.opts.backup_storage_url, - Arc::new(ObjectStoreMetrics::unused()), - true, - ) - .await, - ); - let backup_storage = Arc::new( - ObjectStoreMetaSnapshotStorage::new( - &env.opts.backup_storage_directory, - backup_object_store, - ) - .await?, - ); - let backup_manager = Arc::new(BackupManager::new( - env.clone(), - hummock_manager.clone(), - backup_storage, - meta_metrics.registry().clone(), - )); - let vacuum_manager = Arc::new(hummock::VacuumManager::new( - env.clone(), - hummock_manager.clone(), - backup_manager.clone(), - compactor_manager.clone(), - )); - - let ddl_srv = DdlServiceImpl::::new( - env.clone(), - catalog_manager.clone(), - stream_manager.clone(), - source_manager.clone(), - cluster_manager.clone(), - fragment_manager.clone(), - barrier_manager.clone(), - ); - - let user_srv = UserServiceImpl::::new(env.clone(), catalog_manager.clone()); - - let scale_srv = ScaleServiceImpl::::new( - barrier_scheduler.clone(), - fragment_manager.clone(), - cluster_manager.clone(), - source_manager, - catalog_manager.clone(), - stream_manager.clone(), - ); - - let cluster_srv = ClusterServiceImpl::::new(cluster_manager.clone()); - let stream_srv = StreamServiceImpl::::new( - env.clone(), - barrier_scheduler.clone(), - fragment_manager.clone(), - ); - let hummock_srv = HummockServiceImpl::new( - hummock_manager.clone(), - compactor_manager.clone(), - vacuum_manager.clone(), - fragment_manager.clone(), - ); - let notification_srv = NotificationServiceImpl::new( - env.clone(), - catalog_manager, - cluster_manager.clone(), - hummock_manager.clone(), - fragment_manager.clone(), - ); - let health_srv = HealthServiceImpl::new(); - let backup_srv = BackupServiceImpl::new(backup_manager); - - if let Some(prometheus_addr) = address_info.prometheus_addr { - MetricsManager::boot_metrics_service( - prometheus_addr.to_string(), - meta_metrics.registry().clone(), - ) - } + if let Some(prometheus_addr) = address_info.prometheus_addr { + MetricsManager::boot_metrics_service( + prometheus_addr.to_string(), + meta_metrics.registry().clone(), + ) + } - let compaction_scheduler = Arc::new(CompactionScheduler::new( - env.clone(), - hummock_manager.clone(), - compactor_manager.clone(), - )); - - // sub_tasks executed concurrently. Can be shutdown via shutdown_all - let mut sub_tasks = - hummock::start_hummock_workers(vacuum_manager, compaction_scheduler, &env.opts); - sub_tasks.push( - ClusterManager::start_worker_num_monitor( - cluster_manager.clone(), - Duration::from_secs(env.opts.node_num_monitor_interval_sec), - meta_metrics.clone(), - ) - .await, - ); - sub_tasks.push(HummockManager::start_compaction_heartbeat(hummock_manager).await); - sub_tasks.push((election_handle, election_shutdown)); - if cfg!(not(test)) { + // Initialize sub-tasks. + // sub_tasks executed concurrently. 
Can be shutdown via shutdown_all + let compaction_scheduler = Arc::new(CompactionScheduler::new( + env.clone(), + hummock_manager.clone(), + compactor_manager.clone(), + )); + let mut sub_tasks = + hummock::start_hummock_workers(vacuum_manager, compaction_scheduler, &env.opts); sub_tasks.push( - ClusterManager::start_heartbeat_checker(cluster_manager, Duration::from_secs(1)).await, + ClusterManager::start_worker_num_monitor( + cluster_manager.clone(), + Duration::from_secs(env.opts.node_num_monitor_interval_sec), + meta_metrics.clone(), + ) + .await, ); - sub_tasks.push(GlobalBarrierManager::start(barrier_manager).await); - } - let (idle_send, idle_recv) = tokio::sync::oneshot::channel(); - sub_tasks.push( - IdleManager::start_idle_checker(env.idle_manager_ref(), Duration::from_secs(30), idle_send) + sub_tasks.push(HummockManager::start_compaction_heartbeat(hummock_manager).await); + sub_tasks.push((lease_handle, lease_shutdown)); + if cfg!(not(test)) { + sub_tasks.push( + ClusterManager::start_heartbeat_checker(cluster_manager, Duration::from_secs(1)) + .await, + ); + sub_tasks.push(GlobalBarrierManager::start(barrier_manager).await); + } + + let (idle_send, idle_recv) = tokio::sync::oneshot::channel(); + sub_tasks.push( + IdleManager::start_idle_checker( + env.idle_manager_ref(), + Duration::from_secs(30), + idle_send, + ) .await, - ); - let shutdown_all = async move { - for (join_handle, shutdown_sender) in sub_tasks { - if let Err(_err) = shutdown_sender.send(()) { - // Maybe it is already shut down - continue; - } - if let Err(err) = join_handle.await { - tracing::warn!("Failed to join shutdown: {:?}", err); + ); + + let shutdown_all = async move { + for (join_handle, shutdown_sender) in sub_tasks { + if let Err(_err) = shutdown_sender.send(()) { + // Maybe it is already shut down + continue; + } + if let Err(err) = join_handle.await { + tracing::warn!("Failed to join shutdown: {:?}", err); + } } - } - }; + }; - let (svc_shutdown_tx, svc_shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + // FIXME: Add service discovery for leader + // https://github.com/risingwavelabs/risingwave/issues/6755 - // Start services. - tracing::info!("Starting leader services"); - let join_handle = tokio::spawn(async move { + // leader services defined above tonic::transport::Server::builder() .layer(MetricsMiddlewareLayer::new(meta_metrics.clone())) .add_service(HeartbeatServiceServer::new(heartbeat_srv)) @@ -468,10 +514,15 @@ pub async fn rpc_serve_with_store( .serve_with_shutdown(address_info.listen_addr, async move { tokio::select! 
{ _ = tokio::signal::ctrl_c() => {}, - _ = svc_shutdown_rx => { + res = svc_shutdown_rx.changed() => { + tracing::info!("svc_shutdown_rx.changed()"); + if res.is_err() { + tracing::error!("service shutdown sender dropped"); + } shutdown_all.await; }, _ = idle_recv => { + tracing::info!("idle_recv"); shutdown_all.await; }, } From bec44d7cb49d42da6bf3d4eee3faa2df43de79db Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Fri, 16 Dec 2022 11:23:15 +0100 Subject: [PATCH 03/61] add notes --- src/meta/src/rpc/server.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index a76be13d45d77..0ca90eb2fb8f8 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -266,6 +266,8 @@ pub async fn rpc_serve_with_store( tracing::info!("Starting leader services"); // TODO: put leader service definition in separate function or maybe even separate file + // Do that in this PR see https://github.com/risingwavelabs/risingwave/pull/6937#issuecomment-1354518161 + let prometheus_endpoint = opts.prometheus_endpoint.clone(); let env = MetaSrvEnv::::new(opts, meta_store.clone(), info).await; let fragment_manager = Arc::new(FragmentManager::new(env.clone()).await.unwrap()); From 0bc2fa5e8b261b3e13d709972c5cf0583457bfe6 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Fri, 16 Dec 2022 12:52:46 +0100 Subject: [PATCH 04/61] disable commit tx commit_trx --- src/meta/src/hummock/manager/mod.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/meta/src/hummock/manager/mod.rs b/src/meta/src/hummock/manager/mod.rs index b9505cd2cd5d3..5d3aee03f119e 100644 --- a/src/meta/src/hummock/manager/mod.rs +++ b/src/meta/src/hummock/manager/mod.rs @@ -23,7 +23,6 @@ use arc_swap::ArcSwap; use fail::fail_point; use function_name::named; use itertools::Itertools; -use prost::Message; use risingwave_common::monitor::rwlock::MonitoredRwLock; use risingwave_common::util::epoch::{Epoch, INVALID_EPOCH}; use risingwave_hummock_sdk::compact::compact_task_to_string; @@ -66,7 +65,6 @@ use crate::model::{ BTreeMapEntryTransaction, BTreeMapTransaction, MetadataModel, ValTransaction, VarTransaction, }; use crate::rpc::metrics::MetaMetrics; -use crate::rpc::{META_CF_NAME, META_LEADER_KEY}; use crate::storage::{MetaStore, Transaction}; mod compaction_group_manager; @@ -491,9 +489,9 @@ where async fn commit_trx( &self, meta_store: &S, - mut trx: Transaction, + trx: Transaction, context_id: Option, - info: MetaLeaderInfo, + _info: MetaLeaderInfo, ) -> Result<()> { if let Some(context_id) = context_id { if context_id == META_NODE_ID { @@ -503,11 +501,13 @@ where return Err(Error::InvalidContext(context_id)); } } - trx.check_equal( - META_CF_NAME.to_owned(), - META_LEADER_KEY.as_bytes().to_vec(), - info.encode_to_vec(), - ); + // FIXME: https://github.com/risingwavelabs/risingwave/issues/6534 + // check prevents meta node failover + // trx.check_equal( + // META_CF_NAME.to_owned(), + // META_LEADER_KEY.as_bytes().to_vec(), + // info.encode_to_vec(), + // ); meta_store.txn(trx).await.map_err(Into::into) } From 30b8e4a855d60f257d612c5934255cd6ecf04b5d Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Fri, 16 Dec 2022 14:47:13 +0100 Subject: [PATCH 05/61] fix: Failover crashes hummock --- src/common/src/util/addr.rs | 9 +++++++ src/meta/src/hummock/manager/mod.rs | 19 +++++++------ src/meta/src/manager/env.rs | 17 ++++++++---- src/meta/src/rpc/elections.rs | 41 ++++++++++------------------- src/meta/src/rpc/server.rs | 10 ++++--- 5 files changed, 52 
insertions(+), 44 deletions(-) diff --git a/src/common/src/util/addr.rs b/src/common/src/util/addr.rs index 922f149c9f8c4..42ef68b05b423 100644 --- a/src/common/src/util/addr.rs +++ b/src/common/src/util/addr.rs @@ -16,6 +16,7 @@ use std::net::SocketAddr; use std::str::FromStr; use risingwave_pb::common::HostAddress as ProstHostAddress; +use risingwave_pb::meta::MetaLeaderInfo; use crate::error::{internal_error, Result}; @@ -41,6 +42,14 @@ impl From for HostAddr { } } +impl From for HostAddr { + fn from(mli: MetaLeaderInfo) -> Self { + mli.node_address + .parse::() + .expect("invalid leader addr") + } +} + impl TryFrom<&str> for HostAddr { type Error = crate::error::RwError; diff --git a/src/meta/src/hummock/manager/mod.rs b/src/meta/src/hummock/manager/mod.rs index 5d3aee03f119e..d5365711a38b1 100644 --- a/src/meta/src/hummock/manager/mod.rs +++ b/src/meta/src/hummock/manager/mod.rs @@ -23,6 +23,7 @@ use arc_swap::ArcSwap; use fail::fail_point; use function_name::named; use itertools::Itertools; +use prost::Message; use risingwave_common::monitor::rwlock::MonitoredRwLock; use risingwave_common::util::epoch::{Epoch, INVALID_EPOCH}; use risingwave_hummock_sdk::compact::compact_task_to_string; @@ -65,6 +66,7 @@ use crate::model::{ BTreeMapEntryTransaction, BTreeMapTransaction, MetadataModel, ValTransaction, VarTransaction, }; use crate::rpc::metrics::MetaMetrics; +use crate::rpc::{META_CF_NAME, META_LEADER_KEY}; use crate::storage::{MetaStore, Transaction}; mod compaction_group_manager; @@ -489,9 +491,9 @@ where async fn commit_trx( &self, meta_store: &S, - trx: Transaction, + mut trx: Transaction, context_id: Option, - _info: MetaLeaderInfo, + info: MetaLeaderInfo, ) -> Result<()> { if let Some(context_id) = context_id { if context_id == META_NODE_ID { @@ -501,13 +503,14 @@ where return Err(Error::InvalidContext(context_id)); } } - // FIXME: https://github.com/risingwavelabs/risingwave/issues/6534 + + // FIXME: This should not panic // check prevents meta node failover - // trx.check_equal( - // META_CF_NAME.to_owned(), - // META_LEADER_KEY.as_bytes().to_vec(), - // info.encode_to_vec(), - // ); + trx.check_equal( + META_CF_NAME.to_owned(), + META_LEADER_KEY.as_bytes().to_vec(), + info.encode_to_vec(), + ); meta_store.txn(trx).await.map_err(Into::into) } diff --git a/src/meta/src/manager/env.rs b/src/meta/src/manager/env.rs index 46a0e721551b1..edbd0caf9c14c 100644 --- a/src/meta/src/manager/env.rs +++ b/src/meta/src/manager/env.rs @@ -22,6 +22,7 @@ use risingwave_pb::meta::MetaLeaderInfo; #[cfg(any(test, feature = "test"))] use risingwave_pb::meta::MetaLeaseInfo; use risingwave_rpc_client::{StreamClientPool, StreamClientPoolRef}; +use tokio::sync::watch::Receiver; use crate::manager::{ IdGeneratorManager, IdGeneratorManagerRef, IdleManager, IdleManagerRef, NotificationManager, @@ -55,7 +56,7 @@ where /// idle status manager. idle_manager: IdleManagerRef, - info: MetaLeaderInfo, + leader_rx: Receiver<(MetaLeaderInfo, bool)>, /// options read by all services pub opts: Arc, @@ -133,7 +134,11 @@ impl MetaSrvEnv where S: MetaStore, { - pub async fn new(opts: MetaOpts, meta_store: Arc, info: MetaLeaderInfo) -> Self { + pub async fn new( + opts: MetaOpts, + meta_store: Arc, + leader_rx: Receiver<(MetaLeaderInfo, bool)>, + ) -> Self { // change to sync after refactor `IdGeneratorManager::new` sync. 
let id_gen_manager = Arc::new(IdGeneratorManager::new(meta_store.clone()).await); let stream_client_pool = Arc::new(StreamClientPool::default()); @@ -146,7 +151,7 @@ where notification_manager, stream_client_pool, idle_manager, - info, + leader_rx, opts: opts.into(), } } @@ -192,7 +197,7 @@ where } pub fn get_leader_info(&self) -> MetaLeaderInfo { - self.info.clone() + self.leader_rx.borrow().0.clone() } } @@ -209,6 +214,8 @@ impl MetaSrvEnv { lease_id: 0, node_address: "".to_string(), }; + let (_, leader_rx) = tokio::sync::watch::channel((leader_info.clone(), true)); + let lease_info = MetaLeaseInfo { leader: Some(leader_info.clone()), lease_register_time: 0, @@ -242,7 +249,7 @@ impl MetaSrvEnv { notification_manager, stream_client_pool, idle_manager, - info: leader_info, + leader_rx, opts, } } diff --git a/src/meta/src/rpc/elections.rs b/src/meta/src/rpc/elections.rs index 8c6b4439d3b55..437931cfcb862 100644 --- a/src/meta/src/rpc/elections.rs +++ b/src/meta/src/rpc/elections.rs @@ -21,7 +21,6 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH}; use prost::Message; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; -use risingwave_common::util::addr::HostAddr; use risingwave_pb::meta::{MetaLeaderInfo, MetaLeaseInfo}; use tokio::sync::oneshot::Sender; use tokio::sync::watch::Receiver; @@ -48,14 +47,12 @@ struct ElectionResult { pub is_leader: bool, } -impl ElectionResult { - pub fn get_leader_addr(&self) -> HostAddr { - self.meta_leader_info - .node_address - .parse::() - .expect("invalid leader addr") - } -} +// TODO: remove +// impl ElectionResult { +// pub fn get_leader_addr(&self) -> HostAddr { +// HostAddr::from(self.meta_leader_info.clone()) +// } +// } /// Runs for election in an attempt to become leader /// @@ -299,7 +296,7 @@ pub async fn run_elections( MetaLeaderInfo, JoinHandle<()>, Sender<()>, - Receiver<(HostAddr, bool)>, + Receiver<(MetaLeaderInfo, bool)>, )> { // Randomize interval to reduce mitigate likelihood of simultaneous requests let mut rng: StdRng = SeedableRng::from_entropy(); @@ -317,14 +314,10 @@ pub async fn run_elections( gen_rand_lease_id(addr.as_str()), ) .await; - let (leader_addr, initial_leader, is_initial_leader) = match election_result { + let (initial_leader, is_initial_leader) = match election_result { Ok(elect_result) => { tracing::info!("initial election finished"); - ( - elect_result.get_leader_addr(), - elect_result.meta_leader_info, - elect_result.is_leader, - ) + (elect_result.meta_leader_info, elect_result.is_leader) } Err(_) => { tracing::info!("initial election failed. 
Repeating election"); @@ -344,7 +337,8 @@ pub async fn run_elections( // define all follow up elections and terms in handle let (shutdown_tx, mut shutdown_rx) = tokio::sync::oneshot::channel(); - let (leader_tx, leader_rx) = tokio::sync::watch::channel((leader_addr, is_initial_leader)); + let (leader_tx, leader_rx) = + tokio::sync::watch::channel((initial_leader.clone(), is_initial_leader)); let handle = tokio::spawn(async move { // runs all followup elections let mut ticker = tokio::time::interval( @@ -355,12 +349,10 @@ pub async fn run_elections( let mut is_leader = is_initial_leader; let mut leader_info = initial_leader.clone(); - let n_addr = initial_leader.node_address.as_str(); - let mut leader_addr = n_addr.parse::().unwrap(); 'election: loop { // Do not elect new leader directly after running the initial election if !initial_election { - let (leader_addr_, leader_info_, is_leader_) = match campaign( + let (leader_info_, is_leader_) = match campaign( &meta_store, &addr, lease_time_sec, @@ -375,11 +367,7 @@ pub async fn run_elections( } Ok(elect_result) => { tracing::info!("election finished"); - ( - elect_result.get_leader_addr(), - elect_result.meta_leader_info, - elect_result.is_leader, - ) + (elect_result.meta_leader_info, elect_result.is_leader) } }; @@ -392,12 +380,11 @@ pub async fn run_elections( } leader_info = leader_info_; is_leader = is_leader_; - leader_addr = leader_addr_; } initial_election = false; // signal to observers if there is a change in leadership - leader_tx.send((leader_addr.clone(), is_leader)).unwrap(); + leader_tx.send((leader_info.clone(), is_leader)).unwrap(); // election done. Enter the term of the current leader // Leader stays in power until leader crashes diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 0ca90eb2fb8f8..c6ca1ba194954 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -19,6 +19,7 @@ use std::time::Duration; use etcd_client::ConnectOptions; use risingwave_backup::storage::ObjectStoreMetaSnapshotStorage; use risingwave_common::monitor::process_linux::monitor_process; +use risingwave_common::util::addr::HostAddr; use risingwave_common_service::metrics_manager::MetricsManager; use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; use risingwave_object_store::object::parse_remote_object_store; @@ -145,7 +146,7 @@ pub async fn rpc_serve_with_store( // address_info.listen_addr; // Initialize managers. 
- let (info, lease_handle, lease_shutdown, leader_rx) = run_elections( + let (_, lease_handle, lease_shutdown, leader_rx) = run_elections( address_info.listen_addr.clone().to_string(), meta_store.clone(), lease_interval_secs, @@ -153,7 +154,7 @@ pub async fn rpc_serve_with_store( .await?; let mut services_leader_rx = leader_rx.clone(); - let mut note_status_leader_rx = leader_rx; + let mut note_status_leader_rx = leader_rx.clone(); // FIXME: add fencing mechanism // https://github.com/risingwavelabs/risingwave/issues/6786 @@ -166,7 +167,8 @@ pub async fn rpc_serve_with_store( if note_status_leader_rx.changed().await.is_err() { panic!("Leader sender dropped"); } - let (leader_addr, is_leader) = note_status_leader_rx.borrow().clone(); + let (leader_info, is_leader) = note_status_leader_rx.borrow().clone(); + let leader_addr = HostAddr::from(leader_info); tracing::info!( "This node currently is a {} at {}:{}", @@ -269,7 +271,7 @@ pub async fn rpc_serve_with_store( // Do that in this PR see https://github.com/risingwavelabs/risingwave/pull/6937#issuecomment-1354518161 let prometheus_endpoint = opts.prometheus_endpoint.clone(); - let env = MetaSrvEnv::::new(opts, meta_store.clone(), info).await; + let env = MetaSrvEnv::::new(opts, meta_store.clone(), leader_rx.clone()).await; let fragment_manager = Arc::new(FragmentManager::new(env.clone()).await.unwrap()); let meta_metrics = Arc::new(MetaMetrics::new()); let registry = meta_metrics.registry(); From ad3d442689b65aaf731d49c6349c150ed86df6df Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 11:27:09 +0100 Subject: [PATCH 06/61] introduce leader_svc mod --- src/meta/src/rpc/leader_svs.rs | 294 +++++++++++++++++++++++++++++++++ src/meta/src/rpc/mod.rs | 1 + src/meta/src/rpc/server.rs | 242 +++------------------------ 3 files changed, 320 insertions(+), 217 deletions(-) create mode 100644 src/meta/src/rpc/leader_svs.rs diff --git a/src/meta/src/rpc/leader_svs.rs b/src/meta/src/rpc/leader_svs.rs new file mode 100644 index 0000000000000..1310c94098e7b --- /dev/null +++ b/src/meta/src/rpc/leader_svs.rs @@ -0,0 +1,294 @@ +use std::net::SocketAddr; +use std::sync::Arc; +use std::time::Duration; + +use etcd_client::ConnectOptions; +use risingwave_backup::error::BackupError; +use risingwave_backup::storage::ObjectStoreMetaSnapshotStorage; +use risingwave_common::monitor::process_linux::monitor_process; +use risingwave_common::util::addr::HostAddr; +use risingwave_common_service::metrics_manager::MetricsManager; +use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; +use risingwave_object_store::object::parse_remote_object_store; +use risingwave_pb::meta::MetaLeaderInfo; +use tokio::sync::oneshot::{Receiver as OneReceiver, Sender as OneSender}; +use tokio::sync::watch::Receiver as WatchReceiver; +use tokio::task::JoinHandle; + +use super::service::health_service::HealthServiceImpl; +use super::service::notification_service::NotificationServiceImpl; +use super::service::scale_service::ScaleServiceImpl; +use super::DdlServiceImpl; +use crate::backup_restore::BackupManager; +use crate::barrier::{BarrierScheduler, GlobalBarrierManager}; +use crate::hummock::{CompactionScheduler, HummockManager}; +use crate::manager::{ + CatalogManager, ClusterManager, FragmentManager, IdleManager, MetaOpts, MetaSrvEnv, +}; +use crate::rpc::metrics::MetaMetrics; +use crate::rpc::server::AddressInfo; +use crate::rpc::service::backup_service::BackupServiceImpl; +use crate::rpc::service::cluster_service::ClusterServiceImpl; +use 
crate::rpc::service::heartbeat_service::HeartbeatServiceImpl; +use crate::rpc::service::hummock_service::HummockServiceImpl; +use crate::rpc::service::stream_service::StreamServiceImpl; +use crate::rpc::service::user_service::UserServiceImpl; +use crate::storage::{EtcdMetaStore, MemStore, MetaStore, WrappedEtcdClient as EtcdClient}; +use crate::stream::{GlobalStreamManager, SourceManager}; +use crate::{hummock, MetaResult}; + +pub struct LeaderServices { + pub meta_metrics: Arc, + pub heartbeat_srv: HeartbeatServiceImpl, + pub cluster_srv: ClusterServiceImpl, + pub stream_srv: StreamServiceImpl, + pub hummock_srv: HummockServiceImpl, + pub notification_srv: NotificationServiceImpl, + pub ddl_srv: DdlServiceImpl, + pub user_srv: UserServiceImpl, + pub scale_srv: ScaleServiceImpl, + pub health_srv: HealthServiceImpl, + pub backup_srv: BackupServiceImpl, + pub sub_tasks: Vec<(JoinHandle<()>, OneSender<()>)>, + pub idle_recv: OneReceiver<()>, +} + +// TODO: Write docstring +// TODO: Only call this function once +pub async fn get_leader_srv( + meta_store: Arc, + address_info: AddressInfo, + max_heartbeat_interval: Duration, + opts: MetaOpts, + leader_rx: WatchReceiver<(MetaLeaderInfo, bool)>, + lease_handle: JoinHandle<()>, + lease_shutdown: OneSender<()>, +) -> Result, BackupError> { + tracing::info!("Defining leader services"); + + let prometheus_endpoint = opts.prometheus_endpoint.clone(); + let env = MetaSrvEnv::::new(opts, meta_store.clone(), leader_rx.clone()).await; + let fragment_manager = Arc::new(FragmentManager::new(env.clone()).await.unwrap()); + let meta_metrics = Arc::new(MetaMetrics::new()); + let registry = meta_metrics.registry(); + monitor_process(registry).unwrap(); + let compactor_manager = Arc::new( + hummock::CompactorManager::with_meta(env.clone(), max_heartbeat_interval.as_secs()) + .await + .unwrap(), + ); + + let cluster_manager = Arc::new( + ClusterManager::new(env.clone(), max_heartbeat_interval) + .await + .unwrap(), + ); + let hummock_manager = hummock::HummockManager::new( + env.clone(), + cluster_manager.clone(), + meta_metrics.clone(), + compactor_manager.clone(), + ) + .await + .unwrap(); + + #[cfg(not(madsim))] + if let Some(ref dashboard_addr) = address_info.dashboard_addr { + let dashboard_service = crate::dashboard::DashboardService { + dashboard_addr: *dashboard_addr, + cluster_manager: cluster_manager.clone(), + fragment_manager: fragment_manager.clone(), + meta_store: env.meta_store_ref(), + prometheus_endpoint: prometheus_endpoint.clone(), + prometheus_client: prometheus_endpoint.as_ref().map(|x| { + use std::str::FromStr; + prometheus_http_query::Client::from_str(x).unwrap() + }), + }; + // TODO: join dashboard service back to local thread. 
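// A minimal sketch, not part of this patch, of how the TODO above might be
// resolved: keep the `JoinHandle` from `tokio::spawn` instead of detaching it,
// so the shutdown path can abort or await the dashboard alongside the other
// sub-tasks. This mirrors the spawn on the following line rather than adding a
// second one; `dashboard_handle` is an illustrative name.
let dashboard_handle = tokio::spawn(dashboard_service.serve(address_info.ui_path));
// ...later, during shutdown handling:
dashboard_handle.abort();
let _ = dashboard_handle.await;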
+ tokio::spawn(dashboard_service.serve(address_info.ui_path)); + } + + let catalog_manager = Arc::new(CatalogManager::new(env.clone()).await.unwrap()); + + let (barrier_scheduler, scheduled_barriers) = + BarrierScheduler::new_pair(hummock_manager.clone(), env.opts.checkpoint_frequency); + + let source_manager = Arc::new( + SourceManager::new( + barrier_scheduler.clone(), + catalog_manager.clone(), + fragment_manager.clone(), + ) + .await + .unwrap(), + ); + + let barrier_manager = Arc::new(GlobalBarrierManager::new( + scheduled_barriers, + env.clone(), + cluster_manager.clone(), + catalog_manager.clone(), + fragment_manager.clone(), + hummock_manager.clone(), + source_manager.clone(), + meta_metrics.clone(), + )); + + { + let source_manager = source_manager.clone(); + tokio::spawn(async move { + source_manager.run().await.unwrap(); + }); + } + + let stream_manager = Arc::new( + GlobalStreamManager::new( + env.clone(), + fragment_manager.clone(), + barrier_scheduler.clone(), + cluster_manager.clone(), + source_manager.clone(), + hummock_manager.clone(), + ) + .unwrap(), + ); + + hummock_manager + .purge_stale( + &fragment_manager + .list_table_fragments() + .await + .expect("list_table_fragments"), + ) + .await + .unwrap(); + + // Initialize services. + let backup_object_store = Arc::new( + parse_remote_object_store( + &env.opts.backup_storage_url, + Arc::new(ObjectStoreMetrics::unused()), + true, + ) + .await, + ); + let backup_storage = Arc::new( + ObjectStoreMetaSnapshotStorage::new( + &env.opts.backup_storage_directory, + backup_object_store, + ) + .await?, + ); + let backup_manager = Arc::new(BackupManager::new( + env.clone(), + hummock_manager.clone(), + backup_storage, + meta_metrics.registry().clone(), + )); + let vacuum_manager = Arc::new(hummock::VacuumManager::new( + env.clone(), + hummock_manager.clone(), + backup_manager.clone(), + compactor_manager.clone(), + )); + + let heartbeat_srv = HeartbeatServiceImpl::new(cluster_manager.clone()); + let ddl_srv = DdlServiceImpl::::new( + env.clone(), + catalog_manager.clone(), + stream_manager.clone(), + source_manager.clone(), + cluster_manager.clone(), + fragment_manager.clone(), + barrier_manager.clone(), + ); + + let user_srv = UserServiceImpl::::new(env.clone(), catalog_manager.clone()); + + let scale_srv = ScaleServiceImpl::::new( + barrier_scheduler.clone(), + fragment_manager.clone(), + cluster_manager.clone(), + source_manager, + catalog_manager.clone(), + stream_manager.clone(), + ); + + let cluster_srv = ClusterServiceImpl::::new(cluster_manager.clone()); + let stream_srv = StreamServiceImpl::::new( + env.clone(), + barrier_scheduler.clone(), + fragment_manager.clone(), + ); + let hummock_srv = HummockServiceImpl::new( + hummock_manager.clone(), + compactor_manager.clone(), + vacuum_manager.clone(), + fragment_manager.clone(), + ); + let notification_srv = NotificationServiceImpl::new( + env.clone(), + catalog_manager, + cluster_manager.clone(), + hummock_manager.clone(), + fragment_manager.clone(), + ); + let health_srv = HealthServiceImpl::new(); + let backup_srv = BackupServiceImpl::new(backup_manager); + + if let Some(prometheus_addr) = address_info.prometheus_addr { + MetricsManager::boot_metrics_service( + prometheus_addr.to_string(), + meta_metrics.registry().clone(), + ) + } + + // Initialize sub-tasks. + // sub_tasks executed concurrently. 
Can be shutdown via shutdown_all + let compaction_scheduler = Arc::new(CompactionScheduler::new( + env.clone(), + hummock_manager.clone(), + compactor_manager.clone(), + )); + let mut sub_tasks = + hummock::start_hummock_workers(vacuum_manager, compaction_scheduler, &env.opts); + sub_tasks.push( + ClusterManager::start_worker_num_monitor( + cluster_manager.clone(), + Duration::from_secs(env.opts.node_num_monitor_interval_sec), + meta_metrics.clone(), + ) + .await, + ); + sub_tasks.push(HummockManager::start_compaction_heartbeat(hummock_manager).await); + sub_tasks.push((lease_handle, lease_shutdown)); + if cfg!(not(test)) { + sub_tasks.push( + ClusterManager::start_heartbeat_checker(cluster_manager, Duration::from_secs(1)).await, + ); + sub_tasks.push(GlobalBarrierManager::start(barrier_manager).await); + } + + let (idle_send, idle_recv) = tokio::sync::oneshot::channel(); + sub_tasks.push( + IdleManager::start_idle_checker(env.idle_manager_ref(), Duration::from_secs(30), idle_send) + .await, + ); + + Ok(LeaderServices { + meta_metrics, + heartbeat_srv, + cluster_srv, + hummock_srv, + stream_srv, + ddl_srv, + notification_srv, + user_srv, + health_srv, + scale_srv, + backup_srv, + sub_tasks, + idle_recv, + }) +} diff --git a/src/meta/src/rpc/mod.rs b/src/meta/src/rpc/mod.rs index d600358677938..f3df22aa5751e 100644 --- a/src/meta/src/rpc/mod.rs +++ b/src/meta/src/rpc/mod.rs @@ -14,6 +14,7 @@ mod elections; mod intercept; +mod leader_svs; pub mod metrics; pub mod server; mod service; diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index c6ca1ba194954..8befffadd5abe 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -37,6 +37,7 @@ use tokio::task::JoinHandle; use super::elections::run_elections; use super::intercept::MetricsMiddlewareLayer; +use super::leader_svs::get_leader_srv; use super::service::health_service::HealthServiceImpl; use super::service::notification_service::NotificationServiceImpl; use super::service::scale_service::ScaleServiceImpl; @@ -267,225 +268,32 @@ pub async fn rpc_serve_with_store( // leader services defined below tracing::info!("Starting leader services"); - // TODO: put leader service definition in separate function or maybe even separate file - // Do that in this PR see https://github.com/risingwavelabs/risingwave/pull/6937#issuecomment-1354518161 - - let prometheus_endpoint = opts.prometheus_endpoint.clone(); - let env = MetaSrvEnv::::new(opts, meta_store.clone(), leader_rx.clone()).await; - let fragment_manager = Arc::new(FragmentManager::new(env.clone()).await.unwrap()); - let meta_metrics = Arc::new(MetaMetrics::new()); - let registry = meta_metrics.registry(); - monitor_process(registry).unwrap(); - let compactor_manager = Arc::new( - hummock::CompactorManager::with_meta(env.clone(), max_heartbeat_interval.as_secs()) - .await - .unwrap(), - ); - - let cluster_manager = Arc::new( - ClusterManager::new(env.clone(), max_heartbeat_interval) - .await - .unwrap(), - ); - let hummock_manager = hummock::HummockManager::new( - env.clone(), - cluster_manager.clone(), - meta_metrics.clone(), - compactor_manager.clone(), + let svc = get_leader_srv( + meta_store, + address_info.clone(), + max_heartbeat_interval, + opts, + leader_rx, + lease_handle, + lease_shutdown, ) .await - .unwrap(); - - #[cfg(not(madsim))] - if let Some(ref dashboard_addr) = address_info.dashboard_addr { - let dashboard_service = crate::dashboard::DashboardService { - dashboard_addr: *dashboard_addr, - cluster_manager: cluster_manager.clone(), - 
fragment_manager: fragment_manager.clone(), - meta_store: env.meta_store_ref(), - prometheus_endpoint: prometheus_endpoint.clone(), - prometheus_client: prometheus_endpoint.as_ref().map(|x| { - use std::str::FromStr; - prometheus_http_query::Client::from_str(x).unwrap() - }), - }; - // TODO: join dashboard service back to local thread. - tokio::spawn(dashboard_service.serve(address_info.ui_path)); - } - - let catalog_manager = Arc::new(CatalogManager::new(env.clone()).await.unwrap()); - - let (barrier_scheduler, scheduled_barriers) = - BarrierScheduler::new_pair(hummock_manager.clone(), env.opts.checkpoint_frequency); - - let source_manager = Arc::new( - SourceManager::new( - barrier_scheduler.clone(), - catalog_manager.clone(), - fragment_manager.clone(), - ) - .await - .unwrap(), - ); - - let barrier_manager = Arc::new(GlobalBarrierManager::new( - scheduled_barriers, - env.clone(), - cluster_manager.clone(), - catalog_manager.clone(), - fragment_manager.clone(), - hummock_manager.clone(), - source_manager.clone(), - meta_metrics.clone(), - )); - - { - let source_manager = source_manager.clone(); - tokio::spawn(async move { - source_manager.run().await.unwrap(); - }); - } - - let stream_manager = Arc::new( - GlobalStreamManager::new( - env.clone(), - fragment_manager.clone(), - barrier_scheduler.clone(), - cluster_manager.clone(), - source_manager.clone(), - hummock_manager.clone(), - ) - .unwrap(), - ); - - hummock_manager - .purge_stale( - &fragment_manager - .list_table_fragments() - .await - .expect("list_table_fragments"), - ) - .await - .unwrap(); - - // Initialize services. - let backup_object_store = Arc::new( - parse_remote_object_store( - &env.opts.backup_storage_url, - Arc::new(ObjectStoreMetrics::unused()), - true, - ) - .await, - ); - let backup_storage = Arc::new( - ObjectStoreMetaSnapshotStorage::new( - &env.opts.backup_storage_directory, - backup_object_store, - ) - .await - .unwrap(), - // FIXME: Do not use unwrap here - ); - let backup_manager = Arc::new(BackupManager::new( - env.clone(), - hummock_manager.clone(), - backup_storage, - meta_metrics.registry().clone(), - )); - let vacuum_manager = Arc::new(hummock::VacuumManager::new( - env.clone(), - hummock_manager.clone(), - backup_manager.clone(), - compactor_manager.clone(), - )); - - let heartbeat_srv = HeartbeatServiceImpl::new(cluster_manager.clone()); - let ddl_srv = DdlServiceImpl::::new( - env.clone(), - catalog_manager.clone(), - stream_manager.clone(), - source_manager.clone(), - cluster_manager.clone(), - fragment_manager.clone(), - barrier_manager.clone(), - ); - - let user_srv = UserServiceImpl::::new(env.clone(), catalog_manager.clone()); - - let scale_srv = ScaleServiceImpl::::new( - barrier_scheduler.clone(), - fragment_manager.clone(), - cluster_manager.clone(), - source_manager, - catalog_manager.clone(), - stream_manager.clone(), - ); - - let cluster_srv = ClusterServiceImpl::::new(cluster_manager.clone()); - let stream_srv = StreamServiceImpl::::new( - env.clone(), - barrier_scheduler.clone(), - fragment_manager.clone(), - ); - let hummock_srv = HummockServiceImpl::new( - hummock_manager.clone(), - compactor_manager.clone(), - vacuum_manager.clone(), - fragment_manager.clone(), - ); - let notification_srv = NotificationServiceImpl::new( - env.clone(), - catalog_manager, - cluster_manager.clone(), - hummock_manager.clone(), - fragment_manager.clone(), - ); - let health_srv = HealthServiceImpl::new(); - let backup_srv = BackupServiceImpl::new(backup_manager); - - if let Some(prometheus_addr) = 
address_info.prometheus_addr { - MetricsManager::boot_metrics_service( - prometheus_addr.to_string(), - meta_metrics.registry().clone(), - ) - } - - // Initialize sub-tasks. - // sub_tasks executed concurrently. Can be shutdown via shutdown_all - let compaction_scheduler = Arc::new(CompactionScheduler::new( - env.clone(), - hummock_manager.clone(), - compactor_manager.clone(), - )); - let mut sub_tasks = - hummock::start_hummock_workers(vacuum_manager, compaction_scheduler, &env.opts); - sub_tasks.push( - ClusterManager::start_worker_num_monitor( - cluster_manager.clone(), - Duration::from_secs(env.opts.node_num_monitor_interval_sec), - meta_metrics.clone(), - ) - .await, - ); - sub_tasks.push(HummockManager::start_compaction_heartbeat(hummock_manager).await); - sub_tasks.push((lease_handle, lease_shutdown)); - if cfg!(not(test)) { - sub_tasks.push( - ClusterManager::start_heartbeat_checker(cluster_manager, Duration::from_secs(1)) - .await, - ); - sub_tasks.push(GlobalBarrierManager::start(barrier_manager).await); - } - - let (idle_send, idle_recv) = tokio::sync::oneshot::channel(); - sub_tasks.push( - IdleManager::start_idle_checker( - env.idle_manager_ref(), - Duration::from_secs(30), - idle_send, - ) - .await, - ); + .expect("Unable to create leader services"); + + // TODO: Do not define all these vars, just use ls. + let meta_metrics = svc.meta_metrics; + let heartbeat_srv = svc.heartbeat_srv; + let cluster_srv = svc.cluster_srv; + let hummock_srv = svc.hummock_srv; + let stream_srv = svc.stream_srv; + let ddl_srv = svc.ddl_srv; + let notification_srv = svc.notification_srv; + let user_srv = svc.user_srv; + let health_srv = svc.health_srv; + let scale_srv = svc.scale_srv; + let backup_srv = svc.backup_srv; + let sub_tasks = svc.sub_tasks; + let idle_recv = svc.idle_recv; let shutdown_all = async move { for (join_handle, shutdown_sender) in sub_tasks { From 18e3ebb3490538dd0163b632fe22b9708799fffc Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 11:29:38 +0100 Subject: [PATCH 07/61] delete unecessary var definitions --- src/meta/src/rpc/server.rs | 43 ++++++++++++-------------------------- 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 8befffadd5abe..4ff9045ea291a 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -265,7 +265,6 @@ pub async fn rpc_serve_with_store( }; } - // leader services defined below tracing::info!("Starting leader services"); let svc = get_leader_srv( @@ -280,23 +279,8 @@ pub async fn rpc_serve_with_store( .await .expect("Unable to create leader services"); - // TODO: Do not define all these vars, just use ls. 
- let meta_metrics = svc.meta_metrics; - let heartbeat_srv = svc.heartbeat_srv; - let cluster_srv = svc.cluster_srv; - let hummock_srv = svc.hummock_srv; - let stream_srv = svc.stream_srv; - let ddl_srv = svc.ddl_srv; - let notification_srv = svc.notification_srv; - let user_srv = svc.user_srv; - let health_srv = svc.health_srv; - let scale_srv = svc.scale_srv; - let backup_srv = svc.backup_srv; - let sub_tasks = svc.sub_tasks; - let idle_recv = svc.idle_recv; - let shutdown_all = async move { - for (join_handle, shutdown_sender) in sub_tasks { + for (join_handle, shutdown_sender) in svc.sub_tasks { if let Err(_err) = shutdown_sender.send(()) { // Maybe it is already shut down continue; @@ -310,19 +294,18 @@ pub async fn rpc_serve_with_store( // FIXME: Add service discovery for leader // https://github.com/risingwavelabs/risingwave/issues/6755 - // leader services defined above tonic::transport::Server::builder() - .layer(MetricsMiddlewareLayer::new(meta_metrics.clone())) - .add_service(HeartbeatServiceServer::new(heartbeat_srv)) - .add_service(ClusterServiceServer::new(cluster_srv)) - .add_service(StreamManagerServiceServer::new(stream_srv)) - .add_service(HummockManagerServiceServer::new(hummock_srv)) - .add_service(NotificationServiceServer::new(notification_srv)) - .add_service(DdlServiceServer::new(ddl_srv)) - .add_service(UserServiceServer::new(user_srv)) - .add_service(ScaleServiceServer::new(scale_srv)) - .add_service(HealthServer::new(health_srv)) - .add_service(BackupServiceServer::new(backup_srv)) + .layer(MetricsMiddlewareLayer::new(svc.meta_metrics.clone())) + .add_service(HeartbeatServiceServer::new(svc.heartbeat_srv)) + .add_service(ClusterServiceServer::new(svc.cluster_srv)) + .add_service(StreamManagerServiceServer::new(svc.stream_srv)) + .add_service(HummockManagerServiceServer::new(svc.hummock_srv)) + .add_service(NotificationServiceServer::new(svc.notification_srv)) + .add_service(DdlServiceServer::new(svc.ddl_srv)) + .add_service(UserServiceServer::new(svc.user_srv)) + .add_service(ScaleServiceServer::new(svc.scale_srv)) + .add_service(HealthServer::new(svc.health_srv)) + .add_service(BackupServiceServer::new(svc.backup_srv)) .serve_with_shutdown(address_info.listen_addr, async move { tokio::select! { _ = tokio::signal::ctrl_c() => {}, @@ -333,7 +316,7 @@ pub async fn rpc_serve_with_store( } shutdown_all.await; }, - _ = idle_recv => { + _ = svc.idle_recv => { tracing::info!("idle_recv"); shutdown_all.await; }, From 642a2dc3ac164307044752b3885675b4062d652f Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 11:30:25 +0100 Subject: [PATCH 08/61] remove notes --- src/meta/src/rpc/server.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 4ff9045ea291a..c40a651715d6d 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -143,9 +143,6 @@ pub async fn rpc_serve_with_store( lease_interval_secs: u64, opts: MetaOpts, ) -> MetaResult<(JoinHandle<()>, tokio::sync::watch::Sender<()>)> { - // Contains address info with port - // address_info.listen_addr; - // Initialize managers. 
let (_, lease_handle, lease_shutdown, leader_rx) = run_elections( address_info.listen_addr.clone().to_string(), From f8bfa6e75cceaf7b6dc956183e8acc18d16a72a8 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 11:41:37 +0100 Subject: [PATCH 09/61] copyright Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/meta/src/rpc/leader_svs.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/meta/src/rpc/leader_svs.rs b/src/meta/src/rpc/leader_svs.rs index 178a57e5dea9a..8c742453e941e 100644 --- a/src/meta/src/rpc/leader_svs.rs +++ b/src/meta/src/rpc/leader_svs.rs @@ -1,3 +1,17 @@ +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; From 6b7ce5938917f90ecc22b85542b47c1e73c5c184 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 11:55:19 +0100 Subject: [PATCH 10/61] risedev c --- src/meta/src/rpc/leader_svs.rs | 47 ++++++++++++++++------------------ src/meta/src/rpc/server.rs | 45 +++++++++----------------------- 2 files changed, 34 insertions(+), 58 deletions(-) diff --git a/src/meta/src/rpc/leader_svs.rs b/src/meta/src/rpc/leader_svs.rs index 8c742453e941e..3706eb0143ae9 100644 --- a/src/meta/src/rpc/leader_svs.rs +++ b/src/meta/src/rpc/leader_svs.rs @@ -12,15 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; -use etcd_client::ConnectOptions; use risingwave_backup::error::BackupError; use risingwave_backup::storage::ObjectStoreMetaSnapshotStorage; use risingwave_common::monitor::process_linux::monitor_process; -use risingwave_common::util::addr::HostAddr; use risingwave_common_service::metrics_manager::MetricsManager; use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; use risingwave_object_store::object::parse_remote_object_store; @@ -35,6 +32,7 @@ use super::service::scale_service::ScaleServiceImpl; use super::DdlServiceImpl; use crate::backup_restore::BackupManager; use crate::barrier::{BarrierScheduler, GlobalBarrierManager}; +use crate::hummock; use crate::hummock::{CompactionScheduler, HummockManager}; use crate::manager::{ CatalogManager, ClusterManager, FragmentManager, IdleManager, MetaOpts, MetaSrvEnv, @@ -47,24 +45,23 @@ use crate::rpc::service::heartbeat_service::HeartbeatServiceImpl; use crate::rpc::service::hummock_service::HummockServiceImpl; use crate::rpc::service::stream_service::StreamServiceImpl; use crate::rpc::service::user_service::UserServiceImpl; -use crate::storage::{EtcdMetaStore, MemStore, MetaStore, WrappedEtcdClient as EtcdClient}; +use crate::storage::MetaStore; use crate::stream::{GlobalStreamManager, SourceManager}; -use crate::{hummock, MetaResult}; pub struct LeaderServices { - pub meta_metrics: Arc, - pub heartbeat_srv: HeartbeatServiceImpl, + pub backup_srv: BackupServiceImpl, pub cluster_srv: ClusterServiceImpl, - pub stream_srv: StreamServiceImpl, + pub ddl_srv: DdlServiceImpl, + pub health_srv: HealthServiceImpl, + pub heartbeat_srv: HeartbeatServiceImpl, pub hummock_srv: HummockServiceImpl, + pub idle_recv: OneReceiver<()>, + pub meta_metrics: Arc, pub notification_srv: NotificationServiceImpl, - pub ddl_srv: DdlServiceImpl, - pub user_srv: UserServiceImpl, pub scale_srv: ScaleServiceImpl, - pub health_srv: HealthServiceImpl, - pub backup_srv: BackupServiceImpl, + pub stream_srv: StreamServiceImpl, pub sub_tasks: Vec<(JoinHandle<()>, OneSender<()>)>, - pub idle_recv: OneReceiver<()>, + pub user_srv: UserServiceImpl, } // TODO: Write docstring @@ -75,12 +72,12 @@ pub async fn get_leader_srv( max_heartbeat_interval: Duration, opts: MetaOpts, leader_rx: WatchReceiver<(MetaLeaderInfo, bool)>, - lease_handle: JoinHandle<()>, - lease_shutdown: OneSender<()>, + election_handle: JoinHandle<()>, + election_shutdown: OneSender<()>, ) -> Result, BackupError> { tracing::info!("Defining leader services"); - - let env = MetaSrvEnv::::new(opts, meta_store.clone(), current_leader_info).await; + let prometheus_endpoint = opts.prometheus_endpoint.clone(); + let env = MetaSrvEnv::::new(opts, meta_store.clone(), leader_rx).await; let fragment_manager = Arc::new(FragmentManager::new(env.clone()).await.unwrap()); let meta_metrics = Arc::new(MetaMetrics::new()); let registry = meta_metrics.registry(); @@ -291,18 +288,18 @@ pub async fn get_leader_srv( .await, ); Ok(LeaderServices { - meta_metrics, - heartbeat_srv, + backup_srv, cluster_srv, - hummock_srv, - stream_srv, ddl_srv, - notification_srv, - user_srv, health_srv, + heartbeat_srv, + hummock_srv, + idle_recv, + meta_metrics, + notification_srv, scale_srv, - backup_srv, + stream_srv, sub_tasks, - idle_recv, + user_srv, }) } diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index c40a651715d6d..61a536fc88b92 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -17,12 +17,7 @@ use 
std::sync::Arc; use std::time::Duration; use etcd_client::ConnectOptions; -use risingwave_backup::storage::ObjectStoreMetaSnapshotStorage; -use risingwave_common::monitor::process_linux::monitor_process; use risingwave_common::util::addr::HostAddr; -use risingwave_common_service::metrics_manager::MetricsManager; -use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; -use risingwave_object_store::object::parse_remote_object_store; use risingwave_pb::backup_service::backup_service_server::BackupServiceServer; use risingwave_pb::ddl_service::ddl_service_server::DdlServiceServer; use risingwave_pb::health::health_server::HealthServer; @@ -33,31 +28,18 @@ use risingwave_pb::meta::notification_service_server::NotificationServiceServer; use risingwave_pb::meta::scale_service_server::ScaleServiceServer; use risingwave_pb::meta::stream_manager_service_server::StreamManagerServiceServer; use risingwave_pb::user::user_service_server::UserServiceServer; +use tokio::sync::oneshot::channel as OneChannel; +use tokio::sync::watch::{channel as WatchChannel, Sender as WatchSender}; use tokio::task::JoinHandle; use super::elections::run_elections; use super::intercept::MetricsMiddlewareLayer; use super::leader_svs::get_leader_srv; use super::service::health_service::HealthServiceImpl; -use super::service::notification_service::NotificationServiceImpl; -use super::service::scale_service::ScaleServiceImpl; -use super::DdlServiceImpl; -use crate::backup_restore::BackupManager; -use crate::barrier::{BarrierScheduler, GlobalBarrierManager}; -use crate::hummock::{CompactionScheduler, HummockManager}; -use crate::manager::{ - CatalogManager, ClusterManager, FragmentManager, IdleManager, MetaOpts, MetaSrvEnv, -}; +use crate::manager::MetaOpts; use crate::rpc::metrics::MetaMetrics; -use crate::rpc::service::backup_service::BackupServiceImpl; -use crate::rpc::service::cluster_service::ClusterServiceImpl; -use crate::rpc::service::heartbeat_service::HeartbeatServiceImpl; -use crate::rpc::service::hummock_service::HummockServiceImpl; -use crate::rpc::service::stream_service::StreamServiceImpl; -use crate::rpc::service::user_service::UserServiceImpl; use crate::storage::{EtcdMetaStore, MemStore, MetaStore, WrappedEtcdClient as EtcdClient}; -use crate::stream::{GlobalStreamManager, SourceManager}; -use crate::{hummock, MetaResult}; +use crate::MetaResult; #[derive(Debug)] pub enum MetaStoreBackend { @@ -89,16 +71,13 @@ impl Default for AddressInfo { } } -// TODO: in imports do something like tokio::sync::watch::Sender<()> as watchSender -// and oneshot::sender as oneshotSender - pub async fn rpc_serve( address_info: AddressInfo, meta_store_backend: MetaStoreBackend, max_heartbeat_interval: Duration, lease_interval_secs: u64, opts: MetaOpts, -) -> MetaResult<(JoinHandle<()>, tokio::sync::watch::Sender<()>)> { +) -> MetaResult<(JoinHandle<()>, WatchSender<()>)> { match meta_store_backend { MetaStoreBackend::Etcd { endpoints, @@ -142,9 +121,9 @@ pub async fn rpc_serve_with_store( max_heartbeat_interval: Duration, lease_interval_secs: u64, opts: MetaOpts, -) -> MetaResult<(JoinHandle<()>, tokio::sync::watch::Sender<()>)> { +) -> MetaResult<(JoinHandle<()>, WatchSender<()>)> { // Initialize managers. 
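// `run_elections` below also hands back a `tokio::sync::watch` receiver carrying
// `(MetaLeaderInfo, bool)` pairs. A minimal consumption sketch follows, assuming
// `MetaLeaderInfo` and `tracing` are in scope; the function name is illustrative
// and not part of this patch:
async fn observe_leadership(mut leader_rx: tokio::sync::watch::Receiver<(MetaLeaderInfo, bool)>) {
    // `borrow()` yields the most recently published value without waiting.
    let (leader, is_leader) = leader_rx.borrow().clone();
    tracing::info!("current leader: {}, this node is leader: {}", leader.node_address, is_leader);
    // `changed().await` resolves whenever the election loop publishes an update
    // and returns `Err` only once the sending half has been dropped.
    while leader_rx.changed().await.is_ok() {
        let (leader, is_leader) = leader_rx.borrow().clone();
        tracing::info!("leadership changed: {} (this node is leader: {})", leader.node_address, is_leader);
    }
}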
- let (_, lease_handle, lease_shutdown, leader_rx) = run_elections( + let (_, election_handle, election_shutdown, leader_rx) = run_elections( address_info.listen_addr.clone().to_string(), meta_store.clone(), lease_interval_secs, @@ -183,7 +162,7 @@ pub async fn rpc_serve_with_store( // TODO: maybe do not use a channel here // What I need is basically a oneshot channel with one producer and multiple consumers - let (svc_shutdown_tx, mut svc_shutdown_rx) = tokio::sync::watch::channel(()); + let (svc_shutdown_tx, mut svc_shutdown_rx) = WatchChannel(()); let join_handle = tokio::spawn(async move { let span = tracing::span!(tracing::Level::INFO, "services"); @@ -202,8 +181,8 @@ pub async fn rpc_serve_with_store( // run follower services until node becomes leader let mut svc_shutdown_rx_clone = svc_shutdown_rx.clone(); - let (follower_shutdown_tx, follower_shutdown_rx) = tokio::sync::oneshot::channel::<()>(); - let (follower_finished_tx, follower_finished_rx) = tokio::sync::oneshot::channel::<()>(); + let (follower_shutdown_tx, follower_shutdown_rx) = OneChannel::<()>(); + let (follower_finished_tx, follower_finished_rx) = OneChannel::<()>(); if !is_leader { tracing::info!("Starting follower services"); tokio::spawn(async move { @@ -270,8 +249,8 @@ pub async fn rpc_serve_with_store( max_heartbeat_interval, opts, leader_rx, - lease_handle, - lease_shutdown, + election_handle, + election_shutdown, ) .await .expect("Unable to create leader services"); From 11e5b68c068a2eee1127a3a53bd88a3966484720 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 12:02:32 +0100 Subject: [PATCH 11/61] typo --- src/meta/src/rpc/server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 61a536fc88b92..d9385cd48b748 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -253,7 +253,7 @@ pub async fn rpc_serve_with_store( election_shutdown, ) .await - .expect("Unable to create leader services"); + .expect("Unable to start leader services"); let shutdown_all = async move { for (join_handle, shutdown_sender) in svc.sub_tasks { From 4f3797ded6297484e9e90bfb4588d63de04346a9 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 12:04:25 +0100 Subject: [PATCH 12/61] notes --- src/meta/src/rpc/server.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index d9385cd48b748..9d2ea09dacd9a 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -160,7 +160,7 @@ pub async fn rpc_serve_with_store( } }); - // TODO: maybe do not use a channel here + // FIXME: maybe do not use a channel here // What I need is basically a oneshot channel with one producer and multiple consumers let (svc_shutdown_tx, mut svc_shutdown_rx) = WatchChannel(()); @@ -176,10 +176,9 @@ pub async fn rpc_serve_with_store( let is_leader = services_leader_rx.borrow().clone().1; let was_follower = !is_leader; + // run follower services until node becomes leader // FIXME: Add service discovery for follower // https://github.com/risingwavelabs/risingwave/issues/6755 - - // run follower services until node becomes leader let mut svc_shutdown_rx_clone = svc_shutdown_rx.clone(); let (follower_shutdown_tx, follower_shutdown_rx) = OneChannel::<()>(); let (follower_finished_tx, follower_finished_rx) = OneChannel::<()>(); From 7513e2b379e435c02b621a00678e4bed8cef1c6a Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 13:11:28 +0100 Subject: [PATCH 
13/61] docstrings --- src/meta/src/rpc/leader_svs.rs | 15 +++++++++++++-- src/meta/src/rpc/server.rs | 1 - 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/meta/src/rpc/leader_svs.rs b/src/meta/src/rpc/leader_svs.rs index 3706eb0143ae9..0e2358681274f 100644 --- a/src/meta/src/rpc/leader_svs.rs +++ b/src/meta/src/rpc/leader_svs.rs @@ -48,6 +48,7 @@ use crate::rpc::service::user_service::UserServiceImpl; use crate::storage::MetaStore; use crate::stream::{GlobalStreamManager, SourceManager}; +/// Simple struct containing everything required to start services on a meta leader node pub struct LeaderServices { pub backup_srv: BackupServiceImpl, pub cluster_srv: ClusterServiceImpl, @@ -55,17 +56,27 @@ pub struct LeaderServices { pub health_srv: HealthServiceImpl, pub heartbeat_srv: HeartbeatServiceImpl, pub hummock_srv: HummockServiceImpl, + + /// OneShot receiver that signals if sub tasks is idle pub idle_recv: OneReceiver<()>, + pub meta_metrics: Arc, pub notification_srv: NotificationServiceImpl, pub scale_srv: ScaleServiceImpl, pub stream_srv: StreamServiceImpl, + + /// sub_tasks executed concurrently. Can be shutdown via shutdown_all pub sub_tasks: Vec<(JoinHandle<()>, OneSender<()>)>, + pub user_srv: UserServiceImpl, } -// TODO: Write docstring -// TODO: Only call this function once +/// Initializing all services needed for the meta leader node +/// Only call this function once, since initializing the services multiple times will result in an +/// inconsistent state +/// +/// ## Returns +/// Returns an instance of `LeaderServices` or an Error if initializing leader services failed pub async fn get_leader_srv( meta_store: Arc, address_info: AddressInfo, diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 9d2ea09dacd9a..d88d8df7612fc 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -292,7 +292,6 @@ pub async fn rpc_serve_with_store( shutdown_all.await; }, _ = svc.idle_recv => { - tracing::info!("idle_recv"); shutdown_all.await; }, } From 5d696540325bfa4770d5aa10ad1d1d5428ba3354 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 13:30:52 +0100 Subject: [PATCH 14/61] expect instead of panic --- src/meta/src/rpc/server.rs | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index d88d8df7612fc..12e251dfa02ca 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -141,9 +141,11 @@ pub async fn rpc_serve_with_store( let span = tracing::span!(tracing::Level::INFO, "node_status"); let _enter = span.enter(); loop { - if note_status_leader_rx.changed().await.is_err() { - panic!("Leader sender dropped"); - } + note_status_leader_rx + .changed() + .await + .expect("Leader sender dropped"); + let (leader_info, is_leader) = note_status_leader_rx.borrow().clone(); let leader_addr = HostAddr::from(leader_info); @@ -169,9 +171,10 @@ pub async fn rpc_serve_with_store( let _enter = span.enter(); // failover logic - if services_leader_rx.changed().await.is_err() { - panic!("Leader sender dropped"); - } + services_leader_rx + .changed() + .await + .expect("Leader sender dropped"); let is_leader = services_leader_rx.borrow().clone().1; let was_follower = !is_leader; @@ -186,7 +189,6 @@ pub async fn rpc_serve_with_store( tracing::info!("Starting follower services"); tokio::spawn(async move { let health_srv = HealthServiceImpl::new(); - // TODO: Use prometheus endpoint in follower? 
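// The TODO removed above asked whether the follower should also expose
// Prometheus metrics. A hedged sketch of one possible answer, reusing the
// `MetricsManager::boot_metrics_service` call that the leader services already
// use (see `get_leader_srv`); it assumes `MetricsManager` is re-imported here,
// and `follower_metrics` is an illustrative name, not part of the patch:
let follower_metrics = Arc::new(MetaMetrics::new());
if let Some(prometheus_addr) = address_info.prometheus_addr {
    MetricsManager::boot_metrics_service(
        prometheus_addr.to_string(),
        follower_metrics.registry().clone(),
    );
}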
tonic::transport::Server::builder() .layer(MetricsMiddlewareLayer::new(Arc::new(MetaMetrics::new()))) .add_service(HealthServer::new(health_srv)) @@ -220,9 +222,10 @@ pub async fn rpc_serve_with_store( // wait until this node becomes a leader while !services_leader_rx.borrow().clone().1 { - if services_leader_rx.changed().await.is_err() { - panic!("Leader sender dropped"); - } + services_leader_rx + .changed() + .await + .expect("Leader sender dropped"); } // shut down follower svc if node used to be follower @@ -270,7 +273,7 @@ pub async fn rpc_serve_with_store( // https://github.com/risingwavelabs/risingwave/issues/6755 tonic::transport::Server::builder() - .layer(MetricsMiddlewareLayer::new(svc.meta_metrics.clone())) + .layer(MetricsMiddlewareLayer::new(svc.meta_metrics)) .add_service(HeartbeatServiceServer::new(svc.heartbeat_srv)) .add_service(ClusterServiceServer::new(svc.cluster_srv)) .add_service(StreamManagerServiceServer::new(svc.stream_srv)) From 178f9a43c8e55a9453c06a5981de5b82cdd63149 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 13:43:37 +0100 Subject: [PATCH 15/61] change logging --- src/meta/src/rpc/server.rs | 50 +++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 12e251dfa02ca..4d0c826b47a6a 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -138,8 +138,7 @@ pub async fn rpc_serve_with_store( // print current leader/follower status of this node tokio::spawn(async move { - let span = tracing::span!(tracing::Level::INFO, "node_status"); - let _enter = span.enter(); + let _ = tracing::span!(tracing::Level::INFO, "node_status").enter(); loop { note_status_leader_rx .changed() @@ -186,36 +185,39 @@ pub async fn rpc_serve_with_store( let (follower_shutdown_tx, follower_shutdown_rx) = OneChannel::<()>(); let (follower_finished_tx, follower_finished_rx) = OneChannel::<()>(); if !is_leader { - tracing::info!("Starting follower services"); tokio::spawn(async move { + let _ = tracing::span!(tracing::Level::INFO, "follower services").enter(); + tracing::info!("Starting follower services"); let health_srv = HealthServiceImpl::new(); tonic::transport::Server::builder() .layer(MetricsMiddlewareLayer::new(Arc::new(MetaMetrics::new()))) .add_service(HealthServer::new(health_srv)) .serve_with_shutdown(address_info.listen_addr, async move { tokio::select! 
{ - _ = tokio::signal::ctrl_c() => {}, - // shutdown service if all services should be shut down - res = svc_shutdown_rx_clone.changed() => { - if res.is_err() { - tracing::error!("service shutdown sender dropped"); - } - }, - // shutdown service if follower becomes leader - res = follower_shutdown_rx => { - if res.is_err() { - tracing::error!("follower service shutdown sender dropped"); - } + _ = tokio::signal::ctrl_c() => {}, + // shutdown service if all services should be shut down + res = svc_shutdown_rx_clone.changed() => { + match res { + Ok(_) => tracing::info!("Shutting down services"), + Err(_) => tracing::error!("Service shutdown sender dropped") + } + }, + // shutdown service if follower becomes leader + res = follower_shutdown_rx => { + match res { + Ok(_) => tracing::info!("Shutting down follower services"), + Err(_) => tracing::error!("Follower service shutdown sender dropped") + } }, - } + } }) .await .unwrap(); match follower_finished_tx.send(()) { - Ok(_) => tracing::info!("Signaling that follower service is down"), - Err(_) => tracing::error!( - "Error when signaling that follower service is down. Receiver dropped" - ), + Ok(_) => tracing::info!("Shutting down follower services done"), + Err(_) => { + tracing::error!("Follower service shutdown done sender receiver dropped") + } } }); } @@ -231,8 +233,6 @@ pub async fn rpc_serve_with_store( // shut down follower svc if node used to be follower if was_follower { match follower_shutdown_tx.send(()) { - // TODO: Do I always use this error format? - // Receiver, sender dropped? Ok(_) => tracing::info!("Shutting down follower services"), Err(_) => tracing::error!("Follower service receiver dropped"), } @@ -288,9 +288,9 @@ pub async fn rpc_serve_with_store( tokio::select! { _ = tokio::signal::ctrl_c() => {}, res = svc_shutdown_rx.changed() => { - tracing::info!("svc_shutdown_rx.changed()"); - if res.is_err() { - tracing::error!("service shutdown sender dropped"); + match res { + Ok(_) => tracing::info!("Shutting down services"), + Err(_) => tracing::error!("Service shutdown receiver dropped") } shutdown_all.await; }, From f733d9f24834db0c74897283a2fc9a5c00ebd122 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 13:55:42 +0100 Subject: [PATCH 16/61] remove note --- src/meta/src/hummock/manager/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/meta/src/hummock/manager/mod.rs b/src/meta/src/hummock/manager/mod.rs index d5365711a38b1..45faf7872229e 100644 --- a/src/meta/src/hummock/manager/mod.rs +++ b/src/meta/src/hummock/manager/mod.rs @@ -504,8 +504,6 @@ where } } - // FIXME: This should not panic - // check prevents meta node failover trx.check_equal( META_CF_NAME.to_owned(), META_LEADER_KEY.as_bytes().to_vec(), From 9ea645435db3dac921198876f9a59517b50ddb10 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 13:58:30 +0100 Subject: [PATCH 17/61] change import --- src/meta/src/manager/env.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/meta/src/manager/env.rs b/src/meta/src/manager/env.rs index edbd0caf9c14c..0c0ec84be7eb4 100644 --- a/src/meta/src/manager/env.rs +++ b/src/meta/src/manager/env.rs @@ -22,7 +22,7 @@ use risingwave_pb::meta::MetaLeaderInfo; #[cfg(any(test, feature = "test"))] use risingwave_pb::meta::MetaLeaseInfo; use risingwave_rpc_client::{StreamClientPool, StreamClientPoolRef}; -use tokio::sync::watch::Receiver; +use tokio::sync::watch::{channel as WatchChannel, Receiver as WatchReceiver}; use crate::manager::{ IdGeneratorManager, 
IdGeneratorManagerRef, IdleManager, IdleManagerRef, NotificationManager, @@ -56,7 +56,7 @@ where /// idle status manager. idle_manager: IdleManagerRef, - leader_rx: Receiver<(MetaLeaderInfo, bool)>, + leader_rx: WatchReceiver<(MetaLeaderInfo, bool)>, /// options read by all services pub opts: Arc, @@ -137,7 +137,7 @@ where pub async fn new( opts: MetaOpts, meta_store: Arc, - leader_rx: Receiver<(MetaLeaderInfo, bool)>, + leader_rx: WatchReceiver<(MetaLeaderInfo, bool)>, ) -> Self { // change to sync after refactor `IdGeneratorManager::new` sync. let id_gen_manager = Arc::new(IdGeneratorManager::new(meta_store.clone()).await); @@ -214,7 +214,7 @@ impl MetaSrvEnv { lease_id: 0, node_address: "".to_string(), }; - let (_, leader_rx) = tokio::sync::watch::channel((leader_info.clone(), true)); + let (_, leader_rx) = WatchChannel((leader_info.clone(), true)); let lease_info = MetaLeaseInfo { leader: Some(leader_info.clone()), From 95881519f6f927b712bddc0550e0dbce0fc9f121 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 13:59:15 +0100 Subject: [PATCH 18/61] remove comments --- src/meta/src/rpc/elections.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/meta/src/rpc/elections.rs b/src/meta/src/rpc/elections.rs index 437931cfcb862..7f10752c5c36c 100644 --- a/src/meta/src/rpc/elections.rs +++ b/src/meta/src/rpc/elections.rs @@ -47,13 +47,6 @@ struct ElectionResult { pub is_leader: bool, } -// TODO: remove -// impl ElectionResult { -// pub fn get_leader_addr(&self) -> HostAddr { -// HostAddr::from(self.meta_leader_info.clone()) -// } -// } - /// Runs for election in an attempt to become leader /// /// ## Returns From 810c4f140996da7c37ac81a7c5c546f0c6c63817 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Mon, 19 Dec 2022 14:06:39 +0100 Subject: [PATCH 19/61] remove log line --- src/meta/src/rpc/server.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 4d0c826b47a6a..e352752592282 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -243,8 +243,6 @@ pub async fn rpc_serve_with_store( }; } - tracing::info!("Starting leader services"); - let svc = get_leader_srv( meta_store, address_info.clone(), From c36d7d0236ad790e693b86202aa592161c428745 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Tue, 20 Dec 2022 09:54:43 +0100 Subject: [PATCH 20/61] change docstrings --- src/meta/src/rpc/elections.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/meta/src/rpc/elections.rs b/src/meta/src/rpc/elections.rs index 7f10752c5c36c..968b39316ddbb 100644 --- a/src/meta/src/rpc/elections.rs +++ b/src/meta/src/rpc/elections.rs @@ -49,15 +49,15 @@ struct ElectionResult { /// Runs for election in an attempt to become leader /// -/// ## Returns -/// Returns `ElectionResult`, containing infos about the node who won the election or -/// `MetaError` if the election ran into an error -/// /// ## Arguments /// `meta_store`: The meta store which holds the lease, deciding about the election result /// `addr`: Address of the node that runs for election /// `lease_time_sec`: Amount of seconds that this lease will be valid /// `next_lease_id`: If the node wins, the lease used until the next election will have this id +/// +/// ## Returns +/// Returns `ElectionResult`, containing infos about the node who won the election or +/// `MetaError` if the election ran into an error async fn campaign( meta_store: &Arc, addr: &String, @@ -165,15 +165,16 @@ async fn campaign( /// Try to 
renew/acquire the leader lease
 ///
-/// ## Returns
-/// True if node was leader and was able to renew/acquire the lease.
-/// False if node was follower and thus could not renew/acquire lease.
-/// `MetaError` if operation ran into an error
 ///
 /// ## Arguments
 /// `leader_info`: Info of the node that trie
 /// `lease_time_sec`: Time in seconds that the lease is valid
-/// `meta_store`: Store which holds the lease
+/// `meta_store`: Store which holds the lease#
+///
+/// ## Returns
+/// True if node was leader and was able to renew/acquire the lease.
+/// False if node was follower and thus could not renew/acquire lease.
+/// `MetaError` if operation ran into an error
 async fn renew_lease(
     leader_info: &MetaLeaderInfo,
     lease_time_sec: u64,
     meta_store: &Arc<S>,
 ) -> MetaResult<bool> {
@@ -210,7 +211,7 @@ async fn renew_lease(
 
 /// Retrieve infos about the current leader
 ///
-/// ## Attributes:
+/// ## Arguments:
 /// `meta_store`: The store holding information about the leader
 ///
 /// ## Returns

From 0a2b5a8d1327cac373b4257393e49a988cb0c143 Mon Sep 17 00:00:00 2001
From: CAJan93
Date: Tue, 20 Dec 2022 17:19:52 +0100
Subject: [PATCH 21/61] leader_info_to_host_addr

---
 src/common/src/util/addr.rs | 15 ++++++---------
 src/meta/src/rpc/server.rs  |  5 +++--
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/common/src/util/addr.rs b/src/common/src/util/addr.rs
index 42ef68b05b423..50ee49e429c01 100644
--- a/src/common/src/util/addr.rs
+++ b/src/common/src/util/addr.rs
@@ -20,6 +20,12 @@ use risingwave_pb::meta::MetaLeaderInfo;
 
 use crate::error::{internal_error, Result};
 
+pub fn leader_info_to_host_addr(mli: MetaLeaderInfo) -> HostAddr {
+    mli.node_address
+        .parse::<HostAddr>()
+        .expect("invalid leader addr")
+}
+
 /// General host address and port.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct HostAddr {
@@ -32,7 +38,6 @@ impl std::fmt::Display for HostAddr {
         write!(f, "{}:{}", self.host, self.port)
     }
 }
-
 impl From<SocketAddr> for HostAddr {
     fn from(addr: SocketAddr) -> Self {
         HostAddr {
@@ -42,14 +47,6 @@ impl From<SocketAddr> for HostAddr {
         }
     }
 }
 
-impl From<MetaLeaderInfo> for HostAddr {
-    fn from(mli: MetaLeaderInfo) -> Self {
-        mli.node_address
-            .parse::<HostAddr>()
-            .expect("invalid leader addr")
-    }
-}
-
 impl TryFrom<&str> for HostAddr {
     type Error = crate::error::RwError;
 
diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs
index e352752592282..9505605fd6293 100644
--- a/src/meta/src/rpc/server.rs
+++ b/src/meta/src/rpc/server.rs
@@ -17,7 +17,7 @@ use std::sync::Arc;
 use std::time::Duration;
 
 use etcd_client::ConnectOptions;
-use risingwave_common::util::addr::HostAddr;
+use risingwave_common::util::addr::{leader_info_to_host_addr, HostAddr};
 use risingwave_pb::backup_service::backup_service_server::BackupServiceServer;
 use risingwave_pb::ddl_service::ddl_service_server::DdlServiceServer;
 use risingwave_pb::health::health_server::HealthServer;
@@ -27,6 +27,7 @@ use risingwave_pb::meta::heartbeat_service_server::HeartbeatServiceServer;
 use risingwave_pb::meta::notification_service_server::NotificationServiceServer;
 use risingwave_pb::meta::scale_service_server::ScaleServiceServer;
 use risingwave_pb::meta::stream_manager_service_server::StreamManagerServiceServer;
+use risingwave_pb::meta::MetaLeaderInfo;
 use risingwave_pb::user::user_service_server::UserServiceServer;
 use tokio::sync::oneshot::channel as OneChannel;
 use tokio::sync::watch::{channel as WatchChannel, Sender as WatchSender};
@@ -146,7 +147,7 @@ pub async fn rpc_serve_with_store(
         .expect("Leader sender dropped");
 
     let (leader_info, is_leader) = note_status_leader_rx.borrow().clone();
-    let
leader_addr = HostAddr::from(leader_info); + let leader_addr = leader_info_to_host_addr(leader_info); tracing::info!( "This node currently is a {} at {}:{}", From 916d92ac5f9f5f17b9f20e4408b68d589ba16fa9 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Tue, 20 Dec 2022 17:36:53 +0100 Subject: [PATCH 22/61] move start services to leader_svc --- src/meta/src/rpc/leader_svs.rs | 109 +++++++++++++++++++-------------- src/meta/src/rpc/server.rs | 64 ++----------------- 2 files changed, 69 insertions(+), 104 deletions(-) diff --git a/src/meta/src/rpc/leader_svs.rs b/src/meta/src/rpc/leader_svs.rs index 0e2358681274f..4f9116be303ee 100644 --- a/src/meta/src/rpc/leader_svs.rs +++ b/src/meta/src/rpc/leader_svs.rs @@ -15,24 +15,33 @@ use std::sync::Arc; use std::time::Duration; -use risingwave_backup::error::BackupError; use risingwave_backup::storage::ObjectStoreMetaSnapshotStorage; use risingwave_common::monitor::process_linux::monitor_process; use risingwave_common_service::metrics_manager::MetricsManager; use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; use risingwave_object_store::object::parse_remote_object_store; +use risingwave_pb::backup_service::backup_service_server::BackupServiceServer; +use risingwave_pb::ddl_service::ddl_service_server::DdlServiceServer; +use risingwave_pb::health::health_server::HealthServer; +use risingwave_pb::hummock::hummock_manager_service_server::HummockManagerServiceServer; +use risingwave_pb::meta::cluster_service_server::ClusterServiceServer; +use risingwave_pb::meta::heartbeat_service_server::HeartbeatServiceServer; +use risingwave_pb::meta::notification_service_server::NotificationServiceServer; +use risingwave_pb::meta::scale_service_server::ScaleServiceServer; +use risingwave_pb::meta::stream_manager_service_server::StreamManagerServiceServer; use risingwave_pb::meta::MetaLeaderInfo; -use tokio::sync::oneshot::{Receiver as OneReceiver, Sender as OneSender}; +use risingwave_pb::user::user_service_server::UserServiceServer; +use tokio::sync::oneshot::Sender as OneSender; use tokio::sync::watch::Receiver as WatchReceiver; use tokio::task::JoinHandle; +use super::intercept::MetricsMiddlewareLayer; use super::service::health_service::HealthServiceImpl; use super::service::notification_service::NotificationServiceImpl; use super::service::scale_service::ScaleServiceImpl; use super::DdlServiceImpl; use crate::backup_restore::BackupManager; use crate::barrier::{BarrierScheduler, GlobalBarrierManager}; -use crate::hummock; use crate::hummock::{CompactionScheduler, HummockManager}; use crate::manager::{ CatalogManager, ClusterManager, FragmentManager, IdleManager, MetaOpts, MetaSrvEnv, @@ -47,37 +56,15 @@ use crate::rpc::service::stream_service::StreamServiceImpl; use crate::rpc::service::user_service::UserServiceImpl; use crate::storage::MetaStore; use crate::stream::{GlobalStreamManager, SourceManager}; +use crate::{hummock, MetaResult}; -/// Simple struct containing everything required to start services on a meta leader node -pub struct LeaderServices { - pub backup_srv: BackupServiceImpl, - pub cluster_srv: ClusterServiceImpl, - pub ddl_srv: DdlServiceImpl, - pub health_srv: HealthServiceImpl, - pub heartbeat_srv: HeartbeatServiceImpl, - pub hummock_srv: HummockServiceImpl, - - /// OneShot receiver that signals if sub tasks is idle - pub idle_recv: OneReceiver<()>, - - pub meta_metrics: Arc, - pub notification_srv: NotificationServiceImpl, - pub scale_srv: ScaleServiceImpl, - pub stream_srv: StreamServiceImpl, - - /// sub_tasks executed 
concurrently. Can be shutdown via shutdown_all - pub sub_tasks: Vec<(JoinHandle<()>, OneSender<()>)>, - - pub user_srv: UserServiceImpl, -} - -/// Initializing all services needed for the meta leader node +/// Starts all services needed for the meta leader node /// Only call this function once, since initializing the services multiple times will result in an /// inconsistent state /// /// ## Returns -/// Returns an instance of `LeaderServices` or an Error if initializing leader services failed -pub async fn get_leader_srv( +/// Returns an error if the service initialization failed +pub async fn start_leader_srv( meta_store: Arc, address_info: AddressInfo, max_heartbeat_interval: Duration, @@ -85,7 +72,8 @@ pub async fn get_leader_srv( leader_rx: WatchReceiver<(MetaLeaderInfo, bool)>, election_handle: JoinHandle<()>, election_shutdown: OneSender<()>, -) -> Result, BackupError> { + mut svc_shutdown_rx: WatchReceiver<()>, +) -> MetaResult<()> { tracing::info!("Defining leader services"); let prometheus_endpoint = opts.prometheus_endpoint.clone(); let env = MetaSrvEnv::::new(opts, meta_store.clone(), leader_rx).await; @@ -298,19 +286,50 @@ pub async fn get_leader_srv( IdleManager::start_idle_checker(env.idle_manager_ref(), Duration::from_secs(30), idle_send) .await, ); - Ok(LeaderServices { - backup_srv, - cluster_srv, - ddl_srv, - health_srv, - heartbeat_srv, - hummock_srv, - idle_recv, - meta_metrics, - notification_srv, - scale_srv, - stream_srv, - sub_tasks, - user_srv, - }) + + let shutdown_all = async move { + for (join_handle, shutdown_sender) in sub_tasks { + if let Err(_err) = shutdown_sender.send(()) { + // Maybe it is already shut down + continue; + } + if let Err(err) = join_handle.await { + tracing::warn!("Failed to join shutdown: {:?}", err); + } + } + }; + + // FIXME: Add service discovery for leader + // https://github.com/risingwavelabs/risingwave/issues/6755 + + tonic::transport::Server::builder() + .layer(MetricsMiddlewareLayer::new(meta_metrics)) + .add_service(HeartbeatServiceServer::new(heartbeat_srv)) + .add_service(ClusterServiceServer::new(cluster_srv)) + .add_service(StreamManagerServiceServer::new(stream_srv)) + .add_service(HummockManagerServiceServer::new(hummock_srv)) + .add_service(NotificationServiceServer::new(notification_srv)) + .add_service(DdlServiceServer::new(ddl_srv)) + .add_service(UserServiceServer::new(user_srv)) + .add_service(ScaleServiceServer::new(scale_srv)) + .add_service(HealthServer::new(health_srv)) + .add_service(BackupServiceServer::new(backup_srv)) + .serve_with_shutdown(address_info.listen_addr, async move { + tokio::select! 
{ + _ = tokio::signal::ctrl_c() => {}, + res = svc_shutdown_rx.changed() => { + match res { + Ok(_) => tracing::info!("Shutting down services"), + Err(_) => tracing::error!("Service shutdown receiver dropped") + } + shutdown_all.await; + }, + _ = idle_recv => { + shutdown_all.await; + }, + } + }) + .await + .unwrap(); + Ok(()) } diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 9505605fd6293..bd9b4f3f573b0 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -17,25 +17,15 @@ use std::sync::Arc; use std::time::Duration; use etcd_client::ConnectOptions; -use risingwave_common::util::addr::{leader_info_to_host_addr, HostAddr}; -use risingwave_pb::backup_service::backup_service_server::BackupServiceServer; -use risingwave_pb::ddl_service::ddl_service_server::DdlServiceServer; +use risingwave_common::util::addr::leader_info_to_host_addr; use risingwave_pb::health::health_server::HealthServer; -use risingwave_pb::hummock::hummock_manager_service_server::HummockManagerServiceServer; -use risingwave_pb::meta::cluster_service_server::ClusterServiceServer; -use risingwave_pb::meta::heartbeat_service_server::HeartbeatServiceServer; -use risingwave_pb::meta::notification_service_server::NotificationServiceServer; -use risingwave_pb::meta::scale_service_server::ScaleServiceServer; -use risingwave_pb::meta::stream_manager_service_server::StreamManagerServiceServer; -use risingwave_pb::meta::MetaLeaderInfo; -use risingwave_pb::user::user_service_server::UserServiceServer; use tokio::sync::oneshot::channel as OneChannel; use tokio::sync::watch::{channel as WatchChannel, Sender as WatchSender}; use tokio::task::JoinHandle; use super::elections::run_elections; use super::intercept::MetricsMiddlewareLayer; -use super::leader_svs::get_leader_srv; +use super::leader_svs::start_leader_srv; use super::service::health_service::HealthServiceImpl; use crate::manager::MetaOpts; use crate::rpc::metrics::MetaMetrics; @@ -164,7 +154,7 @@ pub async fn rpc_serve_with_store( // FIXME: maybe do not use a channel here // What I need is basically a oneshot channel with one producer and multiple consumers - let (svc_shutdown_tx, mut svc_shutdown_rx) = WatchChannel(()); + let (svc_shutdown_tx, svc_shutdown_rx) = WatchChannel(()); let join_handle = tokio::spawn(async move { let span = tracing::span!(tracing::Level::INFO, "services"); @@ -244,7 +234,7 @@ pub async fn rpc_serve_with_store( }; } - let svc = get_leader_srv( + start_leader_srv( meta_store, address_info.clone(), max_heartbeat_interval, @@ -252,54 +242,10 @@ pub async fn rpc_serve_with_store( leader_rx, election_handle, election_shutdown, + svc_shutdown_rx, ) .await .expect("Unable to start leader services"); - - let shutdown_all = async move { - for (join_handle, shutdown_sender) in svc.sub_tasks { - if let Err(_err) = shutdown_sender.send(()) { - // Maybe it is already shut down - continue; - } - if let Err(err) = join_handle.await { - tracing::warn!("Failed to join shutdown: {:?}", err); - } - } - }; - - // FIXME: Add service discovery for leader - // https://github.com/risingwavelabs/risingwave/issues/6755 - - tonic::transport::Server::builder() - .layer(MetricsMiddlewareLayer::new(svc.meta_metrics)) - .add_service(HeartbeatServiceServer::new(svc.heartbeat_srv)) - .add_service(ClusterServiceServer::new(svc.cluster_srv)) - .add_service(StreamManagerServiceServer::new(svc.stream_srv)) - .add_service(HummockManagerServiceServer::new(svc.hummock_srv)) - .add_service(NotificationServiceServer::new(svc.notification_srv)) - 
.add_service(DdlServiceServer::new(svc.ddl_srv)) - .add_service(UserServiceServer::new(svc.user_srv)) - .add_service(ScaleServiceServer::new(svc.scale_srv)) - .add_service(HealthServer::new(svc.health_srv)) - .add_service(BackupServiceServer::new(svc.backup_srv)) - .serve_with_shutdown(address_info.listen_addr, async move { - tokio::select! { - _ = tokio::signal::ctrl_c() => {}, - res = svc_shutdown_rx.changed() => { - match res { - Ok(_) => tracing::info!("Shutting down services"), - Err(_) => tracing::error!("Service shutdown receiver dropped") - } - shutdown_all.await; - }, - _ = svc.idle_recv => { - shutdown_all.await; - }, - } - }) - .await - .unwrap(); }); Ok((join_handle, svc_shutdown_tx)) From d3eb29616c79860c3c719305838e504b169aa093 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Tue, 20 Dec 2022 17:51:36 +0100 Subject: [PATCH 23/61] use follower service handle --- src/meta/src/rpc/server.rs | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index bd9b4f3f573b0..8f7b3a2596048 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -174,9 +174,8 @@ pub async fn rpc_serve_with_store( // https://github.com/risingwavelabs/risingwave/issues/6755 let mut svc_shutdown_rx_clone = svc_shutdown_rx.clone(); let (follower_shutdown_tx, follower_shutdown_rx) = OneChannel::<()>(); - let (follower_finished_tx, follower_finished_rx) = OneChannel::<()>(); - if !is_leader { - tokio::spawn(async move { + let follower_handle: Option> = if !is_leader { + Some(tokio::spawn(async move { let _ = tracing::span!(tracing::Level::INFO, "follower services").enter(); tracing::info!("Starting follower services"); let health_srv = HealthServiceImpl::new(); @@ -204,14 +203,10 @@ pub async fn rpc_serve_with_store( }) .await .unwrap(); - match follower_finished_tx.send(()) { - Ok(_) => tracing::info!("Shutting down follower services done"), - Err(_) => { - tracing::error!("Follower service shutdown done sender receiver dropped") - } - } - }); - } + })) + } else { + None + }; // wait until this node becomes a leader while !services_leader_rx.borrow().clone().1 { @@ -228,10 +223,7 @@ pub async fn rpc_serve_with_store( Err(_) => tracing::error!("Follower service receiver dropped"), } // Wait until follower service is down - match follower_finished_rx.await { - Ok(_) => tracing::info!("Follower services shut down"), - Err(_) => tracing::error!("Follower service shutdown finished sender dropped"), - }; + let _ = follower_handle.unwrap().await; } start_leader_srv( From 9a3af54272484f0c590486ebbce4d254248f533f Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Tue, 20 Dec 2022 18:03:13 +0100 Subject: [PATCH 24/61] node_is_leader --- src/meta/src/rpc/server.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 8f7b3a2596048..6808cfeec36c2 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -19,8 +19,11 @@ use std::time::Duration; use etcd_client::ConnectOptions; use risingwave_common::util::addr::leader_info_to_host_addr; use risingwave_pb::health::health_server::HealthServer; +use risingwave_pb::meta::MetaLeaderInfo; use tokio::sync::oneshot::channel as OneChannel; -use tokio::sync::watch::{channel as WatchChannel, Sender as WatchSender}; +use tokio::sync::watch::{ + channel as WatchChannel, Receiver as WatchReceiver, Sender as WatchSender, +}; use tokio::task::JoinHandle; use 
super::elections::run_elections; @@ -106,6 +109,10 @@ pub async fn rpc_serve( } } +fn node_is_leader(leader_rx: &WatchReceiver<(MetaLeaderInfo, bool)>) -> bool { + leader_rx.borrow().clone().1 +} + pub async fn rpc_serve_with_store( meta_store: Arc, address_info: AddressInfo, @@ -114,7 +121,7 @@ pub async fn rpc_serve_with_store( opts: MetaOpts, ) -> MetaResult<(JoinHandle<()>, WatchSender<()>)> { // Initialize managers. - let (_, election_handle, election_shutdown, leader_rx) = run_elections( + let (_, election_handle, election_shutdown, mut leader_rx) = run_elections( address_info.listen_addr.clone().to_string(), meta_store.clone(), lease_interval_secs, @@ -209,11 +216,8 @@ pub async fn rpc_serve_with_store( }; // wait until this node becomes a leader - while !services_leader_rx.borrow().clone().1 { - services_leader_rx - .changed() - .await - .expect("Leader sender dropped"); + while !node_is_leader(&leader_rx) { + leader_rx.changed().await.expect("Leader sender dropped"); } // shut down follower svc if node used to be follower From c68227a852dbefe346d00fae64186d46700ca805 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Tue, 20 Dec 2022 18:10:46 +0100 Subject: [PATCH 25/61] remove was_follower --- src/meta/src/rpc/server.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 6808cfeec36c2..10ffba954799f 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -174,7 +174,6 @@ pub async fn rpc_serve_with_store( .expect("Leader sender dropped"); let is_leader = services_leader_rx.borrow().clone().1; - let was_follower = !is_leader; // run follower services until node becomes leader // FIXME: Add service discovery for follower @@ -221,7 +220,7 @@ pub async fn rpc_serve_with_store( } // shut down follower svc if node used to be follower - if was_follower { + if follower_handle.is_some() { match follower_shutdown_tx.send(()) { Ok(_) => tracing::info!("Shutting down follower services"), Err(_) => tracing::error!("Follower service receiver dropped"), From 1845b9cdfbc8903607d3f5536e23fcaa4092594a Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Tue, 20 Dec 2022 20:52:54 +0100 Subject: [PATCH 26/61] rename leader_svc --- src/meta/src/rpc/{leader_svs.rs => leader_svc.rs} | 0 src/meta/src/rpc/mod.rs | 2 +- src/meta/src/rpc/server.rs | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/meta/src/rpc/{leader_svs.rs => leader_svc.rs} (100%) diff --git a/src/meta/src/rpc/leader_svs.rs b/src/meta/src/rpc/leader_svc.rs similarity index 100% rename from src/meta/src/rpc/leader_svs.rs rename to src/meta/src/rpc/leader_svc.rs diff --git a/src/meta/src/rpc/mod.rs b/src/meta/src/rpc/mod.rs index f3df22aa5751e..aadc06e13877f 100644 --- a/src/meta/src/rpc/mod.rs +++ b/src/meta/src/rpc/mod.rs @@ -14,7 +14,7 @@ mod elections; mod intercept; -mod leader_svs; +mod leader_svc; pub mod metrics; pub mod server; mod service; diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 10ffba954799f..0c2298f7d06d4 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -28,7 +28,7 @@ use tokio::task::JoinHandle; use super::elections::run_elections; use super::intercept::MetricsMiddlewareLayer; -use super::leader_svs::start_leader_srv; +use super::leader_svc::start_leader_srv; use super::service::health_service::HealthServiceImpl; use crate::manager::MetaOpts; use crate::rpc::metrics::MetaMetrics; From a105a346c6138883233a49d3c40fa8bffbe0ab4b Mon Sep 17 00:00:00 2001 From: CAJan93 
Date: Tue, 20 Dec 2022 21:09:53 +0100 Subject: [PATCH 27/61] move follower definition to leader_svc --- src/meta/src/rpc/follower_svc.rs | 57 ++++++++++++++++++++++++++++++++ src/meta/src/rpc/mod.rs | 1 + src/meta/src/rpc/server.rs | 42 ++++++----------------- 3 files changed, 68 insertions(+), 32 deletions(-) create mode 100644 src/meta/src/rpc/follower_svc.rs diff --git a/src/meta/src/rpc/follower_svc.rs b/src/meta/src/rpc/follower_svc.rs new file mode 100644 index 0000000000000..a544ba40977c3 --- /dev/null +++ b/src/meta/src/rpc/follower_svc.rs @@ -0,0 +1,57 @@ +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use risingwave_pb::health::health_server::HealthServer; +use tokio::sync::oneshot::Receiver as OneReceiver; +use tokio::sync::watch::Receiver as WatchReceiver; + +use super::intercept::MetricsMiddlewareLayer; +use super::server::AddressInfo; +use super::service::health_service::HealthServiceImpl; +use crate::rpc::metrics::MetaMetrics; + +/// Starts all services needed for the meta follower node +pub async fn start_follower_srv( + mut svc_shutdown_rx_clone: WatchReceiver<()>, + follower_shutdown_rx: OneReceiver<()>, + address_info: AddressInfo, +) { + let health_srv = HealthServiceImpl::new(); + tonic::transport::Server::builder() + .layer(MetricsMiddlewareLayer::new(Arc::new(MetaMetrics::new()))) + .add_service(HealthServer::new(health_srv)) + .serve_with_shutdown(address_info.listen_addr, async move { + tokio::select! { + _ = tokio::signal::ctrl_c() => {}, + // shutdown service if all services should be shut down + res = svc_shutdown_rx_clone.changed() => { + match res { + Ok(_) => tracing::info!("Shutting down services"), + Err(_) => tracing::error!("Service shutdown sender dropped") + } + }, + // shutdown service if follower becomes leader + res = follower_shutdown_rx => { + match res { + Ok(_) => tracing::info!("Shutting down follower services"), + Err(_) => tracing::error!("Follower service shutdown sender dropped") + } + }, + } + }) + .await + .unwrap(); +} diff --git a/src/meta/src/rpc/mod.rs b/src/meta/src/rpc/mod.rs index aadc06e13877f..9afb823e66da7 100644 --- a/src/meta/src/rpc/mod.rs +++ b/src/meta/src/rpc/mod.rs @@ -13,6 +13,7 @@ // limitations under the License. 
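// A minimal sketch (not part of the patches above) of how a caller could drive the new
// `start_follower_srv`: run the follower-only gRPC server until the election loop reports
// leadership, then stop it before the leader services take over the listen address.
// Assumes `start_follower_srv` and `AddressInfo` from this patch are in scope and uses a
// simplified `watch::Receiver<bool>` for leadership; the real code carries `(MetaLeaderInfo, bool)`.
async fn follower_until_leader(
    address_info: AddressInfo,
    mut leader_rx: tokio::sync::watch::Receiver<bool>,
) {
    let (_svc_shutdown_tx, svc_shutdown_rx) = tokio::sync::watch::channel(());
    let (follower_shutdown_tx, follower_shutdown_rx) = tokio::sync::oneshot::channel::<()>();

    // Follower services (currently only the health service) run in the background.
    let handle = tokio::spawn(start_follower_srv(
        svc_shutdown_rx,
        follower_shutdown_rx,
        address_info,
    ));

    // Wait until this node wins an election.
    while !*leader_rx.borrow() {
        leader_rx.changed().await.expect("leader sender dropped");
    }

    // Stop the follower server and wait for it, freeing the port for the leader services.
    let _ = follower_shutdown_tx.send(());
    handle.await.expect("follower service task failed");
}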
mod elections; +mod follower_svc; mod intercept; mod leader_svc; pub mod metrics; diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 0c2298f7d06d4..304e13a4e5f42 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -18,7 +18,6 @@ use std::time::Duration; use etcd_client::ConnectOptions; use risingwave_common::util::addr::leader_info_to_host_addr; -use risingwave_pb::health::health_server::HealthServer; use risingwave_pb::meta::MetaLeaderInfo; use tokio::sync::oneshot::channel as OneChannel; use tokio::sync::watch::{ @@ -27,11 +26,9 @@ use tokio::sync::watch::{ use tokio::task::JoinHandle; use super::elections::run_elections; -use super::intercept::MetricsMiddlewareLayer; +use super::follower_svc::start_follower_srv; use super::leader_svc::start_leader_srv; -use super::service::health_service::HealthServiceImpl; use crate::manager::MetaOpts; -use crate::rpc::metrics::MetaMetrics; use crate::storage::{EtcdMetaStore, MemStore, MetaStore, WrappedEtcdClient as EtcdClient}; use crate::MetaResult; @@ -178,37 +175,18 @@ pub async fn rpc_serve_with_store( // run follower services until node becomes leader // FIXME: Add service discovery for follower // https://github.com/risingwavelabs/risingwave/issues/6755 - let mut svc_shutdown_rx_clone = svc_shutdown_rx.clone(); + let svc_shutdown_rx_clone = svc_shutdown_rx.clone(); let (follower_shutdown_tx, follower_shutdown_rx) = OneChannel::<()>(); let follower_handle: Option> = if !is_leader { + let address_info_clone = address_info.clone(); Some(tokio::spawn(async move { let _ = tracing::span!(tracing::Level::INFO, "follower services").enter(); - tracing::info!("Starting follower services"); - let health_srv = HealthServiceImpl::new(); - tonic::transport::Server::builder() - .layer(MetricsMiddlewareLayer::new(Arc::new(MetaMetrics::new()))) - .add_service(HealthServer::new(health_srv)) - .serve_with_shutdown(address_info.listen_addr, async move { - tokio::select! 
{ - _ = tokio::signal::ctrl_c() => {}, - // shutdown service if all services should be shut down - res = svc_shutdown_rx_clone.changed() => { - match res { - Ok(_) => tracing::info!("Shutting down services"), - Err(_) => tracing::error!("Service shutdown sender dropped") - } - }, - // shutdown service if follower becomes leader - res = follower_shutdown_rx => { - match res { - Ok(_) => tracing::info!("Shutting down follower services"), - Err(_) => tracing::error!("Follower service shutdown sender dropped") - } - }, - } - }) - .await - .unwrap(); + start_follower_srv( + svc_shutdown_rx_clone, + follower_shutdown_rx, + address_info_clone, + ) + .await; })) } else { None @@ -231,7 +209,7 @@ pub async fn rpc_serve_with_store( start_leader_srv( meta_store, - address_info.clone(), + address_info, max_heartbeat_interval, opts, leader_rx, From 33a1281ef6ae57104fe8146c50c128de773cf3a8 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Tue, 20 Dec 2022 21:18:00 +0100 Subject: [PATCH 28/61] risedev c --- src/meta/src/rpc/leader_svc.rs | 14 +++++++++++--- src/meta/src/rpc/server.rs | 10 +++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/meta/src/rpc/leader_svc.rs b/src/meta/src/rpc/leader_svc.rs index 4f9116be303ee..f63ff6667b1bf 100644 --- a/src/meta/src/rpc/leader_svc.rs +++ b/src/meta/src/rpc/leader_svc.rs @@ -58,6 +58,12 @@ use crate::storage::MetaStore; use crate::stream::{GlobalStreamManager, SourceManager}; use crate::{hummock, MetaResult}; +// simple wrapper containing election sync related objects +pub struct ElectionCoordination { + pub election_handle: JoinHandle<()>, + pub election_shutdown: OneSender<()>, +} + /// Starts all services needed for the meta leader node /// Only call this function once, since initializing the services multiple times will result in an /// inconsistent state @@ -70,8 +76,7 @@ pub async fn start_leader_srv( max_heartbeat_interval: Duration, opts: MetaOpts, leader_rx: WatchReceiver<(MetaLeaderInfo, bool)>, - election_handle: JoinHandle<()>, - election_shutdown: OneSender<()>, + election_coordination: ElectionCoordination, mut svc_shutdown_rx: WatchReceiver<()>, ) -> MetaResult<()> { tracing::info!("Defining leader services"); @@ -274,7 +279,10 @@ pub async fn start_leader_srv( .await, ); sub_tasks.push(HummockManager::start_compaction_heartbeat(hummock_manager).await); - sub_tasks.push((election_handle, election_shutdown)); + sub_tasks.push(( + election_coordination.election_handle, + election_coordination.election_shutdown, + )); if cfg!(not(test)) { sub_tasks.push( ClusterManager::start_heartbeat_checker(cluster_manager, Duration::from_secs(1)).await, diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 304e13a4e5f42..215bbfef3339d 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -27,7 +27,7 @@ use tokio::task::JoinHandle; use super::elections::run_elections; use super::follower_svc::start_follower_srv; -use super::leader_svc::start_leader_srv; +use super::leader_svc::{start_leader_srv, ElectionCoordination}; use crate::manager::MetaOpts; use crate::storage::{EtcdMetaStore, MemStore, MetaStore, WrappedEtcdClient as EtcdClient}; use crate::MetaResult; @@ -207,14 +207,18 @@ pub async fn rpc_serve_with_store( let _ = follower_handle.unwrap().await; } + let elect_coord = ElectionCoordination { + election_handle, + election_shutdown, + }; + start_leader_srv( meta_store, address_info, max_heartbeat_interval, opts, leader_rx, - election_handle, - election_shutdown, + elect_coord, svc_shutdown_rx, ) 
.await From 31f98be237390e2aab8502cbca05133c74193d8c Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 10:25:35 +0100 Subject: [PATCH 29/61] nit --- src/meta/src/rpc/server.rs | 171 ++++++++++++++++++++++++++++++++++++- 1 file changed, 169 insertions(+), 2 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 215bbfef3339d..ee2281c965729 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -198,13 +198,13 @@ pub async fn rpc_serve_with_store( } // shut down follower svc if node used to be follower - if follower_handle.is_some() { + if let Some(handle) = follower_handle { match follower_shutdown_tx.send(()) { Ok(_) => tracing::info!("Shutting down follower services"), Err(_) => tracing::error!("Follower service receiver dropped"), } // Wait until follower service is down - let _ = follower_handle.unwrap().await; + handle.await.unwrap(); } let elect_coord = ElectionCoordination { @@ -227,3 +227,170 @@ pub async fn rpc_serve_with_store( Ok((join_handle, svc_shutdown_tx)) } + +// TODO: repeat test 100 times. Start test with different sleep intervals and so on +// Print the sleep intervals +// use pseudo rand gen with seed +mod tests { + use std::net::{IpAddr, Ipv4Addr}; + + use risingwave_common::util::addr::HostAddr; + use risingwave_pb::common::WorkerType; + use risingwave_rpc_client::MetaClient; + use tokio::time::sleep; + + use super::*; + + async fn setup_n_nodes(n: u16) -> Vec<(JoinHandle<()>, WatchSender<()>)> { + let meta_store = Arc::new(MemStore::default()); + + let mut node_controllers: Vec<(JoinHandle<()>, WatchSender<()>)> = vec![]; + for i in 0..n { + // TODO: add pseudo random sleep here + let node = format!("node{}", i); + let err_msg = format!("Meta {} failed in setup", node); + let err_msg = err_msg.as_str(); + + let info = AddressInfo { + addr: node, + listen_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 1234 + i), + prometheus_addr: None, + dashboard_addr: None, + ui_path: None, + }; + node_controllers.push( + rpc_serve_with_store( + meta_store.clone(), + info, + Duration::from_secs(4), // What values do we want to use here + 2, // What values do we want to use here + MetaOpts::test(true), // True or false? + ) + .await + .expect(err_msg), + ); + } // sleep duration of election cycle, not fixed duration + sleep(Duration::from_secs(6)).await; + node_controllers + } + + #[tokio::test] + async fn test_single_leader_setup() { + // make n also pseudo random? + let n = 5; + let _node_controllers = setup_n_nodes(n).await; + + let mut leader_count = 0; + for i in 0..n { + // create client connecting against meta_i + let port = 1234 + i; + let meta_addr = format!("http://127.0.0.1:{}", port); + let host_addr = "127.0.0.1:5688".parse::().unwrap(); + let host_addr = HostAddr { + host: host_addr.host, + port: host_addr.port + i, // Do I need to change the port? + }; + + // register_new fails + // Do I need to start some service first? + // are the other services still running here? + // yes! 
Check via lsof -i @localhost + let err_msg = format!("Unable to connect against client {}", i); + let client_i = MetaClient::register_new( + meta_addr.as_str(), + WorkerType::ComputeNode, + &host_addr, + 0, + ) + .await + .expect(&err_msg); + + match client_i.send_heartbeat(i as u32, vec![]).await { + Ok(_) => { + leader_count += 1; + tracing::info!("Node {} is leader", i); + } + Err(_) => tracing::info!("Node {} is follower", i), + } + } + + assert_eq!( + leader_count, 1, + "Expected to have 1 leader, instead got {} leaders", + leader_count + ); + + // Also use pseudo random delays here? + // https://github.com/risingwavelabs/risingwave/issues/6884 + + // Use heartbeat to determine if node is leader + // Use health service to determine if node is up + + // Tests: + // Check if nodes conflict when they report leader + // Check if the reported leader node is the actual leader node + // Check if there are != 1 leader nodes + // No failover should happen here, since system is idle + } + + // Kill nodes via sender + // Where do I write the logs to? + // TODO: how do I know if a node is a leader or a follower? + + // TODO: implement failover test +} + +// Old test below +// TODO: remove +// #[cfg(test)] +// mod testsdeprecated { +// use tokio::time::sleep; +// +// use super::*; +// +// #[tokio::test] +// async fn test_leader_lease() { +// let info = AddressInfo { +// addr: "node1".to_string(), +// ..Default::default() +// }; +// let meta_store = Arc::new(MemStore::default()); +// let (handle, closer) = rpc_serve_with_store( +// meta_store.clone(), +// info, +// Duration::from_secs(10), +// 2, +// MetaOpts::test(false), +// ) +// .await +// .unwrap(); +// sleep(Duration::from_secs(4)).await; +// let info2 = AddressInfo { +// addr: "node2".to_string(), +// ..Default::default() +// }; +// let ret = rpc_serve_with_store( +// meta_store.clone(), +// info2.clone(), +// Duration::from_secs(10), +// 2, +// MetaOpts::test(false), +// ) +// .await; +// assert!(ret.is_err()); +// closer.send(()).unwrap(); +// handle.await.unwrap(); +// sleep(Duration::from_secs(3)).await; +// rpc_serve_with_store( +// meta_store.clone(), +// info2, +// Duration::from_secs(10), +// 2, +// MetaOpts::test(false), +// ) +// .await +// .unwrap(); +// } +// } +// +// Old test above From 6f3dff7f2ef74653598a08c3db391b9ac21db5b3 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 10:26:25 +0100 Subject: [PATCH 30/61] rename var --- src/meta/src/rpc/follower_svc.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/meta/src/rpc/follower_svc.rs b/src/meta/src/rpc/follower_svc.rs index a544ba40977c3..e8f7bf9ac48e2 100644 --- a/src/meta/src/rpc/follower_svc.rs +++ b/src/meta/src/rpc/follower_svc.rs @@ -25,7 +25,7 @@ use crate::rpc::metrics::MetaMetrics; /// Starts all services needed for the meta follower node pub async fn start_follower_srv( - mut svc_shutdown_rx_clone: WatchReceiver<()>, + mut svc_shutdown_rx: WatchReceiver<()>, follower_shutdown_rx: OneReceiver<()>, address_info: AddressInfo, ) { @@ -37,7 +37,7 @@ pub async fn start_follower_srv( tokio::select! 
{ _ = tokio::signal::ctrl_c() => {}, // shutdown service if all services should be shut down - res = svc_shutdown_rx_clone.changed() => { + res = svc_shutdown_rx.changed() => { match res { Ok(_) => tracing::info!("Shutting down services"), Err(_) => tracing::error!("Service shutdown sender dropped") From beacd7b4fbbae95e5e299c2f11b958f6f58b03b7 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 10:44:35 +0100 Subject: [PATCH 31/61] implement test feedback --- src/meta/src/rpc/server.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index ee2281c965729..f6105bb160732 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -241,19 +241,20 @@ mod tests { use super::*; - async fn setup_n_nodes(n: u16) -> Vec<(JoinHandle<()>, WatchSender<()>)> { + async fn setup_n_nodes(n: u16, meta_port: u16) -> Vec<(JoinHandle<()>, WatchSender<()>)> { let meta_store = Arc::new(MemStore::default()); let mut node_controllers: Vec<(JoinHandle<()>, WatchSender<()>)> = vec![]; for i in 0..n { // TODO: add pseudo random sleep here let node = format!("node{}", i); - let err_msg = format!("Meta {} failed in setup", node); - let err_msg = err_msg.as_str(); let info = AddressInfo { addr: node, - listen_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 1234 + i), + listen_addr: SocketAddr::new( + IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), + meta_port + i, + ), prometheus_addr: None, dashboard_addr: None, ui_path: None, @@ -267,7 +268,7 @@ mod tests { MetaOpts::test(true), // True or false? ) .await - .expect(err_msg), + .unwrap_or_else(|e| panic!("Meta node{} failed in setup. Err: {}", i, e)), ); } // sleep duration of election cycle, not fixed duration sleep(Duration::from_secs(6)).await; @@ -278,12 +279,13 @@ mod tests { async fn test_single_leader_setup() { // make n also pseudo random? let n = 5; - let _node_controllers = setup_n_nodes(n).await; + let meta_port = 1234; + let _node_controllers = setup_n_nodes(n, meta_port).await; let mut leader_count = 0; for i in 0..n { // create client connecting against meta_i - let port = 1234 + i; + let port = meta_port + i; let meta_addr = format!("http://127.0.0.1:{}", port); let host_addr = "127.0.0.1:5688".parse::().unwrap(); let host_addr = HostAddr { From 0f82487f54b18c18feb4728140c00d48a16d0f87 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 14:20:24 +0100 Subject: [PATCH 32/61] test is working --- src/meta/src/rpc/server.rs | 53 ++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index f6105bb160732..f7c45cce61317 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -232,6 +232,7 @@ pub async fn rpc_serve_with_store( // Print the sleep intervals // use pseudo rand gen with seed mod tests { + use core::panic; use std::net::{IpAddr, Ipv4Addr}; use risingwave_common::util::addr::HostAddr; @@ -246,16 +247,16 @@ mod tests { let mut node_controllers: Vec<(JoinHandle<()>, WatchSender<()>)> = vec![]; for i in 0..n { - // TODO: add pseudo random sleep here - let node = format!("node{}", i); + // TODO: use http or https here? 
+ let addr = format!("http://127.0.0.1:{}", meta_port + i); let info = AddressInfo { - addr: node, + addr, listen_addr: SocketAddr::new( IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), meta_port + i, ), - prometheus_addr: None, + prometheus_addr: None, // TODO: use default here dashboard_addr: None, ui_path: None, }; @@ -286,6 +287,7 @@ mod tests { for i in 0..n { // create client connecting against meta_i let port = meta_port + i; + // https or http? let meta_addr = format!("http://127.0.0.1:{}", port); let host_addr = "127.0.0.1:5688".parse::().unwrap(); let host_addr = HostAddr { @@ -297,22 +299,35 @@ mod tests { // Do I need to start some service first? // are the other services still running here? // yes! Check via lsof -i @localhost - let err_msg = format!("Unable to connect against client {}", i); - let client_i = MetaClient::register_new( - meta_addr.as_str(), - WorkerType::ComputeNode, - &host_addr, - 0, - ) - .await - .expect(&err_msg); - - match client_i.send_heartbeat(i as u32, vec![]).await { - Ok(_) => { - leader_count += 1; - tracing::info!("Node {} is leader", i); + // unspecified: Got error gRPC error (Internal error): worker_type + // Compactor: Got error gRPC error (Operation is not implemented or not supported) + // Frontend: (Operation is not implemented or not supported) + // ComputeNode: Got error gRPC error (Operation is not implemented or not supported) + // RiseCtl: Got error gRPC error (Operation is not implemented or not supported) + + // I get this error because I am talking to a non-leader? + let is_leader = tokio::time::timeout(std::time::Duration::from_secs(1), async move { + let client_i = MetaClient::register_new( + meta_addr.as_str(), + WorkerType::ComputeNode, + &host_addr, + 1, + ) + .await; + match client_i { + Ok(client_i) => { + match client_i.send_heartbeat(client_i.worker_id(), vec![]).await { + Ok(_) => true, + Err(_) => false, + } + } + Err(_) => false, } - Err(_) => tracing::info!("Node {} is follower", i), + }) + .await + .unwrap_or(false); + if is_leader { + leader_count += 1; } } From 257983d1ca933a87a582379afd2a759e90aa6907 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 15:18:47 +0100 Subject: [PATCH 33/61] remove notes --- src/meta/src/rpc/server.rs | 70 +------------------------------------- 1 file changed, 1 insertion(+), 69 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index f7c45cce61317..1f38b5b7b5332 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -285,27 +285,14 @@ mod tests { let mut leader_count = 0; for i in 0..n { - // create client connecting against meta_i let port = meta_port + i; - // https or http? let meta_addr = format!("http://127.0.0.1:{}", port); let host_addr = "127.0.0.1:5688".parse::().unwrap(); let host_addr = HostAddr { host: host_addr.host, - port: host_addr.port + i, // Do I need to change the port? + port: host_addr.port + i, }; - // register_new fails - // Do I need to start some service first? - // are the other services still running here? - // yes! Check via lsof -i @localhost - // unspecified: Got error gRPC error (Internal error): worker_type - // Compactor: Got error gRPC error (Operation is not implemented or not supported) - // Frontend: (Operation is not implemented or not supported) - // ComputeNode: Got error gRPC error (Operation is not implemented or not supported) - // RiseCtl: Got error gRPC error (Operation is not implemented or not supported) - - // I get this error because I am talking to a non-leader? 
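// Sketch of the probe used below (a hypothetical helper, not part of the patch): a follower
// only exposes the health service, so registering a worker and sending a heartbeat can only
// succeed against the current leader. The timeout keeps probes against followers from hanging.
// Assumes the same test imports as this module (`MetaClient`, `WorkerType`, `HostAddr`, `Duration`).
async fn probe_is_leader(meta_addr: String, host_addr: HostAddr) -> bool {
    tokio::time::timeout(Duration::from_secs(1), async move {
        match MetaClient::register_new(meta_addr.as_str(), WorkerType::ComputeNode, &host_addr, 1)
            .await
        {
            Ok(client) => client
                .send_heartbeat(client.worker_id(), vec![])
                .await
                .is_ok(),
            Err(_) => false,
        }
    })
    .await
    .unwrap_or(false)
}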
let is_leader = tokio::time::timeout(std::time::Duration::from_secs(1), async move { let client_i = MetaClient::register_new( meta_addr.as_str(), @@ -356,58 +343,3 @@ mod tests { // TODO: implement failover test } - -// Old test below -// TODO: remove -// #[cfg(test)] -// mod testsdeprecated { -// use tokio::time::sleep; -// -// use super::*; -// -// #[tokio::test] -// async fn test_leader_lease() { -// let info = AddressInfo { -// addr: "node1".to_string(), -// ..Default::default() -// }; -// let meta_store = Arc::new(MemStore::default()); -// let (handle, closer) = rpc_serve_with_store( -// meta_store.clone(), -// info, -// Duration::from_secs(10), -// 2, -// MetaOpts::test(false), -// ) -// .await -// .unwrap(); -// sleep(Duration::from_secs(4)).await; -// let info2 = AddressInfo { -// addr: "node2".to_string(), -// ..Default::default() -// }; -// let ret = rpc_serve_with_store( -// meta_store.clone(), -// info2.clone(), -// Duration::from_secs(10), -// 2, -// MetaOpts::test(false), -// ) -// .await; -// assert!(ret.is_err()); -// closer.send(()).unwrap(); -// handle.await.unwrap(); -// sleep(Duration::from_secs(3)).await; -// rpc_serve_with_store( -// meta_store.clone(), -// info2, -// Duration::from_secs(10), -// 2, -// MetaOpts::test(false), -// ) -// .await -// .unwrap(); -// } -// } -// -// Old test above From 695d9e609c86eceaa421b3cb442bae8391677bfd Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 15:29:38 +0100 Subject: [PATCH 34/61] simplify test a little --- src/meta/src/rpc/server.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 1f38b5b7b5332..ca22f708ab5f9 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -285,12 +285,12 @@ mod tests { let mut leader_count = 0; for i in 0..n { + let local = "127.0.0.1".to_owned(); let port = meta_port + i; - let meta_addr = format!("http://127.0.0.1:{}", port); - let host_addr = "127.0.0.1:5688".parse::().unwrap(); + let meta_addr = format!("http://{}:{}", local, port); let host_addr = HostAddr { - host: host_addr.host, - port: host_addr.port + i, + host: local, + port: 5688 + i, }; let is_leader = tokio::time::timeout(std::time::Duration::from_secs(1), async move { From d55b1cabba4b76234de114677d8ac12c84b5a685 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 15:45:14 +0100 Subject: [PATCH 35/61] Test single leader with diff amount of nodes --- src/meta/src/rpc/server.rs | 99 ++++++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 31 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index ca22f708ab5f9..fa7d5608febb6 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -276,53 +276,48 @@ mod tests { node_controllers } - #[tokio::test] - async fn test_single_leader_setup() { + async fn number_of_leaders(number_of_nodes: u16, meta_port: u16, host_port: u16) -> u16 { // make n also pseudo random? 
- let n = 5; - let meta_port = 1234; - let _node_controllers = setup_n_nodes(n, meta_port).await; + let _node_controllers = setup_n_nodes(number_of_nodes, meta_port).await; let mut leader_count = 0; - for i in 0..n { + for i in 0..number_of_nodes { let local = "127.0.0.1".to_owned(); let port = meta_port + i; let meta_addr = format!("http://{}:{}", local, port); let host_addr = HostAddr { host: local, - port: 5688 + i, + port: host_port + i, }; - let is_leader = tokio::time::timeout(std::time::Duration::from_secs(1), async move { - let client_i = MetaClient::register_new( - meta_addr.as_str(), - WorkerType::ComputeNode, - &host_addr, - 1, - ) - .await; - match client_i { - Ok(client_i) => { - match client_i.send_heartbeat(client_i.worker_id(), vec![]).await { - Ok(_) => true, - Err(_) => false, + // let is_leader = tokio::time::timeout(std::time::Duration::from_secs(1), + // async move { + let is_leader = + tokio::time::timeout(std::time::Duration::from_millis(100), async move { + let client_i = MetaClient::register_new( + meta_addr.as_str(), + WorkerType::ComputeNode, + &host_addr, + 1, + ) + .await; + match client_i { + Ok(client_i) => { + match client_i.send_heartbeat(client_i.worker_id(), vec![]).await { + Ok(_) => true, + Err(_) => false, + } } + Err(_) => false, } - Err(_) => false, - } - }) - .await - .unwrap_or(false); + }) + .await + .unwrap_or(false); if is_leader { leader_count += 1; } } - - assert_eq!( - leader_count, 1, - "Expected to have 1 leader, instead got {} leaders", - leader_count - ); + leader_count // Also use pseudo random delays here? // https://github.com/risingwavelabs/risingwave/issues/6884 @@ -337,6 +332,48 @@ mod tests { // No failover should happen here, since system is idle } + // Writing these tests as separate functions instead of one loop, because functions get executed + // in parallel + #[tokio::test] + async fn test_single_leader_setup_1() { + let leader_count = number_of_leaders(1, 1234, 5678).await; + assert_eq!( + leader_count, 1, + "Expected to have 1 leader, instead got {} leaders", + leader_count + ); + } + + #[tokio::test] + async fn test_single_leader_setup_3() { + let leader_count = number_of_leaders(3, 2345, 6789).await; + assert_eq!( + leader_count, 1, + "Expected to have 1 leader, instead got {} leaders", + leader_count + ); + } + + #[tokio::test] + async fn test_single_leader_setup_10() { + let leader_count = number_of_leaders(10, 3456, 7890).await; + assert_eq!( + leader_count, 1, + "Expected to have 1 leader, instead got {} leaders", + leader_count + ); + } + + #[tokio::test] + async fn test_single_leader_setup_100() { + let leader_count = number_of_leaders(100, 4567, 8901).await; + assert_eq!( + leader_count, 1, + "Expected to have 1 leader, instead got {} leaders", + leader_count + ); + } + // Kill nodes via sender // Where do I write the logs to? // TODO: how do I know if a node is a leader or a follower? From fd85bab0bb99c232f97ca7bf6b89571b7ff0915e Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 15:52:07 +0100 Subject: [PATCH 36/61] cleanup --- src/meta/src/rpc/server.rs | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index fa7d5608febb6..7df441e9f9011 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -277,7 +277,6 @@ mod tests { } async fn number_of_leaders(number_of_nodes: u16, meta_port: u16, host_port: u16) -> u16 { - // make n also pseudo random? 
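// The TODOs in these tests mention staggering nodes with pseudo-random sleeps from a seeded
// generator. A small sketch of that idea (illustrative only; `rand` is already a dependency of
// the election code, and 42 is an arbitrary example seed):
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};

fn startup_jitter_ms(seed: u64, node_idx: u64) -> u64 {
    // Same seed => same jitter sequence, so flaky runs stay reproducible.
    let mut rng = StdRng::seed_from_u64(seed ^ node_idx);
    rng.gen_range(0..500)
}
// e.g. inside the setup loop: sleep(Duration::from_millis(startup_jitter_ms(42, i as u64))).await;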
let _node_controllers = setup_n_nodes(number_of_nodes, meta_port).await; let mut leader_count = 0; @@ -290,8 +289,6 @@ mod tests { port: host_port + i, }; - // let is_leader = tokio::time::timeout(std::time::Duration::from_secs(1), - // async move { let is_leader = tokio::time::timeout(std::time::Duration::from_millis(100), async move { let client_i = MetaClient::register_new( @@ -318,18 +315,6 @@ mod tests { } } leader_count - - // Also use pseudo random delays here? - // https://github.com/risingwavelabs/risingwave/issues/6884 - - // Use heartbeat to determine if node is leader - // Use health service to determine if node is up - - // Tests: - // Check if nodes conflict when they report leader - // Check if the reported leader node is the actual leader node - // Check if there are != 1 leader nodes - // No failover should happen here, since system is idle } // Writing these tests as separate functions instead of one loop, because functions get executed @@ -374,9 +359,5 @@ mod tests { ); } - // Kill nodes via sender - // Where do I write the logs to? - // TODO: how do I know if a node is a leader or a follower? - // TODO: implement failover test } From 209747169e3531f2b100c0305951aba0f3ef40b5 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 15:59:31 +0100 Subject: [PATCH 37/61] docstring + notes --- src/meta/src/rpc/server.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 7df441e9f9011..08759f7db0ab1 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -242,6 +242,8 @@ mod tests { use super::*; + /// Start `n` meta nodes on localhost. First node will be started at `meta_port`, 2nd node on + /// `meta_port + 1`, ... async fn setup_n_nodes(n: u16, meta_port: u16) -> Vec<(JoinHandle<()>, WatchSender<()>)> { let meta_store = Arc::new(MemStore::default()); @@ -276,6 +278,12 @@ mod tests { node_controllers } + /// Check for `number_of_nodes` meta leader nodes, starting at `meta_port`, `meta_port + 1`, ... + /// Simulates `number_of_nodes` compute nodes, starting at `meta_port`, `meta_port + 1`, ... + /// + /// ## Returns + /// Number of nodes which currently are leaders. Number is not snapshoted. If there is a + /// leader failover in process, you may get an incorrect result async fn number_of_leaders(number_of_nodes: u16, meta_port: u16, host_port: u16) -> u16 { let _node_controllers = setup_n_nodes(number_of_nodes, meta_port).await; @@ -359,5 +367,12 @@ mod tests { ); } - // TODO: implement failover test + #[tokio::test] + async fn test_failover() { + // TODO: After failover there should be one leader + // How do we simulate failover? -> Kill current leader + // Failover tests should work with 1, 3, 10, 100 nodes + // Afterwards we still want to have 1 leader only + // utilize number_of_leaders function + } } From efcc000eaaa5d525e65a1092b7117723978d5f22 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 16:45:59 +0100 Subject: [PATCH 38/61] add failover test --- src/meta/src/rpc/server.rs | 50 +++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 08759f7db0ab1..b79cda67ed1c4 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -242,6 +242,8 @@ mod tests { use super::*; + static SLEEP_SEC: u64 = 6; + /// Start `n` meta nodes on localhost. First node will be started at `meta_port`, 2nd node on /// `meta_port + 1`, ... 
async fn setup_n_nodes(n: u16, meta_port: u16) -> Vec<(JoinHandle<()>, WatchSender<()>)> { @@ -273,8 +275,8 @@ mod tests { .await .unwrap_or_else(|e| panic!("Meta node{} failed in setup. Err: {}", i, e)), ); - } // sleep duration of election cycle, not fixed duration - sleep(Duration::from_secs(6)).await; + } + sleep(Duration::from_secs(SLEEP_SEC)).await; node_controllers } @@ -285,8 +287,6 @@ mod tests { /// Number of nodes which currently are leaders. Number is not snapshoted. If there is a /// leader failover in process, you may get an incorrect result async fn number_of_leaders(number_of_nodes: u16, meta_port: u16, host_port: u16) -> u16 { - let _node_controllers = setup_n_nodes(number_of_nodes, meta_port).await; - let mut leader_count = 0; for i in 0..number_of_nodes { let local = "127.0.0.1".to_owned(); @@ -329,6 +329,7 @@ mod tests { // in parallel #[tokio::test] async fn test_single_leader_setup_1() { + let _node_controllers = setup_n_nodes(1, 1234).await; let leader_count = number_of_leaders(1, 1234, 5678).await; assert_eq!( leader_count, 1, @@ -339,6 +340,7 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_3() { + let _node_controllers = setup_n_nodes(3, 2345).await; let leader_count = number_of_leaders(3, 2345, 6789).await; assert_eq!( leader_count, 1, @@ -349,6 +351,7 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_10() { + let _node_controllers = setup_n_nodes(10, 3456).await; let leader_count = number_of_leaders(10, 3456, 7890).await; assert_eq!( leader_count, 1, @@ -359,6 +362,7 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_100() { + let _node_controllers = setup_n_nodes(100, 4567).await; let leader_count = number_of_leaders(100, 4567, 8901).await; assert_eq!( leader_count, 1, @@ -369,10 +373,38 @@ mod tests { #[tokio::test] async fn test_failover() { - // TODO: After failover there should be one leader - // How do we simulate failover? -> Kill current leader - // Failover tests should work with 1, 3, 10, 100 nodes - // Afterwards we still want to have 1 leader only - // utilize number_of_leaders function + let meta_port = 1234; + let number_of_nodes = 2; + let compute_port = 2345; + let vec_meta_handlers = setup_n_nodes(number_of_nodes, meta_port).await; + + // we should have 1 leader at the moment + let leader_count = number_of_leaders(number_of_nodes, meta_port, compute_port).await; + assert_eq!( + leader_count, 1, + "Expected to have 1 leader, instead got {} leaders", + leader_count + ); + + // kill leader to trigger failover + let leader_shutdown_sender = &vec_meta_handlers[0].1; + leader_shutdown_sender + .send(()) + .expect("Sending shutdown to leader should not fail"); + sleep(Duration::from_secs(SLEEP_SEC)).await; + + // expect that we still have 1 leader + let leader_count = number_of_leaders(number_of_nodes, meta_port, compute_port).await; + assert_eq!( + leader_count, 1, + "Expected to have 1 leader, instead got {} leaders", + leader_count + ); } + + // TODO: After failover there should be one leader + // How do we simulate failover? 
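// (Sketch of one answer, mirroring what the failover test below ends up doing: drop the
// current leader by firing the shutdown sender that `rpc_serve_with_store` returned for it,
// wait roughly one lease/election cycle, then count leaders again.)
//
//     let (_leader_handle, leader_shutdown_tx) = &node_controllers[0];
//     leader_shutdown_tx.send(()).expect("leader node already shut down");
//     sleep(Duration::from_secs(SLEEP_SEC)).await;
//     assert_eq!(number_of_leaders(number_of_nodes, meta_port, compute_port).await, 1);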
-> Kill current leader + // Failover tests should work with 1, 3, 10, 100 nodes + // Afterwards we still want to have 1 leader only + // utilize number_of_leaders function } From 193e977ac0280e26da31fbf5ab0f41a74299d1f7 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 17:00:42 +0100 Subject: [PATCH 39/61] add failover tests --- src/meta/src/rpc/server.rs | 75 +++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index b79cda67ed1c4..f5275f03943da 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -242,11 +242,11 @@ mod tests { use super::*; - static SLEEP_SEC: u64 = 6; + static _SLEEP_SEC: u64 = 6; /// Start `n` meta nodes on localhost. First node will be started at `meta_port`, 2nd node on /// `meta_port + 1`, ... - async fn setup_n_nodes(n: u16, meta_port: u16) -> Vec<(JoinHandle<()>, WatchSender<()>)> { + async fn _setup_n_nodes(n: u16, meta_port: u16) -> Vec<(JoinHandle<()>, WatchSender<()>)> { let meta_store = Arc::new(MemStore::default()); let mut node_controllers: Vec<(JoinHandle<()>, WatchSender<()>)> = vec![]; @@ -276,7 +276,7 @@ mod tests { .unwrap_or_else(|e| panic!("Meta node{} failed in setup. Err: {}", i, e)), ); } - sleep(Duration::from_secs(SLEEP_SEC)).await; + sleep(Duration::from_secs(_SLEEP_SEC)).await; node_controllers } @@ -286,7 +286,7 @@ mod tests { /// ## Returns /// Number of nodes which currently are leaders. Number is not snapshoted. If there is a /// leader failover in process, you may get an incorrect result - async fn number_of_leaders(number_of_nodes: u16, meta_port: u16, host_port: u16) -> u16 { + async fn _number_of_leaders(number_of_nodes: u16, meta_port: u16, host_port: u16) -> u16 { let mut leader_count = 0; for i in 0..number_of_nodes { let local = "127.0.0.1".to_owned(); @@ -329,8 +329,8 @@ mod tests { // in parallel #[tokio::test] async fn test_single_leader_setup_1() { - let _node_controllers = setup_n_nodes(1, 1234).await; - let leader_count = number_of_leaders(1, 1234, 5678).await; + let _node_controllers = _setup_n_nodes(1, 1234).await; + let leader_count = _number_of_leaders(1, 1234, 5678).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -340,8 +340,8 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_3() { - let _node_controllers = setup_n_nodes(3, 2345).await; - let leader_count = number_of_leaders(3, 2345, 6789).await; + let _node_controllers = _setup_n_nodes(3, 2345).await; + let leader_count = _number_of_leaders(3, 2345, 6789).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -351,8 +351,8 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_10() { - let _node_controllers = setup_n_nodes(10, 3456).await; - let leader_count = number_of_leaders(10, 3456, 7890).await; + let _node_controllers = _setup_n_nodes(10, 3456).await; + let leader_count = _number_of_leaders(10, 3456, 7890).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -362,8 +362,9 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_100() { - let _node_controllers = setup_n_nodes(100, 4567).await; - let leader_count = number_of_leaders(100, 4567, 8901).await; + // TODO: helper func that calls both of these functions below + let _node_controllers = _setup_n_nodes(100, 4567).await; + let leader_count = _number_of_leaders(100, 4567, 8901).await; assert_eq!( leader_count, 1, "Expected to 
have 1 leader, instead got {} leaders", @@ -371,15 +372,12 @@ mod tests { ); } - #[tokio::test] - async fn test_failover() { - let meta_port = 1234; - let number_of_nodes = 2; - let compute_port = 2345; - let vec_meta_handlers = setup_n_nodes(number_of_nodes, meta_port).await; + /// returns number of leaders after failover + async fn _test_failover(number_of_nodes: u16, meta_port: u16, compute_port: u16) -> u16 { + let vec_meta_handlers = _setup_n_nodes(number_of_nodes, meta_port).await; // we should have 1 leader at the moment - let leader_count = number_of_leaders(number_of_nodes, meta_port, compute_port).await; + let leader_count = _number_of_leaders(number_of_nodes, meta_port, compute_port).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -391,10 +389,45 @@ mod tests { leader_shutdown_sender .send(()) .expect("Sending shutdown to leader should not fail"); - sleep(Duration::from_secs(SLEEP_SEC)).await; + sleep(Duration::from_secs(_SLEEP_SEC)).await; // expect that we still have 1 leader - let leader_count = number_of_leaders(number_of_nodes, meta_port, compute_port).await; + _number_of_leaders(number_of_nodes, meta_port, compute_port).await + } + + #[tokio::test] + async fn test_failover_1() { + let leader_count = _test_failover(1, 9012, 1012).await; + assert_eq!( + leader_count, 0, + "Expected to have 1 leader, instead got {} leaders", + leader_count + ); + } + + #[tokio::test] + async fn test_failover_3() { + let leader_count = _test_failover(3, 1100, 1200).await; + assert_eq!( + leader_count, 1, + "Expected to have 1 leader, instead got {} leaders", + leader_count + ); + } + + #[tokio::test] + async fn test_failover_10() { + let leader_count = _test_failover(10, 1300, 1400).await; + assert_eq!( + leader_count, 1, + "Expected to have 1 leader, instead got {} leaders", + leader_count + ); + } + + #[tokio::test] + async fn test_failover_100() { + let leader_count = _test_failover(100, 1500, 1600).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", From 472f5746e9cd9953a8dceea727d8c38b9e5dd443 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 17:03:10 +0100 Subject: [PATCH 40/61] remove notes --- src/meta/src/rpc/server.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index f5275f03943da..8dfd29a0ec525 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -434,10 +434,4 @@ mod tests { leader_count ); } - - // TODO: After failover there should be one leader - // How do we simulate failover? 
-> Kill current leader - // Failover tests should work with 1, 3, 10, 100 nodes - // Afterwards we still want to have 1 leader only - // utilize number_of_leaders function } From d2b9311d2c976f79631063b15219afc5bcc038ab Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 17:05:28 +0100 Subject: [PATCH 41/61] risedev c --- src/meta/src/rpc/server.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 8dfd29a0ec525..056071da0fb45 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -307,12 +307,10 @@ mod tests { ) .await; match client_i { - Ok(client_i) => { - match client_i.send_heartbeat(client_i.worker_id(), vec![]).await { - Ok(_) => true, - Err(_) => false, - } - } + Ok(client_i) => client_i + .send_heartbeat(client_i.worker_id(), vec![]) + .await + .is_ok(), Err(_) => false, } }) From 5da3944320cb91a14fd3ef00c10ec99f4dae9253 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 17:06:45 +0100 Subject: [PATCH 42/61] minor change --- src/meta/src/rpc/server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 056071da0fb45..e305fd962eaa1 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -374,7 +374,7 @@ mod tests { async fn _test_failover(number_of_nodes: u16, meta_port: u16, compute_port: u16) -> u16 { let vec_meta_handlers = _setup_n_nodes(number_of_nodes, meta_port).await; - // we should have 1 leader at the moment + // we should have 1 leader on startup let leader_count = _number_of_leaders(number_of_nodes, meta_port, compute_port).await; assert_eq!( leader_count, 1, From 048278e6957e6d94a08c49558f626d14c1eb6e2b Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 17:11:39 +0100 Subject: [PATCH 43/61] add helper func --- src/meta/src/rpc/server.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index e305fd962eaa1..5bcc0e1f1cddd 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -303,7 +303,7 @@ mod tests { meta_addr.as_str(), WorkerType::ComputeNode, &host_addr, - 1, + 5, ) .await; match client_i { @@ -323,12 +323,16 @@ mod tests { leader_count } + async fn _leader_count_with_setup(number_of_nodes: u16, meta_port: u16, host_port: u16) -> u16 { + let _ = _setup_n_nodes(number_of_nodes, meta_port).await; + _number_of_leaders(number_of_nodes, meta_port, host_port).await + } + // Writing these tests as separate functions instead of one loop, because functions get executed // in parallel #[tokio::test] async fn test_single_leader_setup_1() { - let _node_controllers = _setup_n_nodes(1, 1234).await; - let leader_count = _number_of_leaders(1, 1234, 5678).await; + let leader_count = _leader_count_with_setup(1, 1234, 5678).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -338,8 +342,7 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_3() { - let _node_controllers = _setup_n_nodes(3, 2345).await; - let leader_count = _number_of_leaders(3, 2345, 6789).await; + let leader_count = _leader_count_with_setup(3, 2345, 6789).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -349,8 +352,7 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_10() { - let _node_controllers = _setup_n_nodes(10, 3456).await; - let leader_count = 
_number_of_leaders(10, 3456, 7890).await; + let leader_count = _leader_count_with_setup(10, 3456, 7890).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -361,8 +363,7 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_100() { // TODO: helper func that calls both of these functions below - let _node_controllers = _setup_n_nodes(100, 4567).await; - let leader_count = _number_of_leaders(100, 4567, 8901).await; + let leader_count = _leader_count_with_setup(100, 4567, 8901).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", From 96d91a9fcb38611347727b97eca0a567c481b18a Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Wed, 21 Dec 2022 17:46:35 +0100 Subject: [PATCH 44/61] try using handle during setup --- src/meta/src/rpc/server.rs | 53 ++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 5bcc0e1f1cddd..7b59b6aedf87f 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -246,7 +246,11 @@ mod tests { /// Start `n` meta nodes on localhost. First node will be started at `meta_port`, 2nd node on /// `meta_port + 1`, ... - async fn _setup_n_nodes(n: u16, meta_port: u16) -> Vec<(JoinHandle<()>, WatchSender<()>)> { + async fn _setup_n_nodes( + n: u16, + meta_port: u16, + enable_recovery: bool, // TODO: remove. Is always false + ) -> Vec<(JoinHandle<()>, WatchSender<()>)> { let meta_store = Arc::new(MemStore::default()); let mut node_controllers: Vec<(JoinHandle<()>, WatchSender<()>)> = vec![]; @@ -268,9 +272,9 @@ mod tests { rpc_serve_with_store( meta_store.clone(), info, - Duration::from_secs(4), // What values do we want to use here - 2, // What values do we want to use here - MetaOpts::test(true), // True or false? + Duration::from_secs(4), + 2, + MetaOpts::test(enable_recovery), ) .await .unwrap_or_else(|e| panic!("Meta node{} failed in setup. 
Err: {}", i, e)), @@ -323,57 +327,80 @@ mod tests { leader_count } - async fn _leader_count_with_setup(number_of_nodes: u16, meta_port: u16, host_port: u16) -> u16 { - let _ = _setup_n_nodes(number_of_nodes, meta_port).await; - _number_of_leaders(number_of_nodes, meta_port, host_port).await + async fn _leader_count_with_setup( + number_of_nodes: u16, + meta_port: u16, + host_port: u16, + ) -> (u16, Vec>) { + let vec = _setup_n_nodes(number_of_nodes, meta_port, false).await; + let mut handles: Vec> = vec![]; + for v in vec { + handles.push(v.0); + } + ( + _number_of_leaders(number_of_nodes, meta_port, host_port).await, + handles, + ) } // Writing these tests as separate functions instead of one loop, because functions get executed // in parallel #[tokio::test] async fn test_single_leader_setup_1() { - let leader_count = _leader_count_with_setup(1, 1234, 5678).await; + let (leader_count, handles) = _leader_count_with_setup(1, 1234, 5678).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", leader_count ); + for handle in handles { + handle.abort(); + } } #[tokio::test] async fn test_single_leader_setup_3() { - let leader_count = _leader_count_with_setup(3, 2345, 6789).await; + let (leader_count, handles) = _leader_count_with_setup(3, 2345, 6789).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", leader_count ); + for handle in handles { + handle.abort(); + } } #[tokio::test] async fn test_single_leader_setup_10() { - let leader_count = _leader_count_with_setup(10, 3456, 7890).await; + let (leader_count, handles) = _leader_count_with_setup(10, 3456, 7890).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", leader_count ); + for handle in handles { + handle.abort(); + } } #[tokio::test] async fn test_single_leader_setup_100() { - // TODO: helper func that calls both of these functions below - let leader_count = _leader_count_with_setup(100, 4567, 8901).await; + let (leader_count, handles) = _leader_count_with_setup(100, 4567, 8901).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", leader_count ); + for handle in handles { + handle.abort(); + } } /// returns number of leaders after failover async fn _test_failover(number_of_nodes: u16, meta_port: u16, compute_port: u16) -> u16 { - let vec_meta_handlers = _setup_n_nodes(number_of_nodes, meta_port).await; + // TODO: change to false? 
+ let vec_meta_handlers = _setup_n_nodes(number_of_nodes, meta_port, true).await; // we should have 1 leader on startup let leader_count = _number_of_leaders(number_of_nodes, meta_port, compute_port).await; From 9baa5cbf973d88ec01a986ec7b0783c63161ac00 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Thu, 22 Dec 2022 10:25:19 +0100 Subject: [PATCH 45/61] break node status loop if leader sender drops --- src/meta/src/rpc/server.rs | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 7b59b6aedf87f..856a4978e804b 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -135,10 +135,10 @@ pub async fn rpc_serve_with_store( tokio::spawn(async move { let _ = tracing::span!(tracing::Level::INFO, "node_status").enter(); loop { - note_status_leader_rx - .changed() - .await - .expect("Leader sender dropped"); + if note_status_leader_rx.changed().await.is_err() { + tracing::error!("Leader sender dropped"); + return; + } let (leader_info, is_leader) = note_status_leader_rx.borrow().clone(); let leader_addr = leader_info_to_host_addr(leader_info); @@ -303,20 +303,16 @@ mod tests { let is_leader = tokio::time::timeout(std::time::Duration::from_millis(100), async move { - let client_i = MetaClient::register_new( + // TODO: Write client that does not use retry logic + // https://risingwave-labs.slack.com/archives/D046HNJ0H4P/p1671629148333139 + MetaClient::register_new( meta_addr.as_str(), WorkerType::ComputeNode, &host_addr, 5, ) - .await; - match client_i { - Ok(client_i) => client_i - .send_heartbeat(client_i.worker_id(), vec![]) - .await - .is_ok(), - Err(_) => false, - } + .await + .is_ok() }) .await .unwrap_or(false); @@ -400,7 +396,7 @@ mod tests { /// returns number of leaders after failover async fn _test_failover(number_of_nodes: u16, meta_port: u16, compute_port: u16) -> u16 { // TODO: change to false? 
- let vec_meta_handlers = _setup_n_nodes(number_of_nodes, meta_port, true).await; + let vec_meta_handlers = _setup_n_nodes(number_of_nodes, meta_port, false).await; // we should have 1 leader on startup let leader_count = _number_of_leaders(number_of_nodes, meta_port, compute_port).await; From a406ffede0fbd55a74f623ab575ee5821c1776e6 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Thu, 22 Dec 2022 12:52:36 +0100 Subject: [PATCH 46/61] create chanel manually + debugging --- src/meta/src/rpc/server.rs | 103 ++++++++++++++++++++++++++++++------- 1 file changed, 85 insertions(+), 18 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 856a4978e804b..beb64a46be5b2 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -235,10 +235,13 @@ mod tests { use core::panic; use std::net::{IpAddr, Ipv4Addr}; + use risingwave_common::config::MAX_CONNECTION_WINDOW_SIZE; use risingwave_common::util::addr::HostAddr; - use risingwave_pb::common::WorkerType; - use risingwave_rpc_client::MetaClient; + use risingwave_pb::common::{HostAddress, WorkerType}; + use risingwave_pb::meta::cluster_service_client::ClusterServiceClient; + use risingwave_pb::meta::AddWorkerNodeRequest; use tokio::time::sleep; + use tonic::transport::Endpoint; use super::*; @@ -295,27 +298,47 @@ mod tests { for i in 0..number_of_nodes { let local = "127.0.0.1".to_owned(); let port = meta_port + i; - let meta_addr = format!("http://{}:{}", local, port); + let meta_addr = format!("http://{}:{}", local, port); // http, https, no protocol let host_addr = HostAddr { host: local, port: host_port + i, }; - - let is_leader = - tokio::time::timeout(std::time::Duration::from_millis(100), async move { - // TODO: Write client that does not use retry logic - // https://risingwave-labs.slack.com/archives/D046HNJ0H4P/p1671629148333139 - MetaClient::register_new( - meta_addr.as_str(), - WorkerType::ComputeNode, - &host_addr, - 5, - ) - .await - .is_ok() - }) + // single leader test 1 + // should be http://127.0.0.1:1234 + // is http://127.0.0.1:1234 + + let endpoint = Endpoint::from_shared(meta_addr.to_string()) + .unwrap() + .initial_connection_window_size(MAX_CONNECTION_WINDOW_SIZE); + let channel = endpoint + .http2_keep_alive_interval(Duration::from_secs(60)) + .keep_alive_timeout(Duration::from_secs(60)) + .connect_timeout(Duration::from_secs(5)) + .connect() .await - .unwrap_or(false); + .inspect_err(|e| { + tracing::warn!( + "Failed to connect to meta server {}, wait for online: {}", + meta_addr, + e + ); + }) + .unwrap(); + + let cluster_client = ClusterServiceClient::new(channel); + let resp = cluster_client + .to_owned() + .add_worker_node(AddWorkerNodeRequest { + worker_type: WorkerType::ComputeNode as i32, + host: Some(HostAddress { + host: host_addr.host, + port: host_addr.port as i32, + }), + worker_node_parallelism: 5, + }) + .await; + let is_leader = resp.is_ok(); + if is_leader { leader_count += 1; } @@ -339,6 +362,49 @@ mod tests { ) } + // Is it maybe that we should not drop the handlers? + // What if we abort? + // This seems to work + #[tokio::test] + async fn kinda_like_actual_test_2() { + let v = _setup_n_nodes(1, 15690, false).await; + let x = _number_of_leaders(1, 15690, 5678).await; + assert_eq!(x, 1); + for hanlde_send in v { + hanlde_send.0.abort(); + } + } + + // TODO: test only for debugging. remove + // This no longer works. Do I need to await the handlers. 
I think so + // below works, but never terminates + #[tokio::test] + async fn kinda_like_actual_test() { + let v = _setup_n_nodes(1, 15690, false).await; + let x = _number_of_leaders(1, 15690, 5678).await; + assert_eq!(x, 1); + for hanlde_send in v { + hanlde_send.0.await.unwrap(); + } + } + + // TODO: test only for debugging. remove + // keel meta node running able to connect against it + #[tokio::test] + async fn test_test_remove_me_setup() { + let x = _setup_n_nodes(1, 15690, false).await; + for hanlde_send in x { + hanlde_send.0.await.unwrap(); + } + } + + // TODO: test only for debugging. remove + #[tokio::test] + async fn test_test_remove_me() { + let x = _number_of_leaders(1, 15690, 5678).await; // always returns 1 + assert_eq!(x, 1); + } + // Writing these tests as separate functions instead of one loop, because functions get executed // in parallel #[tokio::test] @@ -349,6 +415,7 @@ mod tests { "Expected to have 1 leader, instead got {} leaders", leader_count ); + // TODO: Do I need to abort handles? for handle in handles { handle.abort(); } From 6f0ce60155ab5306b9b59cad2b4e01efdfe931d6 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Thu, 22 Dec 2022 12:55:45 +0100 Subject: [PATCH 47/61] test seems to work --- src/meta/src/rpc/server.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index beb64a46be5b2..b403d64da623d 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -346,6 +346,7 @@ mod tests { leader_count } + // TODO: return the stuff from setup_n_nodes straight away? async fn _leader_count_with_setup( number_of_nodes: u16, meta_port: u16, @@ -423,14 +424,16 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_3() { - let (leader_count, handles) = _leader_count_with_setup(3, 2345, 6789).await; + let v = _setup_n_nodes(3, 2345, false).await; + let leader_count = _number_of_leaders(3, 2345, 6789).await; + // let (leader_count, handles) = _leader_count_with_setup(3, 2345, 6789).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", leader_count ); - for handle in handles { - handle.abort(); + for handle in v { + handle.0.abort(); } } From c1a6b2f38c92f5524c722b5194cf1d55a38676b1 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Thu, 22 Dec 2022 13:03:09 +0100 Subject: [PATCH 48/61] fix tests --- src/meta/src/rpc/server.rs | 45 +++++++++++++------------------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index b403d64da623d..559eb24fbe972 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -346,23 +346,7 @@ mod tests { leader_count } - // TODO: return the stuff from setup_n_nodes straight away? - async fn _leader_count_with_setup( - number_of_nodes: u16, - meta_port: u16, - host_port: u16, - ) -> (u16, Vec>) { - let vec = _setup_n_nodes(number_of_nodes, meta_port, false).await; - let mut handles: Vec> = vec![]; - for v in vec { - handles.push(v.0); - } - ( - _number_of_leaders(number_of_nodes, meta_port, host_port).await, - handles, - ) - } - + // TODO: test only for debugging. remove // Is it maybe that we should not drop the handlers? // What if we abort? 
// This seems to work @@ -410,15 +394,15 @@ mod tests { // in parallel #[tokio::test] async fn test_single_leader_setup_1() { - let (leader_count, handles) = _leader_count_with_setup(1, 1234, 5678).await; + let v = _setup_n_nodes(1, 1234, false).await; + let leader_count = _number_of_leaders(1, 1234, 5678).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", leader_count ); - // TODO: Do I need to abort handles? - for handle in handles { - handle.abort(); + for element in v { + element.0.abort(); } } @@ -426,40 +410,41 @@ mod tests { async fn test_single_leader_setup_3() { let v = _setup_n_nodes(3, 2345, false).await; let leader_count = _number_of_leaders(3, 2345, 6789).await; - // let (leader_count, handles) = _leader_count_with_setup(3, 2345, 6789).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", leader_count ); - for handle in v { - handle.0.abort(); + for element in v { + element.0.abort(); } } #[tokio::test] async fn test_single_leader_setup_10() { - let (leader_count, handles) = _leader_count_with_setup(10, 3456, 7890).await; + let v = _setup_n_nodes(10, 3456, false).await; + let leader_count = _number_of_leaders(10, 3456, 7890).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", leader_count ); - for handle in handles { - handle.abort(); + for element in v { + element.0.abort(); } } #[tokio::test] async fn test_single_leader_setup_100() { - let (leader_count, handles) = _leader_count_with_setup(100, 4567, 8901).await; + let v = _setup_n_nodes(100, 4567, false).await; + let leader_count = _number_of_leaders(100, 4567, 8901).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", leader_count ); - for handle in handles { - handle.abort(); + for element in v { + element.0.abort(); } } From a837ea8058caac8615c85c0cbcfe683873b9884d Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Thu, 22 Dec 2022 13:43:45 +0100 Subject: [PATCH 49/61] remove debug tests --- src/meta/src/rpc/server.rs | 48 +++----------------------------------- 1 file changed, 3 insertions(+), 45 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 559eb24fbe972..ade5b45723475 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -346,55 +346,11 @@ mod tests { leader_count } - // TODO: test only for debugging. remove - // Is it maybe that we should not drop the handlers? - // What if we abort? - // This seems to work - #[tokio::test] - async fn kinda_like_actual_test_2() { - let v = _setup_n_nodes(1, 15690, false).await; - let x = _number_of_leaders(1, 15690, 5678).await; - assert_eq!(x, 1); - for hanlde_send in v { - hanlde_send.0.abort(); - } - } - - // TODO: test only for debugging. remove - // This no longer works. Do I need to await the handlers. I think so - // below works, but never terminates - #[tokio::test] - async fn kinda_like_actual_test() { - let v = _setup_n_nodes(1, 15690, false).await; - let x = _number_of_leaders(1, 15690, 5678).await; - assert_eq!(x, 1); - for hanlde_send in v { - hanlde_send.0.await.unwrap(); - } - } - - // TODO: test only for debugging. remove - // keel meta node running able to connect against it - #[tokio::test] - async fn test_test_remove_me_setup() { - let x = _setup_n_nodes(1, 15690, false).await; - for hanlde_send in x { - hanlde_send.0.await.unwrap(); - } - } - - // TODO: test only for debugging. 
remove - #[tokio::test] - async fn test_test_remove_me() { - let x = _number_of_leaders(1, 15690, 5678).await; // always returns 1 - assert_eq!(x, 1); - } - // Writing these tests as separate functions instead of one loop, because functions get executed // in parallel #[tokio::test] async fn test_single_leader_setup_1() { - let v = _setup_n_nodes(1, 1234, false).await; + let v = _setup_n_nodes(1, 1234, false).await; // TODO: parameter false not needed. Always false let leader_count = _number_of_leaders(1, 1234, 5678).await; assert_eq!( leader_count, 1, @@ -512,3 +468,5 @@ mod tests { ); } } + +// Tests fail, because address already in use. Test addresses overlap? From 971a98bf73ca76432f2577bb396ea2bc9e61e322 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Thu, 22 Dec 2022 13:46:59 +0100 Subject: [PATCH 50/61] continue on channel err --- src/meta/src/rpc/server.rs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index ade5b45723475..0cdccf97bba68 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -303,14 +303,11 @@ mod tests { host: local, port: host_port + i, }; - // single leader test 1 - // should be http://127.0.0.1:1234 - // is http://127.0.0.1:1234 let endpoint = Endpoint::from_shared(meta_addr.to_string()) .unwrap() .initial_connection_window_size(MAX_CONNECTION_WINDOW_SIZE); - let channel = endpoint + let channel = match endpoint .http2_keep_alive_interval(Duration::from_secs(60)) .keep_alive_timeout(Duration::from_secs(60)) .connect_timeout(Duration::from_secs(5)) @@ -322,8 +319,12 @@ mod tests { meta_addr, e ); - }) - .unwrap(); + }) { + Ok(c) => c, + Err(_) => { + continue; + } + }; let cluster_client = ClusterServiceClient::new(channel); let resp = cluster_client @@ -425,7 +426,11 @@ mod tests { sleep(Duration::from_secs(_SLEEP_SEC)).await; // expect that we still have 1 leader - _number_of_leaders(number_of_nodes, meta_port, compute_port).await + let leaders = _number_of_leaders(number_of_nodes, meta_port, compute_port).await; + for ele in vec_meta_handlers { + ele.0.abort(); + } + leaders } #[tokio::test] From 5c4ddbdde081f6be9d73d8a8459d5390adc6af8a Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Thu, 22 Dec 2022 13:49:32 +0100 Subject: [PATCH 51/61] skip deleted leader node in failover tests --- src/meta/src/rpc/server.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 0cdccf97bba68..7487bc512881c 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -307,7 +307,7 @@ mod tests { let endpoint = Endpoint::from_shared(meta_addr.to_string()) .unwrap() .initial_connection_window_size(MAX_CONNECTION_WINDOW_SIZE); - let channel = match endpoint + let channel = endpoint .http2_keep_alive_interval(Duration::from_secs(60)) .keep_alive_timeout(Duration::from_secs(60)) .connect_timeout(Duration::from_secs(5)) @@ -319,12 +319,8 @@ mod tests { meta_addr, e ); - }) { - Ok(c) => c, - Err(_) => { - continue; - } - }; + }) + .unwrap(); let cluster_client = ClusterServiceClient::new(channel); let resp = cluster_client @@ -426,7 +422,8 @@ mod tests { sleep(Duration::from_secs(_SLEEP_SEC)).await; // expect that we still have 1 leader - let leaders = _number_of_leaders(number_of_nodes, meta_port, compute_port).await; + // skipping first meta_port, since that node was former leader and got killed + let leaders = _number_of_leaders(number_of_nodes - 1, meta_port + 1, 
compute_port).await; for ele in vec_meta_handlers { ele.0.abort(); } From 9ec41ddb5f200498e89dd7c6351dd5c1fb207434 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Thu, 22 Dec 2022 13:54:07 +0100 Subject: [PATCH 52/61] disable meta recovery --- src/meta/src/rpc/server.rs | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 7487bc512881c..10f13720edc51 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -228,9 +228,6 @@ pub async fn rpc_serve_with_store( Ok((join_handle, svc_shutdown_tx)) } -// TODO: repeat test 100 times. Start test with different sleep intervals and so on -// Print the sleep intervals -// use pseudo rand gen with seed mod tests { use core::panic; use std::net::{IpAddr, Ipv4Addr}; @@ -249,16 +246,11 @@ mod tests { /// Start `n` meta nodes on localhost. First node will be started at `meta_port`, 2nd node on /// `meta_port + 1`, ... - async fn _setup_n_nodes( - n: u16, - meta_port: u16, - enable_recovery: bool, // TODO: remove. Is always false - ) -> Vec<(JoinHandle<()>, WatchSender<()>)> { + async fn _setup_n_nodes(n: u16, meta_port: u16) -> Vec<(JoinHandle<()>, WatchSender<()>)> { let meta_store = Arc::new(MemStore::default()); let mut node_controllers: Vec<(JoinHandle<()>, WatchSender<()>)> = vec![]; for i in 0..n { - // TODO: use http or https here? let addr = format!("http://127.0.0.1:{}", meta_port + i); let info = AddressInfo { @@ -267,9 +259,7 @@ mod tests { IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), meta_port + i, ), - prometheus_addr: None, // TODO: use default here - dashboard_addr: None, - ui_path: None, + ..AddressInfo::default() }; node_controllers.push( rpc_serve_with_store( @@ -277,7 +267,7 @@ mod tests { info, Duration::from_secs(4), 2, - MetaOpts::test(enable_recovery), + MetaOpts::test(false), ) .await .unwrap_or_else(|e| panic!("Meta node{} failed in setup. Err: {}", i, e)), @@ -298,7 +288,7 @@ mod tests { for i in 0..number_of_nodes { let local = "127.0.0.1".to_owned(); let port = meta_port + i; - let meta_addr = format!("http://{}:{}", local, port); // http, https, no protocol + let meta_addr = format!("http://{}:{}", local, port); let host_addr = HostAddr { host: local, port: host_port + i, @@ -347,7 +337,7 @@ mod tests { // in parallel #[tokio::test] async fn test_single_leader_setup_1() { - let v = _setup_n_nodes(1, 1234, false).await; // TODO: parameter false not needed. 
Always false + let v = _setup_n_nodes(1, 1234).await; let leader_count = _number_of_leaders(1, 1234, 5678).await; assert_eq!( leader_count, 1, @@ -361,7 +351,7 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_3() { - let v = _setup_n_nodes(3, 2345, false).await; + let v = _setup_n_nodes(3, 2345).await; let leader_count = _number_of_leaders(3, 2345, 6789).await; assert_eq!( leader_count, 1, @@ -375,7 +365,7 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_10() { - let v = _setup_n_nodes(10, 3456, false).await; + let v = _setup_n_nodes(10, 3456).await; let leader_count = _number_of_leaders(10, 3456, 7890).await; assert_eq!( leader_count, 1, @@ -389,7 +379,7 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_100() { - let v = _setup_n_nodes(100, 4567, false).await; + let v = _setup_n_nodes(100, 4567).await; let leader_count = _number_of_leaders(100, 4567, 8901).await; assert_eq!( leader_count, 1, @@ -403,8 +393,7 @@ mod tests { /// returns number of leaders after failover async fn _test_failover(number_of_nodes: u16, meta_port: u16, compute_port: u16) -> u16 { - // TODO: change to false? - let vec_meta_handlers = _setup_n_nodes(number_of_nodes, meta_port, false).await; + let vec_meta_handlers = _setup_n_nodes(number_of_nodes, meta_port).await; // we should have 1 leader on startup let leader_count = _number_of_leaders(number_of_nodes, meta_port, compute_port).await; From 2d27085ac4a5a7d5349d71301487c844006e639a Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Thu, 22 Dec 2022 14:12:14 +0100 Subject: [PATCH 53/61] Leader no longer updated in env --- src/meta/src/manager/env.rs | 17 +++++------------ src/meta/src/rpc/leader_svc.rs | 4 ++-- src/meta/src/rpc/server.rs | 3 ++- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/meta/src/manager/env.rs b/src/meta/src/manager/env.rs index 0c0ec84be7eb4..46a0e721551b1 100644 --- a/src/meta/src/manager/env.rs +++ b/src/meta/src/manager/env.rs @@ -22,7 +22,6 @@ use risingwave_pb::meta::MetaLeaderInfo; #[cfg(any(test, feature = "test"))] use risingwave_pb::meta::MetaLeaseInfo; use risingwave_rpc_client::{StreamClientPool, StreamClientPoolRef}; -use tokio::sync::watch::{channel as WatchChannel, Receiver as WatchReceiver}; use crate::manager::{ IdGeneratorManager, IdGeneratorManagerRef, IdleManager, IdleManagerRef, NotificationManager, @@ -56,7 +55,7 @@ where /// idle status manager. idle_manager: IdleManagerRef, - leader_rx: WatchReceiver<(MetaLeaderInfo, bool)>, + info: MetaLeaderInfo, /// options read by all services pub opts: Arc, @@ -134,11 +133,7 @@ impl MetaSrvEnv where S: MetaStore, { - pub async fn new( - opts: MetaOpts, - meta_store: Arc, - leader_rx: WatchReceiver<(MetaLeaderInfo, bool)>, - ) -> Self { + pub async fn new(opts: MetaOpts, meta_store: Arc, info: MetaLeaderInfo) -> Self { // change to sync after refactor `IdGeneratorManager::new` sync. 
let id_gen_manager = Arc::new(IdGeneratorManager::new(meta_store.clone()).await); let stream_client_pool = Arc::new(StreamClientPool::default()); @@ -151,7 +146,7 @@ where notification_manager, stream_client_pool, idle_manager, - leader_rx, + info, opts: opts.into(), } } @@ -197,7 +192,7 @@ where } pub fn get_leader_info(&self) -> MetaLeaderInfo { - self.leader_rx.borrow().0.clone() + self.info.clone() } } @@ -214,8 +209,6 @@ impl MetaSrvEnv { lease_id: 0, node_address: "".to_string(), }; - let (_, leader_rx) = WatchChannel((leader_info.clone(), true)); - let lease_info = MetaLeaseInfo { leader: Some(leader_info.clone()), lease_register_time: 0, @@ -249,7 +242,7 @@ impl MetaSrvEnv { notification_manager, stream_client_pool, idle_manager, - leader_rx, + info: leader_info, opts, } } diff --git a/src/meta/src/rpc/leader_svc.rs b/src/meta/src/rpc/leader_svc.rs index 87f2917b78706..cc6cf7a46a266 100644 --- a/src/meta/src/rpc/leader_svc.rs +++ b/src/meta/src/rpc/leader_svc.rs @@ -75,13 +75,13 @@ pub async fn start_leader_srv( address_info: AddressInfo, max_heartbeat_interval: Duration, opts: MetaOpts, - leader_rx: WatchReceiver<(MetaLeaderInfo, bool)>, + current_leader: MetaLeaderInfo, election_coordination: ElectionCoordination, mut svc_shutdown_rx: WatchReceiver<()>, ) -> MetaResult<()> { tracing::info!("Defining leader services"); let prometheus_endpoint = opts.prometheus_endpoint.clone(); - let env = MetaSrvEnv::::new(opts, meta_store.clone(), leader_rx).await; + let env = MetaSrvEnv::::new(opts, meta_store.clone(), current_leader).await; let fragment_manager = Arc::new(FragmentManager::new(env.clone()).await.unwrap()); let meta_metrics = Arc::new(MetaMetrics::new()); let registry = meta_metrics.registry(); diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 10f13720edc51..a7e1513266114 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -212,12 +212,13 @@ pub async fn rpc_serve_with_store( election_shutdown, }; + let current_leader = services_leader_rx.borrow().0.clone(); start_leader_srv( meta_store, address_info, max_heartbeat_interval, opts, - leader_rx, + current_leader, elect_coord, svc_shutdown_rx, ) From 24b47abf8869073402254fb730ec0f602a4af438 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Thu, 22 Dec 2022 14:30:12 +0100 Subject: [PATCH 54/61] minor changes --- src/meta/src/rpc/server.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index a7e1513266114..c95d0aaaf5e67 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -156,8 +156,6 @@ pub async fn rpc_serve_with_store( } }); - // FIXME: maybe do not use a channel here - // What I need is basically a oneshot channel with one producer and multiple consumers let (svc_shutdown_tx, svc_shutdown_rx) = WatchChannel(()); let join_handle = tokio::spawn(async move { @@ -170,14 +168,12 @@ pub async fn rpc_serve_with_store( .await .expect("Leader sender dropped"); - let is_leader = services_leader_rx.borrow().clone().1; - // run follower services until node becomes leader // FIXME: Add service discovery for follower // https://github.com/risingwavelabs/risingwave/issues/6755 let svc_shutdown_rx_clone = svc_shutdown_rx.clone(); let (follower_shutdown_tx, follower_shutdown_rx) = OneChannel::<()>(); - let follower_handle: Option> = if !is_leader { + let follower_handle: Option> = if !node_is_leader(&leader_rx) { let address_info_clone = address_info.clone(); Some(tokio::spawn(async move { let _ = 
tracing::span!(tracing::Level::INFO, "follower services").enter(); @@ -460,5 +456,3 @@ mod tests { ); } } - -// Tests fail, because address already in use. Test addresses overlap? From 78b2de56046198131a1781ab88a857a1aa4f2a5d Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Fri, 23 Dec 2022 09:27:28 +0100 Subject: [PATCH 55/61] use #[cfg(test)] --- src/meta/src/rpc/server.rs | 66 ++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index c95d0aaaf5e67..f3c434a6b0ec6 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -226,24 +226,26 @@ pub async fn rpc_serve_with_store( } mod tests { + #[cfg(test)] use core::panic; - use std::net::{IpAddr, Ipv4Addr}; - use risingwave_common::config::MAX_CONNECTION_WINDOW_SIZE; - use risingwave_common::util::addr::HostAddr; - use risingwave_pb::common::{HostAddress, WorkerType}; - use risingwave_pb::meta::cluster_service_client::ClusterServiceClient; - use risingwave_pb::meta::AddWorkerNodeRequest; + #[cfg(test)] use tokio::time::sleep; + #[cfg(test)] use tonic::transport::Endpoint; + #[cfg(test)] use super::*; - static _SLEEP_SEC: u64 = 6; + #[cfg(test)] + const WAIT_INTERVAL: Duration = Duration::from_secs(4); /// Start `n` meta nodes on localhost. First node will be started at `meta_port`, 2nd node on /// `meta_port + 1`, ... - async fn _setup_n_nodes(n: u16, meta_port: u16) -> Vec<(JoinHandle<()>, WatchSender<()>)> { + #[cfg(test)] + async fn setup_n_nodes(n: u16, meta_port: u16) -> Vec<(JoinHandle<()>, WatchSender<()>)> { + use std::net::{IpAddr, Ipv4Addr}; + let meta_store = Arc::new(MemStore::default()); let mut node_controllers: Vec<(JoinHandle<()>, WatchSender<()>)> = vec![]; @@ -263,14 +265,14 @@ mod tests { meta_store.clone(), info, Duration::from_secs(4), - 2, + 1, MetaOpts::test(false), ) .await .unwrap_or_else(|e| panic!("Meta node{} failed in setup. Err: {}", i, e)), ); } - sleep(Duration::from_secs(_SLEEP_SEC)).await; + sleep(WAIT_INTERVAL).await; node_controllers } @@ -280,7 +282,14 @@ mod tests { /// ## Returns /// Number of nodes which currently are leaders. Number is not snapshoted. 
If there is a /// leader failover in process, you may get an incorrect result - async fn _number_of_leaders(number_of_nodes: u16, meta_port: u16, host_port: u16) -> u16 { + #[cfg(test)] + async fn number_of_leaders(number_of_nodes: u16, meta_port: u16, host_port: u16) -> u16 { + use risingwave_common::config::MAX_CONNECTION_WINDOW_SIZE; + use risingwave_common::util::addr::HostAddr; + use risingwave_pb::common::{HostAddress, WorkerType}; + use risingwave_pb::meta::cluster_service_client::ClusterServiceClient; + use risingwave_pb::meta::AddWorkerNodeRequest; + let mut leader_count = 0; for i in 0..number_of_nodes { let local = "127.0.0.1".to_owned(); @@ -334,8 +343,8 @@ mod tests { // in parallel #[tokio::test] async fn test_single_leader_setup_1() { - let v = _setup_n_nodes(1, 1234).await; - let leader_count = _number_of_leaders(1, 1234, 5678).await; + let v = setup_n_nodes(1, 1234).await; + let leader_count = number_of_leaders(1, 1234, 5678).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -348,8 +357,8 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_3() { - let v = _setup_n_nodes(3, 2345).await; - let leader_count = _number_of_leaders(3, 2345, 6789).await; + let v = setup_n_nodes(3, 2345).await; + let leader_count = number_of_leaders(3, 2345, 6789).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -362,8 +371,8 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_10() { - let v = _setup_n_nodes(10, 3456).await; - let leader_count = _number_of_leaders(10, 3456, 7890).await; + let v = setup_n_nodes(10, 3456).await; + let leader_count = number_of_leaders(10, 3456, 7890).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -376,8 +385,8 @@ mod tests { #[tokio::test] async fn test_single_leader_setup_100() { - let v = _setup_n_nodes(100, 4567).await; - let leader_count = _number_of_leaders(100, 4567, 8901).await; + let v = setup_n_nodes(100, 4567).await; + let leader_count = number_of_leaders(100, 4567, 8901).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -389,11 +398,12 @@ mod tests { } /// returns number of leaders after failover - async fn _test_failover(number_of_nodes: u16, meta_port: u16, compute_port: u16) -> u16 { - let vec_meta_handlers = _setup_n_nodes(number_of_nodes, meta_port).await; + #[cfg(test)] + async fn test_failover(number_of_nodes: u16, meta_port: u16, compute_port: u16) -> u16 { + let vec_meta_handlers = setup_n_nodes(number_of_nodes, meta_port).await; // we should have 1 leader on startup - let leader_count = _number_of_leaders(number_of_nodes, meta_port, compute_port).await; + let leader_count = number_of_leaders(number_of_nodes, meta_port, compute_port).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -405,11 +415,11 @@ mod tests { leader_shutdown_sender .send(()) .expect("Sending shutdown to leader should not fail"); - sleep(Duration::from_secs(_SLEEP_SEC)).await; + sleep(WAIT_INTERVAL).await; // expect that we still have 1 leader // skipping first meta_port, since that node was former leader and got killed - let leaders = _number_of_leaders(number_of_nodes - 1, meta_port + 1, compute_port).await; + let leaders = number_of_leaders(number_of_nodes - 1, meta_port + 1, compute_port).await; for ele in vec_meta_handlers { ele.0.abort(); } @@ -418,7 +428,7 @@ mod tests { #[tokio::test] async fn test_failover_1() { - let leader_count = 
_test_failover(1, 9012, 1012).await; + let leader_count = test_failover(1, 9012, 1012).await; assert_eq!( leader_count, 0, "Expected to have 1 leader, instead got {} leaders", @@ -428,7 +438,7 @@ mod tests { #[tokio::test] async fn test_failover_3() { - let leader_count = _test_failover(3, 1100, 1200).await; + let leader_count = test_failover(3, 1100, 1200).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -438,7 +448,7 @@ mod tests { #[tokio::test] async fn test_failover_10() { - let leader_count = _test_failover(10, 1300, 1400).await; + let leader_count = test_failover(10, 1300, 1400).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", @@ -448,7 +458,7 @@ mod tests { #[tokio::test] async fn test_failover_100() { - let leader_count = _test_failover(100, 1500, 1600).await; + let leader_count = test_failover(100, 1500, 1600).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", From 4a22219c43fd60e6336090bf450a732dfb865159 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Fri, 23 Dec 2022 09:29:07 +0100 Subject: [PATCH 56/61] change sleep time to 5 --- src/meta/src/rpc/server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index f3c434a6b0ec6..5f8480549d063 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -238,7 +238,7 @@ mod tests { use super::*; #[cfg(test)] - const WAIT_INTERVAL: Duration = Duration::from_secs(4); + const WAIT_INTERVAL: Duration = Duration::from_secs(5); /// Start `n` meta nodes on localhost. First node will be started at `meta_port`, 2nd node on /// `meta_port + 1`, ... From 313cd413ce48ae5fa951533dab5b7a3314f945d9 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Fri, 23 Dec 2022 09:35:35 +0100 Subject: [PATCH 57/61] nit --- src/meta/src/rpc/server.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 5f8480549d063..0d24d62bcf87f 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -318,6 +318,8 @@ mod tests { }) .unwrap(); + // check if node is leader + // Only leader nodes support adding worker nodes let cluster_client = ClusterServiceClient::new(channel); let resp = cluster_client .to_owned() @@ -330,9 +332,8 @@ mod tests { worker_node_parallelism: 5, }) .await; - let is_leader = resp.is_ok(); - if is_leader { + if resp.is_ok() { leader_count += 1; } } From 00fd0cbad39136404860e6ec0879b0375ce1aef2 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Fri, 23 Dec 2022 09:39:17 +0100 Subject: [PATCH 58/61] remove large single_leader_setup tests --- src/meta/src/rpc/server.rs | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 0d24d62bcf87f..007cf81b4fbc7 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -371,23 +371,9 @@ mod tests { } #[tokio::test] - async fn test_single_leader_setup_10() { - let v = setup_n_nodes(10, 3456).await; - let leader_count = number_of_leaders(10, 3456, 7890).await; - assert_eq!( - leader_count, 1, - "Expected to have 1 leader, instead got {} leaders", - leader_count - ); - for element in v { - element.0.abort(); - } - } - - #[tokio::test] - async fn test_single_leader_setup_100() { - let v = setup_n_nodes(100, 4567).await; - let leader_count = number_of_leaders(100, 4567, 8901).await; + async fn 
test_single_leader_setup_5() { + let v = setup_n_nodes(5, 3456).await; + let leader_count = number_of_leaders(5, 3456, 7890).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", From 8add8d5feecbd5eaae8f92d6b6aad53b8df1af37 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Fri, 23 Dec 2022 09:41:15 +0100 Subject: [PATCH 59/61] remove large failover tests --- src/meta/src/rpc/server.rs | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 007cf81b4fbc7..80880bb56d8a1 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -432,24 +432,4 @@ mod tests { leader_count ); } - - #[tokio::test] - async fn test_failover_10() { - let leader_count = test_failover(10, 1300, 1400).await; - assert_eq!( - leader_count, 1, - "Expected to have 1 leader, instead got {} leaders", - leader_count - ); - } - - #[tokio::test] - async fn test_failover_100() { - let leader_count = test_failover(100, 1500, 1600).await; - assert_eq!( - leader_count, 1, - "Expected to have 1 leader, instead got {} leaders", - leader_count - ); - } } From e14fabd302478df19833986d7d3ea0054fbae2aa Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Fri, 23 Dec 2022 09:44:44 +0100 Subject: [PATCH 60/61] node_controllers --- src/meta/src/rpc/server.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index 80880bb56d8a1..c6d60f312ad73 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -344,50 +344,50 @@ mod tests { // in parallel #[tokio::test] async fn test_single_leader_setup_1() { - let v = setup_n_nodes(1, 1234).await; + let node_controllers = setup_n_nodes(1, 1234).await; let leader_count = number_of_leaders(1, 1234, 5678).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", leader_count ); - for element in v { - element.0.abort(); + for nc in node_controllers { + nc.0.abort(); } } #[tokio::test] async fn test_single_leader_setup_3() { - let v = setup_n_nodes(3, 2345).await; + let node_controllers = setup_n_nodes(3, 2345).await; let leader_count = number_of_leaders(3, 2345, 6789).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", leader_count ); - for element in v { - element.0.abort(); + for nc in node_controllers { + nc.0.abort(); } } #[tokio::test] async fn test_single_leader_setup_5() { - let v = setup_n_nodes(5, 3456).await; + let node_controllers = setup_n_nodes(5, 3456).await; let leader_count = number_of_leaders(5, 3456, 7890).await; assert_eq!( leader_count, 1, "Expected to have 1 leader, instead got {} leaders", leader_count ); - for element in v { - element.0.abort(); + for nc in node_controllers { + nc.0.abort(); } } /// returns number of leaders after failover #[cfg(test)] async fn test_failover(number_of_nodes: u16, meta_port: u16, compute_port: u16) -> u16 { - let vec_meta_handlers = setup_n_nodes(number_of_nodes, meta_port).await; + let node_controllers = setup_n_nodes(number_of_nodes, meta_port).await; // we should have 1 leader on startup let leader_count = number_of_leaders(number_of_nodes, meta_port, compute_port).await; @@ -398,7 +398,7 @@ mod tests { ); // kill leader to trigger failover - let leader_shutdown_sender = &vec_meta_handlers[0].1; + let leader_shutdown_sender = &node_controllers[0].1; leader_shutdown_sender .send(()) .expect("Sending shutdown to leader should not fail"); @@ -407,8 
+407,8 @@ mod tests { // expect that we still have 1 leader // skipping first meta_port, since that node was former leader and got killed let leaders = number_of_leaders(number_of_nodes - 1, meta_port + 1, compute_port).await; - for ele in vec_meta_handlers { - ele.0.abort(); + for nc in node_controllers { + nc.0.abort(); } leaders } From 5fb03b5b4d3d398182743a4ecaff8bd12eeb3324 Mon Sep 17 00:00:00 2001 From: CAJan93 Date: Fri, 23 Dec 2022 10:47:30 +0100 Subject: [PATCH 61/61] wait for shutdown signal --- src/meta/src/rpc/server.rs | 42 ++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/src/meta/src/rpc/server.rs b/src/meta/src/rpc/server.rs index c6d60f312ad73..75f7ba8b5b108 100644 --- a/src/meta/src/rpc/server.rs +++ b/src/meta/src/rpc/server.rs @@ -156,7 +156,7 @@ pub async fn rpc_serve_with_store( } }); - let (svc_shutdown_tx, svc_shutdown_rx) = WatchChannel(()); + let (svc_shutdown_tx, mut svc_shutdown_rx) = WatchChannel(()); let join_handle = tokio::spawn(async move { let span = tracing::span!(tracing::Level::INFO, "services"); @@ -190,7 +190,16 @@ pub async fn rpc_serve_with_store( // wait until this node becomes a leader while !node_is_leader(&leader_rx) { - leader_rx.changed().await.expect("Leader sender dropped"); + tokio::select! { + _ = leader_rx.changed() => {} + res = svc_shutdown_rx.changed() => { + match res { + Ok(_) => tracing::info!("Shutting down meta node"), + Err(_) => tracing::error!("Shutdown sender dropped"), + } + return; + } + } } // shut down follower svc if node used to be follower @@ -225,24 +234,19 @@ pub async fn rpc_serve_with_store( Ok((join_handle, svc_shutdown_tx)) } +#[cfg(test)] mod tests { - #[cfg(test)] use core::panic; - #[cfg(test)] use tokio::time::sleep; - #[cfg(test)] use tonic::transport::Endpoint; - #[cfg(test)] use super::*; - #[cfg(test)] const WAIT_INTERVAL: Duration = Duration::from_secs(5); /// Start `n` meta nodes on localhost. First node will be started at `meta_port`, 2nd node on /// `meta_port + 1`, ... - #[cfg(test)] async fn setup_n_nodes(n: u16, meta_port: u16) -> Vec<(JoinHandle<()>, WatchSender<()>)> { use std::net::{IpAddr, Ipv4Addr}; @@ -282,7 +286,6 @@ mod tests { /// ## Returns /// Number of nodes which currently are leaders. Number is not snapshoted. 
If there is a /// leader failover in process, you may get an incorrect result - #[cfg(test)] async fn number_of_leaders(number_of_nodes: u16, meta_port: u16, host_port: u16) -> u16 { use risingwave_common::config::MAX_CONNECTION_WINDOW_SIZE; use risingwave_common::util::addr::HostAddr; @@ -351,8 +354,9 @@ mod tests { "Expected to have 1 leader, instead got {} leaders", leader_count ); - for nc in node_controllers { - nc.0.abort(); + for (join_handle, shutdown_tx) in node_controllers { + shutdown_tx.send(()).unwrap(); + join_handle.await.unwrap(); } } @@ -365,8 +369,9 @@ mod tests { "Expected to have 1 leader, instead got {} leaders", leader_count ); - for nc in node_controllers { - nc.0.abort(); + for (join_handle, shutdown_tx) in node_controllers { + shutdown_tx.send(()).unwrap(); + join_handle.await.unwrap(); } } @@ -379,13 +384,13 @@ mod tests { "Expected to have 1 leader, instead got {} leaders", leader_count ); - for nc in node_controllers { - nc.0.abort(); + for (join_handle, shutdown_tx) in node_controllers { + shutdown_tx.send(()).unwrap(); + join_handle.await.unwrap(); } } /// returns number of leaders after failover - #[cfg(test)] async fn test_failover(number_of_nodes: u16, meta_port: u16, compute_port: u16) -> u16 { let node_controllers = setup_n_nodes(number_of_nodes, meta_port).await; @@ -407,8 +412,9 @@ mod tests { // expect that we still have 1 leader // skipping first meta_port, since that node was former leader and got killed let leaders = number_of_leaders(number_of_nodes - 1, meta_port + 1, compute_port).await; - for nc in node_controllers { - nc.0.abort(); + for (join_handle, shutdown_tx) in node_controllers { + let _ = shutdown_tx.send(()); + join_handle.await.unwrap(); } leaders }
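
The last patches in this series converge on one shutdown pattern in both the server loop and the tests: services watch a `tokio::sync::watch` channel and exit through `tokio::select!`, and the tests send the shutdown signal and then await the join handle instead of calling `abort()`. The following standalone sketch is not part of the patch series (names such as `spawn_service` are illustrative only); it only mirrors that pattern in isolation, assuming tokio with the `rt`, `macros`, `time`, and `sync` features.

use std::time::Duration;

use tokio::sync::watch;
use tokio::task::JoinHandle;

// Spawn a task that keeps running until the shutdown watch channel fires
// (or its sender is dropped), similar in spirit to the select! loop added
// to rpc_serve_with_store in the final patch.
fn spawn_service(mut shutdown_rx: watch::Receiver<()>) -> JoinHandle<()> {
    tokio::spawn(async move {
        loop {
            tokio::select! {
                // Placeholder for the service's actual work.
                _ = tokio::time::sleep(Duration::from_millis(100)) => {}
                res = shutdown_rx.changed() => {
                    match res {
                        Ok(_) => println!("shutting down service"),
                        Err(_) => println!("shutdown sender dropped"),
                    }
                    return;
                }
            }
        }
    })
}

#[tokio::main]
async fn main() {
    let (shutdown_tx, shutdown_rx) = watch::channel(());
    let handle = spawn_service(shutdown_rx);

    // Graceful teardown, as in the reworked tests: signal shutdown first,
    // then await the handle rather than aborting it.
    shutdown_tx.send(()).unwrap();
    handle.await.unwrap();
}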