From f2b358a0b3d0d54537a4cd5032925f93c41ac668 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Thu, 17 Oct 2024 13:13:09 +0200 Subject: [PATCH 1/9] lib.rs: export MetricsError It was public but not exported, despite being returned from methods that are part of public API. --- scylla/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scylla/src/lib.rs b/scylla/src/lib.rs index a23692e3b2..92e2562594 100644 --- a/scylla/src/lib.rs +++ b/scylla/src/lib.rs @@ -271,4 +271,4 @@ pub use transport::load_balancing; pub use transport::retry_policy; pub use transport::speculative_execution; -pub use transport::metrics::Metrics; +pub use transport::metrics::{Metrics, MetricsError}; From c5122de187806348c590a34a3c6daa73aca2852c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 21 Oct 2024 16:00:48 +0200 Subject: [PATCH 2/9] Unpub UntranslatedEndpoint and ResolvedContactPoint `ResolvedContactPoint` was unnameable. It was part of public API, because it was in a variant of `UntranslatedEndpoint`, which itself was public. `UntranslatedEndpoint` is not used in any public API, so can be made `pub(crate)`. Thanks to that `ResolvedContactPoint` can also be made `pub(crate)` as it no longer appears in any public API. --- scylla/src/transport/node.rs | 8 ++++---- scylla/src/transport/topology.rs | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/scylla/src/transport/node.rs b/scylla/src/transport/node.rs index 0568c9db7d..233d47bb4b 100644 --- a/scylla/src/transport/node.rs +++ b/scylla/src/transport/node.rs @@ -242,10 +242,10 @@ pub struct CloudEndpoint { /// Describes a database server known on Session startup, with already resolved address. #[derive(Debug, Clone)] -#[non_exhaustive] -pub struct ResolvedContactPoint { - pub address: SocketAddr, - pub datacenter: Option, +pub(crate) struct ResolvedContactPoint { + pub(crate) address: SocketAddr, + #[cfg_attr(not(feature = "cloud"), allow(unused))] + pub(crate) datacenter: Option, } // Resolve the given hostname using a DNS lookup if necessary. diff --git a/scylla/src/transport/topology.rs b/scylla/src/transport/topology.rs index 0d2671ec96..c97fd41a8a 100644 --- a/scylla/src/transport/topology.rs +++ b/scylla/src/transport/topology.rs @@ -75,9 +75,8 @@ pub struct Peer { /// An endpoint for a node that the driver is to issue connections to, /// possibly after prior address translation. -#[non_exhaustive] // <- so that we can add more fields in a backwards-compatible way #[derive(Clone, Debug)] -pub enum UntranslatedEndpoint { +pub(crate) enum UntranslatedEndpoint { /// Provided by user in SessionConfig (initial contact points). ContactPoint(ResolvedContactPoint), /// Fetched in Metadata with `query_peers()` From b6fc7d95575235b23066672809da5ea18b5edfa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 21 Oct 2024 13:41:06 +0200 Subject: [PATCH 3/9] Split KnownNode into KnownNode and InternalKnownNode It was not possible for user to create / retrieve `KnownNode::CloudEndpoint` variant: - `CloudEndpoint` was unnameable type, which means user can't create an instance of it. - No public API allowed user to create this variant or CloudEndpoint struct: they are only used internally. This commit splits `KnownNode` struct into two: - `KnownNode`, without `CloudEndpoint`, which is part of public API - `InternalKnownNode` which is only used internally and thus `pub(crate)`. Thanks to that, `CloudEndpoint` is no longer part of any public API and can be made `pub(crate)`. --- scylla/src/transport/cluster.rs | 4 ++-- scylla/src/transport/node.rs | 36 ++++++++++++++++++++++++-------- scylla/src/transport/session.rs | 27 ++++++++++++++---------- scylla/src/transport/topology.rs | 6 +++--- 4 files changed, 48 insertions(+), 25 deletions(-) diff --git a/scylla/src/transport/cluster.rs b/scylla/src/transport/cluster.rs index 2313402bc1..e5b04f4733 100644 --- a/scylla/src/transport/cluster.rs +++ b/scylla/src/transport/cluster.rs @@ -27,7 +27,7 @@ use tracing::{debug, warn}; use uuid::Uuid; use super::locator::tablets::{RawTablet, Tablet, TabletsInfo}; -use super::node::{KnownNode, NodeAddr}; +use super::node::{InternalKnownNode, NodeAddr}; use super::NodeRef; use super::locator::ReplicaLocator; @@ -145,7 +145,7 @@ struct UseKeyspaceRequest { impl Cluster { pub(crate) async fn new( - known_nodes: Vec, + known_nodes: Vec, pool_config: PoolConfig, keyspaces_to_fetch: Vec, fetch_schema_metadata: bool, diff --git a/scylla/src/transport/node.rs b/scylla/src/transport/node.rs index 233d47bb4b..c7f4383627 100644 --- a/scylla/src/transport/node.rs +++ b/scylla/src/transport/node.rs @@ -227,17 +227,35 @@ impl Hash for Node { pub enum KnownNode { Hostname(String), Address(SocketAddr), +} + +/// Describes a database server known on `Session` startup. +/// It is similar to [KnownNode] but includes also `CloudEndpoint` variant, +/// which is created by the driver if session is configured to connect to +/// serverless cluster. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub(crate) enum InternalKnownNode { + Hostname(String), + Address(SocketAddr), #[cfg(feature = "cloud")] CloudEndpoint(CloudEndpoint), } +impl From for InternalKnownNode { + fn from(value: KnownNode) -> Self { + match value { + KnownNode::Hostname(s) => InternalKnownNode::Hostname(s), + KnownNode::Address(s) => InternalKnownNode::Address(s), + } + } +} + /// Describes a database server in the serverless Scylla Cloud. #[cfg(feature = "cloud")] -#[non_exhaustive] #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] -pub struct CloudEndpoint { - pub hostname: String, - pub datacenter: String, +pub(crate) struct CloudEndpoint { + pub(crate) hostname: String, + pub(crate) datacenter: String, } /// Describes a database server known on Session startup, with already resolved address. @@ -275,12 +293,12 @@ pub(crate) async fn resolve_hostname(hostname: &str) -> Result (Vec, Vec) { // Find IP addresses of all known nodes passed in the config let mut initial_peers: Vec = Vec::with_capacity(known_nodes.len()); @@ -290,16 +308,16 @@ pub(crate) async fn resolve_contact_points( for node in known_nodes.iter() { match node { - KnownNode::Hostname(hostname) => { + InternalKnownNode::Hostname(hostname) => { to_resolve.push((hostname, None)); hostnames.push(hostname.clone()); } - KnownNode::Address(address) => initial_peers.push(ResolvedContactPoint { + InternalKnownNode::Address(address) => initial_peers.push(ResolvedContactPoint { address: *address, datacenter: None, }), #[cfg(feature = "cloud")] - KnownNode::CloudEndpoint(CloudEndpoint { + InternalKnownNode::CloudEndpoint(CloudEndpoint { hostname, datacenter, }) => to_resolve.push((hostname, Some(datacenter.clone()))), diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index c1cc7415fd..42933bdaf6 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -44,7 +44,7 @@ use super::errors::TracingProtocolError; use super::execution_profile::{ExecutionProfile, ExecutionProfileHandle, ExecutionProfileInner}; #[cfg(feature = "cloud")] use super::node::CloudEndpoint; -use super::node::KnownNode; +use super::node::{InternalKnownNode, KnownNode}; use super::partitioner::PartitionerName; use super::query_result::MaybeFirstRowTypedError; use super::topology::UntranslatedPeer; @@ -489,23 +489,28 @@ impl Session { let known_nodes = config.known_nodes; #[cfg(feature = "cloud")] - let known_nodes = if let Some(cloud_servers) = - config.cloud_config.as_ref().map(|cloud_config| { - cloud_config + let cloud_known_nodes: Option> = + if let Some(ref cloud_config) = config.cloud_config { + let cloud_servers = cloud_config .get_datacenters() .iter() .map(|(dc_name, dc_data)| { - KnownNode::CloudEndpoint(CloudEndpoint { + InternalKnownNode::CloudEndpoint(CloudEndpoint { hostname: dc_data.get_server().to_owned(), datacenter: dc_name.clone(), }) }) - .collect() - }) { - cloud_servers - } else { - known_nodes - }; + .collect(); + Some(cloud_servers) + } else { + None + }; + + #[cfg(not(feature = "cloud"))] + let cloud_known_nodes: Option> = None; + + let known_nodes = cloud_known_nodes + .unwrap_or_else(|| known_nodes.into_iter().map(|node| node.into()).collect()); // Ensure there is at least one known node if known_nodes.is_empty() { diff --git a/scylla/src/transport/topology.rs b/scylla/src/transport/topology.rs index c97fd41a8a..93a80d2fa0 100644 --- a/scylla/src/transport/topology.rs +++ b/scylla/src/transport/topology.rs @@ -33,7 +33,7 @@ use super::errors::{ KeyspaceStrategyError, KeyspacesMetadataError, MetadataError, PeersMetadataError, ProtocolError, TablesMetadataError, UdtMetadataError, ViewsMetadataError, }; -use super::node::{KnownNode, NodeAddr, ResolvedContactPoint}; +use super::node::{InternalKnownNode, NodeAddr, ResolvedContactPoint}; /// Allows to read current metadata from the cluster pub(crate) struct MetadataReader { @@ -51,7 +51,7 @@ pub(crate) struct MetadataReader { // When no known peer is reachable, initial known nodes are resolved once again as a fallback // and establishing control connection to them is attempted. - initial_known_nodes: Vec, + initial_known_nodes: Vec, // When a control connection breaks, the PoolRefiller of its pool uses the requester // to signal ClusterWorker that an immediate metadata refresh is advisable. @@ -451,7 +451,7 @@ impl MetadataReader { /// Creates new MetadataReader, which connects to initially_known_peers in the background #[allow(clippy::too_many_arguments)] pub(crate) async fn new( - initial_known_nodes: Vec, + initial_known_nodes: Vec, control_connection_repair_requester: broadcast::Sender<()>, mut connection_config: ConnectionConfig, keepalive_interval: Option, From 6439b327d9e42d7d2e311f241e27b862a0581d37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Thu, 17 Oct 2024 13:39:29 +0200 Subject: [PATCH 4/9] cloud: Export `CloudConfig`. This struct is a part of public API: it is present in a field of `SessionConfig`. It was not exported - so it was an unnameable type. It has a pub method that allows its construction by parsing a yaml file, so user could (if this struct was public) create it and put it in several `SessionConfigs`. This commit exports this struct so it is a proper part of public API. --- scylla/src/cloud/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scylla/src/cloud/mod.rs b/scylla/src/cloud/mod.rs index 2b9bf23aae..fd991a8d45 100644 --- a/scylla/src/cloud/mod.rs +++ b/scylla/src/cloud/mod.rs @@ -2,7 +2,7 @@ mod config; use std::net::SocketAddr; -pub(crate) use config::CloudConfig; +pub use config::CloudConfig; pub use config::CloudConfigError; use openssl::{ error::ErrorStack, From d7ba74707c9a30ff3c38f0585befb391fdfeb6cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Thu, 17 Oct 2024 14:28:17 +0200 Subject: [PATCH 5/9] ClusterData::new: don't needlessly create `Datacenter` structs. `ClusterData::new()` was creating a datacenter hashmap, populating it with all the nodes, updating rack counts in it, and then dropping it. This whole work is useless and can be just removed. --- scylla/src/transport/cluster.rs | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/scylla/src/transport/cluster.rs b/scylla/src/transport/cluster.rs index e5b04f4733..4d64f902f3 100644 --- a/scylla/src/transport/cluster.rs +++ b/scylla/src/transport/cluster.rs @@ -257,18 +257,6 @@ impl Cluster { } impl ClusterData { - // Updates information about rack count in each datacenter - fn update_rack_count(datacenters: &mut HashMap) { - for datacenter in datacenters.values_mut() { - datacenter.rack_count = datacenter - .nodes - .iter() - .filter_map(|node| node.rack.as_ref()) - .unique() - .count(); - } - } - pub(crate) async fn wait_until_all_pools_are_initialized(&self) { for node in self.locator.unique_nodes_in_global_ring().iter() { node.wait_until_pool_initialized().await; @@ -289,7 +277,6 @@ impl ClusterData { let mut new_known_peers: HashMap> = HashMap::with_capacity(metadata.peers.len()); let mut ring: Vec<(Token, Arc)> = Vec::new(); - let mut datacenters: HashMap = HashMap::new(); for peer in metadata.peers { // Take existing Arc if possible, otherwise create new one @@ -325,19 +312,6 @@ impl ClusterData { new_known_peers.insert(peer_host_id, node.clone()); - if let Some(dc) = &node.datacenter { - match datacenters.get_mut(dc) { - Some(v) => v.nodes.push(node.clone()), - None => { - let v = Datacenter { - nodes: vec![node.clone()], - rack_count: 0, - }; - datacenters.insert(dc.clone(), v); - } - } - } - for token in peer_tokens { ring.push((token, node.clone())); } @@ -384,8 +358,6 @@ impl ClusterData { ) } - Self::update_rack_count(&mut datacenters); - let keyspaces = metadata.keyspaces; let (locator, keyspaces) = tokio::task::spawn_blocking(move || { let keyspace_strategies = keyspaces.values().map(|ks| &ks.strategy); From 7f7f2d0b9bfd1250aab577d5dac3b2b793917c24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Thu, 17 Oct 2024 14:54:40 +0200 Subject: [PATCH 6/9] session_test: Don't use ClusterData::get_datacenters_info This method does some things that are unnecessary here: - For each DC it allocates a vector with it's nodes - It calculates rack count (which requires .unique() call which allocates) For the purpose of this test - and I'm pretty sure most of possible usages of get_datacenters_info - it is enough to get DC names from replica locator. --- scylla/src/transport/session_test.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scylla/src/transport/session_test.rs b/scylla/src/transport/session_test.rs index ff2dc1e64b..ad2c21960d 100644 --- a/scylla/src/transport/session_test.rs +++ b/scylla/src/transport/session_test.rs @@ -8,7 +8,6 @@ use crate::routing::Token; use crate::statement::Consistency; use crate::test_utils::{scylla_supports_tablets, setup_tracing}; use crate::tracing::TracingInfo; -use crate::transport::cluster::Datacenter; use crate::transport::errors::{BadKeyspaceName, BadQuery, DbError, QueryError}; use crate::transport::partitioner::{ calculate_token_for_partition_key, Murmur3Partitioner, Partitioner, PartitionerName, @@ -1874,10 +1873,11 @@ async fn test_turning_off_schema_fetching() { let cluster_data = &session.get_cluster_data(); let keyspace = &cluster_data.get_keyspace_info()[&ks]; - let datacenters: HashMap = cluster_data.get_datacenters_info(); - let datacenter_repfactors: HashMap = datacenters - .into_keys() - .map(|dc_name| (dc_name, 1)) + let datacenter_repfactors: HashMap = cluster_data + .replica_locator() + .datacenter_names() + .iter() + .map(|dc_name| (dc_name.to_owned(), 1)) .collect(); assert_eq!( From 14ff415935d1566cb2f8274ba84a27a794ab4c46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Thu, 17 Oct 2024 14:44:12 +0200 Subject: [PATCH 7/9] ClusterData: remove get_datacenters_info() This method seems to have very limited usability: - For iterating through datacenter names user can call `ReplicaLocator::datacenter_names, which is constant time. - Nothing else was available to the user because `Datacenter` struct is not exported (so is unnameable). This also means all the work done by this method was a waste. - For more advanced tasks there are `ReplicaLocator::unique_nodes_in_datacenter_ring` and `ClusterData::get_nodes_info`. I don't think it is worth it to have this method which allocates a lot on each call and calculates rack counts. It could make sense to have rack counts precalculated when creating `ClusterData` - I'll think about it during metadata refactor. I'm pretty sure that the original intention was to calculate this whole hash map in `ClusterData::new`, but for some reason the calculation there was thrown out and redone here. I don't think its worth it repair this API now considering it was basically unavailable to users.` --- scylla/src/transport/cluster.rs | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/scylla/src/transport/cluster.rs b/scylla/src/transport/cluster.rs index 4d64f902f3..a8aa60ac63 100644 --- a/scylla/src/transport/cluster.rs +++ b/scylla/src/transport/cluster.rs @@ -381,25 +381,6 @@ impl ClusterData { &self.keyspaces } - /// Access datacenter details collected by the driver - /// Returned `HashMap` is indexed by names of datacenters - pub fn get_datacenters_info(&self) -> HashMap { - self.locator - .datacenter_names() - .iter() - .map(|dc_name| { - let nodes = self - .locator - .unique_nodes_in_datacenter_ring(dc_name) - .unwrap() - .to_vec(); - let rack_count = nodes.iter().map(|node| node.rack.as_ref()).unique().count(); - - (dc_name.clone(), Datacenter { nodes, rack_count }) - }) - .collect() - } - /// Access details about nodes known to the driver pub fn get_nodes_info(&self) -> &[Arc] { self.locator.unique_nodes_in_global_ring() From ef2bba2d8da6989e39030c4d35dea286d2e527cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Thu, 17 Oct 2024 14:47:37 +0200 Subject: [PATCH 8/9] cluster.rs: Remove Datacenter struct It is no longer used anywhere. --- scylla/src/transport/cluster.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scylla/src/transport/cluster.rs b/scylla/src/transport/cluster.rs index a8aa60ac63..7cd11dcdab 100644 --- a/scylla/src/transport/cluster.rs +++ b/scylla/src/transport/cluster.rs @@ -59,12 +59,6 @@ impl<'a> std::fmt::Debug for ClusterNeatDebug<'a> { } } -#[derive(Clone, Debug)] -pub struct Datacenter { - pub nodes: Vec>, - pub rack_count: usize, -} - #[derive(Clone)] pub struct ClusterData { pub(crate) known_peers: HashMap>, // Invariant: nonempty after Cluster::new() From 8a3e60db4e8a0b2446caec1c83ac8d8578763643 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 21 Oct 2024 13:42:23 +0200 Subject: [PATCH 9/9] Cargo.toml: warn on unnameable types Such types should not appear anywhere - except for sealed traits. Previous commits removed all occurrences of such types, so now we can enable the lint. The one legit occurrence - a sealed trait - is marked with `#[allow]`. --- scylla-cql/Cargo.toml | 1 + scylla-macros/Cargo.toml | 1 + scylla/Cargo.toml | 1 + scylla/src/transport/session_builder.rs | 4 ++++ 4 files changed, 7 insertions(+) diff --git a/scylla-cql/Cargo.toml b/scylla-cql/Cargo.toml index 089c9f5fa7..537e307fa4 100644 --- a/scylla-cql/Cargo.toml +++ b/scylla-cql/Cargo.toml @@ -55,4 +55,5 @@ full-serialization = [ ] [lints.rust] +unnameable_types = "warn" unreachable_pub = "warn" diff --git a/scylla-macros/Cargo.toml b/scylla-macros/Cargo.toml index eba7704065..f91d926877 100644 --- a/scylla-macros/Cargo.toml +++ b/scylla-macros/Cargo.toml @@ -18,4 +18,5 @@ quote = "1.0" proc-macro2 = "1.0" [lints.rust] +unnameable_types = "warn" unreachable_pub = "warn" diff --git a/scylla/Cargo.toml b/scylla/Cargo.toml index 0a51427b09..01c7cf1cbc 100644 --- a/scylla/Cargo.toml +++ b/scylla/Cargo.toml @@ -95,5 +95,6 @@ name = "benchmark" harness = false [lints.rust] +unnameable_types = "warn" unreachable_pub = "warn" unexpected_cfgs = { level = "warn", check-cfg = ['cfg(scylla_cloud_tests)'] } diff --git a/scylla/src/transport/session_builder.rs b/scylla/src/transport/session_builder.rs index c5373f7c2d..9a7a9cbf71 100644 --- a/scylla/src/transport/session_builder.rs +++ b/scylla/src/transport/session_builder.rs @@ -29,6 +29,10 @@ use openssl::ssl::SslContext; use tracing::warn; mod sealed { + // This is a sealed trait - its whole purpose is to be unnameable. + // This means we need to disable the check. + #[allow(unknown_lints)] // Rust 1.66 doesn't know this lint + #[allow(unnameable_types)] pub trait Sealed {} } pub trait SessionBuilderKind: sealed::Sealed + Clone {}