-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
statistics: do not copy and paste the code for saving statistics #55046
Conversation
Tested locally:
#!/usr/bin/env -S cargo +nightly-2024-05-01 -Zscript
---cargo
[dependencies]
clap = { version = "4.2", features = ["derive"] }
sqlx = { version = "0.7", features = ["runtime-tokio-rustls", "mysql"] }
tokio = { version = "1", features = ["full"] }
fake = { version = "2.5", features = ["derive"] }
---
use clap::Parser;
use fake::{Fake, Faker};
use sqlx::mysql::MySqlPoolOptions;
// Command-line arguments for the data-generator script:
// a connection string plus how many RANGE partitions to create.
#[derive(Parser, Debug)]
#[clap(version)]
struct Args {
// Parsed from -d / --database-url.
#[clap(short, long, help = "MySQL connection string")]
database_url: String,
// Parsed from -n / --num-partitions; defaults to 3 partitions.
#[clap(short, long, default_value = "3", help = "Number of partitions")]
num_partitions: u32,
}
// One synthetic row for table `t`. `partition_key` is the RANGE partitioning
// column (see the CREATE TABLE below); the other columns are random filler.
#[derive(Debug)]
struct TableRow {
partition_key: u32,
column1: String,
column2: i32,
column3: i32,
column4: String,
}
// Entry point: connects to the database, creates a RANGE-partitioned table `t`
// with `num_partitions` partitions, then inserts 3000 synthetic rows into each.
#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
let args = Args::parse();
// Connection pool capped at 5 connections; inserts below run sequentially.
let pool = MySqlPoolOptions::new()
.max_connections(5)
.connect(&args.database_url)
.await?;
// Create partitioned table
// Partition bounds are generated: p{i} covers keys less than (i + 1) * 3000.
let create_table_query = format!(
"CREATE TABLE IF NOT EXISTS t (
partition_key INT,
column1 VARCHAR(255),
column2 INT,
column3 INT,
column4 VARCHAR(255)
) PARTITION BY RANGE (partition_key) (
{}
)",
(0..args.num_partitions)
.map(|i| format!("PARTITION p{} VALUES LESS THAN ({})", i, (i + 1) * 3000))
.collect::<Vec<_>>()
.join(",\n")
);
sqlx::query(&create_table_query).execute(&pool).await?;
for partition in 0..args.num_partitions {
for _ in 0..3000 {
// partition * 3000 + 1 falls strictly inside partition p{partition}'s range.
let row = TableRow {
partition_key: partition * 3000 + 1,
column1: Faker.fake::<String>(),
column2: Faker.fake::<i32>(),
column3: Faker.fake::<i32>(),
column4: Faker.fake::<String>(),
};
// One INSERT per row; values are bound as parameters, not string-interpolated.
sqlx::query(
"INSERT INTO t (partition_key, column1, column2, column3, column4)
VALUES (?, ?, ?, ?, ?)",
)
.bind(row.partition_key)
.bind(&row.column1)
.bind(row.column2)
.bind(row.column3)
.bind(&row.column4)
.execute(&pool)
.await?;
}
println!("Inserted 3000 rows into partition {}", partition);
}
println!("Successfully inserted {} rows into the 't' table across {} partitions.",
3000 * args.num_partitions, args.num_partitions);
Ok(())
} ./sql2.rs --database-url="mysql://root@localhost:4000/test"
warning: `package.edition` is unspecified, defaulting to `2021`
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.28s
Running `/Users/hi-rustin/.cargo/target/6a/821756d0d91706/debug/sql2 '--database-url=mysql://root@localhost:4000/test'`
Inserted 3000 rows into partition 0
Inserted 3000 rows into partition 1
Inserted 3000 rows into partition 2
Successfully inserted 9000 rows into the 't' table across 3 partitions.
[2024/07/30 15:19:36.107 +08:00] [INFO] [refresher.go:102] ["Auto analyze triggered"] [category=stats] [job="DynamicPartitionedTableAnalysisJob:\n\tAnalyzeType: analyzeDynamicPartition\n\tPartitions: p1, p2, p0\n\tPartitionIndexes: map[]\n\tSchema: test\n\tGlobal Table: t\n\tGlobal TableID: 104\n\tTableStatsVer: 2\n\tChangePercentage: 1.000000\n\tTableSize: 15000.00\n\tLastAnalysisDuration: 30m0s\n\tWeight: 1.376307\n"]
[2024/07/30 15:19:36.138 +08:00] [INFO] [analyze.go:401] ["use multiple sessions to save analyze results"] [sessionCount=2]
[2024/07/30 15:21:03.612 +08:00] [INFO] [analyze.go:401] ["use multiple sessions to save analyze results"] [sessionCount=2]
mysql> set global tidb_analyze_partition_concurrency = 1;
Query OK, 0 rows affected (0.01 sec)
mysql> select @@tidb_analyze_partition_concurrency;
+--------------------------------------+
| @@tidb_analyze_partition_concurrency |
+--------------------------------------+
| 1 |
+--------------------------------------+
1 row in set (0.00 sec)
[2024/07/30 15:23:54.322 +08:00] [INFO] [analyze.go:408] ["use single session to save analyze results"]
#!/usr/bin/env -S cargo +nightly-2024-05-01 -Zscript
---cargo
[dependencies]
clap = { version = "4.2", features = ["derive"] }
sqlx = { version = "0.7", features = ["runtime-tokio-rustls", "mysql"] }
tokio = { version = "1", features = ["full"] }
fake = { version = "2.5", features = ["derive"] }
---
use clap::Parser;
use fake::{Fake, Faker};
use sqlx::mysql::MySqlPoolOptions;
// Command-line arguments: this script only needs the connection string.
#[derive(Parser, Debug)]
#[clap(version)]
struct Args {
// Parsed from -d / --database-url.
#[clap(short, long, help = "MySQL connection string")]
database_url: String,
}
// One synthetic row for table `t`. `partition_key` selects the target
// partition (table `t` is assumed to already exist — created by the earlier
// script); the remaining columns are random filler.
#[derive(Debug)]
struct TableRow {
partition_key: u32,
column1: String,
column2: i32,
column3: i32,
column4: String,
}
// Connects to the target database and appends 3000 randomly generated rows,
// all keyed so that they land in partition p1 of table `t`.
#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
    let args = Args::parse();

    // Connection pool capped at 5 connections; inserts run sequentially.
    let pool = MySqlPoolOptions::new()
        .max_connections(5)
        .connect(&args.database_url)
        .await?;

    // The INSERT statement never changes, so bind it to a name outside the loop.
    let insert_sql = "INSERT INTO t (partition_key, column1, column2, column3, column4)
VALUES (?, ?, ?, ?, ?)";

    // Emit 3000 rows; key 3001 satisfies 3000 < 3001 < 6000, i.e. partition p1.
    let mut remaining = 3000u32;
    while remaining > 0 {
        let record = TableRow {
            partition_key: 3001,
            column1: Faker.fake::<String>(),
            column2: Faker.fake::<i32>(),
            column3: Faker.fake::<i32>(),
            column4: Faker.fake::<String>(),
        };
        sqlx::query(insert_sql)
            .bind(record.partition_key)
            .bind(&record.column1)
            .bind(record.column2)
            .bind(record.column3)
            .bind(&record.column4)
            .execute(&pool)
            .await?;
        remaining -= 1;
    }
    println!("Successfully inserted 3000 additional rows into partition p1 of the 't' table.");
    Ok(())
}
[2024/07/30 15:32:09.103 +08:00] [INFO] [refresher.go:102] ["Auto analyze triggered"] [category=stats] [job="DynamicPartitionedTableAnalysisJob:\n\tAnalyzeType: analyzeDynamicPartition\n\tPartitions: p1\n\tPartitionIndexes: map[]\n\tSchema: test\n\tGlobal Table: t\n\tGlobal TableID: 104\n\tTableStatsVer: 2\n\tChangePercentage: 0.800000\n\tTableSize: 75000.00\n\tLastAnalysisDuration: 12m32.998s\n\tWeight: 1.193767\n"]
[2024/07/30 15:32:09.120 +08:00] [INFO] [analyze.go:408] ["use single session to save analyze results"]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🔢 Self-check (PR reviewed by myself and ready for feedback.)
Codecov Report — Attention: Patch coverage is
Additional details and impacted files@@ Coverage Diff @@
## master #55046 +/- ##
================================================
+ Coverage 72.6158% 74.0203% +1.4044%
================================================
Files 1565 1565
Lines 440229 442538 +2309
================================================
+ Hits 319676 327568 +7892
+ Misses 100711 94898 -5813
- Partials 19842 20072 +230
Flags with carried forward coverage won't be shown. Click here to find out more.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🔢 Self-check (PR reviewed by myself and ready for feedback.)
Tested locally:
tiup playground nightly --db.binpath /Volumes/t7/code/tidb/bin/tidb-server
#!/usr/bin/env -S cargo +nightly -Zscript
---cargo
[dependencies]
clap = { version = "4.2", features = ["derive"] }
sqlx = { version = "0.7", features = ["runtime-tokio-rustls", "mysql"] }
tokio = { version = "1", features = ["full"] }
fake = { version = "2.5", features = ["derive"] }
---
use clap::Parser;
use fake::{Fake, Faker};
use sqlx::mysql::MySqlPoolOptions;
// Command-line arguments: this script only needs the connection string.
#[derive(Parser, Debug)]
#[clap(version)]
struct Args {
// Parsed from -d / --database-url.
#[clap(short, long, help = "MySQL connection string")]
database_url: String,
}
// One synthetic row for table `t`. `partition_key` is the RANGE partitioning
// column (see the CREATE TABLE below); the other columns are random filler.
#[derive(Debug)]
struct TableRow {
partition_key: u32,
column1: String,
column2: i32,
column3: i32,
column4: String,
}
// Entry point: creates a 21-partition RANGE table `t` (if absent) and inserts
// 3000 synthetic rows into each of partitions p1..p20, leaving p0 empty.
#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
    let args = Args::parse();
    // Connection pool capped at 5 connections; inserts run sequentially.
    let pool = MySqlPoolOptions::new()
        .max_connections(5)
        .connect(&args.database_url)
        .await?;
    // Create partitioned table if not exists.
    // The 21 partitions (p0..p20, upper bounds 3000, 6000, ..., 63000) are
    // generated instead of hand-written, removing 21 copy-pasted clauses and
    // matching how the companion script builds the same DDL.
    let partition_clauses = (0..=20u32)
        .map(|i| format!("PARTITION p{} VALUES LESS THAN ({})", i, (i + 1) * 3000))
        .collect::<Vec<_>>()
        .join(",\n");
    let create_table_query = format!(
        "CREATE TABLE IF NOT EXISTS t (
partition_key INT NOT NULL,
column1 VARCHAR(255) NOT NULL,
column2 INT NOT NULL,
column3 INT NOT NULL,
column4 VARCHAR(255) NOT NULL
) PARTITION BY RANGE (partition_key) (
{}
)",
        partition_clauses
    );
    sqlx::query(&create_table_query).execute(&pool).await?;
    // Insert 3000 rows into each of the 20 partitions p1..p20.
    // NOTE: p0 intentionally receives no rows (the loop starts at 1).
    for partition in 1..=20 {
        // partition * 3000 + 1 falls strictly inside p{partition}'s key range,
        // so each batch lands in a distinct partition.
        let partition_key = partition * 3000 + 1;
        for _ in 0..3000 {
            let row = TableRow {
                partition_key,
                column1: Faker.fake::<String>(),
                column2: Faker.fake::<i32>(),
                column3: Faker.fake::<i32>(),
                column4: Faker.fake::<String>(),
            };
            // One INSERT per row; values are bound as parameters.
            sqlx::query(
                "INSERT INTO t (partition_key, column1, column2, column3, column4)
VALUES (?, ?, ?, ?, ?)"
            )
            .bind(row.partition_key)
            .bind(&row.column1)
            .bind(row.column2)
            .bind(row.column3)
            .bind(&row.column4)
            .execute(&pool)
            .await?;
        }
        println!("Successfully inserted 3000 rows into partition {} of the 't' table.", partition);
    }
    Ok(())
}
mysql> analyze table t;
Query OK, 0 rows affected, 23 warnings (0.44 sec)
mysql> show warnings;
+---------+------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Level | Code | Message |
+---------+------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Warning | 1105 | No predicate column has been collected yet for table test.t, so only indexes and the columns composing the indexes will be analyzed |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is "TiDB assumes that the table is empty, use sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p2, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p3, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p4, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p5, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p6, reason to use this rate is "use min(1, 110000/3020) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p7, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p8, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p9, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p10, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p11, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p12, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p13, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p14, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p15, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p16, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p17, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p18, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p19, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Note | 1105 | Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p20, reason to use this rate is "use min(1, 110000/3000) as the sample-rate=1" |
| Warning | 1105 | Insufficient sessions to save analyze results. Consider increasing the 'analyze-partition-concurrency-quota' configuration to improve analyze performance. This value should typically be greater than or equal to 'tidb_analyze_partition_concurrency'. |
+---------+------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
23 rows in set (0.01 sec)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🔢 Self-check (PR reviewed by myself and ready for feedback.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
[APPROVALNOTIFIER] This PR is APPROVED This pull-request has been approved by: elsa0520, time-and-fate The full list of commands accepted by this bot can be found here. The pull request process is described here
Needs approval from an approver in each of these files:
Approvers can indicate their approval by writing |
[LGTM Timeline notifier] Timeline:
|
What problem does this PR solve?
Issue Number: ref #55043
Problem Summary:
What changed and how does it work?
Just reuse the concurrent code to handle the single-threaded case.
Check List
Tests
Side effects
Documentation
Release note
Please refer to Release Notes Language Style Guide to write a quality release note.