Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added the test scripts for resumption #2117

Merged
merged 27 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
045ad30
Added the test scripts for resumption
shubham-yb Dec 25, 2024
78209bf
Updated the large count tables test
shubham-yb Dec 26, 2024
0485f4c
Renamed test in GH Actions
shubham-yb Dec 26, 2024
1b2ee52
Merge branch 'main' into shubham/resumption
shubham-yb Dec 26, 2024
8fbb84f
Test: Only run GH integration tests
shubham-yb Dec 26, 2024
0e5bcd0
Added AWS region to large table test
shubham-yb Dec 27, 2024
986a51e
Merge branch 'main' into shubham/resumption
shubham-yb Dec 27, 2024
d5b7aaa
Cleanup
shubham-yb Dec 27, 2024
e8bd070
Reduced time between each retry for the large table test
shubham-yb Dec 27, 2024
18a38ab
Merge branch 'main' into shubham/resumption
shubham-yb Jan 2, 2025
61c9b1d
Merge branch 'main' into shubham/resumption
shubham-yb Jan 4, 2025
d72c841
Added import data resumption test framework and PG test case
shubham-yb Jan 6, 2025
b38a7f5
Increased the table sizes for the PG test
shubham-yb Jan 6, 2025
a14fa6b
Increased the table sizes for the PG test
shubham-yb Jan 6, 2025
20c2715
Added conditional check while dropping the database
shubham-yb Jan 6, 2025
9a6cf4c
Row count optimisation and cleanup
shubham-yb Jan 7, 2025
b42f922
Merge branch 'main' into shubham/resumption
shubham-yb Jan 7, 2025
2d9749d
Addressed review comments
shubham-yb Jan 19, 2025
6751621
Added better error handling
shubham-yb Jan 20, 2025
c19acf9
Merge branch 'main' into shubham/resumption
shubham-yb Jan 20, 2025
b5f7c9e
Test
shubham-yb Jan 20, 2025
ec1f782
Test
shubham-yb Jan 20, 2025
5e00018
Merge branch 'main' into shubham/resumption
shubham-yb Jan 21, 2025
fd8c456
Test fix for deadlock issue
shubham-yb Jan 21, 2025
288ab0f
Cleanup and misc changes
shubham-yb Jan 21, 2025
3b9e19a
Cleanup
shubham-yb Jan 22, 2025
5d609e0
Added prints to determine if the process was terminated or killed
shubham-yb Jan 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Added import data resumption test framework and PG test case
  • Loading branch information
shubham-yb committed Jan 6, 2025
commit d72c841061260b97f04dd6e54a4316aed73ce5a6
1 change: 0 additions & 1 deletion migtests/scripts/functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,6 @@ import_data() {
--target-db-name ${TARGET_DB_NAME}
--disable-pb true
--send-diagnostics=false
--truncate-splits true
--max-retries 1
"

Expand Down
102 changes: 79 additions & 23 deletions migtests/scripts/resumption.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def load_config(config_file):

def prepare_import_data_file_command(config):
"""
Prepares the yb-voyager command based on the given configuration.
Prepares the yb-voyager import data file command based on the given configuration.
"""
file_table_map = config['file_table_map']
additional_flags = config.get('additional_flags', {})
Expand Down Expand Up @@ -59,13 +59,44 @@ def prepare_import_data_file_command(config):
return args


def prepare_import_data_command(config):
    """
    Build the argv list for a `yb-voyager import data` run.

    Target-connection values are read from the environment; any optional
    flag/value pairs come from the 'additional_flags' mapping in *config*.

    Returns the full command as a list of strings suitable for subprocess.
    """
    env = os.getenv

    # Fixed target-connection arguments, sourced from the environment
    # (empty string when a variable is unset, matching shell behavior).
    command = [
        'yb-voyager', 'import', 'data',
        '--export-dir', env('EXPORT_DIR', ''),
        '--target-db-host', env('TARGET_DB_HOST', ''),
        '--target-db-port', env('TARGET_DB_PORT', ''),
        '--target-db-user', env('TARGET_DB_USER', ''),
        '--target-db-password', env('TARGET_DB_PASSWORD', ''),
        '--target-db-name', env('TARGET_DB_NAME', ''),
        '--disable-pb', 'true',
        '--send-diagnostics', 'false',
    ]

    # PostgreSQL sources carry schema information in the export itself;
    # every other source type needs the target schema passed explicitly.
    if env('SOURCE_DB_TYPE') != 'postgresql':
        command += ['--target-db-schema', env('TARGET_DB_SCHEMA', '')]

    # Allow the CI environment to opt out of adaptive parallelism.
    if env('RUN_WITHOUT_ADAPTIVE_PARALLELISM') == 'true':
        command += ['--enable-adaptive-parallelism', 'false']

    # Append test-specific flag/value pairs from the config, in order.
    for flag, value in config.get('additional_flags', {}).items():
        command += [flag, value]

    return command


def run_and_resume_voyager(command, resumption):
"""
Runs the yb-voyager command with support for resumption testing.
Includes final import retry logic.
"""
for attempt in range(1, resumption['max_retries'] + 1):
print(f"\n--- Attempt {attempt} of {resumption['max_retries']} ---")
for attempt in range(1, resumption['max_restarts'] + 1):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's get/define all the configs in the beginning. It will make it easier to understand what all configuration options are involved.

max_restarts = resumption['max_restarts']
min_interrupt_seconds = resumption['min_interrupt_seconds']
... 

print(f"\n--- Attempt {attempt} of {resumption['max_restarts']} ---")
try:
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
print("Running command:", ' '.join(command), flush=True)
Expand Down Expand Up @@ -153,43 +184,68 @@ def run_and_resume_voyager(command, resumption):
print("Final import failed after 2 attempts.")
sys.exit(1)


def validate_row_counts(row_count, export_dir):
    """
    Validate the row counts of the target tables after import.

    Args:
        row_count: mapping of table identifier -> expected row count.
            A table identifier may be schema-qualified ('schema.table');
            unqualified names default to the 'public' schema.
        export_dir: the voyager export directory, used only to point the
            user at the logs on failure.

    Collects every mismatch instead of exiting on the first one, prints a
    summary, and exits with status 1 if any table failed validation.
    """
    failed_validations = []

    for table_identifier, expected_row_count in row_count.items():
        print(f"\nValidating row count for table '{table_identifier}'...")

        # Split a schema-qualified identifier; unqualified names live in 'public'.
        if '.' in table_identifier:
            schema, table_name = table_identifier.split('.', 1)
        else:
            schema = "public"
            table_name = table_identifier

        tgt = None
        try:
            tgt = yb.new_target_db()
            tgt.connect()
            print(f"Connected to target database. Using schema: {schema}")
            actual_row_count = tgt.get_row_count(table_name, schema)

            if actual_row_count == expected_row_count:
                print(f"\u2714 Validation successful: {table_identifier} - Expected: {expected_row_count}, Actual: {actual_row_count}")
            else:
                print(f"\u274C Validation failed: {table_identifier} - Expected: {expected_row_count}, Actual: {actual_row_count}")
                failed_validations.append((table_identifier, expected_row_count, actual_row_count))
        except Exception as e:
            # Record the failure and keep validating the remaining tables.
            print(f"Error during validation for table '{table_identifier}': {e}")
            failed_validations.append((table_identifier, expected_row_count, "Error"))
        finally:
            # Close the per-table connection even when validation raised.
            if tgt:
                tgt.close()
                print("Disconnected from target database.")

    if failed_validations:
        print("\nValidation failed for the following tables:")
        for table, expected, actual in failed_validations:
            print(f"  Table: {table}, Expected: {expected}, Actual: {actual}")
        print(f"\nFor more details, check {export_dir}/logs")
        sys.exit(1)
    else:
        print("\nAll table row counts validated successfully.")



def run_import_with_resumption(config):
    """
    Run the yb-voyager import command with resumption testing and validation.

    Dispatches on config['import_type'] ('file' for `import data file`,
    'offline' for `import data`), runs the command with interrupt/resume
    cycles, then validates target row counts.

    Raises:
        ValueError: if config['import_type'] is neither 'file' nor 'offline'.
    """
    import_type = config.get('import_type', 'file')  # Default to 'file' if not specified

    if import_type == 'file':
        command = prepare_import_data_file_command(config)
    elif import_type == 'offline':
        command = prepare_import_data_command(config)
    else:
        raise ValueError(f"Unsupported import_type: {import_type}")

    # Bug fix: a leftover line here previously re-assigned the file-import
    # command unconditionally, clobbering the dispatch above; a duplicate
    # validate_row_counts call with the stale 3-argument signature is
    # removed as well.
    run_and_resume_voyager(command, config['resumption'])

    validate_row_counts(config['row_count'], os.getenv('EXPORT_DIR', ''))


if __name__ == "__main__":
Expand Down
38 changes: 31 additions & 7 deletions migtests/scripts/resumption.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ else
source ${TEST_DIR}/env.sh
fi

source ${SCRIPTS}/yugabytedb/env.sh
if [ "${SOURCE_DB_TYPE}" != "" ]; then
source ${SCRIPTS}/${SOURCE_DB_TYPE}/env.sh
fi

source ${SCRIPTS}/yugabytedb/env.sh
source ${SCRIPTS}/functions.sh

main() {
Expand All @@ -44,11 +47,12 @@ main() {
echo "Creating export-dir in the parent test directory"
mkdir -p ${EXPORT_DIR}
echo "Assigning permissions to the export-dir to execute init-db script"
chmod +x ${TEST_DIR}/init-target-db

if [ -f "${TEST_DIR}/generate_config.py" ]; then
chmod +x "${TEST_DIR}/generate_config.py"
fi
for script in init-db init-target-db generate_config.py; do
if [ -f "${TEST_DIR}/${script}" ]; then
chmod +x "${TEST_DIR}/${script}"
fi
done

step "START: ${TEST_NAME}"
print_env
Expand All @@ -58,8 +62,27 @@ main() {
step "Check the Voyager version installed"
yb-voyager version

step "Initialise target database."
./init-target-db
step "Initialise databases"

for script in init-db init-target-db; do
if [ -f "${TEST_DIR}/${script}" ]; then
"${TEST_DIR}/${script}"
fi
done

step "Run additional steps in case of offline"
if [ "${SOURCE_DB_TYPE}" != "" ]; then
step "Grant source database user permissions"
grant_permissions ${SOURCE_DB_NAME} ${SOURCE_DB_TYPE} ${SOURCE_DB_SCHEMA}

step "Export data."
# false if exit code of export_data is non-zero
export_data || {
cat_log_file "yb-voyager-export-data.log"
cat_log_file "debezium-source_db_exporter.log"
exit 1
}
fi

step "Generate the YAML file"
if [ -f "${TEST_DIR}/generate_config.py" ]; then
Expand All @@ -75,6 +98,7 @@ main() {
if [ -f "${TEST_DIR}/generate_config.py" ]; then
rm config.yaml
fi
run_psql postgres "DROP DATABASE ${SOURCE_DB_NAME};"
run_ysql yugabyte "DROP DATABASE IF EXISTS ${TARGET_DB_NAME};"
}

Expand Down
4 changes: 3 additions & 1 deletion migtests/tests/pg/partitions/init-db
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ run_psql postgres "CREATE DATABASE ${SOURCE_DB_NAME};"
echo "Initialising source database."

run_psql ${SOURCE_DB_NAME} "\i schema.sql;"
run_psql ${SOURCE_DB_NAME} "\i snapshot.sql;"
# run_psql ${SOURCE_DB_NAME} "\i snapshot.sql;"
chmod +x ./snapshot.sh
./snapshot.sh 1000

if [ -n "${SOURCE_REPLICA_DB_NAME}" ] && [ "${SOURCE_REPLICA_DB_NAME}" != "${SOURCE_DB_NAME}" ];
then
Expand Down
133 changes: 133 additions & 0 deletions migtests/tests/pg/partitions/snapshot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#!/bin/bash
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assuming that the ONLY change here is that you're specifying ROW_COUNT and essentially making generate_series dynamic.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes correct


set -e
set -x

# Seed the source database for the pg/partitions resumption test.
# Loads shared helpers (run_psql) from the migtests scripts directory.
source ${SCRIPTS}/functions.sh

# Set default row count (can be overridden by user input)
ROW_COUNT=${1:-1000} # Default to 1000 if no argument is provided

# Value pools cycled across generated rows; each INSERT below picks an
# element via 1 + mod(n, array_length(...)).
REGIONS=('London' 'Boston' 'Sydney')
AMOUNTS=(1000 2000 5000)

# Insert into sales_region table
sql_sales_region="
WITH region_list AS (
SELECT ARRAY['${REGIONS[0]}', '${REGIONS[1]}', '${REGIONS[2]}']::TEXT[] region
), amount_list AS (
SELECT ARRAY[${AMOUNTS[0]}, ${AMOUNTS[1]}, ${AMOUNTS[2]}]::INT[] amount
)
INSERT INTO sales_region
(id, amount, branch, region)
SELECT
n,
amount[1 + mod(n, array_length(amount, 1))],
'Branch ' || n as branch,
region[1 + mod(n, array_length(region, 1))]
FROM amount_list, region_list, generate_series(1, $ROW_COUNT) as n;
"
run_psql "${SOURCE_DB_NAME}" "$sql_sales_region"

# Insert into test_partitions_sequences table
# (no explicit id: the table's sequence supplies it)
sql_test_partitions_sequences="
WITH region_list AS (
SELECT ARRAY['${REGIONS[0]}', '${REGIONS[1]}', '${REGIONS[2]}']::TEXT[] region
), amount_list AS (
SELECT ARRAY[${AMOUNTS[0]}, ${AMOUNTS[1]}, ${AMOUNTS[2]}]::INT[] amount
)
INSERT INTO test_partitions_sequences
(amount, branch, region)
SELECT
amount[1 + mod(n, array_length(amount, 1))],
'Branch ' || n as branch,
region[1 + mod(n, array_length(region, 1))]
FROM amount_list, region_list, generate_series(1, $ROW_COUNT) as n;
"
run_psql "${SOURCE_DB_NAME}" "$sql_test_partitions_sequences"

# Insert into p1.sales_region table
sql_p1_sales_region="
WITH region_list AS (
SELECT ARRAY['${REGIONS[0]}', '${REGIONS[1]}', '${REGIONS[2]}']::TEXT[] region
), amount_list AS (
SELECT ARRAY[${AMOUNTS[0]}, ${AMOUNTS[1]}, ${AMOUNTS[2]}]::INT[] amount
)
INSERT INTO p1.sales_region
(id, amount, branch, region)
SELECT
n,
amount[1 + mod(n, array_length(amount, 1))],
'Branch ' || n as branch,
region[1 + mod(n, array_length(region, 1))]
FROM amount_list, region_list, generate_series(1, $ROW_COUNT) as n;
"
run_psql "${SOURCE_DB_NAME}" "$sql_p1_sales_region"

# Insert into sales table
# NOTE(review): sale_date is indexed with array_length(amount, 1); both
# arrays have 3 entries so the cycles line up — confirm this coupling is
# intentional before changing either array's size.
sql_sales="
WITH amount_list AS (
SELECT ARRAY[${AMOUNTS[0]}, ${AMOUNTS[1]}, ${AMOUNTS[2]}]::INT[] amount
), date_list AS (
SELECT ARRAY['2019-11-01'::TIMESTAMP, '2020-02-01'::TIMESTAMP, '2020-05-01'::TIMESTAMP] sale_date
)
INSERT INTO sales
(id, p_name, amount, sale_date)
SELECT
n,
'Person ' || n as p_name,
amount[1 + mod(n, array_length(amount, 1))],
sale_date[1 + mod(n, array_length(amount, 1))]
FROM
amount_list,
date_list,
generate_series(1, $ROW_COUNT) as n;
"
run_psql "${SOURCE_DB_NAME}" "$sql_sales"

# Insert into range_columns_partition_test table
# (fixed values, independent of ROW_COUNT)
sql_range_columns_partition_test="
INSERT INTO range_columns_partition_test
VALUES
(5, 5),
(3, 4),
(5, 11),
(5, 12),
(4, 3),
(3, 1);
"
run_psql "${SOURCE_DB_NAME}" "$sql_range_columns_partition_test"

# Echo each inserted row with the partition (tableoid) it landed in,
# for debugging partition routing.
sql_select_range_columns_partition_test="
SELECT
tableoid :: regclass,
*
FROM
range_columns_partition_test;
"
run_psql "${SOURCE_DB_NAME}" "$sql_select_range_columns_partition_test"

# Insert into emp table
sql_emp="
INSERT INTO emp
SELECT num, 'user_' || num , (RANDOM()*50)::INTEGER
FROM generate_series(1, $ROW_COUNT) AS num;
"
run_psql "${SOURCE_DB_NAME}" "$sql_emp"

# Insert into customers table
sql_customers="
WITH status_list AS (
SELECT '{"ACTIVE", "RECURRING", "REACTIVATED", "EXPIRED"}'::TEXT[] statuses
), arr_list AS (
SELECT '{100, 200, 50, 250}'::INT[] arr
)
INSERT INTO customers
(id, statuses, arr)
SELECT n,
statuses[1 + mod(n, array_length(statuses, 1))],
arr[1 + mod(n, array_length(arr, 1))]
FROM arr_list, generate_series(1,$ROW_COUNT) AS n, status_list;
"
run_psql "${SOURCE_DB_NAME}" "$sql_customers"


Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def generate_yaml(num_tables=1250):
},
"row_count": {},
"resumption": {
"max_retries": 50,
"max_restarts": 50,
"min_interrupt_seconds": 15,
"max_interrupt_seconds": 30,
"min_restart_wait_seconds": 15,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ row_count:

# Resumption Settings
resumption:
max_retries: 30
max_restarts: 30
min_interrupt_seconds: 300
max_interrupt_seconds: 720
min_restart_wait_seconds: 30
Expand Down
Loading