Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added the test scripts for resumption #2117

Merged
merged 27 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
045ad30
Added the test scripts for resumption
shubham-yb Dec 25, 2024
78209bf
Updated the large count tables test
shubham-yb Dec 26, 2024
0485f4c
Renamed test in GH Actions
shubham-yb Dec 26, 2024
1b2ee52
Merge branch 'main' into shubham/resumption
shubham-yb Dec 26, 2024
8fbb84f
Test: Only run GH integration tests
shubham-yb Dec 26, 2024
0e5bcd0
Added AWS region to large table test
shubham-yb Dec 27, 2024
986a51e
Merge branch 'main' into shubham/resumption
shubham-yb Dec 27, 2024
d5b7aaa
Cleanup
shubham-yb Dec 27, 2024
e8bd070
Reduced time between each retry for the large table test
shubham-yb Dec 27, 2024
18a38ab
Merge branch 'main' into shubham/resumption
shubham-yb Jan 2, 2025
61c9b1d
Merge branch 'main' into shubham/resumption
shubham-yb Jan 4, 2025
d72c841
Added import data resumption test framework and PG test case
shubham-yb Jan 6, 2025
b38a7f5
Increased the table sizes for the PG test
shubham-yb Jan 6, 2025
a14fa6b
Increased the table sizes for the PG test
shubham-yb Jan 6, 2025
20c2715
Added conditional check while dropping the database
shubham-yb Jan 6, 2025
9a6cf4c
Row count optimisation and cleanup
shubham-yb Jan 7, 2025
b42f922
Merge branch 'main' into shubham/resumption
shubham-yb Jan 7, 2025
2d9749d
Addressed review comments
shubham-yb Jan 19, 2025
6751621
Added better error handling
shubham-yb Jan 20, 2025
c19acf9
Merge branch 'main' into shubham/resumption
shubham-yb Jan 20, 2025
b5f7c9e
Test
shubham-yb Jan 20, 2025
ec1f782
Test
shubham-yb Jan 20, 2025
5e00018
Merge branch 'main' into shubham/resumption
shubham-yb Jan 21, 2025
fd8c456
Test fix for deadlock issue
shubham-yb Jan 21, 2025
288ab0f
Cleanup and misc changes
shubham-yb Jan 21, 2025
3b9e19a
Cleanup
shubham-yb Jan 22, 2025
5d609e0
Added prints to determine if the process was terminated or killed
shubham-yb Jan 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Added import data resumption test framework and PG test case
  • Loading branch information
shubham-yb committed Jan 6, 2025
commit d72c841061260b97f04dd6e54a4316aed73ce5a6
1 change: 0 additions & 1 deletion migtests/scripts/functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,6 @@ import_data() {
--target-db-name ${TARGET_DB_NAME}
--disable-pb true
--send-diagnostics=false
--truncate-splits true
--max-retries 1
"

Expand Down
102 changes: 79 additions & 23 deletions migtests/scripts/resumption.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def load_config(config_file):

def prepare_import_data_file_command(config):
"""
Prepares the yb-voyager command based on the given configuration.
Prepares the yb-voyager import data file command based on the given configuration.
"""
file_table_map = config['file_table_map']
additional_flags = config.get('additional_flags', {})
Expand Down Expand Up @@ -59,13 +59,44 @@ def prepare_import_data_file_command(config):
return args


def prepare_import_data_command(config):
    """
    Build the argv list for a `yb-voyager import data` run.

    Target-connection values are read from the environment; any optional
    flag/value pairs come from the 'additional_flags' mapping in *config*.

    Returns the full command as a list of strings suitable for subprocess.
    """
    env = os.getenv

    # Fixed target-connection arguments, sourced from the environment
    # (empty string when a variable is unset, matching shell behavior).
    command = [
        'yb-voyager', 'import', 'data',
        '--export-dir', env('EXPORT_DIR', ''),
        '--target-db-host', env('TARGET_DB_HOST', ''),
        '--target-db-port', env('TARGET_DB_PORT', ''),
        '--target-db-user', env('TARGET_DB_USER', ''),
        '--target-db-password', env('TARGET_DB_PASSWORD', ''),
        '--target-db-name', env('TARGET_DB_NAME', ''),
        '--disable-pb', 'true',
        '--send-diagnostics', 'false',
    ]

    # PostgreSQL sources carry schema information in the export itself;
    # every other source type needs the target schema passed explicitly.
    if env('SOURCE_DB_TYPE') != 'postgresql':
        command += ['--target-db-schema', env('TARGET_DB_SCHEMA', '')]

    # Allow the CI environment to opt out of adaptive parallelism.
    if env('RUN_WITHOUT_ADAPTIVE_PARALLELISM') == 'true':
        command += ['--enable-adaptive-parallelism', 'false']

    # Append test-specific flag/value pairs from the config, in order.
    for flag, value in config.get('additional_flags', {}).items():
        command += [flag, value]

    return command


def run_and_resume_voyager(command, resumption):
"""
Runs the yb-voyager command with support for resumption testing.
Includes final import retry logic.
"""
for attempt in range(1, resumption['max_retries'] + 1):
print(f"\n--- Attempt {attempt} of {resumption['max_retries']} ---")
for attempt in range(1, resumption['max_restarts'] + 1):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's get/define all the configs in the beginning. It will make it easier to understand what all configuration options are involved.

max_restarts = resumption['max_restarts']
min_interrupt_seconds = resumption['min_interrupt_seconds']
... 

print(f"\n--- Attempt {attempt} of {resumption['max_restarts']} ---")
try:
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
print("Running command:", ' '.join(command), flush=True)
Expand Down Expand Up @@ -153,43 +184,68 @@ def run_and_resume_voyager(command, resumption):
print("Final import failed after 2 attempts.")
sys.exit(1)


def validate_row_counts(row_count, export_dir):
    """
    Validate the row counts of the target tables after import.

    Args:
        row_count: mapping of table identifier -> expected row count.
            A table identifier may be schema-qualified ('schema.table');
            unqualified names default to the 'public' schema.
        export_dir: the voyager export directory, used only to point the
            user at the logs on failure.

    Collects every mismatch instead of exiting on the first one, prints a
    summary, and exits with status 1 if any table failed validation.
    """
    failed_validations = []

    for table_identifier, expected_row_count in row_count.items():
        print(f"\nValidating row count for table '{table_identifier}'...")

        # Split a schema-qualified identifier; unqualified names live in 'public'.
        if '.' in table_identifier:
            schema, table_name = table_identifier.split('.', 1)
        else:
            schema = "public"
            table_name = table_identifier

        tgt = None
        try:
            tgt = yb.new_target_db()
            tgt.connect()
            print(f"Connected to target database. Using schema: {schema}")
            actual_row_count = tgt.get_row_count(table_name, schema)

            if actual_row_count == expected_row_count:
                print(f"\u2714 Validation successful: {table_identifier} - Expected: {expected_row_count}, Actual: {actual_row_count}")
            else:
                print(f"\u274C Validation failed: {table_identifier} - Expected: {expected_row_count}, Actual: {actual_row_count}")
                failed_validations.append((table_identifier, expected_row_count, actual_row_count))
        except Exception as e:
            # Record the failure and keep validating the remaining tables.
            print(f"Error during validation for table '{table_identifier}': {e}")
            failed_validations.append((table_identifier, expected_row_count, "Error"))
        finally:
            # Close the per-table connection even when validation raised.
            if tgt:
                tgt.close()
                print("Disconnected from target database.")

    if failed_validations:
        print("\nValidation failed for the following tables:")
        for table, expected, actual in failed_validations:
            print(f"  Table: {table}, Expected: {expected}, Actual: {actual}")
        print(f"\nFor more details, check {export_dir}/logs")
        sys.exit(1)
    else:
        print("\nAll table row counts validated successfully.")



def run_import_with_resumption(config):
    """
    Run the yb-voyager import command with resumption testing and validation.

    Dispatches on config['import_type'] ('file' for `import data file`,
    'offline' for `import data`), runs the command with interrupt/resume
    cycles, then validates target row counts.

    Raises:
        ValueError: if config['import_type'] is neither 'file' nor 'offline'.
    """
    import_type = config.get('import_type', 'file')  # Default to 'file' if not specified

    if import_type == 'file':
        command = prepare_import_data_file_command(config)
    elif import_type == 'offline':
        command = prepare_import_data_command(config)
    else:
        raise ValueError(f"Unsupported import_type: {import_type}")

    # Bug fix: a leftover line here previously re-assigned the file-import
    # command unconditionally, clobbering the dispatch above; a duplicate
    # validate_row_counts call with the stale 3-argument signature is
    # removed as well.
    run_and_resume_voyager(command, config['resumption'])

    validate_row_counts(config['row_count'], os.getenv('EXPORT_DIR', ''))


if __name__ == "__main__":
Expand Down
38 changes: 31 additions & 7 deletions migtests/scripts/resumption.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ else
source ${TEST_DIR}/env.sh
fi

source ${SCRIPTS}/yugabytedb/env.sh
if [ "${SOURCE_DB_TYPE}" != "" ]; then
source ${SCRIPTS}/${SOURCE_DB_TYPE}/env.sh
fi

source ${SCRIPTS}/yugabytedb/env.sh
source ${SCRIPTS}/functions.sh

main() {
Expand All @@ -44,11 +47,12 @@ main() {
echo "Creating export-dir in the parent test directory"
mkdir -p ${EXPORT_DIR}
echo "Assigning permissions to the export-dir to execute init-db script"
chmod +x ${TEST_DIR}/init-target-db

if [ -f "${TEST_DIR}/generate_config.py" ]; then
chmod +x "${TEST_DIR}/generate_config.py"
fi
for script in init-db init-target-db generate_config.py; do
if [ -f "${TEST_DIR}/${script}" ]; then
chmod +x "${TEST_DIR}/${script}"
fi
done

step "START: ${TEST_NAME}"
print_env
Expand All @@ -58,8 +62,27 @@ main() {
step "Check the Voyager version installed"
yb-voyager version

step "Initialise target database."
./init-target-db
step "Initialise databases"

for script in init-db init-target-db; do
if [ -f "${TEST_DIR}/${script}" ]; then
"${TEST_DIR}/${script}"
fi
done

step "Run additional steps in case of offline"
if [ "${SOURCE_DB_TYPE}" != "" ]; then
step "Grant source database user permissions"
grant_permissions ${SOURCE_DB_NAME} ${SOURCE_DB_TYPE} ${SOURCE_DB_SCHEMA}

step "Export data."
# false if exit code of export_data is non-zero
export_data || {
cat_log_file "yb-voyager-export-data.log"
cat_log_file "debezium-source_db_exporter.log"
exit 1
}
fi

step "Generate the YAML file"
if [ -f "${TEST_DIR}/generate_config.py" ]; then
Expand All @@ -75,6 +98,7 @@ main() {
if [ -f "${TEST_DIR}/generate_config.py" ]; then
rm config.yaml
fi
run_psql postgres "DROP DATABASE ${SOURCE_DB_NAME};"
run_ysql yugabyte "DROP DATABASE IF EXISTS ${TARGET_DB_NAME};"
}

Expand Down
4 changes: 3 additions & 1 deletion migtests/tests/pg/partitions/init-db
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ run_psql postgres "CREATE DATABASE ${SOURCE_DB_NAME};"
echo "Initialising source database."

run_psql ${SOURCE_DB_NAME} "\i schema.sql;"
run_psql ${SOURCE_DB_NAME} "\i snapshot.sql;"
# run_psql ${SOURCE_DB_NAME} "\i snapshot.sql;"
chmod +x ./snapshot.sh
./snapshot.sh 1000

if [ -n "${SOURCE_REPLICA_DB_NAME}" ] && [ "${SOURCE_REPLICA_DB_NAME}" != "${SOURCE_DB_NAME}" ];
then
Expand Down
133 changes: 133 additions & 0 deletions migtests/tests/pg/partitions/snapshot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#!/bin/bash
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assuming that the ONLY change here is that you're specifying ROW_COUNT and essentially making generate_series dynamic.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes correct


set -e
set -x

# Seed the source database for the pg/partitions resumption test.
# Loads shared helpers (run_psql) from the migtests scripts directory.
source ${SCRIPTS}/functions.sh

# Set default row count (can be overridden by user input)
ROW_COUNT=${1:-1000} # Default to 1000 if no argument is provided

# Value pools cycled across generated rows; each INSERT below picks an
# element via 1 + mod(n, array_length(...)).
REGIONS=('London' 'Boston' 'Sydney')
AMOUNTS=(1000 2000 5000)

# Insert into sales_region table
sql_sales_region="
WITH region_list AS (
SELECT ARRAY['${REGIONS[0]}', '${REGIONS[1]}', '${REGIONS[2]}']::TEXT[] region
), amount_list AS (
SELECT ARRAY[${AMOUNTS[0]}, ${AMOUNTS[1]}, ${AMOUNTS[2]}]::INT[] amount
)
INSERT INTO sales_region
(id, amount, branch, region)
SELECT
n,
amount[1 + mod(n, array_length(amount, 1))],
'Branch ' || n as branch,
region[1 + mod(n, array_length(region, 1))]
FROM amount_list, region_list, generate_series(1, $ROW_COUNT) as n;
"
run_psql "${SOURCE_DB_NAME}" "$sql_sales_region"

# Insert into test_partitions_sequences table
# (no explicit id: the table's sequence supplies it)
sql_test_partitions_sequences="
WITH region_list AS (
SELECT ARRAY['${REGIONS[0]}', '${REGIONS[1]}', '${REGIONS[2]}']::TEXT[] region
), amount_list AS (
SELECT ARRAY[${AMOUNTS[0]}, ${AMOUNTS[1]}, ${AMOUNTS[2]}]::INT[] amount
)
INSERT INTO test_partitions_sequences
(amount, branch, region)
SELECT
amount[1 + mod(n, array_length(amount, 1))],
'Branch ' || n as branch,
region[1 + mod(n, array_length(region, 1))]
FROM amount_list, region_list, generate_series(1, $ROW_COUNT) as n;
"
run_psql "${SOURCE_DB_NAME}" "$sql_test_partitions_sequences"

# Insert into p1.sales_region table
sql_p1_sales_region="
WITH region_list AS (
SELECT ARRAY['${REGIONS[0]}', '${REGIONS[1]}', '${REGIONS[2]}']::TEXT[] region
), amount_list AS (
SELECT ARRAY[${AMOUNTS[0]}, ${AMOUNTS[1]}, ${AMOUNTS[2]}]::INT[] amount
)
INSERT INTO p1.sales_region
(id, amount, branch, region)
SELECT
n,
amount[1 + mod(n, array_length(amount, 1))],
'Branch ' || n as branch,
region[1 + mod(n, array_length(region, 1))]
FROM amount_list, region_list, generate_series(1, $ROW_COUNT) as n;
"
run_psql "${SOURCE_DB_NAME}" "$sql_p1_sales_region"

# Insert into sales table
# NOTE(review): sale_date is indexed with array_length(amount, 1); both
# arrays have 3 entries so the cycles line up — confirm this coupling is
# intentional before changing either array's size.
sql_sales="
WITH amount_list AS (
SELECT ARRAY[${AMOUNTS[0]}, ${AMOUNTS[1]}, ${AMOUNTS[2]}]::INT[] amount
), date_list AS (
SELECT ARRAY['2019-11-01'::TIMESTAMP, '2020-02-01'::TIMESTAMP, '2020-05-01'::TIMESTAMP] sale_date
)
INSERT INTO sales
(id, p_name, amount, sale_date)
SELECT
n,
'Person ' || n as p_name,
amount[1 + mod(n, array_length(amount, 1))],
sale_date[1 + mod(n, array_length(amount, 1))]
FROM
amount_list,
date_list,
generate_series(1, $ROW_COUNT) as n;
"
run_psql "${SOURCE_DB_NAME}" "$sql_sales"

# Insert into range_columns_partition_test table
# (fixed values, independent of ROW_COUNT)
sql_range_columns_partition_test="
INSERT INTO range_columns_partition_test
VALUES
(5, 5),
(3, 4),
(5, 11),
(5, 12),
(4, 3),
(3, 1);
"
run_psql "${SOURCE_DB_NAME}" "$sql_range_columns_partition_test"

# Echo each inserted row with the partition (tableoid) it landed in,
# for debugging partition routing.
sql_select_range_columns_partition_test="
SELECT
tableoid :: regclass,
*
FROM
range_columns_partition_test;
"
run_psql "${SOURCE_DB_NAME}" "$sql_select_range_columns_partition_test"

# Insert into emp table
sql_emp="
INSERT INTO emp
SELECT num, 'user_' || num , (RANDOM()*50)::INTEGER
FROM generate_series(1, $ROW_COUNT) AS num;
"
run_psql "${SOURCE_DB_NAME}" "$sql_emp"

# Insert into customers table
sql_customers="
WITH status_list AS (
SELECT '{"ACTIVE", "RECURRING", "REACTIVATED", "EXPIRED"}'::TEXT[] statuses
), arr_list AS (
SELECT '{100, 200, 50, 250}'::INT[] arr
)
INSERT INTO customers
(id, statuses, arr)
SELECT n,
statuses[1 + mod(n, array_length(statuses, 1))],
arr[1 + mod(n, array_length(arr, 1))]
FROM arr_list, generate_series(1,$ROW_COUNT) AS n, status_list;
"
run_psql "${SOURCE_DB_NAME}" "$sql_customers"


Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def generate_yaml(num_tables=1250):
},
"row_count": {},
"resumption": {
"max_retries": 50,
"max_restarts": 50,
"min_interrupt_seconds": 15,
"max_interrupt_seconds": 30,
"min_restart_wait_seconds": 15,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ row_count:

# Resumption Settings
resumption:
max_retries: 30
max_restarts: 30
min_interrupt_seconds: 300
max_interrupt_seconds: 720
min_restart_wait_seconds: 30
Expand Down
Loading