From ccd5a3743b2e5c746b272ce9b242cf21d873a7ad Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Mon, 3 Jun 2024 14:09:05 +0530 Subject: [PATCH 01/14] init1: added business logic to horizontally scale based on tablet required --- yb-voyager/src/migassessment/sizing.go | 104 +++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 8 deletions(-) diff --git a/yb-voyager/src/migassessment/sizing.go b/yb-voyager/src/migassessment/sizing.go index 6b97ce31e..5bb33d3d4 100644 --- a/yb-voyager/src/migassessment/sizing.go +++ b/yb-voyager/src/migassessment/sizing.go @@ -92,11 +92,17 @@ const ( SHARDED_LOAD_TIME_TABLE = "sharded_load_time" // GITHUB_RAW_LINK use raw github link to fetch the file from repository using the api: // https://raw.githubusercontent.com/{username-or-organization}/{repository}/{branch}/{path-to-file} - GITHUB_RAW_LINK = "https://raw.githubusercontent.com/yugabyte/yb-voyager/main/yb-voyager/src/migassessment/resources" - EXPERIMENT_DATA_FILENAME = "yb_2024_0_source.db" - DBS_DIR = "dbs" - SIZE_UNIT_GB = "GB" - SIZE_UNIT_MB = "MB" + GITHUB_RAW_LINK = "https://raw.githubusercontent.com/yugabyte/yb-voyager/main/yb-voyager/src/migassessment/resources" + EXPERIMENT_DATA_FILENAME = "yb_2024_0_source.db" + DBS_DIR = "dbs" + SIZE_UNIT_GB = "GB" + SIZE_UNIT_MB = "MB" + LOW_PHASE_SHARD_COUNT = 8 + LOW_PHASE_SIZE_THRESHOLD_GB = 0.512 + HIGH_PHASE_SHARD_COUNT = 24 + HIGH_PHASE_SIZE_THRESHOLD_GB = 10 + FINAL_PHASE_SIZE_THRESHOLD_GB = 100 + MAX_TABLETS_PER_TABLE = 256 ) var ExperimentDB *sql.DB @@ -150,8 +156,9 @@ func SizingAssessment() error { sizingRecommendationPerCore = checkShardedTableLimit(sourceIndexMetadata, shardedLimits, sizingRecommendationPerCore) - sizingRecommendationPerCore = findNumNodesNeeded(sourceIndexMetadata, shardedThroughput, sizingRecommendationPerCore) + sizingRecommendationPerCore = findNumNodesNeededBasedOnThroughputRequirement(sourceIndexMetadata, shardedThroughput, sizingRecommendationPerCore) + sizingRecommendationPerCore = findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata, shardedLimits, sizingRecommendationPerCore) finalSizingRecommendation := pickBestRecommendation(sizingRecommendationPerCore) if finalSizingRecommendation.FailureReasoning != "" { @@ -249,7 +256,7 @@ func pickBestRecommendation(recommendation map[int]IntermediateRecommendation) I } /* -findNumNodesNeeded calculates the number of nodes needed based on sharded throughput limits and updates the recommendation accordingly. +findNumNodesNeededBasedOnThroughputRequirement calculates the number of nodes needed based on sharded throughput limits and updates the recommendation accordingly. Parameters: - sourceIndexMetadata: A slice of SourceDBMetadata structs representing source indexes. - shardedLimits: A slice of ExpDataShardedThroughput structs representing sharded throughput limits. @@ -258,7 +265,7 @@ Parameters: Returns: - An updated map of recommendations with the number of nodes needed. 
*/ -func findNumNodesNeeded(sourceIndexMetadata []SourceDBMetadata, shardedLimits []ExpDataShardedThroughput, +func findNumNodesNeededBasedOnThroughputRequirement(sourceIndexMetadata []SourceDBMetadata, shardedLimits []ExpDataShardedThroughput, recommendation map[int]IntermediateRecommendation) map[int]IntermediateRecommendation { // Iterate over sharded throughput limits for _, shardedLimit := range shardedLimits { @@ -311,6 +318,87 @@ func findNumNodesNeeded(sourceIndexMetadata []SourceDBMetadata, shardedLimits [] return recommendation } +func findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata []SourceDBMetadata, + shardedLimits []ExpDataShardedLimit, + recommendation map[int]IntermediateRecommendation) map[int]IntermediateRecommendation { + // Iterate over each intermediate recommendation where failureReasoning is empty + totalTabletsRequired := 0 + for _, rec := range recommendation { + if len(rec.ShardedTables) != 0 && rec.FailureReasoning == "" { + // Iterate over each table and its indexes to find out how many tablets are needed + for _, table := range rec.ShardedTables { + // check and fetch indexes size data for table + _, indexesSizeSum, _, _ := checkAndFetchIndexes(table, sourceIndexMetadata) + threshold, tabletsRequired := getThresholdAndTablets(table.Size + indexesSizeSum) + totalTabletsRequired += tabletsRequired + fmt.Printf("Table %s with size %f GB requires %d tablets from threshold %d", table.ObjectName, table.Size, tabletsRequired, threshold) + } + // assuming table limits is also a tablet limit + // get shardedLimit of current recommendation + for _, record := range shardedLimits { + if record.numCores.Valid && int(record.numCores.Float64) == rec.VCPUsPerInstance { + nodesRequired := math.Ceil(float64(totalTabletsRequired) / float64(record.maxSupportedNumTables.Int64)) + // update recommendation to use the maximum of the existing recommended nodes and nodes calculated based on tablets + if nodesRequired > rec.NumNodes { + fmt.Printf("Tablets required %d, existing recommendation: %f new recommendation: %f\n", totalTabletsRequired, rec.NumNodes, nodesRequired) + } else { + fmt.Printf("Tablets required %d, existing recommendation: %f. Staying with current recommendation\n", totalTabletsRequired, rec.NumNodes) + } + rec.NumNodes = math.Max(rec.NumNodes, nodesRequired) + } + } + } + } + // return updated recommendations + return recommendation +} + +/* +getThresholdAndTablets determines the size threshold and number of tablets needed for a given table size. + +Parameters: +- sizeGB: float64 - The size of the table in gigabytes. + +Returns: +- float64: The size threshold in gigabytes for the table based on the phase. +- int: The number of tablets needed for the table. + +Description: +This function calculates which size threshold applies to a table based on its size and determines the number of tablets required. +- For sizes up to the low phase limit (8 shards of 512 MB each, up to 4 GB), the low phase threshold is used. +- For sizes up to the high phase limit (24 shards of 10 GB each, up to 240 GB), the high phase threshold is used. +- For larger sizes, the final phase threshold (100 GB) is used, with a maximum of 256 tablets per table. 
+*/ +func getThresholdAndTablets(sizeGB float64) (float64, int) { + var threshold float64 + var tablets int + + if sizeGB <= LOW_PHASE_SIZE_THRESHOLD_GB*float64(LOW_PHASE_SHARD_COUNT) { + threshold = LOW_PHASE_SIZE_THRESHOLD_GB + tablets = int(sizeGB / LOW_PHASE_SIZE_THRESHOLD_GB) + if tablets < 1 { + tablets = 1 + } + } else if sizeGB <= HIGH_PHASE_SIZE_THRESHOLD_GB*float64(HIGH_PHASE_SHARD_COUNT) { + threshold = HIGH_PHASE_SIZE_THRESHOLD_GB + tablets = int(sizeGB / HIGH_PHASE_SIZE_THRESHOLD_GB) + if tablets < 1 { + tablets = 1 + } + } else { + threshold = FINAL_PHASE_SIZE_THRESHOLD_GB + tablets = int(sizeGB / FINAL_PHASE_SIZE_THRESHOLD_GB) + if tablets < 1 { + tablets = 1 + } + if tablets > MAX_TABLETS_PER_TABLE { + tablets = MAX_TABLETS_PER_TABLE + } + } + + return threshold, tablets +} + /* checkShardedTableLimit checks if the total number of sharded tables exceeds the sharded limit for each core configuration. If the limit is exceeded, it updates the recommendation with a failure reasoning. From e45b31da7f49a238913c2bec2348929090cb7365 Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Mon, 3 Jun 2024 21:18:45 +0530 Subject: [PATCH 02/14] init2: added business logic to horizontally scale based on tablet required --- yb-voyager/src/migassessment/sizing.go | 68 ++++++++++++++++---------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/yb-voyager/src/migassessment/sizing.go b/yb-voyager/src/migassessment/sizing.go index 5bb33d3d4..3e8fc2220 100644 --- a/yb-voyager/src/migassessment/sizing.go +++ b/yb-voyager/src/migassessment/sizing.go @@ -331,7 +331,7 @@ func findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata []SourceDBMeta _, indexesSizeSum, _, _ := checkAndFetchIndexes(table, sourceIndexMetadata) threshold, tabletsRequired := getThresholdAndTablets(table.Size + indexesSizeSum) totalTabletsRequired += tabletsRequired - fmt.Printf("Table %s with size %f GB requires %d tablets from threshold %d", table.ObjectName, table.Size, tabletsRequired, threshold) + fmt.Printf("Table %s with size %f GB requires %d tablets from threshold %f", table.ObjectName, table.Size, tabletsRequired, threshold) } // assuming table limits is also a tablet limit // get shardedLimit of current recommendation @@ -366,37 +366,53 @@ Returns: Description: This function calculates which size threshold applies to a table based on its size and determines the number of tablets required. - For sizes up to the low phase limit (8 shards of 512 MB each, up to 4 GB), the low phase threshold is used. +- Intermediate phase upto 80 GB is calculated based on 8 tablets of 10 GB each. - For sizes up to the high phase limit (24 shards of 10 GB each, up to 240 GB), the high phase threshold is used. -- For larger sizes, the final phase threshold (100 GB) is used, with a maximum of 256 tablets per table. +- For larger sizes, the final phase threshold (100 GB) is used. 
*/ func getThresholdAndTablets(sizeGB float64) (float64, int) { - var threshold float64 - var tablets int - - if sizeGB <= LOW_PHASE_SIZE_THRESHOLD_GB*float64(LOW_PHASE_SHARD_COUNT) { - threshold = LOW_PHASE_SIZE_THRESHOLD_GB - tablets = int(sizeGB / LOW_PHASE_SIZE_THRESHOLD_GB) - if tablets < 1 { - tablets = 1 - } - } else if sizeGB <= HIGH_PHASE_SIZE_THRESHOLD_GB*float64(HIGH_PHASE_SHARD_COUNT) { - threshold = HIGH_PHASE_SIZE_THRESHOLD_GB - tablets = int(sizeGB / HIGH_PHASE_SIZE_THRESHOLD_GB) - if tablets < 1 { - tablets = 1 - } + var tablets = math.Ceil(sizeGB / LOW_PHASE_SIZE_THRESHOLD_GB) + + if tablets <= LOW_PHASE_SHARD_COUNT { + // this means that table size is less than 4GB. So 8 tablets of 512MB each will be enough + return LOW_PHASE_SIZE_THRESHOLD_GB, int(tablets) } else { - threshold = FINAL_PHASE_SIZE_THRESHOLD_GB - tablets = int(sizeGB / FINAL_PHASE_SIZE_THRESHOLD_GB) - if tablets < 1 { - tablets = 1 - } - if tablets > MAX_TABLETS_PER_TABLE { - tablets = MAX_TABLETS_PER_TABLE + // this means that table size is more than 4GB. + // find out the per tablet size if it is less than 10GB which is high phase threshold + perTabletSize := sizeGB / LOW_PHASE_SHARD_COUNT + if perTabletSize <= HIGH_PHASE_SIZE_THRESHOLD_GB { + // tablet count is still 8 but the size of each tablet is less than 10GB(table size < 80GB). So same number of tablets could fit + return HIGH_PHASE_SIZE_THRESHOLD_GB, LOW_PHASE_SHARD_COUNT + } else { + // table size is > 80GB. So we need to increase the tablet count + tablets = math.Ceil(LOW_PHASE_SHARD_COUNT + (sizeGB-LOW_PHASE_SHARD_COUNT*HIGH_PHASE_SIZE_THRESHOLD_GB)/HIGH_PHASE_SIZE_THRESHOLD_GB) + if tablets <= HIGH_PHASE_SHARD_COUNT { + // this means that table size is less than 240GB. So tablets of 10GB each will be enough + return HIGH_PHASE_SIZE_THRESHOLD_GB, int(tablets) + } else { + // this means that table size is more than 240 GB. + // find out the per tablet size if it is less than 100GB which is final phase threshold + perTabletSize = sizeGB / HIGH_PHASE_SHARD_COUNT + if perTabletSize <= FINAL_PHASE_SIZE_THRESHOLD_GB { + // tablet count is still 24 but the size of each tablet is less than 100GB(table size < 2400GB). So same number of tablets could fit + return FINAL_PHASE_SIZE_THRESHOLD_GB, HIGH_PHASE_SHARD_COUNT + } else { + // table size is > 2400GB. So we need to increase the tablet count + tablets = math.Ceil(HIGH_PHASE_SHARD_COUNT + (sizeGB-HIGH_PHASE_SHARD_COUNT*FINAL_PHASE_SIZE_THRESHOLD_GB)/FINAL_PHASE_SIZE_THRESHOLD_GB) + if tablets <= MAX_TABLETS_PER_TABLE { + // this means that table size is less than 25600GB. So 256 tablets of 100GB each will be enough + return FINAL_PHASE_SIZE_THRESHOLD_GB, int(tablets) + } else { + // to support table size > 25600GB, tablets per table limit in YugabyteDB needs to be + // set to 0(meaning no limit). 
Refer doc: + //https://docs.yugabyte.com/preview/architecture/docdb-sharding/tablet-splitting/#final-phase + tablets = math.Ceil(MAX_TABLETS_PER_TABLE + (sizeGB-MAX_TABLETS_PER_TABLE*FINAL_PHASE_SIZE_THRESHOLD_GB)/FINAL_PHASE_SIZE_THRESHOLD_GB) + return FINAL_PHASE_SIZE_THRESHOLD_GB, int(tablets) + } + } + } } } - - return threshold, tablets } /* From 5fc0c02c9334d6408979e6ab8c5dbf1eb55afaeb Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Mon, 3 Jun 2024 21:24:15 +0530 Subject: [PATCH 03/14] init3 --- yb-voyager/src/migassessment/sizing.go | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/yb-voyager/src/migassessment/sizing.go b/yb-voyager/src/migassessment/sizing.go index 3e8fc2220..575b4c20b 100644 --- a/yb-voyager/src/migassessment/sizing.go +++ b/yb-voyager/src/migassessment/sizing.go @@ -323,15 +323,14 @@ func findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata []SourceDBMeta recommendation map[int]IntermediateRecommendation) map[int]IntermediateRecommendation { // Iterate over each intermediate recommendation where failureReasoning is empty totalTabletsRequired := 0 - for _, rec := range recommendation { + for index, rec := range recommendation { if len(rec.ShardedTables) != 0 && rec.FailureReasoning == "" { // Iterate over each table and its indexes to find out how many tablets are needed for _, table := range rec.ShardedTables { // check and fetch indexes size data for table _, indexesSizeSum, _, _ := checkAndFetchIndexes(table, sourceIndexMetadata) - threshold, tabletsRequired := getThresholdAndTablets(table.Size + indexesSizeSum) + _, tabletsRequired := getThresholdAndTablets(table.Size + indexesSizeSum) totalTabletsRequired += tabletsRequired - fmt.Printf("Table %s with size %f GB requires %d tablets from threshold %f", table.ObjectName, table.Size, tabletsRequired, threshold) } // assuming table limits is also a tablet limit // get shardedLimit of current recommendation @@ -339,12 +338,8 @@ func findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata []SourceDBMeta if record.numCores.Valid && int(record.numCores.Float64) == rec.VCPUsPerInstance { nodesRequired := math.Ceil(float64(totalTabletsRequired) / float64(record.maxSupportedNumTables.Int64)) // update recommendation to use the maximum of the existing recommended nodes and nodes calculated based on tablets - if nodesRequired > rec.NumNodes { - fmt.Printf("Tablets required %d, existing recommendation: %f new recommendation: %f\n", totalTabletsRequired, rec.NumNodes, nodesRequired) - } else { - fmt.Printf("Tablets required %d, existing recommendation: %f. Staying with current recommendation\n", totalTabletsRequired, rec.NumNodes) - } rec.NumNodes = math.Max(rec.NumNodes, nodesRequired) + recommendation[index] = rec } } } From d1d0760a157a89c1d289efada46b5bc683623c5a Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Mon, 3 Jun 2024 21:32:23 +0530 Subject: [PATCH 04/14] init4 --- yb-voyager/src/migassessment/sizing.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/yb-voyager/src/migassessment/sizing.go b/yb-voyager/src/migassessment/sizing.go index 575b4c20b..c4747de52 100644 --- a/yb-voyager/src/migassessment/sizing.go +++ b/yb-voyager/src/migassessment/sizing.go @@ -369,30 +369,30 @@ func getThresholdAndTablets(sizeGB float64) (float64, int) { var tablets = math.Ceil(sizeGB / LOW_PHASE_SIZE_THRESHOLD_GB) if tablets <= LOW_PHASE_SHARD_COUNT { - // this means that table size is less than 4GB. 
So 8 tablets of 512MB each will be enough + // table size is less than 4GB, hence 8 tablets of 512MB each will be enough return LOW_PHASE_SIZE_THRESHOLD_GB, int(tablets) } else { - // this means that table size is more than 4GB. + // table size is more than 4GB. // find out the per tablet size if it is less than 10GB which is high phase threshold perTabletSize := sizeGB / LOW_PHASE_SHARD_COUNT if perTabletSize <= HIGH_PHASE_SIZE_THRESHOLD_GB { - // tablet count is still 8 but the size of each tablet is less than 10GB(table size < 80GB). So same number of tablets could fit + // tablet count is still 8 but the size of each tablet is less than 10GB(table size < 80GB). return HIGH_PHASE_SIZE_THRESHOLD_GB, LOW_PHASE_SHARD_COUNT } else { - // table size is > 80GB. So we need to increase the tablet count + // table size is > 80GB, hence we need to increase the tablet count tablets = math.Ceil(LOW_PHASE_SHARD_COUNT + (sizeGB-LOW_PHASE_SHARD_COUNT*HIGH_PHASE_SIZE_THRESHOLD_GB)/HIGH_PHASE_SIZE_THRESHOLD_GB) if tablets <= HIGH_PHASE_SHARD_COUNT { - // this means that table size is less than 240GB. So tablets of 10GB each will be enough + // this means that table size is less than 240GB, hence 24 tablets of 10GB each will be enough return HIGH_PHASE_SIZE_THRESHOLD_GB, int(tablets) } else { - // this means that table size is more than 240 GB. + // table size is more than 240 GB. // find out the per tablet size if it is less than 100GB which is final phase threshold perTabletSize = sizeGB / HIGH_PHASE_SHARD_COUNT if perTabletSize <= FINAL_PHASE_SIZE_THRESHOLD_GB { - // tablet count is still 24 but the size of each tablet is less than 100GB(table size < 2400GB). So same number of tablets could fit + // tablet count is still 24 but the size of each tablet is less than 100GB(table size < 2400GB). return FINAL_PHASE_SIZE_THRESHOLD_GB, HIGH_PHASE_SHARD_COUNT } else { - // table size is > 2400GB. So we need to increase the tablet count + // table size is > 2400GB, hence we need to increase the tablet count tablets = math.Ceil(HIGH_PHASE_SHARD_COUNT + (sizeGB-HIGH_PHASE_SHARD_COUNT*FINAL_PHASE_SIZE_THRESHOLD_GB)/FINAL_PHASE_SIZE_THRESHOLD_GB) if tablets <= MAX_TABLETS_PER_TABLE { // this means that table size is less than 25600GB. 
So 256 tablets of 100GB each will be enough From b60fa6d107d6612900282792b435e0912ece8333 Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Tue, 4 Jun 2024 16:32:23 +0530 Subject: [PATCH 05/14] provide a way to use existing keypair file --- yb-voyager/src/migassessment/sizing.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yb-voyager/src/migassessment/sizing.go b/yb-voyager/src/migassessment/sizing.go index c4747de52..b3af7db01 100644 --- a/yb-voyager/src/migassessment/sizing.go +++ b/yb-voyager/src/migassessment/sizing.go @@ -322,8 +322,8 @@ func findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata []SourceDBMeta shardedLimits []ExpDataShardedLimit, recommendation map[int]IntermediateRecommendation) map[int]IntermediateRecommendation { // Iterate over each intermediate recommendation where failureReasoning is empty - totalTabletsRequired := 0 for index, rec := range recommendation { + totalTabletsRequired := 0 if len(rec.ShardedTables) != 0 && rec.FailureReasoning == "" { // Iterate over each table and its indexes to find out how many tablets are needed for _, table := range rec.ShardedTables { @@ -336,7 +336,8 @@ func findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata []SourceDBMeta // get shardedLimit of current recommendation for _, record := range shardedLimits { if record.numCores.Valid && int(record.numCores.Float64) == rec.VCPUsPerInstance { - nodesRequired := math.Ceil(float64(totalTabletsRequired) / float64(record.maxSupportedNumTables.Int64)) + // considering RF=3, hence total required tablets would be 3 times the totalTabletsRequired + nodesRequired := math.Ceil(float64(totalTabletsRequired*3) / float64(record.maxSupportedNumTables.Int64)) // update recommendation to use the maximum of the existing recommended nodes and nodes calculated based on tablets rec.NumNodes = math.Max(rec.NumNodes, nodesRequired) recommendation[index] = rec From 928a77579b5518d780413f80ab372d4d18603a48 Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Wed, 5 Jun 2024 14:34:32 +0530 Subject: [PATCH 06/14] provide a path to the loadSourceMetadata func --- yb-voyager/src/migassessment/sizing.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/yb-voyager/src/migassessment/sizing.go b/yb-voyager/src/migassessment/sizing.go index b3af7db01..a49e3e0d8 100644 --- a/yb-voyager/src/migassessment/sizing.go +++ b/yb-voyager/src/migassessment/sizing.go @@ -117,7 +117,7 @@ var experimentData20240 []byte func SizingAssessment() error { log.Infof("loading metadata files for sharding assessment") - sourceTableMetadata, sourceIndexMetadata, _, err := loadSourceMetadata() + sourceTableMetadata, sourceIndexMetadata, _, err := loadSourceMetadata(GetSourceMetadataDBFilePath()) if err != nil { SizingReport.FailureReasoning = fmt.Sprintf("failed to load source metadata: %v", err) return fmt.Errorf("failed to load source metadata: %w", err) @@ -752,8 +752,7 @@ Returns: []SourceDBMetadata: all index objects from source db float64: total size of source db */ -func loadSourceMetadata() ([]SourceDBMetadata, []SourceDBMetadata, float64, error) { - filePath := GetSourceMetadataDBFilePath() +func loadSourceMetadata(filePath string) ([]SourceDBMetadata, []SourceDBMetadata, float64, error) { SourceMetaDB, err := utils.ConnectToSqliteDatabase(filePath) if err != nil { return nil, nil, 0.0, fmt.Errorf("cannot connect to source metadata database: %w", err) From c71a983027da4d4cb3d79e3f126c5700d2de1e4e Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Wed, 5 Jun 2024 16:37:07 
+0530 Subject: [PATCH 07/14] init1: unit test for sizing calculator --- yb-voyager/go.mod | 1 + yb-voyager/go.sum | 3 + yb-voyager/src/migassessment/sizing_test.go | 149 ++++++++++++++++++++ 3 files changed, 153 insertions(+) create mode 100644 yb-voyager/src/migassessment/sizing_test.go diff --git a/yb-voyager/go.mod b/yb-voyager/go.mod index b14945017..7c536d930 100644 --- a/yb-voyager/go.mod +++ b/yb-voyager/go.mod @@ -40,6 +40,7 @@ require ( ) require ( + github.com/DATA-DOG/go-sqlmock v1.5.2 // indirect github.com/jackc/puddle v1.3.0 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/pganalyze/pg_query_go/v5 v5.1.0 // indirect diff --git a/yb-voyager/go.sum b/yb-voyager/go.sum index 0706211f5..aaaa7ac4a 100644 --- a/yb-voyager/go.sum +++ b/yb-voyager/go.sum @@ -476,6 +476,8 @@ github.com/AzureAD/microsoft-authentication-library-for-go v0.8.1 h1:oPdPEZFSbl7 github.com/AzureAD/microsoft-authentication-library-for-go v0.8.1/go.mod h1:4qFor3D/HDsvBME35Xy9rwW9DecL+M2sNw1ybjPtwA0= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= +github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/GoogleCloudPlatform/cloudsql-proxy v1.33.2/go.mod h1:uqoR4sJc63p7ugW8a/vsEspOsNuehbi7ptS2CHCyOnY= github.com/HdrHistogram/hdrhistogram-go v1.1.0/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= @@ -1390,6 +1392,7 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE= github.com/klauspost/compress v1.10.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= diff --git a/yb-voyager/src/migassessment/sizing_test.go b/yb-voyager/src/migassessment/sizing_test.go new file mode 100644 index 000000000..06a92ef9f --- /dev/null +++ b/yb-voyager/src/migassessment/sizing_test.go @@ -0,0 +1,149 @@ +/* +Copyright (c) YugabyteDB, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package migassessment + +import ( + "database/sql" + "errors" + "github.com/DATA-DOG/go-sqlmock" + "github.com/stretchr/testify/assert" + "math" + "testing" +) + +const ( + SOURCEDB_SELECT_QUERY = "SELECT schema_name, object_name, row_count, reads_per_second, writes_per_second, " + + "is_index, parent_table_name, size_in_bytes FROM table_index_stats ORDER BY size_in_bytes ASC" +) + +var SourceDBColumns = []string{"schema_name", "object_name", "row_count", "reads_per_second", "writes_per_second", + "is_index", "parent_table_name", "size_in_bytes"} + +/* +===== Test functions to test getSourceMetadata function ===== +*/ +func TestGetSourceMetadata_Success(t *testing.T) { + db, mock := createMockDB(t) + rows := sqlmock.NewRows(SourceDBColumns). + AddRow("public", "table1", 1000, 10, 5, false, "", 1048576000). + AddRow("public", "index1", 0, 0, 0, true, "table1", 104857600) + + mock.ExpectQuery(SOURCEDB_SELECT_QUERY).WillReturnRows(rows) + + sourceTableMetadata, sourceIndexMetadata, totalSourceDBSize, err := getSourceMetadata(db) + // assert if there are errors + assert.NoError(t, err) + // check if the total tables are equal to expected tables + assert.Len(t, sourceTableMetadata, 1) + // check if the total indexes are equal to expected indexes + assert.Len(t, sourceIndexMetadata, 1) + // check if the total size of the source database is equal to the expected size + assert.True(t, 1.07 == Round(totalSourceDBSize, 2)) + // check if the values of the source table metadata are equal to the expected values + assert.Equal(t, "public", sourceTableMetadata[0].SchemaName) + assert.Equal(t, "table1", sourceTableMetadata[0].ObjectName) + assert.Equal(t, "public", sourceIndexMetadata[0].SchemaName) + assert.Equal(t, "index1", sourceIndexMetadata[0].ObjectName) +} + +func TestGetSourceMetadata_QueryError(t *testing.T) { + db, mock := createMockDB(t) + mock.ExpectQuery(SOURCEDB_SELECT_QUERY).WillReturnError(errors.New("query error")) + + _, _, _, err := getSourceMetadata(db) + assert.Error(t, err) + // check if the error returned contains the expected string + assert.Contains(t, err.Error(), "failed to query source metadata") +} + +func TestGetSourceMetadata_RowScanError(t *testing.T) { + db, mock := createMockDB(t) + // 4th column is expected to be int, but as it is float, it will throw an error + rows := sqlmock.NewRows(SourceDBColumns).AddRow("public", "table1", 1000, 10.5, 5, false, "", 1048576000). 
+ RowError(1, errors.New("row scan error")) + mock.ExpectQuery(SOURCEDB_SELECT_QUERY).WillReturnRows(rows) + + _, _, _, err := getSourceMetadata(db) + assert.Error(t, err) + // check if the error is as expected + assert.Contains(t, err.Error(), "failed to read from result set of query source metadata") +} + +func TestGetSourceMetadata_NoRows(t *testing.T) { + db, mock := createMockDB(t) + rows := sqlmock.NewRows(SourceDBColumns) + mock.ExpectQuery(SOURCEDB_SELECT_QUERY).WillReturnRows(rows) + sourceTableMetadata, sourceIndexMetadata, totalSourceDBSize, err := getSourceMetadata(db) + + assert.NoError(t, err) + // since there is no mock data, all the fields are expected to be empty + assert.Empty(t, sourceTableMetadata) + assert.Empty(t, sourceIndexMetadata) + assert.Equal(t, 0.0, totalSourceDBSize) +} + +/* +===== Test functions to test getTargetMetadata function ===== +*/ + +/* +===== Test functions to test shardingBasedOnTableSizeAndCount function ===== +*/ + +/* +===== Test functions to test shardingBasedOnOperations function ===== +*/ + +/* +===== Test functions to test checkShardedTableLimit function ===== +*/ + +/* +===== Test functions to test findNumNodesNeededBasedOnThroughputRequirement function ===== +*/ + +/* +===== Test functions to test findNumNodesNeededBasedOnTabletsRequired function ===== +*/ + +/* +===== Test functions to test pickBestRecommendation function ===== +*/ + +/* +===== Test functions to test calculateTimeTakenAndParallelJobsForImport function ===== +*/ + +/* +===== Test functions to test getReasoning function ===== +*/ + +/* +====================HELPER FUNCTIONS==================== +*/ + +// Round rounds a float64 to n decimal places. +func Round(val float64, precision int) float64 { + ratio := math.Pow(10, float64(precision)) + return math.Round(val*ratio) / ratio +} + +func createMockDB(t *testing.T) (*sql.DB, sqlmock.Sqlmock) { + db, mock, err := sqlmock.New() + assert.NoError(t, err) + return db, mock +} From 80c68b7e579aff3f0493eb988f1ae79897e1dc78 Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Wed, 5 Jun 2024 16:45:54 +0530 Subject: [PATCH 08/14] calculating tablets required for indexes separately --- yb-voyager/src/migassessment/sizing.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/yb-voyager/src/migassessment/sizing.go b/yb-voyager/src/migassessment/sizing.go index b3af7db01..ab5742cbc 100644 --- a/yb-voyager/src/migassessment/sizing.go +++ b/yb-voyager/src/migassessment/sizing.go @@ -327,9 +327,16 @@ func findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata []SourceDBMeta if len(rec.ShardedTables) != 0 && rec.FailureReasoning == "" { // Iterate over each table and its indexes to find out how many tablets are needed for _, table := range rec.ShardedTables { - // check and fetch indexes size data for table - _, indexesSizeSum, _, _ := checkAndFetchIndexes(table, sourceIndexMetadata) - _, tabletsRequired := getThresholdAndTablets(table.Size + indexesSizeSum) + _, tabletsRequired := getThresholdAndTablets(table.Size) + for _, index := range sourceIndexMetadata { + if index.ParentTableName.Valid && (index.ParentTableName.String == (table.SchemaName + "." 
+ table.ObjectName)) { + // calculating tablets required for each of the index + _, tabletsRequiredForIndex := getThresholdAndTablets(index.Size) + // tablets required for each table is the sum of tablets required for the table and its indexes + tabletsRequired += tabletsRequiredForIndex + } + } + // adding total tablets required across all tables totalTabletsRequired += tabletsRequired } // assuming table limits is also a tablet limit From 4dca92d9721d46e2bdd07217870e708550e8ad9c Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Thu, 6 Jun 2024 16:16:14 +0530 Subject: [PATCH 09/14] fixing variable naming --- yb-voyager/src/migassessment/sizing.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yb-voyager/src/migassessment/sizing.go b/yb-voyager/src/migassessment/sizing.go index ab5742cbc..c84654866 100644 --- a/yb-voyager/src/migassessment/sizing.go +++ b/yb-voyager/src/migassessment/sizing.go @@ -322,7 +322,7 @@ func findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata []SourceDBMeta shardedLimits []ExpDataShardedLimit, recommendation map[int]IntermediateRecommendation) map[int]IntermediateRecommendation { // Iterate over each intermediate recommendation where failureReasoning is empty - for index, rec := range recommendation { + for i, rec := range recommendation { totalTabletsRequired := 0 if len(rec.ShardedTables) != 0 && rec.FailureReasoning == "" { // Iterate over each table and its indexes to find out how many tablets are needed @@ -347,7 +347,7 @@ func findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata []SourceDBMeta nodesRequired := math.Ceil(float64(totalTabletsRequired*3) / float64(record.maxSupportedNumTables.Int64)) // update recommendation to use the maximum of the existing recommended nodes and nodes calculated based on tablets rec.NumNodes = math.Max(rec.NumNodes, nodesRequired) - recommendation[index] = rec + recommendation[i] = rec } } } From bc34b3cb96afa14658a8eb51b821571bc29dbac0 Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Wed, 5 Jun 2024 16:45:54 +0530 Subject: [PATCH 10/14] calculating tablets required for indexes separately --- yb-voyager/src/migassessment/sizing.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/yb-voyager/src/migassessment/sizing.go b/yb-voyager/src/migassessment/sizing.go index a49e3e0d8..29f108c76 100644 --- a/yb-voyager/src/migassessment/sizing.go +++ b/yb-voyager/src/migassessment/sizing.go @@ -327,9 +327,16 @@ func findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata []SourceDBMeta if len(rec.ShardedTables) != 0 && rec.FailureReasoning == "" { // Iterate over each table and its indexes to find out how many tablets are needed for _, table := range rec.ShardedTables { - // check and fetch indexes size data for table - _, indexesSizeSum, _, _ := checkAndFetchIndexes(table, sourceIndexMetadata) - _, tabletsRequired := getThresholdAndTablets(table.Size + indexesSizeSum) + _, tabletsRequired := getThresholdAndTablets(table.Size) + for _, index := range sourceIndexMetadata { + if index.ParentTableName.Valid && (index.ParentTableName.String == (table.SchemaName + "." 
+ table.ObjectName)) { + // calculating tablets required for each of the index + _, tabletsRequiredForIndex := getThresholdAndTablets(index.Size) + // tablets required for each table is the sum of tablets required for the table and its indexes + tabletsRequired += tabletsRequiredForIndex + } + } + // adding total tablets required across all tables totalTabletsRequired += tabletsRequired } // assuming table limits is also a tablet limit From 7cdc0dcee196bdbbe4e8c970ca4991f868a4d19a Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Mon, 10 Jun 2024 14:14:39 +0530 Subject: [PATCH 11/14] added unit tests for sizing calculator --- yb-voyager/src/migassessment/sizing.go | 52 +- yb-voyager/src/migassessment/sizing_test.go | 584 +++++++++++++++++++- 2 files changed, 610 insertions(+), 26 deletions(-) diff --git a/yb-voyager/src/migassessment/sizing.go b/yb-voyager/src/migassessment/sizing.go index 50fb39a79..d4b047308 100644 --- a/yb-voyager/src/migassessment/sizing.go +++ b/yb-voyager/src/migassessment/sizing.go @@ -105,8 +105,6 @@ const ( MAX_TABLETS_PER_TABLE = 256 ) -var ExperimentDB *sql.DB - func getExperimentDBPath() string { return filepath.Join(AssessmentDir, DBS_DIR, EXPERIMENT_DATA_FILENAME) } @@ -123,25 +121,25 @@ func SizingAssessment() error { return fmt.Errorf("failed to load source metadata: %w", err) } - err = createConnectionToExperimentData() + experimentDB, err := createConnectionToExperimentData() if err != nil { SizingReport.FailureReasoning = fmt.Sprintf("failed to connect to experiment data: %v", err) return fmt.Errorf("failed to connect to experiment data: %w", err) } - colocatedLimits, err := loadColocatedLimit() + colocatedLimits, err := loadColocatedLimit(experimentDB) if err != nil { SizingReport.FailureReasoning = fmt.Sprintf("error fetching the colocated limits: %v", err) return fmt.Errorf("error fetching the colocated limits: %w", err) } - shardedLimits, err := loadShardedTableLimits() + shardedLimits, err := loadShardedTableLimits(experimentDB) if err != nil { SizingReport.FailureReasoning = fmt.Sprintf("error fetching the sharded limits: %v", err) return fmt.Errorf("error fetching the colocated limits: %w", err) } - shardedThroughput, err := loadShardedThroughput() + shardedThroughput, err := loadShardedThroughput(experimentDB) if err != nil { SizingReport.FailureReasoning = fmt.Sprintf("error fetching the sharded throughput: %v", err) return fmt.Errorf("error fetching the sharded throughput: %w", err) @@ -174,7 +172,7 @@ func SizingAssessment() error { // calculate time taken for colocated import importTimeForColocatedObjects, parallelVoyagerJobsColocated, err := calculateTimeTakenAndParallelJobsForImport(COLOCATED_LOAD_TIME_TABLE, colocatedObjects, - finalSizingRecommendation.VCPUsPerInstance, finalSizingRecommendation.MemoryPerCore) + finalSizingRecommendation.VCPUsPerInstance, finalSizingRecommendation.MemoryPerCore, experimentDB) if err != nil { SizingReport.FailureReasoning = fmt.Sprintf("calculate time taken for colocated data import: %v", err) return fmt.Errorf("calculate time taken for colocated data import: %w", err) @@ -183,7 +181,7 @@ func SizingAssessment() error { // calculate time taken for sharded import importTimeForShardedObjects, parallelVoyagerJobsSharded, err := calculateTimeTakenAndParallelJobsForImport(SHARDED_LOAD_TIME_TABLE, shardedObjects, - finalSizingRecommendation.VCPUsPerInstance, finalSizingRecommendation.MemoryPerCore) + finalSizingRecommendation.VCPUsPerInstance, finalSizingRecommendation.MemoryPerCore, experimentDB) if err != nil { 
SizingReport.FailureReasoning = fmt.Sprintf("calculate time taken for sharded data import: %v", err) return fmt.Errorf("calculate time taken for sharded data import: %w", err) @@ -318,6 +316,17 @@ func findNumNodesNeededBasedOnThroughputRequirement(sourceIndexMetadata []Source return recommendation } +/* +findNumNodesNeededBasedOnTabletsRequired calculates the number of nodes needed based on tablets required by each +table and its indexes and updates the recommendation accordingly. +Parameters: + - sourceIndexMetadata: A slice of SourceDBMetadata structs representing source indexes. + - shardedLimits: A slice of ExpDataShardedThroughput structs representing sharded throughput limits. + - recommendation: A map where the key is the number of vCPUs per instance and the value is an IntermediateRecommendation struct. + +Returns: + - An updated map of recommendations with the number of nodes needed. +*/ func findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata []SourceDBMetadata, shardedLimits []ExpDataShardedLimit, recommendation map[int]IntermediateRecommendation) map[int]IntermediateRecommendation { @@ -631,7 +640,7 @@ Returns: - A slice of ExpDataColocatedLimit structs containing the fetched colocated limits. - An error if there was any issue during the data retrieval process. */ -func loadColocatedLimit() ([]ExpDataColocatedLimit, error) { +func loadColocatedLimit(experimentDB *sql.DB) ([]ExpDataColocatedLimit, error) { var colocatedLimits []ExpDataColocatedLimit query := fmt.Sprintf(` SELECT max_colocated_db_size_gb, @@ -644,7 +653,7 @@ func loadColocatedLimit() ([]ExpDataColocatedLimit, error) { FROM %v ORDER BY num_cores DESC `, COLOCATED_LIMITS_TABLE) - rows, err := ExperimentDB.Query(query) + rows, err := experimentDB.Query(query) if err != nil { return nil, fmt.Errorf("cannot fetch data from experiment data table with query [%s]: %w", query, err) } @@ -674,7 +683,7 @@ Returns: - A slice of ExpDataShardedLimit structs containing the fetched sharded table limits. - An error if there was any issue during the data retrieval process. */ -func loadShardedTableLimits() ([]ExpDataShardedLimit, error) { +func loadShardedTableLimits(experimentDB *sql.DB) ([]ExpDataShardedLimit, error) { // added num_cores >= VCPUPerInstance from colo recommendation as that is the starting point selectQuery := fmt.Sprintf(` SELECT num_cores, memory_per_core, num_tables @@ -682,7 +691,7 @@ func loadShardedTableLimits() ([]ExpDataShardedLimit, error) { WHERE dimension LIKE '%%TableLimits-3nodeRF=3%%' ORDER BY num_cores `, SHARDED_SIZING_TABLE) - rows, err := ExperimentDB.Query(selectQuery) + rows, err := experimentDB.Query(selectQuery) if err != nil { return nil, fmt.Errorf("error while fetching cores info with query [%s]: %w", selectQuery, err) @@ -713,7 +722,7 @@ Returns: - A slice of ExpDataShardedThroughput structs containing the fetched sharded throughput information. - An error if there was any issue during the data retrieval process. 
*/ -func loadShardedThroughput() ([]ExpDataShardedThroughput, error) { +func loadShardedThroughput(experimentDB *sql.DB) ([]ExpDataShardedThroughput, error) { selectQuery := fmt.Sprintf(` SELECT inserts_per_core, selects_per_core, @@ -725,7 +734,7 @@ func loadShardedThroughput() ([]ExpDataShardedThroughput, error) { WHERE dimension = 'MaxThroughput' ORDER BY num_cores DESC; `, SHARDED_SIZING_TABLE) - rows, err := ExperimentDB.Query(selectQuery) + rows, err := experimentDB.Query(selectQuery) if err != nil { return nil, fmt.Errorf("error while fetching throughput info with query [%s]: %w", selectQuery, err) } @@ -808,7 +817,7 @@ Returns: int64: Total parallel jobs used for import. */ func calculateTimeTakenAndParallelJobsForImport(tableName string, dbObjects []SourceDBMetadata, - vCPUPerInstance int, memPerCore int) (float64, int64, error) { + vCPUPerInstance int, memPerCore int, experimentDB *sql.DB) (float64, int64, error) { // the total size of objects var size float64 = 0 var timeTakenOfFetchedRow float64 @@ -838,7 +847,7 @@ func calculateTimeTakenAndParallelJobsForImport(tableName string, dbObjects []So AND num_cores = ? LIMIT 1; `, tableName, tableName, tableName) - row := ExperimentDB.QueryRow(selectQuery, vCPUPerInstance, memPerCore, size, vCPUPerInstance) + row := experimentDB.QueryRow(selectQuery, vCPUPerInstance, memPerCore, size, vCPUPerInstance) if err := row.Scan(&maxSizeOfFetchedRow, &timeTakenOfFetchedRow, ¶llelJobs); err != nil { if err == sql.ErrNoRows { @@ -983,7 +992,7 @@ func getReasoning(recommendation IntermediateRecommendation, shardedObjects []So sizeUnitSharded, shardedReads, shardedWrites) // If colocated objects exist, add sharded objects information as rest of the objects need to be migrated as sharded if len(colocatedObjects) > 0 { - reasoning += " Rest " + shardedReasoning + " need to be migrated as range partitioned tables" + reasoning += " Rest " + shardedReasoning + "need to be migrated as range partitioned tables" } else { reasoning += shardedReasoning + "as sharded." 
} @@ -1051,17 +1060,16 @@ func getListOfIndexesAlongWithObjects(tableList []SourceDBMetadata, return indexesAndObject, cumulativeIndexCount } -func createConnectionToExperimentData() error { +func createConnectionToExperimentData() (*sql.DB, error) { filePath, err := getExperimentFile() if err != nil { - return fmt.Errorf("failed to get experiment file: %w", err) + return nil, fmt.Errorf("failed to get experiment file: %w", err) } DbConnection, err := utils.ConnectToSqliteDatabase(filePath) if err != nil { - return fmt.Errorf("failed to connect to experiment data database: %w", err) + return nil, fmt.Errorf("failed to connect to experiment data database: %w", err) } - ExperimentDB = DbConnection - return nil + return DbConnection, nil } func getExperimentFile() (string, error) { diff --git a/yb-voyager/src/migassessment/sizing_test.go b/yb-voyager/src/migassessment/sizing_test.go index 06a92ef9f..709ad2eb4 100644 --- a/yb-voyager/src/migassessment/sizing_test.go +++ b/yb-voyager/src/migassessment/sizing_test.go @@ -19,6 +19,7 @@ package migassessment import ( "database/sql" "errors" + "fmt" "github.com/DATA-DOG/go-sqlmock" "github.com/stretchr/testify/assert" "math" @@ -33,6 +34,45 @@ const ( var SourceDBColumns = []string{"schema_name", "object_name", "row_count", "reads_per_second", "writes_per_second", "is_index", "parent_table_name", "size_in_bytes"} +var colocatedLimits = []ExpDataColocatedLimit{ + { + maxColocatedSizeSupported: sql.NullFloat64{Float64: 113, Valid: true}, + numCores: sql.NullFloat64{Float64: 2, Valid: true}, + memPerCore: sql.NullFloat64{Float64: 4, Valid: true}, + maxSupportedNumTables: sql.NullInt64{Int64: 2000, Valid: true}, + minSupportedNumTables: sql.NullFloat64{Float64: 1, Valid: true}, + maxSupportedSelectsPerCore: sql.NullFloat64{Float64: 1175, Valid: true}, + maxSupportedInsertsPerCore: sql.NullFloat64{Float64: 357, Valid: true}, + }, + { + maxColocatedSizeSupported: sql.NullFloat64{Float64: 113, Valid: true}, + numCores: sql.NullFloat64{Float64: 4, Valid: true}, + memPerCore: sql.NullFloat64{Float64: 4, Valid: true}, + maxSupportedNumTables: sql.NullInt64{Int64: 2000, Valid: true}, + minSupportedNumTables: sql.NullFloat64{Float64: 1, Valid: true}, + maxSupportedSelectsPerCore: sql.NullFloat64{Float64: 1230, Valid: true}, + maxSupportedInsertsPerCore: sql.NullFloat64{Float64: 400, Valid: true}, + }, + { + maxColocatedSizeSupported: sql.NullFloat64{Float64: 113, Valid: true}, + numCores: sql.NullFloat64{Float64: 8, Valid: true}, + memPerCore: sql.NullFloat64{Float64: 4, Valid: true}, + maxSupportedNumTables: sql.NullInt64{Int64: 5000, Valid: true}, + minSupportedNumTables: sql.NullFloat64{Float64: 1, Valid: true}, + maxSupportedSelectsPerCore: sql.NullFloat64{Float64: 1246, Valid: true}, + maxSupportedInsertsPerCore: sql.NullFloat64{Float64: 608, Valid: true}, + }, + { + maxColocatedSizeSupported: sql.NullFloat64{Float64: 113, Valid: true}, + numCores: sql.NullFloat64{Float64: 16, Valid: true}, + memPerCore: sql.NullFloat64{Float64: 4, Valid: true}, + maxSupportedNumTables: sql.NullInt64{Int64: 5000, Valid: true}, + minSupportedNumTables: sql.NullFloat64{Float64: 1, Valid: true}, + maxSupportedSelectsPerCore: sql.NullFloat64{Float64: 1220, Valid: true}, + maxSupportedInsertsPerCore: sql.NullFloat64{Float64: 755, Valid: true}, + }, +} + /* ===== Test functions to test getSourceMetadata function ===== */ @@ -96,41 +136,577 @@ func TestGetSourceMetadata_NoRows(t *testing.T) { assert.Equal(t, 0.0, totalSourceDBSize) } -/* -===== Test functions to test 
getTargetMetadata function ===== -*/ - /* ===== Test functions to test shardingBasedOnTableSizeAndCount function ===== */ +func TestShardingBasedOnTableSizeAndCount_Basic(t *testing.T) { + sourceTableMetadata := []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 100}, + } + var sourceIndexMetadata []SourceDBMetadata + recommendation := map[int]IntermediateRecommendation{2: {}, 4: {}, 8: {}, 16: {}} + + expectedRecommendation := map[int]IntermediateRecommendation{ + 2: { + ColocatedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 100}, + }, + ShardedTables: nil, + ColocatedSize: 100, + ShardedSize: 0, + }, + 4: { + ColocatedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 100}, + }, + ShardedTables: nil, + ColocatedSize: 100, + ShardedSize: 0, + }, + 8: { + ColocatedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 100}, + }, + ShardedTables: nil, + ColocatedSize: 100, + ShardedSize: 0, + }, + 16: { + ColocatedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 100}, + }, + ShardedTables: nil, + ColocatedSize: 100, + ShardedSize: 0, + }, + } + + actualRecommendation := shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) + assert.Equal(t, expectedRecommendation, actualRecommendation) +} + +func TestShardingBasedOnTableSizeAndCount_WithIndexes(t *testing.T) { + sourceTableMetadata := []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 100}, + } + sourceIndexMetadata := []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "index1", Size: 20, IsIndex: true, ParentTableName: sql.NullString{String: "table1", Valid: true}}, + } + recommendation := map[int]IntermediateRecommendation{2: {}, 4: {}, 8: {}, 16: {}} + + expectedRecommendation := map[int]IntermediateRecommendation{ + 2: { + ColocatedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 100}, + }, + ShardedTables: nil, + ColocatedSize: 100, // Table size + ShardedSize: 0, + }, + 4: { + ColocatedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 100}, + }, + ShardedTables: nil, + ColocatedSize: 100, // Table size + index size + ShardedSize: 0, + }, + 8: { + ColocatedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 100}, + }, + ShardedTables: nil, + ColocatedSize: 100, // Table size + index size + ShardedSize: 0, + }, + 16: { + ColocatedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 100}, + }, + ShardedTables: nil, + ColocatedSize: 100, // Table size + index size + ShardedSize: 0, + }, + } + + result := shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) + assert.Equal(t, expectedRecommendation, result) +} + +func TestShardingBasedOnTableSizeAndCount_ColocatedLimitExceededBySize(t *testing.T) { + sourceTableMetadata := []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 110}, + {SchemaName: "public", ObjectName: "table2", Size: 500}, + } + sourceIndexMetadata := []SourceDBMetadata{} + recommendation := map[int]IntermediateRecommendation{2: {}, 4: {}, 8: {}, 16: {}} + + expectedRecommendation := map[int]IntermediateRecommendation{ + 2: { + ColocatedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 110}, + }, + ShardedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table2", 
Size: 500}, + }, + ColocatedSize: 110, + ShardedSize: 500, + }, + 4: { + ColocatedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 110}, + }, + ShardedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table2", Size: 500}, + }, + ColocatedSize: 110, + ShardedSize: 500, + }, + 8: { + ColocatedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 110}, + }, + ShardedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table2", Size: 500}, + }, + ColocatedSize: 110, + ShardedSize: 500, + }, + 16: { + ColocatedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 110}, + }, + ShardedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table2", Size: 500}, + }, + ColocatedSize: 110, + ShardedSize: 500, + }, + } + + result := shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) + assert.Equal(t, expectedRecommendation, result) +} + +func TestShardingBasedOnTableSizeAndCount_ColocatedLimitExceededByCount(t *testing.T) { + const numTables = 16000 + sourceTableMetadata := make([]SourceDBMetadata, numTables) + for i := 0; i < numTables; i++ { + sourceTableMetadata[i] = SourceDBMetadata{SchemaName: "public", ObjectName: fmt.Sprintf("table%v", i), Size: 0.0001} + } + + sourceIndexMetadata := []SourceDBMetadata{} + recommendation := map[int]IntermediateRecommendation{2: {}, 4: {}, 8: {}, 16: {}} + + expectedResults := make(map[int]map[string]int) + expectedResults[2] = map[string]int{ + "lenColocatedTables": 2000, + "lenShardedTables": 14000, + } + expectedResults[4] = map[string]int{ + "lenColocatedTables": 2000, + "lenShardedTables": 14000, + } + expectedResults[8] = map[string]int{ + "lenColocatedTables": 5000, + "lenShardedTables": 11000, + } + expectedResults[16] = map[string]int{ + "lenColocatedTables": 5000, + "lenShardedTables": 11000, + } + + actualRecommendationsResult := shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) + for key, rec := range actualRecommendationsResult { + assert.Equal(t, expectedResults[key]["lenColocatedTables"], len(rec.ColocatedTables)) + assert.Equal(t, expectedResults[key]["lenShardedTables"], len(rec.ShardedTables)) + } +} + +func TestShardingBasedOnTableSizeAndCount_NoColocatedTables(t *testing.T) { + sourceTableMetadata := []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 600}, + {SchemaName: "public", ObjectName: "table2", Size: 500}, + } + sourceIndexMetadata := []SourceDBMetadata{} + recommendation := map[int]IntermediateRecommendation{2: {}, 4: {}, 8: {}, 16: {}} + + expectedResults := make(map[int]map[string]int) + expectedResults[2] = map[string]int{ + "lenColocatedTables": 0, + "lenShardedTables": 2, + } + expectedResults[4] = map[string]int{ + "lenColocatedTables": 0, + "lenShardedTables": 2, + } + expectedResults[8] = map[string]int{ + "lenColocatedTables": 0, + "lenShardedTables": 2, + } + expectedResults[16] = map[string]int{ + "lenColocatedTables": 0, + "lenShardedTables": 2, + } + + result := shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) + for key, rec := range result { + assert.Equal(t, expectedResults[key]["lenColocatedTables"], len(rec.ColocatedTables)) + assert.Equal(t, expectedResults[key]["lenShardedTables"], len(rec.ShardedTables)) + } +} /* ===== Test functions to test shardingBasedOnOperations function ===== */ +func 
TestShardingBasedOnOperations(t *testing.T) { + // Define test data + sourceIndexMetadata := []SourceDBMetadata{ + {ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}, + } + recommendation := map[int]IntermediateRecommendation{ + 2: { + ColocatedTables: []SourceDBMetadata{{ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}}, + ShardedTables: []SourceDBMetadata{}, + ColocatedSize: 10.0, + ShardedSize: 0.0, + }, + 4: { + ColocatedTables: []SourceDBMetadata{{ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}}, + ShardedTables: []SourceDBMetadata{}, + ColocatedSize: 10.0, + ShardedSize: 0.0, + }, + 8: { + ColocatedTables: []SourceDBMetadata{{ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}}, + ShardedTables: []SourceDBMetadata{}, + ColocatedSize: 10.0, + ShardedSize: 0.0, + }, + 16: { + ColocatedTables: []SourceDBMetadata{{ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}}, + ShardedTables: []SourceDBMetadata{}, + ColocatedSize: 10.0, + ShardedSize: 0.0, + }, + } + + // Run the function + updatedRecommendation := shardingBasedOnOperations(sourceIndexMetadata, colocatedLimits, recommendation) + + // expected is that the table should be removed from colocated and added to sharded as the ops requirement is high + for _, rec := range updatedRecommendation { + assert.Empty(t, rec.ColocatedTables) + assert.Len(t, rec.ShardedTables, 1) + assert.Equal(t, "table1", rec.ShardedTables[0].ObjectName) + } +} /* ===== Test functions to test checkShardedTableLimit function ===== */ +func TestCheckShardedTableLimit(t *testing.T) { + // Define test data + var sourceIndexMetadata []SourceDBMetadata + + shardedLimits := []ExpDataShardedLimit{ + {numCores: sql.NullFloat64{Float64: 16, Valid: true}, maxSupportedNumTables: sql.NullInt64{Int64: 1, Valid: true}}, + } + + recommendation := map[int]IntermediateRecommendation{ + 16: { + ShardedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 10.0, ReadsPerSec: 100, WritesPerSec: 50}, + {SchemaName: "public", ObjectName: "table2", Size: 10.0, ReadsPerSec: 100, WritesPerSec: 50}, + }, + VCPUsPerInstance: 16, + MemoryPerCore: 4, + }, + } + + // Run the function + updatedRecommendation := checkShardedTableLimit(sourceIndexMetadata, shardedLimits, recommendation) + + // check if the Failure reasoning matches with the one generated + for _, rec := range updatedRecommendation { + // establish expected reasoning + expectedFailureReasoning := fmt.Sprintf("Cannot support 2 sharded objects on a machine with %d cores"+ + " and %dGiB memory.", rec.VCPUsPerInstance, rec.VCPUsPerInstance*rec.MemoryPerCore) + assert.Equal(t, expectedFailureReasoning, rec.FailureReasoning) + } +} /* ===== Test functions to test findNumNodesNeededBasedOnThroughputRequirement function ===== */ +func TestFindNumNodesNeededBasedOnThroughputRequirement_Positive(t *testing.T) { + // Define test data + sourceIndexMetadata := []SourceDBMetadata{ + {ObjectName: "table1", Size: 10.0, ReadsPerSec: 100, WritesPerSec: 50}, + {ObjectName: "table2", Size: 20.0, ReadsPerSec: 200, WritesPerSec: 100}, + } + + shardedLimits := []ExpDataShardedThroughput{ + { + numCores: sql.NullFloat64{Float64: 4}, + maxSupportedSelectsPerCore: sql.NullFloat64{Float64: 200}, + maxSupportedInsertsPerCore: sql.NullFloat64{Float64: 100}, + selectConnPerNode: sql.NullInt64{Int64: 50}, + insertConnPerNode: sql.NullInt64{Int64: 25}, + }, + } + + recommendation := 
map[int]IntermediateRecommendation{ + 4: { + ColocatedTables: []SourceDBMetadata{}, + ShardedTables: []SourceDBMetadata{{ObjectName: "table1", Size: 10.0, ReadsPerSec: 100, WritesPerSec: 50}}, + }, + } + + // Run the function + updatedRecommendation := findNumNodesNeededBasedOnThroughputRequirement(sourceIndexMetadata, shardedLimits, recommendation) + + // for 4 cores data, expected results are + var expectedOptimalSelectConnectionsPerNode int64 = 50 + var expectedOptimalInsertConnectionsPerNode int64 = 25 + var expectedNumNodesNeeded float64 = 3 + + // Check the results + recommendationToVerify := updatedRecommendation[4] + assert.Equal(t, expectedOptimalSelectConnectionsPerNode, recommendationToVerify.OptimalSelectConnectionsPerNode) + assert.Equal(t, expectedOptimalInsertConnectionsPerNode, recommendationToVerify.OptimalInsertConnectionsPerNode) + assert.Equal(t, expectedNumNodesNeeded, recommendationToVerify.NumNodes) +} + +func TestFindNumNodesNeededBasedOnThroughputRequirement_Negative(t *testing.T) { + // Define test data + var sourceIndexMetadata []SourceDBMetadata + + shardedLimits := []ExpDataShardedThroughput{ + { + numCores: sql.NullFloat64{Float64: 4}, + maxSupportedSelectsPerCore: sql.NullFloat64{Float64: 200}, + maxSupportedInsertsPerCore: sql.NullFloat64{Float64: 100}, + }, + } + + recommendation := map[int]IntermediateRecommendation{ + 4: { + ShardedTables: []SourceDBMetadata{{ObjectName: "table2", Size: 20.0, ReadsPerSec: 2000, WritesPerSec: 5000}}, + VCPUsPerInstance: 4, + MemoryPerCore: 4, + }, + } + + // Run the function + updatedRecommendation := findNumNodesNeededBasedOnThroughputRequirement(sourceIndexMetadata, shardedLimits, recommendation) + + // validate the expected number of nodes + assert.Equal(t, updatedRecommendation[4].NumNodes, float64(15)) +} /* ===== Test functions to test findNumNodesNeededBasedOnTabletsRequired function ===== */ +func TestFindNumNodesNeededBasedOnTabletsRequired_Positive_NoChange(t *testing.T) { + // Define test data + var sourceIndexMetadata []SourceDBMetadata + + shardedLimits := []ExpDataShardedLimit{ + { + numCores: sql.NullFloat64{Float64: 4, Valid: true}, + maxSupportedNumTables: sql.NullInt64{Int64: 20, Valid: true}, + }, + } + + recommendation := map[int]IntermediateRecommendation{ + 4: { + ColocatedTables: []SourceDBMetadata{}, + ShardedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 20}, + {SchemaName: "public", ObjectName: "table2", Size: 120}, + }, + VCPUsPerInstance: 4, + NumNodes: 3, + }, + } + + // Run the function + updatedRecommendation := findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata, shardedLimits, recommendation) + + // check if the num nodes in updated recommendation is same as before(3) meaning no scaling is required + assert.Equal(t, float64(3), updatedRecommendation[4].NumNodes) +} + +func TestFindNumNodesNeededBasedOnTabletsRequired_Positive(t *testing.T) { + // Define test data + sourceIndexMetadata := []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "index1", Size: 7, ParentTableName: sql.NullString{String: "public.table1"}}, + {SchemaName: "public", ObjectName: "index2", Size: 3, ParentTableName: sql.NullString{String: "public.table2"}}, + } + + shardedLimits := []ExpDataShardedLimit{ + { + numCores: sql.NullFloat64{Float64: 4, Valid: true}, + maxSupportedNumTables: sql.NullInt64{Int64: 20, Valid: true}, + }, + } + + recommendation := map[int]IntermediateRecommendation{ + 4: { + ColocatedTables: []SourceDBMetadata{}, + ShardedTables: []SourceDBMetadata{ 
+ {SchemaName: "public", ObjectName: "table1", Size: 250}, + {SchemaName: "public", ObjectName: "table2", Size: 120}, + }, + VCPUsPerInstance: 4, + NumNodes: 3, + }, + } + + // Run the function + updatedRecommendation := findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata, shardedLimits, recommendation) + + // check if the num nodes in updated recommendation has increased. Meaning scaling is required. + assert.Equal(t, float64(6), updatedRecommendation[4].NumNodes) +} + /* ===== Test functions to test pickBestRecommendation function ===== */ +func TestPickBestRecommendation(t *testing.T) { + recommendations := map[int]IntermediateRecommendation{ + 4: { + VCPUsPerInstance: 4, + NumNodes: 10, + FailureReasoning: "", + }, + 8: { + VCPUsPerInstance: 8, + NumNodes: 3, + FailureReasoning: "", + }, + } + bestPick := pickBestRecommendation(recommendations) + // validate the best recommendation which is 8 vcpus per instance is picked up + assert.Equal(t, 8, bestPick.VCPUsPerInstance) +} + /* ===== Test functions to test calculateTimeTakenAndParallelJobsForImport function ===== */ +func TestCalculateTimeTakenAndParallelJobsForImport_Positive(t *testing.T) { + db, mock := createMockDB(t) + // Define the mock response for the query + rows := sqlmock.NewRows([]string{"csv_size_gb", "migration_time_secs", "parallel_threads"}). + AddRow(100, 6000, 4) + mock.ExpectQuery("(?i)SELECT csv_size_gb, migration_time_secs, parallel_threads FROM .* WHERE num_cores = .*"). + WithArgs(4, 4, 50.0, 4). + WillReturnRows(rows) + + // Define test data + dbObjects := []SourceDBMetadata{{Size: 30.0}, {Size: 20.0}} + + // Call the function + estimatedTime, parallelJobs, err := calculateTimeTakenAndParallelJobsForImport(COLOCATED_LOAD_TIME_TABLE, dbObjects, 4, 4, db) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + // Define expected results + expectedTime := 50.0 // Calculated as ((6000 * 50) / 100) / 60 + expectedJobs := int64(4) + + if estimatedTime != expectedTime || parallelJobs != expectedJobs { + t.Errorf("calculateTimeTakenAndParallelJobsForImport() = (%v, %v), want (%v, %v)", estimatedTime, parallelJobs, expectedTime, expectedJobs) + } + + // Ensure all expectations were met + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("There were unfulfilled expectations: %s", err) + } +} + /* ===== Test functions to test getReasoning function ===== */ +func TestGetReasoning_NoObjects(t *testing.T) { + recommendation := IntermediateRecommendation{VCPUsPerInstance: 4, MemoryPerCore: 16} + var shardedObjects []SourceDBMetadata + var colocatedObjects []SourceDBMetadata + expected := "Recommended instance type with 4 vCPU and 64 GiB memory could fit " + + result := getReasoning(recommendation, shardedObjects, 0, colocatedObjects, 0) + if result != expected { + t.Errorf("Expected %q but got %q", expected, result) + } +} + +func TestGetReasoning_OnlyColocatedObjects(t *testing.T) { + recommendation := IntermediateRecommendation{VCPUsPerInstance: 8, MemoryPerCore: 8} + var shardedObjects []SourceDBMetadata + colocatedObjects := []SourceDBMetadata{ + {Size: 50.0, ReadsPerSec: 1000, WritesPerSec: 500}, + {Size: 30.0, ReadsPerSec: 2000, WritesPerSec: 1500}, + } + expected := "Recommended instance type with 8 vCPU and 64 GiB memory could fit 2 objects(2 tables and 0 explicit/implicit indexes) with 80.00 GB size and throughput requirement of 3000 reads/sec and 2000 writes/sec as colocated." 
+ + result := getReasoning(recommendation, shardedObjects, 0, colocatedObjects, 0) + if result != expected { + t.Errorf("Expected %q but got %q", expected, result) + } +} + +func TestGetReasoning_OnlyShardedObjects(t *testing.T) { + recommendation := IntermediateRecommendation{VCPUsPerInstance: 16, MemoryPerCore: 4} + shardedObjects := []SourceDBMetadata{ + {Size: 100.0, ReadsPerSec: 4000, WritesPerSec: 3000}, + {Size: 200.0, ReadsPerSec: 5000, WritesPerSec: 4000}, + } + var colocatedObjects []SourceDBMetadata + expected := "Recommended instance type with 16 vCPU and 64 GiB memory could fit 2 objects(2 tables and 0 explicit/implicit indexes) with 300.00 GB size and throughput requirement of 9000 reads/sec and 7000 writes/sec as sharded." + + result := getReasoning(recommendation, shardedObjects, 0, colocatedObjects, 0) + if result != expected { + t.Errorf("Expected %q but got %q", expected, result) + } +} + +func TestGetReasoning_ColocatedAndShardedObjects(t *testing.T) { + recommendation := IntermediateRecommendation{VCPUsPerInstance: 32, MemoryPerCore: 2} + shardedObjects := []SourceDBMetadata{ + {Size: 150.0, ReadsPerSec: 7000, WritesPerSec: 6000}, + } + colocatedObjects := []SourceDBMetadata{ + {Size: 70.0, ReadsPerSec: 3000, WritesPerSec: 2000}, + {Size: 50.0, ReadsPerSec: 2000, WritesPerSec: 1000}, + } + expected := "Recommended instance type with 32 vCPU and 64 GiB memory could fit 2 objects(2 tables and 0 explicit/implicit indexes) with 120.00 GB size and throughput requirement of 5000 reads/sec and 3000 writes/sec as colocated. Rest 1 objects(1 tables and 0 explicit/implicit indexes) with 150.00 GB size and throughput requirement of 7000 reads/sec and 6000 writes/sec need to be migrated as range partitioned tables" + + result := getReasoning(recommendation, shardedObjects, 0, colocatedObjects, 0) + if result != expected { + t.Errorf("Expected %q but got %q", expected, result) + } +} + +func TestGetReasoning_Indexes(t *testing.T) { + recommendation := IntermediateRecommendation{VCPUsPerInstance: 4, MemoryPerCore: 16} + shardedObjects := []SourceDBMetadata{ + {Size: 200.0, ReadsPerSec: 6000, WritesPerSec: 4000}, + } + colocatedObjects := []SourceDBMetadata{ + {Size: 100.0, ReadsPerSec: 3000, WritesPerSec: 2000}, + } + expected := "Recommended instance type with 4 vCPU and 64 GiB memory could fit 1 objects(0 tables and 1 explicit/implicit indexes) with 100.00 GB size and throughput requirement of 3000 reads/sec and 2000 writes/sec as colocated. 
Rest 1 objects(0 tables and 1 explicit/implicit indexes) with 200.00 GB size and throughput requirement of 6000 reads/sec and 4000 writes/sec need to be migrated as range partitioned tables" + + result := getReasoning(recommendation, shardedObjects, 1, colocatedObjects, 1) + if result != expected { + t.Errorf("Expected %q but got %q", expected, result) + } +} /* ====================HELPER FUNCTIONS==================== From 62fdb1754b1f34b4a4e4bd378bf24d684e7faa6d Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Mon, 10 Jun 2024 15:59:43 +0530 Subject: [PATCH 12/14] added few more test cases and renamed appropriately --- yb-voyager/src/migassessment/sizing_test.go | 336 +++++++++++++++----- 1 file changed, 259 insertions(+), 77 deletions(-) diff --git a/yb-voyager/src/migassessment/sizing_test.go b/yb-voyager/src/migassessment/sizing_test.go index 709ad2eb4..4f0d42abf 100644 --- a/yb-voyager/src/migassessment/sizing_test.go +++ b/yb-voyager/src/migassessment/sizing_test.go @@ -26,11 +26,8 @@ import ( "testing" ) -const ( - SOURCEDB_SELECT_QUERY = "SELECT schema_name, object_name, row_count, reads_per_second, writes_per_second, " + - "is_index, parent_table_name, size_in_bytes FROM table_index_stats ORDER BY size_in_bytes ASC" -) - +var SourceDbSelectQuery = "SELECT schema_name, object_name, row_count, reads_per_second, writes_per_second, " + + "is_index, parent_table_name, size_in_bytes FROM table_index_stats ORDER BY size_in_bytes ASC" var SourceDBColumns = []string{"schema_name", "object_name", "row_count", "reads_per_second", "writes_per_second", "is_index", "parent_table_name", "size_in_bytes"} @@ -76,13 +73,14 @@ var colocatedLimits = []ExpDataColocatedLimit{ /* ===== Test functions to test getSourceMetadata function ===== */ -func TestGetSourceMetadata_Success(t *testing.T) { +// verify successfully able to connect and load the source metadata +func TestGetSourceMetadata_SuccessReadingSourceMetadata(t *testing.T) { db, mock := createMockDB(t) rows := sqlmock.NewRows(SourceDBColumns). AddRow("public", "table1", 1000, 10, 5, false, "", 1048576000). AddRow("public", "index1", 0, 0, 0, true, "table1", 104857600) - mock.ExpectQuery(SOURCEDB_SELECT_QUERY).WillReturnRows(rows) + mock.ExpectQuery(SourceDbSelectQuery).WillReturnRows(rows) sourceTableMetadata, sourceIndexMetadata, totalSourceDBSize, err := getSourceMetadata(db) // assert if there are errors @@ -100,9 +98,10 @@ func TestGetSourceMetadata_Success(t *testing.T) { assert.Equal(t, "index1", sourceIndexMetadata[0].ObjectName) } -func TestGetSourceMetadata_QueryError(t *testing.T) { +// if table_index_stat does not exist in the assessment.db or one of the required column does not exist in the table +func TestGetSourceMetadata_QueryErrorIfTableDoesNotExistOrColumnsUnavailable(t *testing.T) { db, mock := createMockDB(t) - mock.ExpectQuery(SOURCEDB_SELECT_QUERY).WillReturnError(errors.New("query error")) + mock.ExpectQuery(SourceDbSelectQuery).WillReturnError(errors.New("query error")) _, _, _, err := getSourceMetadata(db) assert.Error(t, err) @@ -110,12 +109,14 @@ func TestGetSourceMetadata_QueryError(t *testing.T) { assert.Contains(t, err.Error(), "failed to query source metadata") } +// verify if the all columns datatypes are correct. Expecting failure in case of unsupported data type. Example: +// expected column value is int but in assessment.db, float value is provided. 
Hence, it will fail to read/typecast func TestGetSourceMetadata_RowScanError(t *testing.T) { db, mock := createMockDB(t) // 4th column is expected to be int, but as it is float, it will throw an error rows := sqlmock.NewRows(SourceDBColumns).AddRow("public", "table1", 1000, 10.5, 5, false, "", 1048576000). RowError(1, errors.New("row scan error")) - mock.ExpectQuery(SOURCEDB_SELECT_QUERY).WillReturnRows(rows) + mock.ExpectQuery(SourceDbSelectQuery).WillReturnRows(rows) _, _, _, err := getSourceMetadata(db) assert.Error(t, err) @@ -123,10 +124,11 @@ func TestGetSourceMetadata_RowScanError(t *testing.T) { assert.Contains(t, err.Error(), "failed to read from result set of query source metadata") } +// verify if there are no rows in the source metadata func TestGetSourceMetadata_NoRows(t *testing.T) { db, mock := createMockDB(t) rows := sqlmock.NewRows(SourceDBColumns) - mock.ExpectQuery(SOURCEDB_SELECT_QUERY).WillReturnRows(rows) + mock.ExpectQuery(SourceDbSelectQuery).WillReturnRows(rows) sourceTableMetadata, sourceIndexMetadata, totalSourceDBSize, err := getSourceMetadata(db) assert.NoError(t, err) @@ -139,7 +141,8 @@ func TestGetSourceMetadata_NoRows(t *testing.T) { /* ===== Test functions to test shardingBasedOnTableSizeAndCount function ===== */ -func TestShardingBasedOnTableSizeAndCount_Basic(t *testing.T) { +// validate if the source table of size in colocated limit is correctly placed in colocated table recommendation +func TestShardingBasedOnTableSizeAndCount_TableWithSizePlacedInColocated(t *testing.T) { sourceTableMetadata := []SourceDBMetadata{ {SchemaName: "public", ObjectName: "table1", Size: 100}, } @@ -181,16 +184,22 @@ func TestShardingBasedOnTableSizeAndCount_Basic(t *testing.T) { }, } - actualRecommendation := shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) + actualRecommendation := + shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) assert.Equal(t, expectedRecommendation, actualRecommendation) } -func TestShardingBasedOnTableSizeAndCount_WithIndexes(t *testing.T) { +// validate if the source table and index of size in colocated limit is correctly placed in colocated table +// recommendation +func TestShardingBasedOnTableSizeAndCount_WithIndexes_ColocateAll(t *testing.T) { sourceTableMetadata := []SourceDBMetadata{ {SchemaName: "public", ObjectName: "table1", Size: 100}, } sourceIndexMetadata := []SourceDBMetadata{ - {SchemaName: "public", ObjectName: "index1", Size: 20, IsIndex: true, ParentTableName: sql.NullString{String: "table1", Valid: true}}, + { + SchemaName: "public", ObjectName: "index1", Size: 10, IsIndex: true, + ParentTableName: sql.NullString{String: "public.table1", Valid: true}, + }, } recommendation := map[int]IntermediateRecommendation{2: {}, 4: {}, 8: {}, 16: {}} @@ -200,45 +209,44 @@ func TestShardingBasedOnTableSizeAndCount_WithIndexes(t *testing.T) { {SchemaName: "public", ObjectName: "table1", Size: 100}, }, ShardedTables: nil, - ColocatedSize: 100, // Table size - ShardedSize: 0, + ColocatedSize: 110, // Table size + index size }, 4: { ColocatedTables: []SourceDBMetadata{ {SchemaName: "public", ObjectName: "table1", Size: 100}, }, ShardedTables: nil, - ColocatedSize: 100, // Table size + index size - ShardedSize: 0, + ColocatedSize: 110, // Table size + index size }, 8: { ColocatedTables: []SourceDBMetadata{ {SchemaName: "public", ObjectName: "table1", Size: 100}, }, ShardedTables: nil, - ColocatedSize: 100, // Table size + index size 
- ShardedSize: 0, + ColocatedSize: 110, // Table size + index size }, 16: { ColocatedTables: []SourceDBMetadata{ {SchemaName: "public", ObjectName: "table1", Size: 100}, }, ShardedTables: nil, - ColocatedSize: 100, // Table size + index size - ShardedSize: 0, + ColocatedSize: 110, // Table size + index size }, } - result := shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) + result := + shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) assert.Equal(t, expectedRecommendation, result) } +// validate if the source table and index of size in colocated limit exceeds the size limit and respective tables need +// to be put in sharded func TestShardingBasedOnTableSizeAndCount_ColocatedLimitExceededBySize(t *testing.T) { sourceTableMetadata := []SourceDBMetadata{ {SchemaName: "public", ObjectName: "table1", Size: 110}, {SchemaName: "public", ObjectName: "table2", Size: 500}, } - sourceIndexMetadata := []SourceDBMetadata{} + var sourceIndexMetadata []SourceDBMetadata recommendation := map[int]IntermediateRecommendation{2: {}, 4: {}, 8: {}, 16: {}} expectedRecommendation := map[int]IntermediateRecommendation{ @@ -284,18 +292,22 @@ func TestShardingBasedOnTableSizeAndCount_ColocatedLimitExceededBySize(t *testin }, } - result := shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) + result := + shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) assert.Equal(t, expectedRecommendation, result) } +// validate if the source table and index of size in colocated limit exceeds limit of maximum supported num objects +// and respective tables need to be put in sharded func TestShardingBasedOnTableSizeAndCount_ColocatedLimitExceededByCount(t *testing.T) { const numTables = 16000 sourceTableMetadata := make([]SourceDBMetadata, numTables) for i := 0; i < numTables; i++ { - sourceTableMetadata[i] = SourceDBMetadata{SchemaName: "public", ObjectName: fmt.Sprintf("table%v", i), Size: 0.0001} + sourceTableMetadata[i] = + SourceDBMetadata{SchemaName: "public", ObjectName: fmt.Sprintf("table%v", i), Size: 0.0001} } - sourceIndexMetadata := []SourceDBMetadata{} + var sourceIndexMetadata []SourceDBMetadata recommendation := map[int]IntermediateRecommendation{2: {}, 4: {}, 8: {}, 16: {}} expectedResults := make(map[int]map[string]int) @@ -316,19 +328,21 @@ func TestShardingBasedOnTableSizeAndCount_ColocatedLimitExceededByCount(t *testi "lenShardedTables": 11000, } - actualRecommendationsResult := shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) + actualRecommendationsResult := + shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) for key, rec := range actualRecommendationsResult { assert.Equal(t, expectedResults[key]["lenColocatedTables"], len(rec.ColocatedTables)) assert.Equal(t, expectedResults[key]["lenShardedTables"], len(rec.ShardedTables)) } } +// validate if the tables of size more than max colocated size supported are put in the sharded tables func TestShardingBasedOnTableSizeAndCount_NoColocatedTables(t *testing.T) { sourceTableMetadata := []SourceDBMetadata{ {SchemaName: "public", ObjectName: "table1", Size: 600}, {SchemaName: "public", ObjectName: "table2", Size: 500}, } - sourceIndexMetadata := []SourceDBMetadata{} + var sourceIndexMetadata []SourceDBMetadata recommendation 
:= map[int]IntermediateRecommendation{2: {}, 4: {}, 8: {}, 16: {}} expectedResults := make(map[int]map[string]int) @@ -349,9 +363,12 @@ func TestShardingBasedOnTableSizeAndCount_NoColocatedTables(t *testing.T) { "lenShardedTables": 2, } - result := shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) + result := + shardingBasedOnTableSizeAndCount(sourceTableMetadata, sourceIndexMetadata, colocatedLimits, recommendation) for key, rec := range result { + // assert that there are no colocated tables assert.Equal(t, expectedResults[key]["lenColocatedTables"], len(rec.ColocatedTables)) + // assert that there are 2 sharded tables assert.Equal(t, expectedResults[key]["lenShardedTables"], len(rec.ShardedTables)) } } @@ -359,35 +376,97 @@ func TestShardingBasedOnTableSizeAndCount_NoColocatedTables(t *testing.T) { /* ===== Test functions to test shardingBasedOnOperations function ===== */ -func TestShardingBasedOnOperations(t *testing.T) { +// validate that table throughput requirements are supported and sharding is not needed +func TestShardingBasedOnOperations_CanSupportOpsRequirement(t *testing.T) { + // Define test data + sourceIndexMetadata := []SourceDBMetadata{ + {ObjectName: "table1", Size: 10.0, ReadsPerSec: 100, WritesPerSec: 50}, + } + recommendation := map[int]IntermediateRecommendation{ + 2: { + ColocatedTables: []SourceDBMetadata{ + {ObjectName: "table1", Size: 10.0, ReadsPerSec: 100, WritesPerSec: 50}, + }, + ShardedTables: []SourceDBMetadata{}, + ColocatedSize: 10.0, + ShardedSize: 0.0, + }, + 4: { + ColocatedTables: []SourceDBMetadata{ + {ObjectName: "table1", Size: 10.0, ReadsPerSec: 100, WritesPerSec: 50}, + }, + ShardedTables: []SourceDBMetadata{}, + ColocatedSize: 10.0, + ShardedSize: 0.0, + }, + 8: { + ColocatedTables: []SourceDBMetadata{ + {ObjectName: "table1", Size: 10.0, ReadsPerSec: 100, WritesPerSec: 50}, + }, + ShardedTables: []SourceDBMetadata{}, + ColocatedSize: 10.0, + ShardedSize: 0.0, + }, + 16: { + ColocatedTables: []SourceDBMetadata{ + {ObjectName: "table1", Size: 10.0, ReadsPerSec: 100, WritesPerSec: 50}, + }, + ShardedTables: []SourceDBMetadata{}, + ColocatedSize: 10.0, + ShardedSize: 0.0, + }, + } + + // Run the function + updatedRecommendation := shardingBasedOnOperations(sourceIndexMetadata, colocatedLimits, recommendation) + + // expected is that the table remains colocated and sharded tables stay empty as the ops requirement can be supported + for _, rec := range updatedRecommendation { + assert.Empty(t, rec.ShardedTables) + assert.Len(t, rec.ColocatedTables, 1) + assert.Equal(t, "table1", rec.ColocatedTables[0].ObjectName) + } +} + +// validate that the tables should be removed from colocated and added to sharded as the ops requirement is higher than +// supported +func TestShardingBasedOnOperations_CannotSupportOpsAndNeedsSharding(t *testing.T) { + // Define test data sourceIndexMetadata := []SourceDBMetadata{ {ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}, } recommendation := map[int]IntermediateRecommendation{ 2: { - ColocatedTables: []SourceDBMetadata{{ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}}, - ShardedTables: []SourceDBMetadata{}, - ColocatedSize: 10.0, - ShardedSize: 0.0, + ColocatedTables: []SourceDBMetadata{ + {ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}, + }, + ShardedTables: []SourceDBMetadata{}, + ColocatedSize: 10.0, + ShardedSize: 0.0, }, 4: { - ColocatedTables:
[]SourceDBMetadata{{ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}}, - ShardedTables: []SourceDBMetadata{}, - ColocatedSize: 10.0, - ShardedSize: 0.0, + ColocatedTables: []SourceDBMetadata{ + {ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}, + }, + ShardedTables: []SourceDBMetadata{}, + ColocatedSize: 10.0, + ShardedSize: 0.0, }, 8: { - ColocatedTables: []SourceDBMetadata{{ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}}, - ShardedTables: []SourceDBMetadata{}, - ColocatedSize: 10.0, - ShardedSize: 0.0, + ColocatedTables: []SourceDBMetadata{ + {ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}, + }, + ShardedTables: []SourceDBMetadata{}, + ColocatedSize: 10.0, + ShardedSize: 0.0, }, 16: { - ColocatedTables: []SourceDBMetadata{{ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}}, - ShardedTables: []SourceDBMetadata{}, - ColocatedSize: 10.0, - ShardedSize: 0.0, + ColocatedTables: []SourceDBMetadata{ + {ObjectName: "table1", Size: 10.0, ReadsPerSec: 1000000, WritesPerSec: 50000000}, + }, + ShardedTables: []SourceDBMetadata{}, + ColocatedSize: 10.0, + ShardedSize: 0.0, }, } @@ -405,14 +484,45 @@ func TestShardingBasedOnOperations(t *testing.T) { /* ===== Test functions to test checkShardedTableLimit function ===== */ -func TestCheckShardedTableLimit(t *testing.T) { +// validate that the sharded table limits is not exceeded and failure reasoning should be empty +func TestCheckShardedTableLimit_WithinLimit(t *testing.T) { // Define test data var sourceIndexMetadata []SourceDBMetadata - shardedLimits := []ExpDataShardedLimit{ - {numCores: sql.NullFloat64{Float64: 16, Valid: true}, maxSupportedNumTables: sql.NullInt64{Int64: 1, Valid: true}}, + { + numCores: sql.NullFloat64{Float64: 16, Valid: true}, + maxSupportedNumTables: sql.NullInt64{Int64: 100, Valid: true}, + }, + } + recommendation := map[int]IntermediateRecommendation{ + 16: { + ShardedTables: []SourceDBMetadata{ + {SchemaName: "public", ObjectName: "table1", Size: 10.0, ReadsPerSec: 100, WritesPerSec: 50}, + {SchemaName: "public", ObjectName: "table2", Size: 10.0, ReadsPerSec: 100, WritesPerSec: 50}, + }, + VCPUsPerInstance: 16, + MemoryPerCore: 4, + }, } + // Run the function + updatedRecommendation := checkShardedTableLimit(sourceIndexMetadata, shardedLimits, recommendation) + for _, rec := range updatedRecommendation { + // failure reasoning should be empty + assert.Empty(t, rec.FailureReasoning) + } +} + +// validate that the sharded table limits is exceeded and failure reasoning is generated as expected +func TestCheckShardedTableLimit_LimitExceeded(t *testing.T) { + // Define test data + var sourceIndexMetadata []SourceDBMetadata + shardedLimits := []ExpDataShardedLimit{ + { + numCores: sql.NullFloat64{Float64: 16, Valid: true}, + maxSupportedNumTables: sql.NullInt64{Int64: 1, Valid: true}, + }, + } recommendation := map[int]IntermediateRecommendation{ 16: { ShardedTables: []SourceDBMetadata{ @@ -439,7 +549,8 @@ func TestCheckShardedTableLimit(t *testing.T) { /* ===== Test functions to test findNumNodesNeededBasedOnThroughputRequirement function ===== */ -func TestFindNumNodesNeededBasedOnThroughputRequirement_Positive(t *testing.T) { +// validate that the throughput requirements are supported and no scaling is needed +func TestFindNumNodesNeededBasedOnThroughputRequirement_CanSupportOps(t *testing.T) { // Define test data sourceIndexMetadata := []SourceDBMetadata{ {ObjectName: 
"table1", Size: 10.0, ReadsPerSec: 100, WritesPerSec: 50}, @@ -464,7 +575,8 @@ func TestFindNumNodesNeededBasedOnThroughputRequirement_Positive(t *testing.T) { } // Run the function - updatedRecommendation := findNumNodesNeededBasedOnThroughputRequirement(sourceIndexMetadata, shardedLimits, recommendation) + updatedRecommendation := + findNumNodesNeededBasedOnThroughputRequirement(sourceIndexMetadata, shardedLimits, recommendation) // for 4 cores data, expected results are var expectedOptimalSelectConnectionsPerNode int64 = 50 @@ -478,7 +590,8 @@ func TestFindNumNodesNeededBasedOnThroughputRequirement_Positive(t *testing.T) { assert.Equal(t, expectedNumNodesNeeded, recommendationToVerify.NumNodes) } -func TestFindNumNodesNeededBasedOnThroughputRequirement_Negative(t *testing.T) { +// validate that the throughput requirements cannot be supported by existing nodes and scaling is needed +func TestFindNumNodesNeededBasedOnThroughputRequirement_NeedMoreNodes(t *testing.T) { // Define test data var sourceIndexMetadata []SourceDBMetadata @@ -492,14 +605,17 @@ func TestFindNumNodesNeededBasedOnThroughputRequirement_Negative(t *testing.T) { recommendation := map[int]IntermediateRecommendation{ 4: { - ShardedTables: []SourceDBMetadata{{ObjectName: "table2", Size: 20.0, ReadsPerSec: 2000, WritesPerSec: 5000}}, + ShardedTables: []SourceDBMetadata{ + {ObjectName: "table2", Size: 20.0, ReadsPerSec: 2000, WritesPerSec: 5000}, + }, VCPUsPerInstance: 4, MemoryPerCore: 4, }, } // Run the function - updatedRecommendation := findNumNodesNeededBasedOnThroughputRequirement(sourceIndexMetadata, shardedLimits, recommendation) + updatedRecommendation := + findNumNodesNeededBasedOnThroughputRequirement(sourceIndexMetadata, shardedLimits, recommendation) // validate the expected number of nodes assert.Equal(t, updatedRecommendation[4].NumNodes, float64(15)) @@ -508,18 +624,16 @@ func TestFindNumNodesNeededBasedOnThroughputRequirement_Negative(t *testing.T) { /* ===== Test functions to test findNumNodesNeededBasedOnTabletsRequired function ===== */ - -func TestFindNumNodesNeededBasedOnTabletsRequired_Positive_NoChange(t *testing.T) { +// validate that the tablets requirements are supported and no scaling is needed +func TestFindNumNodesNeededBasedOnTabletsRequired_CanSupportTablets(t *testing.T) { // Define test data var sourceIndexMetadata []SourceDBMetadata - shardedLimits := []ExpDataShardedLimit{ { numCores: sql.NullFloat64{Float64: 4, Valid: true}, maxSupportedNumTables: sql.NullInt64{Int64: 20, Valid: true}, }, } - recommendation := map[int]IntermediateRecommendation{ 4: { ColocatedTables: []SourceDBMetadata{}, @@ -533,26 +647,26 @@ func TestFindNumNodesNeededBasedOnTabletsRequired_Positive_NoChange(t *testing.T } // Run the function - updatedRecommendation := findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata, shardedLimits, recommendation) + updatedRecommendation := + findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata, shardedLimits, recommendation) // check if the num nodes in updated recommendation is same as before(3) meaning no scaling is required assert.Equal(t, float64(3), updatedRecommendation[4].NumNodes) } -func TestFindNumNodesNeededBasedOnTabletsRequired_Positive(t *testing.T) { +// validate that the tablets cannot be supported by existing nodes and scaling is needed +func TestFindNumNodesNeededBasedOnTabletsRequired_NeedMoreNodes(t *testing.T) { // Define test data sourceIndexMetadata := []SourceDBMetadata{ {SchemaName: "public", ObjectName: "index1", Size: 7, ParentTableName: 
sql.NullString{String: "public.table1"}}, {SchemaName: "public", ObjectName: "index2", Size: 3, ParentTableName: sql.NullString{String: "public.table2"}}, } - shardedLimits := []ExpDataShardedLimit{ { numCores: sql.NullFloat64{Float64: 4, Valid: true}, maxSupportedNumTables: sql.NullInt64{Int64: 20, Valid: true}, }, } - recommendation := map[int]IntermediateRecommendation{ 4: { ColocatedTables: []SourceDBMetadata{}, @@ -566,7 +680,8 @@ func TestFindNumNodesNeededBasedOnTabletsRequired_Positive(t *testing.T) { } // Run the function - updatedRecommendation := findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata, shardedLimits, recommendation) + updatedRecommendation := + findNumNodesNeededBasedOnTabletsRequired(sourceIndexMetadata, shardedLimits, recommendation) // check if the num nodes in updated recommendation has increased. Meaning scaling is required. assert.Equal(t, float64(6), updatedRecommendation[4].NumNodes) @@ -575,8 +690,8 @@ func TestFindNumNodesNeededBasedOnTabletsRequired_Positive(t *testing.T) { /* ===== Test functions to test pickBestRecommendation function ===== */ - -func TestPickBestRecommendation(t *testing.T) { +// validate if the recommendation with optimal nodes and cores is picked up +func TestPickBestRecommendation_PickOneWithOptimalNodesAndCores(t *testing.T) { recommendations := map[int]IntermediateRecommendation{ 4: { VCPUsPerInstance: 4, @@ -594,16 +709,65 @@ func TestPickBestRecommendation(t *testing.T) { assert.Equal(t, 8, bestPick.VCPUsPerInstance) } +// validate if the recommendation with optimal nodes is picked up when some of the recommendation has failure reasoning +func TestPickBestRecommendation_PickOneWithOptimalNodesAndCoresWhenSomeHasFailures(t *testing.T) { + recommendations := map[int]IntermediateRecommendation{ + 4: { + VCPUsPerInstance: 4, + NumNodes: 10, + FailureReasoning: "has some failure", + }, + 8: { + VCPUsPerInstance: 8, + NumNodes: 3, + FailureReasoning: "has some failure as well", + }, + 16: { + VCPUsPerInstance: 16, + NumNodes: 3, + FailureReasoning: "", + }, + } + bestPick := pickBestRecommendation(recommendations) + // validate the best recommendation which is 16 vcpus per instance is picked up + assert.Equal(t, 16, bestPick.VCPUsPerInstance) +} + +// validate if the recommendation with optimal nodes and cores is picked up for showing failure reasoning +func TestPickBestRecommendation_PickLastMaxCoreRecommendationWhenNoneCanSupport(t *testing.T) { + recommendations := map[int]IntermediateRecommendation{ + 4: { + VCPUsPerInstance: 4, + NumNodes: 10, + FailureReasoning: "has some failure reasoning", + }, + 8: { + VCPUsPerInstance: 8, + NumNodes: 3, + FailureReasoning: "has some failure reasoning", + }, + 16: { + VCPUsPerInstance: 16, + NumNodes: 3, + FailureReasoning: "has some failure reasoning", + }, + } + bestPick := pickBestRecommendation(recommendations) + // validate the best recommendation which is 16 vcpus per instance is picked up + assert.Equal(t, 16, bestPick.VCPUsPerInstance) +} + /* ===== Test functions to test calculateTimeTakenAndParallelJobsForImport function ===== */ - -func TestCalculateTimeTakenAndParallelJobsForImport_Positive(t *testing.T) { +// validate the formula to calculate the import time +func TestCalculateTimeTakenAndParallelJobsForImport_ValidateFormulaToCalculateImportTime(t *testing.T) { db, mock := createMockDB(t) // Define the mock response for the query rows := sqlmock.NewRows([]string{"csv_size_gb", "migration_time_secs", "parallel_threads"}). 
AddRow(100, 6000, 4) - mock.ExpectQuery("(?i)SELECT csv_size_gb, migration_time_secs, parallel_threads FROM .* WHERE num_cores = .*"). + mock.ExpectQuery( + "(?i)SELECT csv_size_gb, migration_time_secs, parallel_threads FROM .* WHERE num_cores = .*"). WithArgs(4, 4, 50.0, 4). WillReturnRows(rows) @@ -611,7 +775,8 @@ func TestCalculateTimeTakenAndParallelJobsForImport_Positive(t *testing.T) { dbObjects := []SourceDBMetadata{{Size: 30.0}, {Size: 20.0}} // Call the function - estimatedTime, parallelJobs, err := calculateTimeTakenAndParallelJobsForImport(COLOCATED_LOAD_TIME_TABLE, dbObjects, 4, 4, db) + estimatedTime, parallelJobs, err := + calculateTimeTakenAndParallelJobsForImport(COLOCATED_LOAD_TIME_TABLE, dbObjects, 4, 4, db) if err != nil { t.Errorf("Unexpected error: %v", err) } @@ -619,9 +784,9 @@ func TestCalculateTimeTakenAndParallelJobsForImport_Positive(t *testing.T) { // Define expected results expectedTime := 50.0 // Calculated as ((6000 * 50) / 100) / 60 expectedJobs := int64(4) - if estimatedTime != expectedTime || parallelJobs != expectedJobs { - t.Errorf("calculateTimeTakenAndParallelJobsForImport() = (%v, %v), want (%v, %v)", estimatedTime, parallelJobs, expectedTime, expectedJobs) + t.Errorf("calculateTimeTakenAndParallelJobsForImport() = (%v, %v), want (%v, %v)", + estimatedTime, parallelJobs, expectedTime, expectedJobs) } // Ensure all expectations were met @@ -633,6 +798,7 @@ func TestCalculateTimeTakenAndParallelJobsForImport_Positive(t *testing.T) { /* ===== Test functions to test getReasoning function ===== */ +// validate reasoning when there are no objects(empty colocated and sharded objects) func TestGetReasoning_NoObjects(t *testing.T) { recommendation := IntermediateRecommendation{VCPUsPerInstance: 4, MemoryPerCore: 16} var shardedObjects []SourceDBMetadata @@ -645,6 +811,7 @@ func TestGetReasoning_NoObjects(t *testing.T) { } } +// validate reasoning when there are only colocated objects func TestGetReasoning_OnlyColocatedObjects(t *testing.T) { recommendation := IntermediateRecommendation{VCPUsPerInstance: 8, MemoryPerCore: 8} var shardedObjects []SourceDBMetadata @@ -652,7 +819,9 @@ func TestGetReasoning_OnlyColocatedObjects(t *testing.T) { {Size: 50.0, ReadsPerSec: 1000, WritesPerSec: 500}, {Size: 30.0, ReadsPerSec: 2000, WritesPerSec: 1500}, } - expected := "Recommended instance type with 8 vCPU and 64 GiB memory could fit 2 objects(2 tables and 0 explicit/implicit indexes) with 80.00 GB size and throughput requirement of 3000 reads/sec and 2000 writes/sec as colocated." + expected := "Recommended instance type with 8 vCPU and 64 GiB memory could fit 2 objects(2 tables and 0 " + + "explicit/implicit indexes) with 80.00 GB size and throughput requirement of 3000 reads/sec and " + + "2000 writes/sec as colocated." 
result := getReasoning(recommendation, shardedObjects, 0, colocatedObjects, 0) if result != expected { @@ -660,6 +829,7 @@ func TestGetReasoning_OnlyColocatedObjects(t *testing.T) { } } +// validate reasoning when there are only sharded objects func TestGetReasoning_OnlyShardedObjects(t *testing.T) { recommendation := IntermediateRecommendation{VCPUsPerInstance: 16, MemoryPerCore: 4} shardedObjects := []SourceDBMetadata{ @@ -667,7 +837,9 @@ func TestGetReasoning_OnlyShardedObjects(t *testing.T) { {Size: 200.0, ReadsPerSec: 5000, WritesPerSec: 4000}, } var colocatedObjects []SourceDBMetadata - expected := "Recommended instance type with 16 vCPU and 64 GiB memory could fit 2 objects(2 tables and 0 explicit/implicit indexes) with 300.00 GB size and throughput requirement of 9000 reads/sec and 7000 writes/sec as sharded." + expected := "Recommended instance type with 16 vCPU and 64 GiB memory could fit 2 objects(2 tables and 0 " + + "explicit/implicit indexes) with 300.00 GB size and throughput requirement of 9000 reads/sec and " + + "7000 writes/sec as sharded." result := getReasoning(recommendation, shardedObjects, 0, colocatedObjects, 0) if result != expected { @@ -675,6 +847,7 @@ func TestGetReasoning_OnlyShardedObjects(t *testing.T) { } } +// validate reasoning when there are colocated and sharded objects func TestGetReasoning_ColocatedAndShardedObjects(t *testing.T) { recommendation := IntermediateRecommendation{VCPUsPerInstance: 32, MemoryPerCore: 2} shardedObjects := []SourceDBMetadata{ @@ -684,7 +857,11 @@ func TestGetReasoning_ColocatedAndShardedObjects(t *testing.T) { {Size: 70.0, ReadsPerSec: 3000, WritesPerSec: 2000}, {Size: 50.0, ReadsPerSec: 2000, WritesPerSec: 1000}, } - expected := "Recommended instance type with 32 vCPU and 64 GiB memory could fit 2 objects(2 tables and 0 explicit/implicit indexes) with 120.00 GB size and throughput requirement of 5000 reads/sec and 3000 writes/sec as colocated. Rest 1 objects(1 tables and 0 explicit/implicit indexes) with 150.00 GB size and throughput requirement of 7000 reads/sec and 6000 writes/sec need to be migrated as range partitioned tables" + expected := "Recommended instance type with 32 vCPU and 64 GiB memory could fit 2 objects(2 tables and 0 " + + "explicit/implicit indexes) with 120.00 GB size and throughput requirement of 5000 reads/sec and " + + "3000 writes/sec as colocated. Rest 1 objects(1 tables and 0 explicit/implicit indexes) with 150.00 GB " + + "size and throughput requirement of 7000 reads/sec and 6000 writes/sec need to be migrated as range " + + "partitioned tables" result := getReasoning(recommendation, shardedObjects, 0, colocatedObjects, 0) if result != expected { @@ -692,6 +869,7 @@ func TestGetReasoning_ColocatedAndShardedObjects(t *testing.T) { } } +// validate reasoning when there are colocated and sharded objects with indexes func TestGetReasoning_Indexes(t *testing.T) { recommendation := IntermediateRecommendation{VCPUsPerInstance: 4, MemoryPerCore: 16} shardedObjects := []SourceDBMetadata{ @@ -700,7 +878,11 @@ func TestGetReasoning_Indexes(t *testing.T) { colocatedObjects := []SourceDBMetadata{ {Size: 100.0, ReadsPerSec: 3000, WritesPerSec: 2000}, } - expected := "Recommended instance type with 4 vCPU and 64 GiB memory could fit 1 objects(0 tables and 1 explicit/implicit indexes) with 100.00 GB size and throughput requirement of 3000 reads/sec and 2000 writes/sec as colocated. 
Rest 1 objects(0 tables and 1 explicit/implicit indexes) with 200.00 GB size and throughput requirement of 6000 reads/sec and 4000 writes/sec need to be migrated as range partitioned tables" + expected := "Recommended instance type with 4 vCPU and 64 GiB memory could fit 1 objects(0 tables and " + + "1 explicit/implicit indexes) with 100.00 GB size and throughput requirement of 3000 reads/sec and " + + "2000 writes/sec as colocated. Rest 1 objects(0 tables and 1 explicit/implicit indexes) with 200.00 GB size " + + "and throughput requirement of 6000 reads/sec and 4000 writes/sec need to be migrated as range " + + "partitioned tables" result := getReasoning(recommendation, shardedObjects, 1, colocatedObjects, 1) if result != expected { From ac0a6f60d4e449a115df6116ed3c6303e94902ff Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Mon, 10 Jun 2024 16:16:44 +0530 Subject: [PATCH 13/14] changed the assert --- yb-voyager/src/migassessment/sizing_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yb-voyager/src/migassessment/sizing_test.go b/yb-voyager/src/migassessment/sizing_test.go index 4f0d42abf..95fd6ce9a 100644 --- a/yb-voyager/src/migassessment/sizing_test.go +++ b/yb-voyager/src/migassessment/sizing_test.go @@ -90,7 +90,7 @@ func TestGetSourceMetadata_SuccessReadingSourceMetadata(t *testing.T) { // check if the total indexes are equal to expected indexes assert.Len(t, sourceIndexMetadata, 1) // check if the total size of the source database is equal to the expected size - assert.True(t, 1.07 == Round(totalSourceDBSize, 2)) + assert.Equal(t, 1.07, Round(totalSourceDBSize, 2)) // check if the values of the source table metadata are equal to the expected values assert.Equal(t, "public", sourceTableMetadata[0].SchemaName) assert.Equal(t, "table1", sourceTableMetadata[0].ObjectName) From 8535b8dbcfc10093891db97e856183b00042347e Mon Sep 17 00:00:00 2001 From: shaharuk-yb Date: Thu, 13 Jun 2024 13:12:03 +0530 Subject: [PATCH 14/14] incorporated review comments --- yb-voyager/src/migassessment/sizing_test.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/yb-voyager/src/migassessment/sizing_test.go b/yb-voyager/src/migassessment/sizing_test.go index 95fd6ce9a..7f1304483 100644 --- a/yb-voyager/src/migassessment/sizing_test.go +++ b/yb-voyager/src/migassessment/sizing_test.go @@ -26,9 +26,9 @@ import ( "testing" ) -var SourceDbSelectQuery = "SELECT schema_name, object_name, row_count, reads_per_second, writes_per_second, " + - "is_index, parent_table_name, size_in_bytes FROM table_index_stats ORDER BY size_in_bytes ASC" -var SourceDBColumns = []string{"schema_name", "object_name", "row_count", "reads_per_second", "writes_per_second", +var AssessmentDbSelectQuery = fmt.Sprintf("SELECT schema_name, object_name, row_count, reads_per_second, writes_per_second, "+ + "is_index, parent_table_name, size_in_bytes FROM %v ORDER BY size_in_bytes ASC", TABLE_INDEX_STATS) +var AssessmentDBColumns = []string{"schema_name", "object_name", "row_count", "reads_per_second", "writes_per_second", "is_index", "parent_table_name", "size_in_bytes"} var colocatedLimits = []ExpDataColocatedLimit{ @@ -76,11 +76,11 @@ var colocatedLimits = []ExpDataColocatedLimit{ // verify successfully able to connect and load the source metadata func TestGetSourceMetadata_SuccessReadingSourceMetadata(t *testing.T) { db, mock := createMockDB(t) - rows := sqlmock.NewRows(SourceDBColumns). + rows := sqlmock.NewRows(AssessmentDBColumns). 
AddRow("public", "table1", 1000, 10, 5, false, "", 1048576000). AddRow("public", "index1", 0, 0, 0, true, "table1", 104857600) - mock.ExpectQuery(SourceDbSelectQuery).WillReturnRows(rows) + mock.ExpectQuery(AssessmentDbSelectQuery).WillReturnRows(rows) sourceTableMetadata, sourceIndexMetadata, totalSourceDBSize, err := getSourceMetadata(db) // assert if there are errors @@ -101,7 +101,7 @@ func TestGetSourceMetadata_SuccessReadingSourceMetadata(t *testing.T) { // if table_index_stat does not exist in the assessment.db or one of the required column does not exist in the table func TestGetSourceMetadata_QueryErrorIfTableDoesNotExistOrColumnsUnavailable(t *testing.T) { db, mock := createMockDB(t) - mock.ExpectQuery(SourceDbSelectQuery).WillReturnError(errors.New("query error")) + mock.ExpectQuery(AssessmentDbSelectQuery).WillReturnError(errors.New("query error")) _, _, _, err := getSourceMetadata(db) assert.Error(t, err) @@ -114,9 +114,9 @@ func TestGetSourceMetadata_QueryErrorIfTableDoesNotExistOrColumnsUnavailable(t * func TestGetSourceMetadata_RowScanError(t *testing.T) { db, mock := createMockDB(t) // 4th column is expected to be int, but as it is float, it will throw an error - rows := sqlmock.NewRows(SourceDBColumns).AddRow("public", "table1", 1000, 10.5, 5, false, "", 1048576000). + rows := sqlmock.NewRows(AssessmentDBColumns).AddRow("public", "table1", 1000, 10.5, 5, false, "", 1048576000). RowError(1, errors.New("row scan error")) - mock.ExpectQuery(SourceDbSelectQuery).WillReturnRows(rows) + mock.ExpectQuery(AssessmentDbSelectQuery).WillReturnRows(rows) _, _, _, err := getSourceMetadata(db) assert.Error(t, err) @@ -127,8 +127,8 @@ func TestGetSourceMetadata_RowScanError(t *testing.T) { // verify if there are no rows in the source metadata func TestGetSourceMetadata_NoRows(t *testing.T) { db, mock := createMockDB(t) - rows := sqlmock.NewRows(SourceDBColumns) - mock.ExpectQuery(SourceDbSelectQuery).WillReturnRows(rows) + rows := sqlmock.NewRows(AssessmentDBColumns) + mock.ExpectQuery(AssessmentDbSelectQuery).WillReturnRows(rows) sourceTableMetadata, sourceIndexMetadata, totalSourceDBSize, err := getSourceMetadata(db) assert.NoError(t, err)
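Note: the tests in these patches call a createMockDB helper that lives in the test file's helper section and is not shown in the hunks above. As a point of reference only, a minimal sketch of what such a helper could look like, assuming the DATA-DOG/go-sqlmock package that these tests already use, is given below; the actual helper in sizing_test.go may differ.

package migassessment

import (
	"database/sql"
	"testing"

	"github.com/DATA-DOG/go-sqlmock"
)

// createMockDB is a hypothetical sketch of the helper used by the tests above:
// it opens an in-memory sqlmock connection and returns both the *sql.DB handle
// and the Sqlmock expectation recorder used for ExpectQuery/ExpectationsWereMet.
func createMockDB(t *testing.T) (*sql.DB, sqlmock.Sqlmock) {
	db, mock, err := sqlmock.New()
	if err != nil {
		// Fail the test immediately if the stub database connection cannot be created.
		t.Fatalf("failed to open sqlmock database connection: %v", err)
	}
	return db, mock
}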