From bb994ab6d3afc845ec61a4b7620f05287ef5ab4c Mon Sep 17 00:00:00 2001 From: Qifan Zhang Date: Tue, 29 Oct 2024 17:15:58 +0800 Subject: [PATCH 1/3] GBQ driver supports max stream count setting --- csharp/src/Drivers/BigQuery/BigQueryConnection.cs | 3 ++- csharp/src/Drivers/BigQuery/BigQueryParameters.cs | 1 + csharp/src/Drivers/BigQuery/BigQueryStatement.cs | 13 ++++++++++++- .../Drivers/BigQuery/BigQueryTestConfiguration.cs | 3 +++ .../test/Drivers/BigQuery/BigQueryTestingUtils.cs | 5 +++++ 5 files changed, 23 insertions(+), 2 deletions(-) diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs index acba314ebf..bb007a6442 100644 --- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs +++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs @@ -1002,7 +1002,8 @@ private IReadOnlyDictionary ParseOptions() BigQueryParameters.UseLegacySQL, BigQueryParameters.LargeDecimalsAsString, BigQueryParameters.LargeResultsDestinationTable, - BigQueryParameters.GetQueryResultsOptionsTimeoutMinutes + BigQueryParameters.GetQueryResultsOptionsTimeoutMinutes, + BigQueryParameters.CreateReadSessionMaxStreamCount }; foreach (string key in statementOptions) diff --git a/csharp/src/Drivers/BigQuery/BigQueryParameters.cs b/csharp/src/Drivers/BigQuery/BigQueryParameters.cs index 3f9aafb20b..27f58e9356 100644 --- a/csharp/src/Drivers/BigQuery/BigQueryParameters.cs +++ b/csharp/src/Drivers/BigQuery/BigQueryParameters.cs @@ -35,6 +35,7 @@ public class BigQueryParameters public const string Scopes = "adbc.bigquery.scopes"; public const string IncludeConstraintsWithGetObjects = "adbc.bigquery.include_constraints_getobjects"; public const string GetQueryResultsOptionsTimeoutMinutes = "adbc.bigquery.get_query_results_options.timeout"; + public const string CreateReadSessionMaxStreamCount = "adbc.bigquery.create_read_session.max_stream_count"; } /// diff --git a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs index d21f711c0d..c5f3f3ad19 100644 --- a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs +++ b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs @@ -103,8 +103,19 @@ public override QueryResult ExecuteQuery() string table = $"projects/{results.TableReference.ProjectId}/datasets/{results.TableReference.DatasetId}/tables/{results.TableReference.TableId}"; + int maxStreamCount = 1; + if (this.Options?.TryGetValue(BigQueryParameters.CreateReadSessionMaxStreamCount, out string? maxStreamCountString) == true) + { + if (int.TryParse(maxStreamCountString, out int count)) + { + if (count >= 0) + { + maxStreamCount = count; + } + } + } ReadSession rs = new ReadSession { Table = table, DataFormat = DataFormat.Arrow }; - ReadSession rrs = readClient.CreateReadSession("projects/" + results.TableReference.ProjectId, rs, 1); + ReadSession rrs = readClient.CreateReadSession("projects/" + results.TableReference.ProjectId, rs, maxStreamCount); long totalRows = results.TotalRows == null ? -1L : (long)results.TotalRows.Value; IArrowArrayStream stream = new MultiArrowReader(TranslateSchema(results.Schema), rrs.Streams.Select(s => ReadChunk(readClient, s.Name))); diff --git a/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs b/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs index a1782910ff..ffcae7cc05 100644 --- a/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs +++ b/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs @@ -59,5 +59,8 @@ public BigQueryTestConfiguration() [JsonPropertyName("timeoutMinutes")] public int? TimeoutMinutes { get; set; } + + [JsonPropertyName("maxStreamCount")] + public int? MaxStreamCount { get; set; } } } diff --git a/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs b/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs index bf80aa2562..347fc8cc56 100644 --- a/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs +++ b/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs @@ -91,6 +91,11 @@ internal static Dictionary GetBigQueryParameters(BigQueryTestCon parameters.Add(BigQueryParameters.GetQueryResultsOptionsTimeoutMinutes, testConfiguration.TimeoutMinutes.Value.ToString()); } + if (testConfiguration.MaxStreamCount.HasValue) + { + parameters.Add(BigQueryParameters.CreateReadSessionMaxStreamCount, testConfiguration.MaxStreamCount.Value.ToString()); + } + return parameters; } From a2d9e9c14d519ac8e9bb8f6d6ca6e5936e8367bd Mon Sep 17 00:00:00 2001 From: Qifan Zhang Date: Wed, 30 Oct 2024 18:00:28 +0800 Subject: [PATCH 2/3] resovled the comments --- csharp/src/Drivers/BigQuery/BigQueryParameters.cs | 2 +- csharp/src/Drivers/BigQuery/readme.md | 3 +++ csharp/test/Drivers/BigQuery/Resources/bigqueryconfig.json | 3 ++- csharp/test/Drivers/BigQuery/readme.md | 1 + 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/csharp/src/Drivers/BigQuery/BigQueryParameters.cs b/csharp/src/Drivers/BigQuery/BigQueryParameters.cs index 27f58e9356..4dc9493c77 100644 --- a/csharp/src/Drivers/BigQuery/BigQueryParameters.cs +++ b/csharp/src/Drivers/BigQuery/BigQueryParameters.cs @@ -35,7 +35,7 @@ public class BigQueryParameters public const string Scopes = "adbc.bigquery.scopes"; public const string IncludeConstraintsWithGetObjects = "adbc.bigquery.include_constraints_getobjects"; public const string GetQueryResultsOptionsTimeoutMinutes = "adbc.bigquery.get_query_results_options.timeout"; - public const string CreateReadSessionMaxStreamCount = "adbc.bigquery.create_read_session.max_stream_count"; + public const string CreateReadSessionMaxStreamCount = "adbc.bigquery.max_fetch_concurrency"; } /// diff --git a/csharp/src/Drivers/BigQuery/readme.md b/csharp/src/Drivers/BigQuery/readme.md index eafaf2c86a..d78ab4c891 100644 --- a/csharp/src/Drivers/BigQuery/readme.md +++ b/csharp/src/Drivers/BigQuery/readme.md @@ -54,6 +54,9 @@ https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/G **adbc.bigquery.get_query_results_options.timeout**
    Optional. Sets the timeout (in minutes) for the GetQueryResultsOptions value. If not set, defaults to 5 minutes. +**adbc.bigquery.max_fetch_concurrency**
+    Optional. Sets the [maxStreamCount](https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.Storage.V1/latest/Google.Cloud.BigQuery.Storage.V1.BigQueryReadClient#Google_Cloud_BigQuery_Storage_V1_BigQueryReadClient_CreateReadSession_System_String_Google_Cloud_BigQuery_Storage_V1_ReadSession_System_Int32_Google_Api_Gax_Grpc_CallSettings_) for the CreateReadSession method. If not set, defaults to 1. + **adbc.bigquery.include_constraints_getobjects**
    Optional. Some callers do not need the constraint details when they get the table information and can improve the speed of obtaining the results. Setting this value to `"false"` will not include the constraint details. The default value is `"true"`. diff --git a/csharp/test/Drivers/BigQuery/Resources/bigqueryconfig.json b/csharp/test/Drivers/BigQuery/Resources/bigqueryconfig.json index 888ae7eb30..960bcb8683 100644 --- a/csharp/test/Drivers/BigQuery/Resources/bigqueryconfig.json +++ b/csharp/test/Drivers/BigQuery/Resources/bigqueryconfig.json @@ -3,11 +3,12 @@ "clientId": "", "clientSecret": "", "refreshToken": "", + "maxStreamCount": 1, "metadata": { "catalog": "", "schema": "", "table": "", - "expectedColumnCount": 0 + "expectedColumnCount": 0 }, "query": "", "expectedResults": 0 diff --git a/csharp/test/Drivers/BigQuery/readme.md b/csharp/test/Drivers/BigQuery/readme.md index daffddc5fb..26f177da4a 100644 --- a/csharp/test/Drivers/BigQuery/readme.md +++ b/csharp/test/Drivers/BigQuery/readme.md @@ -39,6 +39,7 @@ The following values can be setup in the configuration - **query** - The query to use. - **expectedResults** - The expected number of results from the query. - **timeoutMinutes** - The timeout (in minutes). +- **maxStreamCount** - The max stream count. - **includeTableConstraints** - Whether to include table constraints in the GetObjects query. - **largeResultsDestinationTable** - Sets the [DestinationTable](https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/Google.Cloud.BigQuery.V2.QueryOptions#Google_Cloud_BigQuery_V2_QueryOptions_DestinationTable) value of the QueryOptions if configured. Expects the format to be `{projectId}.{datasetId}.{tableId}` to set the corresponding values in the [TableReference](https://github.com/googleapis/google-api-dotnet-client/blob/6c415c73788b848711e47c6dd33c2f93c76faf97/Src/Generated/Google.Apis.Bigquery.v2/Google.Apis.Bigquery.v2.cs#L9348) class. - **allowLargeResults** - Whether to allow large results . From abe33e7ea53242721b8f18c2bfd1091684793c21 Mon Sep 17 00:00:00 2001 From: Qifan Zhang Date: Wed, 30 Oct 2024 22:19:06 +0800 Subject: [PATCH 3/3] resolved comments --- csharp/src/Drivers/BigQuery/BigQueryConnection.cs | 2 +- csharp/src/Drivers/BigQuery/BigQueryParameters.cs | 2 +- csharp/src/Drivers/BigQuery/BigQueryStatement.cs | 2 +- csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs index bb007a6442..398365314f 100644 --- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs +++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs @@ -1003,7 +1003,7 @@ private IReadOnlyDictionary ParseOptions() BigQueryParameters.LargeDecimalsAsString, BigQueryParameters.LargeResultsDestinationTable, BigQueryParameters.GetQueryResultsOptionsTimeoutMinutes, - BigQueryParameters.CreateReadSessionMaxStreamCount + BigQueryParameters.MaxFetchConcurrency }; foreach (string key in statementOptions) diff --git a/csharp/src/Drivers/BigQuery/BigQueryParameters.cs b/csharp/src/Drivers/BigQuery/BigQueryParameters.cs index 4dc9493c77..51272eb643 100644 --- a/csharp/src/Drivers/BigQuery/BigQueryParameters.cs +++ b/csharp/src/Drivers/BigQuery/BigQueryParameters.cs @@ -35,7 +35,7 @@ public class BigQueryParameters public const string Scopes = "adbc.bigquery.scopes"; public const string IncludeConstraintsWithGetObjects = "adbc.bigquery.include_constraints_getobjects"; public const string GetQueryResultsOptionsTimeoutMinutes = "adbc.bigquery.get_query_results_options.timeout"; - public const string CreateReadSessionMaxStreamCount = "adbc.bigquery.max_fetch_concurrency"; + public const string MaxFetchConcurrency = "adbc.bigquery.max_fetch_concurrency"; } /// diff --git a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs index c5f3f3ad19..91e1f8356a 100644 --- a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs +++ b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs @@ -104,7 +104,7 @@ public override QueryResult ExecuteQuery() string table = $"projects/{results.TableReference.ProjectId}/datasets/{results.TableReference.DatasetId}/tables/{results.TableReference.TableId}"; int maxStreamCount = 1; - if (this.Options?.TryGetValue(BigQueryParameters.CreateReadSessionMaxStreamCount, out string? maxStreamCountString) == true) + if (this.Options?.TryGetValue(BigQueryParameters.MaxFetchConcurrency, out string? maxStreamCountString) == true) { if (int.TryParse(maxStreamCountString, out int count)) { diff --git a/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs b/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs index 347fc8cc56..2bc6227bab 100644 --- a/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs +++ b/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs @@ -93,7 +93,7 @@ internal static Dictionary GetBigQueryParameters(BigQueryTestCon if (testConfiguration.MaxStreamCount.HasValue) { - parameters.Add(BigQueryParameters.CreateReadSessionMaxStreamCount, testConfiguration.MaxStreamCount.Value.ToString()); + parameters.Add(BigQueryParameters.MaxFetchConcurrency, testConfiguration.MaxStreamCount.Value.ToString()); } return parameters;