From 78de9e903f388dadbc5e1fd6b04e6b7f24b20987 Mon Sep 17 00:00:00 2001 From: Bruce Irschick Date: Fri, 24 May 2024 17:39:22 -0700 Subject: [PATCH 1/8] feat(csharp/src/Drivers/Apache): improve GetObjects metadata returned for columns --- .../Drivers/Apache/Spark/SparkConnection.cs | 237 +++++++++++++----- .../test/Drivers/Apache/Spark/DriverTests.cs | 86 +++++++ 2 files changed, 255 insertions(+), 68 deletions(-) diff --git a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs index 36d62f89f2..291bfa3cfa 100644 --- a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs +++ b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs @@ -18,6 +18,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; +using System.Linq; using System.Net.Http; using System.Net.Http.Headers; using System.Text; @@ -60,24 +61,57 @@ public class SparkConnection : HiveServer2Connection { "spark.thriftserver.arrowBasedRowSet.timestampAsString", "false" } }; + /// + /// JDBC-specific data type definitions. + /// Copied from https://github.com/JetBrains/jdk8u_jdk/blob/master/src/share/classes/java/sql/Types.java + /// + /// + /// NOTE: There is a partial copy of this enumeration in test/Drivers/Apache/Spark/DriverTests.cs + /// Please keep up-to-date. 
+ /// private enum ColumnTypeId { - BOOLEAN_TYPE = 16, - TINYINT_TYPE = -6, - SMALLINT_TYPE = 5, - INT_TYPE = 4, + ARRAY_TYPE = 2003, BIGINT_TYPE = -5, - FLOAT_TYPE = 6, - DOUBLE_TYPE = 8, - STRING_TYPE = 12, - TIMESTAMP_TYPE = 93, BINARY_TYPE = -2, - ARRAY_TYPE = 2003, - MAP_TYPE = 2000, - STRUCT_TYPE = 2002, - DECIMAL_TYPE = 3, - DATE_TYPE = 91, + BOOLEAN_TYPE = 16, CHAR_TYPE = 1, + DATE_TYPE = 91, + DECIMAL_TYPE = 3, + DOUBLE_TYPE = 8, + FLOAT_TYPE = 6, + INTEGER_TYPE = 4, + JAVA_OBJECT_TYPE = 2000, + LONGNVARCHAR_TYPE = -16, + LONGVARBINARY_TYPE = -4, + LONGVARCHAR_TYPE = -1, + NCHAR_TYPE = -15, + NULL_TYPE = 0, + NUMERIC_TYPE = 2, + NVARCHAR_TYPE = -9, + REAL_TYPE = 7, + SMALLINT_TYPE = 5, + STRUCT_TYPE = 2002, + TIMESTAMP_TYPE = 93, + TINYINT_TYPE = -6, + VARBINARY_TYPE = -3, + VARCHAR_TYPE = 12, + + // Unused/unsupported + BIT_TYPE = -7, + BLOB_TYPE = 2004, + CLOB_TYPE = 2005, + DATALINK_TYPE = 70, + DISTINCT_TYPE = 2001, + NCLOB_TYPE = 2011, + OTHER_TYPE = 1111, + REF_CURSOR_TYPE = 2012, + REF_TYPE = 2006, + ROWID_TYPE = -8, + SQLXML_TYPE = 2009, + TIME_TYPE = 92, + TIME_WITH_TIMEZONE_TYPE = 2013, + TIMESTAMP_WITH_TIMEZONE_TYPE = 2014, } internal SparkConnection(IReadOnlyDictionary properties) @@ -303,7 +337,7 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? cata { Trace.TraceError($"getting objects with depth={depth.ToString()}, catalog = {catalogPattern}, dbschema = {dbSchemaPattern}, tablename = {tableNamePattern}"); - Dictionary>> catalogMap = new Dictionary>>(); + Dictionary>> catalogMap = new Dictionary>>(); if (depth == GetObjectsDepth.All || depth >= GetObjectsDepth.Catalogs) { TGetCatalogsReq getCatalogsReq = new TGetCatalogsReq(this.sessionHandle); @@ -314,10 +348,11 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? 
cata { throw new Exception(getCatalogsResp.Status.ErrorMessage); } + IReadOnlyDictionary columnMap = GetColumnIndexMap(getCatalogsResp.DirectResults.ResultSetMetadata.Schema.Columns); string catalogRegexp = PatternToRegEx(catalogPattern); TRowSet resp = getCatalogsResp.DirectResults.ResultSet.Results; - IReadOnlyList list = resp.Columns[0].StringVal.Values; + IReadOnlyList list = resp.Columns[columnMap["TABLE_CAT"]].StringVal.Values; for (int i = 0; i < list.Count; i++) { string col = list[i]; @@ -325,7 +360,7 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? cata if (Regex.IsMatch(catalog, catalogRegexp, RegexOptions.IgnoreCase)) { - catalogMap.Add(catalog, new Dictionary>()); + catalogMap.Add(catalog, new Dictionary>()); } } } @@ -342,17 +377,18 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? cata { throw new Exception(getSchemasResp.Status.ErrorMessage); } + IReadOnlyDictionary columnMap = GetColumnIndexMap(getSchemasResp.DirectResults.ResultSetMetadata.Schema.Columns); TRowSet resp = getSchemasResp.DirectResults.ResultSet.Results; - IReadOnlyList catalogList = resp.Columns[1].StringVal.Values; - IReadOnlyList schemaList = resp.Columns[0].StringVal.Values; + IReadOnlyList catalogList = resp.Columns[columnMap["TABLE_CATALOG"]].StringVal.Values; + IReadOnlyList schemaList = resp.Columns[columnMap["TABLE_SCHEM"]].StringVal.Values; for (int i = 0; i < catalogList.Count; i++) { string catalog = catalogList[i]; string schemaDb = schemaList[i]; // It seems Spark sometimes returns empty string for catalog on some schema (temporary tables). - catalogMap.GetValueOrDefault(catalog)?.Add(schemaDb, new Dictionary()); + catalogMap.GetValueOrDefault(catalog)?.Add(schemaDb, new Dictionary()); } } @@ -369,12 +405,14 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? 
cata { throw new Exception(getTablesResp.Status.ErrorMessage); } + + IReadOnlyDictionary columnMap = GetColumnIndexMap(getTablesResp.DirectResults.ResultSetMetadata.Schema.Columns); TRowSet resp = getTablesResp.DirectResults.ResultSet.Results; - IReadOnlyList catalogList = resp.Columns[0].StringVal.Values; - IReadOnlyList schemaList = resp.Columns[1].StringVal.Values; - IReadOnlyList tableList = resp.Columns[2].StringVal.Values; - IReadOnlyList tableTypeList = resp.Columns[3].StringVal.Values; + IReadOnlyList catalogList = resp.Columns[columnMap["TABLE_CAT"]].StringVal.Values; + IReadOnlyList schemaList = resp.Columns[columnMap["TABLE_SCHEM"]].StringVal.Values; + IReadOnlyList tableList = resp.Columns[columnMap["TABLE_NAME"]].StringVal.Values; + IReadOnlyList tableTypeList = resp.Columns[columnMap["TABLE_TYPE"]].StringVal.Values; for (int i = 0; i < catalogList.Count; i++) { @@ -382,10 +420,7 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? cata string schemaDb = schemaList[i]; string tableName = tableList[i]; string tableType = tableTypeList[i]; - TableInfoPair tableInfo = new TableInfoPair(); - tableInfo.Type = tableType; - tableInfo.Columns = new List(); - tableInfo.ColType = new List(); + TableInfo tableInfo = new(tableType); catalogMap.GetValueOrDefault(catalog)?.GetValueOrDefault(schemaDb)?.Add(tableName, tableInfo); } } @@ -407,31 +442,53 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? 
cata throw new Exception(columnsResponse.Status.ErrorMessage); } + IReadOnlyDictionary columnMap = GetColumnIndexMap(columnsResponse.DirectResults.ResultSetMetadata.Schema.Columns); TRowSet resp = columnsResponse.DirectResults.ResultSet.Results; - IReadOnlyList catalogList = resp.Columns[0].StringVal.Values; - IReadOnlyList schemaList = resp.Columns[1].StringVal.Values; - IReadOnlyList tableList = resp.Columns[2].StringVal.Values; - IReadOnlyList columnList = resp.Columns[3].StringVal.Values; - ReadOnlySpan columnTypeList = resp.Columns[4].I32Val.Values.Values; + IReadOnlyList catalogList = resp.Columns[columnMap["TABLE_CAT"]].StringVal.Values; + IReadOnlyList schemaList = resp.Columns[columnMap["TABLE_SCHEM"]].StringVal.Values; + IReadOnlyList tableList = resp.Columns[columnMap["TABLE_NAME"]].StringVal.Values; + IReadOnlyList columnNameList = resp.Columns[columnMap["COLUMN_NAME"]].StringVal.Values; + ReadOnlySpan columnTypeList = resp.Columns[columnMap["DATA_TYPE"]].I32Val.Values.Values; + IReadOnlyList typeNameList = resp.Columns[columnMap["TYPE_NAME"]].StringVal.Values; + ReadOnlySpan nullableList = resp.Columns[columnMap["NULLABLE"]].I32Val.Values.Values; + IReadOnlyList columnDefaultList = resp.Columns[columnMap["COLUMN_DEF"]].StringVal.Values; + ReadOnlySpan ordinalPosList = resp.Columns[columnMap["ORDINAL_POSITION"]].I32Val.Values.Values; + IReadOnlyList isNullableList = resp.Columns[columnMap["IS_NULLABLE"]].StringVal.Values; + IReadOnlyList isAutoIncrementList = resp.Columns[columnMap["IS_AUTO_INCREMENT"]].StringVal.Values; for (int i = 0; i < catalogList.Count; i++) { string catalog = catalogList[i]; string schemaDb = schemaList[i]; string tableName = tableList[i]; - string column = columnList[i]; - int colType = columnTypeList[i]; - TableInfoPair? 
tableInfo = catalogMap.GetValueOrDefault(catalog)?.GetValueOrDefault(schemaDb)?.GetValueOrDefault(tableName); - tableInfo?.Columns.Add(column); + string columnName = columnNameList[i]; + short colType = (short)columnTypeList[i]; + string typeName = typeNameList[i]; + short nullable = (short)nullableList[i]; + string? isAutoIncrementString = isAutoIncrementList[i]; + bool isAutoIncrement = (!string.IsNullOrEmpty(isAutoIncrementString) && (isAutoIncrementString.Equals("YES", StringComparison.InvariantCultureIgnoreCase) || isAutoIncrementString.Equals("TRUE", StringComparison.InvariantCultureIgnoreCase))); + string isNullable = isNullableList[i] ?? "YES"; + string columnDefault = columnDefaultList[i] ?? ""; + // Spark/Databricks reports ordinal index zero-indexed, instead of one-indexed + int ordinalPos = ordinalPosList[i] + 1; + TableInfo? tableInfo = catalogMap.GetValueOrDefault(catalog)?.GetValueOrDefault(schemaDb)?.GetValueOrDefault(tableName); + tableInfo?.ColumnName.Add(columnName); tableInfo?.ColType.Add(colType); + tableInfo?.Nullable.Add(nullable); + tableInfo?.TypeName.Add(typeName); + tableInfo?.IsAutoIncrement.Add(isAutoIncrement); + tableInfo?.IsNullable.Add(isNullable); + tableInfo?.ColumnDefault.Add(columnDefault); + tableInfo?.OrdinalPosition.Add(ordinalPos); + SetPrecisionAndScale(colType, typeName, tableInfo); } } StringArray.Builder catalogNameBuilder = new StringArray.Builder(); List catalogDbSchemasValues = new List(); - foreach (KeyValuePair>> catalogEntry in catalogMap) + foreach (KeyValuePair>> catalogEntry in catalogMap) { catalogNameBuilder.Append(catalogEntry.Key); @@ -457,6 +514,30 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? 
cata return new SparkInfoArrowStream(schema, dataArrays); } + private static IReadOnlyDictionary GetColumnIndexMap(List columns) => columns + .Select(t => new { Index = t.Position - 1, t.ColumnName }) + .ToDictionary(t => t.ColumnName, t => t.Index); + + private static void SetPrecisionAndScale(short colType, string typeName, TableInfo? tableInfo) + { + switch (colType) + { + case (short)ColumnTypeId.DECIMAL_TYPE: + case (short)ColumnTypeId.NUMERIC_TYPE: + { + Decimal128Type decimalType = SqlDecimalTypeParser.ParseOrDefault(typeName, new Decimal128Type(DecimalPrecisionDefault, DecimalScaleDefault)); + tableInfo?.Precision.Add(decimalType.Precision); + tableInfo?.Scale.Add((short)decimalType.Scale); + break; + } + + default: + tableInfo?.Precision.Add(null); + tableInfo?.Scale.Add(null); + break; + } + } + private static IArrowType GetArrowType(ColumnTypeId columnTypeId, string typeName) { switch (columnTypeId) @@ -467,30 +548,40 @@ private static IArrowType GetArrowType(ColumnTypeId columnTypeId, string typeNam return Int8Type.Default; case ColumnTypeId.SMALLINT_TYPE: return Int16Type.Default; - case ColumnTypeId.INT_TYPE: + case ColumnTypeId.INTEGER_TYPE: return Int32Type.Default; case ColumnTypeId.BIGINT_TYPE: return Int64Type.Default; case ColumnTypeId.FLOAT_TYPE: + case ColumnTypeId.REAL_TYPE: return FloatType.Default; case ColumnTypeId.DOUBLE_TYPE: return DoubleType.Default; - case ColumnTypeId.STRING_TYPE: + case ColumnTypeId.VARCHAR_TYPE: + case ColumnTypeId.NVARCHAR_TYPE: + case ColumnTypeId.LONGVARCHAR_TYPE: + case ColumnTypeId.LONGNVARCHAR_TYPE: return StringType.Default; case ColumnTypeId.TIMESTAMP_TYPE: return new TimestampType(TimeUnit.Microsecond, timezone: (string?)null); case ColumnTypeId.BINARY_TYPE: + case ColumnTypeId.VARBINARY_TYPE: + case ColumnTypeId.LONGVARBINARY_TYPE: return BinaryType.Default; case ColumnTypeId.DATE_TYPE: return Date32Type.Default; case ColumnTypeId.CHAR_TYPE: + case ColumnTypeId.NCHAR_TYPE: return StringType.Default; case 
ColumnTypeId.DECIMAL_TYPE: + case ColumnTypeId.NUMERIC_TYPE: // Note: parsing the type name for SQL DECIMAL types as the precision and scale values // are not returned in the Thrift call to GetColumns return SqlDecimalTypeParser.ParseOrDefault(typeName, new Decimal128Type(DecimalPrecisionDefault, DecimalScaleDefault)); + case ColumnTypeId.NULL_TYPE: + return NullType.Default; case ColumnTypeId.ARRAY_TYPE: - case ColumnTypeId.MAP_TYPE: + case ColumnTypeId.JAVA_OBJECT_TYPE: case ColumnTypeId.STRUCT_TYPE: return StringType.Default; default: @@ -500,7 +591,7 @@ private static IArrowType GetArrowType(ColumnTypeId columnTypeId, string typeNam private StructArray GetDbSchemas( GetObjectsDepth depth, - Dictionary> schemaMap) + Dictionary> schemaMap) { StringArray.Builder dbSchemaNameBuilder = new StringArray.Builder(); List dbSchemaTablesValues = new List(); @@ -508,7 +599,7 @@ private StructArray GetDbSchemas( int length = 0; - foreach (KeyValuePair> schemaEntry in schemaMap) + foreach (KeyValuePair> schemaEntry in schemaMap) { dbSchemaNameBuilder.Append(schemaEntry.Key); @@ -544,7 +635,7 @@ private StructArray GetDbSchemas( private StructArray GetTableSchemas( GetObjectsDepth depth, - Dictionary tableMap) + Dictionary tableMap) { StringArray.Builder tableNameBuilder = new StringArray.Builder(); StringArray.Builder tableTypeBuilder = new StringArray.Builder(); @@ -554,7 +645,7 @@ private StructArray GetTableSchemas( int length = 0; - foreach (KeyValuePair tableEntry in tableMap) + foreach (KeyValuePair tableEntry in tableMap) { tableNameBuilder.Append(tableEntry.Key); tableTypeBuilder.Append(tableEntry.Value.Type); @@ -571,7 +662,7 @@ private StructArray GetTableSchemas( } else { - tableColumnsValues.Add(GetColumnSchema(tableEntry.Value.Columns, tableEntry.Value.ColType)); + tableColumnsValues.Add(GetColumnSchema(tableEntry.Value)); } } @@ -593,8 +684,7 @@ private StructArray GetTableSchemas( nullBitmapBuffer.Build()); } - private StructArray GetColumnSchema( - List 
columns, List colTypes) + private StructArray GetColumnSchema(TableInfo tableInfo) { StringArray.Builder columnNameBuilder = new StringArray.Builder(); Int32Array.Builder ordinalPositionBuilder = new Int32Array.Builder(); @@ -619,31 +709,26 @@ private StructArray GetColumnSchema( int length = 0; - for (int i = 0; i < columns.Count; i++) + for (int i = 0; i < tableInfo.ColumnName.Count; i++) { - columnNameBuilder.Append(columns[i]); - ordinalPositionBuilder.Append((int)colTypes[i]); + columnNameBuilder.Append(tableInfo.ColumnName[i]); + ordinalPositionBuilder.Append(tableInfo.OrdinalPosition[i]); remarksBuilder.Append(""); - - - - xdbcColumnSizeBuilder.AppendNull(); - xdbcDecimalDigitsBuilder.AppendNull(); - - - xdbcDataTypeBuilder.AppendNull(); - xdbcTypeNameBuilder.Append(""); + xdbcColumnSizeBuilder.Append(tableInfo.Precision[i]); + xdbcDecimalDigitsBuilder.Append(tableInfo.Scale[i]); + xdbcDataTypeBuilder.Append(tableInfo.ColType[i]); + xdbcTypeNameBuilder.Append(tableInfo.TypeName[i]); xdbcNumPrecRadixBuilder.AppendNull(); - xdbcNullableBuilder.AppendNull(); - xdbcColumnDefBuilder.AppendNull(); - xdbcSqlDataTypeBuilder.Append((short)colTypes[i]); + xdbcNullableBuilder.Append(tableInfo.Nullable[i]); + xdbcColumnDefBuilder.Append(tableInfo.ColumnDefault[i]); + xdbcSqlDataTypeBuilder.Append(tableInfo.ColType[i]); xdbcDatetimeSubBuilder.AppendNull(); xdbcCharOctetLengthBuilder.AppendNull(); - xdbcIsNullableBuilder.Append("true"); + xdbcIsNullableBuilder.Append(tableInfo.IsNullable[i]); xdbcScopeCatalogBuilder.AppendNull(); xdbcScopeSchemaBuilder.AppendNull(); xdbcScopeTableBuilder.AppendNull(); - xdbcIsAutoincrementBuilder.AppendNull(); + xdbcIsAutoincrementBuilder.Append(tableInfo.IsAutoIncrement[i]); xdbcIsGeneratedcolumnBuilder.Append(true); nullBitmapBuffer.Append(true); length++; @@ -751,13 +836,29 @@ private static bool TryParse(string input, out Decimal128Type? 
value) } } - internal struct TableInfoPair + internal struct TableInfo(string type) { - public string Type { get; set; } + public string Type { get; } = type; + + public List ColumnName { get; } = new(); + + public List ColType { get; } = new(); + + public List TypeName { get; } = new(); + + public List Nullable { get; } = new(); + + public List Precision { get; } = new(); + + public List Scale { get; } = new(); + + public List OrdinalPosition { get; } = new(); + + public List ColumnDefault { get; } = new(); - public List Columns { get; set; } + public List IsNullable { get; } = new(); - public List ColType { get; set; } + public List IsAutoIncrement { get; } = new(); } internal class SparkInfoArrowStream : IArrowArrayStream diff --git a/csharp/test/Drivers/Apache/Spark/DriverTests.cs b/csharp/test/Drivers/Apache/Spark/DriverTests.cs index a4f3a4607d..4f36e5062e 100644 --- a/csharp/test/Drivers/Apache/Spark/DriverTests.cs +++ b/csharp/test/Drivers/Apache/Spark/DriverTests.cs @@ -37,6 +37,59 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark [TestCaseOrderer("Apache.Arrow.Adbc.Tests.Xunit.TestOrderer", "Apache.Arrow.Adbc.Tests")] public class DriverTests : SparkTestBase { + + /// JDBC-specific data type definitions. + /// Copied from https://github.com/JetBrains/jdk8u_jdk/blob/master/src/share/classes/java/sql/Types.java + /// + /// + /// NOTE: the original of this enumeration in src/Drivers/Apache/Spark/SparkConnection.cs + /// Please keep up-to-date. 
+ /// + private enum SupportedColumnTypeId + { + ARRAY_TYPE = 2003, + BIGINT_TYPE = -5, + BINARY_TYPE = -2, + BOOLEAN_TYPE = 16, + CHAR_TYPE = 1, + DATE_TYPE = 91, + DECIMAL_TYPE = 3, + DOUBLE_TYPE = 8, + FLOAT_TYPE = 6, + INTEGER_TYPE = 4, + JAVA_OBJECT_TYPE = 2000, + LONGNVARCHAR_TYPE = -16, + LONGVARBINARY_TYPE = -4, + LONGVARCHAR_TYPE = -1, + NCHAR_TYPE = -15, + NULL_TYPE = 0, + NUMERIC_TYPE = 2, + NVARCHAR_TYPE = -9, + REAL_TYPE = 7, + SMALLINT_TYPE = 5, + STRUCT_TYPE = 2002, + TIMESTAMP_TYPE = 93, + TINYINT_TYPE = -6, + VARBINARY_TYPE = -3, + VARCHAR_TYPE = 12, + + // Unused/unsupported - throw an error if these are discovered in testing + //BIT_TYPE = -7, + //BLOB_TYPE = 2004, + //CLOB_TYPE = 2005, + //DATALINK_TYPE = 70, + //DISTINCT_TYPE = 2001, + //NCLOB_TYPE = 2011, + //OTHER_TYPE = 1111, + //REF_CURSOR_TYPE = 2012, + //REF_TYPE = 2006, + //ROWID_TYPE = -8, + //SQLXML_TYPE = 2009, + //TIME_TYPE = 92, + //TIME_WITH_TIMEZONE_TYPE = 2013, + //TIMESTAMP_WITH_TIMEZONE_TYPE = 2014, + } + private static List DefaultTableTypes => new() { "BASE TABLE", "VIEW" }; public DriverTests(ITestOutputHelper? outputHelper) : base(outputHelper) @@ -240,6 +293,39 @@ public void CanGetObjectsAll() Assert.True(columns != null, "Columns cannot be null"); Assert.Equal(TestConfiguration.Metadata.ExpectedColumnCount, columns.Count); + + for (int i = 0; i < columns.Count; i++) + { + // Verify column metadata is returned/consistent. 
+ AdbcColumn column = columns[i]; + Assert.Equal(i + 1, column.OrdinalPosition); + Assert.NotNull(column.Name); + Assert.False(string.IsNullOrEmpty(column.Name)); + var types = Enum.GetValues(typeof(SupportedColumnTypeId)).Cast(); + Assert.Contains((SupportedColumnTypeId)column.XdbcSqlDataType!, types); + Assert.NotNull(column.XdbcDataType); + Assert.Contains((SupportedColumnTypeId)column.XdbcDataType!, types); + Assert.Equal(column.XdbcDataType, column.XdbcSqlDataType); + bool isDecimalType = column.XdbcDataType == (short)SupportedColumnTypeId.DECIMAL_TYPE || column.XdbcDataType == (short)SupportedColumnTypeId.NUMERIC_TYPE; + Assert.Equal(column.XdbcColumnSize.HasValue, isDecimalType); + Assert.Equal(column.XdbcDecimalDigits.HasValue, isDecimalType); + Assert.Equal(column.XdbcDataType, column.XdbcSqlDataType); + Assert.NotNull(column.Remarks); + Assert.True(string.IsNullOrEmpty(column.Remarks)); + Assert.NotNull(column.XdbcColumnDef); + Assert.NotNull(column.XdbcNullable); + Assert.Contains(new short[] { 1, 0 }, i => i == column.XdbcNullable); + Assert.NotNull(column.XdbcIsNullable); + Assert.Contains(new string[] { "YES", "NO" }, i => i.Equals(column.XdbcIsNullable)); + Assert.NotNull(column.XdbcIsAutoIncrement); + + Assert.Null(column.XdbcCharOctetLength); + Assert.Null(column.XdbcDatetimeSub); + Assert.Null(column.XdbcNumPrecRadix); + Assert.Null(column.XdbcScopeCatalog); + Assert.Null(column.XdbcScopeSchema); + Assert.Null(column.XdbcScopeTable); + } } /// From 527dde2f19e728c07b895028f54eb411f7b764cd Mon Sep 17 00:00:00 2001 From: Bruce Irschick Date: Mon, 27 May 2024 13:14:28 -0700 Subject: [PATCH 2/8] feat(csharp/src/Drivers/Apache): improve GetObjects metadata returned for columns --- .../Drivers/Apache/Spark/SparkConnection.cs | 193 ++++++++++++++---- .../test/Drivers/Apache/Spark/DriverTests.cs | 86 +++----- 2 files changed, 191 insertions(+), 88 deletions(-) diff --git a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs 
b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs index 291bfa3cfa..bbd9f0ff39 100644 --- a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs +++ b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs @@ -62,55 +62,178 @@ public class SparkConnection : HiveServer2Connection }; /// - /// JDBC-specific data type definitions. - /// Copied from https://github.com/JetBrains/jdk8u_jdk/blob/master/src/share/classes/java/sql/Types.java + /// The Spark data type definitions based on the JDBC Types constants. /// /// - /// NOTE: There is a partial copy of this enumeration in test/Drivers/Apache/Spark/DriverTests.cs - /// Please keep up-to-date. + /// This enumeration can be used to determine the Spark-specific data types that are contained in fields xdbc_data_type and xdbc_sql_data_type + /// in the column metadata . This column metadata is returned as a result of a call to + /// + /// when depth is set to . /// - private enum ColumnTypeId + public enum SparkDataType : short { + // NOTE: There is a partial copy of this enumeration in test/Drivers/Apache/Spark/DriverTests.cs + // Please keep up-to-date. 
+ // Copied from https://github.com/JetBrains/jdk8u_jdk/blob/master/src/share/classes/java/sql/Types.java + + /// + /// Identifies the generic SQL type ARRAY + /// ARRAY_TYPE = 2003, + /// + /// Identifies the generic SQL type BIGINT + /// BIGINT_TYPE = -5, + /// + /// Identifies the generic SQL type BINARY + /// BINARY_TYPE = -2, + /// + /// Identifies the generic SQL type BOOLEAN + /// BOOLEAN_TYPE = 16, + /// + /// Identifies the generic SQL type CHAR + /// CHAR_TYPE = 1, + /// + /// Identifies the generic SQL type DATE + /// DATE_TYPE = 91, + /// + /// Identifies the generic SQL type DECIMAL + /// DECIMAL_TYPE = 3, + /// + /// Identifies the generic SQL type DOUBLE + /// DOUBLE_TYPE = 8, + /// + /// Identifies the generic SQL type FLOAT + /// FLOAT_TYPE = 6, + /// + /// Identifies the generic SQL type INTEGER + /// INTEGER_TYPE = 4, + /// + /// Identifies the generic SQL type JAVA_OBJECT (MAP) + /// JAVA_OBJECT_TYPE = 2000, + /// + /// identifies the generic SQL type LONGNVARCHAR + /// LONGNVARCHAR_TYPE = -16, + /// + /// identifies the generic SQL type LONGVARBINARY + /// LONGVARBINARY_TYPE = -4, + /// + /// identifies the generic SQL type LONGVARCHAR + /// LONGVARCHAR_TYPE = -1, + /// + /// identifies the generic SQL type NCHAR + /// NCHAR_TYPE = -15, + /// + /// identifies the generic SQL value NULL + /// NULL_TYPE = 0, + /// + /// identifies the generic SQL type NUMERIC + /// NUMERIC_TYPE = 2, + /// + /// identifies the generic SQL type NVARCHAR + /// NVARCHAR_TYPE = -9, + /// + /// identifies the generic SQL type REAL + /// REAL_TYPE = 7, + /// + /// Identifies the generic SQL type SMALLINT + /// SMALLINT_TYPE = 5, + /// + /// Identifies the generic SQL type STRUCT + /// STRUCT_TYPE = 2002, + /// + /// Identifies the generic SQL type TIMESTAMP + /// TIMESTAMP_TYPE = 93, + /// + /// Identifies the generic SQL type TINYINT + /// TINYINT_TYPE = -6, + /// + /// Identifies the generic SQL type VARBINARY + /// VARBINARY_TYPE = -3, + /// + /// Identifies the 
generic SQL type VARCHAR + /// VARCHAR_TYPE = 12, - + // ====================== // Unused/unsupported + // ====================== + /// + /// Identifies the generic SQL type BIT + /// BIT_TYPE = -7, + /// + /// Identifies the generic SQL type BLOB + /// BLOB_TYPE = 2004, + /// + /// Identifies the generic SQL type CLOB + /// CLOB_TYPE = 2005, + /// + /// Identifies the generic SQL type DATALINK + /// DATALINK_TYPE = 70, + /// + /// Identifies the generic SQL type DISTINCT + /// DISTINCT_TYPE = 2001, + /// + /// identifies the generic SQL type NCLOB + /// NCLOB_TYPE = 2011, + /// + /// Indicates that the SQL type is database-specific and gets mapped to a Java object + /// OTHER_TYPE = 1111, + /// + /// Identifies the generic SQL type REF CURSOR + /// REF_CURSOR_TYPE = 2012, + /// + /// Identifies the generic SQL type REF + /// REF_TYPE = 2006, + /// + /// Identifies the generic SQL type ROWID + /// ROWID_TYPE = -8, + /// + /// Identifies the generic SQL type XML + /// SQLXML_TYPE = 2009, + /// + /// Identifies the generic SQL type TIME + /// TIME_TYPE = 92, + /// + /// Identifies the generic SQL type TIME WITH TIMEZONE + /// TIME_WITH_TIMEZONE_TYPE = 2013, + /// + /// Identifies the generic SQL type TIMESTAMP WITH TIMEZONE + /// TIMESTAMP_WITH_TIMEZONE_TYPE = 2014, } @@ -327,7 +450,7 @@ public override Schema GetTableSchema(string? catalog, string? dbSchema, string? //int? columnSize = columns[6].I32Val.Values.GetValue(i); //int? 
decimalDigits = columns[8].I32Val.Values.GetValue(i); bool nullable = columns[10].I32Val.Values.GetValue(i) == 1; - IArrowType dataType = SparkConnection.GetArrowType((ColumnTypeId)columnType!.Value, typeName); + IArrowType dataType = SparkConnection.GetArrowType((SparkDataType)columnType!.Value, typeName); fields[i] = new Field(columnName, dataType, nullable); } return new Schema(fields, null); @@ -522,8 +645,8 @@ private static void SetPrecisionAndScale(short colType, string typeName, TableIn { switch (colType) { - case (short)ColumnTypeId.DECIMAL_TYPE: - case (short)ColumnTypeId.NUMERIC_TYPE: + case (short)SparkDataType.DECIMAL_TYPE: + case (short)SparkDataType.NUMERIC_TYPE: { Decimal128Type decimalType = SqlDecimalTypeParser.ParseOrDefault(typeName, new Decimal128Type(DecimalPrecisionDefault, DecimalScaleDefault)); tableInfo?.Precision.Add(decimalType.Precision); @@ -538,51 +661,51 @@ private static void SetPrecisionAndScale(short colType, string typeName, TableIn } } - private static IArrowType GetArrowType(ColumnTypeId columnTypeId, string typeName) + private static IArrowType GetArrowType(SparkDataType columnTypeId, string typeName) { switch (columnTypeId) { - case ColumnTypeId.BOOLEAN_TYPE: + case SparkDataType.BOOLEAN_TYPE: return BooleanType.Default; - case ColumnTypeId.TINYINT_TYPE: + case SparkDataType.TINYINT_TYPE: return Int8Type.Default; - case ColumnTypeId.SMALLINT_TYPE: + case SparkDataType.SMALLINT_TYPE: return Int16Type.Default; - case ColumnTypeId.INTEGER_TYPE: + case SparkDataType.INTEGER_TYPE: return Int32Type.Default; - case ColumnTypeId.BIGINT_TYPE: + case SparkDataType.BIGINT_TYPE: return Int64Type.Default; - case ColumnTypeId.FLOAT_TYPE: - case ColumnTypeId.REAL_TYPE: + case SparkDataType.FLOAT_TYPE: + case SparkDataType.REAL_TYPE: return FloatType.Default; - case ColumnTypeId.DOUBLE_TYPE: + case SparkDataType.DOUBLE_TYPE: return DoubleType.Default; - case ColumnTypeId.VARCHAR_TYPE: - case ColumnTypeId.NVARCHAR_TYPE: - case 
ColumnTypeId.LONGVARCHAR_TYPE: - case ColumnTypeId.LONGNVARCHAR_TYPE: + case SparkDataType.VARCHAR_TYPE: + case SparkDataType.NVARCHAR_TYPE: + case SparkDataType.LONGVARCHAR_TYPE: + case SparkDataType.LONGNVARCHAR_TYPE: return StringType.Default; - case ColumnTypeId.TIMESTAMP_TYPE: + case SparkDataType.TIMESTAMP_TYPE: return new TimestampType(TimeUnit.Microsecond, timezone: (string?)null); - case ColumnTypeId.BINARY_TYPE: - case ColumnTypeId.VARBINARY_TYPE: - case ColumnTypeId.LONGVARBINARY_TYPE: + case SparkDataType.BINARY_TYPE: + case SparkDataType.VARBINARY_TYPE: + case SparkDataType.LONGVARBINARY_TYPE: return BinaryType.Default; - case ColumnTypeId.DATE_TYPE: + case SparkDataType.DATE_TYPE: return Date32Type.Default; - case ColumnTypeId.CHAR_TYPE: - case ColumnTypeId.NCHAR_TYPE: + case SparkDataType.CHAR_TYPE: + case SparkDataType.NCHAR_TYPE: return StringType.Default; - case ColumnTypeId.DECIMAL_TYPE: - case ColumnTypeId.NUMERIC_TYPE: + case SparkDataType.DECIMAL_TYPE: + case SparkDataType.NUMERIC_TYPE: // Note: parsing the type name for SQL DECIMAL types as the precision and scale values // are not returned in the Thrift call to GetColumns return SqlDecimalTypeParser.ParseOrDefault(typeName, new Decimal128Type(DecimalPrecisionDefault, DecimalScaleDefault)); - case ColumnTypeId.NULL_TYPE: + case SparkDataType.NULL_TYPE: return NullType.Default; - case ColumnTypeId.ARRAY_TYPE: - case ColumnTypeId.JAVA_OBJECT_TYPE: - case ColumnTypeId.STRUCT_TYPE: + case SparkDataType.ARRAY_TYPE: + case SparkDataType.JAVA_OBJECT_TYPE: + case SparkDataType.STRUCT_TYPE: return StringType.Default; default: throw new NotImplementedException($"Column type id: {columnTypeId} is not supported."); diff --git a/csharp/test/Drivers/Apache/Spark/DriverTests.cs b/csharp/test/Drivers/Apache/Spark/DriverTests.cs index 4f36e5062e..3d06134927 100644 --- a/csharp/test/Drivers/Apache/Spark/DriverTests.cs +++ b/csharp/test/Drivers/Apache/Spark/DriverTests.cs @@ -19,6 +19,7 @@ using 
System.Collections.Generic; using System.Linq; using System.Threading.Tasks; +using SparkDataType = Apache.Arrow.Adbc.Drivers.Apache.Spark.SparkConnection.SparkDataType; using Apache.Arrow.Adbc.Tests.Metadata; using Apache.Arrow.Adbc.Tests.Xunit; using Apache.Arrow.Ipc; @@ -37,57 +38,36 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark [TestCaseOrderer("Apache.Arrow.Adbc.Tests.Xunit.TestOrderer", "Apache.Arrow.Adbc.Tests")] public class DriverTests : SparkTestBase { - - /// JDBC-specific data type definitions. - /// Copied from https://github.com/JetBrains/jdk8u_jdk/blob/master/src/share/classes/java/sql/Types.java + /// + /// Supported Spark data types as a subset of /// - /// - /// NOTE: the original of this enumeration in src/Drivers/Apache/Spark/SparkConnection.cs - /// Please keep up-to-date. - /// - private enum SupportedColumnTypeId + private enum SupportedSparkDataType : short { - ARRAY_TYPE = 2003, - BIGINT_TYPE = -5, - BINARY_TYPE = -2, - BOOLEAN_TYPE = 16, - CHAR_TYPE = 1, - DATE_TYPE = 91, - DECIMAL_TYPE = 3, - DOUBLE_TYPE = 8, - FLOAT_TYPE = 6, - INTEGER_TYPE = 4, - JAVA_OBJECT_TYPE = 2000, - LONGNVARCHAR_TYPE = -16, - LONGVARBINARY_TYPE = -4, - LONGVARCHAR_TYPE = -1, - NCHAR_TYPE = -15, - NULL_TYPE = 0, - NUMERIC_TYPE = 2, - NVARCHAR_TYPE = -9, - REAL_TYPE = 7, - SMALLINT_TYPE = 5, - STRUCT_TYPE = 2002, - TIMESTAMP_TYPE = 93, - TINYINT_TYPE = -6, - VARBINARY_TYPE = -3, - VARCHAR_TYPE = 12, - - // Unused/unsupported - throw an error if these are discovered in testing - //BIT_TYPE = -7, - //BLOB_TYPE = 2004, - //CLOB_TYPE = 2005, - //DATALINK_TYPE = 70, - //DISTINCT_TYPE = 2001, - //NCLOB_TYPE = 2011, - //OTHER_TYPE = 1111, - //REF_CURSOR_TYPE = 2012, - //REF_TYPE = 2006, - //ROWID_TYPE = -8, - //SQLXML_TYPE = 2009, - //TIME_TYPE = 92, - //TIME_WITH_TIMEZONE_TYPE = 2013, - //TIMESTAMP_WITH_TIMEZONE_TYPE = 2014, + ARRAY_TYPE = SparkDataType.ARRAY_TYPE, + BIGINT_TYPE = SparkDataType.BIGINT_TYPE, + BINARY_TYPE = SparkDataType.BINARY_TYPE, + 
BOOLEAN_TYPE = SparkDataType.BOOLEAN_TYPE, + CHAR_TYPE = SparkDataType.CHAR_TYPE, + DATE_TYPE = SparkDataType.DATE_TYPE, + DECIMAL_TYPE = SparkDataType.DECIMAL_TYPE, + DOUBLE_TYPE = SparkDataType.DOUBLE_TYPE, + FLOAT_TYPE = SparkDataType.FLOAT_TYPE, + INTEGER_TYPE = SparkDataType.INTEGER_TYPE, + JAVA_OBJECT_TYPE = SparkDataType.JAVA_OBJECT_TYPE, + LONGNVARCHAR_TYPE = SparkDataType.LONGNVARCHAR_TYPE, + LONGVARBINARY_TYPE = SparkDataType.LONGVARBINARY_TYPE, + LONGVARCHAR_TYPE = SparkDataType.LONGVARCHAR_TYPE, + NCHAR_TYPE = SparkDataType.NCHAR_TYPE, + NULL_TYPE = SparkDataType.NULL_TYPE, + NUMERIC_TYPE = SparkDataType.NUMERIC_TYPE, + NVARCHAR_TYPE = SparkDataType.NVARCHAR_TYPE, + REAL_TYPE = SparkDataType.REAL_TYPE, + SMALLINT_TYPE = SparkDataType.SMALLINT_TYPE, + STRUCT_TYPE = SparkDataType.STRUCT_TYPE, + TIMESTAMP_TYPE = SparkDataType.TIMESTAMP_TYPE, + TINYINT_TYPE = SparkDataType.TINYINT_TYPE, + VARBINARY_TYPE = SparkDataType.VARBINARY_TYPE, + VARCHAR_TYPE = SparkDataType.VARCHAR_TYPE, } private static List DefaultTableTypes => new() { "BASE TABLE", "VIEW" }; @@ -301,12 +281,12 @@ public void CanGetObjectsAll() Assert.Equal(i + 1, column.OrdinalPosition); Assert.NotNull(column.Name); Assert.False(string.IsNullOrEmpty(column.Name)); - var types = Enum.GetValues(typeof(SupportedColumnTypeId)).Cast(); - Assert.Contains((SupportedColumnTypeId)column.XdbcSqlDataType!, types); + var types = Enum.GetValues(typeof(SupportedSparkDataType)).Cast(); + Assert.Contains((SupportedSparkDataType)column.XdbcSqlDataType!, types); Assert.NotNull(column.XdbcDataType); - Assert.Contains((SupportedColumnTypeId)column.XdbcDataType!, types); + Assert.Contains((SupportedSparkDataType)column.XdbcDataType!, types); Assert.Equal(column.XdbcDataType, column.XdbcSqlDataType); - bool isDecimalType = column.XdbcDataType == (short)SupportedColumnTypeId.DECIMAL_TYPE || column.XdbcDataType == (short)SupportedColumnTypeId.NUMERIC_TYPE; + bool isDecimalType = column.XdbcDataType == 
(short)SupportedSparkDataType.DECIMAL_TYPE || column.XdbcDataType == (short)SupportedSparkDataType.NUMERIC_TYPE; Assert.Equal(column.XdbcColumnSize.HasValue, isDecimalType); Assert.Equal(column.XdbcDecimalDigits.HasValue, isDecimalType); Assert.Equal(column.XdbcDataType, column.XdbcSqlDataType); From e93c2e073f92a0e1872fc177d742b06ca669bd4b Mon Sep 17 00:00:00 2001 From: Bruce Irschick Date: Tue, 28 May 2024 12:01:08 -0700 Subject: [PATCH 3/8] feat(csharp/src/Drivers/Apache): make the Spark data type enumeration private again. --- .../Drivers/Apache/Spark/SparkConnection.cs | 60 +++++++++---------- .../test/Drivers/Apache/Spark/DriverTests.cs | 53 ++++++++-------- 2 files changed, 56 insertions(+), 57 deletions(-) diff --git a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs index a3841e29ff..b9b87f760d 100644 --- a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs +++ b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs @@ -75,7 +75,7 @@ public class SparkConnection : HiveServer2Connection /// /// when depth is set to . /// - public enum SparkDataType : short + private enum ColumnTypeId { // NOTE: There is a partial copy of this enumeration in test/Drivers/Apache/Spark/DriverTests.cs // Please keep up-to-date. @@ -483,7 +483,7 @@ public override Schema GetTableSchema(string? catalog, string? dbSchema, string? //int? columnSize = columns[6].I32Val.Values.GetValue(i); //int? 
decimalDigits = columns[8].I32Val.Values.GetValue(i); bool nullable = columns[10].I32Val.Values.GetValue(i) == 1; - IArrowType dataType = SparkConnection.GetArrowType((SparkDataType)columnType!.Value, typeName); + IArrowType dataType = SparkConnection.GetArrowType(columnType!.Value, typeName); fields[i] = new Field(columnName, dataType, nullable); } return new Schema(fields, null); @@ -678,8 +678,8 @@ private static void SetPrecisionAndScale(short colType, string typeName, TableIn { switch (colType) { - case (short)SparkDataType.DECIMAL_TYPE: - case (short)SparkDataType.NUMERIC_TYPE: + case (short)ColumnTypeId.DECIMAL_TYPE: + case (short)ColumnTypeId.NUMERIC_TYPE: { Decimal128Type decimalType = SqlDecimalTypeParser.ParseOrDefault(typeName, new Decimal128Type(DecimalPrecisionDefault, DecimalScaleDefault)); tableInfo?.Precision.Add(decimalType.Precision); @@ -694,51 +694,51 @@ private static void SetPrecisionAndScale(short colType, string typeName, TableIn } } - private static IArrowType GetArrowType(SparkDataType columnTypeId, string typeName) + private static IArrowType GetArrowType(int columnTypeId, string typeName) { switch (columnTypeId) { - case SparkDataType.BOOLEAN_TYPE: + case (int)ColumnTypeId.BOOLEAN_TYPE: return BooleanType.Default; - case SparkDataType.TINYINT_TYPE: + case (int)ColumnTypeId.TINYINT_TYPE: return Int8Type.Default; - case SparkDataType.SMALLINT_TYPE: + case (int)ColumnTypeId.SMALLINT_TYPE: return Int16Type.Default; - case SparkDataType.INTEGER_TYPE: + case (int)ColumnTypeId.INTEGER_TYPE: return Int32Type.Default; - case SparkDataType.BIGINT_TYPE: + case (int)ColumnTypeId.BIGINT_TYPE: return Int64Type.Default; - case SparkDataType.FLOAT_TYPE: - case SparkDataType.REAL_TYPE: + case (int)ColumnTypeId.FLOAT_TYPE: + case (int)ColumnTypeId.REAL_TYPE: return FloatType.Default; - case SparkDataType.DOUBLE_TYPE: + case (int)ColumnTypeId.DOUBLE_TYPE: return DoubleType.Default; - case SparkDataType.VARCHAR_TYPE: - case SparkDataType.NVARCHAR_TYPE: - 
case SparkDataType.LONGVARCHAR_TYPE: - case SparkDataType.LONGNVARCHAR_TYPE: + case (int)ColumnTypeId.VARCHAR_TYPE: + case (int)ColumnTypeId.NVARCHAR_TYPE: + case (int)ColumnTypeId.LONGVARCHAR_TYPE: + case (int)ColumnTypeId.LONGNVARCHAR_TYPE: return StringType.Default; - case SparkDataType.TIMESTAMP_TYPE: + case (int)ColumnTypeId.TIMESTAMP_TYPE: return new TimestampType(TimeUnit.Microsecond, timezone: (string?)null); - case SparkDataType.BINARY_TYPE: - case SparkDataType.VARBINARY_TYPE: - case SparkDataType.LONGVARBINARY_TYPE: + case (int)ColumnTypeId.BINARY_TYPE: + case (int)ColumnTypeId.VARBINARY_TYPE: + case (int)ColumnTypeId.LONGVARBINARY_TYPE: return BinaryType.Default; - case SparkDataType.DATE_TYPE: + case (int)ColumnTypeId.DATE_TYPE: return Date32Type.Default; - case SparkDataType.CHAR_TYPE: - case SparkDataType.NCHAR_TYPE: + case (int)ColumnTypeId.CHAR_TYPE: + case (int)ColumnTypeId.NCHAR_TYPE: return StringType.Default; - case SparkDataType.DECIMAL_TYPE: - case SparkDataType.NUMERIC_TYPE: + case (int)ColumnTypeId.DECIMAL_TYPE: + case (int)ColumnTypeId.NUMERIC_TYPE: // Note: parsing the type name for SQL DECIMAL types as the precision and scale values // are not returned in the Thrift call to GetColumns return SqlDecimalTypeParser.ParseOrDefault(typeName, new Decimal128Type(DecimalPrecisionDefault, DecimalScaleDefault)); - case SparkDataType.NULL_TYPE: + case (int)ColumnTypeId.NULL_TYPE: return NullType.Default; - case SparkDataType.ARRAY_TYPE: - case SparkDataType.JAVA_OBJECT_TYPE: - case SparkDataType.STRUCT_TYPE: + case (int)ColumnTypeId.ARRAY_TYPE: + case (int)ColumnTypeId.JAVA_OBJECT_TYPE: + case (int)ColumnTypeId.STRUCT_TYPE: return StringType.Default; default: throw new NotImplementedException($"Column type id: {columnTypeId} is not supported."); diff --git a/csharp/test/Drivers/Apache/Spark/DriverTests.cs b/csharp/test/Drivers/Apache/Spark/DriverTests.cs index 9666a691c2..8d7245a1e0 100644 --- a/csharp/test/Drivers/Apache/Spark/DriverTests.cs +++ 
b/csharp/test/Drivers/Apache/Spark/DriverTests.cs @@ -19,7 +19,6 @@ using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; -using SparkDataType = Apache.Arrow.Adbc.Drivers.Apache.Spark.SparkConnection.SparkDataType; using Apache.Arrow.Adbc.Tests.Metadata; using Apache.Arrow.Adbc.Tests.Xunit; using Apache.Arrow.Ipc; @@ -39,35 +38,35 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark public class DriverTests : SparkTestBase { /// - /// Supported Spark data types as a subset of + /// Supported Spark data types as a subset of /// private enum SupportedSparkDataType : short { - ARRAY_TYPE = SparkDataType.ARRAY_TYPE, - BIGINT_TYPE = SparkDataType.BIGINT_TYPE, - BINARY_TYPE = SparkDataType.BINARY_TYPE, - BOOLEAN_TYPE = SparkDataType.BOOLEAN_TYPE, - CHAR_TYPE = SparkDataType.CHAR_TYPE, - DATE_TYPE = SparkDataType.DATE_TYPE, - DECIMAL_TYPE = SparkDataType.DECIMAL_TYPE, - DOUBLE_TYPE = SparkDataType.DOUBLE_TYPE, - FLOAT_TYPE = SparkDataType.FLOAT_TYPE, - INTEGER_TYPE = SparkDataType.INTEGER_TYPE, - JAVA_OBJECT_TYPE = SparkDataType.JAVA_OBJECT_TYPE, - LONGNVARCHAR_TYPE = SparkDataType.LONGNVARCHAR_TYPE, - LONGVARBINARY_TYPE = SparkDataType.LONGVARBINARY_TYPE, - LONGVARCHAR_TYPE = SparkDataType.LONGVARCHAR_TYPE, - NCHAR_TYPE = SparkDataType.NCHAR_TYPE, - NULL_TYPE = SparkDataType.NULL_TYPE, - NUMERIC_TYPE = SparkDataType.NUMERIC_TYPE, - NVARCHAR_TYPE = SparkDataType.NVARCHAR_TYPE, - REAL_TYPE = SparkDataType.REAL_TYPE, - SMALLINT_TYPE = SparkDataType.SMALLINT_TYPE, - STRUCT_TYPE = SparkDataType.STRUCT_TYPE, - TIMESTAMP_TYPE = SparkDataType.TIMESTAMP_TYPE, - TINYINT_TYPE = SparkDataType.TINYINT_TYPE, - VARBINARY_TYPE = SparkDataType.VARBINARY_TYPE, - VARCHAR_TYPE = SparkDataType.VARCHAR_TYPE, + ARRAY_TYPE = 2003, + BIGINT_TYPE = -5, + BINARY_TYPE = -2, + BOOLEAN_TYPE = 16, + CHAR_TYPE = 1, + DATE_TYPE = 91, + DECIMAL_TYPE = 3, + DOUBLE_TYPE = 8, + FLOAT_TYPE = 6, + INTEGER_TYPE = 4, + JAVA_OBJECT_TYPE = 2000, + LONGNVARCHAR_TYPE = -16, + 
LONGVARBINARY_TYPE = -4, + LONGVARCHAR_TYPE = -1, + NCHAR_TYPE = -15, + NULL_TYPE = 0, + NUMERIC_TYPE = 2, + NVARCHAR_TYPE = -9, + REAL_TYPE = 7, + SMALLINT_TYPE = 5, + STRUCT_TYPE = 2002, + TIMESTAMP_TYPE = 93, + TINYINT_TYPE = -6, + VARBINARY_TYPE = -3, + VARCHAR_TYPE = 12, } private static List DefaultTableTypes => new() { "BASE TABLE", "VIEW" }; From da2ed8404543f1999c68a7b309f751e0dc88e341 Mon Sep 17 00:00:00 2001 From: Bruce Irschick Date: Wed, 29 May 2024 15:38:00 -0700 Subject: [PATCH 4/8] feat(csharp/src/Drivers/Apache): updates for code review comment --- .../Drivers/Apache/Properties/AssemblyInfo.cs | 18 ++ .../Drivers/Apache/Spark/SparkConnection.cs | 202 ++++++++++-------- .../test/Drivers/Apache/Spark/DriverTests.cs | 67 +++--- 3 files changed, 170 insertions(+), 117 deletions(-) create mode 100644 csharp/src/Drivers/Apache/Properties/AssemblyInfo.cs diff --git a/csharp/src/Drivers/Apache/Properties/AssemblyInfo.cs b/csharp/src/Drivers/Apache/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..164905524e --- /dev/null +++ b/csharp/src/Drivers/Apache/Properties/AssemblyInfo.cs @@ -0,0 +1,18 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System.Runtime.CompilerServices; + +[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Tests.Drivers.Apache, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")] diff --git a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs index b9b87f760d..5114e124a1 100644 --- a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs +++ b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs @@ -56,7 +56,19 @@ public class SparkConnection : HiveServer2Connection const bool InfoVendorSql = true; const int DecimalPrecisionDefault = 10; const int DecimalScaleDefault = 0; - + const string ColumnDef = "COLUMN_DEF"; + const string ColumnName = "COLUMN_NAME"; + const string DataType = "DATA_TYPE"; + const string IsAutoIncrement = "IS_AUTO_INCREMENT"; + const string IsNullable = "IS_NULLABLE"; + const string OrdinalPosition = "ORDINAL_POSITION"; + const string TableCat = "TABLE_CAT"; + const string TableCatalog = "TABLE_CATALOG"; + const string TableName = "TABLE_NAME"; + const string TableSchem = "TABLE_SCHEM"; + const string TableType = "TABLE_TYPE"; + const string TypeName = "TYPE_NAME"; + const string Nullable = "NULLABLE"; private readonly Lazy _productVersion; internal static TSparkGetDirectResults sparkGetDirectResults = new TSparkGetDirectResults(1000); @@ -67,7 +79,7 @@ public class SparkConnection : HiveServer2Connection }; /// - /// The Spark data type definitions based on the JDBC Types constants. + /// The Spark data type definitions based on the JDBC Types constants. 
/// /// /// This enumeration can be used to determine the Spark-specific data types that are contained in fields xdbc_data_type and xdbc_sql_data_type @@ -75,171 +87,171 @@ public class SparkConnection : HiveServer2Connection /// /// when depth is set to . /// - private enum ColumnTypeId + internal enum ColumnTypeId { // NOTE: There is a partial copy of this enumeration in test/Drivers/Apache/Spark/DriverTests.cs // Please keep up-to-date. - // Copied from https://github.com/JetBrains/jdk8u_jdk/blob/master/src/share/classes/java/sql/Types.java + // Copied from https://docs.oracle.com/en%2Fjava%2Fjavase%2F21%2Fdocs%2Fapi%2F%2F/constant-values.html#java.sql.Types.ARRAY /// /// Identifies the generic SQL type ARRAY /// - ARRAY_TYPE = 2003, + ARRAY = 2003, /// /// Identifies the generic SQL type BIGINT /// - BIGINT_TYPE = -5, + BIGINT = -5, /// /// Identifies the generic SQL type BINARY /// - BINARY_TYPE = -2, + BINARY = -2, /// /// Identifies the generic SQL type BOOLEAN /// - BOOLEAN_TYPE = 16, + BOOLEAN = 16, /// /// Identifies the generic SQL type CHAR /// - CHAR_TYPE = 1, + CHAR = 1, /// /// Identifies the generic SQL type DATE /// - DATE_TYPE = 91, + DATE = 91, /// /// Identifies the generic SQL type DECIMAL /// - DECIMAL_TYPE = 3, + DECIMAL = 3, /// /// Identifies the generic SQL type DOUBLE /// - DOUBLE_TYPE = 8, + DOUBLE = 8, /// /// Identifies the generic SQL type FLOAT /// - FLOAT_TYPE = 6, + FLOAT = 6, /// /// Identifies the generic SQL type INTEGER /// - INTEGER_TYPE = 4, + INTEGER = 4, /// /// Identifies the generic SQL type JAVA_OBJECT (MAP) /// - JAVA_OBJECT_TYPE = 2000, + JAVA_OBJECT = 2000, /// /// identifies the generic SQL type LONGNVARCHAR /// - LONGNVARCHAR_TYPE = -16, + LONGNVARCHAR = -16, /// /// identifies the generic SQL type LONGVARBINARY /// - LONGVARBINARY_TYPE = -4, + LONGVARBINARY = -4, /// /// identifies the generic SQL type LONGVARCHAR /// - LONGVARCHAR_TYPE = -1, + LONGVARCHAR = -1, /// /// identifies the generic SQL type NCHAR /// - 
NCHAR_TYPE = -15, + NCHAR = -15, /// /// identifies the generic SQL value NULL /// - NULL_TYPE = 0, + NULL = 0, /// /// identifies the generic SQL type NUMERIC /// - NUMERIC_TYPE = 2, + NUMERIC = 2, /// /// identifies the generic SQL type NVARCHAR /// - NVARCHAR_TYPE = -9, + NVARCHAR = -9, /// /// identifies the generic SQL type REAL /// - REAL_TYPE = 7, + REAL = 7, /// /// Identifies the generic SQL type SMALLINT /// - SMALLINT_TYPE = 5, + SMALLINT = 5, /// /// Identifies the generic SQL type STRUCT /// - STRUCT_TYPE = 2002, + STRUCT = 2002, /// /// Identifies the generic SQL type TIMESTAMP /// - TIMESTAMP_TYPE = 93, + TIMESTAMP = 93, /// /// Identifies the generic SQL type TINYINT /// - TINYINT_TYPE = -6, + TINYINT = -6, /// /// Identifies the generic SQL type VARBINARY /// - VARBINARY_TYPE = -3, + VARBINARY = -3, /// /// Identifies the generic SQL type VARCHAR /// - VARCHAR_TYPE = 12, + VARCHAR = 12, // ====================== // Unused/unsupported // ====================== /// /// Identifies the generic SQL type BIT /// - BIT_TYPE = -7, + BIT = -7, /// /// Identifies the generic SQL type BLOB /// - BLOB_TYPE = 2004, + BLOB = 2004, /// /// Identifies the generic SQL type CLOB /// - CLOB_TYPE = 2005, + CLOB = 2005, /// /// Identifies the generic SQL type DATALINK /// - DATALINK_TYPE = 70, + DATALINK = 70, /// /// Identifies the generic SQL type DISTINCT /// - DISTINCT_TYPE = 2001, + DISTINCT = 2001, /// /// identifies the generic SQL type NCLOB /// - NCLOB_TYPE = 2011, + NCLOB = 2011, /// /// Indicates that the SQL type is database-specific and gets mapped to a Java object /// - OTHER_TYPE = 1111, + OTHER = 1111, /// /// Identifies the generic SQL type REF CURSOR /// - REF_CURSOR_TYPE = 2012, + REF_CURSOR = 2012, /// /// Identifies the generic SQL type REF /// - REF_TYPE = 2006, + REF = 2006, /// /// Identifies the generic SQL type ROWID /// - ROWID_TYPE = -8, + ROWID = -8, /// /// Identifies the generic SQL type XML /// - SQLXML_TYPE = 2009, + SQLXML = 2009, /// 
/// Identifies the generic SQL type TIME /// - TIME_TYPE = 92, + TIME = 92, /// /// Identifies the generic SQL type TIME WITH TIMEZONE /// - TIME_WITH_TIMEZONE_TYPE = 2013, + TIME_WITH_TIMEZONE = 2013, /// /// Identifies the generic SQL type TIMESTAMP WITH TIMEZONE /// - TIMESTAMP_WITH_TIMEZONE_TYPE = 2014, + TIMESTAMP_WITH_TIMEZONE = 2014, } internal SparkConnection(IReadOnlyDictionary properties) @@ -443,8 +455,24 @@ public override IArrowArrayStream GetInfo(IReadOnlyList codes) public override IArrowArrayStream GetTableTypes() { + TGetTableTypesReq req = new() + { + SessionHandle = this.sessionHandle ?? throw new InvalidOperationException("session not created"), + GetDirectResults = sparkGetDirectResults + }; + TGetTableTypesResp resp = this.Client.GetTableTypes(req).Result; + if (resp.Status.StatusCode == TStatusCode.ERROR_STATUS) + { + throw new HiveServer2Exception(resp.Status.ErrorMessage) + .SetNativeError(resp.Status.ErrorCode) + .SetSqlState(resp.Status.SqlState); + } + + List columns = resp.DirectResults.ResultSet.Results.Columns; + StringArray tableTypes = columns[0].StringVal.Values; + StringArray.Builder tableTypesBuilder = new StringArray.Builder(); - tableTypesBuilder.AppendRange(new string[] { "BASE TABLE", "VIEW" }); + tableTypesBuilder.AppendRange(tableTypes); IArrowArray[] dataArrays = new IArrowArray[] { @@ -508,7 +536,7 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? cata string catalogRegexp = PatternToRegEx(catalogPattern); TRowSet resp = getCatalogsResp.DirectResults.ResultSet.Results; - IReadOnlyList list = resp.Columns[columnMap["TABLE_CAT"]].StringVal.Values; + IReadOnlyList list = resp.Columns[columnMap[TableCat]].StringVal.Values; for (int i = 0; i < list.Count; i++) { string col = list[i]; @@ -536,8 +564,8 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? 
cata IReadOnlyDictionary columnMap = GetColumnIndexMap(getSchemasResp.DirectResults.ResultSetMetadata.Schema.Columns); TRowSet resp = getSchemasResp.DirectResults.ResultSet.Results; - IReadOnlyList catalogList = resp.Columns[columnMap["TABLE_CATALOG"]].StringVal.Values; - IReadOnlyList schemaList = resp.Columns[columnMap["TABLE_SCHEM"]].StringVal.Values; + IReadOnlyList catalogList = resp.Columns[columnMap[TableCatalog]].StringVal.Values; + IReadOnlyList schemaList = resp.Columns[columnMap[TableSchem]].StringVal.Values; for (int i = 0; i < catalogList.Count; i++) { @@ -565,10 +593,10 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? cata IReadOnlyDictionary columnMap = GetColumnIndexMap(getTablesResp.DirectResults.ResultSetMetadata.Schema.Columns); TRowSet resp = getTablesResp.DirectResults.ResultSet.Results; - IReadOnlyList catalogList = resp.Columns[columnMap["TABLE_CAT"]].StringVal.Values; - IReadOnlyList schemaList = resp.Columns[columnMap["TABLE_SCHEM"]].StringVal.Values; - IReadOnlyList tableList = resp.Columns[columnMap["TABLE_NAME"]].StringVal.Values; - IReadOnlyList tableTypeList = resp.Columns[columnMap["TABLE_TYPE"]].StringVal.Values; + IReadOnlyList catalogList = resp.Columns[columnMap[TableCat]].StringVal.Values; + IReadOnlyList schemaList = resp.Columns[columnMap[TableSchem]].StringVal.Values; + IReadOnlyList tableList = resp.Columns[columnMap[TableName]].StringVal.Values; + IReadOnlyList tableTypeList = resp.Columns[columnMap[TableType]].StringVal.Values; for (int i = 0; i < catalogList.Count; i++) { @@ -601,17 +629,17 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? 
cata IReadOnlyDictionary columnMap = GetColumnIndexMap(columnsResponse.DirectResults.ResultSetMetadata.Schema.Columns); TRowSet resp = columnsResponse.DirectResults.ResultSet.Results; - IReadOnlyList catalogList = resp.Columns[columnMap["TABLE_CAT"]].StringVal.Values; - IReadOnlyList schemaList = resp.Columns[columnMap["TABLE_SCHEM"]].StringVal.Values; - IReadOnlyList tableList = resp.Columns[columnMap["TABLE_NAME"]].StringVal.Values; - IReadOnlyList columnNameList = resp.Columns[columnMap["COLUMN_NAME"]].StringVal.Values; - ReadOnlySpan columnTypeList = resp.Columns[columnMap["DATA_TYPE"]].I32Val.Values.Values; - IReadOnlyList typeNameList = resp.Columns[columnMap["TYPE_NAME"]].StringVal.Values; - ReadOnlySpan nullableList = resp.Columns[columnMap["NULLABLE"]].I32Val.Values.Values; - IReadOnlyList columnDefaultList = resp.Columns[columnMap["COLUMN_DEF"]].StringVal.Values; - ReadOnlySpan ordinalPosList = resp.Columns[columnMap["ORDINAL_POSITION"]].I32Val.Values.Values; - IReadOnlyList isNullableList = resp.Columns[columnMap["IS_NULLABLE"]].StringVal.Values; - IReadOnlyList isAutoIncrementList = resp.Columns[columnMap["IS_AUTO_INCREMENT"]].StringVal.Values; + IReadOnlyList catalogList = resp.Columns[columnMap[TableCat]].StringVal.Values; + IReadOnlyList schemaList = resp.Columns[columnMap[TableSchem]].StringVal.Values; + IReadOnlyList tableList = resp.Columns[columnMap[TableName]].StringVal.Values; + IReadOnlyList columnNameList = resp.Columns[columnMap[ColumnName]].StringVal.Values; + ReadOnlySpan columnTypeList = resp.Columns[columnMap[DataType]].I32Val.Values.Values; + IReadOnlyList typeNameList = resp.Columns[columnMap[TypeName]].StringVal.Values; + ReadOnlySpan nullableList = resp.Columns[columnMap[Nullable]].I32Val.Values.Values; + IReadOnlyList columnDefaultList = resp.Columns[columnMap[ColumnDef]].StringVal.Values; + ReadOnlySpan ordinalPosList = resp.Columns[columnMap[OrdinalPosition]].I32Val.Values.Values; + IReadOnlyList isNullableList = 
resp.Columns[columnMap[IsNullable]].StringVal.Values; + IReadOnlyList isAutoIncrementList = resp.Columns[columnMap[IsAutoIncrement]].StringVal.Values; for (int i = 0; i < catalogList.Count; i++) { @@ -698,47 +726,47 @@ private static IArrowType GetArrowType(int columnTypeId, string typeName) { switch (columnTypeId) { - case (int)ColumnTypeId.BOOLEAN_TYPE: + case (int)ColumnTypeId.BOOLEAN: return BooleanType.Default; - case (int)ColumnTypeId.TINYINT_TYPE: + case (int)ColumnTypeId.TINYINT: return Int8Type.Default; - case (int)ColumnTypeId.SMALLINT_TYPE: + case (int)ColumnTypeId.SMALLINT: return Int16Type.Default; - case (int)ColumnTypeId.INTEGER_TYPE: + case (int)ColumnTypeId.INTEGER: return Int32Type.Default; - case (int)ColumnTypeId.BIGINT_TYPE: + case (int)ColumnTypeId.BIGINT: return Int64Type.Default; - case (int)ColumnTypeId.FLOAT_TYPE: - case (int)ColumnTypeId.REAL_TYPE: + case (int)ColumnTypeId.FLOAT: + case (int)ColumnTypeId.REAL: return FloatType.Default; - case (int)ColumnTypeId.DOUBLE_TYPE: + case (int)ColumnTypeId.DOUBLE: return DoubleType.Default; - case (int)ColumnTypeId.VARCHAR_TYPE: - case (int)ColumnTypeId.NVARCHAR_TYPE: - case (int)ColumnTypeId.LONGVARCHAR_TYPE: - case (int)ColumnTypeId.LONGNVARCHAR_TYPE: + case (int)ColumnTypeId.VARCHAR: + case (int)ColumnTypeId.NVARCHAR: + case (int)ColumnTypeId.LONGVARCHAR: + case (int)ColumnTypeId.LONGNVARCHAR: return StringType.Default; - case (int)ColumnTypeId.TIMESTAMP_TYPE: + case (int)ColumnTypeId.TIMESTAMP: return new TimestampType(TimeUnit.Microsecond, timezone: (string?)null); - case (int)ColumnTypeId.BINARY_TYPE: - case (int)ColumnTypeId.VARBINARY_TYPE: - case (int)ColumnTypeId.LONGVARBINARY_TYPE: + case (int)ColumnTypeId.BINARY: + case (int)ColumnTypeId.VARBINARY: + case (int)ColumnTypeId.LONGVARBINARY: return BinaryType.Default; - case (int)ColumnTypeId.DATE_TYPE: + case (int)ColumnTypeId.DATE: return Date32Type.Default; - case (int)ColumnTypeId.CHAR_TYPE: - case (int)ColumnTypeId.NCHAR_TYPE: + case 
(int)ColumnTypeId.CHAR: + case (int)ColumnTypeId.NCHAR: return StringType.Default; - case (int)ColumnTypeId.DECIMAL_TYPE: - case (int)ColumnTypeId.NUMERIC_TYPE: + case (int)ColumnTypeId.DECIMAL: + case (int)ColumnTypeId.NUMERIC: // Note: parsing the type name for SQL DECIMAL types as the precision and scale values // are not returned in the Thrift call to GetColumns return SqlDecimalTypeParser.ParseOrDefault(typeName, new Decimal128Type(DecimalPrecisionDefault, DecimalScaleDefault)); - case (int)ColumnTypeId.NULL_TYPE: + case (int)ColumnTypeId.NULL: return NullType.Default; - case (int)ColumnTypeId.ARRAY_TYPE: - case (int)ColumnTypeId.JAVA_OBJECT_TYPE: - case (int)ColumnTypeId.STRUCT_TYPE: + case (int)ColumnTypeId.ARRAY: + case (int)ColumnTypeId.JAVA_OBJECT: + case (int)ColumnTypeId.STRUCT: return StringType.Default; default: throw new NotImplementedException($"Column type id: {columnTypeId} is not supported."); diff --git a/csharp/test/Drivers/Apache/Spark/DriverTests.cs b/csharp/test/Drivers/Apache/Spark/DriverTests.cs index 8d7245a1e0..04559634db 100644 --- a/csharp/test/Drivers/Apache/Spark/DriverTests.cs +++ b/csharp/test/Drivers/Apache/Spark/DriverTests.cs @@ -19,6 +19,7 @@ using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; +using ColumnTypeId = Apache.Arrow.Adbc.Drivers.Apache.Spark.SparkConnection.ColumnTypeId; using Apache.Arrow.Adbc.Tests.Metadata; using Apache.Arrow.Adbc.Tests.Xunit; using Apache.Arrow.Ipc; @@ -42,31 +43,31 @@ public class DriverTests : SparkTestBase /// private enum SupportedSparkDataType : short { - ARRAY_TYPE = 2003, - BIGINT_TYPE = -5, - BINARY_TYPE = -2, - BOOLEAN_TYPE = 16, - CHAR_TYPE = 1, - DATE_TYPE = 91, - DECIMAL_TYPE = 3, - DOUBLE_TYPE = 8, - FLOAT_TYPE = 6, - INTEGER_TYPE = 4, - JAVA_OBJECT_TYPE = 2000, - LONGNVARCHAR_TYPE = -16, - LONGVARBINARY_TYPE = -4, - LONGVARCHAR_TYPE = -1, - NCHAR_TYPE = -15, - NULL_TYPE = 0, - NUMERIC_TYPE = 2, - NVARCHAR_TYPE = -9, - REAL_TYPE = 7, - SMALLINT_TYPE = 
5, - STRUCT_TYPE = 2002, - TIMESTAMP_TYPE = 93, - TINYINT_TYPE = -6, - VARBINARY_TYPE = -3, - VARCHAR_TYPE = 12, + ARRAY = ColumnTypeId.ARRAY, + BIGINT = ColumnTypeId.BIGINT, + BINARY = ColumnTypeId.BINARY, + BOOLEAN = ColumnTypeId.BOOLEAN, + CHAR = ColumnTypeId.CHAR, + DATE = ColumnTypeId.DATE, + DECIMAL = ColumnTypeId.DECIMAL, + DOUBLE = ColumnTypeId.DOUBLE, + FLOAT = ColumnTypeId.FLOAT, + INTEGER = ColumnTypeId.INTEGER, + JAVA_OBJECT = ColumnTypeId.JAVA_OBJECT, + LONGNVARCHAR = ColumnTypeId.LONGNVARCHAR, + LONGVARBINARY = ColumnTypeId.LONGVARBINARY, + LONGVARCHAR = ColumnTypeId.LONGVARCHAR, + NCHAR = ColumnTypeId.NCHAR, + NULL = ColumnTypeId.NULL, + NUMERIC = ColumnTypeId.NUMERIC, + NVARCHAR = ColumnTypeId.NVARCHAR, + REAL = ColumnTypeId.REAL, + SMALLINT = ColumnTypeId.SMALLINT, + STRUCT = ColumnTypeId.STRUCT, + TIMESTAMP = ColumnTypeId.TIMESTAMP, + TINYINT = ColumnTypeId.TINYINT, + VARBINARY = ColumnTypeId.VARBINARY, + VARCHAR = ColumnTypeId.VARCHAR, } private static List DefaultTableTypes => new() { "BASE TABLE", "VIEW" }; @@ -347,24 +348,30 @@ public void CanGetObjectsAll() // Verify column metadata is returned/consistent. 
AdbcColumn column = columns[i]; Assert.Equal(i + 1, column.OrdinalPosition); - Assert.NotNull(column.Name); Assert.False(string.IsNullOrEmpty(column.Name)); + var types = Enum.GetValues(typeof(SupportedSparkDataType)).Cast(); Assert.Contains((SupportedSparkDataType)column.XdbcSqlDataType!, types); + Assert.Equal(column.XdbcDataType, column.XdbcSqlDataType); + Assert.NotNull(column.XdbcDataType); Assert.Contains((SupportedSparkDataType)column.XdbcDataType!, types); - Assert.Equal(column.XdbcDataType, column.XdbcSqlDataType); - bool isDecimalType = column.XdbcDataType == (short)SupportedSparkDataType.DECIMAL_TYPE || column.XdbcDataType == (short)SupportedSparkDataType.NUMERIC_TYPE; + + bool isDecimalType = column.XdbcDataType == (short)SupportedSparkDataType.DECIMAL || column.XdbcDataType == (short)SupportedSparkDataType.NUMERIC; Assert.Equal(column.XdbcColumnSize.HasValue, isDecimalType); Assert.Equal(column.XdbcDecimalDigits.HasValue, isDecimalType); - Assert.Equal(column.XdbcDataType, column.XdbcSqlDataType); + Assert.NotNull(column.Remarks); Assert.True(string.IsNullOrEmpty(column.Remarks)); + Assert.NotNull(column.XdbcColumnDef); + Assert.NotNull(column.XdbcNullable); Assert.Contains(new short[] { 1, 0 }, i => i == column.XdbcNullable); + Assert.NotNull(column.XdbcIsNullable); Assert.Contains(new string[] { "YES", "NO" }, i => i.Equals(column.XdbcIsNullable)); + Assert.NotNull(column.XdbcIsAutoIncrement); Assert.Null(column.XdbcCharOctetLength); @@ -455,7 +462,7 @@ public async Task CanGetTableTypes() List known_types = new List { - "BASE TABLE", "VIEW" + "TABLE", "VIEW" }; int results = 0; From 5661891857b8be66fc7a3fb038687302780c8eab Mon Sep 17 00:00:00 2001 From: Bruce Irschick Date: Wed, 29 May 2024 16:35:12 -0700 Subject: [PATCH 5/8] feat(csharp/src/Drivers/Apache): corrections --- csharp/src/Drivers/Apache/Spark/SparkConnection.cs | 4 ++-- csharp/test/Drivers/Apache/Spark/DriverTests.cs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs index 5114e124a1..c4a3a1b75a 100644 --- a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs +++ b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs @@ -706,8 +706,8 @@ private static void SetPrecisionAndScale(short colType, string typeName, TableIn { switch (colType) { - case (short)ColumnTypeId.DECIMAL_TYPE: - case (short)ColumnTypeId.NUMERIC_TYPE: + case (short)ColumnTypeId.DECIMAL: + case (short)ColumnTypeId.NUMERIC: { Decimal128Type decimalType = SqlDecimalTypeParser.ParseOrDefault(typeName, new Decimal128Type(DecimalPrecisionDefault, DecimalScaleDefault)); tableInfo?.Precision.Add(decimalType.Precision); diff --git a/csharp/test/Drivers/Apache/Spark/DriverTests.cs b/csharp/test/Drivers/Apache/Spark/DriverTests.cs index 04559634db..2ae4df34dc 100644 --- a/csharp/test/Drivers/Apache/Spark/DriverTests.cs +++ b/csharp/test/Drivers/Apache/Spark/DriverTests.cs @@ -70,7 +70,7 @@ private enum SupportedSparkDataType : short VARCHAR = ColumnTypeId.VARCHAR, } - private static List DefaultTableTypes => new() { "BASE TABLE", "VIEW" }; + private static List DefaultTableTypes => new() { "TABLE", "VIEW" }; public DriverTests(ITestOutputHelper? outputHelper) : base(outputHelper) { @@ -299,7 +299,7 @@ public void CanGetObjectsTables(string tableNamePattern) AdbcTable? table = tables?.Where((table) => string.Equals(table.Name, tableName)).FirstOrDefault(); Assert.True(table != null, "table should not be null"); // TODO: Determine why this is returned blank. - //Assert.Equal("BASE TABLE", table.Type); + //Assert.Equal("TABLE", table.Type); } /// From f3aa51432dc5bb53836903ee3c9fc1c889b273c0 Mon Sep 17 00:00:00 2001 From: Bruce Irschick Date: Thu, 30 May 2024 13:36:40 -0700 Subject: [PATCH 6/8] feat(csharp/src/Drivers/Apache): change TABLE type. 
--- csharp/test/Drivers/Apache/Spark/DriverTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/test/Drivers/Apache/Spark/DriverTests.cs b/csharp/test/Drivers/Apache/Spark/DriverTests.cs index 2ae4df34dc..2feedc52d5 100644 --- a/csharp/test/Drivers/Apache/Spark/DriverTests.cs +++ b/csharp/test/Drivers/Apache/Spark/DriverTests.cs @@ -337,7 +337,7 @@ public void CanGetObjectsAll() Assert.True(table != null, "table should not be null"); // TODO: Determine why this is returned blank. - //Assert.Equal("BASE TABLE", table.Type); + //Assert.Equal("TABLE", table.Type); List? columns = table.Columns; Assert.True(columns != null, "Columns cannot be null"); From cb7988c000defeaaed9de026366c7faa28895342 Mon Sep 17 00:00:00 2001 From: Bruce Irschick Date: Fri, 31 May 2024 08:49:29 -0700 Subject: [PATCH 7/8] feat(csharp/src/Drivers/Apache): test for XdbcTypeName. --- csharp/test/Drivers/Apache/Spark/DriverTests.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/csharp/test/Drivers/Apache/Spark/DriverTests.cs b/csharp/test/Drivers/Apache/Spark/DriverTests.cs index 2feedc52d5..6166517b24 100644 --- a/csharp/test/Drivers/Apache/Spark/DriverTests.cs +++ b/csharp/test/Drivers/Apache/Spark/DriverTests.cs @@ -349,6 +349,7 @@ public void CanGetObjectsAll() AdbcColumn column = columns[i]; Assert.Equal(i + 1, column.OrdinalPosition); Assert.False(string.IsNullOrEmpty(column.Name)); + Assert.False(string.IsNullOrEmpty(column.XdbcTypeName)); var types = Enum.GetValues(typeof(SupportedSparkDataType)).Cast(); Assert.Contains((SupportedSparkDataType)column.XdbcSqlDataType!, types); From 5c5bb35c447ea2d3ee53387ec3449529730127d5 Mon Sep 17 00:00:00 2001 From: Bruce Irschick Date: Fri, 31 May 2024 09:41:16 -0700 Subject: [PATCH 8/8] feat(csharp/src/Drivers/Apache): test for XdbcTypeName.
--- csharp/test/Drivers/Apache/Spark/DriverTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/test/Drivers/Apache/Spark/DriverTests.cs b/csharp/test/Drivers/Apache/Spark/DriverTests.cs index 6166517b24..822dae2e0c 100644 --- a/csharp/test/Drivers/Apache/Spark/DriverTests.cs +++ b/csharp/test/Drivers/Apache/Spark/DriverTests.cs @@ -19,12 +19,12 @@ using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; -using ColumnTypeId = Apache.Arrow.Adbc.Drivers.Apache.Spark.SparkConnection.ColumnTypeId; using Apache.Arrow.Adbc.Tests.Metadata; using Apache.Arrow.Adbc.Tests.Xunit; using Apache.Arrow.Ipc; using Xunit; using Xunit.Abstractions; +using ColumnTypeId = Apache.Arrow.Adbc.Drivers.Apache.Spark.SparkConnection.ColumnTypeId; namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark {