
Commit

[Destination MSSQL] v2 rc8 (#54186)
Co-authored-by: Octavia Squidington III <[email protected]>
gosusnp and octavia-squidington-iii authored Feb 20, 2025
1 parent 7abac4a commit e666f19
Showing 12 changed files with 244 additions and 381 deletions.
@@ -1597,6 +1597,148 @@ abstract class BasicFunctionalityIntegrationTest(
)
}

@Test
open fun testDedupWithStringKey() {
assumeTrue(supportsDedup)
fun makeStream(syncId: Long) =
DestinationStream(
DestinationStream.Descriptor(randomizedNamespace, "test_stream"),
importType =
Dedupe(
primaryKey = listOf(listOf("id1"), listOf("id2")),
cursor = listOf("updated_at"),
),
schema =
ObjectType(
properties =
linkedMapOf(
"id1" to stringType,
"id2" to intType,
"updated_at" to timestamptzType,
"name" to stringType,
"_ab_cdc_deleted_at" to timestamptzType,
)
),
generationId = 42,
minimumGenerationId = 0,
syncId = syncId,
)
fun makeRecord(data: String, extractedAt: Long) =
InputRecord(
randomizedNamespace,
"test_stream",
data,
emittedAtMs = extractedAt,
)

val sync1Stream = makeStream(syncId = 42)
runSync(
updatedConfig,
sync1Stream,
listOf(
// emitted_at:1000 is equal to 1970-01-01 00:00:01Z.
// This obviously makes no sense in relation to updated_at being in the year 2000,
// but that's OK because (from the destination's POV) updated_at has no relation to
// extractedAt.
makeRecord(
"""{"id1": "9cf974de-52cf-4194-9f3d-7efa76ba4d84", "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "name": "Alice1", "_ab_cdc_deleted_at": null}""",
extractedAt = 1000,
),
// Emit a second record for the same primary key (id1, 200) with a different updated_at.
makeRecord(
"""{"id1": "9cf974de-52cf-4194-9f3d-7efa76ba4d84", "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "name": "Alice2", "_ab_cdc_deleted_at": null}""",
extractedAt = 1000,
),
// Emit a record with no _ab_cdc_deleted_at field. CDC sources typically emit an
// explicit null, but we should handle both cases.
makeRecord(
"""{"id1": "9cf974de-52cf-4194-9f3d-7efa76ba4d84", "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob1"}""",
extractedAt = 1000,
),
),
)
dumpAndDiffRecords(
parsedConfig,
listOf(
// Alice has only the newer record, and Bob also exists
OutputRecord(
extractedAt = 1000,
generationId = 42,
data =
mapOf(
"id1" to "9cf974de-52cf-4194-9f3d-7efa76ba4d84",
"id2" to 200,
"updated_at" to TimestampWithTimezoneValue("2000-01-01T00:01:00Z"),
"name" to "Alice2",
"_ab_cdc_deleted_at" to null
),
airbyteMeta = OutputRecord.Meta(syncId = 42),
),
OutputRecord(
extractedAt = 1000,
generationId = 42,
data =
mapOf(
"id1" to "9cf974de-52cf-4194-9f3d-7efa76ba4d84",
"id2" to 201,
"updated_at" to TimestampWithTimezoneValue("2000-01-01T00:02:00Z"),
"name" to "Bob1"
),
airbyteMeta = OutputRecord.Meta(syncId = 42),
),
),
sync1Stream,
primaryKey = listOf(listOf("id1"), listOf("id2")),
cursor = listOf("updated_at"),
)

val sync2Stream = makeStream(syncId = 43)
runSync(
updatedConfig,
sync2Stream,
listOf(
// Update both Alice and Bob
makeRecord(
"""{"id1": "9cf974de-52cf-4194-9f3d-7efa76ba4d84", "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "name": "Alice3", "_ab_cdc_deleted_at": null}""",
extractedAt = 2000,
),
makeRecord(
"""{"id1": "9cf974de-52cf-4194-9f3d-7efa76ba4d84", "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "name": "Bob2"}""",
extractedAt = 2000,
),
// And delete Bob. Again, T+D doesn't check the actual _value_ of deleted_at (i.e.
// the fact that it's in the past is irrelevant). It only cares whether deleted_at
// is non-null. So the destination should delete Bob.
makeRecord(
"""{"id1": "9cf974de-52cf-4194-9f3d-7efa76ba4d84", "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}""",
extractedAt = 2000,
),
),
)
dumpAndDiffRecords(
parsedConfig,
listOf(
// Alice still exists (and has been updated to the latest version), but Bob is gone
OutputRecord(
extractedAt = 2000,
generationId = 42,
data =
mapOf(
"id1" to "9cf974de-52cf-4194-9f3d-7efa76ba4d84",
"id2" to 200,
"updated_at" to TimestampWithTimezoneValue("2000-01-02T00:00:00Z"),
"name" to "Alice3",
"_ab_cdc_deleted_at" to null
),
airbyteMeta = OutputRecord.Meta(syncId = 43),
)
),
sync2Stream,
primaryKey = listOf(listOf("id1"), listOf("id2")),
cursor = listOf("updated_at"),
)
}
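
The dedup semantics this test exercises: per primary key, only the record with the latest cursor (updated_at) survives, and if that surviving record carries a non-null _ab_cdc_deleted_at the row is removed from the final table entirely. A minimal plain-Kotlin model of that rule — a simplified illustration with a hypothetical Row class, not the destination's actual implementation:

data class Row(
    val id1: String,
    val id2: Int,
    val updatedAt: String, // ISO-8601 with a 'Z' suffix, so lexicographic order matches time order
    val name: String?,
    val deletedAt: String?,
)

fun dedup(rows: List<Row>): List<Row> =
    rows
        .groupBy { it.id1 to it.id2 } // composite primary key (id1, id2)
        .map { (_, versions) -> versions.maxByOrNull { it.updatedAt }!! } // latest cursor wins
        .filter { it.deletedAt == null } // a non-null _ab_cdc_deleted_at removes the row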

/**
* Change the cursor column in the second sync to a column that doesn't exist in the first sync.
* Verify that we overwrite everything correctly.
@@ -16,7 +16,7 @@ data:
type: GSM
connectorType: destination
definitionId: 37a928c1-2d5c-431a-a97d-ae236bd1ea0c
dockerImageTag: 0.1.9
dockerImageTag: 0.1.10
dockerRepository: airbyte/destination-mssql-v2
documentationUrl: https://docs.airbyte.com/integrations/destinations/mssql-v2
githubIssueLabel: destination-mssql-v2
@@ -23,12 +23,11 @@ import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
import io.airbyte.integrations.destination.mssql.v2.config.MSSQLConfiguration
import io.airbyte.integrations.destination.mssql.v2.convert.AirbyteTypeToSqlType
import io.airbyte.integrations.destination.mssql.v2.convert.AirbyteTypeToMssqlType
import io.airbyte.integrations.destination.mssql.v2.convert.AirbyteValueToStatement.Companion.setAsNullValue
import io.airbyte.integrations.destination.mssql.v2.convert.AirbyteValueToStatement.Companion.setValue
import io.airbyte.integrations.destination.mssql.v2.convert.MssqlType
import io.airbyte.integrations.destination.mssql.v2.convert.ResultSetToAirbyteValue.Companion.getAirbyteNamedValue
import io.airbyte.integrations.destination.mssql.v2.convert.SqlTypeToMssqlType
import io.airbyte.protocol.models.Jsons
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMeta
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange
@@ -230,9 +229,9 @@ class MSSQLQueryBuilder(
Append -> emptyList()
Overwrite -> emptyList()
}
private val indexedColumns: Set<String> = uniquenessKey.toSet()

private val toSqlType = AirbyteTypeToSqlType()
private val toMssqlType = SqlTypeToMssqlType()
private val toMssqlType = AirbyteTypeToMssqlType()

val finalTableSchema: List<NamedField> =
airbyteFinalTableFields + extractFinalTableSchema(stream.schema)
@@ -251,9 +250,7 @@
}

private fun getSchema(): List<NamedSqlField> =
finalTableSchema.map {
NamedSqlField(it.name, toMssqlType.convert(toSqlType.convert(it.type.type)))
}
finalTableSchema.map { NamedSqlField(it.name, toMssqlType.convert(it.type.type)) }

fun updateSchema(connection: Connection) {
val existingSchema = getExistingSchema(connection)
@@ -486,7 +483,12 @@
separator: String = DEFAULT_SEPARATOR
): String {
return schema.joinToString(separator = separator) {
"[${it.name}] ${toMssqlType.convert(toSqlType.convert(it.type.type)).sqlString} NULL"
val mssqlType =
toMssqlType.convert(
it.type.type,
isIndexed = indexedColumns.contains(it.name),
)
"[${it.name}] ${mssqlType.sqlString} NULL"
}
}
}
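
With this change the query builder produces column DDL through a single AirbyteTypeToMssqlType conversion, and string columns that belong to the uniqueness key (primary key plus cursor) get a bounded type so SQL Server can put them in an index. A small sketch of the resulting column definitions — assuming the converter added in the new file below and the CDK's StringType singleton; the main function and column names are illustrative only, not part of this commit:

import io.airbyte.cdk.load.data.StringType
import io.airbyte.integrations.destination.mssql.v2.convert.AirbyteTypeToMssqlType

fun main() {
    val toMssqlType = AirbyteTypeToMssqlType()

    // Uniqueness-key columns (e.g. a string primary key like "id1") are capped at
    // VARCHAR(200) so they fit within SQL Server's index key-size limits.
    val indexed = toMssqlType.convert(StringType, isIndexed = true)
    println("[id1] ${indexed.sqlString} NULL") // prints: [id1] VARCHAR(200) NULL

    // Every other string column keeps the unbounded type.
    val plain = toMssqlType.convert(StringType, isIndexed = false)
    println("[name] ${plain.sqlString} NULL") // prints: [name] VARCHAR(MAX) NULL
}
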
@@ -0,0 +1,62 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.integrations.destination.mssql.v2.convert

import io.airbyte.cdk.load.data.AirbyteType
import io.airbyte.cdk.load.data.ArrayType
import io.airbyte.cdk.load.data.ArrayTypeWithoutSchema
import io.airbyte.cdk.load.data.BooleanType
import io.airbyte.cdk.load.data.DateType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.NumberType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.ObjectTypeWithEmptySchema
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.TimeTypeWithTimezone
import io.airbyte.cdk.load.data.TimeTypeWithoutTimezone
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.data.UnknownType
import java.sql.Types

enum class MssqlType(val sqlType: Int, val sqlStringOverride: String? = null) {
TEXT(Types.LONGVARCHAR),
BIT(Types.BOOLEAN),
DATE(Types.DATE),
BIGINT(Types.BIGINT),
DECIMAL(Types.DECIMAL, sqlStringOverride = "DECIMAL(18, 8)"),
VARCHAR(Types.VARCHAR, sqlStringOverride = "VARCHAR(MAX)"),
VARCHAR_INDEX(Types.VARCHAR, sqlStringOverride = "VARCHAR(200)"),
DATETIMEOFFSET(Types.TIMESTAMP_WITH_TIMEZONE),
TIME(Types.TIME),
DATETIME(Types.TIMESTAMP);

val sqlString: String = sqlStringOverride ?: name
}

class AirbyteTypeToMssqlType {
fun convert(airbyteSchema: AirbyteType, isIndexed: Boolean = false): MssqlType {
return when (airbyteSchema) {
is ObjectType -> MssqlType.TEXT
is ArrayType -> MssqlType.TEXT
is ArrayTypeWithoutSchema -> MssqlType.TEXT
is BooleanType -> MssqlType.BIT
is DateType -> MssqlType.DATE
is IntegerType -> MssqlType.BIGINT
is NumberType -> MssqlType.DECIMAL
is ObjectTypeWithEmptySchema -> MssqlType.TEXT
is ObjectTypeWithoutSchema -> MssqlType.TEXT
is StringType -> if (isIndexed) MssqlType.VARCHAR_INDEX else MssqlType.VARCHAR
is TimeTypeWithTimezone -> MssqlType.DATETIMEOFFSET
is TimeTypeWithoutTimezone -> MssqlType.TIME
is TimestampTypeWithTimezone -> MssqlType.DATETIMEOFFSET
is TimestampTypeWithoutTimezone -> MssqlType.DATETIME
is UnionType -> MssqlType.TEXT
is UnknownType -> MssqlType.TEXT
}
}
}
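
A hedged usage sketch of the mapping this new converter produces, assuming the CDK's AirbyteType variants referenced here (IntegerType, NumberType, TimestampTypeWithTimezone, ObjectTypeWithoutSchema) are singleton objects like StringType; the main function is illustrative only:

import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.NumberType
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
import io.airbyte.integrations.destination.mssql.v2.convert.AirbyteTypeToMssqlType

fun main() {
    val convert = AirbyteTypeToMssqlType()

    // Scalars map to native SQL Server types.
    println(convert.convert(IntegerType).sqlString)               // BIGINT
    println(convert.convert(NumberType).sqlString)                // DECIMAL(18, 8)
    println(convert.convert(TimestampTypeWithTimezone).sqlString) // DATETIMEOFFSET

    // Schemaless objects (like arrays, unions, and unknown types) fall back to TEXT.
    println(convert.convert(ObjectTypeWithoutSchema).sqlString)   // TEXT
}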

This file was deleted.
