Skip to content

Commit

Permalink
Revert "chore: clean out unused "bases" and utils (#53234)"
Browse files Browse the repository at this point in the history
This reverts commit 4dec57a.
  • Loading branch information
edgao committed Feb 10, 2025
1 parent f4333f8 commit 58fad21
Show file tree
Hide file tree
Showing 679 changed files with 41,745 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,7 @@ area/documentation:
CDK:
- airbyte-cdk/*
- airbyte-cdk/**/*

normalization:
- airbyte-integrations/bases/base-normalization/*
- airbyte-integrations/bases/base-normalization/**/*
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ exclude: |
^.*?/node_modules/.*$|
^.*?/charts/.*$|
^airbyte-integrations/bases/base-normalization/.*$|
^.*?/normalization_test_output/.*$|
^.*?/pnpm-lock\.yaml$|
^.*?/source-amplitude/unit_tests/api_data/zipped\.json$|
Expand Down
5 changes: 5 additions & 0 deletions airbyte-integrations/bases/base-java/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
*
!Dockerfile
!build
!javabase.sh
!run_with_normalization.sh
34 changes: 34 additions & 0 deletions airbyte-integrations/bases/base-java/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
### WARNING ###
# The Java connector Dockerfiles will soon be deprecated.
# This Dockerfile is not used to build the connector image we publish to DockerHub.
# The new logic to build the connector image is declared with Dagger here:
# https://github.com/airbytehq/airbyte/blob/master/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/environments.py#L649

# If you need to add a custom logic to build your connector image, you can do it by adding a finalize_build.sh or finalize_build.py script in the connector folder.
# Please reach out to the Connectors Operations team if you have any question.
ARG JDK_VERSION=17.0.8
FROM amazoncorretto:${JDK_VERSION}
COPY --from=airbyte/integration-base:dev /airbyte /airbyte

RUN yum update -y && yum install -y tar openssl && yum clean all

WORKDIR /airbyte

# Add the Datadog Java APM agent
ADD https://dtdg.co/latest-java-tracer dd-java-agent.jar

COPY javabase.sh .
COPY run_with_normalization.sh .

# airbyte base commands
ENV AIRBYTE_SPEC_CMD "/airbyte/javabase.sh --spec"
ENV AIRBYTE_CHECK_CMD "/airbyte/javabase.sh --check"
ENV AIRBYTE_DISCOVER_CMD "/airbyte/javabase.sh --discover"
ENV AIRBYTE_READ_CMD "/airbyte/javabase.sh --read"
ENV AIRBYTE_WRITE_CMD "/airbyte/javabase.sh --write"

ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh"
ENTRYPOINT ["/airbyte/base.sh"]

LABEL io.airbyte.version=0.1.2
LABEL io.airbyte.name=airbyte/integration-base-java
3 changes: 3 additions & 0 deletions airbyte-integrations/bases/base-java/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
plugins {
id 'airbyte-docker-legacy'
}
33 changes: 33 additions & 0 deletions airbyte-integrations/bases/base-java/javabase.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env bash

set -e

# if IS_CAPTURE_HEAP_DUMP_ON_ERROR is set to true, then will capture Heap dump on OutOfMemory error
if [[ $IS_CAPTURE_HEAP_DUMP_ON_ERROR = true ]]; then

arrayOfSupportedConnectors=("source-postgres" "source-mssql" "source-mysql" )

# The heap dump would be captured only in case when java-based connector fails with OutOfMemory error
if [[ " ${arrayOfSupportedConnectors[*]} " =~ " $APPLICATION " ]]; then
JAVA_OPTS=$JAVA_OPTS" -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/data/dump.hprof"
export JAVA_OPTS
echo "Added JAVA_OPTS=$JAVA_OPTS"
echo "APPLICATION=$APPLICATION"
fi
fi
#30781 - Allocate 32KB for log4j appender buffer to ensure that each line is logged in a single println
JAVA_OPTS=$JAVA_OPTS" -Dlog4j.encoder.byteBufferSize=32768 -Dlog4j2.configurationFile=log4j2.xml"
#needed because we make ThreadLocal.get(Thread) accessible in IntegrationRunner.stopOrphanedThreads
JAVA_OPTS=$JAVA_OPTS" --add-opens=java.base/java.lang=ALL-UNNAMED"
# tell jooq to be quiet (https://stackoverflow.com/questions/28272284/how-to-disable-jooqs-self-ad-message-in-3-4)
JAVA_OPTS=$JAVA_OPTS" -Dorg.jooq.no-logo=true -Dorg.jooq.no-tips=true"
export JAVA_OPTS

# Wrap run script in a script so that we can lazy evaluate the value of APPLICATION. APPLICATION is
# set by the dockerfile that inherits base-java, so it cannot be evaluated when base-java is built.
# We also need to make sure that stdin of the script is piped to the stdin of the java application.
if [[ $A = --write ]]; then
cat <&0 | /airbyte/bin/"$APPLICATION" "$@"
else
/airbyte/bin/"$APPLICATION" "$@"
fi
61 changes: 61 additions & 0 deletions airbyte-integrations/bases/base-java/run_with_normalization.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/bin/bash
# Intentionally no set -e, because we want to run normalization even if the destination fails
set -o pipefail

/airbyte/base.sh $@
destination_exit_code=$?
echo '{"type": "LOG","log":{"level":"INFO","message":"Destination process done (exit code '"$destination_exit_code"')"}}'

# store original args
args=$@

while [ $# -ne 0 ]; do
case "$1" in
--config)
CONFIG_FILE="$2"
shift 2
;;
*)
# move on
shift
;;
esac
done

# restore original args after shifts
set -- $args

USE_1S1T_FORMAT="false"
if [[ -s "$CONFIG_FILE" ]]; then
USE_1S1T_FORMAT=$(jq -r '.use_1s1t_format' "$CONFIG_FILE")
fi

if test "$1" != 'write'
then
normalization_exit_code=0
elif test "$NORMALIZATION_TECHNIQUE" = 'LEGACY' && test "$USE_1S1T_FORMAT" != "true"
then
echo '{"type": "LOG","log":{"level":"INFO","message":"Starting in-connector normalization"}}'
# Normalization tries to create this file from the connector config and crashes if it already exists
# so just nuke it and let normalization recreate it.
# Use -f to avoid error if it doesn't exist, since it's only created for certain SSL modes.
rm -f ca.crt
# the args in a write command are `write --catalog foo.json --config bar.json`
# so if we remove the `write`, we can just pass the rest directly into normalization
/airbyte/entrypoint.sh run ${@:2} --integration-type $AIRBYTE_NORMALIZATION_INTEGRATION | java -cp "/airbyte/lib/*" io.airbyte.cdk.integrations.destination.normalization.NormalizationLogParser
normalization_exit_code=$?
echo '{"type": "LOG","log":{"level":"INFO","message":"In-connector normalization done (exit code '"$normalization_exit_code"')"}}'
else
echo '{"type": "LOG","log":{"level":"INFO","message":"Skipping in-connector normalization"}}'
normalization_exit_code=0
fi

if test $destination_exit_code -ne 0
then
exit $destination_exit_code
elif test $normalization_exit_code -ne 0
then
exit $normalization_exit_code
else
exit 0
fi
13 changes: 13 additions & 0 deletions airbyte-integrations/bases/base-normalization/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
*
!Dockerfile
!entrypoint.sh
!build/sshtunneling.sh
!setup.py
!normalization
!dbt-project-template
!dbt-project-template-mssql
!dbt-project-template-mysql
!dbt-project-template-oracle
!dbt-project-template-clickhouse
!dbt-project-template-snowflake
!dbt-project-template-redshift
51 changes: 51 additions & 0 deletions airbyte-integrations/bases/base-normalization/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
build/
logs/
dbt-project-template/models/generated/
dbt-project-template/test_output.log
dbt_modules/
secrets/
dist/

integration_tests/normalization_test_output/*/*/macros
integration_tests/normalization_test_output/*/*/tests
integration_tests/normalization_test_output/**/*.json
integration_tests/normalization_test_output/**/*.log
integration_tests/normalization_test_output/**/*.md
integration_tests/normalization_test_output/**/*.sql
integration_tests/normalization_test_output/**/*.yml
!integration_tests/normalization_test_output/**/*dbt_project.yml
!integration_tests/normalization_test_output/**/generated/sources.yml

# We keep a minimal/restricted subset of sql files for all destinations to avoid noise in diff
# Simple Streams
!integration_tests/normalization_test_output/**/dedup_exchange_rate*.sql
!integration_tests/normalization_test_output/**/DEDUP_EXCHANGE_RATE*.sql
!integration_tests/normalization_test_output/**/exchange_rate.sql
!integration_tests/normalization_test_output/**/EXCHANGE_RATE.sql
!integration_tests/normalization_test_output/**/test_simple_streams/first_output/airbyte_views/**/multiple_column_names_conflicts_stg.sql
# Nested Streams
# Parent table
!integration_tests/normalization_test_output/**/nested_stream_with*_names_ab*.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_names_scd.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_names.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES_AB*.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES_SCD.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES.sql
# Nested table
!integration_tests/normalization_test_output/**/nested_stream_with_*_partition_ab1.sql
!integration_tests/normalization_test_output/**/nested_stream_with_*_data_ab1.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_partition_scd.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_data_scd.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_partition.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_data.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH_*_PARTITION_AB1.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH_*_DATA_AB1.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_PARTITION_SCD.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_DATA_SCD.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_PARTITION.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_DATA.sql

# but we keep all sql files for Postgres
!integration_tests/normalization_test_output/postgres/**/*.sql
integration_tests/normalization_test_output/postgres/**/dbt_data_tests
integration_tests/normalization_test_output/postgres/**/dbt_schema_tests
37 changes: 37 additions & 0 deletions airbyte-integrations/bases/base-normalization/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
FROM fishtownanalytics/dbt:1.0.0
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte

# Install SSH Tunneling dependencies
RUN apt-get update && apt-get install -y jq sshpass

WORKDIR /airbyte
COPY entrypoint.sh .
COPY build/sshtunneling.sh .

WORKDIR /airbyte/normalization_code
COPY normalization ./normalization
COPY setup.py .
COPY dbt-project-template/ ./dbt-template/

# Install python dependencies
WORKDIR /airbyte/base_python_structs

# workaround for https://github.com/yaml/pyyaml/issues/601
# this should be fixed in the airbyte/base-airbyte-protocol-python image
RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation

RUN pip install .

WORKDIR /airbyte/normalization_code
RUN pip install .

WORKDIR /airbyte/normalization_code/dbt-template/
# Download external dbt dependencies
RUN dbt deps

WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.version=0.4.3
LABEL io.airbyte.name=airbyte/normalization
57 changes: 57 additions & 0 deletions airbyte-integrations/bases/base-normalization/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
plugins {
id 'airbyte-docker-legacy'
id 'airbyte-python'
}

dependencies {
testFixtures(project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies'))
}

// we need to access the sshtunneling script from airbyte-workers for ssh support
def copySshScript = tasks.register('copySshScript', Copy) {
from "${project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies').buildDir}/resources/testFixtures"
into "${buildDir}"
include "sshtunneling.sh"
}
copySshScript.configure {
dependsOn project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies').tasks.named('processTestFixturesResources')
}

// make sure the copy task above worked (if it fails, it fails silently annoyingly)
def checkSshScriptCopy = tasks.register('checkSshScriptCopy') {
doFirst {
assert file("${buildDir}/sshtunneling.sh").exists() : "Copy of sshtunneling.sh failed."
}
}
checkSshScriptCopy.configure {
dependsOn copySshScript
}

def generate = tasks.register('generate')
generate.configure {
dependsOn checkSshScriptCopy
}

tasks.named('check').configure {
dependsOn generate
}

tasks.named("jar").configure {
dependsOn copySshScript
}

[
'bigquery',
'mysql',
'postgres',
'redshift',
'snowflake',
'oracle',
'mssql',
'clickhouse',
'tidb',
].each {destinationName ->
tasks.matching { it.name == 'integrationTestPython' }.configureEach {
dependsOn project(":airbyte-integrations:connectors:destination-$destinationName").tasks.named('assemble')
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
FROM ghcr.io/dbt-labs/dbt-core:1.3.1
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte

# Install SSH Tunneling dependencies
RUN apt-get update && apt-get install -y jq sshpass
WORKDIR /airbyte
COPY entrypoint.sh .
COPY build/sshtunneling.sh .

WORKDIR /airbyte/normalization_code
COPY normalization ./normalization
COPY setup.py .
COPY dbt-project-template/ ./dbt-template/

# Install python dependencies
WORKDIR /airbyte/base_python_structs

# workaround for https://github.com/yaml/pyyaml/issues/601
# this should be fixed in the airbyte/base-airbyte-protocol-python image
RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation

RUN pip install .

WORKDIR /airbyte/normalization_code
RUN pip install .

WORKDIR /airbyte/normalization_code/dbt-template/
RUN pip install "dbt-clickhouse>=1.4.0"
# Download external dbt dependencies
RUN dbt deps

WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.name=airbyte/normalization-clickhouse
Loading

0 comments on commit 58fad21

Please sign in to comment.