Skip to content

Commit

Permalink
Move duckdb & pandas import in tutorial DAG into task (#35964)
Browse files Browse the repository at this point in the history
This brings the code in line with best practices and avoids an import
error if duckdb is not installed.

(cherry picked from commit f0ba2dc)
  • Loading branch information
ephraimbuddy committed Dec 5, 2023
1 parent 90f10b1 commit 9ba72a2
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions airflow/example_dags/tutorial_objectstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
# [END create_object_storage_path]


# [START instantiate_dag]
@dag(
schedule=None,
start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
Expand All @@ -62,9 +61,6 @@ def tutorial_objectstorage():
located
[here](https://airflow.apache.org/docs/apache-airflow/stable/tutorial/objectstorage.html)
"""
# [END instantiate_dag]
import duckdb
import pandas as pd

# [START get_air_quality_data]
@task
Expand All @@ -74,6 +70,8 @@ def get_air_quality_data(**kwargs) -> ObjectStoragePath:
This task gets air quality data from the Finnish Meteorological Institute's
open data API. The data is saved as parquet.
"""
import pandas as pd

execution_date = kwargs["logical_date"]
start_time = kwargs["data_interval_start"]

Expand Down Expand Up @@ -113,6 +111,8 @@ def analyze(path: ObjectStoragePath, **kwargs):
#### Analyze
This task analyzes the air quality data, prints the results
"""
import duckdb

conn = duckdb.connect(database=":memory:")
conn.register_filesystem(path.fs)
conn.execute(f"CREATE OR REPLACE TABLE airquality_urban AS SELECT * FROM read_parquet('{path}')")
Expand Down

0 comments on commit 9ba72a2

Please sign in to comment.