Skip to content

Commit

Permalink
Support TIMESTAMP WITH TIMEZONE and DECIMAL types with DuckDB connection (#283)
Browse files Browse the repository at this point in the history

* Handle duckdb's TIMESTAMP WITH TIME ZONE type

* Handle duckdb's DECIMAL type

* Bump version to 1.1.1
  • Loading branch information
jonmmease authored Mar 25, 2023
1 parent 2c88627 commit d2d13d9
Show file tree
Hide file tree
Showing 23 changed files with 123 additions and 55 deletions.
18 changes: 9 additions & 9 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion automation/bump_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ def bump_version(version):
package_json_dirs = [
root / "vegafusion-wasm",
root / "javascript" / "vegafusion-embed",
root / "javascript" / "vegafusion-chart-editor",
root / "python" / "vegafusion-jupyter"
]
for package_json_dir in package_json_dirs:
Expand Down
2 changes: 1 addition & 1 deletion javascript/vegafusion-embed/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion javascript/vegafusion-embed/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "vegafusion-embed",
"version": "1.1.0",
"version": "1.1.1",
"description": "Library to embed vegafusion visualizations",
"keywords": [
"vega",
Expand Down
2 changes: 1 addition & 1 deletion python/vegafusion-jupyter/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion python/vegafusion-jupyter/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "vegafusion-jupyter",
"version": "1.1.0",
"version": "1.1.1",
"description": "Altair Jupyter Widget library that relies on VegaFusion for serverside calculations",
"keywords": [
"jupyter",
Expand Down
2 changes: 1 addition & 1 deletion python/vegafusion-jupyter/vegafusion_jupyter/_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
Information about the frontend package of the widgets.
"""
module_name = "vegafusion-jupyter"
module_version = "^1.1.0"
module_version = "^1.1.1"
2 changes: 1 addition & 1 deletion python/vegafusion-jupyter/vegafusion_jupyter/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.1.0'
__version__ = '1.1.1'
4 changes: 2 additions & 2 deletions python/vegafusion/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ universal = 0
[metadata]
name = vegafusion
description = Core tools for using VegaFusion from Python
version = 1.1.0
version = 1.1.1
long_description = file: README.md
long_description_content_type = text/markdown
keywords = vega, altair, vegafusion, arrow
Expand Down Expand Up @@ -34,6 +34,6 @@ install_requires =

[options.extras_require]
embed =
vegafusion-python-embed==1.1.0
vegafusion-python-embed==1.1.1
vl-convert-python>=0.7.0

63 changes: 63 additions & 0 deletions python/vegafusion/tests/test_pretransform.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import polars as pl
from datetime import date
import decimal


def order_items_spec():
Expand Down Expand Up @@ -1045,6 +1046,68 @@ def test_pre_transform_dataset_duckdb_conn():
vf.runtime.set_connection("datafusion")


def test_pre_transform_dataset_duckdb_with_decimal_conn():
    """Pre-transform a dataset whose source column is a DuckDB DECIMAL.

    A pandas DataFrame of integer menu item ids is registered with a
    user-created DuckDB connection and exposed through an ``order_items`` view
    whose ``menu_item`` column is cast to ``DECIMAL(12,2)``. The resulting
    ``data_0`` dataset is expected to contain one row per menu item, with
    ``decimal.Decimal`` values and the matching row counts.
    """
    import duckdb

    n = 4050
    # Input is a pandas DataFrame of integer ids; the DECIMAL cast happens in DuckDB
    order_items = pd.DataFrame({
        "menu_item_int": [0] * n + [1] * (2 * n) + [2] * (3 * n)
    })

    # Open the connection before the try block so that the finally clause can
    # always close it, whatever happens while the test body runs.
    conn = duckdb.connect()
    try:
        # Register order_items with duckdb and expose it as a DECIMAL-typed view
        conn.register("order_items_int", order_items)
        conn.query(
            "SELECT menu_item_int::DECIMAL(12,2) as menu_item from order_items_int"
        ).to_view("order_items")

        # Set this as the active connection
        vf.runtime.set_connection(conn)

        # order_items includes a table://order_items data url
        vega_spec = order_items_spec()
        datasets, warnings = vf.runtime.pre_transform_datasets(
            vega_spec,
            ["data_0"],
            "UTC",
        )
        assert len(warnings) == 0
        assert len(datasets) == 1

        result = datasets[0]
        expected = pd.DataFrame({
            "menu_item": [decimal.Decimal(0), decimal.Decimal(1), decimal.Decimal(2)],
            "__count": [n, 2 * n, 3 * n]
        })
        pd.testing.assert_frame_equal(result, expected)
    finally:
        # Restore the default connection first so the runtime is never left
        # pointing at a closed DuckDB connection, then release the connection.
        vf.runtime.set_connection("datafusion")
        conn.close()


def test_duckdb_timestamp_with_timezone():
    """Pass a timezone-aware (UTC) datetime column through the "duckdb" connection.

    The inline ``dates`` dataset holds UTC-localized timestamps; the values of
    ``date_col`` in the pre-transformed ``data_0`` dataset are compared against
    the same instants expressed as UTC ``pd.Timestamp`` objects.
    """
    try:
        vf.runtime.set_connection("duckdb")

        # Build a tz-aware datetime column from plain calendar dates
        raw_dates = [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)]
        dates_df = pd.DataFrame({"date_col": raw_dates})
        dates_df["date_col"] = pd.to_datetime(dates_df.date_col).dt.tz_localize("UTC")

        spec = date_column_spec()
        datasets, warnings = vf.runtime.pre_transform_datasets(
            spec,
            ["data_0"],
            "America/New_York",
            default_input_tz="UTC",
            inline_datasets={"dates": dates_df},
        )
        # Exactly one dataset was requested, so exactly one must come back
        (output_ds,) = datasets

        # Expected values are midnight UTC on each day, although the local
        # timezone passed above is America/New_York.
        expected = [
            pd.Timestamp('2022-01-01 00:00:00', tz='UTC'),
            pd.Timestamp('2022-01-02 00:00:00', tz='UTC'),
            pd.Timestamp('2022-01-03 00:00:00', tz='UTC'),
        ]
        assert list(output_ds.date_col) == expected
    finally:
        # Always restore the default connection for subsequent tests
        vf.runtime.set_connection("datafusion")

def test_gh_268_hang():
"""
Tests for hang reported in https://github.com/hex-inc/vegafusion/issues/268
Expand Down
2 changes: 1 addition & 1 deletion python/vegafusion/vegafusion/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.1.0'
__version__ = '1.1.1'
10 changes: 8 additions & 2 deletions python/vegafusion/vegafusion/connection/duckdb.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import warnings

from . import SqlConnection, CsvReadOptions
Expand Down Expand Up @@ -26,6 +27,11 @@ def duckdb_type_name_to_pyarrow_type(duckdb_type: str) -> pa.DataType:
return pa.int32()
elif duckdb_type in ("BIGINT", "INT8", "LONG"):
return pa.int64()
elif duckdb_type.startswith("DECIMAL"):
matches = re.findall(r"\d+", duckdb_type)
precision = int(matches[0])
scale = int(matches[1])
return pa.decimal128(precision, scale)
elif duckdb_type == "UTINYINT":
return pa.uint8()
elif duckdb_type == "USMALLINT":
Expand All @@ -34,14 +40,14 @@ def duckdb_type_name_to_pyarrow_type(duckdb_type: str) -> pa.DataType:
return pa.uint32()
elif duckdb_type == "UBIGINT":
return pa.uint64()
elif duckdb_type == "DOUBLE":
return pa.float64()
elif duckdb_type == "BOOLEAN":
return pa.bool_()
elif duckdb_type == "DATE":
return pa.date32()
elif duckdb_type == "TIMESTAMP":
return pa.timestamp("ms")
elif duckdb_type == "TIMESTAMP WITH TIME ZONE":
return pa.timestamp("ms", tz="UTC")
else:
raise ValueError(f"Unexpected DuckDB type: {duckdb_type}")

Expand Down
2 changes: 1 addition & 1 deletion vegafusion-common/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "vegafusion-common"
version = "1.1.0"
version = "1.1.1"
edition = "2021"
description = "Common components required by multiple VegaFusion crates"
license = "BSD-3-Clause"
Expand Down
4 changes: 2 additions & 2 deletions vegafusion-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "vegafusion-core"
license = "BSD-3-Clause"
edition = "2021"
version = "1.1.0"
version = "1.1.1"
description = "Core components required by multiple VegaFusion crates, with WASM compatibility"

[features]
Expand Down Expand Up @@ -35,7 +35,7 @@ features = [ "preserve_order",]
[dependencies.vegafusion-common]
path = "../vegafusion-common"
features = [ "json", "sqlparser",]
version = "1.1.0"
version = "1.1.1"

[dependencies.datafusion-common]
version = "18.0.0"
Expand Down
4 changes: 2 additions & 2 deletions vegafusion-dataframe/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "vegafusion-dataframe"
license = "BSD-3-Clause"
version = "1.1.0"
version = "1.1.1"
edition = "2021"
description = "VegaFusion's DataFrame and Connection traits"

Expand All @@ -10,7 +10,7 @@ async-trait = "0.1.53"

[dependencies.vegafusion-common]
path = "../vegafusion-common"
version = "1.1.0"
version = "1.1.1"

[dependencies.datafusion-common]
version = "18.0.0"
Expand Down
4 changes: 2 additions & 2 deletions vegafusion-datafusion-udfs/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "vegafusion-datafusion-udfs"
license = "BSD-3-Clause"
version = "1.1.0"
version = "1.1.1"
edition = "2021"
description = "Custom DataFusion UDFs used by VegaFusion"

Expand All @@ -14,7 +14,7 @@ regex = "^1.5.5"

[dependencies.vegafusion-common]
path = "../vegafusion-common"
version = "1.1.0"
version = "1.1.1"

[dependencies.datafusion-physical-expr]
version = "18.0.0"
10 changes: 5 additions & 5 deletions vegafusion-python-embed/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "vegafusion-python-embed"
license = "BSD-3-Clause"
edition = "2021"
version = "1.1.0"
version = "1.1.1"
description = "vegafusion-python-embed PyO3 Python Package"

[lib]
Expand Down Expand Up @@ -36,21 +36,21 @@ features = [ "pyarrow",]
[dependencies.vegafusion-common]
path = "../vegafusion-common"
features = [ "pyo3",]
version = "1.1.0"
version = "1.1.1"

[dependencies.vegafusion-core]
path = "../vegafusion-core"
features = [ "pyarrow",]
version = "1.1.0"
version = "1.1.1"

[dependencies.vegafusion-runtime]
path = "../vegafusion-runtime"
features = [ "pyarrow",]
version = "1.1.0"
version = "1.1.1"

[dependencies.vegafusion-sql]
path = "../vegafusion-sql"
version = "1.1.0"
version = "1.1.1"
features = [ "datafusion-conn",]

[dependencies.tokio]
Expand Down
12 changes: 6 additions & 6 deletions vegafusion-runtime/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ harness = false
name = "vegafusion-runtime"
license = "BSD-3-Clause"
edition = "2021"
version = "1.1.0"
version = "1.1.1"
description = "VegaFusion Runtime"

[features]
Expand Down Expand Up @@ -59,20 +59,20 @@ version = "0.30.0"
[dependencies.vegafusion-common]
path = "../vegafusion-common"
features = [ "json", "sqlparser", "prettyprint",]
version = "1.1.0"
version = "1.1.1"

[dependencies.vegafusion-core]
path = "../vegafusion-core"
features = [ "sqlparser",]
version = "1.1.0"
version = "1.1.1"

[dependencies.vegafusion-datafusion-udfs]
path = "../vegafusion-datafusion-udfs"
version = "1.1.0"
version = "1.1.1"

[dependencies.vegafusion-dataframe]
path = "../vegafusion-dataframe"
version = "1.1.0"
version = "1.1.1"

[dependencies.serde]
version = "1.0.137"
Expand Down Expand Up @@ -110,5 +110,5 @@ features = [ "async_tokio",]

[dev-dependencies.vegafusion-sql]
path = "../vegafusion-sql"
version = "1.1.0"
version = "1.1.1"
features = [ "datafusion-conn",]
Loading

0 comments on commit d2d13d9

Please sign in to comment.