Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: use duckdb instead of ibis to test interchange-only support #3672

Merged
merged 2 commits into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions altair/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ class SupportsGeoInterface(Protocol):


def is_data_type(obj: Any) -> TypeIs[DataType]:
return _is_pandas_dataframe(obj) or isinstance(
obj, (dict, DataFrameLike, SupportsGeoInterface, nw.DataFrame)
return isinstance(obj, (dict, SupportsGeoInterface)) or isinstance(
nw.from_native(obj, eager_or_interchange_only=True, strict=False),
nw.DataFrame,
)


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ all = [
dev = [
"hatch",
"ruff>=0.6.0",
"ibis-framework[polars]",
"duckdb>=1.0",
"ipython[kernel]",
"pandas>=1.1.3",
"pytest",
Expand Down
60 changes: 48 additions & 12 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@

import pkgutil
import re
import sys
from importlib.util import find_spec
from typing import TYPE_CHECKING
from pathlib import Path
from typing import TYPE_CHECKING, Any

import pytest

from tests import examples_arguments_syntax, examples_methods_syntax

if TYPE_CHECKING:
import sys
from collections.abc import Collection, Iterator, Mapping
from collections.abc import Callable, Collection, Iterator, Mapping
from re import Pattern

if sys.version_info >= (3, 11):
Expand All @@ -24,6 +25,21 @@
"pytest.MarkDecorator | Collection[pytest.MarkDecorator | pytest.Mark]"
)


def windows_has_tzdata() -> bool:
    """
    Report whether a ``tzdata`` directory exists under the user's ``Downloads`` folder.

    Adapted from PyArrow (``python/pyarrow/tests/util.py``): this is the
    default location in which ``tz.cpp`` looks for the timezone database
    (until that becomes configurable at run-time).

    Used to skip tests on Windows when the tz database is not configured.

    See https://github.com/vega/altair/issues/3050.
    """
    tzdata_dir = Path.home().joinpath("Downloads", "tzdata")
    return tzdata_dir.exists()


slow: pytest.MarkDecorator = pytest.mark.slow()
"""
Custom ``pytest.mark`` decorator.
Expand Down Expand Up @@ -69,17 +85,37 @@
"""


skip_requires_pyarrow: pytest.MarkDecorator = pytest.mark.skipif(
find_spec("pyarrow") is None, reason="`pyarrow` not installed."
)
"""
``pytest.mark.skipif`` decorator.
def skip_requires_pyarrow(
fn: Callable[..., Any] | None = None, /, *, requires_tzdata: bool = False
) -> Callable[..., Any]:
"""
``pytest.mark.skipif`` decorator.

Applies when `pyarrow`_ import would fail.
Applies when `pyarrow`_ import would fail.

.. _pyarrow:
https://pypi.org/project/pyarrow/
"""
Additionally, when ``requires_tzdata=True``, the test is marked as expected to fail on Windows if the timezone database is not installed.

https://github.com/vega/altair/issues/3050

.. _pyarrow:
https://pypi.org/project/pyarrow/
"""
composed = pytest.mark.skipif(
find_spec("pyarrow") is None, reason="`pyarrow` not installed."
)
if requires_tzdata:
composed = pytest.mark.xfail(
sys.platform == "win32" and not windows_has_tzdata(),
reason="Timezone database is not installed on Windows",
)(composed)

def wrap(test_fn: Callable[..., Any], /) -> Callable[..., Any]:
return composed(test_fn)

if fn is None:
return wrap
else:
return wrap(fn)


def id_func_str_only(val) -> str:
Expand Down
20 changes: 1 addition & 19 deletions tests/utils/test_to_values_narwhals.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import re
import sys
from datetime import datetime
from pathlib import Path

import narwhals.stable.v1 as nw
import pandas as pd
Expand All @@ -11,23 +9,7 @@
from tests import skip_requires_pyarrow


def windows_has_tzdata():
"""
From PyArrow: python/pyarrow/tests/util.py.

This is the default location where tz.cpp will look for (until we make
this configurable at run-time)
"""
return Path.home().joinpath("Downloads", "tzdata").exists()


# Skip test on Windows when the tz database is not configured.
# See https://github.com/vega/altair/issues/3050.
@pytest.mark.skipif(
sys.platform == "win32" and not windows_has_tzdata(),
reason="Timezone database is not installed on Windows",
)
@skip_requires_pyarrow
@skip_requires_pyarrow(requires_tzdata=True)
def test_arrow_timestamp_conversion():
"""Test that arrow timestamp values are converted to ISO-8601 strings."""
import pyarrow as pa
Expand Down
4 changes: 0 additions & 4 deletions tests/utils/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import io
import json
import sys

import narwhals.stable.v1 as nw
import numpy as np
Expand Down Expand Up @@ -121,9 +120,6 @@ def test_sanitize_dataframe_arrow_columns():


@skip_requires_pyarrow
@pytest.mark.xfail(
sys.platform == "win32", reason="Timezone database is not installed on Windows"
)
def test_sanitize_pyarrow_table_columns() -> None:
import pyarrow as pa

Expand Down
75 changes: 48 additions & 27 deletions tests/vegalite/v5/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from importlib.util import find_spec
from typing import TYPE_CHECKING

import ibis
import duckdb
import jsonschema
import narwhals.stable.v1 as nw
import pandas as pd
Expand All @@ -26,7 +26,7 @@
import altair as alt
from altair.utils.core import use_signature
from altair.utils.schemapi import Optional, SchemaValidationError, Undefined
from tests import skip_requires_vl_convert, slow
from tests import skip_requires_pyarrow, skip_requires_vl_convert, slow

if TYPE_CHECKING:
from typing import Any
Expand Down Expand Up @@ -1607,51 +1607,72 @@ def test_polars_with_pandas_nor_pyarrow(monkeypatch: pytest.MonkeyPatch):
assert "numpy" not in sys.modules


@pytest.mark.skipif(
Version("1.5") > PANDAS_VERSION,
reason="A warning is thrown on old pandas versions",
)
@pytest.mark.xfail(
sys.platform == "win32", reason="Timezone database is not installed on Windows"
)
def test_ibis_with_date_32():
ibis.set_backend("polars")
df = pl.DataFrame(
@skip_requires_pyarrow(requires_tzdata=True)
def test_interchange_with_date_32():
# Test that objects which Narwhals only supports at the interchange
# level can be plotted when they contain date32 columns.
df = pl.DataFrame( # noqa: F841
{"a": [1, 2, 3], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]}
)
tbl = ibis.memtable(df)
result = alt.Chart(tbl).mark_line().encode(x="a", y="b").to_dict()
rel = duckdb.sql("select * from df")
result = alt.Chart(rel).mark_line().encode(x="a", y="b").to_dict()
assert next(iter(result["datasets"].values())) == [
{"a": 1, "b": "2020-01-01T00:00:00"},
{"a": 2, "b": "2020-01-02T00:00:00"},
{"a": 3, "b": "2020-01-03T00:00:00"},
]


@pytest.mark.skipif(
Version("1.5") > PANDAS_VERSION,
reason="A warning is thrown on old pandas versions",
)
@pytest.mark.xfail(
sys.platform == "win32", reason="Timezone database is not installed on Windows"
)
def test_ibis_with_vegafusion(monkeypatch: pytest.MonkeyPatch):
dangotbanned marked this conversation as resolved.
Show resolved Hide resolved
ibis.set_backend("polars")
df = pl.DataFrame(
@skip_requires_pyarrow(requires_tzdata=True)
def test_interchange_with_vegafusion(monkeypatch: pytest.MonkeyPatch):
# Test that objects which Narwhals only supports at the interchange
# level don't get converted to PyArrow unnecessarily when plotted
# with the vegafusion transformer.
# TODO: this test can be drastically simplified once there is some level of
# DuckDB support in VegaFusion, as it can then just be `alt.Chart(rel_df)`
# without DuckDBWithInterchangeSupport.
df = pl.DataFrame( # noqa: F841
{
"a": [1, 2, 3],
"b": [datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3)],
}
)
tbl = ibis.memtable(df)
rel = duckdb.sql("select * from df")

class DuckDBWithInterchangeSupport:
    """
    Minimal DataFrame-interchange adapter around a DuckDB relation.

    DuckDB doesn't (yet?) support the interchange protocol.

    So, we create a DuckDB wrapper which defers to PyArrow's
    implementation of the protocol.
    """

    def __init__(self, rel: duckdb.DuckDBPyRelation) -> None:
        # Keep the relation lazy; it is only materialized on request.
        self._rel = rel

    def __dataframe__(self, allow_copy: bool = True) -> object:
        """
        Return PyArrow's interchange object for the wrapped relation.

        ``allow_copy`` is forwarded to PyArrow so the caller's copy policy
        is honoured instead of being silently ignored.
        """
        return self._rel.to_arrow_table().__dataframe__(allow_copy=allow_copy)

rel_df = DuckDBWithInterchangeSupport(rel)
# "poison" `arrow_table_from_dfi_dataframe` to check that it does not get called
# if we use the vegafusion transformer
monkeypatch.setattr(
"altair.utils.data.arrow_table_from_dfi_dataframe", lambda x: 1 / 0
)
tbl = ibis.memtable(df)

# Narwhals doesn't fully support our custom DuckDBWithInterchangeSupport,
# so we need to overwrite `to_native`
def to_native(df, strict):
    """Hand back the interchange wrapper for Narwhals frames; pass anything else through."""
    # `strict` is accepted only to match the signature being patched.
    return rel_df if isinstance(df, nw.DataFrame) else df

monkeypatch.setattr("narwhals.stable.v1.to_native", to_native)
dangotbanned marked this conversation as resolved.
Show resolved Hide resolved

with alt.data_transformers.enable("vegafusion"):
result = alt.Chart(tbl).mark_line().encode(x="a", y="b").to_dict(format="vega")
result = (
alt.Chart(rel_df).mark_line().encode(x="a", y="b").to_dict(format="vega")
)
assert next(iter(result["data"]))["values"] == [
{"a": 1, "b": "2020-01-01T00:00:00.000"},
{"a": 2, "b": "2020-01-02T00:00:00.000"},
Expand Down