Skip to content

Commit

Permalink
Add read API to convert result to Polars
Browse files Browse the repository at this point in the history
  • Loading branch information
reswqa committed Nov 26, 2024
1 parent 03108ec commit c74719b
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 0 deletions.
1 change: 1 addition & 0 deletions dev/dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ pytz>=2018.3
pytest~=7.0
duckdb>=0.5.0,<2.0.0
ray~=2.10.0
polars~=1.15.0
5 changes: 5 additions & 0 deletions paimon_python_api/table_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#################################################################################

import pandas as pd
import polars as pl
import pyarrow as pa
import ray

Expand All @@ -41,6 +42,10 @@ def to_arrow_batch_reader(self, splits: List[Split]) -> pa.RecordBatchReader:
def to_pandas(self, splits: List[Split]) -> pd.DataFrame:
"""Read data from splits and converted to pandas.DataFrame format."""

@abstractmethod
def to_polars(self, splits: List[Split]) -> pl.DataFrame:
"""Read data from splits and converted to polars.DataFrame format."""

@abstractmethod
def to_duckdb(
self,
Expand Down
4 changes: 4 additions & 0 deletions paimon_python_java/pypaimon.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import duckdb
import pandas as pd
import polars as pl
import pyarrow as pa
import ray

Expand Down Expand Up @@ -164,6 +165,9 @@ def to_arrow_batch_reader(self, splits):
def to_pandas(self, splits: List[Split]) -> pd.DataFrame:
return self.to_arrow(splits).to_pandas()

def to_polars(self, splits: List[Split]) -> pl.DataFrame:
return pl.from_arrow(self.to_arrow(splits))

def to_duckdb(
self,
splits: List[Split],
Expand Down
6 changes: 6 additions & 0 deletions paimon_python_java/tests/test_write_and_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
import unittest
import pandas as pd
import pyarrow as pa
import polars as pl
from py4j.protocol import Py4JJavaError

from polars import testing as pl_testing
from paimon_python_api import Schema
from paimon_python_java import Catalog
from paimon_python_java.java_gateway import get_gateway
Expand Down Expand Up @@ -297,6 +299,10 @@ def testAllWriteAndReadApi(self):
pd.testing.assert_frame_equal(
actual.reset_index(drop=True), all_data.reset_index(drop=True))

# to_polars
pl_df = table_read.to_polars(splits)
pl_testing.assert_frame_equal(pl_df, pl.from_pandas(all_data))

# to_duckdb
duckdb_con = table_read.to_duckdb(splits, 'duckdb_table')
# select *
Expand Down

0 comments on commit c74719b

Please sign in to comment.