Skip to content

Commit

Permalink
feat: Add decimal datatype to spark mapping to feast data type
Browse files Browse the repository at this point in the history
Signed-off-by: tanlocnguyen <[email protected]>
  • Loading branch information
ElliotNguyen68 committed Apr 4, 2024
1 parent 21e5434 commit cd82e3a
Showing 1 changed file with 39 additions and 21 deletions.
60 changes: 39 additions & 21 deletions sdk/python/feast/type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import json
import re
from collections import defaultdict
from datetime import datetime, timezone
from typing import (
Expand Down Expand Up @@ -77,9 +78,11 @@ def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any:
# Convert UNIX_TIMESTAMP values to `datetime`
if val_attr == "unix_timestamp_list_val":
val = [
datetime.fromtimestamp(v, tz=timezone.utc)
if v != NULL_TIMESTAMP_INT_VALUE
else None
(
datetime.fromtimestamp(v, tz=timezone.utc)
if v != NULL_TIMESTAMP_INT_VALUE
else None
)
for v in val
]
elif val_attr == "unix_timestamp_val":
Expand Down Expand Up @@ -295,9 +298,11 @@ def _type_err(item, dtype):
ValueType.INT32: ("int32_val", lambda x: int(x), None),
ValueType.INT64: (
"int64_val",
lambda x: int(x.timestamp())
if isinstance(x, pd._libs.tslibs.timestamps.Timestamp)
else int(x),
lambda x: (
int(x.timestamp())
if isinstance(x, pd._libs.tslibs.timestamps.Timestamp)
else int(x)
),
None,
),
ValueType.FLOAT: ("float_val", lambda x: float(x), None),
Expand Down Expand Up @@ -390,15 +395,21 @@ def _python_value_to_proto_value(
if feast_value_type == ValueType.BOOL_LIST:
# ProtoValue does not support conversion of np.bool_, so cast each element to a Python bool first.
return [
ProtoValue(**{field_name: proto_type(val=[bool(e) for e in value])}) # type: ignore
if value is not None
else ProtoValue()
(
ProtoValue(
**{field_name: proto_type(val=[bool(e) for e in value])}
) # type: ignore
if value is not None
else ProtoValue()
)
for value in values
]
return [
ProtoValue(**{field_name: proto_type(val=value)}) # type: ignore
if value is not None
else ProtoValue()
(
ProtoValue(**{field_name: proto_type(val=value)}) # type: ignore
if value is not None
else ProtoValue()
)
for value in values
]

Expand Down Expand Up @@ -433,15 +444,17 @@ def _python_value_to_proto_value(
if feast_value_type == ValueType.BOOL:
# ProtoValue does not support conversion of np.bool_, so cast np.bool_ values to a Python bool first.
return [
ProtoValue(
**{
field_name: func(
bool(value) if type(value) is np.bool_ else value # type: ignore
)
}
(
ProtoValue(
**{
field_name: func(
bool(value) if type(value) is np.bool_ else value # type: ignore
)
}
)
if not pd.isnull(value)
else ProtoValue()
)
if not pd.isnull(value)
else ProtoValue()
for value in values
]
if feast_value_type in PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE:
Expand Down Expand Up @@ -752,7 +765,7 @@ def _non_empty_value(value: Any) -> bool:

def spark_to_feast_value_type(spark_type_as_str: str) -> ValueType:
# TODO: Not all Spark types are convertible.
# Current non-convertible types: interval, map, struct, structfield, decimal, binary
# Current non-convertible types: interval, map, struct, structfield, binary
type_map: Dict[str, ValueType] = {
"null": ValueType.UNKNOWN,
"byte": ValueType.BYTES,
Expand All @@ -762,6 +775,7 @@ def spark_to_feast_value_type(spark_type_as_str: str) -> ValueType:
"bigint": ValueType.INT64,
"long": ValueType.INT64,
"double": ValueType.DOUBLE,
"decimal": ValueType.DOUBLE,
"float": ValueType.FLOAT,
"boolean": ValueType.BOOL,
"timestamp": ValueType.UNIX_TIMESTAMP,
Expand All @@ -774,6 +788,10 @@ def spark_to_feast_value_type(spark_type_as_str: str) -> ValueType:
"array<boolean>": ValueType.BOOL_LIST,
"array<timestamp>": ValueType.UNIX_TIMESTAMP_LIST,
}
decimal_regex_pattern = r"^decimal\([0-9]{1,2},[0-9]{1,2}\)$"
if re.match(decimal_regex_pattern, spark_type_as_str):
spark_type_as_str = "decimal"

# TODO: Find a better way of doing this.
if not isinstance(spark_type_as_str, str) or spark_type_as_str not in type_map:
return ValueType.NULL
Expand Down

0 comments on commit cd82e3a

Please sign in to comment.