Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refining example, add utilities, and fix xdist test error #794

Merged
merged 31 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
f70cd95
Fix xdist test error. Also make a small cleanup of some code
loomlike Oct 28, 2022
8772a2b
Merge branch 'main' into jumin/fix_xdist
loomlike Oct 28, 2022
990208b
Revert "Revert 756 (#798)"
loomlike Oct 31, 2022
9c1b93e
Merge branch 'jumin/fix_xdist' into jumin/revert_revert756
loomlike Oct 31, 2022
1fab5f2
revert 798 (revert756 - example notebook refactor). Also add job_util…
loomlike Nov 1, 2022
6a4e8a1
Merge pull request #1 from loomlike/jumin/revert_revert756
loomlike Nov 1, 2022
15f4939
Update test_azure_spark_e2e.py
blrchen Nov 1, 2022
26b7a0d
Fix doc dead links (#805)
blrchen Nov 1, 2022
8e401b4
Improve UI experience and clean up ui code warnings (#801)
Fendoe Nov 1, 2022
afd9309
Add release instructions for Release Candidate (#809)
blrchen Nov 1, 2022
8899f18
Bump version to 0.9.0-rc1 (#810)
blrchen Nov 1, 2022
572d762
Merge branch 'main' into jumin/fix_xdist
loomlike Nov 1, 2022
c4cd2ec
Merge branch 'jumin/revert_revert756' into jumin/fix_xdist
loomlike Nov 1, 2022
995f509
Fix tests to use mocks and fix get_result_df's databricks behavior
loomlike Nov 2, 2022
6198506
fix temp file to dir
loomlike Nov 3, 2022
38aa149
Merge branch 'main' into jumin/fix_xdist
loomlike Nov 3, 2022
ae9095c
checkout the feature_derivations.py from main (it was temporally chan…
loomlike Nov 3, 2022
59bd65c
Remove old databricks sample notebook. Change pip install feathr from…
loomlike Nov 3, 2022
125cc3a
Fix config and get_result_df for synapse
loomlike Nov 7, 2022
6fc93eb
Merge branch 'main' into jumin/fix_xdist
loomlike Nov 7, 2022
1f3894a
Fix generate_config to accept all the feathr env var config name
loomlike Nov 8, 2022
8a610ac
Add more pytests
loomlike Nov 9, 2022
f6f1587
Merge branch 'main' into jumin/fix_xdist
loomlike Nov 9, 2022
4c50485
Use None as default dataformat in the job_utils. Instead, set 'avro' …
loomlike Nov 9, 2022
c049958
Change feathr client to mocked object
loomlike Nov 10, 2022
56974db
Merge branch 'main' into jumin/fix_xdist
loomlike Nov 15, 2022
7d3f8be
Merge branch 'main' into jumin/fix_xdist
loomlike Nov 18, 2022
7fcffba
Merge branch 'main' into jumin/fix_xdist
loomlike Nov 21, 2022
190377c
Change timeout to 1000s in the notebook
loomlike Nov 22, 2022
1d93b40
Merge branch 'main' into jumin/fix_xdist
loomlike Nov 23, 2022
ca5d642
Merge branch 'main' into jumin/fix_xdist
loomlike Nov 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions feathr_project/feathr/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,6 @@ def get_offline_features(self,
output_path: Union[str, Sink],
execution_configurations: Union[SparkExecutionConfiguration ,Dict[str,str]] = {},
config_file_name:str = "feature_join_conf/feature_join.conf",
udf_files = None,
blrchen marked this conversation as resolved.
Show resolved Hide resolved
verbose: bool = False
):
"""
Expand Down Expand Up @@ -609,7 +608,7 @@ def _valid_materialize_keys(self, features: List[str], allow_empty_key=False):
self.logger.error(f"Inconsistent feature keys. Current keys are {str(keys)}")
return False
return True

def materialize_features(self, settings: MaterializationSettings, execution_configurations: Union[SparkExecutionConfiguration ,Dict[str,str]] = {}, verbose: bool = False, allow_materialize_non_agg_feature: bool = False):
"""Materialize feature data

Expand All @@ -621,7 +620,7 @@ def materialize_features(self, settings: MaterializationSettings, execution_conf
feature_list = settings.feature_names
if len(feature_list) > 0 and not self._valid_materialize_keys(feature_list):
raise RuntimeError(f"Invalid materialization features: {feature_list}, since they have different keys. Currently Feathr only supports materializing features of the same keys.")

if not allow_materialize_non_agg_feature:
# Check if there are non-aggregation features in the list
for fn in feature_list:
Expand Down
1 change: 1 addition & 0 deletions feathr_project/feathr/udf/_preprocessing_pyudf_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def prepare_pyspark_udf_files(feature_names: List[str], local_workspace_dir):
for feature_name in feature_names:
if feature_name in features_with_preprocessing:
has_py_udf_preprocessing = True
break

if has_py_udf_preprocessing:
pyspark_driver_path = os.path.join(local_workspace_dir, FEATHR_PYSPARK_DRIVER_FILE_NAME)
Expand Down
26 changes: 19 additions & 7 deletions feathr_project/test/unit/utils/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,38 @@
from feathr.utils.config import FEATHR_CONFIG_TEMPLATE, generate_config


@pytest.mark.parametrize(
"output_filepath", [None, NamedTemporaryFile().name],
)
def test__generate_config(output_filepath: str):

config = FEATHR_CONFIG_TEMPLATE.format(
@pytest.fixture(scope="session")
def feathr_config_str() -> str:
    """Return a Feathr config string rendered from the template with fixed test values."""
    template_values = {
        "resource_prefix": "test_prefix",
        "project_name": "test_project",
        "spark_cluster": "local",
    }
    return FEATHR_CONFIG_TEMPLATE.format(**template_values)


@pytest.mark.parametrize(
    "output_filepath", [None, "config.yml"],
)
def test__generate_config(
    output_filepath: str,
    feathr_config_str: str,
    tmp_path: Path,
):
    """Verify that `generate_config` writes a config file with the expected contents.

    Args:
        output_filepath: Output filename, or None to let `generate_config`
            choose its own default location.
        feathr_config_str: Expected rendered config string (session fixture).
        tmp_path: pytest-provided temporary directory.
    """
    # Use tmp_path so that the test files get cleaned up after the tests.
    if output_filepath:
        output_filepath = str(tmp_path / output_filepath)

    config_filepath = generate_config(
        resource_prefix="test_prefix",
        project_name="test_project",
        spark_cluster="local",
        output_filepath=output_filepath,
    )

    # Assert the config file was generated in the specified output path.
    if output_filepath:
        assert output_filepath == config_filepath

    # Assert the generated config string is correct. The stale assertion against
    # the removed local `config` (a NameError) is dropped; only the fixture-based
    # comparison remains.
    with open(config_filepath, "r") as f:
        assert feathr_config_str == f.read()