diff --git a/gcf_data_mapper/enums/family.py b/gcf_data_mapper/enums/family.py index be69dc2..a8002cd 100644 --- a/gcf_data_mapper/enums/family.py +++ b/gcf_data_mapper/enums/family.py @@ -27,6 +27,7 @@ class FamilyNestedColumnNames(Enum): REGION = "Region" SOURCE = "Source" TYPE = "Type" + VALUE = "Value" class GCFProjectBudgetSource(Enum): diff --git a/gcf_data_mapper/parsers/family.py b/gcf_data_mapper/parsers/family.py index c68e565..2fd2ec7 100644 --- a/gcf_data_mapper/parsers/family.py +++ b/gcf_data_mapper/parsers/family.py @@ -84,6 +84,23 @@ def get_budgets(funding_list: list[dict], source: str) -> Optional[list[str]]: return budgets if budgets else ["0"] +def get_related_result_areas(result_areas: list[dict]) -> list[str]: + """Get the related result areas from the row. + + :param list[dict] result_areas: A list of all the result areas information, represented in dictionaries. + :return list[str]: A list of result areas where the 'Value' is greater than 0%. + """ + + area_key = FamilyNestedColumnNames.AREA.value + value_key = FamilyNestedColumnNames.VALUE.value + + return [ + str(result[area_key]) + for result in result_areas + if result[value_key] and float(result[value_key].replace("%", "")) > 0 + ] + + def map_family_metadata(row: pd.Series) -> Optional[dict]: """Map the metadata of a family based on the provided row. 
@@ -101,7 +118,6 @@ def map_family_metadata(row: pd.Series) -> Optional[dict]: funding_sources = row.at[FamilyColumnsNames.FUNDING.value] result_areas = row.at[FamilyColumnsNames.RESULT_AREAS.value] - area_key = FamilyNestedColumnNames.AREA.value name_key = FamilyNestedColumnNames.NAME.value region_key = FamilyNestedColumnNames.REGION.value type_key = FamilyNestedColumnNames.TYPE.value @@ -116,7 +132,7 @@ def map_family_metadata(row: pd.Series) -> Optional[dict]: implementing_agencies = [str(entity[name_key]) for entity in entities] regions = [str(country[region_key]) for country in countries] - areas = [str(result[area_key]) for result in result_areas] + areas = get_related_result_areas(result_areas) types = [str(result[type_key]) for result in result_areas] # As we are filtering the budget information by source for gcf and co financing, we diff --git a/gcf_data_mapper/parsers/helpers.py b/gcf_data_mapper/parsers/helpers.py index 539f1a7..32a3c97 100644 --- a/gcf_data_mapper/parsers/helpers.py +++ b/gcf_data_mapper/parsers/helpers.py @@ -61,7 +61,9 @@ def arrays_contain_empty_values(list_values: list[tuple], id: str) -> bool: :return bool: True if any list contains empty values, False otherwise. 
""" arrays_with_empty_values = [ - name for name, array in list_values if any(not value for value in array) + name + for name, array in list_values + if not array or any(not value for value in array) ] if arrays_with_empty_values: diff --git a/tests/unit_tests/parsers/family/conftest.py b/tests/unit_tests/parsers/family/conftest.py index b396243..4e32681 100644 --- a/tests/unit_tests/parsers/family/conftest.py +++ b/tests/unit_tests/parsers/family/conftest.py @@ -45,6 +45,7 @@ def mock_family_doc_df(): { "Area": "Coastal protection and restoration", "Type": "Adaptation", + "Value": "100%", }, ], "ApprovalDate": "2016-06-30T00:00:00.000Z", @@ -95,6 +96,7 @@ def mock_family_row_ds(): { "Area": "The Area for the Result Area", "Type": "The Type for the Result Area", + "Value": "100%", }, ], "ApprovalDate": "2016-06-30T00:00:00.000Z", @@ -141,7 +143,7 @@ def mock_family_row_no_result_areas(): }, ], "ResultAreas": [ - {"Area": "", "Type": ""}, + {"Area": "", "Type": "", "Value": ""}, ], "ApprovalDate": "2016-06-30T00:00:00.000Z", "StartDate": "2024-06-28T00:00:00.000Z", @@ -182,6 +184,7 @@ def mock_family_row_no_entities_no_regions(): { "Area": "The Area for the Result Area", "Type": "The Type for the Result Area", + "Value": "100%", }, ], "ApprovalDate": "2016-06-30T00:00:00.000Z", @@ -222,6 +225,7 @@ def mock_family_row_with_non_int_non_float_budget_values(): { "Area": "The Area for the Result Area", "Type": "The Type for the Result Area", + "Value": "100%", }, ], "ApprovalDate": "2016-06-30T00:00:00.000Z", @@ -271,6 +275,7 @@ def mock_family_doc_with_whitespace(): { "Area": " Coastal protection and restoration ", "Type": " Adaptation ", + "Value": "100%", }, ], "ApprovalDate": " 2016-06-30T00:00:00.000Z ", diff --git a/tests/unit_tests/parsers/family/test_map_family_metadata.py b/tests/unit_tests/parsers/family/test_map_family_metadata.py index abbdef1..55abd83 100644 --- a/tests/unit_tests/parsers/family/test_map_family_metadata.py +++ 
b/tests/unit_tests/parsers/family/test_map_family_metadata.py @@ -276,3 +276,31 @@ def test_all_metadata_values_are_list_of_strings(mock_family_row_ds: pd.Series): for value in family_metadata.values(): assert isinstance(value, list) assert all(isinstance(item, str) for item in value) + + +def test_maps_result_areas_with_value_greater_than_zero(mock_family_row_ds: pd.Series): + mock_family_row_ds["ResultAreas"] = [ + { + "Area": "The Area for the Result Area 1", + "Type": "The Type for the Result Area 1", + "Value": "50.00%", + }, + { + "Area": "The Area for the Result Area 2", + "Type": "The Type for the Result Area 2", + "Value": "50.00%", + }, + { + "Area": "The Area for the Result Area 3", + "Type": "The Type for the Result Area 3", + "Value": "0.00%", + }, + ] + + family_metadata = map_family_metadata(mock_family_row_ds) + assert family_metadata is not None + assert family_metadata["result_area"] == [ + "The Area for the Result Area 1", + "The Area for the Result Area 2", + ] + assert len(family_metadata["result_area"]) == 2