From 0b9b2231a04ff9ab4514a6cb31b96703a5ad1b4e Mon Sep 17 00:00:00 2001 From: KDekker Date: Thu, 29 Aug 2024 10:08:26 +0200 Subject: [PATCH 1/8] add automatic reordering of dimensions in dataframe. --- TM1py/Services/CellService.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/TM1py/Services/CellService.py b/TM1py/Services/CellService.py index fe441dd8..e2b68941 100644 --- a/TM1py/Services/CellService.py +++ b/TM1py/Services/CellService.py @@ -830,6 +830,18 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter if not dimensions: dimensions = self.get_dimension_names_for_writing(cube_name=cube_name) + # reorder columns in df to align with dimensions; CaseAndSpaceInsensitiveDict is a OrderedDict + dimension_to_column_map = CaseAndSpaceInsensitiveDict(dict(zip(dimensions, data.columns))) + column_to_dimension_map = CaseAndSpaceInsensitiveDict(dict(zip(data.columns,dimensions))) + + if dimension_to_column_map != column_to_dimension_map: + # identify the name(s) of the value columns: + columns_not_in_dimensions = [col for col in data.columns if col not in CaseAndSpaceInsensitiveSet(dimensions)] + # get the columns in the cube dimension order with the original column names (CaseAndSpaceInSensitive): + ordered_columns = [dimension_to_column_map[dim] for dim in data.columns if dim in dimension_to_column_map] + # reorder the dataframe: + data = data.loc[:, ordered_columns + columns_not_in_dimensions] + if not len(data.columns) == len(dimensions) + 1: raise ValueError("Number of columns in 'data' DataFrame must be number of dimensions in cube + 1") From 47020c15d2caf06bd9d9f72146eedaf389aa43e2 Mon Sep 17 00:00:00 2001 From: KDekker Date: Thu, 29 Aug 2024 10:08:59 +0200 Subject: [PATCH 2/8] add test for automatic reordering of dimensions in dataframe with CaseAndSpaceInsensitive column-dimension recognition --- Tests/CellService_test.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Tests/CellService_test.py b/Tests/CellService_test.py index 5b37d970..dd09cf67 100644 --- a/Tests/CellService_test.py +++ b/Tests/CellService_test.py @@ -1079,6 +1079,30 @@ def test_write_dataframe(self): self.dimension_names[1]: ["element 1", "element 2", "element 3"], self.dimension_names[2]: ["element 5", "element 5", "element 5"], "Value": [1.0, 2.0, 3.0]}) + + self.tm1.cells.write_dataframe(self.cube_name, df) + + query = MdxBuilder.from_cube(self.cube_name) + query = query.add_hierarchy_set_to_column_axis( + MdxHierarchySet.member(Member.of(self.dimension_names[0], "element 1"))) + query = query.add_hierarchy_set_to_row_axis(MdxHierarchySet.members([ + Member.of(self.dimension_names[1], "element 1"), + Member.of(self.dimension_names[1], "element 2"), + Member.of(self.dimension_names[1], "element 3")])) + + query = query.add_member_to_where(Member.of(self.dimension_names[2], "element 5")) + values = self.tm1.cells.execute_mdx_values(query.to_mdx()) + + self.assertEqual(list(df["Value"]), values) + + @skip_if_no_pandas + def test_write_dataframe_ordering(self): + df = pd.DataFrame({ + self.dimension_names[1]: ["element 1", "element 2", "element 3"], + self.dimension_names[0].replace('1', ' 1').lower(): ["element 1", "element 1", "element 1"], + self.dimension_names[2].replace('3', ' 3').lower(): ["element 5", "element 5", "element 5"], + "Value": [1.0, 2.0, 3.0]}) + self.tm1.cells.write_dataframe(self.cube_name, df) query = MdxBuilder.from_cube(self.cube_name) From 162d3eea1298c698fa681f20a2fcaf7e319cde38 Mon Sep 17 00:00:00 2001 From: KDekker Date: Thu, 29 Aug 2024 11:19:41 +0200 Subject: [PATCH 3/8] add fixed element values option instead of passing as df. Change CaseAndSpaceInsensitive logic check on dimension order in combination with the new column additions. --- TM1py/Services/CellService.py | 26 ++++++++++++++++++++------ Tests/CellService_test.py | 25 +++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/TM1py/Services/CellService.py b/TM1py/Services/CellService.py index e2b68941..aeb20e68 100644 --- a/TM1py/Services/CellService.py +++ b/TM1py/Services/CellService.py @@ -797,7 +797,7 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter precision: int = None, skip_non_updateable: bool = False, measure_dimension_elements: Dict = None, sum_numeric_duplicates: bool = True, remove_blob: bool = True, allow_spread: bool = False, - clear_view: str = None, **kwargs) -> str: + clear_view: str = None, fixed_dimension_elements:Dict =None, **kwargs) -> str: """ Function expects same shape as `execute_mdx_dataframe` returns. Column order must match dimensions in the target cube with an additional column for the values. @@ -822,6 +822,7 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter :param remove_blob: remove blob file after writing with use_blob=True :param allow_spread: allow TI process in use_blob or use_ti to use CellPutProportionalSpread on C elements :param clear_view: name of cube view to clear before writing + :param fixed_dimension_elements: Dict of fixed dimension element pairs. Column is created for you. :return: changeset or None """ if not isinstance(data, pd.DataFrame): @@ -830,15 +831,28 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter if not dimensions: dimensions = self.get_dimension_names_for_writing(cube_name=cube_name) - # reorder columns in df to align with dimensions; CaseAndSpaceInsensitiveDict is a OrderedDict - dimension_to_column_map = CaseAndSpaceInsensitiveDict(dict(zip(dimensions, data.columns))) - column_to_dimension_map = CaseAndSpaceInsensitiveDict(dict(zip(data.columns,dimensions))) + dimension_to_column_map = {dim: col for dim in CaseAndSpaceInsensitiveSet(dimensions) for col in data.columns if + col.lower().replace(' ', '') == dim.lower().replace(' ', '')} + column_to_dimension_map = {v: k for k, v in dimension_to_column_map.items()} - if dimension_to_column_map != column_to_dimension_map: + # reorder columns in df to align with dimensions; CaseAndSpaceInsensitiveDict is a OrderedDict + if fixed_dimension_elements: + for dimension, element in fixed_dimension_elements.items(): + if dimension in CaseAndSpaceInsensitiveSet(data.columns): + raise ValueError("one or more of the fixed_dimension_elements are passed as a dataframe column. " + f"{dimension}: {element} is passed in fixed_dimension_elements. " + "Either remove the key value pair from the fixed_dimension_elements dict or " + f"avoid passing the {dimension} column in the dataframe.") + data[dimension] = element + # recreate the maps: + dimension_to_column_map = {dim:col for dim in CaseAndSpaceInsensitiveSet(dimensions) for col in data.columns if col.lower().replace(' ', '') == dim.lower().replace(' ', '') } + column_to_dimension_map = {col:dim for col in CaseAndSpaceInsensitiveSet(data.columns) for dim in dimensions if dim.lower().replace(' ', '') == col.lower().replace(' ', '') } + + if list(dimension_to_column_map.keys()) != list(column_to_dimension_map.keys()): # identify the name(s) of the value columns: columns_not_in_dimensions = [col for col in data.columns if col not in CaseAndSpaceInsensitiveSet(dimensions)] # get the columns in the cube dimension order with the original column names (CaseAndSpaceInSensitive): - ordered_columns = [dimension_to_column_map[dim] for dim in data.columns if dim in dimension_to_column_map] + ordered_columns = [dimension_to_column_map[dim] for dim in dimensions if dim in dimension_to_column_map] # reorder the dataframe: data = data.loc[:, ordered_columns + columns_not_in_dimensions] diff --git a/Tests/CellService_test.py b/Tests/CellService_test.py index dd09cf67..8fcadbeb 100644 --- a/Tests/CellService_test.py +++ b/Tests/CellService_test.py @@ -1118,6 +1118,31 @@ def test_write_dataframe_ordering(self): self.assertEqual(list(df["Value"]), values) + @skip_if_no_pandas + def test_write_dataframe_fixed_dimension_elements(self): + df = pd.DataFrame({ + self.dimension_names[1]: ["element 1", "element 2", "element 3"], + "Value": [1.0, 2.0, 3.0]}) + + self.tm1.cells.write_dataframe( + self.cube_name, + df, + fixed_dimension_elements={self.dimension_names[0].replace('1', ' 1 ').lower():"element 1", + self.dimension_names[2]:"element 5"}) + + query = MdxBuilder.from_cube(self.cube_name) + query = query.add_hierarchy_set_to_column_axis( + MdxHierarchySet.member(Member.of(self.dimension_names[0], "element 1"))) + query = query.add_hierarchy_set_to_row_axis(MdxHierarchySet.members([ + Member.of(self.dimension_names[1], "element 1"), + Member.of(self.dimension_names[1], "element 2"), + Member.of(self.dimension_names[1], "element 3")])) + + query = query.add_member_to_where(Member.of(self.dimension_names[2], "element 5")) + values = self.tm1.cells.execute_mdx_values(query.to_mdx()) + + self.assertEqual(list(df["Value"]), values) + @skip_if_no_pandas def test_write_dataframe_duplicate_numeric_entries(self): df = pd.DataFrame({ From 7462627e773b14d03817eaa3f73195346bb74186 Mon Sep 17 00:00:00 2001 From: KDekker Date: Thu, 29 Aug 2024 11:22:19 +0200 Subject: [PATCH 4/8] add extra test for fixed elements --- Tests/CellService_test.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/Tests/CellService_test.py b/Tests/CellService_test.py index 8fcadbeb..5f9dac2b 100644 --- a/Tests/CellService_test.py +++ b/Tests/CellService_test.py @@ -1127,8 +1127,8 @@ def test_write_dataframe_fixed_dimension_elements(self): self.tm1.cells.write_dataframe( self.cube_name, df, - fixed_dimension_elements={self.dimension_names[0].replace('1', ' 1 ').lower():"element 1", - self.dimension_names[2]:"element 5"}) + fixed_dimension_elements={self.dimension_names[0].replace('1', ' 1 ').lower(): "element 1", + self.dimension_names[2]: "element 5"}) query = MdxBuilder.from_cube(self.cube_name) query = query.add_hierarchy_set_to_column_axis( @@ -1143,6 +1143,30 @@ def test_write_dataframe_fixed_dimension_elements(self): self.assertEqual(list(df["Value"]), values) + @skip_if_no_pandas + def test_write_dataframe_fixed_dimension_elements_all_fixed(self): + df = pd.DataFrame({ + "Value": [1.0]}) + + self.tm1.cells.write_dataframe( + self.cube_name, + df, + fixed_dimension_elements={ + self.dimension_names[1].replace('2', ' 2 ').lower(): "element 2", + self.dimension_names[0].replace('1', ' 1 ').lower(): "element 1", + self.dimension_names[2]: "element 5"}) + + query = MdxBuilder.from_cube(self.cube_name) + query = query.add_hierarchy_set_to_column_axis( + MdxHierarchySet.member(Member.of(self.dimension_names[0], "element 1"))) + query = query.add_hierarchy_set_to_row_axis(MdxHierarchySet.members([ + Member.of(self.dimension_names[1], "element 2")])) + + query = query.add_member_to_where(Member.of(self.dimension_names[2], "element 5")) + values = self.tm1.cells.execute_mdx_values(query.to_mdx()) + + self.assertEqual(list(df["Value"]), values) + @skip_if_no_pandas def test_write_dataframe_duplicate_numeric_entries(self): df = pd.DataFrame({ From cfc4f5e2999dff818b8dbb4e5e1a98952b7ec1e0 Mon Sep 17 00:00:00 2001 From: KDekker Date: Tue, 3 Sep 2024 13:54:15 +0200 Subject: [PATCH 5/8] rename fixed to static --- TM1py/Services/CellService.py | 8 ++++---- Tests/CellService_test.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/TM1py/Services/CellService.py b/TM1py/Services/CellService.py index aeb20e68..df7ba36d 100644 --- a/TM1py/Services/CellService.py +++ b/TM1py/Services/CellService.py @@ -797,7 +797,7 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter precision: int = None, skip_non_updateable: bool = False, measure_dimension_elements: Dict = None, sum_numeric_duplicates: bool = True, remove_blob: bool = True, allow_spread: bool = False, - clear_view: str = None, fixed_dimension_elements:Dict =None, **kwargs) -> str: + clear_view: str = None, static_dimension_elements:Dict =None, **kwargs) -> str: """ Function expects same shape as `execute_mdx_dataframe` returns. Column order must match dimensions in the target cube with an additional column for the values. @@ -822,7 +822,7 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter :param remove_blob: remove blob file after writing with use_blob=True :param allow_spread: allow TI process in use_blob or use_ti to use CellPutProportionalSpread on C elements :param clear_view: name of cube view to clear before writing - :param fixed_dimension_elements: Dict of fixed dimension element pairs. Column is created for you. + :param static_dimension_elements: Dict of fixed dimension element pairs. Column is created for you. :return: changeset or None """ if not isinstance(data, pd.DataFrame): @@ -836,8 +836,8 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter column_to_dimension_map = {v: k for k, v in dimension_to_column_map.items()} # reorder columns in df to align with dimensions; CaseAndSpaceInsensitiveDict is a OrderedDict - if fixed_dimension_elements: - for dimension, element in fixed_dimension_elements.items(): + if static_dimension_elements: + for dimension, element in static_dimension_elements.items(): if dimension in CaseAndSpaceInsensitiveSet(data.columns): raise ValueError("one or more of the fixed_dimension_elements are passed as a dataframe column. " f"{dimension}: {element} is passed in fixed_dimension_elements. " diff --git a/Tests/CellService_test.py b/Tests/CellService_test.py index 5f9dac2b..48610d4f 100644 --- a/Tests/CellService_test.py +++ b/Tests/CellService_test.py @@ -1127,8 +1127,8 @@ def test_write_dataframe_fixed_dimension_elements(self): self.tm1.cells.write_dataframe( self.cube_name, df, - fixed_dimension_elements={self.dimension_names[0].replace('1', ' 1 ').lower(): "element 1", - self.dimension_names[2]: "element 5"}) + static_dimension_elements={self.dimension_names[0].replace('1', ' 1 ').lower(): "element 1", + self.dimension_names[2]: "element 5"}) query = MdxBuilder.from_cube(self.cube_name) query = query.add_hierarchy_set_to_column_axis( @@ -1151,7 +1151,7 @@ def test_write_dataframe_fixed_dimension_elements_all_fixed(self): self.tm1.cells.write_dataframe( self.cube_name, df, - fixed_dimension_elements={ + static_dimension_elements={ self.dimension_names[1].replace('2', ' 2 ').lower(): "element 2", self.dimension_names[0].replace('1', ' 1 ').lower(): "element 1", self.dimension_names[2]: "element 5"}) From 8a6fa049761c60ed12d6253807cf84ad047637ed Mon Sep 17 00:00:00 2001 From: KDekker Date: Tue, 3 Sep 2024 13:56:46 +0200 Subject: [PATCH 6/8] add condition argument infer_column_order --- TM1py/Services/CellService.py | 37 +++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/TM1py/Services/CellService.py b/TM1py/Services/CellService.py index df7ba36d..0839f4ad 100644 --- a/TM1py/Services/CellService.py +++ b/TM1py/Services/CellService.py @@ -797,7 +797,9 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter precision: int = None, skip_non_updateable: bool = False, measure_dimension_elements: Dict = None, sum_numeric_duplicates: bool = True, remove_blob: bool = True, allow_spread: bool = False, - clear_view: str = None, static_dimension_elements:Dict =None, **kwargs) -> str: + clear_view: str = None, static_dimension_elements:Dict = None, + infer_column_order: bool = False, + **kwargs) -> str: """ Function expects same shape as `execute_mdx_dataframe` returns. Column order must match dimensions in the target cube with an additional column for the values. @@ -823,6 +825,8 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter :param allow_spread: allow TI process in use_blob or use_ti to use CellPutProportionalSpread on C elements :param clear_view: name of cube view to clear before writing :param static_dimension_elements: Dict of fixed dimension element pairs. Column is created for you. + :param infer_column_order: bool indicating whether the column order of the dataframe should automatically be + inferred and mapped to the dimension order in the cube. :return: changeset or None """ if not isinstance(data, pd.DataFrame): @@ -831,9 +835,10 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter if not dimensions: dimensions = self.get_dimension_names_for_writing(cube_name=cube_name) - dimension_to_column_map = {dim: col for dim in CaseAndSpaceInsensitiveSet(dimensions) for col in data.columns if - col.lower().replace(' ', '') == dim.lower().replace(' ', '')} - column_to_dimension_map = {v: k for k, v in dimension_to_column_map.items()} + if infer_column_order: + dimension_to_column_map = {dim: col for dim in CaseAndSpaceInsensitiveSet(dimensions) for col in data.columns if + col.lower().replace(' ', '') == dim.lower().replace(' ', '')} + column_to_dimension_map = {v: k for k, v in dimension_to_column_map.items()} # reorder columns in df to align with dimensions; CaseAndSpaceInsensitiveDict is a OrderedDict if static_dimension_elements: @@ -844,17 +849,19 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter "Either remove the key value pair from the fixed_dimension_elements dict or " f"avoid passing the {dimension} column in the dataframe.") data[dimension] = element - # recreate the maps: - dimension_to_column_map = {dim:col for dim in CaseAndSpaceInsensitiveSet(dimensions) for col in data.columns if col.lower().replace(' ', '') == dim.lower().replace(' ', '') } - column_to_dimension_map = {col:dim for col in CaseAndSpaceInsensitiveSet(data.columns) for dim in dimensions if dim.lower().replace(' ', '') == col.lower().replace(' ', '') } - - if list(dimension_to_column_map.keys()) != list(column_to_dimension_map.keys()): - # identify the name(s) of the value columns: - columns_not_in_dimensions = [col for col in data.columns if col not in CaseAndSpaceInsensitiveSet(dimensions)] - # get the columns in the cube dimension order with the original column names (CaseAndSpaceInSensitive): - ordered_columns = [dimension_to_column_map[dim] for dim in dimensions if dim in dimension_to_column_map] - # reorder the dataframe: - data = data.loc[:, ordered_columns + columns_not_in_dimensions] + # recreate the maps for infer_column_order if infer_column_order: + if infer_column_order: + dimension_to_column_map = {dim:col for dim in CaseAndSpaceInsensitiveSet(dimensions) for col in data.columns if col.lower().replace(' ', '') == dim.lower().replace(' ', '') } + column_to_dimension_map = {col:dim for col in CaseAndSpaceInsensitiveSet(data.columns) for dim in dimensions if dim.lower().replace(' ', '') == col.lower().replace(' ', '') } + + if infer_column_order: + if list(dimension_to_column_map.keys()) != list(column_to_dimension_map.keys()): + # identify the name(s) of the value columns: + columns_not_in_dimensions = [col for col in data.columns if col not in CaseAndSpaceInsensitiveSet(dimensions)] + # get the columns in the cube dimension order with the original column names (CaseAndSpaceInSensitive): + ordered_columns = [dimension_to_column_map[dim] for dim in dimensions if dim in dimension_to_column_map] + # reorder the dataframe: + data = data.loc[:, ordered_columns + columns_not_in_dimensions] if not len(data.columns) == len(dimensions) + 1: raise ValueError("Number of columns in 'data' DataFrame must be number of dimensions in cube + 1") From e0d61e0f0937c0a2b05532413f7831d23bce22a0 Mon Sep 17 00:00:00 2001 From: KDekker Date: Tue, 3 Sep 2024 14:30:13 +0200 Subject: [PATCH 7/8] force infer_column_order=True if static_dimension_elements are passed --- TM1py/Services/CellService.py | 2 ++ Tests/CellService_test.py | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/TM1py/Services/CellService.py b/TM1py/Services/CellService.py index 0839f4ad..6209e5b1 100644 --- a/TM1py/Services/CellService.py +++ b/TM1py/Services/CellService.py @@ -835,6 +835,8 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter if not dimensions: dimensions = self.get_dimension_names_for_writing(cube_name=cube_name) + infer_column_order = True if static_dimension_elements else infer_column_order + if infer_column_order: dimension_to_column_map = {dim: col for dim in CaseAndSpaceInsensitiveSet(dimensions) for col in data.columns if col.lower().replace(' ', '') == dim.lower().replace(' ', '')} diff --git a/Tests/CellService_test.py b/Tests/CellService_test.py index 48610d4f..2ad8d386 100644 --- a/Tests/CellService_test.py +++ b/Tests/CellService_test.py @@ -1103,7 +1103,7 @@ def test_write_dataframe_ordering(self): self.dimension_names[2].replace('3', ' 3').lower(): ["element 5", "element 5", "element 5"], "Value": [1.0, 2.0, 3.0]}) - self.tm1.cells.write_dataframe(self.cube_name, df) + self.tm1.cells.write_dataframe(self.cube_name, df, infer_column_order=True) query = MdxBuilder.from_cube(self.cube_name) query = query.add_hierarchy_set_to_column_axis( @@ -1119,7 +1119,7 @@ def test_write_dataframe_ordering(self): self.assertEqual(list(df["Value"]), values) @skip_if_no_pandas - def test_write_dataframe_fixed_dimension_elements(self): + def test_write_dataframe_static_dimension_elements(self): df = pd.DataFrame({ self.dimension_names[1]: ["element 1", "element 2", "element 3"], "Value": [1.0, 2.0, 3.0]}) @@ -1144,13 +1144,14 @@ def test_write_dataframe_fixed_dimension_elements(self): self.assertEqual(list(df["Value"]), values) @skip_if_no_pandas - def test_write_dataframe_fixed_dimension_elements_all_fixed(self): + def test_write_dataframe_static_dimension_elements_all_static(self): df = pd.DataFrame({ "Value": [1.0]}) self.tm1.cells.write_dataframe( self.cube_name, df, + infer_column_order=True, static_dimension_elements={ self.dimension_names[1].replace('2', ' 2 ').lower(): "element 2", self.dimension_names[0].replace('1', ' 1 ').lower(): "element 1", From 960afe80b9550591b847c679633e7a673f8c8c01 Mon Sep 17 00:00:00 2001 From: MariusWirtz Date: Mon, 30 Sep 2024 11:36:44 +0200 Subject: [PATCH 8/8] Optimize infer_column_order and avoid df mutation --- TM1py/Services/CellService.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/TM1py/Services/CellService.py b/TM1py/Services/CellService.py index 6209e5b1..6cc0ce73 100644 --- a/TM1py/Services/CellService.py +++ b/TM1py/Services/CellService.py @@ -37,7 +37,7 @@ extract_compact_json_cellset, \ cell_is_updateable, build_mdx_from_cellset, build_mdx_and_values_from_cellset, \ dimension_names_from_element_unique_names, frame_to_significant_digits, build_dataframe_from_csv, \ - drop_dimension_properties, decohints, verify_version + drop_dimension_properties, decohints, verify_version, lower_and_drop_spaces try: import pandas as pd @@ -797,7 +797,7 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter precision: int = None, skip_non_updateable: bool = False, measure_dimension_elements: Dict = None, sum_numeric_duplicates: bool = True, remove_blob: bool = True, allow_spread: bool = False, - clear_view: str = None, static_dimension_elements:Dict = None, + clear_view: str = None, static_dimension_elements: Dict = None, infer_column_order: bool = False, **kwargs) -> str: """ @@ -832,16 +832,14 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter if not isinstance(data, pd.DataFrame): raise ValueError("argument 'data' must of type DataFrame") + # don't mutate passed data frame. Work on a copy instead + data = data.copy() + if not dimensions: dimensions = self.get_dimension_names_for_writing(cube_name=cube_name) infer_column_order = True if static_dimension_elements else infer_column_order - if infer_column_order: - dimension_to_column_map = {dim: col for dim in CaseAndSpaceInsensitiveSet(dimensions) for col in data.columns if - col.lower().replace(' ', '') == dim.lower().replace(' ', '')} - column_to_dimension_map = {v: k for k, v in dimension_to_column_map.items()} - # reorder columns in df to align with dimensions; CaseAndSpaceInsensitiveDict is a OrderedDict if static_dimension_elements: for dimension, element in static_dimension_elements.items(): @@ -851,19 +849,15 @@ def write_dataframe(self, cube_name: str, data: 'pd.DataFrame', dimensions: Iter "Either remove the key value pair from the fixed_dimension_elements dict or " f"avoid passing the {dimension} column in the dataframe.") data[dimension] = element - # recreate the maps for infer_column_order if infer_column_order: - if infer_column_order: - dimension_to_column_map = {dim:col for dim in CaseAndSpaceInsensitiveSet(dimensions) for col in data.columns if col.lower().replace(' ', '') == dim.lower().replace(' ', '') } - column_to_dimension_map = {col:dim for col in CaseAndSpaceInsensitiveSet(data.columns) for dim in dimensions if dim.lower().replace(' ', '') == col.lower().replace(' ', '') } if infer_column_order: - if list(dimension_to_column_map.keys()) != list(column_to_dimension_map.keys()): - # identify the name(s) of the value columns: - columns_not_in_dimensions = [col for col in data.columns if col not in CaseAndSpaceInsensitiveSet(dimensions)] - # get the columns in the cube dimension order with the original column names (CaseAndSpaceInSensitive): - ordered_columns = [dimension_to_column_map[dim] for dim in dimensions if dim in dimension_to_column_map] - # reorder the dataframe: - data = data.loc[:, ordered_columns + columns_not_in_dimensions] + data.columns = data.columns.map(lower_and_drop_spaces) + + ordered_columns = list(map(lower_and_drop_spaces, dimensions)) + + columns_not_in_dimensions = data.columns.difference(ordered_columns).tolist() + + data = data[ordered_columns + columns_not_in_dimensions] if not len(data.columns) == len(dimensions) + 1: raise ValueError("Number of columns in 'data' DataFrame must be number of dimensions in cube + 1")