Skip to content

Commit

Permalink
Add reset_index to GeoSeries and GeoDataFrame (#856)
Browse files Browse the repository at this point in the history
This PR adds `.reset_index` to `GeoSeries` and `GeoDataFrame`. It tests `GeoDataFrame` to the same level that `cudf` tests the method. However, `GeoSeries` has four blocks of parameterized tests numbering in the hundreds, so I only implement the first test block for `GeoSeries`.

Authors:
  - H. Thomson Comer (https://github.com/thomcom)

Approvers:
  - Michael Wang (https://github.com/isVoid)

URL: #856
  • Loading branch information
thomcom authored Dec 15, 2022
1 parent f262325 commit 73e2a27
Show file tree
Hide file tree
Showing 4 changed files with 222 additions and 0 deletions.
83 changes: 83 additions & 0 deletions python/cuspatial/cuspatial/core/geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,89 @@ def _gather(
# return
return result

def reset_index(
    self, level=None, drop=False, inplace=False, col_level=0, col_fill=""
):
    """Reset the index, or a level of it.

    The non-geometry columns are reset with ``cudf.DataFrame.reset_index``;
    the index and any newly created columns of that result are then
    applied to the geometry columns as well.

    Parameters
    ----------
    level : `int`, `str`, `tuple`, or `list`, default `None`
        Only remove the given levels from the index. Removes all levels by
        default.
    drop : `bool`, default `False`
        Do not try to insert index into dataframe columns. This resets the
        index to the default integer index.
    inplace : `bool`, default `False`
        Modify the GeoDataFrame in place (do not create a new object).
    col_level : `int` or `str`, default `0`
        If the columns have multiple levels, determines which level the
        labels are inserted into. By default it is inserted into the first
        level.
    col_fill : `object`, default `""`
        If the columns have multiple levels, determines how the other
        levels are named. If None then the index name is repeated.

    Returns
    -------
    `GeoDataFrame` or `None`
        A new GeoDataFrame with the index reset, or `None` when
        ``inplace=True``.
    """

    # Split geometry and non-geometry columns
    geo_data, cudf_data = self._split_out_geometry_columns()

    # Remember how many columns there were before resetting. With
    # ``inplace=True`` cudf mutates ``cudf_data`` itself, so the original
    # shape cannot be recovered from it afterwards.
    n_original_columns = len(cudf_data.columns)

    # Let cudf decide what the reset index and the new columns look like.
    # Keywords are used so the call does not depend on parameter order.
    cudf_reindexed = cudf_data.reset_index(
        level=level,
        drop=drop,
        inplace=inplace,
        col_level=col_level,
        col_fill=col_fill,
    )

    if inplace:
        # The in-place call returns nothing; the mutated frame is the result.
        cudf_reindexed = cudf_data

    # Shallow copy used to carry the final column layout.
    recombiner = self.copy(deep=False)
    recombiner.index = cudf.RangeIndex(len(recombiner))

    if not drop:
        # Every column the reset added holds a former index level. Taking
        # the names from the result (instead of assuming "index" or
        # "level_N") also covers named indexes and str/tuple/list
        # ``level`` arguments.
        # NOTE(review): relies on cudf placing the former index levels
        # ahead of the pre-existing columns, as pandas does - confirm.
        n_inserted = len(cudf_reindexed.columns) - n_original_columns
        inserted_names = list(cudf_reindexed.columns)[:n_inserted]
        for loc, name in enumerate(inserted_names):
            recombiner.insert(
                loc=loc,
                name=name,
                # Drop any remaining index levels so the values line up
                # with the RangeIndex of ``recombiner``.
                value=cudf_reindexed[name].reset_index(drop=True),
            )
        recombiner.index = cudf_reindexed.index

    if inplace:
        self.index = cudf_reindexed.index
        self._data = recombiner._data
        return None

    # Reset the index of the geometry columns to match the cudf
    # DataFrame and recombine in the column order of ``recombiner``.
    geo_data.index = cudf_reindexed.index
    result = GeoDataFrame._from_data(
        recombiner._recombine_columns(geo_data, cudf_reindexed)
    )
    result.index = geo_data.index
    return result


class _GeoSeriesUtility:
@classmethod
Expand Down
79 changes: 79 additions & 0 deletions python/cuspatial/cuspatial/core/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,85 @@ def _gather(
):
return self.iloc[gather_map]

def reset_index(
    self,
    level=None,
    drop=False,
    name=None,
    inplace=False,
):
    """
    Reset the index of the GeoSeries.

    Parameters
    ----------
    level : int, str, tuple, or list, default None
        Only remove the given levels from the index. Removes all levels
        by default.
    drop : bool, default False
        If drop is False, create a new dataframe with the original
        index as a column. If drop is True, the original index is
        dropped.
    name : object, optional
        The name to use for the column containing the original
        Series values.
    inplace: bool, default False
        If True, the original GeoSeries is modified.

    Returns
    -------
    GeoSeries, GeoDataFrame, or None
        A GeoSeries with reset index when the underlying cudf call
        yields a Series, a GeoDataFrame holding the former index levels
        plus the geometries when it yields a DataFrame, or None when
        ``inplace`` is True.

    Examples
    --------
    >>> points = gpd.GeoSeries([
        Point((-8, -8)),
        Point((-2, -2)),
    ], index=[1, 0])
    >>> print(points.reset_index(drop=True))
    0    POINT (-8.00000 -8.00000)
    1    POINT (-2.00000 -2.00000)
    dtype: geometry
    """
    geo_series = self.copy(deep=False)

    # Create a cudf series with the same index as the GeoSeries
    # and use `cudf` reset_index to identify what our result
    # should look like. The integer values are only placeholders.
    cudf_series = cudf.Series(
        np.arange(len(geo_series.index)), index=geo_series.index
    )
    # Keywords are used so the call does not depend on parameter order.
    cudf_result = cudf_series.reset_index(
        level=level, drop=drop, name=name, inplace=inplace
    )

    if inplace:
        # cudf reset the placeholder series in place; adopt its index.
        self.index = cudf_series.index
        return None

    if isinstance(cudf_result, cudf.Series):
        geo_series.index = cudf_result.index
        return geo_series

    if isinstance(cudf_result, cudf.DataFrame):
        # Drop was equal to False, so we need to create a
        # `GeoDataFrame` from the `GeoSeries`
        from cuspatial.core.geodataframe import GeoDataFrame

        # The columns of the `cudf.DataFrame` are the new
        # columns of the `GeoDataFrame`.
        columns = {col: cudf_result[col] for col in cudf_result.columns}
        geo_result = GeoDataFrame(columns)
        geo_series.index = geo_result.index
        # Store the geometries under the requested name, or under 0 when
        # none was given. Comparing against None (rather than testing
        # truthiness) keeps falsy names such as "" usable.
        geo_result[name if name is not None else 0] = geo_series
        return geo_result

def contains_properly(self, other, align=True):
"""Returns a `Series` of `dtype('bool')` with value `True` for each
aligned geometry that contains _other_.
Expand Down
33 changes: 33 additions & 0 deletions python/cuspatial/cuspatial/tests/test_geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,3 +407,36 @@ def test_from_dict_with_list():
gpd.GeoDataFrame(dict_with_lists),
cuspatial.GeoDataFrame(dict_with_lists).to_geopandas(),
)


@pytest.mark.parametrize("level", [None, 0, 1])
@pytest.mark.parametrize("drop", [False, True])
@pytest.mark.parametrize("inplace", [False, True])
@pytest.mark.parametrize("col_level", [0, 1])
@pytest.mark.parametrize("col_fill", ["", "some_lv"])
def test_reset_index(level, drop, inplace, col_level, col_fill):
    """Compare GeoDataFrame.reset_index against geopandas on a MultiIndex."""
    if not drop and inplace:
        pytest.skip(
            "For exception checks, see "
            "test_reset_index_dup_level_name_exceptions"
        )
    midx = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)])
    gpdf = gpd.GeoDataFrame(
        {
            "geometry": [
                Point(0, 1),
                Point(2, 3),
                MultiPoint([(4, 5), (6, 7)]),
                Point(8, 9),
            ],
            "a": [*"abcd"],
        },
        index=midx,
    )
    gdf = cuspatial.from_geopandas(gpdf)
    # Pass everything by keyword: pandas >= 2.0 makes all reset_index
    # arguments after ``level`` keyword-only.
    kwargs = {
        "level": level,
        "drop": drop,
        "inplace": inplace,
        "col_level": col_level,
        "col_fill": col_fill,
    }
    expected = gpdf.reset_index(**kwargs)
    got = gdf.reset_index(**kwargs)
    if inplace:
        # In-place calls return None; compare the mutated objects instead.
        expected = gpdf
        got = gdf
    pd.testing.assert_frame_equal(expected, got.to_pandas())
27 changes: 27 additions & 0 deletions python/cuspatial/cuspatial/tests/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,3 +594,30 @@ def test_memory_usage_large():
geometry = cuspatial.from_geopandas(host_dataframe)["geometry"]
# the geometry column from naturalearth_lowres is 217kb of coordinates
assert geometry.memory_usage() == 216793


@pytest.mark.parametrize("level", [None, 0, 1])
@pytest.mark.parametrize("drop", [False, True])
@pytest.mark.parametrize("inplace", [False, True])
@pytest.mark.parametrize("name", [None, "ser"])
def test_reset_index(level, drop, name, inplace):
    """Compare GeoSeries.reset_index against geopandas on a MultiIndex."""
    if not drop and inplace:
        pytest.skip(
            "For exception checks, see "
            "test_reset_index_dup_level_name_exceptions"
        )

    midx = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)])
    gps = gpd.GeoSeries(
        [Point(0, 0), Point(0, 1), Point(2, 2), Point(3, 3)], index=midx
    )
    gs = cuspatial.from_geopandas(gps)
    # Pass everything by keyword: pandas >= 2.0 makes all reset_index
    # arguments after ``level`` keyword-only.
    kwargs = {"level": level, "drop": drop, "name": name, "inplace": inplace}
    expected = gps.reset_index(**kwargs)
    got = gs.reset_index(**kwargs)
    if inplace:
        # In-place calls return None; compare the mutated objects instead.
        expected = gps
        got = gs
    if drop:
        # Dropping the index keeps the result a series.
        pd.testing.assert_series_equal(expected, got.to_pandas())
    else:
        # Keeping the index as columns turns the result into a frame.
        pd.testing.assert_frame_equal(expected, got.to_pandas())

0 comments on commit 73e2a27

Please sign in to comment.