@@ -65,14 +65,13 @@ def summarise_by_shape(input: xr.DataArray,
65
65
"""
66
66
67
67
# get no_data value
68
- nodata_value = input .GetRasterBand ( 1 ). GetNoDataValue ( )
68
+ nodata_value = input .attrs . get ( '_FillValue' , np . nan )
69
69
70
70
# Loop over each geometry in the GeoDataFrame
71
71
for geom in shapes .geometry :
72
72
# Mask the raster with the current geometry
73
73
out_image = input .rio .clip ([geom ],
74
- all_touched = all_touched ,
75
- nodata = nodata_value )
74
+ all_touched = all_touched )
76
75
77
76
# we only care about the data, not the shape
78
77
out_data = out_image .values .flatten ()
@@ -81,12 +80,16 @@ def summarise_by_shape(input: xr.DataArray,
81
80
out_data = out_data [~ np .isclose (out_data , nodata_value , equal_nan = True )]
82
81
83
82
if thr_quantile is not None :
83
+ # Sort the out_data array
84
+ sorted_data = np .sort (out_data )
85
+ # Calculate the index for the quantile threshold
86
+ quantile_position = int (thr_quantile * len (sorted_data ))
84
87
if thr_side .lower () == 'above' :
85
88
# filter the data above the threshold
86
- data = out_data [ out_data > np . quantile ( out_data , thr_quantile ) ]
89
+ data = sorted_data [ quantile_position : ]
87
90
elif thr_side .lower () == 'below' :
88
91
# filter the data below the threshold
89
- data = out_data [ out_data < np . quantile ( out_data , thr_quantile ) ]
92
+ data = sorted_data [: quantile_position ]
90
93
else :
91
94
raise ValueError ('The threshold side must be either "above" or "below".' )
92
95
elif thr_value is not None :
@@ -127,6 +130,52 @@ def summarise_by_shape(input: xr.DataArray,
127
130
128
131
return shapes
129
132
133
+ @as_DAM_process ()
134
+ def get_percentages_by_shape (input : xr .DataArray ,
135
+ shapes : gpd .GeoDataFrame ,
136
+ classes : list [int ],
137
+ ) -> gpd .GeoDataFrame :
138
+ """
139
+ Classify a raster by a shapefile.
140
+ """
141
+ # get no_data value
142
+ nodata_value = input .attrs .get ('_FillValue' , np .nan )
143
+
144
+ # Initialize an empty list to store the results
145
+ results = []
146
+
147
+ # Calculate the bin edges dynamically
148
+ bin_edges = np .append (classes , classes [- 1 ] + 1 )
149
+
150
+ # Loop over each geometry in the GeoDataFrame
151
+ for geom in shapes .geometry :
152
+ # Mask the raster with the current geometry
153
+ out_image = input .rio .clip ([geom ])
154
+
155
+ # we only care about the data, not the shape
156
+ out_data = out_image .values .flatten ()
157
+
158
+ # remove the nodata values
159
+ out_data = out_data [~ np .isclose (out_data , nodata_value , equal_nan = True )]
160
+
161
+ # get the values in the classes
162
+ hist , _ = np .histogram (out_data , bins = bin_edges )
163
+
164
+ # Turn the histogram into percentages of the total rounded to 0 decimals
165
+ hist_percentages = np .round (hist / hist .sum () * 100 , 0 )
166
+
167
+ # Add the histogram percentages to the list
168
+ results .append (hist_percentages )
169
+
170
+ # Turn the list of lists into a numpy array
171
+ results = np .array (results )
172
+ for i in range (len (classes )):
173
+ # Define the column name
174
+ column_name = f'class_{ classes [i ]} '
175
+ shapes [column_name ] = results [:, i ]
176
+
177
+ return shapes
178
+
130
179
def combine_raster_data (input : list [str ],
131
180
statistic : str = 'mean' ,
132
181
weights : Optional [list [float ]] = None ,
0 commit comments