Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

After xu.concat(): selection and depth conversion issues #80

Closed
Sonia-Heye opened this issue Apr 25, 2023 · 4 comments
Closed

After xu.concat(): selection and depth conversion issues #80

Sonia-Heye opened this issue Apr 25, 2023 · 4 comments

Comments

@Sonia-Heye
Copy link

Sonia-Heye commented Apr 25, 2023

Hey,
I've found an issue when using concatenated datasets, following xu.concat().
Firstly, my ds.ugrid.sel() function gives me an index error with the concatenated data.
Secondly, my depth conversion from sigma to meters (dfmt.get_Dataset_atdepths()) only works before the data is concatenated - and also does not work on the point-selected output from the first issue.
An example: P:\11208479-sequestration-seaweed\Oosterschelde_DFM_hydro_waq\dflowfm3d-oosterschelde-wq\scripts\issue_minimal_working_example.ipynb

Or:

import os
from pathlib import Path
import geopandas
import pandas as pd
import xugrid as xu
import dfm_tools as dfmt
from shapely.geometry import Point

## Reading output from 2 years, for 3 partitions

basedir = r"p:\archivedprojects\11206044-002ospareutrophication\final_results_Snellius_latestversion_20220210"
f1 = [os.path.join(basedir, r'A16b_Numtopsiguniform1_2015_pH\DFM_OUTPUT_DCSM-FM_0_5nm_waq',f'DCSM-FM_0_5nm_waq_{i:04d}_map.nc') for i in [146,147,148]]
f2 = [os.path.join(basedir, r'A16b_Numtopsiguniform1_2016_pH\DFM_OUTPUT_DCSM-FM_0_5nm_waq',f'DCSM-FM_0_5nm_waq_{i:04d}_map.nc') for i in [146,147,148]]

waq_xr1 = dfmt.open_partitioned_dataset(f1)       # opens every .map file in this folder
waq_xr2 = dfmt.open_partitioned_dataset(f2)

waq_xr1 = dfmt.rename_waqvars(waq_xr1)             # renames the water quality variables
waq_xr2 = dfmt.rename_waqvars(waq_xr2)

waq_xr = xu.concat([waq_xr1, waq_xr2], dim='time') # concatenate the two years


list_plifiles = [Path(r'p:\11208479-sequestration-seaweed\Oosterschelde_DFM_hydro_waq\dflowfm3d-oosterschelde-wq\boundary_conditions\zee-zuid.pli')]


## Loop over each boundary
# BUGFIX: the original `for i, file in enumerate(...)` re-used `i` as the
# inner point-loop index, shadowing the outer index, and then indexed
# `list_plifiles[i]` instead of using the loop variable directly.
for pli_path in list_plifiles:
    print(pli_path.stem)  # boundary name without the .pli extension

    # Read points within this offshore boundary:
    pli_file = pd.read_csv(pli_path, skiprows=1, sep=' ')   # read pli file
    pli_file = pli_file.dropna(axis=1)                      # drop empty columns

    # Build shapely points by zipping over the x/y columns once, instead of
    # calling .to_list()[i] twice per row (quadratic in the original).
    xcol, ycol, loccol = pli_file.columns[:3]
    points = [Point(x, y) for x, y in zip(pli_file[xcol], pli_file[ycol])]
    d = {'location': pli_file[loccol].to_list(), 'geometry': points}

    # Convert dutch crs to decimal degrees (for DCSM)
    gdf = geopandas.GeoDataFrame(d, crs='epsg:28992')   # set Dutch crs
    gdf = gdf.to_crs(4326)                              # convert to decimal degrees


point = 0  # index of the boundary point used in the selections below


## Issue when using .sel from concatenated ds: 
waq_xr.ugrid.sel(x=gdf.geometry[point].x, y=gdf.geometry[point].y)

## Note: no issue with pre-concatenated data
ds_point = waq_xr1.ugrid.sel(x=gdf.geometry[point].x, y=gdf.geometry[point].y)

## Issue with depth conversion from sigma to meters:
dfmt.get_Dataset_atdepths(data_xr=ds_point, depths = waq_xr.mesh2d_layer_z[0,:].values)  ## using point selected from above

dfmt.get_Dataset_atdepths(data_xr=waq_xr, depths = waq_xr.mesh2d_layer_z[0,:].values)  # Also issue when using concat data

## But no issue when using 1 year (pre-concat and pre-point-selection):
dfmt.get_Dataset_atdepths(data_xr=waq_xr1, depths=waq_xr.mesh2d_layer_z[0,:].values.tolist(), reference='waterlevel')
@Sonia-Heye
Copy link
Author

Sonia-Heye commented Apr 25, 2023

Three MWE's including corresponding errors:

# --- MWE 1: concat with plain xarray, then wrap in a UgridDataset ---------
# NOTE(review): assumes `import xarray as xr` and `import xugrid as xu` were
# executed earlier in the session; this snippet does not import them itself.
#xr.open_dataset, xr.concat, xu.UgridDataset
chunks = {'time':1}  # lazy opening, one chunk per timestep
y1p1 = xr.open_dataset(r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2015_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0146_map.nc',chunks=chunks)
y2p1 = xr.open_dataset(r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2016_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0146_map.nc',chunks=chunks)
# y1p2/y2p2 are opened here but only used from MWE 2 onwards.
y1p2 = xr.open_dataset(r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2015_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0147_map.nc',chunks=chunks)
y2p2 = xr.open_dataset(r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2016_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0147_map.nc',chunks=chunks)
p1_xr = xr.concat([y1p1,y2p1], dim='time')  # concatenate both years of partition 0146
p1_xu = xu.UgridDataset(p1_xr)  # wrapping the concatenated dataset is what fails
#raises "IndexError: boolean index did not match indexed array along dimension 1; dimension is 3094 but corresponding boolean dimension is 4"
# --- MWE 2: xu.concat per partition, then merge the partitions ------------
#xu.open_dataset, xu.concat, xu.merge_partitions
chunks = {'time':1}
y1p1 = xu.open_dataset(r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2015_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0146_map.nc',chunks=chunks)
y2p1 = xu.open_dataset(r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2016_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0146_map.nc',chunks=chunks)
y1p2 = xu.open_dataset(r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2015_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0147_map.nc',chunks=chunks)
y2p2 = xu.open_dataset(r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2016_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0147_map.nc',chunks=chunks)
p1_xu = xu.concat([y1p1,y2p1], dim='time')  # both years of partition 0146
p2_xu = xu.concat([y1p2,y2p2], dim='time')  # both years of partition 0147
y1_xu = xu.merge_partitions([y1p1,y1p2]) #works
all_xu = xu.merge_partitions([p1_xu,p2_xu]) #fails
#raises "ValueError: Expected 2 UGRID topologies for 2 partitions, received: defaultdict(<class 'list'>, {'mesh2d': [<xarray.Dataset> [...]"
# --- MWE 3: merge partitions per year first, then xu.concat the years -----
#xu.open_dataset, xu.merge_partitions, xu.concat, xu.ugrid.sel
chunks = {'time':1}
y1p1 = xu.open_dataset(r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2015_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0146_map.nc',chunks=chunks)
y2p1 = xu.open_dataset(r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2016_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0146_map.nc',chunks=chunks)
y1p2 = xu.open_dataset(r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2015_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0147_map.nc',chunks=chunks)
y2p2 = xu.open_dataset(r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2016_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0147_map.nc',chunks=chunks)
y1_xu = xu.merge_partitions([y1p1,y1p2])  # 2015, both partitions
y2_xu = xu.merge_partitions([y2p1,y2p2])  # 2016, both partitions
all_xu = xu.concat([y1_xu,y2_xu], dim='time')
all_xu_sel = all_xu.ugrid.sel(x=3.5051096780337727, y=51.56855173004132)  # point selection fails
#raises "IndexError: index 4851 is out of bounds for axis 0 with size 1"

This one does work, with xu.open_mfdataset instead of xu.concat:

# Working alternative: open each partition's yearly files together with
# xu.open_mfdataset (instead of opening them separately and concatenating),
# then merge the partitions; .ugrid.sel on the result succeeds.
# NOTE(review): assumes `import xugrid as xu` was executed earlier.
#xu.open_mfdataset, xu.merge_partitions, .ugrid.sel
chunks = {'time':1}  # lazy opening, one chunk per timestep
files_p1 = [r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2015_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0146_map.nc',r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2016_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0146_map.nc']
files_p2 = [r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2015_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0147_map.nc',r'p:\\archivedprojects\\11206044-002ospareutrophication\\final_results_Snellius_latestversion_20220210\\A16b_Numtopsiguniform1_2016_pH\\DFM_OUTPUT_DCSM-FM_0_5nm_waq\\DCSM-FM_0_5nm_waq_0147_map.nc']
all_p1 = xu.open_mfdataset(files_p1,chunks=chunks)  # partition 0146, both years
all_p2 = xu.open_mfdataset(files_p2,chunks=chunks)  # partition 0147, both years
all_xu = xu.merge_partitions([all_p1,all_p2])
all_xu_sel = all_xu.ugrid.sel(x=3.5051096780337727, y=51.56855173004132)

@veenstrajelmer
Copy link
Collaborator

I ran the code in the issue description again. Most of the lines work without errors. However, there is still one issue left. When interpolating the merged dataset (waq_xr) to depths, it raises "ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()". This is because the mesh2d_interface_z variable (and possibly others) has an additional time dimension. Reproducible code:

import os
import xugrid as xu
import dfm_tools as dfmt

# Two model years, partitions 146-148 of each year.
basedir = r"p:\archivedprojects\11206044-002ospareutrophication\final_results_Snellius_latestversion_20220210"
partitions = [146, 147, 148]
f1 = []
f2 = []
for part in partitions:
    fname = f'DCSM-FM_0_5nm_waq_{part:04d}_map.nc'
    f1.append(os.path.join(basedir, r'A16b_Numtopsiguniform1_2015_pH\DFM_OUTPUT_DCSM-FM_0_5nm_waq', fname))
    f2.append(os.path.join(basedir, r'A16b_Numtopsiguniform1_2016_pH\DFM_OUTPUT_DCSM-FM_0_5nm_waq', fname))

# Open the partitioned map files of each year as one dataset.
waq_xr1 = dfmt.open_partitioned_dataset(f1)
waq_xr2 = dfmt.open_partitioned_dataset(f2)

# Rename the water-quality variables.
waq_xr1 = dfmt.rename_waqvars(waq_xr1)
waq_xr2 = dfmt.rename_waqvars(waq_xr2)

# Concatenate both years along the time dimension.
waq_xr = xu.concat([waq_xr1, waq_xr2], dim='time')

# this raises "ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()"
# dfmt.get_Dataset_atdepths(data_xr=waq_xr, depths = waq_xr.mesh2d_layer_z[0,:].values)

# After concat, mesh2d_interface_z unexpectedly gains a time dimension.
print(waq_xr2['mesh2d_interface_z'].dims) # ('mesh2d_nInterfaces',)
print(waq_xr['mesh2d_interface_z'].dims) # ('time', 'mesh2d_nInterfaces')

Prints:

('mesh2d_nInterfaces',)
('time', 'mesh2d_nInterfaces')

Is this a bug or should one use a different approach to concatenate the datasets?

@Huite
Copy link
Collaborator

Huite commented Sep 5, 2024

I think it needs a different option for one of the keywords:

xarray.concat

@veenstrajelmer
Copy link
Collaborator

veenstrajelmer commented Sep 5, 2024

Thanks, you are right, I missed data_vars='minimal'. This avoids the time dimension being broadcast onto variables that do not have it.

This code should do all the intended things and gives no errors, at least not with the current xugrid release (0.12.0):

import os
import geopandas
import pandas as pd
import xugrid as xu
import dfm_tools as dfmt
from shapely.geometry import Point

## Reading output from 2 years, for 3 partitions
basedir = r"p:\archivedprojects\11206044-002ospareutrophication\final_results_Snellius_latestversion_20220210"
f1 = [os.path.join(basedir, r'A16b_Numtopsiguniform1_2015_pH\DFM_OUTPUT_DCSM-FM_0_5nm_waq',f'DCSM-FM_0_5nm_waq_{i:04d}_map.nc') for i in [146,147,148]]
f2 = [os.path.join(basedir, r'A16b_Numtopsiguniform1_2016_pH\DFM_OUTPUT_DCSM-FM_0_5nm_waq',f'DCSM-FM_0_5nm_waq_{i:04d}_map.nc') for i in [146,147,148]]

# opens every .map file in this folder
waq_xr1 = dfmt.open_partitioned_dataset(f1)
waq_xr2 = dfmt.open_partitioned_dataset(f2)

# rename the water quality variables
waq_xr1 = dfmt.rename_waqvars(waq_xr1)
waq_xr2 = dfmt.rename_waqvars(waq_xr2)

# concatenate the two years
waq_xr = xu.concat([waq_xr1, waq_xr2], 
                   dim='time', # dimension along which to concatenate
                   data_vars='minimal' # to avoid expanding of non-time variables with time dimension
                   )

file_pli = r'p:\11208479-sequestration-seaweed\Oosterschelde_DFM_hydro_waq\dflowfm3d-oosterschelde-wq\boundary_conditions\zee-zuid.pli'

# Read points within this offshore boundary:
pli_file = pd.read_csv(file_pli, skiprows=1, sep=' ')  # read pli file
pli_file = pli_file.dropna(axis=1)  # drop empty columns

# Build shapely points by zipping over the x/y columns once, instead of
# calling .to_list()[i] twice per row (quadratic in the original).
xcol, ycol, loccol = pli_file.columns[:3]
points = [Point(x, y) for x, y in zip(pli_file[xcol], pli_file[ycol])]
d = {'location': pli_file[loccol].to_list(), 'geometry': points}

# Convert dutch crs to decimal degrees (for DCSM)
gdf = geopandas.GeoDataFrame(d, crs='epsg:28992')  # set Dutch crs
gdf = gdf.to_crs(4326)  # convert to decimal degrees

point = 0  # index of the boundary point to select

# select the dataset at one boundary point
ds_point = waq_xr.ugrid.sel(x=gdf.geometry[point].x, y=gdf.geometry[point].y)

# interpolate from sigma layers to fixed depths, for the point and full datasets
ds_point_atdepths = dfmt.get_Dataset_atdepths(data_xr=ds_point, depths=waq_xr.mesh2d_layer_z[0].values)

waq_xr_atdepths = dfmt.get_Dataset_atdepths(data_xr=waq_xr, depths=waq_xr.mesh2d_layer_z[0].values)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants