Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fms.parallel startup #1477

Merged
merged 5 commits into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 74 additions & 30 deletions fms2_io/include/netcdf_read_data.inc
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,12 @@ subroutine netcdf_read_data_2d(fileobj, variable_name, buf, unlim_dim_level, &
endif
c(unlim_dim_index) = unlim_dim_level
endif
if (fileobj%is_root) then
if(fileobj%use_collective) then
varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg)
! NetCDF does not have the ability to specify collective I/O at
! the file basis so we must activate at the variable level
err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective)
call check_netcdf_code(err, append_error_msg)
select type(buf)
type is (integer(kind=i4_kind))
err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e)
Expand All @@ -370,20 +374,38 @@ subroutine netcdf_read_data_2d(fileobj, variable_name, buf, unlim_dim_level, &
end select
call check_netcdf_code(err, append_error_msg)
call unpack_data_2d(fileobj, varid, variable_name, buf)
endif
if (bcast) then
select type(buf)
type is (integer(kind=i4_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
type is (integer(kind=i8_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
type is (real(kind=r4_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
type is (real(kind=r8_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
class default
call error("Unsupported variable type: "//trim(append_error_msg))
end select
else
if (fileobj%is_root) then
varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg)
select type(buf)
type is (integer(kind=i4_kind))
err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e)
type is (integer(kind=i8_kind))
err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e)
type is (real(kind=r4_kind))
err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e)
type is (real(kind=r8_kind))
err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e)
class default
call error("Unsupported variable type: "//trim(append_error_msg))
end select
call check_netcdf_code(err, append_error_msg)
call unpack_data_2d(fileobj, varid, variable_name, buf)
endif
if (bcast) then
select type(buf)
type is (integer(kind=i4_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
type is (integer(kind=i8_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
type is (real(kind=r4_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
type is (real(kind=r8_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
class default
call error("Unsupported variable type: "//trim(append_error_msg))
end select
endif
endif
end subroutine netcdf_read_data_2d

Expand Down Expand Up @@ -446,8 +468,12 @@ subroutine netcdf_read_data_3d(fileobj, variable_name, buf, unlim_dim_level, &
endif
c(unlim_dim_index) = unlim_dim_level
endif
if (fileobj%is_root) then
if(fileobj%use_collective) then
varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg)
! NetCDF does not have the ability to specify collective I/O at
! the file basis so we must activate at the variable level
err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective)
call check_netcdf_code(err, append_error_msg)
select type(buf)
type is (integer(kind=i4_kind))
err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e)
Expand All @@ -462,20 +488,38 @@ subroutine netcdf_read_data_3d(fileobj, variable_name, buf, unlim_dim_level, &
end select
call check_netcdf_code(err, append_error_msg)
call unpack_data_3d(fileobj, varid, variable_name, buf)
endif
if (bcast) then
select type(buf)
type is (integer(kind=i4_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
type is (integer(kind=i8_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
type is (real(kind=r4_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
type is (real(kind=r8_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
class default
call error("Unsupported variable type: "//trim(append_error_msg))
end select
else
if (fileobj%is_root) then
varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg)
select type(buf)
type is (integer(kind=i4_kind))
err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e)
type is (integer(kind=i8_kind))
err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e)
type is (real(kind=r4_kind))
err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e)
type is (real(kind=r8_kind))
err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e)
class default
call error("Unsupported variable type: "//trim(append_error_msg))
end select
call check_netcdf_code(err, append_error_msg)
call unpack_data_3d(fileobj, varid, variable_name, buf)
endif
if (bcast) then
select type(buf)
type is (integer(kind=i4_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
type is (integer(kind=i8_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
type is (real(kind=r4_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
type is (real(kind=r8_kind))
call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist)
class default
call error("Unsupported variable type: "//trim(append_error_msg))
end select
endif
endif
end subroutine netcdf_read_data_3d

Expand Down
78 changes: 57 additions & 21 deletions fms2_io/netcdf_io.F90
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ module netcdf_io_mod
use mpp_mod
use fms_io_utils_mod
use platform_mod
use mpi, only: MPI_COMM_NULL
implicit none
private

Expand Down Expand Up @@ -149,6 +150,11 @@ module netcdf_io_mod
character (len=20) :: time_name
type(dimension_information) :: bc_dimensions !<information about the current dimensions for regional
!! restart variables
logical :: use_collective = .false. !< Flag indicating if we should open the file for collective input
!! this should be set to .true. in the user application if they want
!! collective reads (put before open_file())
Comment on lines +152 to +154
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@uramirez8707 @thomas-robinson - does this comment/variable name make it clear this applies only to reads and not writes?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It says for collective input so I think that's clear

integer :: TileComm=MPI_COMM_NULL !< MPI communicator used for collective reads.
!! To be replaced with a real communicator at user request

endtype FmsNetcdfFile_t

Expand Down Expand Up @@ -562,6 +568,7 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do

integer :: nc_format_param
integer :: err
integer :: IsNetcdf4=-999 !< Query the file for IsNetcdf4 in the event that the open for collective reads fails
character(len=256) :: buf !< Filename with .res in the filename if it is a restart
character(len=256) :: buf2 !< Filename with the filename appendix if there is one
logical :: is_res
Expand Down Expand Up @@ -619,30 +626,30 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do
fileobj%is_root = mpp_pe() .eq. fileobj%io_root

fileobj%is_netcdf4 = .false.
!Open the file with netcdf if this rank is the I/O root.
if (fileobj%is_root) then
if (fms2_ncchksz == -1) call error("netcdf_file_open:: fms2_ncchksz not set, call fms2_io_init")
if (fms2_nc_format_param == -1) call error("netcdf_file_open:: fms2_nc_format_param not set, call fms2_io_init")

if (present(nc_format)) then
if (string_compare(nc_format, "64bit", .true.)) then
nc_format_param = nf90_64bit_offset
elseif (string_compare(nc_format, "classic", .true.)) then
nc_format_param = nf90_classic_model
elseif (string_compare(nc_format, "netcdf4", .true.)) then
fileobj%is_netcdf4 = .true.
nc_format_param = nf90_netcdf4
else
call error("unrecognized netcdf file format: '"//trim(nc_format)//"' for file:"//trim(fileobj%path)//&
&"Check your open_file call, the acceptable values are 64bit, classic, netcdf4")
endif
call string_copy(fileobj%nc_format, nc_format)
if (fms2_ncchksz == -1) call error("netcdf_file_open:: fms2_ncchksz not set, call fms2_io_init")
if (fms2_nc_format_param == -1) call error("netcdf_file_open:: fms2_nc_format_param not set, call fms2_io_init")

if (present(nc_format)) then
if (string_compare(nc_format, "64bit", .true.)) then
nc_format_param = nf90_64bit_offset
elseif (string_compare(nc_format, "classic", .true.)) then
nc_format_param = nf90_classic_model
elseif (string_compare(nc_format, "netcdf4", .true.)) then
fileobj%is_netcdf4 = .true.
nc_format_param = nf90_netcdf4
else
call string_copy(fileobj%nc_format, trim(fms2_nc_format))
nc_format_param = fms2_nc_format_param
fileobj%is_netcdf4 = fms2_is_netcdf4
call error("unrecognized netcdf file format: '"//trim(nc_format)//"' for file:"//trim(fileobj%path)//&
&"Check your open_file call, the acceptable values are 64bit, classic, netcdf4")
endif
call string_copy(fileobj%nc_format, nc_format)
else
call string_copy(fileobj%nc_format, trim(fms2_nc_format))
nc_format_param = fms2_nc_format_param
fileobj%is_netcdf4 = fms2_is_netcdf4
endif

!Open the file with netcdf if this rank is the I/O root.
if (fileobj%is_root .and. .not.(fileobj%use_collective)) then
if (string_compare(mode, "read", .true.)) then
err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz)
elseif (string_compare(mode, "append", .true.)) then
Expand All @@ -656,6 +663,35 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do
&"Check your open_file call, the acceptable values are read, append, write, overwrite")
endif
call check_netcdf_code(err, "netcdf_file_open:"//trim(fileobj%path))
elseif(fileobj%use_collective .and. (fileobj%TileComm /= MPI_COMM_NULL)) then
if(string_compare(mode, "read", .true.)) then
! Open the file for collective reads if the user requested that treatment in their application.
! NetCDF does not have the ability to specify collective I/O at the file basis
! so we must activate each variable in netcdf_read_data_2d() and netcdf_read_data_3d()
err = nf90_open(trim(fileobj%path), ior(NF90_NOWRITE, NF90_MPIIO), fileobj%ncid, comm=fileobj%TileComm, info=MPP_INFO_NULL)
if(err /= nf90_noerr) then
err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid)
err = nf90_get_att(fileobj%ncid, nf90_global, "_IsNetcdf4", IsNetcdf4)
err = nf90_close(fileobj%ncid)
if(IsNetcdf4 /= 1) then
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is IsNetcdf4 not always set here? If it is going to be set here and then checked, why initialize it when it's declared?

If IsNetcdf4 is not always set by nf90_get_att, then if it equals 1 on one pass and is not set on the next, it will still be 1 because of the implied save by setting it when it's declared.

call mpp_error(NOTE,"netcdf_file_open: Open for collective read failed because the file is not netCDF-4 format."// &
" Falling back to parallel independent for file "// trim(fileobj%path))
endif
err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz)
endif
elseif (string_compare(mode, "write", .true.)) then
call mpp_error(FATAL,"netcdf_file_open: Attempt to create a file for collective write"// &
" This feature is not implemented"// trim(fileobj%path))
!err = nf90_create(trim(fileobj%path), ior(nf90_noclobber, nc_format_param), fileobj%ncid, comm=fileobj%TileComm, info=MPP_INFO_NULL)
elseif (string_compare(mode,"overwrite",.true.)) then
call mpp_error(FATAL,"netcdf_file_open: Attempt to create a file for collective overwrite"// &
" This feature is not implemented"// trim(fileobj%path))
!err = nf90_create(trim(fileobj%path), ior(nf90_clobber, nc_format_param), fileobj%ncid, comm=fileobj%TileComm, info=MPP_INFO_NULL)
else
call error("unrecognized file mode: '"//trim(mode)//"' for file:"//trim(fileobj%path)//&
&"Check your open_file call, the acceptable values are read, append, write, overwrite")
endif
call check_netcdf_code(err, "netcdf_file_open:"//trim(fileobj%path))
else
fileobj%ncid = missing_ncid
endif
Expand Down
Loading