From 6cbad97416a76235bf123a87e6f8a988d7c1a671 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 13 Jan 2025 00:28:23 +0100 Subject: [PATCH 1/6] Breaking: Standardise cf standards and missingval handling (#695) * start adding cf keyword * add modifieddiskarray file * use scale and offset keywords * bugfix * bugfix * updates * create * tweaks * bugfixes * bugfix GRIB * bugfixes * add raw keyword * clean up extensions * updates * mosaic fixes and standardisation * bugfixes * closured in create * bugfixes * fix resample test for nokw * test create with a function * Apply suggestions from code review Co-authored-by: Anshul Singhvi * Hook up scale and offset to Zarr datasets via CDM (#732) This fixes a strange issue I was having where Rasters.jl read the wrong values from a Kerchunk/Zarr dataset. * use coalesceval instead of maskingval * combine missingval and maskingval * one missingval * fixes * fix mosaic * tweaks and bugfixes * bugfix everything * fix aqua * fix netcdf metadata * docs tweaks * typo * bugfix and tweak docs * fix extension doctests * fix warp missingval * bugfix missingval tests * remove ext --------- Co-authored-by: Anshul Singhvi --- ext/RastersArchGDALExt/RastersArchGDALExt.jl | 13 +- ext/RastersArchGDALExt/gdal_source.jl | 262 ++++++---- ext/RastersArchGDALExt/resample.jl | 7 +- ext/RastersArchGDALExt/warp.jl | 23 +- .../RastersCoordinateTransformationsExt.jl | 9 +- .../RastersGRIBDatasetsExt.jl | 26 +- .../gribdatasets_source.jl | 14 +- ext/RastersMakieExt/RastersMakieExt.jl | 7 +- .../RastersNCDatasetsExt.jl | 26 +- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 61 ++- .../RastersRasterDataSourcesExt.jl | 7 +- .../RastersZarrDatasetsExt.jl | 24 +- .../zarrdatasets_source.jl | 35 +- src/Rasters.jl | 1 + src/array.jl | 168 +++--- src/create.jl | 375 ++++++++++++-- src/extensions.jl | 10 +- src/filearray.jl | 46 +- src/filestack.jl | 13 +- src/methods/burning/array_init.jl | 38 +- src/methods/classify.jl | 6 +- src/methods/crop_extend.jl | 73 +-- src/methods/fixed.png | Bin 0 -> 18225 bytes src/methods/fractional_resample.jl | 33 ++ src/methods/mask.jl | 38 +- src/methods/mosaic.jl | 51 +- src/methods/rasterize.jl | 19 +- src/methods/replace_missing.jl | 10 +- src/methods/shared_docstrings.jl | 84 ++- src/methods/zonal.jl | 8 +- src/modifieddiskarray.jl | 247 +++++++++ src/nokw.jl | 5 + src/openstack.jl | 21 +- src/show.jl | 3 +- src/sources/commondatamodel.jl | 230 ++++---- src/sources/grd.jl | 143 +++-- src/stack.jl | 173 +++++-- src/utils.jl | 489 ++++++++++++------ src/write.jl | 75 ++- test/array.jl | 3 +- test/create.jl | 220 ++++++++ test/methods.jl | 68 ++- test/rasterize.jl | 21 +- test/resample.jl | 63 ++- test/runtests.jl | 16 +- test/sources/commondatamodel.jl | 2 +- test/sources/gdal.jl | 143 +++-- test/sources/grd.jl | 89 ++-- test/sources/gribdatasets.jl | 7 +- test/sources/ncdatasets.jl | 135 +++-- test/sources/rasterdatasources.jl | 49 +- test/sources/zarr.jl | 91 +++- test/warp.jl | 38 +- 53 files changed, 2635 insertions(+), 1183 deletions(-) create mode 100644 src/methods/fixed.png create mode 100644 src/methods/fractional_resample.jl create mode 100644 src/modifieddiskarray.jl create mode 100644 test/create.jl diff --git a/ext/RastersArchGDALExt/RastersArchGDALExt.jl b/ext/RastersArchGDALExt/RastersArchGDALExt.jl index 55fb3a239..293013406 100644 --- a/ext/RastersArchGDALExt/RastersArchGDALExt.jl +++ b/ext/RastersArchGDALExt/RastersArchGDALExt.jl @@ -1,10 +1,7 @@ module RastersArchGDALExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, ArchGDAL -else - using ..Rasters, ..ArchGDAL -end +using Rasters +using ArchGDAL import DiskArrays, Extents, @@ -16,15 +13,17 @@ using DimensionalData, using Rasters.Lookups using Rasters.Dimensions -using Rasters: GDALsource, AbstractProjected, RasterStackOrArray, FileArray, NoKW, +using Rasters: GDALsource, AbstractProjected, AbstractRaster, AbstractRasterStack, + RasterStackOrArray, FileArray, NoKW, RES_KEYWORD, SIZE_KEYWORD, CRS_KEYWORD, FILENAME_KEYWORD, SUFFIX_KEYWORD, EXPERIMENTAL, GDAL_EMPTY_TRANSFORM, GDAL_TOPLEFT_X, GDAL_WE_RES, GDAL_ROT1, GDAL_TOPLEFT_Y, GDAL_ROT2, GDAL_NS_RES, _no_crs_error -import Rasters: reproject, resample, warp, nokw +import Rasters: reproject, resample, warp, cellsize, nokw, isnokw, isnokwornothing import LinearAlgebra: dot, cross +const AG = ArchGDAL const RA = Rasters const AG = ArchGDAL const DD = DimensionalData diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 65b7ba5fb..1b187816b 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -34,59 +34,57 @@ const GDAL_VIRTUAL_FILESYSTEMS = "/vsi" .* ( "sparse", ) -# Array ######################################################################## - -function RA.FileArray{GDALsource}(ds::AG.RasterDataset{T}, filename; kw...) where {T} - eachchunk, haschunks = DA.eachchunk(ds), DA.haschunks(ds) - RA.FileArray{GDALsource,T,3}(filename, size(ds); eachchunk, haschunks, kw...) -end +# TODO more cases of return values here, like wrapped disk arrays RA.cleanreturn(A::AG.RasterDataset) = Array(A) RA.haslayers(::GDALsource) = false RA._sourcetrait(A::AG.RasterDataset) = GDALsource() -function Base.write( - filename::AbstractString, ::GDALsource, A::AbstractRaster{T}; +function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRasterStack; kw...) + ext = splitext(filename)[2] + throw(ArgumentError("Cant write a RasterStack to $ext with gdal")) +end +function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster{T}; force=false, verbose=true, + write=true, missingval=nokw, + scale=nokw, + offset=nokw, + coerce=nokw, + eltype=Missings.nonmissingtype(T), + f=identity, kw... ) where T RA.check_can_write(filename, force) - A1 = _maybe_correct_to_write(A, missingval) - _create_with_driver(filename, dims(A1), eltype(A1), Rasters.missingval(A1); _block_template=A1, kw...) do dataset - verbose && _maybe_warn_south_up(A, verbose, "Writing South-up. Use `reverse(myrast; dims=Y)` first to write conventional North-up") - open(A1; write=true) do O - AG.RasterDataset(dataset) .= parent(O) + write = f === identity ? write : true + A1 = _maybe_permute_to_gdal(A) + + # Missing values + missingval_pair = RA._write_missingval_pair(A, missingval; eltype, verbose) + + _create_with_driver(filename, dims(A1), eltype; + missingval=missingval_pair[1], _block_template=A1, scale, offset, verbose, kw... + ) do dataset + if write + mod = RA._mod(eltype, missingval_pair, scale, offset, coerce) + open(A1; write=true) do O + R = RA._maybe_modify(AG.RasterDataset(dataset), mod) + R .= parent(O) + if hasdim(A, Band()) + f(R) + else + f(view(R, :, :, 1)) + end + end end end return filename end -function RA.create(filename, ::GDALsource, T::Type, dims::DD.DimTuple; - missingval=nokw, - metadata=nokw, - name=nokw, - lazy=true, - verbose=true, - kw... -) - T = Missings.nonmissingtype(T) - missingval = ismissing(missingval) ? RA._writeable_missing(T) : missingval - _create_with_driver(filename, dims, T, missingval; kw...) do _ - verbose && _maybe_warn_south_up(dims, verbose, "Creating a South-up raster. Use `reverse(myrast; dims=Y)` first to write conventional North-up") - nothing - end - - return Raster(filename; source=GDALsource(), name, lazy, metadata, dropband=!hasdim(dims, Band)) -end - -function _maybe_warn_south_up(A, verbose, msg) - verbose && lookup(A, Y) isa AbstractSampled && order(A, Y) isa ForwardOrdered && @warn msg -end - function RA._open(f, ::GDALsource, filename::AbstractString; write=false, + mod=RA.NoMod(), kw... ) # Check the file actually exists because the GDAL error is unhelpful @@ -104,15 +102,13 @@ function RA._open(f, ::GDALsource, filename::AbstractString; end end end - if write - # Pass the OF_UPDATE flag to GDAL - AG.readraster(RA.cleanreturn ∘ f, filename; flags=AG.OF_UPDATE) - else - # Otherwise just read - AG.readraster(RA.cleanreturn ∘ f, filename) + flags = write ? AG.OF_UPDATE : AG.OF_READONLY + return AG.readraster(filename; flags) do A + RA.cleanreturn(f(RA._maybe_modify(A, mod))) end end -RA._open(f, ::GDALsource, ds::AG.RasterDataset; kw...) = RA.cleanreturn(f(ds)) +RA._open(f, ::GDALsource, A::AG.RasterDataset; mod=RA.NoMod(), kw...) = + RA.cleanreturn(f(RA._maybe_modify(A, mod))) # DimensionalData methods for ArchGDAL types ############################### @@ -206,20 +202,27 @@ function RA._dims(raster::AG.RasterDataset, crs=nokw, mappedcrs=nokw) end # TODO make metadata optional, its slow to get -function RA._metadata(raster::AG.RasterDataset, args...) - band = AG.getband(raster.ds, 1) +function RA._metadata(rds::AG.RasterDataset, args...) + band = AG.getband(rds.ds, 1) + metadata = RA._metadatadict(GDALsource()) # color = AG.getname(AG.getcolorinterp(band)) scale = AG.getscale(band) offset = AG.getoffset(band) # norvw = AG.noverview(band) units = AG.getunittype(band) - filelist = AG.filelist(raster) - metadata = RA._metadatadict(GDALsource(), "scale"=>scale, "offset"=>offset) - if units == "" + filepath = _getfilepath(rds) + # Set metadata if they are not default values + if scale != oneunit(scale) + metadata["scale"] = scale + end + if offset != zero(offset) + metadata["offset"] = offset + end + if units != "" metadata["units"] = units end - if length(filelist) > 0 - metadata["filepath"] = first(filelist) + if !isnothing(filepath) + metadata["filepath"] = filepath end return metadata end @@ -228,38 +231,26 @@ end # Create a Raster from a dataset RA.Raster(ds::AG.Dataset; kw...) = Raster(AG.RasterDataset(ds); kw...) -function RA.Raster(ds::AG.RasterDataset; - crs=crs(ds), - mappedcrs=nokw, - dims=RA._dims(ds, crs, mappedcrs), - refdims=(), - name=nokw, - metadata=RA._metadata(ds), - missingval=RA.missingval(ds), - lazy=false, - dropband=false -) - kw = (; refdims, name, metadata, missingval) - filelist = AG.filelist(ds) - raster = if lazy && length(filelist) > 0 - filename = first(filelist) - Raster(FileArray{GDALsource}(ds, filename), dims; kw...) +function RA.Raster(ds::AG.RasterDataset; lazy=false, kw...) + filepath = if lazy + fp = _getfilepath(ds) + isnothing(fp) ? "/vsimem" : fp else - Raster(Array(ds), dims; kw...) + "" end - return dropband ? RA._drop_single_band(raster, lazy) : raster + Raster(ds, filepath; kw...) end RA.missingval(ds::AG.Dataset, args...) = RA.missingval(AG.RasterDataset(ds)) -function RA.missingval(rasterds::AG.RasterDataset, args...) +function RA.missingval(rds::AG.RasterDataset, args...) # All bands have the same missingval in GDAL - band = AG.getband(rasterds.ds, 1) + band = AG.getband(rds.ds, 1) # GDAL will set this hasnodataval = Ref(Cint(0)) # Int64 and UInt64 need special casing in GDAL - nodataval = if eltype(rasterds) == Int64 + nodataval = if eltype(rds) == Int64 AG.GDAL.gdalgetrasternodatavalueasint64(band, hasnodataval) - elseif eltype(rasterds) == UInt64 + elseif eltype(rds) == UInt64 AG.GDAL.gdalgetrasternodatavalueasuint64(band, hasnodataval) else AG.GDAL.gdalgetrasternodatavalue(band, hasnodataval) @@ -273,7 +264,7 @@ function RA.missingval(rasterds::AG.RasterDataset, args...) end end -# GDAL always returns well known text +# GDAL always returns well known text crs function RA.crs(raster::AG.RasterDataset, args...) WellKnownText(GeoFormatTypes.CRS(), string(AG.getproj(raster.ds))) end @@ -286,14 +277,26 @@ function AG.Dataset(f::Function, A::AbstractRaster; kw...) f(rds.ds) end end -function AG.RasterDataset(f::Function, A::AbstractRaster; filename="", kw...) - A1 = _maybe_correct_to_write(A) - return _create_with_driver(filename, dims(A1), eltype(A1), missingval(A1); _block_template=A1, kw...) do dataset - rds = AG.RasterDataset(dataset) - open(A1) do a - rds .= parent(a) +function AG.RasterDataset(f::Function, A::AbstractRaster; + filename="", + scale=nokw, + offset=nokw, + coerce=nokw, + verbose=false, + eltype=Missings.nonmissingtype(eltype(A)), + missingval=Rasters.missingval(A), + kw... +) + return open(_maybe_permute_to_gdal(A)) do O + _create_with_driver(filename, dims(A), eltype; + _block_template=A, missingval, scale, offset, verbose, kw... + ) do dataset + rds = AG.RasterDataset(dataset) + mv = RA.missingval(rds) => RA.missingval(O) + mod = RA._mod(eltype, mv, scale, offset, coerce) + RA._maybe_modify(rds, mod) .= parent(O) + f(rds) end - f(rds) end end @@ -301,37 +304,30 @@ end # Sometimes GDAL stores the `missingval` in the wrong type, so fix it. _missingval_from_gdal(T::Type{<:AbstractFloat}, x::Real) = convert(T, x) -function _missingval_from_gdal(T::Type{<:Integer}, x::AbstractFloat) +function _missingval_from_gdal(T::Type{<:Integer}, x::AbstractFloat; verbose=true) if trunc(x) === x && x >= typemin(T) && x <= typemax(T) convert(T, x) else - @warn "Missing value $x can't be converted to array eltype $T. `missingval` set to `nothing`" + verbose && @warn "Missing value $x can't be converted to array eltype $T. `missingval` set to `nothing`" nothing end end -function _missingval_from_gdal(T::Type{<:Integer}, x::Integer) +function _missingval_from_gdal(T::Type{<:Integer}, x::Integer; verbose=true) if x >= typemin(T) && x <= typemax(T) convert(T, x) else - @warn "Missing value $x can't be converted to array eltype $T. `missingval` set to `nothing`" + verbose && @warn "Missing value $x can't be converted to array eltype $T. `missingval` set to `nothing`" nothing end end _missingval_from_gdal(T, x) = x -# Fix array and dimension configuration before writing with GDAL -_maybe_correct_to_write(A::AbstractDimArray, args...) = - _maybe_correct_to_write(lookup(A, X()), A, args...) -_maybe_correct_to_write(::Lookup, A::AbstractDimArray, args...) = A -function _maybe_correct_to_write( - lookup::Union{AbstractSampled,NoLookup}, A::AbstractDimArray, args... -) - RA._maybe_use_type_missingval(A, GDALsource(), args...) |> _maybe_permute_to_gdal +# Make sure driver is sensible +function _check_driver(::Nothing, driver) + isnokwornothing(driver) || isempty(driver) ? "MEM" : driver end - -_check_driver(filename::Nothing, driver) = "MEM" function _check_driver(filename::AbstractString, driver) - if isempty(driver) + if isnokwornothing(driver) || isempty(driver) if isempty(filename) driver = "MEM" else @@ -346,42 +342,62 @@ end # Handle creating a dataset with any driver, # applying the function `f` to the created dataset -function _create_with_driver(f, filename, dims::Tuple, T, missingval; - options=Dict{String,String}(), - driver="", - _block_template=nothing, +function _create_with_driver(f, filename, dims::Tuple, T; + verbose=true, + missingval=nokw, + options=nokw, + driver=nokw, chunks=nokw, + scale=nokw, + offset=nokw, + _block_template=nothing, kw... ) + # Allow but discourage south-up + verbose && _maybe_warn_south_up(dims, verbose, "Creating a South-up raster. You may wish to reverse the `Y` dimension to use conventional North-up") + options = isnokwornothing(options) ? Dict{String,String}() : options + + # Pairs should not get this far + @assert !(missingval isa Pair) + # If missingval is missing, generate one GDAL can write + missingval = ismissing(missingval) ? RA._writeable_missing(T; verbose) : missingval + # Make sure dimensions are valid for GDAL _gdal_validate(dims) + # Move x and y locus to start x, y = map(DD.dims(dims, (XDim, YDim))) do d maybeshiftlocus(Start(), RA.nolookup_to_sampled(d)) end + # Band handling - a 2d raster has 1 band newdims = hasdim(dims, Band()) ? (x, y, DD.dims(dims, Band)) : (x, y) nbands = hasdim(dims, Band) ? length(DD.dims(dims, Band())) : 1 + # Driver detection driver = _check_driver(filename, driver) - options_vec = _process_options(driver, options; _block_template, chunks) + options_vec = _process_options(driver, options; _block_template, chunks, verbose) gdaldriver = driver isa String ? AG.getdriver(driver) : driver + # Keywords and filenames for AG.create create_kw = (; width=length(x), height=length(y), nbands, dtype=T,) filename = isnothing(filename) ? "" : filename + # Not all drivers can be directly created, some need an intermediate step if AG.shortname(gdaldriver) in GDAL_DRIVERS_SUPPORTING_CREATE AG.create(filename; driver=gdaldriver, options=options_vec, create_kw...) do dataset - _set_dataset_properties!(dataset, newdims, missingval) + _set_dataset_properties!(dataset, newdims, missingval, scale, offset) f(dataset) end else # Create a tif and copy it to `filename`, as ArchGDAL.create # does not support direct creation of ASCII etc. rasters - tif_options_vec = _process_options("GTiff", Dict{String,String}(); chunks, _block_template) + tif_options_vec = _process_options("GTiff", Dict{String,String}(); + chunks, _block_template, verbose + ) tif_driver = AG.getdriver("GTiff") tif_name = tempname() * ".tif" AG.create(tif_name; driver=tif_driver, options=tif_options_vec, create_kw...) do dataset - _set_dataset_properties!(dataset, newdims, missingval) + _set_dataset_properties!(dataset, newdims, missingval, scale, offset) f(dataset) target_ds = AG.copy(dataset; filename=filename, driver=gdaldriver, options=options_vec) AG.destroy(target_ds) @@ -389,6 +405,7 @@ function _create_with_driver(f, filename, dims::Tuple, T, missingval; end end +# Makie sure gdal can actually write this raster @noinline function _gdal_validate(dims) all(hasdim(dims, (XDim, YDim))) || throw(ArgumentError("`Raster` must have both an `X` and `Y` to be converted to an ArchGDAL `Dataset`")) if length(dims) === 3 @@ -402,7 +419,8 @@ end # Convert a Dict of options to a Vector{String} for GDAL function _process_options(driver::String, options::Dict; chunks=nokw, - _block_template=nothing + _block_template=nothing, + verbose=true, ) options_str = Dict(string(k)=>string(v) for (k,v) in options) # Get the GDAL driver object @@ -429,7 +447,7 @@ function _process_options(driver::String, options::Dict; if (xchunksize % 16 == 0) && (ychunksize % 16 == 0) options_str["TILED"] = "YES" else - xchunksize == 1 || @warn "X and Y chunk size do not match. Columns are used and X size $xchunksize is ignored" + xchunksize == 1 || (verbose && @warn "X and Y chunk size do not match. Columns are used and X size $xchunksize is ignored") end # don't overwrite user specified values if !("BLOCKXSIZE" in keys(options_str)) @@ -443,7 +461,7 @@ function _process_options(driver::String, options::Dict; if xchunksize == ychunksize options_str["BLOCKSIZE"] = block_x else - @warn "Writing COG X and Y chunks do not match: $block_x, $block_y. Default of 512, 512 used." + verbose && @warn "Writing COG X and Y chunks do not match: $block_x, $block_y. Default of 512, 512 used." end end end @@ -489,9 +507,9 @@ end # Set the properties of an ArchGDAL Dataset to match # the dimensions and missingval of a Raster -_set_dataset_properties!(ds::AG.Dataset, A) = - _set_dataset_properties!(ds, dims(A), missingval(A)) -function _set_dataset_properties!(dataset::AG.Dataset, dims::Tuple, missingval) +_set_dataset_properties!(ds::AG.Dataset, A, scale, offset) = + _set_dataset_properties!(ds, dims(A), missingval(A), scale, offset) +function _set_dataset_properties!(dataset::AG.Dataset, dims::Tuple, missingval, scale, offset) # We cant write mixed Points/Intervals, so default to Intervals if mixed xy = DD.dims(dims, (X, Y)) if any(x -> x isa Intervals, map(sampling, xy)) && any(x -> x isa Points, map(sampling, xy)) @@ -520,12 +538,14 @@ function _set_dataset_properties!(dataset::AG.Dataset, dims::Tuple, missingval) gt = RA.dims2geotransform(x, y) AG.setgeotransform!(dataset, gt) - # Set the missing value/nodataval. This is a little complicated - # because gdal has separate method for 64 bit integers - if !isnothing(missingval) + if !RA.isnokwornothing(missingval) bands = hasdim(dims, Band) ? axes(DD.dims(dims, Band), 1) : 1 for i in bands rasterband = AG.getband(dataset, i) + (RA.isnokw(offset) || isnothing(offset)) || AG.setoffset!(rasterband, offset) + (RA.isnokw(scale) || isnothing(scale)) || AG.setscale!(rasterband, scale) + # Set the missing value/nodataval. This is a little complicated + # because gdal has separate method for 64 bit integers if missingval isa Int64 AG.GDAL.gdalsetrasternodatavalueasint64(rasterband, missingval) elseif missingval isa UInt64 @@ -575,7 +595,7 @@ _maybe_restore_from_gdal(A, dims::Tuple) = _maybe_reorder(permutedims(A, dims), _maybe_restore_from_gdal(A, dims::Union{Tuple{<:XDim,<:YDim,<:Band},Tuple{<:XDim,<:YDim}}) = _maybe_reorder(A, dims) -function _maybe_reorder(A, dims) +function _maybe_reorder(A, dims::Tuple) if all(map(l -> l isa AbstractSampled, lookup(dims, (XDim, YDim)))) && all(map(l -> l isa AbstractSampled, lookup(A, (XDim, YDim)))) reorder(A, dims) @@ -583,6 +603,14 @@ function _maybe_reorder(A, dims) A end end + +function _maybe_warn_south_up(A, verbose, msg) + if hasdim(A, Y()) + verbose && lookup(A, Y()) isa AbstractSampled && order(A, Y()) isa ForwardOrdered && @warn msg + end + return nothing +end + #= Geotranforms ######################################################################## See https://lists.osgeo.org/pipermail/gdal-dev/2011-July/029449.html @@ -625,6 +653,15 @@ RA.affine2geotransform(am) = error(USING_COORDINATETRANSFORMATIONS_MESSAGE) _isaligned(geotransform) = geotransform[GDAL_ROT1] == 0 && geotransform[GDAL_ROT2] == 0 +function _getfilepath(ds) + filelist = AG.filelist(ds) + if length(filelist) == 0 + return nothing + else + return first(filelist) + end +end + # precompilation # function _precompile(::Type{GDALsource}) # ccall(:jl_generating_output, Cint, ()) == 1 || return nothing @@ -647,3 +684,4 @@ _isaligned(geotransform) = geotransform[GDAL_ROT1] == 0 && geotransform[GDAL_ROT # end # _precompile(GRDsource) + diff --git a/ext/RastersArchGDALExt/resample.jl b/ext/RastersArchGDALExt/resample.jl index d5b067ed0..ead14b5d6 100644 --- a/ext/RastersArchGDALExt/resample.jl +++ b/ext/RastersArchGDALExt/resample.jl @@ -7,7 +7,12 @@ function resample(xs::Union{Tuple,NamedTuple}; to=first(xs), kw...) map(x -> resample(x; to, kw...), xs) end function resample(A::RasterStackOrArray; - to=nothing, res=nothing, crs=nothing, size=nothing, method=:near, kw... + to=nothing, + res=nothing, + crs=nothing, + size=nothing, + method=:near, + kw... ) (isnothing(size) || isnothing(res)) || _size_and_res_error() diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index 8ebfac98a..e8368e776 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -11,25 +11,40 @@ function warp(A::AbstractRaster, flags::Dict; filename=nothing, kw...) end end function warp(st::AbstractRasterStack, flags::Dict; filename=nothing, suffix=keys(st), kw...) - RA.mapargs((A, s) -> warp(A, flags; filename, suffix=s), st, suffix; kw...) + RA.mapargs((A, s) -> warp(A, flags; filename, suffix=s, kw...), st, suffix) end -function _warp(A::AbstractRaster, flags::Dict; filename=nothing, suffix="", kw...) +function _warp(A::AbstractRaster, flags::Dict; + filename=nothing, + suffix="", + missingval=Rasters.missingval(A), + name=Rasters.name(A), + kw... +) A1 = _set_gdalwarp_sampling(A) filename = RA._maybe_add_suffix(filename, suffix) flagvect = reduce([flags...]; init=String[]) do acc, (key, val) append!(acc, String[_asflag(key), _stringvect(val)...]) end + # TODO: detect if `A` already holds a lazy GDAL FileArray. + # If it does, we can just open it and use it directly. tempfile = isnothing(filename) ? nothing : tempname() * ".tif" warp_kw = isnothing(filename) || filename == "/vsimem/tmp" ? () : (; dest=filename) - out = AG.Dataset(A1; filename=tempfile, kw...) do dataset + mv_pair = RA._write_missingval_pair(A1, missingval; + verbose=false, eltype=eltype(A1), metadata=metadata(A), required=true + ) + # We really need a missingval for `warp`, as it may rotate and add missing values + out = AG.Dataset(A1; filename=tempfile, missingval=mv_pair[1], kw...) do dataset AG.gdalwarp([dataset], flagvect; warp_kw...) do warped # Read the raster lazily, dropping Band if there is none in `A` - raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name = name(A)) + raster = Raster(warped; + lazy=true, dropband=!hasdim(A, Band()), name, missingval=mv_pair + ) # Either read the MEM dataset to an Array, or keep a filename base raster lazy return isnothing(filename) ? read(raster) : raster end end + # And permute the dimensions back to what they were in A out1 = _maybe_restore_from_gdal(out, dims(A)) out2 = _reset_gdalwarp_sampling(out1, A) diff --git a/ext/RastersCoordinateTransformationsExt/RastersCoordinateTransformationsExt.jl b/ext/RastersCoordinateTransformationsExt/RastersCoordinateTransformationsExt.jl index 39d0ee745..1643f7a1e 100644 --- a/ext/RastersCoordinateTransformationsExt/RastersCoordinateTransformationsExt.jl +++ b/ext/RastersCoordinateTransformationsExt/RastersCoordinateTransformationsExt.jl @@ -1,11 +1,7 @@ module RastersCoordinateTransformationsExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, CoordinateTransformations -else - using ..Rasters, ..CoordinateTransformations -end - +using Rasters +using CoordinateTransformations using DimensionalData using Rasters.Lookups using Rasters.Dimensions @@ -16,7 +12,6 @@ const RA = Rasters const DD = DimensionalData const LA = Lookups - include("affineprojected.jl") end # module diff --git a/ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl b/ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl index 59c69048f..ca81de017 100644 --- a/ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl +++ b/ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl @@ -1,32 +1,12 @@ module RastersGRIBDatasetsExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, GRIBDatasets, CommonDataModel -else - using ..Rasters, ..GRIBDatasets, ..CommonDataModel -end - -import DiskArrays, - FillArrays, - Extents, - GeoInterface, - Missings +using Rasters +using GRIBDatasets -using Dates, - DimensionalData, - GeoFormatTypes - -using Rasters.Lookups -using Rasters.Dimensions using Rasters: GRIBsource -using CommonDataModel: AbstractDataset - const RA = Rasters -const DD = DimensionalData -const DA = DiskArrays -const GI = GeoInterface -const LA = Lookups +const GDS = GRIBDatasets include("gribdatasets_source.jl") diff --git a/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl b/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl index 25df03172..a43292693 100644 --- a/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl +++ b/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl @@ -1,7 +1,5 @@ -const GDS = GRIBDatasets - function RA.OpenStack(fs::RA.FileStack{GRIBsource,K}) where K - RA.OpenStack{GRIBsource,K}(GDS.GRIBDataset(RA.filename(fs))) + RA.OpenStack{GRIBsource,K}(GDS.GRIBDataset(RA.filename(fs)), fs.mods) end # In GRIBDatasets, the file is open for reading the values and closed afterwards. @@ -13,9 +11,11 @@ function RA._open(f, ::GRIBsource, filename::AbstractString; write=false, kw...) RA._open(f, GRIBsource(), ds; kw...) end -# Hack to get the inner DiskArrays chunks as they are not exposed at the top level -RA._get_eachchunk(var::GDS.Variable) = DiskArrays.eachchunk(var.values) -RA._get_haschunks(var::GDS.Variable) = DiskArrays.haschunks(var.values) - RA._sourcetrait(::GDS.Variable) = GRIBsource() RA._sourcetrait(::GDS.GRIBDataset) = GRIBsource() + +function RA.missingval(var::GDS.Variable{T}, args...) where T + mv = GDS.missing_value(var) + T1 = promote_type(typeof(mv), T) + return T1(mv) +end diff --git a/ext/RastersMakieExt/RastersMakieExt.jl b/ext/RastersMakieExt/RastersMakieExt.jl index 0c71302e7..4ddae96c3 100644 --- a/ext/RastersMakieExt/RastersMakieExt.jl +++ b/ext/RastersMakieExt/RastersMakieExt.jl @@ -1,10 +1,7 @@ module RastersMakieExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Makie, Rasters -else - using ..Makie, ..Rasters -end +using Makie +using Rasters using Rasters.DimensionalData using Rasters.Dimensions diff --git a/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl b/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl index c9d475ec6..8513b5c27 100644 --- a/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl +++ b/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl @@ -1,31 +1,23 @@ module RastersNCDatasetsExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, NCDatasets, CommonDataModel -else - using ..Rasters, ..NCDatasets, ..CommonDataModel -end - -import DiskArrays, - FillArrays, - Extents, - GeoInterface, - Missings +using Rasters +using NCDatasets +using CommonDataModel +using Dates +using DimensionalData -using Dates, - DimensionalData, - GeoFormatTypes +import Missings using Rasters.Lookups using Rasters.Dimensions -using Rasters: CDMsource, NCDsource, nokw, NoKW +using Rasters: CDMsource, NCDsource, NoKW, nokw, isnokw using CommonDataModel: AbstractDataset +const NCD = NCDatasets +const CDM = CommonDataModel const RA = Rasters const DD = DimensionalData -const DA = DiskArrays -const GI = GeoInterface const LA = Lookups include("ncdatasets_source.jl") diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 193a00fe3..6296bb86f 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -1,6 +1,3 @@ -const NCD = NCDatasets - - const NCDAllowedType = Union{Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float32,Float64,Char,String} function RA._check_allowed_type(::RA.NCDsource, eltyp) @@ -10,7 +7,8 @@ function RA._check_allowed_type(::RA.NCDsource, eltyp) """ )) end -function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; + +function Base.write(filename::AbstractString, source::NCDsource, A::AbstractRaster; append=false, force=false, kw... @@ -21,21 +19,21 @@ function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; RA.check_can_write(filename, force) "c" end - mode = !isfile(filename) || !append ? "c" : "a"; ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(A))) try - RA._writevar!(ds, A; kw...) + RA._writevar!(ds, source, A; kw...) finally close(ds) end return filename end -function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStack; +function Base.write(filename::AbstractString, source::Source, s::AbstractRasterStack{K,T}; append=false, force=false, missingval=nokw, + f=identity, kw... -) +) where {Source<:NCDsource,K,T} mode = if append isfile(filename) ? "a" : "c" else @@ -43,22 +41,23 @@ function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStac "c" end ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(s))) + missingval = RA._stack_nt(s, isnokw(missingval) ? Rasters.missingval(s) : missingval) try - if missingval isa NamedTuple - map(k -> RA._writevar!(ds, s[k]; missingval=missingval[k], kw...), keys(s)) - else - map(k -> RA._writevar!(ds, s[k]; missingval, kw...), keys(s)) + map(keys(s)) do k + RA._writevar!(ds, source, s[k]; missingval=missingval[k], kw...) end + f(RA.OpenStack{Source,K,T}(ds)) finally close(ds) end return filename end +Base.close(os::RA.OpenStack{NCDsource}) = NCD.close(RA.dataset(os)) + function RA.OpenStack(fs::RA.FileStack{NCDsource,K}) where K - RA.OpenStack{NCDsource,K}(NCD.Dataset(RA.filename(fs))) + RA.OpenStack{NCDsource,K}(NCD.Dataset(RA.filename(fs)), fs.mods) end -Base.close(os::RA.OpenStack{NCDsource}) = NCD.close(RA.dataset(os)) function RA._open(f, ::NCDsource, filename::AbstractString; write=false, kw...) isfile(filename) || RA._isurl(filename) || RA._filenotfound_error(filename) @@ -68,14 +67,38 @@ function RA._open(f, ::NCDsource, filename::AbstractString; write=false, kw...) end end - -# Hack to get the inner DiskArrays chunks as they are not exposed at the top level -RA._get_eachchunk(var::NCD.Variable) = DiskArrays.eachchunk(var) -RA._get_haschunks(var::NCD.Variable) = DiskArrays.haschunks(var) - RA._sourcetrait(::NCD.Dataset) = NCDsource() RA._sourcetrait(::NCD.Variable) = NCDsource() +@inline function RA.get_scale(metadata::Metadata{NCDsource}, scaled::Bool) + scale = scaled ? get(metadata, "scale_factor", nothing) : nothing + offset = scaled ? get(metadata, "add_offset", nothing) : nothing + return scale, offset +end + +RA.missingval(var::NCD.Variable, args...) = + RA.missingval(RA.Metadata{NCDsource}(CDM.attribs(var))) +RA.missingval(var::NCD.Variable, md::RA.Metadata{<:NCDsource}) = RA.missingval(md) + +function RA.missingval(md::RA.Metadata{NCDsource}) + # TODO: handle multiple missing values + fv = get(md, "_FillValue", nothing) + mv = get(md, "missing_value", nothing) + if isnothing(fv) + if mv isa Vector + length(mv) > 1 && @warn "'missing_value' $mv has multiple values. Currently we only uses the first." + return first(mv) + else + return mv + end + else + if !isnothing(mv) + fv == mv || @warn "Both '_FillValue' $fv and 'missing_value' $mv were found. Currently we only use the first." + end + return fv + end +end + # precompilation # const _NCDVar = NCDatasets.CFVariable{Union{Missing, Float32}, 3, NCDatasets.Variable{Float32, 3, NCDatasets.NCDataset}, NCDatasets.Attributes{NCDatasets.NCDataset{Nothing}}, NamedTuple{(:fillvalue, :scale_factor, :add_offset, :calendar, :time_origin, :time_factor), Tuple{Float32, Nothing, Nothing, Nothing, Nothing, Nothing}}} diff --git a/ext/RastersRasterDataSourcesExt/RastersRasterDataSourcesExt.jl b/ext/RastersRasterDataSourcesExt/RastersRasterDataSourcesExt.jl index 126916606..20beabb45 100644 --- a/ext/RastersRasterDataSourcesExt/RastersRasterDataSourcesExt.jl +++ b/ext/RastersRasterDataSourcesExt/RastersRasterDataSourcesExt.jl @@ -1,12 +1,7 @@ module RastersRasterDataSourcesExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, RasterDataSources -else - using ..Rasters, ..RasterDataSources -end +using Rasters, RasterDataSources -# using RasterDataSources: RasterDataSource using Rasters.Lookups using Rasters.Dimensions diff --git a/ext/RastersZarrDatasetsExt/RastersZarrDatasetsExt.jl b/ext/RastersZarrDatasetsExt/RastersZarrDatasetsExt.jl index 49f1647c9..bb44066aa 100644 --- a/ext/RastersZarrDatasetsExt/RastersZarrDatasetsExt.jl +++ b/ext/RastersZarrDatasetsExt/RastersZarrDatasetsExt.jl @@ -1,30 +1,12 @@ module RastersZarrDatasetsExt -using Rasters, CommonDataModel -using ZarrDatasets: ZarrDatasets as ZD - -import DiskArrays, - FillArrays, - Extents, - GeoInterface, - Missings - -using Dates, - DimensionalData, - GeoFormatTypes +using Rasters +using ZarrDatasets -using Rasters.Lookups -using Rasters.Dimensions +using ZarrDatasets: ZarrDatasets as ZD using Rasters: Zarrsource -using ZarrDatasets -using CommonDataModel: AbstractDataset, CommonDataModel as CDM - const RA = Rasters -const DD = DimensionalData -const DA = DiskArrays -const GI = GeoInterface -const LA = Lookups include("zarrdatasets_source.jl") diff --git a/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl b/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl index 5bf4f8f9e..ffe723686 100644 --- a/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl +++ b/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl @@ -1,20 +1,41 @@ function RA.OpenStack(fs::RA.FileStack{Zarrsource,K}) where K - RA.OpenStack{Zarrsource,K}(ZD.ZarrDataset(RA.filename(fs))) + RA.OpenStack{Zarrsource,K}(ZD.ZarrDataset(RA.filename(fs)), fs.mods) end # In ZarrDatasets, the file is open for reading the values and closed afterwards. Base.close(os::RA.OpenStack{Zarrsource}) = nothing function RA._open(f, ::Zarrsource, filename::AbstractString; write=false, kw...) - #isfile(filename) || RA._filenotfound_error(filename) ds = ZarrDatasets.ZarrDataset(filename) RA._open(f, Zarrsource(), ds; kw...) end - -# Hack to get the inner DiskArrays chunks as they are not exposed at the top level -RA._get_eachchunk(var::ZD.ZarrVariable) = DiskArrays.eachchunk(var.zarray) -RA._get_haschunks(var::ZD.ZarrVariable) = DiskArrays.haschunks(var.zarray) - RA._sourcetrait(::ZD.ZarrVariable) = Zarrsource() RA._sourcetrait(::ZD.ZarrDataset) = Zarrsource() + +RA.missingval(var::ZD.ZarrVariable, args...) = RA.missingval(RA.Metadata{Zarrsource}(CDM.attribs(var))) +RA.missingval(var::ZD.ZarrVariable, md::RA.Metadata{<:Zarrsource}) = RA.missingval(md) + +@inline function RA.get_scale(metadata::RA.Metadata{<: Zarrsource}, scaled::Bool) + scale = scaled ? get(metadata, "scale_factor", nothing) : nothing + offset = scaled ? get(metadata, "add_offset", nothing) : nothing + return scale, offset +end +# TODO: handle multiple missing values +function RA.missingval(md::RA.Metadata{<:Zarrsource}) + fv = get(md, "_FillValue", nothing) + mv = get(md, "missing_value", nothing) + if isnothing(fv) + if mv isa Vector + length(mv) > 1 && @warn "'missing_value' $mv has multiple values. Currently we only uses the first." + return first(mv) + else + return mv + end + else + if !isnothing(mv) + fv == mv || @warn "Both '_FillValue' $fv and 'missing_value' $mv were found. Currently we only use the first." + end + return fv + end +end diff --git a/src/Rasters.jl b/src/Rasters.jl index 94af83dfe..e660e7c86 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -101,6 +101,7 @@ include("methods/shared_docstrings.jl") include("lookup.jl") include("dimensions.jl") include("sources/sources.jl") +include("modifieddiskarray.jl") include("filearray.jl") include("filestack.jl") include("openstack.jl") diff --git a/src/array.jl b/src/array.jl index 92f5d52e7..4e75fb72e 100644 --- a/src/array.jl +++ b/src/array.jl @@ -10,14 +10,14 @@ wish to disable memory checks. This setting can be overridden with the `checkmem` keyword, where applicable. """ -function checkmem!(checkmem::Bool) +function checkmem!(checkmem::Bool) !checkmem || @warn "Setting `checkmem` to `false` globally may lead to out-of-memory errors or system crashes" CHECKMEM[] = checkmem return checkmem end const FLATTEN_SELECT = FileArray -const FLATTEN_IGNORE = Union{Dict,Set,Base.MultiplicativeInverses.SignedMultiplicativeInverse} +const FLATTEN_IGNORE = Union{Dict,Set,Base.MultiplicativeInverses.SignedMultiplicativeInverse,Array} """ AbstractRaster <: DimensionalData.AbstractDimArray @@ -42,7 +42,7 @@ abstract type AbstractRaster{T,N,D,A} <: AbstractDimArray{T,N,D,A} end Returns the value representing missing data in the dataset """ function missingval end -missingval(_) = missing +missingval(_) = nothing missingval(::AbstractArray{T}) where T = Missing <: T ? missing : nothing missingval(A::AbstractRaster) = A.missingval @@ -93,8 +93,7 @@ function DD.rebuild(A::AbstractRaster; end function DD.modify(f, A::AbstractRaster) - # Have to avoid calling `open` on CFDiskArray - newdata = if isdisk(A) && !(parent(A) isa CFDiskArray) + newdata = if isdisk(A) # TODO may have to avoid calling `open` on DiskArray open(A) do O f(parent(O)) end @@ -193,12 +192,12 @@ end Raster(A::AbstractDimArray; kw...) Raster(A::AbstractArray, dims; kw...) -A generic [`AbstractRaster`](@ref) for spatial/raster array data. It can hold -either memory-backed arrays or, if `lazy=true`, a [`FileArray`](@ref), -which stores the `String` path to an unopened file. +A generic [`AbstractRaster`](@ref) for spatial/raster array data. It can hold +either memory-backed arrays or, if `lazy=true`, a [`FileArray`](@ref), +which stores the `String` path to an unopened file. -If `lazy=true`, the file will only be opened lazily when it is indexed with `getindex` -or when `read(A)` is called. Broadcasting, taking a view, reversing, and most other +If `lazy=true`, the file will only be opened lazily when it is indexed with `getindex` +or when `read(A)` is called. Broadcasting, taking a view, reversing, and most other methods will _not_ load data from disk; they will be applied later, lazily. # Arguments @@ -207,28 +206,20 @@ methods will _not_ load data from disk; they will be applied later, lazily. # Keywords -- `name`: a `Symbol` name for the array, which will also retrieve the, alphabetically first, - named layer if `Raster` is used on a multi-layered file like a NetCDF. - If instead `RasterStack` is used to read the multi-layered file, by default, all variables - will be added to the stack. -$GROUP_KEYWORD -- `missingval`: value reprsenting missing data, normally detected from the file. Set manually - when you know the value is not specified or is incorrect. This will *not* change any - values in the raster, it simply assigns which value is treated as missing. To replace all of - the missing values in the raster, use [`replace_missing`](@ref). -- `metadata`: `Dict` or `Metadata` object for the array, or `NoMetadata()`. -$CONSTRUCTOR_CRS_KEYWORD -$CONSTRUCTOR_MAPPEDCRS_KEYWORD -- `refdims`: `Tuple of` position `Dimension`s the array was sliced from, defaulting to `()`. - Usually not needed. +$NAME_KEYWORD +$GROUP_KEYWORD +$MISSINGVAL_KEYWORD +$METADATA_KEYWORD +$CONSTRUCTOR_CRS_KEYWORD +$CONSTRUCTOR_MAPPEDCRS_KEYWORD +$REFDIMS_KEYWORD When a filepath `String` is used: $DROPBAND_KEYWORD $LAZY_KEYWORD -$REPLACE_MISSING_KEYWORD $SOURCE_KEYWORD -- `write`: defines the default `write` keyword value when calling `open` on the Raster. `false` by default. - Only makes sense to use when `lazy=true`. +$SCALED_KEYWORD +$RAW_KEYWORD When A is an `AbstractDimArray`: - `data`: can replace the data in an existing `AbstractRaster` @@ -248,6 +239,7 @@ struct Raster{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi<:Union{T,Noth new{T,N,D,R,A,Na,Me,typeof(missingval1)}(data, dims, refdims, name, metadata, missingval1) end end +# Create a Raster from and AbstractArray and dims function Raster(A::AbstractArray{T,N}, dims::Tuple; refdims=(), name=Symbol(""), @@ -261,12 +253,15 @@ function Raster(A::AbstractArray{T,N}, dims::Tuple; A = isnokw(mappedcrs) ? A : setmappedcrs(A, mappedcrs) return A end +# Create a Raster from and AbstractVector and dims, +# reshaping the Vector to match the dimensions function Raster(A::AbstractArray{T,1}, dims::Tuple{<:Dimension,<:Dimension,Vararg}; kw... )::Raster{T,length(dims)} where T Raster(reshape(A, map(length, dims)), dims; kw...) end Raster(A::AbstractArray{<:Any,1}, dim::Dimension; kw...) = Raster(A, (dim,); kw...) +# Load a Raster from a table function Raster(table, dims::Tuple; name=nokw, kw... @@ -277,7 +272,9 @@ function Raster(table, dims::Tuple; A = reshape(cols[name], map(length, dims)) return Raster(A, dims; name, kw...) end +# Load a Raster from another AbstractArray with `dims` as keyword Raster(A::AbstractArray; dims, kw...) = Raster(A, dims; kw...)::Raster +# Load a Raster from another AbstractDimArray function Raster(A::AbstractDimArray; data=parent(A), dims=dims(A), @@ -289,20 +286,23 @@ function Raster(A::AbstractDimArray; )::Raster return Raster(data, dims; refdims, name, metadata, missingval, kw...) end +# Load a Raster from a string filename and predefined dimensions function Raster(filename::AbstractString, dims::Tuple{<:Dimension,<:Dimension,Vararg}; kw... )::Raster Raster(filename; dims, kw...) end -function Raster(filename::AbstractString; - source=nokw, +# Load a Raster from a string filename +function Raster(filename::AbstractString; + source=nokw, kw... ) source = _sourcetrait(filename, source) - _open(filename; source) do ds + _open(filename; source, mod=NoMod()) do ds Raster(ds, filename; source, kw...) end::Raster end +# Load a Raster from an opened Dataset function Raster(ds, filename::AbstractString; dims=nokw, refdims=(), @@ -313,74 +313,62 @@ function Raster(ds, filename::AbstractString; crs=nokw, mappedcrs=nokw, source=nokw, - replace_missing=false, - write=false, - lazy=false, - dropband=true, - checkmem=CHECKMEM[], + replace_missing=nokw, + coerce=convert, + scaled::Union{Bool,NoKW}=nokw, + verbose::Bool=true, + write::Bool=false, + lazy::Bool=false, + dropband::Bool=true, + checkmem::Bool=CHECKMEM[], + raw::Bool=false, + mod=nokw, + f=identity, )::Raster + _maybe_warn_replace_missing(replace_missing) + # `raw` option will ignore `scaled` and `missingval` + scaled, missingval = _raw_check(raw, scaled, missingval, verbose) + # TODO use a clearer name for this name1 = filekey(ds, name) + # Detect the source from filename source = _sourcetrait(filename, source) - data1, dims1, metadata1, missingval1 = _open(source, ds; name=name1, group) do var - metadata1 = isnokw(metadata) ? _metadata(var) : metadata - missingval1 = _fix_missingval(var, missingval) - rm = replace_missing && !isnothing(missingval1) - missingval2 = rm ? missing : missingval1 - data = if lazy - A = FileArray{typeof(source)}(var, filename; name=name1, group, write) - rm ? _replace_missing(A, missingval1) : A - else - checkmem && _checkobjmem(var) - x = Array(rm ? _replace_missing(var, missingval1) : var) - x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug - end - dims1 = isnokw(dims) ? _dims(var, crs, mappedcrs) : format(dims, data) - data, dims1, metadata1, missingval2 - end - name2 = name1 isa Union{NoKW,Nothing} ? Symbol("") : Symbol(name1) - raster = Raster(data1, dims1, refdims, name2, metadata1, missingval1) - return dropband ? _drop_single_band(raster, lazy) : raster -end - -_fix_missingval(::Type, ::Union{NoKW,Nothing}) = nothing -_fix_missingval(::AbstractArray, ::Nothing) = nothing -_fix_missingval(A::AbstractArray, ::NoKW) = _fix_missingval(A, Rasters.missingval(A)) -_fix_missingval(::AbstractArray{T}, missingval) where T = _fix_missingval(T, missingval) -function _fix_missingval(::Type{T}, missingval::M) where {T,M} - T1 = nonmissingtype(T) - if missingval isa T - missingval - elseif hasmethod(convert, Tuple{Type{T1},M}) && isreal(missingval) && - missingval <= typemax(T1) && missingval >= typemin(T1) - if T1 <: Integer && !isinteger(missingval) - nothing + # Open the dataset and variable specified by `name`, at `group` level if provided + # At this level we do not apply `mod`. + data_out, dims_out, metadata_out, missingval_out = _open(source, ds; name=name1, group, mod=NoMod()) do var + metadata_out = isnokw(metadata) ? _metadata(var) : metadata + missingval_out = _read_missingval_pair(var, metadata_out, missingval) + # Generate mod for scaling + mod = isnokw(mod) ? _mod(eltype(var), metadata_out, missingval_out; scaled, coerce) : mod + # Define or load the data array + data_out = if lazy + # Define a lay FileArray + FileArray{typeof(source)}(var, filename; + name=name1, group, mod, write + ) else - convert(T, missingval) + modvar = _maybe_modify(var, mod) + # Check the data will fit in memory + checkmem && _checkobjmem(modvar) + # Move the modified array to memory + Array(modvar) end - else - nothing + # Generate dims + dims_out = isnokw(dims) ? _dims(var, crs, mappedcrs) : format(dims, data_out) + # Return the data to the parent function + mv_outer = _outer_missingval(mod) + data_out, dims_out, metadata_out, mv_outer end -end - -function _replace_missing(A::AbstractArray{T}, missingval) where T - repmissing(x) = isequal(x, missingval) ? missing : x - return repmissing.(A) + # Use name or an empty Symbol + name_out = name1 isa Union{NoKW,Nothing} ? Symbol("") : Symbol(name1) + # Define the raster + raster = Raster(data_out, dims_out, refdims, name_out, metadata_out, missingval_out) + # Maybe drop a single band dimension + return _maybe_drop_single_band(raster, dropband, lazy) end filekey(ds, name) = name filekey(filename::String) = Symbol(splitext(basename(filename))[1]) -DD.dimconstructor(::Tuple{<:Dimension{<:AbstractProjected},Vararg{Dimension}}) = Raster - - -function _drop_single_band(raster, lazy::Bool) - if hasdim(raster, Band()) && size(raster, Band()) < 2 - if lazy - return view(raster, Band(1)) # TODO fix dropdims in DiskArrays - else - return dropdims(raster; dims=Band()) - end - else - return raster - end -end +# Add a `dimconstructor` method so `AbstractProjected` lookups create a Raster +# TODO this should be unwrapped to `DD.lookupconstructor` to avoid future ambiguities +DD.dimconstructor(::Tuple{<:Dimension{<:AbstractProjected},Vararg{Dimension}}) = Raster \ No newline at end of file diff --git a/src/create.jl b/src/create.jl index f0f66472c..25cdba8ae 100644 --- a/src/create.jl +++ b/src/create.jl @@ -1,41 +1,358 @@ +const TypeNamedTuple = NamedTuple{<:Any,<:Tuple{Vararg{Type}}} +""" + create([f!], [filename], template; kw...) -create(filename, A::AbstractRaster{T}; kw...) where T = create(filename, T, A; kw...) -function create(filename, T, A::AbstractRaster; - name=name(A), metadata=metadata(A), missingval=missingval(A), kw... +Create a new, uninitialised [`Raster`](@ref) or [`RasterStack`](@ref). + +If `filename` is a `String` it will be created on disk, and opened lazily. +If it is `nothing` or not passed, an in-memory `Raster` will be created. + +If type is a `Type` return value is a `Raster`. The `eltype` will usually be `T`, except +where `scale` and/or `offset` keywords are used or a `missingval` of a different type is specified, +in which case `T` will depend on the type promotion of `scale`, `offset` and `missingval` with `T`. +If `missingval` is a `Pair` of `on_disk_missingval => user_facing_missingval`, the user facing value +will effect `T`, not the internal on-disk value. + +If types is a `NamedTuple` of types, the result will be a `RasterStack`. In this case `fill` and +`missingval` can be single values (for all layers) or `NamedTuple` with the same names to specify per-layer. + +`f!` will be applied to the `Raster` or `RasterStack` while it is stil open after creation, +to avoid opening it twice. The return value of `f!` is disregarded but modifications +to the `Raster` or the `RasterStack` layers will be written to disk or changed in memory. + +## Arguments + +- `filename`: a String file path, which will create a file on disk and return it as + a lazy `Raster`, or `nothing` to create an in-memory `Raster`. +- `template`: a `Raster`, `Tuple` of `Dimension` or `Extents.Extent` to use as a template. + If an `Extent` is used, a `size` or `res` keyword must be passed. + If a `T` argument is not used, it is taken from the `template` eltype. +- `type`: the element type to use in the created array. A `NamedTuple` of types + will create a `RasterStack` + +## Keywords + +$NAME_KEYWORD +$REFDIMS_KEYWORD +$METADATA_KEYWORD +$WRITE_MISSINGVAL_KEYWORD +- `fill`: A value to fill the array with, before `scale` and `offset` are applied. + If there is no `fill`, raster values may remain undefined. They may be set to + `missingval` on disk, but this is not guaranteed. It us often more efficient to + use `fill` than to fill manually after `create`. +$SOURCE_KEYWORD +- `lazy`: A `Bool` specifying if to load data lazily from disk. For `create` + `lazy=true` is the default, as creating a disk-based file is normally associated + with it being larger than memory. +$CHUNKS_KEYWORD +$SCALE_KEYWORD +$OFFSET_KEYWORD +$COERCE_KEYWORD +$VERBOSE_KEYWORD +$RES_KEYWORD +$SIZE_KEYWORD +$CRS_KEYWORD +$CHUNKS_KEYWORD +- `reverse_y`: often we want to write `Y` dimensions in reverse. + When building dimensions from an `Extents.Extent` and `size` or `res` we can do this by + using `reverse_y=true`. Using a negative value in `res` will acheive the same result. + With a template `Raster` or a `Tuple` of `Dimension`, the existing order is used. + +## Example + +Here we create a UInt8 GeoTIFF and open it as a Raster, from -80 to 80 +lattitude, and 0 to 120 longitude, with a resolution of 0.25 degrees. + +We scale values from 0-1 over `UInt8` 0-200, and using `255`. +Values that don't convert exactly will error (we could use `coerce=trunc` to fix that). + +We use `UInt8(255)` as the `missingval` on disk, but mask it with `missing` in +the loaded `Raster`. + +We use standard lat/lon (EPSG:4326) as the crs, and force writing if the file exists. + +```julia +using Rasters, NCDatasets, ArchGDAL, Extents, Dates +using Rasters.Lookups +rast = Rasters.create("created.tif", UInt8, Extents.Extent(X=(0, 120), Y=(-80, 80), Band=(0, 12)); + res=(X=10.0, Y=10.0, Band=1), + # size=(X=100, Y=100, Band=12), + name=:myraster, + crs=EPSG(4326), + force=true, + fill=0x01, + sampling=(X=Intervals(Start()), Y=Intervals(Start()), Band=Intervals(Start())), +) do A + # While we have the newly created raster open, we can write to it + A[X=1:10, Y=1:10] .= 0xff +end + +read(rast) +``` + +We can also create a `RasterStack` by passing a `NamedTuple` of types: + +```julia +ext = Extents.Extent(X=(0, 120), Y=(-80, 80))#, Band=(1, 3)) +types = (a=UInt8, b=Int32, c=Float64) +rast = Rasters.create("created.nc", types, ext; + # res=(X=1.0, Y=1.0, Band=1), + size=(X=100, Y=100), + crs=EPSG(4326), + force=true, + # sampling=(X=Intervals(Start()), Y=Intervals(Start()), Band=Points()), +end + +RasterStack("created.nc") + +╭───────────────────────────────────────────╮ +│ 480×640 Raster{Union{Missing, Float64},2} │ +├───────────────────────────────────────────┴───────────────────────────────────────── dims ┐ + ↓ X Projected{Float64} LinRange{Float64}(0.0, 119.75, 480) ForwardOrdered Regular Points, + → Y Projected{Float64} LinRange{Float64}(79.75, -80.0, 640) ReverseOrdered Regular Points +├───────────────────────────────────────────────────────────────────────────────── metadata ┤ + Metadata{GDALsource} of Dict{String, Any} with 2 entries: + "filepath" => "created.tif" + "scale" => 0.005 +├─────────────────────────────────────────────────────────────────────────────────── raster ┤ + extent: Extent(X = (0.0, 119.75), Y = (-80.0, 79.75)) + missingval: missing + crs: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199 +433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] + filename: nothing +└───────────────────────────────────────────────────────────────────────────────────────────┘ +``` +""" +# Create with a function that will be called to fill the raster +create(f::Function, args...; kw...) = create(args...; kw..., f) +# Create from Raster or RasterStack with no filename +create(A::Union{AbstractRaster,AbstractRasterStack}; kw...) = create(nothing, A; kw...) +# Create from type and Raster or RasterStack with no filename +create(T::Union{Type,TypeNamedTuple}, A::Union{Tuple,Extents.Extent,AbstractRaster,AbstractRasterStack}; kw...) = + create(nothing, T, A; kw...) +# Create from filename and Raster +function create(filename::Union{AbstractString,Nothing}, A::AbstractRaster{T}; + missingval=missingval(A), # Only take missingval here when types are not specified + kw... +) where T + create(filename, T, A; missingval, kw...) +end +# Create from filename and RasterStack +function create(filename::Union{AbstractString,Nothing}, st::AbstractRasterStack; + missingval=missingval(st), # Only take missingval here when types are not specified + kw... ) - create(filename, T, dims(A); parent=parent(A), name, metadata, missingval, kw...) -end -function create(filename::AbstractString, T::Type, dims::Tuple; - lazy=true, - parent=nothing, - suffix=nothing, - source::Source=_sourcetrait(filename), - missingval=nothing, kw... + create(filename, map(eltype, layers(st)), st; missingval, kw...) +end +# Create Raster from filename, type and a Raster, using its +# parent as the parent type so e.g. CuArray will propagate +create(filename::Union{AbstractString,Nothing}, T::Union{Type,TypeNamedTuple}, A::AbstractRaster; kw...) = + create(filename, T, dims(A); parent=parent(A), kw...) +# Create RasterStack from filename, NamedTuple type and dims +function create(filename::Union{AbstractString,Nothing}, T::NamedTuple{K1}, st::AbstractRasterStack{K2}; + metadata=metadata(st), + layerdims=nokw, + layermetadata=nokw, + kw... +) where {K1,K2} + if all(map(in(K2), K1)) + layerdims = isnokw(layerdims) ? DD.layerdims(st)[K1] : layerdims + layermetadata = isnokw(layermetadata) ? DD.layermetadata(st)[K1] : layermetadata + end + return create(filename, T, dims(st); + parent=first(parent(st)), metadata, missingval, layerdims, layermetadata, kw... + ) +end +# Create from filename, type and dims +function create(filename::AbstractString, T::Union{Type,NamedTuple}, dims::Tuple; + lazy=true, + parent=nokw, + suffix=nokw, + source::Source=_sourcetrait(filename), + missingval=nokw, + kw... ) filename = _maybe_add_suffix(filename, suffix) # This calls `create` in the /sources file for this `source` - create(filename, source, T, dims; lazy, missingval, kw...) + return create(filename, source, T, dims; lazy, missingval, kw...) end -function create(filename::Nothing, T::Type, dims::Tuple; - parent=nothing, - suffix=nothing, - missingval, +# Create from filename, type and extent with res or size keywords +function create(filename::Union{AbstractString,Nothing}, T::Union{Type,NamedTuple}, extent::Extents.Extent; + res=nokw, + size=nokw, + crs=nothing, + sampling=Points(), + reverse_y=nokw, kw... ) - T = isnothing(missingval) ? T : promote_type(T, typeof(missingval)) - data = isnothing(parent) ? Array{T}(undef, dims) : similar(parent, T, size(dims)) - Raster(data, dims; missingval, kw...) -end - -_maybe_add_suffix(filename::Nothing, suffix) = nothing -_maybe_add_suffix(filename::Nothing, suffix::Nothing) = nothing -_maybe_add_suffix(filename, suffix::Nothing) = filename -function _maybe_add_suffix(filename, suffix) - base, ext = splitext(filename) - if string(suffix) == "" - filename + dims = _extent2dims(extent; size, res, crs, sampling) + dims = if reverse_y isa Bool && reverse_y && hasdim(ds, Y()) + DD.setdims(ds, reverse(DD.dims(ds, Y()))) else - return string(base, "_", suffix, ext) + dims end + return create(filename, T, dims; kw...) end +# Create in-memory Raster from type and dims +function create(filename::Nothing, ::Type{T}, dims::Tuple; + missingval=nokw, + fill=nokw, + parent=nokw, + verbose=true, + f=identity, + # Not used but here for consistency + suffix=nokw, + force=false, + chunks=nokw, + driver=nokw, + options=nokw, + kw... +) where T + if verbose + isnokw(chunks) || _warn_keyword_not_used("chunks", chunks) + isnokw(driver) || _warn_keyword_not_used("driver", driver) + isnokw(options) || _warn_keyword_not_used("options", options) + end + # Split inner and outer missingval if needed + # For in-memory rasters we just ignore the inner value + mv_inner, mv_outer = missingval isa Pair ? missingval : (missingval, missingval) + # Get the element type from T and outer missingval + eltype = isnokwornothing(mv_outer) ? T : promote_type(T, typeof(mv_outer)) + # Create the array + data = if isnokwornothing(parent) + Array{eltype}(undef, dims) + else + similar(parent, eltype, size(dims)) + end + # Maybe fill the array + isnokwornothing(fill) || fill!(data, fill) + # Wrap as a Raster + rast = Raster(data, dims; missingval=mv_outer, kw...) + # Apply `f` before returning + f(rast) + return rast +end +# Create in-memory RasterStack from type and dims +function create(filename::Nothing, types::NamedTuple, dims::Tuple; + suffix=keys(types), + force=false, + chunks=nokw, + verbose=true, + driver=nokw, + options=nokw, + parent=nokw, + missingval=nokw, + fill=nokw, + layerdims=nokw, + layermetadata=nokw, + f=identity, + kw... +) + layerdims = isnokw(layerdims) ? map(_ -> basedims(dims), types) : layerdims + layermetadata = layermetadata isa NamedTuple ? layermetadata : map(_ -> layermetadata, types) + layerfill = fill isa NamedTuple ? fill : map(_ -> fill, types) + layermissingvals = missingval isa NamedTuple ? missingval : map(_ -> missingval, types) + layers = map(types, layermissingvals, layerfill, layerdims, layermetadata) do T, lmv, lfv, ld, lm + create(nothing, T, DD.dims(dims, ld); + parent, missingval=lmv, fill=lfv, metadata=lm, driver, options, + ) + end + st = RasterStack(layers; kw...) + f(st) + return st +end +# Create on-disk Raster from filename, source, type and dims +function create(filename::AbstractString, source::Source, ::Type{T}, dims::Tuple; + name=nokw, + missingval=nokw, + fill=nokw, + metadata=nokw, + chunks=nokw, + scale=nokw, + offset=nokw, + dropband=!hasdim(dims, Band()), + lazy=true, + verbose=true, + force=false, + coerce=nokw, + f=identity, + kw... +) where T + mv_inner, mv_outer = _missingval_pair(missingval) + eltype = Missings.nonmissingtype(T) + if isnokw(fill) || isnothing(fill) + write = false # Leave fill undefined + A = FillArrays.Zeros{eltype}(map(length, dims)) + else + fill isa eltype || throw(ArgumentError("fill must be of type $eltype, got $fill")) + write = true # Write fill to disk + A = FillArrays.Fill{eltype}(fill, map(length, dims)) + end + # Create layers of zero arrays + rast = Raster(A, dims; name, missingval=mv_inner) + Rasters.write(f, filename, source, rast; + eltype, chunks, metadata, scale, offset, missingval, verbose, force, coerce, write, kw... + ) do W + # write returns a variable, wrap it as a Raster + f(rebuild(rast; data=W)) + end + # Don't pass in `missingval`, read it again from disk in case it changed + r = Raster(filename; source, lazy, metadata, dropband, coerce, missingval=mv_outer) + return r +end +# Create on-disk RasterStack from filename, source, type and dims +function create(filename::AbstractString, source::Source, layertypes::NamedTuple, dims::Tuple; + lazy=true, + verbose=true, + force=false, + missingval=nokw, + fill=nokw, + metadata=nokw, + layerdims=nokw, + layermetadata=nokw, + chunks=nokw, + scale=nokw, + offset=nokw, + dropband=!hasdim(dims, Band), + coerce=nokw, + f=identity, + kw... +) + layerdims = if isnokwornothing(layerdims) + # Use the same dims for all layers by default + map(_ -> DD.basedims(dims), layertypes) + else + layerdims + end + mv_inner, mv_outer = _missingval_pair(missingval) + # Only write value to disk variables if fill is defined + write = !isnokwornothing(fill) + # Make sure fill is per-layer + fill_nt = fill isa NamedTuple ? fill : map(_ -> fill, layertypes) + # Define no-allocation layers with FillArrays + layers = map(layertypes, layerdims, fill_nt) do T, ld, fi + lks = lookup(dims, ld) + eltype = Missings.nonmissingtype(T) + size = map(length, lks) + if isnokwornothing(fi) + A = FillArrays.Zeros{eltype}(size) + else + A = FillArrays.Fill{eltype}(fi, size) + end + end + # Create layers of zero arrays + st1 = RasterStack(layers, dims; layerdims, layermetadata, missingval=mv_inner) + fn = Rasters.write(filename, st1; + chunks, metadata, scale, offset, missingval=mv_inner, verbose, force, coerce, write, kw... + ) do W + # write returns a variable, wrap it as a RasterStack + f(rebuild(st1; data=W)) + end + st2 = RasterStack(fn; source, lazy, metadata, layerdims, dropband, coerce, missingval=mv_outer) + return st2 +end + +_warn_keyword_not_used(label, obj) = @warn "`$label` of `$obj` found. But `chunks` are not used for in-memory rasters" +_missingval_pair(missingval::Pair) = missingval +_missingval_pair(missingval) = missingval => missingval \ No newline at end of file diff --git a/src/extensions.jl b/src/extensions.jl index 30f31fdd1..61d90a8e8 100644 --- a/src/extensions.jl +++ b/src/extensions.jl @@ -114,8 +114,12 @@ Run `using ArchGDAL` to make this method available. # Keywords +$MISSINGVAL_KEYWORD $FILENAME_KEYWORD $SUFFIX_KEYWORD +- `missingval`: the missing value to use during warping, will default to + `Rasters.missingval(A). Passing a pair will specify the missing value + to use after warping. Any additional keywords are passed to `ArchGDAL.Dataset`. @@ -125,7 +129,7 @@ This simply resamples the array with the `:tr` (output file resolution) and `:r` flags, giving us a pixelated version: ```jldoctest -using Rasters, RasterDataSources, Plots +using Rasters, ArchGDAL, RasterDataSources, Plots A = Raster(WorldClim{Climate}, :prec; month=1) a = plot(A) @@ -178,7 +182,7 @@ where each value in the raster encodes the area of the cell (in meters by defaul ## Example ```julia -using Rasters, ArchGDAL, Rasters.Lookups +using Rasters, Proj, Rasters.Lookups xdim = X(Projected(90.0:10.0:120; sampling=Intervals(Start()), crs=EPSG(4326))) ydim = Y(Projected(0.0:10.0:50; sampling=Intervals(Start()), crs=EPSG(4326))) myraster = rand(xdim, ydim) @@ -261,7 +265,7 @@ using Rasters, Rasters.Lookups, Proj, StatsBase xdim = X(Projected(90.0:10.0:120; sampling=Intervals(Start()), crs=EPSG(4326))) ydim = Y(Projected(0.0:10.0:50; sampling=Intervals(Start()), crs=EPSG(4326))) myraster = rand(xdim, ydim) -Rasters.sample(myraster, 5; weights = cellarea(myraster)) +Rasters.sample(myraster, 5; weights=cellarea(myraster)) # output diff --git a/src/filearray.jl b/src/filearray.jl index 500e2e151..a599731d1 100644 --- a/src/filearray.jl +++ b/src/filearray.jl @@ -1,4 +1,3 @@ - """ FileArray{S} <: DiskArrays.AbstractDiskArray @@ -6,38 +5,56 @@ Filearray is a DiskArrays.jl `AbstractDiskArray`. Instead of holding an open object, it just holds a filename string that is opened lazily when it needs to be read. """ -struct FileArray{S,T,N,Na,G,EC,HC} <: DiskArrays.AbstractDiskArray{T,N} +struct FileArray{S,T,N,Na,G,EC,HC,M<:AbstractModifications} <: DiskArrays.AbstractDiskArray{T,N} filename::String size::NTuple{N,Int} name::Na group::G eachchunk::EC haschunks::HC + mod::M write::Bool end function FileArray{S,T,N}( filename, - size, + size::NTuple{N,Int}, name::Na, - group::G=nothing, - eachchunk::EC=size, - haschunks::HC=DA.Unchunked(), - write=false -) where {S,T,N,Na,G,EC,HC} - FileArray{S,T,N,Na,G,EC,HC}(filename, size, name, group, eachchunk, haschunks, write) + group::G, + eachchunk::EC, + haschunks::HC, + mod::M, + write::Bool, +) where {S,T,N,Na,G,EC,HC,M} + FileArray{S,T,N,Na,G,EC,HC,M}( + String(filename), size, name, group, eachchunk, haschunks, mod, write + ) end -function FileArray{S,T,N}(filename::String, size::Tuple; - name=nokw, group=nokw, eachchunk=size, haschunks=DA.Unchunked(), write=false +function FileArray{S,T,N}(filename::AbstractString, size::Tuple; + name=nokw, + group=nokw, + eachchunk=size, + haschunks=DA.Unchunked(), + mod, + write=false ) where {S,T,N} name = isnokw(name) ? nothing : name group = isnokw(group) ? nothing : group - FileArray{S,T,N}(filename, size, name, group, eachchunk, haschunks, write) + FileArray{S,T,N}(filename, size, name, group, eachchunk, haschunks, mod, write) +end +function FileArray{S}( + var::AbstractArray{<:Any,N}, filename; mod, kw... +) where {S,N} + eachchunk = DA.eachchunk(var) + haschunks = DA.haschunks(var) + T = _mod_eltype(var, mod) + return FileArray{S,T,N}(filename, size(var); eachchunk, haschunks, mod, kw...) end # FileArray has S, T and N parameters not recoverable from fields ConstructionBase.constructorof(::Type{<:FileArray{S,T,N}}) where {S,T,N} = FileArray{S,T,N} filename(A::FileArray) = A.filename +mod(A::FileArray) = A.mod DD.name(A::FileArray) = A.name Base.size(A::FileArray) = A.size DA.eachchunk(A::FileArray) = A.eachchunk @@ -45,7 +62,7 @@ DA.haschunks(A::FileArray) = A.haschunks # Run function `f` on the result of _open for the file type function Base.open(f::Function, A::FileArray{S}; write=A.write, kw...) where S - _open(f, S(), filename(A); name=name(A), write, kw...) + _open(f, S(), filename(A); name=name(A), group=A.group, write, mod=mod(A), kw...) end function DA.readblock!(A::FileArray, dst, r::AbstractUnitRange...) @@ -88,9 +105,6 @@ DA.eachchunk(A::RasterDiskArray) = A.eachchunk DA.readblock!(A::RasterDiskArray, aout, r::AbstractUnitRange...) = aout .= parent(A)[r...] DA.writeblock!(A::RasterDiskArray, v, r::AbstractUnitRange...) = parent(A)[r...] .= v -# Already open, doesn't use `name` -_open(f, ::Source, A::RasterDiskArray; name=nokw, group=nokw) = f(A) - struct MissingDiskArray{T,N,V} <: DiskArrays.AbstractDiskArray{T,N} var::V end diff --git a/src/filestack.jl b/src/filestack.jl index 865864973..d943cc03e 100644 --- a/src/filestack.jl +++ b/src/filestack.jl @@ -9,24 +9,26 @@ typically netcdf or hdf5. `S` is a backend type like `NCDsource`, and `Na` is a tuple of `Symbol` keys. """ -struct FileStack{S,Na,T,SZ,G<:Union{AbstractString,Symbol,Nothing},EC,HC} +struct FileStack{S,Na,T,SZ,G<:Union{AbstractString,Symbol,Nothing},EC,HC,M} filename::String sizes::SZ group::G eachchunk::EC haschunks::HC + mods::M write::Bool end function FileStack{S,Na,T}( - filename::AbstractString, sizes::SZ, group::G, eachchunk::EC, haschunks::HC, write::Bool -) where {S,Na,T,SZ,G,EC,HC} - FileStack{S,Na,T,SZ,G,EC,HC}(String(filename), sizes, group, eachchunk, haschunks, write) + filename::AbstractString, sizes::SZ, group::G, eachchunk::EC, haschunks::HC, mods::M, write::Bool +) where {S,Na,T,SZ,G,EC,M,HC} + FileStack{S,Na,T,SZ,G,EC,HC,M}(String(filename), sizes, group, eachchunk, haschunks, mods, write) end # FileStack has `S,Na,T` parameters that are not recoverable from fields. ConstructionBase.constructorof(::Type{<:FileStack{S,Na,T}}) where {S,Na,T} = FileStack{S,Na,T} filename(fs::FileStack) = fs.filename +mods(fs::FileStack) = fs.mods DD.name(::FileStack{<:Any,Na}) where Na = Na DD.data_eltype(::FileStack{<:Any,<:Any,T}) where T = T @@ -43,8 +45,9 @@ function Base.getindex(fs::FileStack{S,Na,T}, name::Symbol) where {S,Na,T} size = fs.sizes[i] eachchunk = fs.eachchunk[i] haschunks = fs.haschunks[i] + mod = fs.mods[i] N = length(size) - return FileArray{S,_itype(T, i),N}(filename(fs), size, name, fs.group, eachchunk, haschunks, fs.write) + return FileArray{S,_itype(T, i),N}(filename(fs), size, name, fs.group, eachchunk, haschunks, mod, fs.write) end @inline _itype(::Type{<:NamedTuple{<:Any,T}}, i) where T = T.parameters[i] diff --git a/src/methods/burning/array_init.jl b/src/methods/burning/array_init.jl index 10ddd5937..a700a6454 100644 --- a/src/methods/burning/array_init.jl +++ b/src/methods/burning/array_init.jl @@ -1,5 +1,5 @@ -# Like `create` but without disk writes, mostly for Bool/Union{Missing,Boo}, +# Like `create` but without disk writes, mostly for Bool or Union{Missing,Bool}, # and uses `similar` where possible # TODO merge this with `create` somehow _init_bools(to; kw...) = _init_bools(to, BitArray; kw...) @@ -7,18 +7,30 @@ _init_bools(to, T::Type; kw...) = _init_bools(to, T, nothing; kw...) _init_bools(to::AbstractRasterSeries, T::Type, data; kw...) = _init_bools(first(to), T, data; kw...) _init_bools(to::AbstractRasterStack, T::Type, data; kw...) = _init_bools(first(to), T, data; kw...) _init_bools(to::AbstractRaster, T::Type, data; kw...) = _init_bools(to, dims(to), T, data; kw...) -_init_bools(to::Extents.Extent, T::Type, data; kw...) = _init_bools(to, _extent2dims(to; kw...), T, data; kw...) _init_bools(to::DimTuple, T::Type, data; kw...) = _init_bools(to, to, T, data; kw...) -function _init_bools(to::Nothing, T::Type, data; geometrycolumn=nothing,kw...) +function _init_bools(to::Nothing, T::Type, data; + geometrycolumn=nothing, + collapse=nokw, + res=nokw, + size=nokw, + kw... +) # Get the extent of the geometries - ext = _extent(data; geometrycolumn, kw...) + ext = _extent(data; geometrycolumn) isnothing(ext) && throw(ArgumentError("no recognised dimensions, extent or geometry")) + return _init_bools(ext, T, data; collapse, res, size, kw...) +end +function _init_bools(to::Extents.Extent, T::Type, data; + collapse=nokw, size=nokw, res=nokw, sampling=nokw, kw... +) # Convert the extent to dims (there must be `res` or `size` in `kw`) - dims = _extent2dims(ext; kw...) - return _init_bools(to, dims, T, data; kw...) + dims = _extent2dims(to; size, res, sampling, kw...) + _init_bools(to, dims, T, data; collapse, kw...) end -function _init_bools(to, dims::DimTuple, T::Type, data; collapse::Union{Bool,Nothing}=nothing, kw...) - if isnothing(data) || isnothing(collapse) || collapse +function _init_bools(to, dims::DimTuple, T::Type, data; + collapse::Union{Bool,Nothing,NoKW}=nokw, kw... +) + if isnothing(data) || isnokwornothing(collapse) || collapse _alloc_bools(to, dims, T; kw...) else n = if Base.IteratorSize(data) isa Base.HasShape @@ -31,13 +43,17 @@ function _init_bools(to, dims::DimTuple, T::Type, data; collapse::Union{Bool,Not end end -function _alloc_bools(to, dims::DimTuple, ::Type{BitArray}; missingval::Bool=false, metadata=NoMetadata(), kw...) +function _alloc_bools(to, dims::DimTuple, ::Type{BitArray}; + missingval::Bool=false, metadata=NoMetadata(), kw... +) # Use a BitArray vals = missingval == false ? falses(size(dims)) : trues(size(dims)) return Raster(vals, dims; missingval, metadata) end -function _alloc_bools(to, dims::DimTuple, ::Type{<:Array{T}}; missingval=false, metadata=NoMetadata(), kw...) where T - # Use an `Array` +function _alloc_bools(to, dims::DimTuple, ::Type{<:Array{T}}; + missingval=false, metadata=NoMetadata(), kw... +) where T + # Use an Array data = fill!(Raster{T}(undef, dims), missingval) return rebuild(data; missingval, metadata) end diff --git a/src/methods/classify.jl b/src/methods/classify.jl index be046ecb9..dbd501e4b 100644 --- a/src/methods/classify.jl +++ b/src/methods/classify.jl @@ -58,11 +58,9 @@ function classify(A::AbstractRaster, pairs::Union{Tuple,AbstractArray}; # We use `Val{T}` to force type stability through the closure valT = Val{T}() f(x) = _convert_val(valT, _classify(x, pairs, lower, upper, others, Rasters.missingval(A), missingval)) - A1 = create(filename, T, A; suffix, missingval) - open(A1; write=true) do O - broadcast!(f, O, A) + return create(filename, T, A; suffix, missingval) do C + broadcast!(f, C, A) end - return A1 end function classify(xs::AbstractRasterStack, pairs...; suffix=keys(xs), kw...) mapargs(xs, suffix) do x, s diff --git a/src/methods/crop_extend.jl b/src/methods/crop_extend.jl index aacb1920c..2026e8451 100644 --- a/src/methods/crop_extend.jl +++ b/src/methods/crop_extend.jl @@ -9,7 +9,7 @@ to match the size of the object `to`, or smallest of any dimensions that are sha # Keywords -- `to`: the object to crop to. This can be $OBJ_ARGUMENT +- `to`: the object to crop to. This can be $OBJ_ARGUMENT If no `to` keyword is passed, the smallest shared area of all `xs` is used. - `touches`: `true` or `false`. Whether to use `Touches` wraper on the object extent. @@ -157,12 +157,12 @@ function extend end function extend(l1::RasterStackOrArray, l2::RasterStackOrArray, ls::RasterStackOrArray...; kw...) extend((l1, l2, ls...); kw...) end -function extend(xs; to=nothing) +function extend(xs; to=nothing, kw...) if isnothing(to) to = _subsetbounds((min, max), xs) - map(l -> _extend_to(l, to), xs) + map(l -> _extend_to(l, to, kw...), xs) else - map(l -> extend(l; to), xs) + map(l -> extend(l; to, kw...), xs) end end extend(x::RasterStackOrArray; to=dims(x), kw...) = _extend_to(x, to; kw...) @@ -176,8 +176,12 @@ end _extend_to(x::RasterStackOrArray, to::Dimension; kw...) = _extend_to(x, (to,); kw...) function _extend_to(A::AbstractRaster, to::DimTuple; - filename=nothing, suffix=nothing, touches=false, - missingval=(isnothing(missingval(A)) ? missing : missingval(A)) + filename=nothing, + missingval=(isnothing(missingval(A)) ? nokw : missingval(A)), + fill=nokw, + touches=false, + verbose=true, + kw... ) others = otherdims(to, A) # Allow not specifying all dimensions @@ -193,35 +197,44 @@ function _extend_to(A::AbstractRaster, to::DimTuple; end others1 = otherdims(to, A) final_to = (set(dims(A), map(=>, dims(A, to), to)...)..., others1...) + + # If we are writing to disk swap missingval to something writeable + if ismissing(missingval) + missingval = _writeable_missing(filename, eltype(A); verbose) + end + # If no `fill` is passed use `missingval` or zero + if isnokw(fill) + fill = isnokwornothing(missingval) ? zero(Missings.nonmissingtype(eltype(A))) : missingval + end # Create a new extended array - newA = create(filename, eltype(A), final_to; - suffix, parent=parent(A), missingval, - name=name(A), metadata=metadata(A) - ) - # Input checks - map(dims(A, to), dims(newA, to)) do d1, d2 - if lookup(d1) isa Union{AbstractSampled,NoLookup} - b1, b2 = bounds(d1), bounds(d2) - b1[1] >= b2[1] || throw(ArgumentError("Lower bound of $(basetypeof(d1)) lookup of `$(b2[1])` are not larger than the original `$(b1[1])`")) - b1[2] <= b2[2] || throw(ArgumentError("Upper bound of $(basetypeof(d2)) lookup of `$(b2[2])` is not larger than the original `$(b1[2])`")) - elseif lookup(d1) isa Categorical - map(lookup(d1)) do x - x in d2 || throw(ArgumentError("category $x not in new dimension")) + return create(filename, eltype(A), final_to; + parent=parent(A), + missingval, + name=name(A), + metadata=metadata(A), + verbose, + fill, + kw... + ) do C + # Input checks + map(dims(A, to), dims(C, to)) do d1, d2 + if lookup(d1) isa Union{AbstractSampled,NoLookup} + b1, b2 = bounds(d1), bounds(d2) + b1[1] >= b2[1] || throw(ArgumentError("Lower bound of $(basetypeof(d1)) lookup of `$(b2[1])` are not larger than the original `$(b1[1])`")) + b1[2] <= b2[2] || throw(ArgumentError("Upper bound of $(basetypeof(d2)) lookup of `$(b2[2])` is not larger than the original `$(b1[2])`")) + elseif lookup(d1) isa Categorical + map(lookup(d1)) do x + x in d2 || throw(ArgumentError("category $x not in new dimension")) + end end end - end - # The missingval may have changed for disk-based arrays - if !isequal(missingval, Rasters.missingval(newA)) - A = replace_missing(A, Rasters.missingval(newA)) - end - open(newA; write=true) do O - # Fill it with missing/nodata values - O .= Rasters.missingval(O) - # Copy the original data to the new array + # The missingval may have changed for disk-based arrays + if !isequal(Rasters.missingval(A), Rasters.missingval(C)) + A = replace_missing(A, Rasters.missingval(C)) + end # Somehow this is slow from disk? - broadcast_dims!(identity, view(O, rangedims...), A) + broadcast_dims!(identity, view(C, rangedims...), A) end - return newA end function _extend_to(st::AbstractRasterStack, to::DimTuple; suffix=keys(st), kw...) mapargs((A, s) -> _extend_to(A, to; suffix=s, kw...), st, suffix) diff --git a/src/methods/fixed.png b/src/methods/fixed.png new file mode 100644 index 0000000000000000000000000000000000000000..df0c2138075ea6c019a1cfb38960a77e4875cecf GIT binary patch literal 18225 zcmd^n1yogCyY4o@A}l^ZKv6+NKmkEoMMXhCLFuq)>24bog-=0{R=Pnt1O!E;8>9sZ z>F(I~S$g{1GsZpV{P+I<{m=f!aDT{NYtJ?3JKy(-clzYAtmL-M^qWZ}(zc5i&dQTW z>yt^Obq^`m;%~&LwG!~Q@usxoSrVD}?@eBm7m2iwbn)!ztG0o|?RJjWW`f2iI%Uo% zKMgnFxo#SBRYlb-=DuNrVoQwM(};|3zS0R{C#^r_B`|8f;^K~L_c7J5R@6*=cz&<>{JB`+8WbnCI+xL@rXg?-%GX66yZ-hxq@~o6e9(ci*hvkKbRuPlI6|K21p?(RlB} z6_ipmc%t=({(2ve4t12f8{}Pbds1`*iFDo9P*3lhm9Wj@Ujx;pv@uDl%0AhtK)T+# zacj`aG4|xC*k3QHU$03~I-w_}rKLGJIg^u-?>=j#TgYn||j1#sb71?%eTa z>W^cVQu$FIjP)C0ubnt?LR(wAJ=dmcPW|WBl;+`XJM!x>71D>%!oS;D_^U_le`5vz z2aTNw)~}v-_?N24%T>(I^Ib9m?b9W4&MC!4ySdcoe};J~Dk{znHVnBP@c2W_(bmjt z!h`CnhK8uq;`BN9-Pe?q#tO)*2hKk``SZ;KvDsS8-yNRg?YHI~2BVbkfv7Hr!+>p591L9Jm~bB@_@{Pd5PafvyD-R|70 zO)e`<#_c&y&j!|!NNM-ne4mNUwONQSl*nadSWop;9FX)VZ@8|a(j0vyY$G*OpuGK9 z-dz6VtEBP(0TW?KKS7JWru^A-YO(n@Z{A%0S^ZWw(CmDYW}dthd4id2R=9yKt=J5? z*YRlA{p8r#!GO+X9sYp!mW^I)a?XP>QSws0n)U-vcX33@1Y}$+Vla{4TL(E{A)``X9S-MrxqfSIc?$-Iol( zQm480%sb5~ZFCPdZY$*+u8k;Lnd@}BA&GHdGrSEv4@uJ1RO|Ex{gfZyf~{vzTL9uc*VU z$kK4Cf>f0_c{%o4ob~KU+`Hw}n|+Qyo(p{TU<{DYSsKlCm>X^H=9d#+u^e^hcAjvT zQJg_wVAL(<0g571<-F_>3bKL`3$eVFCl5qi)zTslr&f)A7$#yu)q7ujLV{?2poQ4t z&#*G?g5`Pzx%L*FUAuO%M|9*l_`Z5|LQB~4XSH{2cd$quRZ;?u>j?C+0Z`Cr5F{ds=)TFlL8xm46 zx>doPiAL)A&MS+%<@OtAdo|9bn)jB@3^iqnjpWU?#La9I@_(A`+vBiV#wqlDFSUE^ z+j!jT$LHti>FL2$7X$Qq#cJ)QI~>M(9X1cY$Aw91nWJHzVl91D#@}Q)6yu}g;^LB$ zOw8*x>$wDx4`t!@Te!0J*(_D>PU>otmywZ?iZN_TB!)peK}FCV3-N@pgAr-gBQ1>r z*8>KJhNeU}WF+M0=f8dXHb1{(&Yi#JxYh+m3&lHZCJJeS+((2wWSJgr+qR8P)3$4} zx6GBAc~y=)E!Wy6W^+F@BO{|YJ%Hb+)ai=4`ul&n{d6G(b6SdZrm)LvIm=|$_@`}!)w1@hkm^lg0}>nJ$?GLhK7cnT{iE-9t}2^O|pR}CtgT04YcRZ zY1p)(XC!|Dwr%TbI9y7ddlB_?%bD}k_Y3s=|%RLpEW<0r>Z*57na5fLYBG_;AI*g z7LZM>h8h!0I`i2h06TWi`8YH>Ub%9GgL&U2_Fn6h^p*OCh6MF&o`|cOn)weTkzT5O zgzRYgj;f^GE_r!~Gtqvsgv~+=pNjC_xb=@eWX$C3JZ1 z;bOu-Yo4WlTq(Ov#}47JUoO0@6wEq`vU%VqP(%32)dP5NsiiO1+~@H#~fydKO3pqw3&UW@IS#Z zXS>KsPsfBGh3$!rX%sz0al^gLWh$Vv=l=L$TvEs6gH=gpC(95qCm(Jt=OzuC6Pt=K z+M_mg2Gk%Ny!OuLFN@1I-v7?^I@q-j=+ZCd3#n;I2tKj`Dqn=_QiAZkDTvnDJqHkO`F z)3k6ycGVsv%m@Yn<2&kN`nBP0`k#jqbB*bkfzUtjTTb#m6uvS1?H#qq_=op)Q|CSQ z3Ode@b31lzK?)$xXpz&A=eoPQOT5?wviH7PPxXoiE zZiQPh((+LjOh<5wPJG(I9jMt$JQaI*%A?8BUm` z*|b|JjoLk^2-)d^67l?xK4r?8cu6r!2-%pF& zrj(@F9wzBoTU*N-ccf3Vw$ckVK#WTFug78j-=RSQQQg&uzBzhmA^3~}rle$U=UtJX zdM1(`?xe3~zgg4Y621G2PK`D_J^iF8h4pz>uZ!&Iw;VDw)UoOjRRW`P z|38q>zuW2jEet%J6kYS}(Qk6ickTGxOGmjiUmtkPTGy}XbQk*W?H{cY?9*?xmT~0T zdP}OxzqVfRpA0BMUU;)|R5eX+b$J2Bz{<*MWoeFJ4Al68wUxlPfKZ)%iBi(iOGx31 z{DzJ3b|}i385zK&A*<6-nYt~=jJX%w_NfE!H7IGV3~7;bN+yX+Tl8?#%HoWnIuG!- zAFr;lg=>?#MMXkpN6RAr4%SOq6Q3C}GiH-NQU!>+tnyPpZ24v-(i~45H&}msG)1>Jz(rA6c zWpx>4GZA=zPRntE|4YS&F1!99o7IJW0j*rr^x?KHJZajiO6G?Jiw#NgQXnRi1fs|~ z=2hZ2=FquR7qZF+0D;fOo7qjt?c#6AGW!7@@IpT4hleng*!*`RkEKR6lL+s}+Zg!o zd`tG^bs?%txL2n(UqD4Z=7T)~5I$_7Wrys^{Sj9Ip48NGD>Q)NtX!5_J1d39h&3?@ zU`SGa$6{mQzq+zCHXcsE{BvyL5?MEkpMqLMdQ%AxC-)fw%bRQM+`T&^KLi+M8gn8J zsyLeU!((KR4T^3yH$YbMx;jWpdXhmRlx1aQnby2jEiEm;)Dhku;0uipk|QHo=iU~! z8s{1bn}~ZYk#4A{sGL4sNI(+1sXk7v{N?WtsKr2Q2nYb?W8>-x?d(m0)#ScAp^1sc zL>dQ%&8e?N6ZwQxKZ?Q4p_a%OHOPgtuDjjjXF(ZW8>^l(-`jf zl`~M(!CFdxd458m({bwc@muDZnfk>o>4uqtmz|tetYu>o>P_ATxw2AnbBaZ_FrfJxgT@@AW`c&(n>1VZ{sg*dT~$ec;_vSd@Mb;KXoN@`5#c+XMvn8BWeuoE zRDKrv8jn}SMj1zAl2b%>2ci30YsAuTrp82*Vrgv;>Q<& z;MG8jN0>WZ-8=k?%bJ1u3q02QT(O8y864(<`!2MZQ%qA3T#Gr7vpKxdY3o+e0;Z4 zn~?=f5hvi~`B#LWD7S4CzBpT4|?Mmv_!I(nmml;mEsPmPM(n8n`Xm6BrKRrE;M z5nG#N#5R3#aLjowH@nXnOe7#FAt9kcw0+D()h&Qx({#i=eNF zuFn4;FNBb@QKkC~4Gjzw6V?6X2RjQ~rqaDwWu(}j?-fh|oD=5ziQRD0^!tY@$39+A zSUs}`ZP|qqRD!o%t+#+eWq?BQchFDYqhQj@pY?QEzBSs+?aJ(I>vauq(0R65S3!z^ zwH{RV-A~x^XByR#@@7@J*aKbzm#1i#$x9K*&0};AcI`Io+P%lFf2fgU4w9!AFeW%u zW^ZhNu)}C6N75yBr`gmH(S;tbfaPNB=sr~dbGbWCZUo&RD2b$JVr*=ueL04g&`6CY);k9-R4_zJ5n}i zF}upic%&trEzq=xu3)|x1wfm}4}up#0>G*@ax51rOq}bM1t0S2lwV*UNv#s2V6?nE zBvFp#t_eNI9)Vny5FUOo({!?@1bS6me7p#@yNCGd`~go99^~1@!8k%ok&uwMc(J6X z#LK3z3$TXdvZ`RgMr7osecj zlc<_LShR)vJ!X$p#-9)Db#~9Rw}LEkj1m)yUK7e4iscb5E++gEJ3T;)-CsdTX^!BB z5z{epP6h$O1-P0JcD!p*#nO8xKO*-(g7g3ttFJOB*K(k?sHjMST67vR;z`h9u~kA9 zb95YGe2A&M$Sw-Q1CcelP!F$zr=(`l7AdE=93^v429`D#Gsh!R;V&7Fre>#qzFHtBc| z{bv!NWD%vN-SJ1)M<_C4?^oC`h-&s;K}w6qRE&#QhfA-Ib+s5KB_t)0+sB-nx^*HV zA_z<9O+wGe%NouTe0*)d0|M=q3i)i92AT`2RW$ifz&aNyZ6NhLYGWpzZ_^G|Zyz82 z&XnlG6=>eE6xfx{|s%b27huNkMT}py~-{B9Crw9B%=Co%S?Jo(igS=SG5x5VxxZ=4BM;*b)$9wwYlTrnxzsY^larGn zH2o64eyd>@f4!KZx)>d7($bk({;5^~JC2APvgRHc7cSs^h>`w{n|}XE5dSYfPB387 zr2em&I-{Cya1DAU;2P9p1*u&Y+$D3*NJ60=;~wpWRv$w4CCCjV^p5bqAeY6t^#$$+ zYWwRup8xV`{_T+ENEuK!Jh?|MY9%NMmyV`gDA5{r>=ZVLUME)@*K zD<-I3Q&3P)RBQxVcdUxo@GE+DTCe*J(N>lC12i)3sNA@5qch)Witw3#X$G+`f4!g) z$cCn*q$DOL0%1i*M~i-$kuY+wjAo|j7G(IxUI2ste>dbc@3Bp3UNQmvGC@LC|B5?z zyB*-lpYLW8wdd=!E%j!HFQL}jpdS@v4McC;pGEjQ?KYixdNrYggM+Y?$k0^9;jiUd zkK}l{BE42ypIP4pP_r~*;<8jB(0NCFlF-@-Al7DHhMX<=?2mW7pTR&Fcy%suIJClW z$>gO>%I@<=?crOUfQJz>C3~BJ^?NuFQg!5j?{jVOjxA5V9-< zlkFR`rDAuwDDS84?oA15O3KRL99=oN?%eDxeI>qF<0&si*h|Et1EPj|!=Q`>7b5?Z z6j#kM?eb<h;~a@Qoxx9+DLvLX|o`V5=mRlMW};lCj{Gtl)3B^>WTKtRK@)~jAzT!aP6 z9${IuiH%RI)s{K{Q#@L2jPbN6vXQsm(AOU@%4~1!js=?1uZxWEt~~kU8K-Z5CCw-z z=f@XMIKxZ6M^)t2)g8a?cijs}0Up{410qB4CtyY{e)N_nudW0nF&?{q3Niy`fvAnA zoC(@~b78WV2vq+QhQP4AMlESzg2`G1&c9^(jaX~7Y;$d?0RZKgxVSRTHrNXAVDZ_h z*3MPy!TJ~(CWV^r!`a**A0d5)TQ+aDpBv4C%>cWvU}l9diUTC;OQ*KmynxzJ5g^di z*qA}Cwp4u19XpbJOJPZPe7to^^iiZwIXTKv?rjX!%;P^RbOcN~)U~ua-|7?_S5%3$ zwDna43=Iw%7#f0_wpyr**Jnun(l{Sk@8Y1;A|RRMyt%BC~x?)p)hopT(pm>3DslK~e3=%`fsan6Nu%6(#ZFHPsGhz|mxo6VNQWJiW zZ@aV4Cz_36Cp0E%v?i!!e!l+LQy*Rt;pyq51{bF+8Ib2f6~Uf~9RRYu({j@DT(xl{ z7(8~=bHtKe-J1E61lz07;q|+8Tb9`^jwl{_xvj=6X)(py%-OSggYKwK;*b`^&(d~-Bn*yKrp<6 zQsnBtD>oek#s&cz9)7zl?&`__@i^NJgfDg4r>-R|i}wqx5N5&hL;-lh7M!(0D6iFh$Wl~$cn;1L6O{w%VTeknt#}nQC$cX$&+5y=WNuZJw5=-7dWB|m(5M(i-q4--Kf@sJ<#T#pgYr+OcwIggtfl*!5S&&e* zd`CO@imbAMS-mt+V9jB@!d*-?Xo&s%h-gg-vf2yE9$=E+eDY_6ujV3TPhB2AB)pNL zkC?x2R6*qPndC`sG7n_Q-ZEc8Qx;#H@FZ#_J_xOmcW&5K&S_8qx|eaAEvPFp=29iv zEHKx>mbDkb;!W)A7NE9SDEVOKXF69$I{n+W>cDP+K68N``qk%0ewX_4mqvxb^g5u5 zu35KnuU!ven^xgQ;L`F^0&S)rtKUkr_Kzd4XGD61?7zegeMxi6?xR^hL)|xT-i$1x z=`_>uXbT;f`8$wE@%ch(n6puxULb0leSWrw(47%qWB4e8an{=e&+TX|bhTRLeaIdG$QGjz)!{f* zVe)?BvzkfXHFb3n1Rhjo8^mYv2%&Z%ya+l7nN1T*ft*H&&21|e0vw?JXY!A}b!8F) z>E3|HP)&=DjMRZ$fQB3Kg7niw{Zet;SKf>P|ATvMEu6_rXj}KziLYk4GC?bHWm=q= zBU%G8gvf`8PMfp~e@nK?FN$?NQ|Xr%^mfCpvPsc()}MYeiG9>U-wRee79@+)Yu|0oiEGkQ=hEg4608`l(QGnz`t)E|{An-aR`F?(dK5XIlQ34QfSJ{#$S zvB@v?T|@>NDD&auIC^v#y(;=et5p_(u9`20Vj2lYtl9N33Y;2Mc2 zK4*>JVvRiUdijO@T6x|<&#=tsb+Y<%lZ}&(RixiKg!JFimj4Us5Ij(?7lPLbAMh$c z0~=a%xH6@^Dt?DR;QrfRL{QDOmRg#CF-`z-6D0a3> zq=Lq&m7JyFdD~gX%3a9kcco~FZWPj~2ud&x=&SPUfv-rU%j|3S6YtSG??HPck*-RG z)eRigKqaqdAyRUY6~1a8)oJZ zlybknxC>+9>Y{#jjEQW7)4YzCla(O+qWpDQXx{D#hPn8NP}4Gjg{3kMaA(F6ciK{ZLg`RX=q3v7<`nDbRc5ZeHLUvL`39L6<^#% zv{`UzzJDDUsDAZoPghsXb&4V%7@xBMhTM76@(20_%+qI~{c|WKhDSyUojjS9lViZ= zuba3t;rxMsvo&86xM@qOstPOze&^RzC+#-XgFK>*{*ByLJ-?msg zU|i)tQc8fNkr0&^N6aZGT+!>Y$4TN*RFrO0 zqQ;C1xce!6eSKb+{mRO~=m3+KD9)t%hlhsVgs%R0Y9@(I#y`BlAb>1fSOiPtZ1}{z zKhK;gPa_81HMfw)x0kzDz?;AG@s9L`PX+^b#iZkuo}N%|I_NY&N78XS6mR z-jZq5+SsPG*=GhQ??`L&^Gy$*t0c5%nT28YSh|OX97fv&wQo2(uY&k{7i7U4idoc4YJ57`OL-OiAQ#XhJ_h!iwF-tys~@u?ntelV`JPbh-NF#^_78w3U(Y$8}C+R zIj`7Xy;_fPwPqN_nZz!xeT%SFR*nuNF>l{~)3z=0{d>piH>X(u@9AXsre6fNg4jgp zoHLBtEHwV}ZR8Zi(`H7Nt0Bxl^T=pohN%cRgTiNJ4>atW96c~t| zt(PUj#`dMTS>-G?T+^pciVTlaZXq=$`}gqVND0}Fzu{R-a#@KR{h49Z>RlsjGg{Bc zdE|(EWs5ypDK7)9QETuRz`z@;D-?@95<8N?b=*mt!mg!a|N0 zY?Ih$OhT5?n+mAn3z3+H_+QnW(lhoN?kvz!F$XzO^_nq!Ul%1Oupn^1*lM&bTT#-F z@}W*bBjhhu7H4N?E7xfX4@8^)aKHzl*9`}SEHd)bR1>!Iz)4fHv4kqOy1I%Rn3Qs_ z+hrQv>T)@Ce0==-_v^h@=g)tJ(08@7{Moauv^9EsrRE>zeGx5VQGnhX`yCnZcf~K_#2cGd^Hjp zkIGqZ34!$ag#~>+`XE`|?jO}s8#Zj%!~bfvJ~yY_=q1>h>{&=7Ed?%QRWE$S@W@EI zs?p7xH!okV=5^kn9AMO%A?m!WY_|eEb+5Q{+o#*9iiV}F2I(z166mrnbw_%Mir&*$ z9?wdPM}~-v>0@I_)vZbtMlWk)RQw-<=l6Z8dbU>(3h0Z{A^gT)`Ot=q8xvJhKV#?q zlwgJbOFqm^7=fx1VDHa8J?ADTA+0z}_FUYti;AOna4@z&S^DhkSf@$1q#5EKY?e32 z{q$Et(0qGB2-(hNw2hsm9Z%kz8r5|fOI!Mi znXQLOZ1^&H(}Xm{ZIz6VYW#^6!(4kvKG~1NBEqiN>UG-F+ZAcA%;w^pNBY%>WLm^KD)?fP z6#dQ@u+R?+$Ze#TYc{chKn%rFP)MjUK!E$$vE-Z_A$InGuP+Xujv(d8NlX7Q-}L$s z25Po6JM#7GSNP~dmO>5=OKxs%&;|Yo-bf_#k=9I9k68#CGe#zB9#>*<>}L#m?Kj!j z%moSC3^EHTDdCQmmeUY-uqx+n;}8yRXkle#lx==v=90jZ$%A?^{j1C@^4G2%VhL=g zNDYsO049z_i_)zx9<;QyNdDGlW+mn2ZRv(e^76Il^W!(^h*uxj%=b7&;%#{Nr`A@Y zUGZ_1l%KeZ^Mwl+AebYc%V<$-sXwzXO&eAQHy0PbXg~S~0{Qf7QyrzCwqii!aaZxf zt;%S6c=F`Q9-iB7U4eiZm=U?CI)FO%efz$vh*9nKe(~b1OL^udh>pR*!Bvx|=f*lc z-P|q*ufeM5R0b|YiLb^f z%+Y(}2xWA3lvPyhdVwAxycqJ%g`bIQfU0bXQyeVaeSI0|>A(>!SLs2u7rT{ZPUK${ z5f!ztu$Y>fdY)eetat9y$B#c^I8JXl3_Y{lzyKJXlZFx-(aYce2n$fwW9q|AP2*Tl zsMG+44^HV-z=(5)K~+D!S4pYKXVXIR$yvs3YZSi&T`;DSZL^A?%p%mJpF)xFJ8Pj&*lilP0M*9 z!fpNf^)!?Tjc-mrpr2~H074P9>ITtn0|f;g9i4-{{pU}g{%}%EdW&Wz_JTT;(avlO z-F6u+%FML1HwKQZO1p6TEi&cA9ql*JSK;c}gB^N(yo7>F)Dw?_!|zC4{fCY;6?2w!`(i8{_HRNfQ1lmk@Fffg1x-zkOhcrPaJy;2$M}? zF*7r>v$LzLtOSbO7Foz97Yys^+}Wa!A1^*QR@%nuckD(HAP<_FiS`_tmKVkfo zy6YDB(+OvSYfQ&>ysQz(T7f+R1T~8X_+%WKMzpdNWTIxorOhK zRu-MlDW}Zz(ua>~kW@K(HAhf8dF^ZVjqmi1^yfEHgs@7`s1t>sX=r;oNx&dgksJ>dw$2iSb8Yp{sj zB>IPuAufc~)VC~=UVs1bW5?dTM_CYO4Iy6fC!Lq)(E)t62f#fe_0|?-ux{Pl8|);jwj%Fd&H45Ly-ku zk~jc2Nx36zn%cdtN}&CoC$qS5=-Qe^k#kx zyD|o`hdn$!wQ?*6epfJ^ll2G@w4t(7gxveQg_825dCvu9W#x+(pO`DY?{K3%0L0Qi zJbWjnr5b|9l_XgH%cC|0J6<5G)zbra!3wAzu-umaWZ;J@cxb6)vR}k@hzRH=_Qny! z_Wgiw5qmDxiErRKc<^j?Mn@VPN#DNRPa4nZ2PkRUMT}P0oxrg;X~F|Q7jlreOFn=R z?ts&N5Pty9bQK!58lwO7Fm23OWV4l@asoSyk$H5nIiY_QyzY{m95*K?dK8@oB78fX zmqv2vb{Dap_;&E|r8`aoi2UO?8vbH3pkNRLrYD( zJa@-Zldek;Qqf&2pj+h}%b1W5Eu^+vw{8suD8kJ!{eDVZyx_x!59o^YiKii5=Osia zWxD6*chJ%GcXzX}0AFNhXP=Ks(99F!=9W84G*{+3EoJCEA(7NhLw))%Xo1F z)$SvGSD}G|mMl+r^k{KB7oMDKN+9AQ?2^&uRLQeMLnfN8Kfat5bI^$ZFRcMmr0bX=qI6Mm0=K#?TZo z)|x37Rud>>Wq`g`AFger>On%oOSVRW#lzK-34IJQcjk3b5-EU~q&K@Lw`{?w;y(%I zAqozQ0QO0wBwgQ<6|eR2myMv?!T+70PKtudD1$`eHTEh8r&9j@gY@s`< zTwRL=zq29g8SbXR!^5+7-8yE_UGTcJG(K)_?y$NvRic*{De52#F$_8`^=~N{!JRe4 z0}h#^T^cEk7GT*O#`zOJlu resample(x, args...; kw...), ser) +end +function fractional_resample(A::AbstractRaster; + categories::NamedTuple, to=nothing, res=nothing, size=nothing +) + ds = _extent2dims(to; size, res, crs(A)) + shared_dims = commondims(A, ds) + isintervals(shared_dims) || throw(ArgumentError("Fractional resampling only supported for `Intervals`.")) + outputdims = setdims(dims(A), ds) + outputdata = Array{typeof(categories)}(undef, outputdims) + output = rebuild(A; data=outputdata, dims=outputdims, refdims=()) + map(DimIndices(otherdims(A, ds))) do D + slice = view(A, D...) + fractions = read_fractions(slice, categories, weights) + end +end + +function read_fractions(slice, categories, weights, indices::CartesianIndices) + vals = view(slice, indices) + map(categories) do c + sum(zip(vals, weights)) do (v, w) + _compare(c, v) * w + end / sum(weights) + end +end + +_compare(c::Interval, v) = v in c +_compare(c, v) = v == c diff --git a/src/methods/mask.jl b/src/methods/mask.jl index 1b19b44cb..1401a990c 100644 --- a/src/methods/mask.jl +++ b/src/methods/mask.jl @@ -4,6 +4,13 @@ const INVERT_KEYWORD = """ masked, and areas missing in `with` are masked. """ +const COLLAPSE_KEYWORD = """ +- `collapse`: if `true`, collapse all geometry masks into a single mask. Otherwise + return a Raster with an additional `geometry` dimension, so that each slice + along this axis is the mask of the `geometry` opbject of each row of the + table, feature in the feature collection, or just each geometry in the iterable. +""" + """ mask(A:AbstractRaster; with, missingval=missingval(A)) mask(x; with) @@ -82,12 +89,10 @@ function _mask(A::AbstractRaster, with::AbstractRaster; filename=nothing, suffix=nothing, missingval=_missingval_or_missing(A), kw... ) missingval = ismissing(missingval) ? missing : convert(eltype(A), missingval) - A1 = create(filename, A; suffix, missingval) - open(A1; write=true) do a + return create(filename, A; suffix, missingval) do C # The values array will be be written to A1 in `mask!` - mask!(a; with, missingval, values=A, kw...) + mask!(C; with, missingval, values=A, kw...) end - return A1 end function _mask(xs::AbstractRasterStack, with::AbstractRaster; suffix=keys(xs), kw...) mapargs((x, s) -> mask(x; with, suffix=s, kw...), xs, suffix) @@ -236,18 +241,9 @@ $GEOMETRYCOLUMN_KEYWORD $THREADED_KEYWORD $PROGRESS_KEYWORD -And specifically for `shape=:polygon`: - -- `boundary`: include pixels where the `:center` is inside the polygon, where - the line `:touches` the pixel, or that are completely `:inside` inside the polygon. - The default is `:center`. - For tabular data, feature collections and other iterables -- `collapse`: if `true`, collapse all geometry masks into a single mask. Otherwise - return a Raster with an additional `geometry` dimension, so that each slice - along this axis is the mask of the `geometry` opbject of each row of the - table, feature in the feature collection, or just each geometry in the iterable. +$COLLAPSE_KEYWORD # Example @@ -338,9 +334,10 @@ function boolmask!(dest::AbstractRaster, data; threaded=false, allocs=_burning_allocs(dest; threaded), geometrycolumn=nothing, + collapse=nokw, kw... ) - if hasdim(dest, :geometry) + if collapse === false && hasdim(dest, :geometry) geoms = _get_geometries(data, geometrycolumn) range = eachindex(geoms) _run(range, threaded, progress, "Burning each geometry to a BitArray slice...") do i @@ -351,16 +348,18 @@ function boolmask!(dest::AbstractRaster, data; burn_geometry!(slice, geom; kw..., fill=!invert, allocs=_get_alloc(allocs)) return nothing end - else + elseif isnokw(collapse) || collapse === true burn_geometry!(dest, data; kw..., allocs, lock, progress, threaded, geometrycolumn, fill=!invert) + else + throw(ArgumentError("`collapse` must be `false` or not passed if there is no `:geometry` dimension in `dest`")) end return dest end """ missingmask(obj::Raster; kw...) - missingmask(obj; [to, res, size, collapse]) - missingmask(obj::RasterStack; alllayers = true, kw...) + missingmask(obj; [to, res, size]) + missingmask(obj::RasterStack; alllayers=true, kw...) Create a mask array of `missing` and `true` values, from another `Raster`. `AbstractRasterStack` or `AbstractRasterSeries` are also accepted- @@ -376,6 +375,7 @@ The array returned from calling `missingmask` on a `AbstractRaster` is a - `obj`: $OBJ_ARGUMENT # Keywords + - `alllayers`: if `true` a mask is taken for all layers, otherwise only the first layer is used. Defaults to `true` $INVERT_KEYWORD $GEOM_KEYWORDS @@ -423,8 +423,8 @@ function missingmask!(dest::AbstractRaster, src::AbstractRaster; end end function missingmask!(dest::AbstractRaster, geom; kw...) - B = boolmask!(dest, geom; kw...) # boolmask! handles `invert` keyword here + B = boolmask!(dest, geom; kw...) dest .= _false_to_missing.(B) return dest end diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index 0c913d173..2e9a3f5df 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -65,22 +65,50 @@ function mosaic(f::Function, r1::RasterStackOrArray, rs::RasterStackOrArray...; mosaic(f, (r1, rs...); kw...) end mosaic(f::Function, regions; kw...) = _mosaic(f, first(regions), regions; kw...) -function _mosaic(f::Function, ::AbstractRaster, regions; - missingval=missingval(first(regions)), filename=nothing, suffix=nothing, kw... +function _mosaic(f::Function, A1::AbstractRaster, regions; + missingval=nokw, + filename=nothing, + suffix=nothing, + driver=nokw, + options=nokw, + force=false, + kw... ) - missingval = missingval isa Nothing ? missing : missingval - T = Base.promote_type(typeof(missingval), Base.promote_eltype(regions...)) + isnothing(missingval) && throw(ArgumentError("missingval cannot be `nothing` for `mosaic`")) + missingval = if isnokw(missingval) + mv = Rasters.missingval(first(regions)) + isnokwornothing(mv) ? missing : mv + else + missingval + end + missingval_pair = if !isnothing(filename) && (ismissing(missingval) || isnokwornothing(missingval)) + _type_missingval(eltype(A1)) => missing + elseif missingval isa Pair + missingval + else + missingval => missingval + end + T = Base.promote_type(typeof(last(missingval_pair)), Base.promote_eltype(regions...)) dims = _mosaic(Tuple(map(DD.dims, regions))) l1 = first(regions) - A = create(filename, T, dims; name=name(l1), missingval, metadata=metadata(l1)) - open(A; write=true) do a - _mosaic!(f, a, regions; missingval, kw...) + + return create(filename, T, dims; + name=name(l1), + fill=missingval_pair[1], + missingval=missingval_pair, + driver, + options, + force + ) do C + _mosaic!(f, C, regions; missingval, kw...) end - return A end function _mosaic(f::Function, ::AbstractRasterStack, regions; - filename=nothing, suffix=keys(first(regions)), kw... + filename=nothing, + suffix=keys(first(regions)), + kw... ) + # TODO make this write inside a single netcdf layers = map(suffix, map(values, regions)...) do s, A... mosaic(f, A...; filename, suffix=s, kw...) end @@ -139,10 +167,11 @@ mosaic!(f::Function, dest::RasterStackOrArray, regions::RasterStackOrArray...; k _mosaic!(f, dest, regions; kw...) function _mosaic!(f::Function, A::AbstractRaster{T}, regions::Union{Tuple,AbstractArray}; - missingval=missingval(A), atol=maybe_eps(T) + missingval=missingval(A), atol=nothing ) where T + isnokwornothing(missingval) && throw(ArgumentError("destination array must have a `missingval`")) _without_mapped_crs(A) do A1 - broadcast!(A1, DimKeys(A1; atol)) do ds + broadcast!(A1, DimSelectors(A1; atol)) do ds # Get all the regions that have this point ls = foldl(regions; init=()) do acc, l if DD.hasselection(l, ds) diff --git a/src/methods/rasterize.jl b/src/methods/rasterize.jl index b84b7c888..2d9e278ec 100644 --- a/src/methods/rasterize.jl +++ b/src/methods/rasterize.jl @@ -30,7 +30,7 @@ _reduce_init(::typeof(sum), ::Type{T}, missingval) where T = zero(nonmissingtype _reduce_init(::typeof(prod), ::Type{T}, missingval) where T = oneunit(nonmissingtype(T)) _reduce_init(::typeof(minimum), ::Type{T}, missingval) where T = typemax(nonmissingtype(T)) _reduce_init(::typeof(maximum), ::Type{T}, missingval) where T = typemin(nonmissingtype(T)) -_reduce_init(::typeof(last), ::Type{T}, missingval) where T = _maybe_nothing_to_missing(missingval) +_reduce_init(::typeof(last), ::Type{T}, missingval) where T = _maybe_to_missing(missingval) struct FillChooser{F,I,M} fill::F @@ -73,10 +73,13 @@ RasterCreator(to, data; kw...) = RasterCreator(_extent(to; kw...); kw...) function RasterCreator(to::Extents.Extent; res::Union{Nothing,Real,NTuple{<:Any,<:Real}}=nothing, - size::Union{Nothing,Int,NTuple{<:Any,Int}}=nothing, kw... + size::Union{Nothing,Int,NTuple{<:Any,Int}}=nothing, + crs=nokw, + mappedcrs=nokw, + kw... ) - to_as_dims = _extent2dims(to; size, res, kw...) - return RasterCreator(to_as_dims; kw...) + to_as_dims = _extent2dims(to; size, res, crs, mappedcrs) + return RasterCreator(to_as_dims; crs, mappedcrs, kw...) end @@ -477,12 +480,8 @@ function alloc_rasterize(f, r::RasterCreator; if prod(size(r.to)) == 0 throw(ArgumentError("Destination array is is empty, with size $(size(r.to))). Rasterization is not possible")) end - A = create(r.filename, eltype, r.to; name, missingval, metadata, suffix) - # TODO f should apply to the file when it is initially created - # instead of reopening but we need a `create(f, filename, ...)` method - open(A; write=true) do A - A .= Ref(missingval) - f(A) + A = create(r.filename, fill=missingval, eltype, r.to; name, missingval, metadata, suffix) do O + f(O) end return A end diff --git a/src/methods/replace_missing.jl b/src/methods/replace_missing.jl index 58c85b72c..7a820c6e2 100644 --- a/src/methods/replace_missing.jl +++ b/src/methods/replace_missing.jl @@ -23,7 +23,7 @@ missing """ replace_missing(x; missingval=missing, kw...) = replace_missing(x, missingval; kw...) function replace_missing(A::AbstractRaster{T}, missingval::MV; - filename=nothing, suffix=nothing + filename=nothing, kw... ) where {T,MV} MT = if ismissing(missingval) promote_type(T, Missing) @@ -37,13 +37,11 @@ function replace_missing(A::AbstractRaster{T}, missingval::MV; # But in both cases we make sure we return an array with the missingval # in the eltype, even if there are no missing values in the array. if !isnothing(filename) - A1 = create(filename, MT, dims(A); - parent=parent(A), suffix, missingval, name=name(A), metadata=metadata(A) - ) - open(A1; write=true) do O + return create(filename, MT, dims(A); + parent=parent(A), missingval, name=name(A), metadata=metadata(A), kw... + ) do O O .= repmissing.(A) end - return A1 else # We need to force T of Union{T,Missing} for DiskArrays broadcasts if isdisk(A) diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index 443808750..88bb5437d 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -1,5 +1,17 @@ # Share common docstrings here to keep things consistent +const NAME_KEYWORD = """ +- `name`: a `Symbol` name for a Raster, which will also retrieve the + a named layer if `Raster` is used on a multi-layered file like a NetCDF. +""" +const METADATA_KEYWORD = """ +- `metadata`: `Dict` or `Metadata` object for the array, or `NoMetadata()`. +""" +const REFDIMS_KEYWORD = """ +- `refdims`: `Tuple of` position `Dimension`s the array was sliced from, defaulting to `()`. + Usually not needed. +""" + const TO_KEYWORD = """ - `to`: a `Raster`, `RasterStack`, `Tuple` of `Dimension` or `Extents.Extent`. If no `to` object is provided the extent will be calculated from the geometries, @@ -115,13 +127,71 @@ const GROUP_KEYWORD = """ at any nested depth, i.e `group=:group1 => :group2 => :group3`. """ -const REPLACE_MISSING_KEYWORD = """ -- `replace_missing`: replace `missingval` with `missing`. This is done lazily if `lazy=true`. - Note that currently for NetCDF and GRIB files `replace_missing` is always true. - In future `replace_missing=false` will also work for these data sources. -""" - const CHECKMEMORY_KEYWORD = """ -- `checkmemory`: If `true` (the default), check if there is enough memory for the operation. +- `checkmemory`: if `true` (the default), check if there is enough memory for the operation. `false` will ignore memory needs. """ + +const SCALE_KEYWORD = """ +- `scale`: set `scale` for `x * scale + offset` transformations. +""" + +const OFFSET_KEYWORD = """ +- `offset`: set `offset` for `x * scale + offset` transformations. +""" + +const RAW_KEYWORD = """ +- `raw`: turn of all scaling and masking and load the raw values from disk. + `false` by default. If `true`, `scaled` will be set to `false` and `missingval` + will to the existing missing value in the file. A warning will be printed if + `scaled` or `missingval` are manually set to another value. +""" + +const SCALED_KEYWORD = """ +- `scaled`: apply scale and offset as `x * scale + offset` where + `scale` and/or `offset` are found in file metadata. `true` by default. + This is common where data has been convert to e.g. UInt8 to save disk space. + To ignore `scale` and `offset` metadata, use `scaled=false`. + Note 1: If `scale` and `offset` are `1.0` and `0.0` they will be ignored and the + original type will be used even when `scaled=true`. This is because these values + may be fallback defaults and we do not want to convert every `Real` array to larger + `Float64` values. + Note 2: `raw=true` will ignore `scaled` and `missingval` and return + the raw values. +""" + +const COERCE_KEYWORD = """ +- `coerce`: where `scale` and/or `offset` are present during `setindex!` to disk, + coerce values to the element type used on dist. `convert` is the default, + but `round`, `trunc` or or `ceil` or other functions with `f(::Type{T}, x)` + signature may be needed where the values are not exact. +""" + +const MISSINGVAL_KEYWORD = """ +- `missingval`: value representing missing data, normally detected from the file and + automatically converted to `missing`. Setting to an alternate value, such as `0` + or `NaN` may be desirable for improved perfomance. `nothing` specifies no missing value. + Using the same `missingval` the file already has removes the overhead of replacing it, + this can be done by passing the `missingval` function as `missingval`. + If the file has an incorrect value, we can manually define the transformation + as a pair like `correct_value => missing` or `correct_value => NaN`. + `correct_value => correct_value` will keep remove the overhead of changing it. + Note: When `raw=true` is set, `missingval` is not changed from the value specified + in the file. +""" + +const WRITE_MISSINGVAL_KEYWORD = """ +- `missingval`: set the missing value (i.e. FillValue / nodataval) of the written raster, + as Julia's `missing` cannot be stored. If not passed in, an appropriate `missingval` + will be detected from the objects `missingval`, its `metadata`, or a default will be + chosen base on the array element type(s). +""" + +const CHUNKS_KEYWORD = """ +- `chunks`: a `NTuple{N,Int}` specifying the chunk size for each dimension. + To specify only specific dimensions, a Tuple of `Dimension` wrapping `Int` + or a `NamedTuple` of `Int` can be used. Other dimensions will have a chunk + size of `1`. `true` can be used to mean: use the original + chunk size of the lazy `Raster` being written or X and Y of 256 by 256. + `false` means don't use chunks at all. +""" \ No newline at end of file diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index 1263a0acf..c08b62cc5 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -83,7 +83,7 @@ _zonal(f, x::Raster, of::Extents.Extent; skipmissing=true) = _maybe_skipmissing_call(f, crop(x; to=of, touches=true), skipmissing) function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing=true) cropped = crop(x; to=ext, touches=true) - prod(size(cropped)) > 0 || return missing + length(cropped) > 0 || return missing return maplayers(cropped) do A _maybe_skipmissing_call(f, A, skipmissing) end @@ -99,7 +99,7 @@ function _zonal(f, x::AbstractRaster, ::GI.AbstractGeometryTrait, geom; skipmissing=true, kw... ) cropped = crop(x; to=geom, touches=true) - prod(size(cropped)) > 0 || return missing + length(cropped) > 0 || return missing masked = mask(cropped; with=geom, kw...) return _maybe_skipmissing_call(f, masked, skipmissing) end @@ -107,10 +107,10 @@ function _zonal(f, st::AbstractRasterStack, ::GI.AbstractGeometryTrait, geom; skipmissing=true, kw... ) cropped = crop(st; to=geom, touches=true) - prod(size(cropped)) > 0 || return map(_ -> missing, layerdims(st)) + length(cropped) > 0 || return map(_ -> missing, st) masked = mask(cropped; with=geom, kw...) return maplayers(masked) do A - prod(size(A)) > 0 || return missing + length(A) > 0 || return missing _maybe_skipmissing_call(f, A, skipmissing) end end diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl new file mode 100644 index 000000000..93bddd210 --- /dev/null +++ b/src/modifieddiskarray.jl @@ -0,0 +1,247 @@ +abstract type AbstractModifications end +struct NoMod{T,Mi} <: AbstractModifications + missingval::Mi +end +NoMod{T}(missingval::Mi) where {T,Mi} = NoMod{T,Mi}(missingval) +NoMod() = NoMod{Any}() +NoMod{T}() where T = NoMod{T}(nothing) +NoMod{T}(::NoKW) where T = NoMod{T}(nothing) + +Base.eltype(::NoMod{T}) where T = T +source_eltype(::NoMod{T}) where T = T + +struct Mod{T1,T2,Mi,S,O,F} <: AbstractModifications + missingval::Mi + scale::S + offset::O + coerce::F + function Mod{T}(missingval, scale, offset, coerce) where T + missingval = missingval isa Pair && missingval[1] === missingval[2] ? missingval[1] : missingval + if isnokw(coerce) || isnothing(coerce) + coerce = convert + end + vals = map(_nokw2nothing, (missingval, scale, offset)) + T1 = _resolve_mod_eltype(T, vals...) + new{T1,T,map(typeof, vals)...,typeof(coerce)}(vals..., coerce) + end +end + +Base.eltype(::Mod{T1}) where T1 = T1 +source_eltype(::Mod{<:Any,T2}) where T2 = T2 + +function _resolve_mod_eltype(::Type{T}, missingval, scale, offset) where T + omv = _outer_missingval(missingval) + T1 = isnothing(omv) ? T : promote_type(T, typeof(omv)) + T2 = isnothing(scale) ? T1 : promote_type(T1, typeof(scale)) + T3 = isnothing(offset) ? T2 : promote_type(T2, typeof(offset)) + return T3 +end + +missingval(m::Mod) = m.missingval +missingval(m::NoMod) = m.missingval + +_inner_missingval(m::Mod) = _inner_missingval(m.missingval) +_inner_missingval(mv) = mv +_inner_missingval(mv::Pair) = mv[1] + +_outer_missingval(m::AbstractModifications) = _outer_missingval(m.missingval) +_outer_missingval(mv::Pair) = mv[2] +_outer_missingval(mv) = mv + +struct ModifiedDiskArray{I,T,N,V,M} <: DiskArrays.AbstractDiskArray{T,N} + var::V + mod::M +end +function ModifiedDiskArray(v::V, m::M; invert=false) where {V<:AbstractArray{<:Any,N},M} where N + T = invert ? source_eltype(m) : eltype(m) + return ModifiedDiskArray{invert,T,N,V,M}(v, m) +end + +Base.parent(A::ModifiedDiskArray) = A.var +Base.size(A::ModifiedDiskArray, args...) = size(parent(A), args...) +filename(A::ModifiedDiskArray) = filename(parent(A)) +missingval(A::ModifiedDiskArray) = A.missingval +DiskArrays.haschunks(A::ModifiedDiskArray) = DiskArrays.haschunks(parent(A)) +DiskArrays.eachchunk(A::ModifiedDiskArray) = DiskArrays.eachchunk(parent(A)) + +function DiskArrays.readblock!( + A::ModifiedDiskArray{false,<:Any,0}, out_block, I::AbstractVector... +) + out_block[] = _applymod(parent(A)[I...][], A.mod) + return out_block +end +function DiskArrays.readblock!( + A::ModifiedDiskArray{true,T,<:Any,0}, out_block, I::AbstractVector... +) where T + out_block[] = _invertmod(Val{T}(), parent(A)[I...], A.mod) + return out_block +end +function DiskArrays.readblock!( + A::ModifiedDiskArray{false}, out_block, I::AbstractVector... +) + out_block .= _applymod.(parent(A)[I...], (A.mod,)) + return out_block +end +function DiskArrays.readblock!( + A::ModifiedDiskArray{true,T}, out_block, I::AbstractVector... +) where T + out_block .= _invertmod.((Val{T}(),), parent(A)[I...], (A.mod,)) + return out_block +end + +function DiskArrays.writeblock!( + A::ModifiedDiskArray{false,<:Any,0,<:AbstractArray{T}}, block, I::AbstractVector... +) where T + + parent(A)[I...] = _invertmod(Val{source_eltype(A.mod)}(), block[], A.mod) + return nothing +end +function DiskArrays.writeblock!( + A::ModifiedDiskArray{true,<:Any,0,<:AbstractArray{T}}, _block, I::AbstractVector... +) where T + parent(A)[I...] = _applymod(Val{eltype(A.mod)}(), block[], A.mod) + return nothing +end +function DiskArrays.writeblock!( + A::ModifiedDiskArray{<:Any,<:Any,<:Any,<:AbstractArray{T}}, block, I::AbstractVector... +) where T + parent(A)[I...] = _invertmod.((Val{source_eltype(A.mod)}(),), block, (A.mod,)) + return nothing +end +function DiskArrays.writeblock!( + A::ModifiedDiskArray{true,<:Any,<:Any,<:AbstractArray{T}}, _block, I::AbstractVector... +) where T + parent(A)[I...] = _applymod.((Val{eltype(A.mod)}(),), block, (A.mod,)) + return nothing +end + +Base.@assume_effects :foldable function _applymod(x, m::Mod) + if _ismissing(x, _inner_missingval(m)) + _outer_missingval(m) + else + _scaleoffset(x, m) + end +end +Base.@assume_effects :foldable _applymod(x, ::NoMod) = x + +_ismissing(x, mv) = isequal(x, mv) +_ismissing(_, ::Nothing) = false + +_scaleoffset(x, m::Mod) = _scaleoffset(x, m.scale, m.offset) +_scaleoffset(x, scale, offset) = x * scale + offset +_scaleoffset(x, ::Nothing, offset) = x + offset +_scaleoffset(x, scale, ::Nothing) = x * scale +_scaleoffset(x, ::Nothing, ::Nothing) = x + +Base.@assume_effects :foldable function _invertmod(::Val{T}, x, m::Mod) where T + tm = if !isnothing(m.missingval) && _ismissing(x, _outer_missingval(m)) + return _inner_missingval(m) + else + x + end + return _scaleoffset_inv(T, tm, m) +end +Base.@assume_effects :foldable _invertmod(v, x, m::NoMod) = x + +Base.@assume_effects :foldable _scaleoffset_inv(::Type{T}, x, m::Mod) where T = + _scaleoffset_inv(m.coerce, T, x, m)::T +Base.@assume_effects :foldable _scaleoffset_inv(coerce::Base.Callable, ::Type{T}, x, m::Mod) where T = + coerce(T, _scaleoffset_inv1(x, m.scale, m.offset))::T + +Base.@assume_effects :foldable _scaleoffset_inv1(x, scale, offset) = (x - offset) / scale +Base.@assume_effects :foldable _scaleoffset_inv1(x, scale, ::Nothing) = x / scale +Base.@assume_effects :foldable _scaleoffset_inv1(x, ::Nothing, offset) = x - offset +Base.@assume_effects :foldable _scaleoffset_inv1(x, ::Nothing, ::Nothing) = x + +function _stack_mods(eltypes::Vector, metadata::Vector, missingval::AbstractVector; + scaled::Bool, coerce +) + map(eltypes, metadata, missingval) do T, md, mv + scale, offset = get_scale(md, scaled) + _mod(T, mv, scale, offset, coerce) + end +end +function _stack_mods(eltypes::Vector, metadata::Vector, missingval::Pair; + scaled::Bool, coerce +) + map(eltypes, metadata) do T, md + scale, offset = get_scale(md, scaled) + _mod(T, missingval, scale, offset, coerce) + end +end + +function _mod(::Type{T}, metadata, missingval; scaled::Bool, coerce) where T + scale, offset = get_scale(metadata, scaled) + _mod(T, missingval, scale, offset, coerce) +end +function _mod(::Type{T}, missingval, scale, offset, coerce) where T + if (isnokwornothing(missingval) || !(missingval isa Pair && !(isnothing(last(missingval))))) && + isnokwornothing(scale) && isnokwornothing(offset) + return NoMod{T}(missingval) + else + return Mod{T}(missingval, scale, offset, coerce) + end +end + +@inline get_scale(metadata::NoKW, scaled::Bool) = nothing, nothing +@inline function get_scale(metadata, scaled::Bool) + scale = scaled ? get(metadata, "scale", nothing) : nothing + offset = scaled ? get(metadata, "offset", nothing) : nothing + return scale, offset +end + +_mod_eltype(::AbstractArray{T}, ::NoMod) where T = T +_mod_eltype(::AbstractArray, m::Mod{T}) where T = T + +_mod_inverse_eltype(::AbstractArray{T}, ::NoMod) where T = T +_mod_inverse_eltype(::AbstractArray{T}, m::Mod) where T = + Base.promote_op(_invertmod, typeof(m.coerce), T, typeof(m)) + +_maybe_modify(var, m::Mod; kw...) = ModifiedDiskArray(var, m; kw...) +_maybe_modify(var, ::NoMod; kw...) = var + +_write_missingval_pair(A, missingval::Pair; kw...) = missingval +function _write_missingval_pair(A, missingval; + verbose=true, eltype, metadata=metadata(A), required=false +)::Pair + source_mv = Rasters.missingval(A) + disk_mv = if isnothing(source_mv) || isnothing(missingval) + if required + source_mv = isnothing(source_mv) ? missing : source_mv + _writeable_missing(eltype; verbose) + else + nothing + end + elseif isnokw(missingval) || ismissing(missingval) + # See if there is a missing value in metadata + md_mv = Rasters.missingval(metadata) + if isnothing(md_mv) + _writeable_missing(eltype; verbose) + else + md_mv + end + else + missingval + end + + return disk_mv => source_mv +end + +function _read_missingval_pair(var, metadata, missingval) + if isnokw(missingval) + mv = Rasters.missingval(var, metadata) + isnothing(mv) ? nothing => nothing : mv => missing + elseif isnothing(missingval) + nothing => nothing + elseif missingval isa Pair + # Pair: inner and outer missing values are manually defined + missingval + elseif missingval === Rasters.missingval + # `missingval` func: detect missing value and keep it as-is + mv = Rasters.missingval(var, metadata) + mv => mv + else + # Otherwise: detect missing value and convert it to `missingval` + Rasters.missingval(var, metadata) => missingval + end +end \ No newline at end of file diff --git a/src/nokw.jl b/src/nokw.jl index 7a6a4743f..89e0828e1 100644 --- a/src/nokw.jl +++ b/src/nokw.jl @@ -5,3 +5,8 @@ struct NoKW end const nokw = NoKW() @inline isnokw(::NoKW) = true @inline isnokw(_) = false +@inline isnokwornothing(::Union{NoKW,Nothing}) = true +@inline isnokwornothing(_) = false + +_nokw2nothing(::NoKW) = nothing +_nokw2nothing(x) = x diff --git a/src/openstack.jl b/src/openstack.jl index 84c3a0f04..0f3bec92c 100644 --- a/src/openstack.jl +++ b/src/openstack.jl @@ -12,15 +12,21 @@ contained in a single file. `X` is a backend type like `NCDsource`, and `K` is a tuple of `Symbol` keys. """ -struct OpenStack{X,K,T,DS} +struct OpenStack{X,K,T,DS,M} dataset::DS + mods::M +end +function OpenStack{X,K,T}( + dataset::DS, mods::M=NoMod() +) where {X,K,T,DS,M} + OpenStack{X,K,T,DS,M}(dataset, mods) end -OpenStack{X,K,T}(dataset::DS) where {X,K,T,DS} = OpenStack{X,K,T,DS}(dataset) dataset(os::OpenStack) = os.dataset # OpenStack has `X` and `K` parameter that is not recoverable from fields. -ConstructionBase.constructorof(::Type{<:OpenStack{X,K,T}}) where {X,K,T} = OpenStack{X,K,T} +ConstructionBase.constructorof(::Type{<:OpenStack{X,K,T}}) where {X,K,T} = + OpenStack{X,K,T} DD.data_eltype(::OpenStack{<:Any,<:Any,T}) where T = T @@ -28,4 +34,11 @@ Base.keys(::OpenStack{<:Any,K}) where K = K # TODO test this, and does it make sense to return an iterator here? Base.values(os::OpenStack{<:Any,K}) where K = (os[k] for k in K) # Indexing OpenStack returns memory-backed Raster. -Base.getindex(os::OpenStack, key::Symbol) = dataset(os)[key] +function Base.getindex(os::OpenStack{<:Any,K}, key::Symbol) where K + mods = os.mods + if mods isa AbstractModifications + _maybe_modify(dataset(os)[key], mods) + else + _maybe_modify(dataset(os)[key], NamedTuple{K}(mods)[key]) + end +end diff --git a/src/show.jl b/src/show.jl index 05f61bbc9..500a3ea56 100644 --- a/src/show.jl +++ b/src/show.jl @@ -19,9 +19,8 @@ function print_geo(io, mime, A; blockwidth) DD.print_block_separator(io, "raster", blockwidth) printstyled(io, "\n extent: "; color=:light_black) show(io, mime, Extents.extent(A)) - println(io) if missingval(A) !== nothing - printstyled(io, " missingval: "; color=:light_black) + printstyled(io, "\n missingval: "; color=:light_black) show(io, mime, missingval(A)) end if crs(A) !== nothing diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 5e9084da5..88f288a97 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -1,5 +1,7 @@ const CDM = CommonDataModel +const UNNAMED_FILE_KEY = "unnamed" + const CDM_DIM_MAP = Dict( "lat" => Y, "latitude" => Y, @@ -31,128 +33,50 @@ const CDM_STANDARD_NAME_MAP = Dict( "time" => Ti, ) -const UNNAMED_CDM_FILE_KEY = "unnamed" - - - -# CFDiskArray ######################################################################## - -struct CFDiskArray{T,N,TV,TA,TSA} <: DiskArrays.AbstractDiskArray{T,N} - var::CDM.CFVariable{T,N,TV,TA,TSA} -end - -# Rasters methods -FileArray{source}(var::CFDiskArray, filename::AbstractString; kw...) where source = - FileArray{source}(parent(var), filename; kw...) - -cleanreturn(A::CFDiskArray) = Array(A) -missingval(A::CFDiskArray) = missingval(parent(A)) - -# DimensionalData methods -_dims(var::CFDiskArray, args...) = _dims(parent(var), args...) -_metadata(var::CFDiskArray, args...) = _metadata(parent(var), args...) - -# Base methods -Base.parent(A::CFDiskArray) = A.var - -Base.getindex(os::OpenStack{<:CDMsource}, name::Symbol) = CFDiskArray(dataset(os)[name]) - -# DiskArrays.jl methods -function DiskArrays.readblock!(A::CFDiskArray, aout, i::AbstractUnitRange...) - aout .= getindex(parent(A), i...) -end -function DiskArrays.writeblock!(A::CFDiskArray, data, i::AbstractUnitRange...) - setindex!(parent(A), data, i...) - return data -end - -# We have to dig down to find the chunks as they are not implemented -# in the CDM, but they are in their internal objects. -DiskArrays.eachchunk(var::CFDiskArray) = _get_eachchunk(var) -DiskArrays.haschunks(var::CFDiskArray) = _get_haschunks(var) - -_get_eachchunk(var::CFDiskArray) = _get_eachchunk(parent(var)) -_get_eachchunk(var::CDM.CFVariable) = _get_eachchunk(var.var) -_get_haschunks(var::CFDiskArray) = _get_haschunks(parent(var)) -_get_haschunks(var::CDM.CFVariable) = _get_haschunks(var.var) - -_sourcetrait(var::CFDiskArray) = _sourcetrait(parent(var)) _sourcetrait(var::CDM.CFVariable) = _sourcetrait(var.var) -# CommonDataModel.jl methods -for method in (:size, :name, :dimnames, :dataset, :attribnames) - @eval begin - CDM.$(method)(var::CFDiskArray) = CDM.$(method)(parent(var)) - end -end - -for method in (:attrib, :dim) - @eval begin - CDM.$(method)(var::CFDiskArray, name::CDM.SymbolOrString) = CDM.$(method)(parent(var), name) - end -end - # Rasters methods for CDM types ############################### -function FileArray{source}(var::AbstractVariable, filename::AbstractString; kw...) where source<:CDMsource - eachchunk = DA.eachchunk(var) - haschunks = DA.haschunks(var) - T = eltype(var) - N = ndims(var) - FileArray{source,T,N}(filename, size(var); eachchunk, haschunks, kw...) -end - -function FileStack{source}( - ds::AbstractDataset, filename::AbstractString; +function FileStack{source}(ds::AbstractDataset, filename::AbstractString; write::Bool=false, group=nokw, name::NTuple{N,Symbol}, - vars + mods, + vars, ) where {source<:CDMsource,N} - T = NamedTuple{name,Tuple{map(var -> Union{Missing,eltype(var)}, vars)...}} + T = NamedTuple{name,Tuple{map(_mod_eltype, vars, mods)...}} layersizes = map(size, vars) - eachchunk = map(_get_eachchunk, vars) - haschunks = map(_get_haschunks, vars) + eachchunk = map(DiskArrays.eachchunk, vars) + haschunks = map(DiskArrays.haschunks, vars) group = isnokw(group) ? nothing : group - return FileStack{source,name,T}(filename, layersizes, group, eachchunk, haschunks, write) -end - -function Base.open(f::Function, A::FileArray{source}; write=A.write, kw...) where source<:CDMsource - _open(source(), filename(A); name=name(A), group=A.group, write, kw...) do var - f(var) - end + return FileStack{source,name,T}(filename, layersizes, group, eachchunk, haschunks, mods, write) end -function _open(f, ::CDMsource, ds::AbstractDataset; name=nokw, group=nothing, kw...) +function _open(f, ::CDMsource, ds::AbstractDataset; + name=nokw, + group=nothing, + mod=NoMod(), + kw... +) g = _getgroup(ds, group) - x = isnokw(name) ? g : CFDiskArray(g[_firstname(g, name)]) - cleanreturn(f(x)) + x = if isnokw(name) + g + else + v = CDM.variable(g, string(_name_or_firstname(g, name))) + _maybe_modify(v, mod) + end + return cleanreturn(f(x)) end -_open(f, ::CDMsource, var::CFDiskArray; kw...) = cleanreturn(f(var)) +_open(f, ::CDMsource, var::AbstractArray; mod=NoMod(), kw...) = + cleanreturn(f(_maybe_modify(var, mod))) # This allows arbitrary group nesting _getgroup(ds, ::Union{Nothing,NoKW}) = ds _getgroup(ds, group::Union{Symbol,AbstractString}) = ds.group[String(group)] _getgroup(ds, group::Pair) = _getgroup(ds.group[String(group[1])], group[2]) -function create(filename, source::CDMsource, T::Type, dims::DimTuple; - name=nokw, - missingval=nokw, - metadata=nokw, - lazy=true, - verbose=true, - chunks=nokw, -) - # Create layers of zero arrays - A = FillArrays.Zeros{T}(map(length, dims)) - rast = Raster(A, dims; name, missingval, metadata) - write(filename, source, rast; chunks) - return Raster(filename; metadata, source, lazy) -end - -filekey(ds::AbstractDataset, name) = _firstname(ds, name) -missingval(var::AbstractDataset) = missing -missingval(var::AbstractVariable{T}) where T = missing isa T ? missing : nothing +filekey(ds::AbstractDataset, name::Union{String,Symbol}) = Symbol(name) +filekey(ds::AbstractDataset, name) = _name_or_firstname(ds, name) cleanreturn(A::AbstractVariable) = Array(A) haslayers(::CDMsource) = true defaultcrs(::CDMsource) = EPSG(4326) @@ -183,7 +107,7 @@ end function _layers(ds::AbstractDataset, ::NoKW=nokw, ::NoKW=nokw) nondim = _nondimnames(ds) grid_mapping = String[] - vars = map(k -> ds[k], nondim) + vars = map(k -> CDM.variable(ds, k), nondim) attrs = map(CDM.attribs, vars) for attr in attrs if haskey(attr, "grid_mapping") @@ -197,9 +121,10 @@ function _layers(ds::AbstractDataset, ::NoKW=nokw, ::NoKW=nokw) attrs=attrs[bitinds], ) end - -function _layers(ds::AbstractDataset, names, ::NoKW) - vars = map(k -> ds[k], names) +_layers(ds::AbstractDataset, names, ::NoKW) = + _layers(ds, collect(names), nokw) +function _layers(ds::AbstractDataset, names::Vector, ::NoKW) + vars = map(n -> CDM.variable(ds, n), names) attrs = map(CDM.attribs, vars) (; names, vars, attrs) end @@ -258,11 +183,11 @@ end # Utils ######################################################################## # TODO don't load all keys here with _layers -_firstname(ds::AbstractDataset, name) = Symbol(name) -function _firstname(ds::AbstractDataset, name::NoKW=nokw) +_name_or_firstname(ds::AbstractDataset, name) = Symbol(name) +function _name_or_firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}=nokw) names = _nondimnames(ds) if length(names) > 0 - Symbol(first(names)) + return Symbol(first(names)) else throw(ArgumentError("No non-dimension layers found in dataset with keys: $(keys(ds))")) end @@ -475,7 +400,14 @@ function _parse_period(period_str::String) end end -_attribdict(md::Metadata{<:CDMsource}) = Dict{String,Any}(string(k) => v for (k, v) in md) +function _attribdict(md::Metadata{<:CDMsource}) + attrib = Dict{String,Any}() + for (k, v) in md + v isa Tuple && continue + attrib[string(k)] = v + end + return attrib +end _attribdict(md) = Dict{String,Any}() # Add axis and standard name attributes to dimension variables @@ -508,46 +440,74 @@ end _unuseddimerror(dimname) = error("Dataset contains unused dimension $dimname") - # Add a var array to a dataset before writing it. -function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; +function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T,N}; verbose=true, missingval=nokw, + metadata=nokw, chunks=nokw, chunksizes=_chunks_to_tuple(A, dims(A), chunks), + scale=nokw, + offset=nokw, + coerce=convert, + eltype=Missings.nonmissingtype(T), + write=true, + name=DD.name(A), + options=nokw, + driver=nokw, + f=identity, kw... ) where {T,N} - missingval = missingval isa NoKW ? Rasters.missingval(A) : missingval + _check_allowed_type(source, eltype) + write = f === identity ? write : true _def_dim_var!(ds, A) - attrib = _attribdict(metadata(A)) - # Set _FillValue - eltyp = Missings.nonmissingtype(T) - _check_allowed_type(_sourcetrait(ds), eltyp) - if ismissing(missingval) - fillval = if haskey(attrib, "_FillValue") && attrib["_FillValue"] isa eltyp - attrib["_FillValue"] - else - CDM.fillvalue(eltyp) - end - attrib["_FillValue"] = fillval - A = replace_missing(A, fillval) - elseif Rasters.missingval(A) isa T - attrib["_FillValue"] = missingval + metadata = if isnokw(metadata) + DD.metadata(A) + elseif isnothing(metadata) + NoMetadata() + else + metadata + end + + missingval_pair = _write_missingval_pair(A, missingval; eltype, verbose, metadata) + + attrib = _attribdict(metadata) + # Scale and offset + scale = if isnokw(scale) || isnothing(scale) + delete!(attrib, "scale_factor") + nothing + else + attrib["scale_factor"] = scale + end + offset = if isnokw(offset) || isnothing(offset) + delete!(attrib, "add_offset") + nothing else - verbose && !(missingval isa Nothing) && @warn "`missingval` $(missingval) is not the same type as your data $T." + attrib["add_offset"] = offset end - key = if string(DD.name(A)) == "" - UNNAMED_CDM_FILE_KEY + mod = _mod(eltype, missingval_pair, scale, offset, coerce) + + if !isnothing(missingval_pair[1]) + attrib["_FillValue"] = missingval_pair[1] + end + + key = if isnokw(name) || string(name) == "" + UNNAMED_FILE_KEY else - string(DD.name(A)) + string(name) end - dimnames = lowercase.(string.(map(name, dims(A)))) - var = CDM.defVar(ds, key, eltyp, dimnames; attrib=attrib, chunksizes, kw...) |> CFDiskArray + dimnames = lowercase.(string.(map(Rasters.name, dims(A)))) + var = CDM.defVar(ds, key, eltype, dimnames; attrib, chunksizes, kw...) - # Write with a DiskArrays.jl broadcast - var .= A + if write + m = _maybe_modify(var.var, mod) + # Write with a DiskArays.jl broadcast + m .= A + # Apply `f` while the variable is open + f(m) + end return nothing end diff --git a/src/sources/grd.jl b/src/sources/grd.jl index 1ebc46b8c..b7f7556ba 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -41,6 +41,16 @@ filename(grd::GRDdataset) = grd.filename filekey(grd::GRDdataset, name::NoKW) = get(attrib(grd), "layername", Symbol("")) filekey(A::RasterDiskArray{GRDsource}, name) = filekey(A.attrib, name) +# Already open, doesn't use `name` +function _open(f, ::GRDsource, A::RasterDiskArray{GRDsource}; + name=nokw, + group=nokw, + mod=NoMod(), + kw... +) + cleanreturn(f(_maybe_modify(A, mod))) +end + Base.eltype(::GRDdataset{T}) where T = T function Base.size(grd::GRDdataset) ncols = parse(Int, grd.attrib["ncols"]) @@ -59,7 +69,12 @@ DiskArrays.haschunks(::GRDdataset) = DiskArrays.Unchunked() function _dims(A::RasterDiskArray{GRDsource}, crs=nokw, mappedcrs=nokw) attrib = A.attrib.attrib - crs = crs isa NoKW ? ProjString(attrib["projection"]) : crs + crs = if crs isa NoKW + str = attrib["projection"] + str == "" ? nothing : ProjString(str) + else + crs + end mappedcrs = mappedcrs isa NoKW ? nothing : mappedcrs xsize, ysize, nbands = size(A) @@ -115,14 +130,18 @@ function _metadata(A::RasterDiskArray{GRDsource}, args...) metadata end -function missingval(A::RasterDiskArray{GRDsource,T}) where T - if haskey(A.attrib.attrib, "nodatavalue") - ndv = A.attrib.attrib["nodatavalue"] +function missingval(A::RasterDiskArray{GRDsource,T}, args...) where T + _grd_mv(T, A.attrib.attrib) +end + +function _grd_mv(::Type{T}, md; verbose=true) where T + if haskey(md, "nodatavalue") + ndv = md["nodatavalue"] ndv === "nothing" && return nothing try return parse(T, ndv) catch - @warn "nodatavalue $(ndv) is not convertible to data type $T. `missingval` set to `nothing`." + verbose && @warn "nodatavalue $(ndv) is not convertible to data type $T. `missingval` set to `nothing`." return nothing end else @@ -133,16 +152,6 @@ end _sizeof(A::GRDdataset{T}) where T = sizeof(T) * prod(size(A)) _sizeof(A::RasterDiskArray{GRDsource}) = _sizeof(A.attrib) - -# Array ######################################################################## - -function FileArray{GRDsource}(A::RasterDiskArray{<:Any,T}, filename=filename(A.attrib); kw...) where T - filename = first(splitext(filename)) - eachchunk = DiskArrays.eachchunk(A) - haschunks = DiskArrays.haschunks(A) - FileArray{GRDsource,T,3}(filename, size(A); eachchunk, haschunks, kw...) -end - # Base methods """ @@ -163,13 +172,31 @@ Returns the base of `filename` with a `.grd` extension. """ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; force=false, + verbose=true, + write=true, missingval=nokw, chunks=nokw, + scale=nokw, + offset=nokw, + coerce=nokw, + eltype=Missings.nonmissingtype(eltype(A)), + f=identity, kw... ) - chunks isa NoKW || @warn "specifying chunks not supported for .grd files" check_can_write(filename, force) - A = _maybe_use_type_missingval(A, GRDsource(), missingval) + write = f === identity ? write : true + haskey(REVGRD_DATATYPE_TRANSLATION, eltype) || throw(ArgumentError(""" + Element type $eltype cannot be written to grd file. Convert it to one of $(keys(REVGRD_DATATYPE_TRANSLATION)), + usually by broadcasting the desired type constructor over the `Raster`, e.g. `newrast = Float32.(rast)`")) + """ + )) + isnokwornothing(scale) && isnokwornothing(offset) || throw(ArgumentError("Cant write scale or offset to .grd files")) + chunks isa NoKW || @warn "specifying chunks not supported for .grd files" + # Missing values + missingval_pair = _write_missingval_pair(A, missingval; eltype, verbose) + + # Missing values + if hasdim(A, Band) correctedA = permutedims(A, (X, Y, Band)) |> a -> reorder(a, (X(GRD_X_ORDER), Y(GRD_Y_ORDER), Band(GRD_BAND_ORDER))) @@ -181,19 +208,40 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; end # Remove extension filename = splitext(filename)[1] - minvalue = minimum(skipmissing(A)) - maxvalue = maximum(skipmissing(A)) - _write_grd(filename, eltype(A), dims(A), Rasters.missingval(A), minvalue, maxvalue, name(A)) - # Data: gri file - open(filename * ".gri", write=true) do IO - write(IO, parent(correctedA)) + # Data: write a raw gri file from the array + mod = _mod(eltype, missingval_pair, scale, offset, coerce) + gri_filename = filename * ".gri" + isfile(gri_filename) && rm(gri_filename) + _write_gri(gri_filename, Val{source_eltype(mod)}(), mod, parent(correctedA)) + _write_grd(filename, eltype, dims(A), missingval_pair[1], name(A)) + + if write + _mmapgrd(filename, source_eltype(mod), size(A); write=true) do M + f(rebuild(A, _maybe_modify(M, mod))) + end end return filename * ".grd" end -function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) +function _write_gri(filename, v, ::NoMod, A::Array{T}) where T + open(filename; write=true, lock=false) do io + write(io, A) + end +end +function _write_gri(filename, v, mod, A::AbstractArray) + open(filename; write=true, lock=false) do io + # Avoid `Ref` allocations + ref = Ref{source_eltype(mod)}(_invertmod(v, first(A), mod)) + for x in A # We are modifying the source array so invert the modifications + ref[] = _invertmod(v, x, mod) + write(io, ref) + end + end +end + +function _write_grd(filename, T, dims, missingval, name) filename = splitext(filename)[1] x, y = map(DD.dims(dims, (X(), Y()))) do d @@ -204,12 +252,12 @@ function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) ncols, nrows = length(x), length(y) xmin, xmax = bounds(x) ymin, ymax = bounds(y) - proj = convert(String, convert(ProjString, crs(x))) + proj = isnothing(crs(x)) ? "" : convert(String, convert(ProjString, crs(x))) datatype = REVGRD_DATATYPE_TRANSLATION[T] nodatavalue = missingval # Metadata: grd file - open(filename * ".grd"; write=true) do IO + open(filename * ".grd"; write=true, lock=false) do IO write(IO, """ [general] @@ -228,8 +276,6 @@ function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) nodatavalue= $nodatavalue byteorder= little nbands= $nbands - minvalue= $minvalue - maxvalue= $maxvalue [description] layername= $name """ @@ -237,44 +283,31 @@ function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) end end -function create(filename, ::GRDsource, T::Type, dims::DD.DimTuple; - name="layer", metadata=nothing, missingval=nothing, lazy=true, +# Rasters methods +function _open(f, ::GRDsource, filename::AbstractString; + mod=NoMod(), + write=false, + kw... ) - # Remove extension - basename = splitext(filename)[1] - minvalue = maxvalue = zero(T) - sze = map(length, DD.dims(dims, (XDim, YDim, Band))) - - # Metadata: grd file - _write_grd(basename, T, dims, missingval, minvalue, maxvalue, name) - - # Data: gri file - open(basename * ".gri", write=true) do IO - write(IO, FillArrays.Zeros(sze)) - end - return Raster(filename; source=GRDsource(), lazy) -end - -# AbstractRasterStack methods - -# Custom `open` because the data and metadata objects are separate -# Here we _mmapgrd instead of `_open` -function Base.open(f::Function, A::FileArray{GRDsource}, args...; write=A.write) - _mmapgrd(mm -> f(RasterDiskArray{GRDsource}(mm, A.eachchunk, A.haschunks)), A; write) -end - -function _open(f, ::GRDsource, filename::AbstractString; write=false, name=nokw, group=nokw) isfile(filename) || _filenotfound_error(filename) attr = GRDdataset(filename) _mmapgrd(attr; write) do mm A = RasterDiskArray{GRDsource}(mm, DA.eachchunk(attr), DA.haschunks(attr), attr) - f(A) + A1 = _maybe_modify(A, mod) + f(A1) end end _open(f, ::GRDsource, attrib::GRDdataset; kw...) = f(attrib) +function _open(f, ::GRDsource, A::RasterDiskArray; + mod=NoMod(), + kw... +) + cleanreturn(f(_maybe_modify(A, mod))) +end haslayers(::GRDsource) = false + # Utils ######################################################################## function _mmapgrd(f, x::Union{FileArray,GRDdataset}; kw...) diff --git a/src/stack.jl b/src/stack.jl index 796793e73..8fe81df14 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -68,15 +68,24 @@ function DD.layers(s::AbstractRasterStack{<:Any,<:Any,<:Any,<:OpenStack{<:Any,Ke end function DD.rebuild( - s::AbstractRasterStack, data, dims=dims(s), refdims=refdims(s), - layerdims=DD.layerdims(s), metadata=metadata(s), layermetadata=DD.layermetadata(s), + s::AbstractRasterStack, + data, + dims=dims(s), + refdims=refdims(s), + layerdims=DD.layerdims(s), + metadata=metadata(s), + layermetadata=DD.layermetadata(s), missingval=missingval(s), ) DD.basetypeof(s)(data, dims, refdims, layerdims, metadata, layermetadata, missingval) end function DD.rebuild(s::AbstractRasterStack; - data=parent(s), dims=dims(s), refdims=refdims(s), layerdims=DD.layerdims(s), - metadata=metadata(s), layermetadata=DD.layermetadata(s), + data=parent(s), + dims=dims(s), + refdims=refdims(s), + layerdims=DD.layerdims(s), + metadata=metadata(s), + layermetadata=DD.layermetadata(s), missingval=missingval(s), ) DD.basetypeof(s)( @@ -158,19 +167,21 @@ Load a file path or a `NamedTuple` of paths as a `RasterStack`, or convert argum - `name`: Used as stack layer names when a `Tuple`, `Vector` or splat of `Raster` is passed in. Has no effect when `NameTuple` is used - the `NamedTuple` keys are the layer names. -$GROUP_KEYWORD +$GROUP_KEYWORD - `metadata`: A `Dict` or `DimensionalData.Metadata` object. -- `missingval`: a single value for all layers or a `NamedTuple` of - missingval for each layer. `nothing` specifies no missing value. -$CONSTRUCTOR_CRS_KEYWORD -$CONSTRUCTOR_MAPPEDCRS_KEYWORD +$MISSINGVAL_KEYWORD + For `RasterStack` a `NamedTuple` can also be passed if layers + should have different `missingval`. +$CONSTRUCTOR_CRS_KEYWORD +$CONSTRUCTOR_MAPPEDCRS_KEYWORD - `refdims`: `Tuple` of `Dimension` that the stack was sliced from. For when one or multiple filepaths are used: $DROPBAND_KEYWORD $LAZY_KEYWORD -$REPLACE_MISSING_KEYWORD +$RAW_KEYWORD +$SCALED_KEYWORD $SOURCE_KEYWORD For when a single `Raster` is used: @@ -216,16 +227,22 @@ function RasterStack( missingval=nokw, kw... ) - # Handle values that must be be `NamedTuple` + K = keys(data) + # Handle values that musbe be `NamedTuple` layermetadata = if layermetadata isa NamedTuple layermetadata - elseif layermetadata isa Union{NoKW,Nothing,NoMetadata} - map(_ -> NoMetadata(), layerdims) + elseif layermetadata isa Union{Nothing,NoKW,NoMetadata} + NamedTuple{K}(map(_ -> NoMetadata(), K)) else throw(ArgumentError("$layermetadata is not a valid input for `layermetadata`. Try a `NamedTuple` of `Dict`, `MetaData` or `NoMetadata`")) end metadata = isnokw(metadata) ? NoMetadata() : metadata missingval = _maybe_collapse_missingval(missingval) + layerdims = if layerdims isa NamedTuple + layerdims + else + NamedTuple{K}(ntuple(i -> layerdims[i], Val{length(K)}())) + end st = RasterStack( data, dims, refdims, layerdims, metadata, layermetadata, missingval ) @@ -233,7 +250,7 @@ function RasterStack( end # Convert Tuple/Array of array to NamedTuples using name/key function RasterStack(data::Tuple{Vararg{AbstractArray}}, dims::Tuple; - name::Union{Tuple,AbstractArray,NamedTuple,Nothing}=nokw, + name::Union{Tuple,AbstractArray,NamedTuple,Nothing}=nokw, kw... ) isnokw(name) && throw(ArgumentError("Pass a Tuple, Array or NamedTuple of names to the `name` keyword")) @@ -246,7 +263,7 @@ function RasterStack(data::NamedTuple{<:Any,<:Tuple{Vararg{AbstractArray}}}, dim ) if isnokw(layerdims) # TODO: make this more sophisticated and match dimension length to axes? - # We don't worry about Raster keywords because these rasters will be deconstructed + # We don't worry about Raster keywords because these rasters will be deconstructed # again later, and `kw` will define the RasterStack keywords layers = map(data) do A Raster(A, dims[1:ndims(A)]) @@ -274,7 +291,7 @@ function RasterStack(layers::NamedTuple{K,<:Tuple{Vararg{AbstractDimArray}}}; refdims::Tuple=(), missingval=map(missingval, _layers), metadata=NoMetadata(), - layermetadata=map(DD.metadata, _layers), + layermetadata::NamedTuple{K}=map(DD.metadata, _layers), layerdims::NamedTuple{K}=map(DD.basedims, _layers), kw... ) where K @@ -297,7 +314,7 @@ function RasterStack(table, dims::Tuple; layers = map(name) do k col = Tables.getcolumn(table, k) reshape(col, map(length, dims)) - end |> NamedTuple{name} + end |> NamedTuple{cleankeys(name)} end return RasterStack(layers, dims; kw...) end @@ -332,8 +349,8 @@ function RasterStack(s::DD.AbstractDimStack; data=parent(s), dims::Union{Tuple,NoKW}=dims(s), refdims::Tuple=refdims(s), - layerdims=DD.layerdims(s), metadata=metadata(s), + layerdims=DD.layerdims(s), layermetadata=DD.layermetadata(s), missingval=missingval(s), kw... @@ -348,21 +365,33 @@ function RasterStack( name=map(filekey, filenames), kw... ) - RasterStack(NamedTuple{cleankeys(Tuple(name))}(filenames); kw...) + RasterStack(NamedTuple{cleankeys(name)}(filenames); kw...) end function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; source=nokw, - missingval=nokw, metadata=nokw, resize=nokw, layermetadata::Union{NoKW,NamedTuple{K}}=nokw, layerdims::Union{NoKW,NamedTuple{K}}=nokw, + missingval=nokw, + replace_missing=nokw, + scaled=nokw, + raw=false, + verbose=true, kw... ) where K - missingval = missingval isa NamedTuple ? missingval : map(_ -> missingval, filenames) - layermetadata = layermetadata isa NamedTuple ? layermetadata : map(_ -> layermetadata, filenames) - layers = map(keys(filenames), values(filenames), values(missingval), values(layermetadata)) do name, fn, mv, md - Raster(fn; source=_sourcetrait(fn, source), name, missingval=mv, metadata=md, kw...) + _maybe_warn_replace_missing(replace_missing) + scaled, missingval = _raw_check(raw, scaled, missingval, verbose) + + layermissingval = collect(_stack_nt(filenames, missingval)) + fn = collect(filenames) + layermetadata = layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) + layerdims = layerdims isa NamedTuple ? collect(layerdims) : map(_ -> NoKW(), fn) + layers = map(K, fn, layermetadata, layerdims, layermissingval) do name, fn, md, d, mv + Raster(fn; + source=_sourcetrait(fn, source), + dims=d, name, metadata=md, missingval=mv, scaled, verbose, kw... + ) end return RasterStack(NamedTuple{K}(layers); resize, metadata) end @@ -370,12 +399,20 @@ end function RasterStack(filename::AbstractString; lazy::Bool=false, dropband::Bool=true, - replace_missing::Bool=false, + raw::Bool=false, source::Union{Symbol,Source,NoKW}=nokw, + missingval=nokw, name=nokw, - group=nokw, + group::Union{Symbol,AbstractString,NoKW}=nokw, + scaled::Union{Bool,NoKW}=nokw, + coerce=nokw, + verbose::Bool=true, + replace_missing=nokw, # deprecated kw... ) + _maybe_warn_replace_missing(replace_missing) + scaled, missingval = _raw_check(raw, scaled, missingval, verbose) + source = _sourcetrait(filename, source) st = if isdir(filename) && !(source isa Zarrsource) # Load as a whole directory @@ -383,19 +420,21 @@ function RasterStack(filename::AbstractString; length(filenames) > 0 || throw(ArgumentError("No files in directory $filename")) # Detect keys from names name = if isnokw(name) - all_shared = true stripped = lstrip.(x -> x in (" ", "_"), (x -> x[1:end]).(filenames)) Symbol.(replace.(first.(splitext.(stripped)), Ref(" " => "_"))) else name end - RasterStack(joinpath.(Ref(filename), filenames); lazy, replace_missing, dropband, group, kw...) + RasterStack(joinpath.(Ref(filename), filenames); + missingval, scaled, coerce, lazy, dropband, group, kw... + ) else # Load as a single file if haslayers(source) # With multiple named layers - l_st = _layer_stack(filename; source, name, lazy, group, replace_missing, kw...) - + l_st = _layer_stack(filename; + source, name, lazy, group, missingval, scaled, coerce, kw... + ) # Maybe split the stack into separate arrays to remove extra dims. if isnokw(name) l_st @@ -404,20 +443,15 @@ function RasterStack(filename::AbstractString; end else # With bands actings as layers - RasterStack(Raster(filename; source, lazy, replace_missing, dropband=false); kw...) + raster = Raster(filename; + source, lazy, missingval, scaled, coerce, dropband=false, + ) + RasterStack(raster; kw...) end end # Maybe drop the Band dimension - if dropband && hasdim(st, Band()) && size(st, Band()) == 1 - if lazy - return view(st, Band(1)) # TODO fix dropdims in DiskArrays - else - return dropdims(st; dims=Band()) - end - else - return st - end + return _maybe_drop_single_band(st, dropband, lazy) end # TODO test this properly @@ -478,12 +512,14 @@ function _layer_stack(filename; name=nokw, group=nokw, metadata=nokw, - layerdims=nokw, layermetadata=nokw, + layerdims=nokw, missingval=nokw, crs=nokw, mappedcrs=nokw, - replace_missing=false, + coerce=convert, + scaled=nokw, + checkmem=true, lazy=false, kw... ) @@ -493,29 +529,54 @@ function _layer_stack(filename; dimdict = _dimdict(ds, crs, mappedcrs) refdims = isnokw(refdims) || isnothing(refdims) ? () : refdims metadata = isnokw(metadata) ? _metadata(ds) : metadata - layerdims = isnokw(layerdims) ? _layerdims(ds; layers, dimdict) : layerdims - dims = _sort_by_layerdims(isnokw(dims) ? _dims(ds, dimdict) : dims, layerdims) - layermetadata = isnokw(layermetadata) ? _layermetadata(ds; layers) : layermetadata - missingval = isnokw(missingval) ? Rasters.missingval(ds) : missingval + layerdims_vec = isnokw(layerdims) ? _layerdims(ds; layers, dimdict) : layerdims + dims = _sort_by_layerdims(isnokw(dims) ? _dims(ds, dimdict) : dims, layerdims_vec) + layermetadata_vec = if isnokw(layermetadata) + _layermetadata(ds; layers) + else + layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) + end name = Tuple(map(Symbol, layers.names)) + NT = NamedTuple{name} + layer_mvs = map(Rasters.missingval, layers.vars, layermetadata_vec) + missingval_vec = if isnokw(missingval) + layer_mvs .=> missing + elseif missingval isa NamedTuple + keys(missingval) == name || throw(ArgumentError("`missingval` names $(keys(missingval)) do not match layer names $name")) + layer_mvs .=> collect(missingval) + elseif missingval === Rasters.missingval + layer_mvs + else + layer_mvs .=> (missingval,) # Wrap in case its not iterable + end::Vector + eltype_vec = map(eltype, layers.vars) + mod_vec = _stack_mods(eltype_vec, layermetadata_vec, missingval_vec; scaled, coerce) data = if lazy - # TODO replace_missing is currently always true for - # CommonDataModel FileStack. We should change this. - FileStack{typeof(source)}(ds, filename; name, group, vars=Tuple(layers.vars)) + vars = ntuple(i -> layers.vars[i], length(name)) + mods = ntuple(i -> mod_vec[i], length(name)) + FileStack{typeof(source)}(ds, filename; name, group, mods, vars) else - map(layers.vars) do v - x = Array(replace_missing ? _replace_missing(v, missingval) : v) + map(layers.vars, layermetadata_vec, mod_vec) do var, md, mod + modvar = _maybe_modify(var, mod) + checkmem && _checkobjmem(modvar) + x = Array(modvar) x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug - end |> NamedTuple{name} - end - if replace_missing - missingval = missing + end |> NT end - data, (; dims, refdims, layerdims=NamedTuple{name}(layerdims), metadata, layermetadata=NamedTuple{name}(layermetadata), missingval) + mv_outer = NT(map(_outer_missingval, mod_vec)) + return data, (; + dims, + refdims, + layerdims=NT(layerdims_vec), + metadata, + layermetadata=NT(layermetadata_vec), + missingval=mv_outer, + ) end return RasterStack(data; field_kw..., kw...) end + # Try to sort the dimensions by layer dimension into a sensible # order that applies without permutation, preferencing the layers # with most dimensions, and those that come first. diff --git a/src/utils.jl b/src/utils.jl index 3de723b03..41c11f609 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,27 +1,29 @@ +# File paths, urls and strings + filter_ext(path, ext::AbstractString) = filter(fn -> splitext(fn)[2] == ext, readdir(path; join=true)) filter_ext(path, exts::Union{Tuple,AbstractArray}) = filter(fn -> splitext(fn)[2] in exts, readdir(path; join=true)) filter_ext(path, ext::Nothing) = readdir(path; join=true) -cleankeys(name) = (_cleankey(name),) -function cleankeys(keys::Union{NamedTuple,Tuple,AbstractArray}) - Tuple(map(_cleankey, keys, ntuple(i -> i, length(keys)))) -end - -function _cleankey(name::Union{Symbol,AbstractString,Name,NoName}, i=1) - if name in (NoName(), Symbol(""), Name(Symbol(""))) - Symbol("layer$i") +_maybe_add_suffix(filename::Nothing, suffix) = nothing +_maybe_add_suffix(filename::Nothing, suffix::Union{Nothing,NoKW}) = nothing +_maybe_add_suffix(filename, suffix::Union{Nothing,NoKW}) = filename +function _maybe_add_suffix(filename, suffix) + base, ext = splitext(filename) + if string(suffix) == "" + filename else - Symbol(name) + return string(base, "_", suffix, ext) end end -nolookup_to_sampled(A) = rebuild(A; dims=nolookup_to_sampled(dims(A))) -nolookup_to_sampled(dims::DimTuple) = map(nolookup_to_sampled, dims) -nolookup_to_sampled(d::Dimension) = - lookup(d) isa NoLookup ? set(d, Sampled(; sampling=Points())) : d +# Modified from IsURL.jl, many thanks to @zlatanvasovic +const WINDOWSREGEX = r"^[a-zA-Z]:[\\]" +const URLREGEX = r"^[a-zA-Z][a-zA-Z\d+\-.]*:" + +_isurl(str::AbstractString) = !occursin(WINDOWSREGEX, str) && occursin(URLREGEX, str) function _maybe_use_type_missingval(A::AbstractRaster{T}, source::Source, missingval=nokw) where T if ismissing(Rasters.missingval(A)) @@ -34,16 +36,17 @@ function _maybe_use_type_missingval(A::AbstractRaster{T}, source::Source, missin end end -# Create a standardised Metadata object of source T, containing a `Dict{String,Any}` -_metadatadict(s::Source, p1::Pair, pairs::Pair...) = - _metadatadict(s, (p1, pairs...)) -_metadatadict(::S) where S<:Source = Metadata{S}(Dict{String,Any}()) -function _metadatadict(::S, pairs) where S<:Source - dict = Dict{String,Any}() - for (k, v) in pairs - dict[String(k)] = v +cleankeys(name) = (_cleankey(name),) +function cleankeys(keys::Union{NamedTuple,Tuple,AbstractArray}) + Tuple(map(_cleankey, keys, ntuple(i -> i, length(keys)))) +end + +function _cleankey(name::Union{Symbol,AbstractString,Name,NoName}, i=1) + if name in (NoName(), Symbol(""), Name(Symbol(""))) + Symbol("layer$i") + else + Symbol(name) end - return Metadata{S}(dict) end # We often need to convert the locus and the lookup in the same step, @@ -74,129 +77,158 @@ end # _convert_by_lookup(::Type{Projected}, dim) = shiftlocus(Center(), convertlookup(Projected, dim)) -_unwrap(::Val{X}) where X = X -_unwrap(x) = x +# Missing values -_missingval_or_missing(x) = _maybe_nothing_to_missing(missingval(x)) +_missingval_or_missing(x) = _maybe_to_missing(missingval(x)) -_maybe_nothing_to_missing(::Nothing) = missing -_maybe_nothing_to_missing(missingval) = missingval +_maybe_to_missing(::Union{Nothing,NoKW}) = missing +_maybe_to_missing(missingval) = missingval -maybe_eps(dims::DimTuple; kw...) = map(maybe_eps, dims; kw...) -maybe_eps(dim::Dimension; kw...) = maybe_eps(eltype(dim); kw...) -maybe_eps(x; kw...) = maybe_eps(typeof(x); kw...) -maybe_eps(::Type; kw...) = nothing -maybe_eps(T::Type{<:AbstractFloat}; kw...) = _default_eps(T; kw...) - -# These are pretty random defaults, but seem to work -_default_eps(T::Type{<:Float32}; grow=true) = grow ? eps(T) : 100eps(T) -_default_eps(T::Type{<:Float64}; grow=true) = grow ? eps(T) : 1000eps(T) -_default_eps(T::Type{<:Integer}) = T(1) -_default_eps(::Type) = nothing - -_writeable_missing(filename::Nothing, T) = missing -_writeable_missing(filename::AbstractString, T) = _writeable_missing(T) -function _writeable_missing(T) - missingval = _type_missingval(Missings.nonmissingtype(T)) - @info "`missingval` set to $missingval" +_writeable_missing(filename::Nothing, T; kw...) = missing +_writeable_missing(filename::AbstractString, T; kw...) = _writeable_missing(T; kw...) +_writeable_missing(::Type{Missing}; verbose=true) = _writeable_missing(UInt8; verbose=true) +function _writeable_missing(T; verbose=true) + missingval = _type_missingval(T) + verbose && @info "`missingval` set to $missingval on disk" return missingval end -# Map filename suffix over a stack -function mapargs(f, st::AbstractRasterStack, args...) - layers = map(values(st), args...) do A, mappedargs... - f(A, mappedargs...) - end - return DD.rebuild_from_arrays(st, Tuple(layers)) -end - -_without_mapped_crs(f, x) = _without_mapped_crs(f, x, mappedcrs(x)) -_without_mapped_crs(f, x, ::Nothing) = f(x) -function _without_mapped_crs(f, dims::DimTuple, mappedcrs::GeoFormat) - dims1 = setmappedcrs(dims, nothing) - x = f(dims1) - return if x isa DimTuple - setmappedcrs(x, mappedcrs) - else - x - end -end -function _without_mapped_crs(f, A::AbstractRaster, mappedcrs::GeoFormat) - A = setmappedcrs(A, nothing) - x = f(A) - return if x isa AbstractRaster - setmappedcrs(x, mappedcrs) - else - x - end -end -function _without_mapped_crs(f, st::AbstractRasterStack, mappedcrs::GeoFormat) - st1 = maplayers(A -> setmappedcrs(A, nothing), st) - x = f(st1) - return if x isa AbstractRasterStack - setmappedcrs(x, mappedcrs) +_type_missingval(::Type{T}) where T = _type_missingval1(Missings.nonmissingtype(T)) + +_type_missingval1(::Type{T}) where T<:Number = typemin(T) +_type_missingval1(::Type{T}) where T<:Unsigned = typemax(T) +_type_missingval1(::Type{T}) where T<:AbstractString = T("") + +_fix_missingval(::Type, ::Union{NoKW,Nothing}) = nothing +_fix_missingval(::AbstractArray, ::Nothing) = nothing +_fix_missingval(A::AbstractArray, ::NoKW) = _fix_missingval(A, Rasters.missingval(A)) +_fix_missingval(::AbstractArray{T}, missingval) where T = _fix_missingval(T, missingval) +function _fix_missingval(::Type{T}, missingval::M) where {T,M} + T1 = nonmissingtype(T) + if missingval isa T + missingval + elseif hasmethod(convert, Tuple{Type{T1},M}) && isreal(missingval) && + missingval <= typemax(T1) && missingval >= typemin(T1) + if T1 <: Integer && !isinteger(missingval) + nothing + else + convert(T, missingval) + end else - x + nothing end end -function _extent2dims(to; size=nothing, res=nothing, crs=nothing, kw...) - _extent2dims(to, size, res, crs) -end -function _extent2dims(to::Extents.Extent, size::Nothing, res::Nothing, crs) - isnothing(res) && throw(ArgumentError("Pass either `size` or `res` keywords or a `Tuple` of `Dimension`s for `to`.")) -end -function _extent2dims(to::Extents.Extent, size, res, crs) - isnothing(res) || _size_and_res_error() -end -function _extent2dims(to::Extents.Extent{K}, size::Nothing, res::Real, crs) where K - tuple_res = ntuple(_ -> res, length(K)) - _extent2dims(to, size, tuple_res, crs) -end -function _extent2dims(to::Extents.Extent{K}, size::Nothing, res, crs) where K - ranges = map(values(to), res) do bounds, r - start, stop_closed = bounds - stop_open = stop_closed + maybe_eps(stop_closed; grow=false) - length = ceil(Int, (stop_open - start) / r) - range(; start, step=r, length) - end - return _extent2dims(to, ranges, crs) -end -function _extent2dims(to::Extents.Extent{K}, size, res::Nothing, crs) where K - if size isa Int - size = ntuple(_ -> size, length(K)) + +# Extents + +function _extent2dims(to::Extents.Extent; + size=nokw, res=nokw, crs=nokw, mappedcrs=nokw, sampling=nokw, kw... +) + sampling = _match_to_extent(to, isnokw(sampling) ? Intervals(Start()) : sampling) + _extent2dims(to, size, res; crs, mappedcrs, sampling, kw...) +end +_extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Union{Nothing,NoKW}; kw...) = + throw(ArgumentError("Pass either `size` or `res` keywords or a `Tuple` of `Dimension`s for `to`.")) +_extent2dims(to::Extents.Extent, size, res; kw...) = _size_and_res_error() +function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res; + sampling::Tuple, closed=true, kw... +) + res = _match_to_extent(to, res) + ranges = map(values(to), res, sampling) do (start, stop), step, samp + @assert step >= zero(step) "only positive `res` are supported, got $step" + if samp isa Intervals + if locus(samp) isa End + reverse(range(; start=stop+step, step=-step, stop=start+step)) + else + r = range(; start, step, stop) + if locus(samp) isa Start + r + else # Center + r .+ step / 2 + end + end + else + range(; start, step, stop) + end end - ranges = map(values(to), size) do bounds, length - start, stop_closed = bounds - stop_open = stop_closed + maybe_eps(stop_closed; grow=false) - step = (stop_open - start) / length - range(; start, step, length) + return _extent2dims(to, ranges; sampling, kw...) +end +function _extent2dims(to::Extents.Extent, size, res::Union{Nothing,NoKW}; + sampling::Tuple, crs, mappedcrs, closed=true, kw... +) + size = _match_to_extent(to, size) + ranges = map(values(to), size, sampling) do (start, stop), length, samp + r1 = range(; start, stop, length) + if samp isa Points + r1 + else + # We need to buffer extent for a closed interval input so that e.g. + # The raster will actually contain points of the extent, as raster + # pixels are closed/open intervals not closed/closed. + # Its hard to add a very small amount to any fp number and have + # it propagate through to the final extent. But this setup seems to work. + # We use the step or r1 to offset the end point of r, the buffer with 10 float + # steps, which seems to be enough. But it's arbitrary and count be revised. + nfloatsteps = 10 + step = (stop - start) / length + if locus(samp) isa End + start_open = if (closed && start isa AbstractFloat) + prevfloat(start + step, nfloatsteps) + else + start + step + end + reverse(range(; start=stop, stop=start_open, length)) + else + stop_open = if closed && stop isa AbstractFloat + nextfloat(stop - step, nfloatsteps) + else + stop - step + end + r = range(; start, stop=stop_open, length) + if locus(samp) isa Start + r + else # Center + r .+ (step / 2) + end + end + end end - return _extent2dims(to, ranges, crs) + return _extent2dims(to, ranges; sampling, crs, mappedcrs) end -function _extent2dims(to::Extents.Extent{K}, ranges, crs) where K +function _extent2dims(::Extents.Extent{K}, ranges; + crs, mappedcrs, sampling::Tuple, kw... +) where K + crs = isnokw(crs) ? nothing : crs + mappedcrs = isnokw(mappedcrs) ? nothing : mappedcrs emptydims = map(name2dim, K) - lookups = map(ranges) do range - Projected(range; - order=ForwardOrdered(), - sampling=Intervals(Start()), - span=Regular(step(range)), - crs, - ) + lookups = map(emptydims, ranges, sampling) do d, range, s + order = Lookups.orderof(range) + span = Regular(step(range)) + if d isa SpatialDim && !isnothing(crs) + Projected(range; sampling=s, order, span, crs, mappedcrs) + else + Sampled(range; sampling=s, order, span) + end end d = map(rebuild, emptydims, lookups) return d end -function _as_intervals(ds::Tuple) - # Rasterization only makes sense on Sampled Intervals - interval_dims = map(dims(ds, DEFAULT_POINT_ORDER)) do d - l = parent(d) - rebuild(d, rebuild(l; sampling=Intervals(locus(l)))) +function _match_to_extent(::Extents.Extent{K}, x) where K + if x isa DimTuple + map(val, dims(x, map(name2dim, K))) + elseif x isa NamedTuple + values(x[K]) + elseif x isa Tuple + x + else + map(_ -> x, K) end - return setdims(ds, interval_dims) end +# Geometries + # get geometries from what may be a table with a geometrycolumn or an interable of geometries # if it has no geometry column and does not iterate valid geometries, error informatively function _get_geometries(data, ::Nothing) @@ -238,7 +270,7 @@ function _get_geometries(data, geometrycolumn::NTuple{<:Any, <:Symbol}) ismissing(r) && return missing end return row - end + end return points end function _check_geometries(geoms) @@ -251,61 +283,27 @@ function _check_geometries(geoms) end # to distinguish between objects returned by _get_geometries and other objects struct IterableOfGeometries end -_warn_disk() = @warn "Disk-based objects may be very slow here. User `read` first." - -_filenotfound_error(filename) = throw(ArgumentError("file \"$filename\" not found")) - -_progress(args...; kw...) = ProgressMeter.Progress(args...; color=:blue, barlen=50, kw...) - -# Function barrier for splatted vector broadcast -@noinline _do_broadcast!(f, x, args...) = broadcast!(f, x, args...) - -_size_and_res_error() = throw(ArgumentError("Both `size` and `res` keywords are passed, but only one can be used")) - -_no_crs_error() = throw(ArgumentError("The provided object does not have a CRS. Use `setcrs` to set one.")) - -_type_missingval(::Type{T}) where T = typemin(T) -_type_missingval(::Type{T}) where T<:Unsigned = typemax(T) -# Modified from IsURL.jl, many thanks to @zlatanvasovic -const WINDOWSREGEX = r"^[a-zA-Z]:[\\]" -const URLREGEX = r"^[a-zA-Z][a-zA-Z\d+\-.]*:" -_isurl(str::AbstractString) = !occursin(WINDOWSREGEX, str) && occursin(URLREGEX, str) - -# Run `f` threaded or not, w -function _run(f, range::OrdinalRange, threaded::Bool, progress::Bool, desc::String) - p = progress ? _progress(length(range); desc) : nothing - if threaded - Threads.@threads :static for i in range - f(i) - isnothing(p) || ProgressMeter.next!(p) - end - else - for i in range - f(i) - isnothing(p) || ProgressMeter.next!(p) - end - end -end +# Chunking # NoKW means true -@inline function _chunks_to_tuple(template, dims, chunks::Bool) +@inline function _chunks_to_tuple(template, dimorder, chunks::Bool) if chunks == true if template isa AbstractArray && DA.haschunks(template) == DA.Chunked() # Get chunks from the template DA.max_chunksize(DA.eachchunk(template)) else # Use defaults - _chunks_to_tuple(template, dims, (X(512), Y(512))) + _chunks_to_tuple(template, dimorder, (X(512), Y(512))) end else nothing end end @inline function _chunks_to_tuple(template, dimorder, chunks::NTuple{N,Integer}) where N - n = length(dimorder) - if n < N + n = length(dimorder) + if n < N throw(ArgumentError("Length $n tuple needed for `chunks`, got $N")) elseif n > N (chunks..., ntuple(_ -> 1, Val{n-N}())...) @@ -330,24 +328,59 @@ end @inline _chunks_to_tuple(template, dimorder, chunks::Nothing) = nothing @inline _chunks_to_tuple(template, dims, chunks::NoKW) = nothing - _checkregular(A::AbstractRange) = true function _checkregular(A::AbstractArray) step = stepof(A) for i in eachindex(A)[2:end] if !(A[i] - A[i-1] ≈ step) - return false + return false end end return true end -function _checkobjmem(obj) +# Constructor helpers + +function _raw_check(raw, scaled, missingval, verbose) + if raw + # Scaled is false if raw is true + scaled isa Bool && scaled && verbose && @warn "`scaled=true` set to `false` because of `raw=true`" + # Only missingval of `nothing` has a meaning with `raw=true`, + # it turns off missingval completely. Other msissingval values are + # ignored and a warning is thrown unless verbose=false + if !isnokwornothing(missingval) + verbose && @warn "`missingval=$missingval` target value is not used because of `raw=true`" + end + return false, isnothing(missingval) ? nothing : Rasters.missingval + else + # Otherwise scaled is true and missingval is unchanged + scaled = isnokw(scaled) ? true : scaled + return scaled, missingval + end +end + +function _maybe_drop_single_band(x, dropband::Bool, lazy::Bool) + dropband || return x + if hasdim(x, Band()) && size(x, Band()) < 2 + if lazy + return view(x, Band(1)) # TODO fix dropdims in DiskArrays + else + return dropdims(x; dims=Band()) + end + else + return x + end +end + + +# Memory + +function _checkobjmem(obj) f = bytes -> """ - required memory $(bytes) is greater than system memory $(Sys.free_memory()). + required memory $(bytes) is greater than system memory $(Sys.free_memory()). Use `lazy=true` if you are loading dataset, and only call `read` on a subset after `view`. """ - _checkobjmem(f, obj) + _checkobjmem(f, obj) end _checkobjmem(f, obj) = _checkmem(f, _sizeof(obj)) @@ -360,13 +393,131 @@ _sizeof(s::AbstractRasterSeries) = function _no_memory_error(f, bytes) msg = f(bytes) * """ - If you beleive this is not correct, pass the keyword `checkmem=false` or set `Rasters.checkmem!(false)` + If you beleive this is not correct, pass the keyword `checkmem=false` or set `Rasters.checkmem!(false)` and try again. These options may crash your system if the file is actually larger than memory. """ return error(msg) end +# Lookups + +function _as_intervals(ds::Tuple) + # Rasterization only makes sense on Sampled Intervals + interval_dims = map(dims(ds, DEFAULT_POINT_ORDER)) do d + l = parent(d) + rebuild(d, rebuild(l; sampling=Intervals(locus(l)))) + end + return setdims(ds, interval_dims) +end + +nolookup_to_sampled(A) = rebuild(A; dims=nolookup_to_sampled(dims(A))) +nolookup_to_sampled(dims::DimTuple) = map(nolookup_to_sampled, dims) +nolookup_to_sampled(d::Dimension) = + lookup(d) isa NoLookup ? set(d, Sampled(; sampling=Points())) : d + + +# Metadata + +# Create a standardised Metadata object of source T, containing a `Dict{String,Any}` +_metadatadict(s::Source, p1::Pair, pairs::Pair...) = + _metadatadict(s, (p1, pairs...)) +_metadatadict(::S) where S<:Source = Metadata{S}(Dict{String,Any}()) +function _metadatadict(::S, pairs) where S<:Source + dict = Dict{String,Any}() + for (k, v) in pairs + dict[String(k)] = v + end + return Metadata{S}(dict) +end + + +# Other + +_progress(args...; kw...) = ProgressMeter.Progress(args...; color=:blue, barlen=50, kw...) + +# Function barrier for splatted vector broadcast +@noinline _do_broadcast!(f, x, args...) = broadcast!(f, x, args...) + +# Run `f` threaded or not, w +function _run(f, range::OrdinalRange, threaded::Bool, progress::Bool, desc::String) + p = progress ? _progress(length(range); desc) : nothing + if threaded + Threads.@threads :static for i in range + f(i) + isnothing(p) || ProgressMeter.next!(p) + end + else + for i in range + f(i) + isnothing(p) || ProgressMeter.next!(p) + end + end +end + +_unwrap(::Val{X}) where X = X +_unwrap(x) = x + +# Map filename suffix over a stack +function mapargs(f, st::AbstractRasterStack, args...; kw...) + layers = map(values(st), args...) do A, mappedargs... + f(A, mappedargs...; kw...) + end + return DD.rebuild_from_arrays(st, Tuple(layers)) +end + +_without_mapped_crs(f, x) = _without_mapped_crs(f, x, mappedcrs(x)) +_without_mapped_crs(f, x, ::Nothing) = f(x) +function _without_mapped_crs(f, dims::DimTuple, mappedcrs::GeoFormat) + dims1 = setmappedcrs(dims, nothing) + x = f(dims1) + if x isa DimTuple + x = setmappedcrs(x, mappedcrs) + end + return x +end +function _without_mapped_crs(f, A::AbstractRaster, mappedcrs::GeoFormat) + A = setmappedcrs(A, nothing) + x = f(A) + return if x isa AbstractRaster + setmappedcrs(x, mappedcrs) + else + x + end +end +function _without_mapped_crs(f, st::AbstractRasterStack, mappedcrs::GeoFormat) + st1 = maplayers(A -> setmappedcrs(A, nothing), st) + x = f(st1) + return if x isa AbstractRasterStack + setmappedcrs(x, mappedcrs) + else + x + end +end + + +# Warnings and erros + +_maybe_warn_replace_missing(replace_missing::NoKW) = nothing +function _maybe_warn_replace_missing(replace_missing) + @warn """ + `replace_missing` keyword no longer used. Rasters now automatically replaces `missingval` with `missing`. + Set `missingval=Rasters.missingval`, to keep the internal missing value, or to replace with some value besides + `missing` use e.g. `missingval=Rasters.missingval => NaN` for `NaN`. + """ +end + +@noinline _warn_disk() = @warn "Disk-based objects may be very slow here. User `read` first." + +_filenotfound_error(filename) = throw(ArgumentError("file \"$filename\" not found")) + +_size_and_res_error() = throw(ArgumentError("Both `size` and `res` keywords are passed, but only one can be used")) + +_no_crs_error() = throw(ArgumentError("The provided object does not have a CRS. Use `setcrs` to set one.")) + + +# Rows for extraction etc + # _rowtype returns the complete NamedTuple type for a point row # This code is entirely for types stability and performance. # It is used in extract and Rasters.sample diff --git a/src/write.jl b/src/write.jl index debc7e697..0ad84007b 100644 --- a/src/write.jl +++ b/src/write.jl @@ -1,19 +1,4 @@ -const CHUNKS_KEYWORD = """ -- `chunks`: a `NTuple{N,Int}` specifying the chunk size for each dimension. - To specify only specific dimensions, a Tuple of `Dimension` wrapping `Int` - or a `NamedTuple` of `Int` can be used. Other dimensions will have a chunk - size of `1`. `true` can be used to mean: use the original - chunk size of the lazy `Raster` being written or X and Y of 256 by 256. - `false` means don't use chunks at all. -""" - -const MISSINGVAL_KEYWORD = """ -- `missingval`: set the missing value (i.e. FillValue / nodataval) of the written raster, - as Julias `missing` cannot be stored. If not passed in, `missingval` will be detected - from metadata or a default will be chosen. -""" - const SOURCE_WRITE_DOCSTRING = """ Other keyword arguments are passed to the `write` method for the backend. @@ -33,12 +18,14 @@ Other keyword arguments are passed to the `write` method for the backend. ## GDAL Keywords $FORCE_KEYWORD -- `driver`: A GDAL driver name `String` or a GDAL driver retrieved via `ArchGDAL.getdriver(drivername)`. +- `driver`: A GDAL driver name `String` or a GDAL driver retrieved via `ArchGDAL.getdriver(drivername)`. By default `driver` is guessed from the filename extension. -- `options::Dict{String,String}`: A dictionary containing the dataset creation options passed to the driver. - For example: `Dict("COMPRESS" => "DEFLATE")`. -Valid `options` for each specific `driver` can be found at: https://gdal.org/drivers/raster/index.html +- `options::Dict{String,String}`: A dictionary containing the dataset creation options passed to the driver. + For example: `Dict("COMPRESS" => "DEFLATE")`. + +Valid `driver` names and the `options` for each can be found at: +[https://gdal.org/drivers/raster/index.html](https://gdal.org/drivers/raster/index.html) ## Source comments @@ -50,7 +37,7 @@ Returns the base of `filename` with a `.grd` extension. ### GDAL (tiff, and everything else) -Used if you `write` a `Raster` with a `filename` extension that no other backend can write. +Used if you `write` a `Raster` with a `filename` extension that no other backend can write. GDAL is the fallback, and writes a lot of file types, but is not guaranteed to work. """ @@ -64,18 +51,20 @@ file extension or using the `source` keyword. $CHUNKS_KEYWORD $FORCE_KEYWORD -$MISSINGVAL_KEYWORD +$WRITE_MISSINGVAL_KEYWORD $SOURCE_KEYWORD $SOURCE_WRITE_DOCSTRING Returns `filename`. """ -function Base.write( filename::AbstractString, A::AbstractRaster; - source=_sourcetrait(filename), +function Base.write(filename::AbstractString, A::AbstractRaster; + source=_sourcetrait(filename), + missingval=nokw, kw... ) - write(filename, _sourcetrait(source), A; kw...) + missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval + write(filename, _sourcetrait(source), A; missingval, kw...) end Base.write(A::AbstractRaster; kw...) = write(filename(A), A; kw...) # Fallback @@ -83,7 +72,7 @@ function Base.write( filename::AbstractString, source::Source, A::Union{AbstractRaster,AbstractRasterStack}; kw... ) missing_package = SOURCE2PACKAGENAME[source] - error("Missing package extension for $source. Run `using $missing_package` before useing `write` for this file.") + error("Missing package extension for $source. Run `using $missing_package` before using `write` for this file extension.") end """ @@ -113,11 +102,13 @@ function Base.write(path::AbstractString, s::AbstractRasterStack; ext=nothing, source=_sourcetrait(path, ext), verbose=true, + missingval=nokw, kw... ) source = _sourcetrait(source) + missingval = _stack_missingvals(s, missingval) if haslayers(source) - write(path, source, s; kw...) + write(path, source, s; missingval, kw...) else # Otherwise write separate files for each layer if isnothing(ext) @@ -136,13 +127,38 @@ function Base.write(path::AbstractString, s::AbstractRasterStack; if verbose @warn string("Cannot write complete stacks to \"", ext, "\", writing layers as individual files") end - map(keys(s), suffix1) do key, suf + map(keys(s), suffix1, missingval) do key, suf, mv fn = string(base, suf, ext) - write(fn, source, s[key]; kw...) + write(fn, source, s[key]; missingval=mv, kw...) end |> NamedTuple{keys(s)} end end +_stack_missingvals(::RasterStack{<:Any,T}, x) where T = _stack_missingvals(T, x) +function _stack_missingvals(::Type{T}, missingval::NamedTuple{K}) where {K,T<:NamedTuple{K}} + map(_types(T), values(missingval)) do t, mv + ismissing(mv) ? _type_missingval(t) : mv + end |> NamedTuple{K} +end +_stack_missingvals(::Type{T}, missingval::NamedTuple{K1}) where {K1,T<:NamedTuple{K2}} where K2 = + throw(ArgumentError("stack keys $K1 do not match misssingval keys $K2")) +_stack_missingvals(::Type{T}, missingval::Missing) where T<:NamedTuple{K} where K = + NamedTuple{K}(map(_type_missingval, _types(T))) +_stack_missingvals(::Type{T}, missingval) where T = + _stack_nt(T, missingval) + +_stack_nt(::NamedTuple{K}, x) where K = NamedTuple{K}(map(_ -> x, K)) +_stack_nt(::RasterStack{<:Any,T}, x) where T = _stack_nt(T, x) +_stack_nt(::Type{T}, x::NamedTuple{K}) where {K,T<:NamedTuple{K}} = x +_stack_nt(::Type{T}, x::NamedTuple{K1}) where {K1,T<:NamedTuple{K2}} where K2 = + throw(ArgumentError("stack keys $K1 do not match misssingval keys $K2")) +_stack_nt(::Type{T}, x) where T<:NamedTuple{K} where K = + NamedTuple{K}(map(_ -> x, K)) + +@generated function _types(::Type{<:NamedTuple{K,T}}) where {K,T} + Tuple(T.parameters) +end + """ Base.write(filepath::AbstractString, s::AbstractRasterSeries; kw...) @@ -191,15 +207,18 @@ function Base.write(path::AbstractString, A::AbstractRasterSeries; written_paths end end +Base.write(f::Function, args...; kw...) = write(args...; f, kw...) # Trait for source data that has stack layers haslayers(T) = false # This is used in source `write` methods +check_can_write(filename::Union{Nothing,NoKW}, force) = true function check_can_write(filename, force) if !check_can_write(Bool, filename, force) throw(ArgumentError("filename already exists at $filename. use the keyword `force=true` to write anyway")) end return true end +check_can_write(::Type{Bool}, filename::Union{Nothing,NoKW}, force) = true check_can_write(::Type{Bool}, filename, force) = (force || (!isfile(filename) && !isdir(filename))) diff --git a/test/array.jl b/test/array.jl index e77167091..fbd52580d 100644 --- a/test/array.jl +++ b/test/array.jl @@ -1,6 +1,7 @@ using Rasters, Test, Dates, DiskArrays using Rasters.Lookups, Rasters.Dimensions using Rasters: isdisk, ismem, filename +using ArchGDAL data1 = cumsum(cumsum(ones(10, 11); dims=1); dims=2) data2 = 2cumsum(cumsum(ones(10, 11, 1); dims=1); dims=2) @@ -82,7 +83,7 @@ end @testset "collect and Array" begin - @test collect(ga1) isa Array + @test collect(ga1) isa Array{Float64,2} @test collect(ga1) == data1 @test Array(ga1) isa Array{Float64,2} @test Array(ga1) == data1 diff --git a/test/create.jl b/test/create.jl new file mode 100644 index 000000000..e76579c04 --- /dev/null +++ b/test/create.jl @@ -0,0 +1,220 @@ +using Rasters, Test, Dates, DiskArrays, Extents, ArchGDAL, NCDatasets +using Rasters.Lookups, Rasters.Dimensions +using Rasters: isdisk, ismem, filename + +@testset "create Raster" begin + rast = Rasters.create(Int32, Extents.Extent(X=(0, 10), Y=(0, 5)); + size=(1024, 1024), + crs=EPSG(4326), + chunks=(X=128, Y=128), + force=true, + name=:testname, + fill=Int32(2), + ) do A + A .*= 3 + end + @test all(rast .=== Int32(6)) + @test crs(rast) == EPSG(4326) + @test size(rast) == (1024, 1024) + @test Rasters.name(rast) == :testname + @test missingval(rast) === nothing + @test ispoints(rast) + + ext = Extents.Extent(X=(0, 10), Y=(0, 5), Ti=(DateTime(2001), DateTime(2002))) + rast = @test_nowarn Rasters.create(Float64, ext; + res=(X=0.2, Y=0.1, Ti=Month(1)), + crs=EPSG(4326), + force=true, + sampling=Intervals(Start()), + name=:testname, + missingval=missing, + reverse_y=false, + fill=2.0, + ) do A + A .*= 3 + end + @test all(rast .=== 6.0) + @test crs(rast) == EPSG(4326) + # We need closed/open extents to fix this + @test_broken extent(rast) == ext + @test_broken size(rast) == (50, 50, 12) + @test Rasters.name(rast) == :testname + @test missingval(rast) === missing + @test isintervals(rast) + @test map(step, lookup(rast)) == (0.2, 0.1, Month(1)) + + D = (Ti(DateTime(2000):Month(1):DateTime(2000, 12); sampling=Intervals(Start())), X(0.0:0.01:10.0), Y(0.0:0.01:10)) + rast = Rasters.create(Int32, D; fill=1, missingval=missing, crs=EPSG(4326), name=:testname) + map(length, Rasters.dims(rast)) + @test crs(rast) == EPSG(4326) + @test size(rast) == (12, 1001, 1001) + @test Rasters.name(rast) == :testname + @test missingval(rast) === missing + @test isintervals(rast, Ti) + @test ispoints(rast, (X, Y)) + @test map(step, lookup(rast)) == (Month(1), 0.01, 0.01) + @test all(x -> x === Int32(1), rast) + + rast1 = Rasters.create(rast) + @test dims(rast1) == dims(rast) + @test eltype(rast1) == eltype(rast) +end + +@testset "create RasterStack" begin + st = Rasters.create((a=Int32, b=Float64, c=Bool), Extents.Extent(X=(0, 10), Y=(0, 5)); + size=(X=1024, Y=1024), + sampling=(X=Points(), Y=Intervals()), + crs=EPSG(4326), + force=true, + verbose=false, + missingval=(a=Int32(-9999), b=Float64(-9999), c=false), + fill=(a=Int32(-9999), b=0, c=false), + ) do st + st.c .= true + end + @test crs(st) == EPSG(4326) + @test size(st) == (1024, 1024) + @test Rasters.name(st) == (:a, :b, :c) + @test eltype(st) === @NamedTuple{a::Int32,b::Float64, c::Bool} + @test missingval(st) === (a=Int32(-9999), b=-9999.0, c=false) + @test ispoints(st, X) + @test isintervals(st, Y) + @test all(x -> x === Int32(-9999), st.a) + @test all(x -> x === 0.0, st.b) + @test all(x -> x === true, st.c) + + st2 = Rasters.create((a=UInt8, b=Float32), st; + layerdims=(a=(X(), Y()), b=(Y(),)), + missingval=(a=UInt8(0), b=1.0f0) + ) + @test basedims(st2.a) == (X(), Y()) + @test basedims(st2.b) == (Y(),) + @test eltype(st2) === @NamedTuple{a::UInt8, b::Float32} + @test missingval(st2) === (a=UInt8(0), b=1.0f0) + + @testset "from template with new dims" begin + st1 = Rasters.create(st; + layerdims=(a=(X, Y), b=(Y,), c=(X,)), + ) + @test crs(st1) == EPSG(4326) + @test size(st1) == (1024, 1024) + @test Rasters.name(st1) == (:a, :b, :c) + @test missingval(st1) === (a=Int32(-9999), b=-9999.0, c=false) + @test ispoints(st1, X) + @test isintervals(st1, Y) + @test basedims(st1.a) == (X(), Y()) + @test basedims(st1.b) == (Y(),) + @test basedims(st1.c) == (X(),) + end + + @testset "from template with new layers" begin + st1 = Rasters.create((c=UInt8, d=Int16), st; + missingval=(c=0x00, d=Int16(1)), + ) + @test crs(st1) == EPSG(4326) + @test size(st1) == (1024, 1024) + @test Rasters.name(st1) == (:c, :d) + @test eltype(st1) == @NamedTuple{c::UInt8,d::Int16} + @test missingval(st1) === (c=0x00, d=Int16(1)) + end + + @testset "from template with new dims and layers" begin + st1 = Rasters.create((c=UInt8, d=Int16), st; + layerdims=(c=(X, Y), d=(Y,)), + missingval=(c=UInt8(0), d=Int16(1)), + ) + @test crs(st1) == EPSG(4326) + @test size(st1) == (1024, 1024) + @test Rasters.name(st1) == (:c, :d) + @test missingval(st1) === (c=0x00, d=Int16(1)) + end +end + +ext = ".nc" +for ext in (".nc", ".tif", ".grd") + @testset "create $ext" begin + fn = tempname() * ext + created = Rasters.create(fn, UInt8, (X(1:10), Y(1:10)); + missingval=0xff, + fill=0x01, + force=true + ) + @test all(Raster(fn) .=== 0x01) + @test missingval(created) === 0xff + + fn = tempname() * ext + if ext == ".grd" + created = Rasters.create(fn, Int16, (X(1:10), Y(1:10)); + missingval=typemax(Int16), + force=true, + ); + open(created; write=true) do O + O .= 2 + nothing + end + @test all(Raster(fn) .=== Int16(2)) + @test missingval(Raster(fn; missingval)) === typemax(Int16) + else + @time created = Rasters.create(fn, Int16, (X(1:10), Y(1:10)); + missingval=typemax(Int16), + scale=0.1, + offset=5.0, + fill=Int16(1), + force=true, + ) do C + C .*= 3 + end + @test all(Raster(fn) .=== 3.0) + @test all(Raster(fn; scaled=false) .== Int16(-20)) + @test missingval(Raster(fn; missingval, scaled=false)) === typemax(Int16) + end + end +end + +@testset "create .nc stack" begin + created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); + missingval=(a=0xff, b=typemax(Float32)), + fill=(a=0x01, b=1.0f0), + layerdims=(a=(X,), b=(X, Y)), + force=true, + ) + @test missingval(created) == (a=0xff, b=typemax(Float32)) + @test size(created.a) == (10,) + @test size(created.b) == (10, 10) + @test all(created.a .=== 0x01) + @test all(created.b .=== 1.0f0) + st = RasterStack("created.nc"; missingval) + @test missingval(st) == (a=0xff, b=typemax(Float32)) + + created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); + missingval=(a=0xff, b=typemax(Float32)) => missing, + fill=(a=0x01, b=1.0f0), + layerdims=(a=(X,), b=(X, Y)), + force=true, + ) + + @test missingval(created) === missing + @test size(created.a) == (10,) + @test size(created.b) == (10, 10) + @test all(created.a .=== 0x01) + @test all(created.b .=== 1.0f0) + st = RasterStack("created.nc"; missingval) + @test missingval(st) == (a=0xff, b=typemax(Float32)) + + @testset "with a function" begin + created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); + missingval=(a=0xff, b=typemax(Float32)), + fill=(a=0x01, b=1.0f0), + layerdims=(a=(X,), b=(X, Y)), + force=true, + ) do st + map(layers(st)) do A + A .*= 2 + end + end + @test all(read(created.a) .=== 0x02) + @test all(read(created.b) .=== 2.0f0) + end +end + + diff --git a/test/methods.jl b/test/methods.jl index 81823e524..c2e15124f 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -55,9 +55,10 @@ gaMi = replace_missing(ga) @test all(map(values(replace_missing(st, NaN32)), (a=[NaN32 7.0f0; 2.0f0 NaN32], b=[1.0 0.4; 2.0 NaN])) do x, y all(x .=== y) end) - dNaN = replace_missing(ga, NaN32; filename="test.tif") + testfile = tempname() * ".tif" + dNaN = replace_missing(ga, NaN32; filename=testfile) + read(dNaN) @test all(isequal.(dNaN, [NaN32 7.0f0; 2.0f0 NaN32])) - rm("test.tif") stNaN = replace_missing(st, NaN32; filename="teststack.tif") @test all(maplayers(stNaN[Band(1)], (a=[NaN32 7.0f0; 2.0f0 NaN32], b=[1.0 0.4; 2.0 NaN])) do x, y all(x .=== y) @@ -89,16 +90,51 @@ end @test all(boolmask(se2, alllayers=false) .=== [true true; true false]) @test dims(boolmask(ga)) === dims(ga) x = boolmask(polygon; res=1.0, boundary=:touches) - @test x == trues(X(Projected(-20:1.0:0.0; sampling=Intervals(Start()), crs=nothing)), Y(Projected(10.0:1.0:30.0; sampling=Intervals(Start()), crs=nothing))) + @test x == trues(X(Projected(-20:1.0:0.0; sampling=Intervals(Start()), crs=nothing)), + Y(Projected(10.0:1.0:30.0; sampling=Intervals(Start()), crs=nothing))) @test all(x .!= boolmask(polygon; res=1.0, invert=true, boundary=:touches)) @test parent(x) isa BitMatrix # With a :geometry axis - x = boolmask([polygon, polygon]; collapse=false, res=1.0, boundary=:touches) - @test all(x .!= boolmask([polygon, polygon]; collapse=false, res=1.0, invert=true, boundary=:touches)) - @test eltype(x) == Bool - @test size(x) == (21, 21, 2) - @test sum(x) == 882 - @test parent(x) isa BitArray{3} + x1 = boolmask([polygon, polygon]; collapse=false, res=1.0, boundary=:touches) + x2 = boolmask([polygon, polygon]; collapse=false, res=1.0, boundary=:touches, sampling=Intervals(Center())) + x3 = boolmask([polygon, polygon]; collapse=false, res=1.0, boundary=:touches, sampling=Intervals(End())) + @test extent(x1) == extent(x2) == extent(x3) == Extent(X = (-20.0, 1.0), Y = (10.0, 31.0), geometry = (1, 2)) + x4 = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches) + x5 = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches, sampling=Intervals(Center())) + x6 = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches, sampling=Intervals(End())) + xs = (x1, x2, x3, x4, x5, x6) + @test all(x1 .!= boolmask([polygon, polygon]; collapse=false, res=1.0, invert=true, boundary=:touches)) + @test sampling(x1, X) isa Intervals{Start} + @test sampling(x2, X) isa Intervals{Center} + @test sampling(x3, X) isa Intervals{End} + @test sampling(x4, X) isa Intervals{Start} + @test sampling(x5, X) isa Intervals{Center} + @test sampling(x6, X) isa Intervals{End} + for x in xs + @test eltype(x1) == Bool + @test size(x) == (21, 21, 2) + @test sum(x) == 882 + @test parent(x) isa BitArray{3} + @test eltype(x) == Bool + end + @testset "size adds nextfloat" begin + s = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches) + bounds(s, X)[1] == -20.0 + bounds(s, X)[2] > 0.0 + bounds(s, Y)[1] == 10.0 + bounds(s, Y)[2] > 30.0 + c = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches, sampling=Intervals(Center())) + bounds(c, X)[1] == -20.0 + bounds(c, X)[2] > 0.0 + bounds(c, Y)[1] == 10.0 + bounds(c, Y)[2] > 30.0 + e = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches, sampling=Intervals(End())) + bounds(e, X)[1] < -20.0 + bounds(e, X)[2] == 0.0 + bounds(e, Y)[1] < 10.0 + bounds(e, Y)[2] == 30.0 + end + x = boolmask([polygon, polygon]; collapse=true, res=1.0, boundary=:touches) @test all(x .!= boolmask([polygon, polygon]; collapse=true, res=1.0, invert=true, boundary=:touches)) @test size(x) == (21, 21) @@ -181,9 +217,9 @@ end ga4 = replace_missing(ga1; missingval=-9999) mask!(ga4; with=ga, invert=true) @test all(ga4 .=== [-9999 -9999; -9999 3]) - dmask = mask(ga3; with=ga, filename="mask.tif") + maskfile = tempname() * ".tif" + dmask = mask(ga3; with=ga, filename=maskfile) @test Rasters.isdisk(dmask) - rm("mask.tif") stmask = mask(replace_missing(st, NaN); with=ga, filename="mask.tif") @test Rasters.isdisk(stmask) rm("mask_a.tif") @@ -228,7 +264,7 @@ end end end -@testset "mask_replace_missing" begin +@testset "mask" begin # Floating point rasters a = Raster([1.0 0.0; 1.0 1.0], dims=(X, Y), missingval=0.0) b = Raster([1.0 1.0; 1.0 0.0], dims=(X, Y), missingval=0.0) @@ -357,7 +393,8 @@ end @test_throws ArgumentError classify(ga1, [1, 2, 3]) end -@testset "points" begin dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) +@testset "points" begin + dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) rast = Raster([1 2; 3 4], dimz; name=:test) rast2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=5) rast_m = Raster([1 2; 3 missing], dimz; name=:test) @@ -590,7 +627,8 @@ end extended = extend(cropped, ga)[1] extended_r = extend(cropped_r; to=ga_r) extended1 = extend(extend(cropped; to=dims(ga, X)); to=dims(ga, Y)) - extended_d = extend(cropped; to=ga, filename="extended.tif") + filename = tempname() * ".tif" + extended_d = extend(cropped; to=ga, filename) @test map(identicalelements, maybe_layers.((extended, extended1, replace_missing(extended_d), ga))...) |> all @test map(identicalelements, maybe_layers.((extended_r, ga_r))...) |> all @test all(map(==, lookup(extended_d), lookup(extended))) @@ -665,7 +703,7 @@ end [missing 0.2 0.1; 1.2 1.1 0.3; 1.4 1.3 missing] - ) + ) # 3 dimensions A1 = Raster(ones(2, 2, 2), (X(2.0:-1.0:1.0), Y(5.0:1.0:6.0), Ti(DateTime(2001):Year(1):DateTime(2002)))) diff --git a/test/rasterize.jl b/test/rasterize.jl index 6d91f38f8..105f37ce6 100644 --- a/test/rasterize.jl +++ b/test/rasterize.jl @@ -277,8 +277,8 @@ end size=(250, 250), fill=UInt8(1), missingval=UInt8(0), ); # using Plots - # heatmap(parent(parent(rasters_raster))) - # heatmap(reverse(gdal_raster[:, :, 1]; dims=2)) + # Plots.heatmap(parent(parent(rasters_raster))) + # Plots.heatmap(reverse(gdal_raster[:, :, 1]; dims=2)) # Same results as GDAL @test sum(gdal_raster) == sum(rasters_raster) @test reverse(gdal_raster[:, :, 1]; dims=2) == rasters_raster @@ -289,11 +289,15 @@ end rasters_touches_raster = rasterize(last, shphandle.shapes; size=(250, 250), fill=UInt64(1), missingval=UInt64(0), boundary=:touches ) + # Plots.heatmap(reverse(gdal_touches_raster[:, :, 1]) + # Plots.heatmap(parent(rasters_touches_raster)) + # missingval(rasters_touches_raster) # Not quite the same answer as GDAL @test sum(gdal_touches_raster) == sum(rasters_touches_raster) @test reverse(gdal_touches_raster[:, :, 1], dims=2) == rasters_touches_raster # Test that its knwon to be off by 2: - @test count(reverse(gdal_touches_raster[:, :, 1], dims=2) .== rasters_touches_raster) == length(rasters_touches_raster) + @test count(reverse(gdal_touches_raster[:, :, 1], dims=2) .== rasters_touches_raster) == + length(rasters_touches_raster) # Two pixels differ in the angled line, top right # using Plots # Plots.heatmap(reverse(gdal_touches_raster[:, :, 1], dims=2)) @@ -396,12 +400,15 @@ end @test sum(skipmissing(r)) == (12 * 1 + 8 * 2 + 8 * 3 + 12 * 4) + (4 * 1.5 + 4 * 2.5 + 4 * 3.5) end - prod_r = rasterize(prod, polygons; res=5, fill=1:4, boundary=:center, filename="test.tif", threaded) + filename = tempname() * ".tif" + prod_r = rasterize(prod, polygons; res=5, fill=1:4, boundary=:center, filename, threaded) prod_r = rasterize(prod, polygons; res=5, fill=1:4, boundary=:center, threaded) @test sum(skipmissing(prod_r)) == (12 * 1 + 8 * 2 + 8 * 3 + 12 * 4) + (4 * 1 * 2 + 4 * 2 * 3 + 4 * 3 * 4) - prod_st = rasterize(prod, polygons; res=5, fill=(a=1:4, b=4:-1:1), missingval=missing, boundary=:center, threaded) + prod_st = rasterize(prod, polygons; + res=5, fill=(a=1:4, b=4:-1:1), missingval=missing, boundary=:center, threaded + ) @test_broken all(prod_st.a .=== rot180(parent(prod_st.b))) @test all(prod_r .=== prod_st.a) @@ -428,8 +435,8 @@ end # The outlines of these plots should exactly mactch, # with three values of 2 on the diagonal # using Plots - # Plots.plot(reduced_raster; clims=(0, 3)) - # Plots.plot!(polygons; opacity=0.3, fillcolor=:black) + # Plots.plot(reduced_raster_sum_touches; clims=(0, 3)) + # Plots.plot(polygons; opacity=0.3, fillcolor=:black) reduced_center = rasterize(sum, polygons; res=5, fill=1, boundary=:center, threaded) reduced_touches = rasterize(sum, polygons; res=5, fill=1, boundary=:touches, threaded) reduced_inside = rasterize(sum, polygons; res=5, fill=1, boundary=:inside, threaded) diff --git a/test/resample.jl b/test/resample.jl index 27067984e..d845f4b16 100644 --- a/test/resample.jl +++ b/test/resample.jl @@ -24,26 +24,8 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) end end - # Resample cea.tif using resample - cea = Raster(raster_path; missingval=0x00, name = :cea) - raster_output = resample(cea; res=output_res, crs=output_crs, method) - disk_output = resample(cea; res=output_res, crs=output_crs, method, filename="resample.tif") - - cea_permuted = permutedims(Raster(raster_path), (Y, X)) - permuted_output = resample(cea_permuted, output_res; crs=output_crs, method) - - # Compare ArchGDAL, resample and permuted resample - @test AG_output == - raster_output[Band(1)] == - disk_output[Band(1)] == - permutedims(permuted_output, (X, Y)) - @test abs(step(dims(raster_output, Y))) ≈ - abs(step(dims(raster_output, X))) ≈ - abs(step(dims(disk_output, X))) ≈ - abs(step(dims(permuted_output, X))) ≈ output_res - @test name(cea) == name(raster_output) - - rm("resample.tif") + cea = Raster(raster_path; missingval=0x00, name=:cea) + raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00) @testset "missingval propagates" begin @test missingval(resample(cea; res=output_res, crs=output_crs, method)) == 0x00 @@ -64,9 +46,7 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) resampled = resample(cea; method) @test crs(cea) == crs(resampled) @test cea == resampled - # There is some floating point error here after Rasters -> GDAL -> Rasterss... - # Should we correct it by detecting almost identical extent and using the original? - # @test_broken extent(cea) == extent(resampled) + @test extent(cea) == extent(resampled) end @testset "only `res` kw changes the array size predictably" begin @@ -133,7 +113,7 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) @testset "resample eltype propagates" begin r = Raster(rand(UInt8, X(1:10), Y(1:10))) r1 = resample(r; to=r) - @test eltype(r1) == UInt8 + @test eltype(r1) == Union{UInt8,Missing} end @testset "dimensions matcha after resampling with only `to`" begin @@ -153,6 +133,41 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) @test dims(resampled_3D, Z) == Z(1:2) end + mv = 0xff + for mv in (0xff, missing, Rasters.nokw) + # Resample cea.tif using resample + cea = Raster(raster_path; missingval=mv, name=:cea) + raster_output = resample(cea; + res=output_res, crs=output_crs, method, missingval=mv + ) + disk_output = resample(cea; + res=output_res, crs=output_crs, method, missingval=mv, filename="resample.tif" + ) + cea_permuted = permutedims(Raster(raster_path; + missingval=mv, name=:cea_permuted), (Y, X) + ) + permuted_output = resample(cea_permuted, output_res; + missingval=mv, crs=output_crs, method + ) + + AG_output1 = if mv === 0xff + AG_output + else + replace(AG_output, 0xff => missing) + end + # Compare ArchGDAL, resample and permuted resample + @test all(AG_output1 .=== parent(raster_output) .=== + read(disk_output) .=== + permutedims(permuted_output, (X, Y))) + @test abs(step(dims(raster_output, Y))) ≈ + abs(step(dims(raster_output, X))) ≈ + abs(step(dims(disk_output, X))) ≈ + abs(step(dims(permuted_output, X))) ≈ output_res + @test Rasters.name(cea) == Rasters.name(raster_output) + + rm("resample.tif") + end + @testset "resample to the same size or resolution leaves raster unchanged" begin res = 2 ys = range(; start=(90 - res / 2), step=-res, stop=(-90 + res / 2)) diff --git a/test/runtests.jl b/test/runtests.jl index e4337545c..052ba3137 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,6 +14,7 @@ end @time @safetestset "array" begin include("array.jl") end @time @safetestset "stack" begin include("stack.jl") end @time @safetestset "series" begin include("series.jl") end +@time @safetestset "create" begin include("create.jl") end @time @safetestset "utils" begin include("utils.jl") end @time @safetestset "set" begin include("set.jl") end @time @safetestset "aggregate" begin include("aggregate.jl") end @@ -26,22 +27,17 @@ end # CommondataModel sources @time @safetestset "commondatamodel" begin include("sources/commondatamodel.jl") end @time @safetestset "ncdatasets" begin include("sources/ncdatasets.jl") end -# @time @safetestset "zarr" begin include("sources/zarr.jl") end # TODO: FIXME +@time @safetestset "zarr" begin include("sources/zarr.jl") end if !Sys.iswindows() # GRIBDatasets doesn't work on Windows for now @time @safetestset "gribdatasets" begin include("sources/gribdatasets.jl") end + # GDAL Environment vars need to be set manually for windows, so skip for now + @time @safetestset "gdal" begin include("sources/gdal.jl") end + @time @safetestset "grd" begin include("sources/grd.jl") end end - # Only test SMAP locally for now, also RasterDataSources because CI downloads keep breaking if !haskey(ENV, "CI") @time @safetestset "rasterdatasources" begin include("sources/rasterdatasources.jl") end end - -if !Sys.iswindows() - # GDAL Environment vars need to be set manually for windows, so skip for now - @time @safetestset "gdal" begin include("sources/gdal.jl") end - @time @safetestset "grd" begin include("sources/grd.jl") end -end @time @safetestset "plot recipes" begin include("plotrecipes.jl") end - -@time @safetestset "resample" begin include("resample.jl") end +@time @safetestset "resample" begin include("resample.jl") end \ No newline at end of file diff --git a/test/sources/commondatamodel.jl b/test/sources/commondatamodel.jl index 6e2460092..0cda80779 100644 --- a/test/sources/commondatamodel.jl +++ b/test/sources/commondatamodel.jl @@ -15,7 +15,7 @@ import Rasters: ForwardOrdered, ReverseOrdered, Regular # test when reading a file ras = Raster(rand(X(f32_indices), Y(indices_one_third))) tempfile = tempname() * ".nc" - write(tempfile, ras) + write(tempfile, ras; force=true) ras_read = Raster(tempfile) steps = step.(dims(ras_read)) @test steps[1] == 0.05 diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index ddfb524e7..9d7731f08 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -16,7 +16,7 @@ gdalpath = maybedownload(url) @testset "lazyness" begin # Eager is the default - @test parent(gdalarray) isa Array + @test parent(gdalarray) isa Array # its a reshaped array now @test parent(lazyarray) isa DiskArrays.AbstractDiskArray @test parent(eagerarray) isa Array @testset "lazy broadcast" begin @@ -25,6 +25,26 @@ gdalpath = maybedownload(url) end end + @testset "cf" begin + # This file has no scale/offset so cf does nothing + @time cfarray = Raster(gdalpath) + @time cf_nomask_array = Raster(gdalpath; missingval=nothing) + @time rawarray = Raster(gdalpath; raw=true) + @time lazycfarray = Raster(gdalpath; lazy=true) + @time lazyrawarray = Raster(gdalpath; lazy=true, raw=true) + @test parent(cfarray) isa Matrix{UInt8} + @test parent(cf_nomask_array) isa Matrix{UInt8} + @test parent(rawarray) isa Matrix{UInt8} + open(lazycfarray) do A + @test parent(A) isa DiskArrays.SubDiskArray{UInt8} + @test parent(parent(A)) isa ArchGDAL.RasterDataset{UInt8} + end + open(lazyrawarray) do A + @test parent(A) isa DiskArrays.SubDiskArray{UInt8} + @test parent(parent(A)) isa ArchGDAL.RasterDataset{UInt8} + end + end + @testset "load from url" begin A = Raster("/vsicurl/" * url) B = Raster(url; source=:gdal) @@ -102,8 +122,8 @@ gdalpath = maybedownload(url) @test ndims(gdalarray) == 2 @test dims(gdalarray) isa Tuple{<:X,<:Y} @test lookup(refdims(gdalarray), Band) isa DimensionalData.Categorical; - # @test span(gdalarray, (Y, X)) == - # (Regular(-60.02213698319351), Regular(60.02213698319374)) + @test span(gdalarray, (Y, X)) == + (Regular(-60.02213698319351), Regular(60.02213698319374)) @test sampling(gdalarray, (Y, X)) == (Intervals(Start()), Intervals(Start())) # Bounds calculated in python using rasterio @@ -131,7 +151,7 @@ gdalpath = maybedownload(url) @testset "custom keywords" begin customgdalarray = Raster(gdalpath; name=:test, crs=EPSG(1000), mappedcrs=EPSG(4326), refdims=(Ti(),), - write=true, lazy=true, dropband=false, replace_missing=true, + write=true, lazy=true, dropband=false, ) @test name(customgdalarray) == :test @test refdims(customgdalarray) == (Ti(),) @@ -148,7 +168,7 @@ gdalpath = maybedownload(url) dimsgdalarray = Raster(gdalpath; dims=(Z(), X(), Y()), ) - @test dims(dimsgdalarray) isa Tuple{<:Z,X,Y} + @test dims(dimsgdalarray) isa Tuple{Z,X,Y} end @testset "indexing" begin @@ -167,7 +187,7 @@ gdalpath = maybedownload(url) @test gdalarray[Y(4.224e6..4.226e6), Band(1)] isa Raster end - @testset "methods" begin + @testset "methods" begin @testset "mean" begin @test all(mean(gdalarray; dims=Y) .=== mean(parent(gdalarray); dims=2)) end @@ -175,7 +195,7 @@ gdalpath = maybedownload(url) @testset "trim, crop, extend" begin a = read(replace_missing(gdalarray, zero(eltype(gdalarray)))) a[X(1:100)] .= missingval(a) - trimmed = trim(a) + trimmed = Rasters.trim(a) @test size(trimmed) == (414, 514) cropped = Rasters.crop(a; to=trimmed) @test size(cropped) == (414, 514) @@ -253,13 +273,19 @@ gdalpath = maybedownload(url) @time gdalarray = Raster(gdalpath; name=:test) A1 = gdalarray[X(1:300), Y(1:200)] A2 = gdalarray[X(57:500), Y(101:301)] - tempfile = tempname() * ".tif" - Afile = mosaic(first, A1, A2; missingval=0x00, atol=1e-8, filename=tempfile) - Amem = mosaic(first, A1, A2; missingval=0x00, atol=1e-8) + tempfile1 = tempname() * ".tif" + tempfile2 = tempname() * ".tif" + tempfile3 = tempname() * ".tif" + Afile = mosaic(first, A1, A2; missingval=0xff, atol=1e-8, filename=tempfile1) + Afile2 = mosaic(first, A1, A2; atol=1e-8, filename=tempfile2) + collect(Afile) + collect(Afile2) + @test missingval(Afile2) === missing + Amem = mosaic(first, A1, A2; missingval=0xff, atol=1e-8) Atest = gdalarray[X(1:500), Y(1:301)] - Atest[X(1:56), Y(201:301)] .= 0x00 - Atest[X(301:500), Y(1:100)] .= 0x00 - @test all(Atest .=== Amem .=== Afile) + Atest[X(1:56), Y(201:301)] .= 0xff + Atest[X(301:500), Y(1:100)] .= 0xff + @test all(Atest .=== Amem .=== Afile .=== replace_missing(Afile2, 0xff)) end end # methods @@ -269,8 +295,8 @@ gdalpath = maybedownload(url) @test size(geoA) == (50, 1) @test eltype(geoA) <: UInt8 @time geoA isa Raster{UInt8,1} - @test dims(geoA) isa Tuple{<:X,Y} - @test refdims(geoA) isa Tuple{<:Band} + @test dims(geoA) isa Tuple{X,Y} + @test refdims(geoA) isa Tuple{Band} @test metadata(geoA) == metadata(gdalarray) @test missingval(geoA) === nothing @test name(geoA) == :test @@ -281,9 +307,9 @@ gdalpath = maybedownload(url) @testset "2d asc" begin filename = tempname() * ".asc" - @time write(filename, gdalarray; force = true) + @time write(filename, gdalarray; force=true) saved1 = Raster(filename); - @test all(saved1 .== gdalarray) + @test all(parent(saved1 .== gdalarray)) # @test typeof(saved1) == typeof(geoA) @test val(dims(saved1, X)) ≈ val(dims(gdalarray, X)) @test val(dims(saved1, Y)) ≈ val(dims(gdalarray, Y)) @@ -391,15 +417,16 @@ gdalpath = maybedownload(url) end @testset "to grd" begin - write("testgrd.gri", gdalarray; force=true) - @test (@allocations write("testgrd.gri", gdalarray; force=true)) < 1e4 - grdarray = Raster("testgrd.gri") + fn = joinpath(tempdir(), tempname() * ".gri") + write(fn, gdalarray; force=true) + fn = joinpath(tempdir(), tempname() * ".gri") + @test (@allocations write(fn, gdalarray; force=true)) < 1e4 + grdarray = Raster(fn) @test crs(grdarray) == convert(ProjString, crs(gdalarray)) @test all(map((a, b) -> all(a .≈ b), bounds(grdarray), bounds(gdalarray))) @test index(grdarray, Y) ≈ index(gdalarray, Y) @test val(dims(grdarray, X)) ≈ val(dims(gdalarray, X)) @test grdarray == gdalarray - rm("testgrd.gri") end @testset "from Raster" begin @@ -439,10 +466,11 @@ gdalpath = maybedownload(url) end @testset "write missing" begin - A = read(replace_missing(gdalarray, missing)) + A = replace_missing(gdalarray, missing) filename = tempname() * ".tif" write(filename, A) - @test missingval(Raster(filename)) === typemax(UInt8) + @test missingval(Raster(filename)) === missing + @test missingval(Raster(filename; missingval)) === typemax(UInt8) rm(filename) end @@ -488,11 +516,11 @@ gdalpath = maybedownload(url) gdalarray[Y(1)] |> plot end - @testset "nodatavalue type matches the array type" begin + @testset "unmasked missingval type matches the array type" begin # Handle WorldClim/ucdavis unreliability A = nothing try - A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1) + A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1, missingval) catch end if !isnothing(A) @@ -681,8 +709,8 @@ end write(filename, gdalstack; force=true) base, ext = splitext(filename) filename_b = string(base, "_b", ext) - saved = read(Raster(filename_b)) - @test all(saved .== geoA) + saved = Raster(filename_b) + @test all(saved .=== geoA) end @testset "write multiple files with custom suffix" begin @@ -691,7 +719,7 @@ end base, ext = splitext(filename) filename_b = string(base, "_second", ext) saved = read(Raster(filename_b)) - @test all(saved .== geoA) + @test all(saved .=== geoA) end @testset "write netcdf" begin @@ -711,7 +739,6 @@ end gdalstack2 = RasterStack(filenames; lazy=true) @test DiskArrays.eachchunk(gdalstack2[:b])[1] == (1:128, 1:128) end - end @testset "show" begin @@ -746,21 +773,35 @@ end end ## Resample cea.tif using resample - raster_output = resample(gdalarray, output_res; crs=output_crs, method=resample_method) - disk_output = resample(gdalarray, output_res; crs=output_crs, method=resample_method, filename="resample.tif") - stack_output = resample(gdalstack, output_res; crs=output_crs, method=resample_method) - written_stack_output = resample(gdalstack, output_res; crs=output_crs, method=resample_method, filename="resample.tif") - series_output = resample(gdalser, output_res; crs=output_crs, method=resample_method) - + raster_output = resample(gdalarray, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) + disk_output = resample(gdalarray, output_res; + crs=output_crs, method=resample_method, filename="resample.tif", missingval=0xff=>0xff + ) + stack_output = resample(gdalstack, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) + written_stack_output = resample(gdalstack, output_res; + crs=output_crs, method=resample_method, filename="resample.tif", missingval=0xff=>0xff + ) + series_output = resample(gdalser, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) extradim_raster = cat(gdalarray, gdalarray, gdalarray; dims=Z) - extradim_output = resample(extradim_raster, output_res; crs=output_crs, method=resample_method) + extradim_output = resample(extradim_raster, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) permuted_raster = permutedims(gdalarray, (Y, X)) - permuted_output = resample(permuted_raster, output_res; crs=output_crs, method=resample_method) + permuted_output = resample(permuted_raster, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) # Compare ArchGDAL, resample and permuted resample @test AG_output == - raster_output == disk_output == + raster_output == + disk_output == stack_output[:a] == written_stack_output[:a] == series_output[1] == @@ -867,20 +908,19 @@ end @testset "detect dimension from file name" begin tifser = RasterSeries([gdalpath, gdalpath], Ti([DateTime(2001), DateTime(2002)])) - mkpath("tifseries") - write("tifseries/test.tif", tifser; force=true) - @test isfile("tifseries/test_2001-01-01T00:00:00.tif") - @test isfile("tifseries/test_2002-01-01T00:00:00.tif") - ser1 = RasterSeries("tifseries", Ti(DateTime)) - ser2 = RasterSeries("tifseries", Ti(DateTime); lazy=true) - ser3 = RasterSeries("tifseries/test.tif", Ti(DateTime)) - ser4 = RasterSeries("tifseries", Ti(DateTime; order=ForwardOrdered()); ext=".tif") - ser5 = RasterSeries("tifseries/test", Ti(DateTime); ext=".tif") + path = mkpath(joinpath(tempdir(), tempname(), "tifseries")) + write(joinpath(path, "test.tif"), tifser; force=true) + @test isfile(joinpath(path, "test_2001-01-01T00:00:00.tif")) + @test isfile(joinpath(path, "test_2002-01-01T00:00:00.tif")) + ser1 = RasterSeries(path, Ti(DateTime)) + ser2 = RasterSeries(path, Ti(DateTime); lazy=true) + ser3 = RasterSeries(joinpath(path, "test.tif"), Ti(DateTime)) + ser4 = RasterSeries(path, Ti(DateTime; order=ForwardOrdered()); ext=".tif") + ser5 = RasterSeries(joinpath(path, "test"), Ti(DateTime); ext=".tif") @test dims(ser1) == dims(ser2) == dims(ser3) == dims(ser3) == dims(ser5) == dims(tifser) - @test_throws ErrorException RasterSeries("tifseries", Ti(Int)) - ser6 = RasterSeries("tifseries/test", Ti(DateTime; sampling=Intervals(Center())); ext=".tif") + @test_throws ErrorException RasterSeries(path, Ti(Int)) + ser6 = RasterSeries(joinpath(path, "test"), Ti(DateTime; sampling=Intervals(Center())); ext=".tif") @test sampling(ser6) == (Intervals(Center()),) - rm("tifseries"; recursive=true) end @testset "methods" begin @@ -897,7 +937,7 @@ end mv = zero(eltype(gdalser[1])) ser = read(replace_missing(gdalser, mv)) ser = map(A -> (view(A, X(1:100)) .= mv; A), ser) - trimmed = trim(ser) + trimmed = Rasters.trim(ser) @test size(trimmed[1]) == (414, 514) cropped = crop(ser; to=trimmed[1]) @test size(cropped[1]) == (414, 514) @@ -943,6 +983,7 @@ end @test crs(gdalarray) == wkt @test crs(gdalarray[Y(1)]) == wkt end + end diff --git a/test/sources/grd.jl b/test/sources/grd.jl index 26ae1ca03..ca3c72425 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -4,13 +4,13 @@ using DiskArrays import NCDatasets, ArchGDAL using Rasters: FileArray, GRDsource, GDALsource, metadata, trim -testpath = joinpath(dirname(pathof(Rasters)), "../test/") +testpath = joinpath(dirname(pathof(Rasters)), "..", "test") include(joinpath(testpath, "test_utils.jl")) const DD = DimensionalData maybedownload("https://raw.githubusercontent.com/rspatial/raster/master/inst/external/rlogo.grd", "rlogo.grd") maybedownload("https://github.com/rspatial/raster/raw/master/inst/external/rlogo.gri", "rlogo.gri") -stem = joinpath(testpath, "data/rlogo") +stem = joinpath(testpath, "data", "rlogo") @test isfile(stem * ".grd") @test isfile(stem * ".gri") grdpath = stem * ".gri" @@ -29,11 +29,16 @@ grdpath = stem * ".gri" @test parent(eagerarray) isa Array end - @testset "replace_missing keyword" begin - # Eager is the default - @time missingarray = Raster(grdpath; replace_missing=true) + @testset "missingval" begin + @time missingarray = Raster(grdpath) @test missingval(missingarray) === missing @test eltype(missingarray) === Union{Missing,Float32} + @time missingarray = Raster(grdpath; missingval) + @test missingval(missingarray) === -3.4f38 + @test eltype(missingarray) === Float32 + @time missingarray = Raster(grdpath; missingval=missingval => NaN32) + @test missingval(missingarray) === NaN32 + @test eltype(missingarray) === Float32 end @testset "open" begin @@ -60,7 +65,7 @@ grdpath = stem * ".gri" end @testset "array properties" begin - @test grdarray isa Raster{Float32,3} + @test grdarray isa Raster{Union{Missing,Float32},3} end @testset "dimensions" begin @@ -74,7 +79,7 @@ grdpath = stem * ".gri" @testset "other fields" begin proj = ProjString("+proj=merc +datum=WGS84") @test name(grdarray) == Symbol("red:green:blue") - @test missingval(grdarray) == -3.4f38 + @test missingval(grdarray) === missing @test metadata(grdarray) isa Metadata{GRDsource,Dict{String,Any}} @test label(grdarray) == "red:green:blue" @test units(grdarray) == nothing @@ -100,7 +105,7 @@ grdpath = stem * ".gri" @test mappedcrs(customgrdarray) == EPSG(4326) @test mappedcrs(dims(customgrdarray, Y)) == EPSG(4326) @test mappedcrs(dims(customgrdarray, X)) == EPSG(4326) - @test parent(customgrdarray) isa DiskArrays.BroadcastDiskArray + @test parent(customgrdarray) isa Rasters.FileArray @test eltype(customgrdarray) == Union{Float32,Missing} # Needs to be separate as it overrides crs/mappedcrs dimsgrdarray = Raster(grdpath; @@ -110,9 +115,9 @@ grdpath = stem * ".gri" end @testset "getindex" begin - @test grdarray[Band(1)] isa Raster{Float32,2} - @test grdarray[Y(1), Band(1)] isa Raster{Float32,1} - @test grdarray[X(1), Band(1)] isa Raster{Float32,1} + @test grdarray[Band(1)] isa Raster{Union{Missing,Float32},2} + @test grdarray[Y(1), Band(1)] isa Raster{Union{Missing,Float32},1} + @test grdarray[X(1), Band(1)] isa Raster{Union{Missing,Float32},1} @test grdarray[X(50), Y(30), Band(1)] == 115.0f0 @test grdarray[1, 1, 1] == 255.0f0 @test grdarray[Y(At(20.0; atol=1e10)), X(At(20; atol=1e10)), Band(3)] == 255.0f0 @@ -130,13 +135,13 @@ grdpath = stem * ".gri" @test size(cropped) == (81, 77, 3) kwcropped = crop(a; to=trimmed, dims=(X,)) @test size(kwcropped) == (81, size(a,Y), 3) - @test all(collect(cropped .== trimmed)) + @test all(collect(cropped .=== trimmed)) extended = extend(cropped; to=a); - @test all(collect(extended .== a)) + @test all(collect(extended .=== a)) end @testset "mask and mask! to disk" begin - msk = read(replace_missing(grdarray, missing)) + msk = replace_missing(grdarray, missing) msk[X(1:73), Y([1, 5, 77])] .= missingval(msk) @test !any(grdarray[X(1:73)] .=== missingval(msk)) masked = mask(grdarray; with=msk) @@ -151,8 +156,6 @@ grdpath = stem * ".gri" mask!(A; with=msk, missingval=missingval(A)) end @test all(Raster(tempgri)[X(1:73), Y([1, 5, 77])] .=== missingval(grdarray)) - rm(tempgrd) - rm(tempgri) end @testset "classify! to disk" begin @@ -176,62 +179,54 @@ grdpath = stem * ".gri" tn = tempname() tempgrd = tn * ".grd" tempgri = tn * ".gri" - cp(stem * ".grd", tempgrd) - cp(stem * ".gri", tempgri) - Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd) + Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd, force=true) Amem = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1) Atest = grdarray[X(1:80), Y(1:60)] Atest[X(1:26), Y(31:60)] .= 0.0f0 Atest[X(41:80), Y(1:24)] .= 0.0f0 @test size(Atest) == size(Afile) == size(Amem) @test all(Atest .=== Amem .== Afile) - end - - @testset "rasterize" begin - # A = read(grdarray) - # R = rasterize(A; to=A) - # @test all(A .=== R .== grdarray) - # B = rebuild(read(grdarray) .= 0x00; missingval=0x00) - # rasterize!(B, read(grdarray)) - # @test all(B .=== grdarray |> collect) + read(Atest .- Afile) end end @testset "selectors" begin geoA = grdarray[Y(Contains(3)), X(:), Band(1)] - @test geoA isa Raster{Float32,1} + @test geoA isa Raster{Union{Missing,Float32},1} @test grdarray[X(Contains(20)), Y(Contains(10)), Band(1)] isa Float32 end @testset "conversion to Raster" begin geoA = grdarray[X(1:50), Y(1:1), Band(1)] @test size(geoA) == (50, 1) - @test eltype(geoA) <: Float32 - @time geoA isa Raster{Float32,1} + @test eltype(geoA) <: Union{Missing,Float32} + @time geoA isa Raster{Union{Missing,Float32},1} @test dims(geoA) isa Tuple{<:X,Y} @test refdims(geoA) isa Tuple{<:Band} @test metadata(geoA) == metadata(grdarray) - @test missingval(geoA) == -3.4f38 + @test missingval(geoA) === missing @test name(geoA) == Symbol("red:green:blue") end @testset "write" begin @testset "2d" begin filename2 = tempname() * ".gri" - write(filename2, grdarray[Band(1)]; force = true) + write(filename2, grdarray[Band(1)]; force=true) saved = Raster(filename2) # 1 band is added again on save @test size(saved) == size(grdarray[Band(1)]) @test parent(saved) == parent(grdarray[Band(1)]) - write(filename2, grdarray; force = true) - @test (@allocations write(filename2, grdarray; force = true)) < 3e3 + filename2 = tempname() * ".gri" + write(filename2, grdarray; force=true, verbose=false) + filename2 = tempname() * ".gri" + @test (@allocations write(filename2, grdarray; force=true, verbose=false)) < 3e3 end @testset "3d with subset" begin geoA = grdarray[1:100, 1:50, 1:2] filename = tempname() * ".grd" - write(filename, GRDsource(), geoA; force = true) + write(filename, GRDsource(), geoA; force=true) saved = Raster(filename) @test size(saved) == size(geoA) @test refdims(saved) == () @@ -251,24 +246,25 @@ grdpath = stem * ".gri" @test all(parent(saved) .=== parent(geoA)) @test saved isa typeof(geoA) @test parent(saved) == parent(geoA) + filename = tempname() * ".grd" write(filename, GRDsource(), geoA; force = true) + filename = tempname() * ".grd" @test (@allocations write(filename, GRDsource(), geoA; force = true)) < 3e3 end @testset "to netcdf" begin filename2 = tempname() * ".nc" span(grdarray[Band(1)]) - write(filename2, grdarray[Band(1)]; force = true) + write(filename2, grdarray[Band(1)]; force=true) saved = Raster(filename2; crs=crs(grdarray)) @test size(saved) == size(grdarray[Band(1)]) - @test all(parent(replace_missing(saved, missingval(grdarray))) .≈ parent(grdarray[Band(1)])) + @test all(parent(saved) .≈ parent(grdarray[Band(1)])) @test index(saved, X) ≈ index(grdarray, X) .+ 0.5 @test index(saved, Y) ≈ index(grdarray, Y) .+ 0.5 @test bounds(saved, Y) == bounds(grdarray, Y) @test bounds(saved, X) == bounds(grdarray, X) write(filename2, grdarray; force = true) @test (@allocations write(filename2, grdarray; force = true)) < 3e3 - end @testset "to gdal" begin @@ -280,7 +276,7 @@ grdpath = stem * ".gri" # @test convert(ProjString, crs(gdalarray)) == convert(ProjString, EPSG(4326)) @test val(dims(gdalarray, X)) ≈ val(dims(grdarray, X)) @test val(dims(gdalarray, Y)) ≈ val(dims(grdarray, Y)) - @test Raster(gdalarray) ≈ permutedims(grdarray[Band(1)], [X(), Y()]) + @test gdalarray ≈ replace_missing(permutedims(grdarray[Band(1)], [X(), Y()]), typemin(Int32)) # 3 Bands gdalfilename2 = tempname() * ".tif" write(gdalfilename2, grdarray) @@ -293,8 +289,11 @@ grdpath = stem * ".gri" A = replace_missing(grdarray, missing) filename = tempname() * ".grd" write(filename, A) - @test missingval(Raster(filename)) === typemin(Float32) - rm(filename) + @test missingval(Raster(filename)) === missing + filename = tempname() * ".grd" + write(filename, A) + @test missingval(Raster(filename; missingval=nothing)) === nothing + @test missingval(Raster(filename)) === missing end end @@ -350,7 +349,7 @@ end @testset "child array properties" begin @test size(grdstack[:a]) == size(Raster(grdstack[:a])) == (101, 77, 3) - @test grdstack[:a] isa Raster{Float32,3} + @test grdstack[:a] isa Raster{Union{Missing,Float32},3} end # Stack Constructors @@ -417,7 +416,7 @@ end @testset "child array properties" begin @test size(grdstack[:Band_3]) == size(Raster(grdstack[:Band_3])) == (101, 77) - @test grdstack[:Band_1] isa Raster{Float32,2} + @test grdstack[:Band_1] isa Raster{Union{Missing,Float32},2} end # Stack Constructors @@ -463,7 +462,7 @@ end end @testset "Grd series" begin - grdpath2 = stem * "2" * ".gri" + grdpath2 = joinpath(tempdir(), tempname() * ".gri") write(grdpath2, 2 .* Raster(grdpath); force=true) Raster(grdpath) .* 2 == Raster(grdpath2) eager_grdseries = RasterSeries([grdpath, grdpath2], (Ti,); mappedcrs=EPSG(4326)) diff --git a/test/sources/gribdatasets.jl b/test/sources/gribdatasets.jl index 3779e68d7..255984b3d 100644 --- a/test/sources/gribdatasets.jl +++ b/test/sources/gribdatasets.jl @@ -25,6 +25,9 @@ gribexamples_dir = abspath(joinpath(dirname(pathof(GRIBDatasets)), "..", "test", era5 = joinpath(gribexamples_dir, "era5-levels-members.grib") +ds = GRIBDatasets.GRIBDataset(era5) +v = ds[:z] + @testset "Raster" begin @time gribarray = Raster(era5) @time lazyarray = Raster(era5; lazy=true) @@ -87,8 +90,8 @@ era5 = joinpath(gribexamples_dir, "era5-levels-members.grib") end @testset "cf attributes" begin - z = lazystack[:z] - @test metadata(z)["standard_name"] == "geopotential" + z = lazystack.z + @test metadata(z)["cfName"] == "geopotential" @test metadata(lazystack)["Conventions"] == "CF-1.7" x = dims(lazystack, :X) diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 225fe7fcd..4c49ad607 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -3,7 +3,7 @@ using Rasters, DimensionalData, Test, Statistics, Dates, CFTime, Plots using Rasters.Lookups, Rasters.Dimensions using Rasters.DiskArrays import ArchGDAL, NCDatasets -using Rasters: FileArray, FileStack, NCDsource, crs, bounds, name, trim +using Rasters: FileArray, FileStack, NCDsource, crs, bounds, name, trim, metadata testdir = realpath(joinpath(dirname(pathof(Rasters)), "../test")) include(joinpath(testdir, "test_utils.jl")) @@ -31,22 +31,21 @@ stackkeys = ( ) @testset "grid mapping" begin - stack = RasterStack(joinpath(testdir, "data/grid_mapping_test.nc")) - @test metadata(stack.mask)["grid_mapping"] == Dict{String, Any}( - "straight_vertical_longitude_from_pole" => 0.0, - "false_easting" => 0.0, - "standard_parallel" => -71.0, - "inverse_flattening" => 298.27940504282, - "latitude_of_projection_origin" => -90.0, - "grid_mapping_name" => "polar_stereographic", - "semi_major_axis" => 6.378273e6, - "false_northing" => 0.0, + st = RasterStack(joinpath(testdir, "data/grid_mapping_test.nc")) + @test metadata(st.mask)["grid_mapping"] == Dict{String, Any}( + "straight_vertical_longitude_from_pole" => 0.0, + "false_easting" => 0.0, + "standard_parallel" => -71.0, + "inverse_flattening" => 298.27940504282, + "latitude_of_projection_origin" => -90.0, + "grid_mapping_name" => "polar_stereographic", + "semi_major_axis" => 6.378273e6, + "false_northing" => 0.0, ) end @testset "Raster" begin - @time ncarray = Raster(ncsingle) - + @time ncarray = Raster(ncsingle); @time lazyarray = Raster(ncsingle; lazy=true) @time eagerarray = Raster(ncsingle; lazy=false) @test_throws ArgumentError Raster("notafile.nc") @@ -59,6 +58,37 @@ end @time read(lazyarray); end + @testset "scaling and masking" begin + @time cfarray = Raster(ncsingle) + @time cfarray = Raster(ncsingle) + @time cf_nomask_array = Raster(ncsingle; missingval=nothing) + @time nocfarray = Raster(ncsingle; scaled=false) + @time nocf_nomask_array = Raster(ncsingle; scaled=false, missingval=nothing) + @time raw_array = Raster(ncsingle; raw=true) + @time lazycfarray = Raster(ncsingle; lazy=true, scaled=false) + @time lazynocfarray = Raster(ncsingle; lazy=true, scaled=false) + @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scaled=false, missingval=nothing) + @test missingval(cfarray) === missing + @test missingval(nocfarray) === missing + @test missingval(cf_nomask_array) === nothing + @test missingval(nocf_nomask_array) === nothing + @test missingval(raw_array) === 1.0f20 + @test all(skipmissing(cfarray) .=== skipmissing(nocfarray)) + @test parent(cfarray) isa Array{Union{Float32,Missing}} + @test parent(nocfarray) isa Array{Union{Float32,Missing}} + @test parent(nocf_nomask_array) isa Array{Float32} + @test parent(raw_array) isa Array{Float32} + open(lazycfarray) do A + @test parent(A) isa Rasters.ModifiedDiskArray{false,Union{Missing,Float32}} + end + open(lazynocfarray) do A + @test parent(A) isa Rasters.ModifiedDiskArray{false,Union{Missing,Float32}} + end + open(lazynocf_nomask_array) do A + @test parent(parent(A)) isa NCDatasets.Variable{Float32} + end + end + # @testset "from url" begin # # TODO we need a permanent url here that doesn't end in .nc # url = "http://apdrc.soest.hawaii.edu:80/dods/public_data/Reanalysis_Data/NCEP/NCEP2/daily/surface/mslp" @@ -85,12 +115,14 @@ end @testset "handle empty variables" begin st = RasterStack((empty=view(ncarray, 1, 1, 1), full=ncarray)) empty_test = tempname() * ".nc" - write(empty_test, st) + write(empty_test, st; force=true) + rast = Raster(empty_test) st = RasterStack(empty_test) - @test name(rast) == name(st[:empty]) == :empty - @test size(rast) == size(st[:empty]) == () - @test all(st[:full] .=== ncarray) + @test name(rast) == name(st.empty) == :empty + @test size(rast) == size(st.empty) == () + @test st.empty[] === missing + @test all(st.full .=== ncarray) st = RasterStack(empty_test; lazy=true) end @@ -178,11 +210,12 @@ end A1 = ncarray[X(1:80), Y(1:100)] A2 = ncarray[X(50:150), Y(90:150)] tempfile = tempname() * ".nc" - Afile = mosaic(first, read(A1), read(A2); missingval=missing, atol=1e-7, filename=tempfile) - Amem = mosaic(first, A1, A2; missingval=missing, atol=1e-7) + Afile = mosaic(first, read(A1), read(A2); atol=1e-7, filename=tempfile, force=true) + Amem = mosaic(first, A1, A2; atol=1e-7) Atest = ncarray[X(1:150), Y(1:150)] Atest[X(1:49), Y(101:150)] .= missing Atest[X(81:150), Y(1:89)] .= missing + read(Afile) @test all(Atest .=== Afile .=== Amem) end @testset "slice" begin @@ -239,16 +272,16 @@ end end @testset "write" begin - @testset "to netcdf" begin - filename = tempname() * ".nc" - write(filename, ncarray; force=true) - @test (@allocations write(filename, ncarray; force=true)) < 1e4 + @testset "to netcdf" begin + fn = tempname() * ".nc" + write(fn, ncarray; force=true); + @test (@allocations write(fn, ncarray; force=true)) < 1e4 @testset "CF attributes" begin - @test NCDatasets.Dataset(filename)[:x].attrib["axis"] == "X" - @test NCDatasets.Dataset(filename)[:x].attrib["bounds"] == "x_bnds" + @test NCDatasets.Dataset(fn)[:x].attrib["axis"] == "X" + @test NCDatasets.Dataset(fn)[:x].attrib["bounds"] == "x_bnds" # TODO better units and standard name handling end - saved = Raster(filename) + saved = Raster(fn) @test size(saved) == size(ncarray) @test refdims(saved) == refdims(ncarray) @test missingval(saved) === missingval(ncarray) @@ -256,7 +289,8 @@ end all(s .== g) end |> all @test metadata(saved) == metadata(ncarray) - @test_broken all(metadata(dims(saved))[2] == metadata.(dims(ncarray))[2]) + # Bounds variable names are renamed so metadata is different + @test_broken all(metadata(dims(saved))[1] == metadata(dims(ncarray))[1]) @test Rasters.name(saved) == Rasters.name(ncarray) @test all(lookup.(dims(saved)) .== lookup.(dims(ncarray))) @test all(order.(dims(saved)) .== order.(dims(ncarray))) @@ -313,8 +347,9 @@ end end @testset "non allowed values" begin - @test_throws ArgumentError write(filename, convert.(Union{Missing,Float16}, ncarray); force=true) + @test_throws ArgumentError write(fn, convert.(Union{Missing,Float16}, ncarray); force=true) end + end @testset "to gdal" begin @@ -322,7 +357,7 @@ end nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) write(gdalfilename, nccleaned; force=true) @test (@allocations write(gdalfilename, nccleaned; force=true)) < 1e4 - gdalarray = Raster(gdalfilename) + gdalarray = Raster(gdalfilename; missingval=nothing) # gdalarray WKT is missing one AUTHORITY # @test_broken crs(gdalarray) == convert(WellKnownText, EPSG(4326)) # But the Proj representation is the same @@ -336,26 +371,27 @@ end @testset "to grd" begin nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) - write("testgrd.gri", nccleaned; force=true) - @test (@allocations write("testgrd.gri", nccleaned; force=true)) < 1e4 - grdarray = Raster("testgrd.gri"); + fn = tempname() * ".gri" + write(fn, nccleaned; force=true) + fn = tempname() * ".gri" + @test (@allocations write(fn, nccleaned; force=true)) < 1e4 + grdarray = Raster(fn, missingval=nothing); @test crs(grdarray) == convert(ProjString, EPSG(4326)) @test bounds(grdarray) == bounds(nccleaned) @test index(grdarray, Y) ≈ reverse(index(nccleaned, Y)) .- 0.5 @test index(grdarray, X) ≈ index(nccleaned, X) .- 1.0 @test parent(reverse(grdarray; dims=Y)) ≈ parent(nccleaned) - rm("testgrd.gri") - rm("testgrd.grd") end @testset "write points" begin + filename = tempname() * ".nc" lon, lat = X(25:1:30), Y(25:1:30) ti = Ti(DateTime(2001):Month(1):DateTime(2002)) ras = Raster(rand(lon, lat, ti)) - write("point_rast.nc", ras; force=true) - saved = Raster("point_rast.nc") + write(filename, ras; force=true) + saved = Raster(filename) @test sampling(saved) == (Points(), Points(), Points()) - @test @allocations(write("point_rast.nc", ras; force=true)) < 10e3 + @test @allocations(write(filename, ras; force=true)) < 10e3 end end @@ -498,10 +534,10 @@ end @test parent(st) isa NamedTuple @test first(parent(st)) isa Array length(dims(st[:aclcac])) - filename = tempname() * ".nc" - write(filename, st; force=true) - @test (@allocations write(filename, st; force=true)) < 1e6 # writing a rasterseries/stack has no force keyword - saved = RasterStack(RasterStack(filename)) + fn = tempname() * ".nc" + write(fn, st; force=true) + @test (@allocations write(fn, st; force=true)) < 1e6 # writing a rasterseries/stack has no force keyword + saved = RasterStack(RasterStack(fn)) @test keys(saved) == keys(st) @test metadata(saved)["advection"] == "Lin & Rood" @test metadata(saved) == metadata(st) == metadata(ncstack) @@ -543,14 +579,15 @@ end rm("test_2.nc") end -# Groups -if !haskey(ENV, "CI") - path = joinpath(testdir, "data/SMAP_L4_SM_gph_20160101T223000_Vv4011_001.h5") - stack = RasterStack(path; group="Geophysical_Data") - lazy_stack = RasterStack(path; group="Geophysical_Data", lazy=true) - rast = Raster(path; name=:surface_temp, group="Geophysical_Data") - lazy_rast = Raster(path; name=:surface_temp, group="Geophysical_Data", lazy=true) - @test all(stack[:surface_temp] .=== read(lazy_stack[:surface_temp]) .=== rast .=== read(lazy_rast)) +h5path = joinpath(testdir, "data/SMAP_L4_SM_gph_20160101T223000_Vv4011_001.h5") +if !haskey(ENV, "CI") && isfile(h5path) + @testset "HDF5 with Groups" begin + stack = RasterStack(h5path; group="Geophysical_Data") + lazy_stack = RasterStack(path; group="Geophysical_Data", lazy=true) + rast = Raster(path; name=:surface_temp, group="Geophysical_Data") + lazy_rast = Raster(path; name=:surface_temp, group="Geophysical_Data", lazy=true) + @test all(stack[:surface_temp] .=== read(lazy_stack[:surface_temp]) .=== rast .=== read(lazy_rast)) + end end nothing diff --git a/test/sources/rasterdatasources.jl b/test/sources/rasterdatasources.jl index c673bceea..f1b9afdab 100644 --- a/test/sources/rasterdatasources.jl +++ b/test/sources/rasterdatasources.jl @@ -2,23 +2,24 @@ using Rasters, RasterDataSources, Test, Dates, ArchGDAL, NCDatasets, Proj # Too big to test on CI # if !haskey(ENV, "CI") -# @testset "load WorldClim Weather" begin -# # Weather time-series -# dates = (Date(2001), Date(2002)) -# ser = RasterSeries(WorldClim{Weather}, (:prec,); date=dates) -# ser[Date(2001, 1)][:prec] -# A = Raster(WorldClim{Weather}, :prec; date=DateTime(2001, 05), mappedcrs=EPSG(4326)) -# end + # @testset "load WorldClim Weather" begin + # # Weather time-series + # ser = RasterSeries(WorldClim{Weather}, (:prec,); + # date=(Date(2001), Date(2002)), missingval=NaN32 + # ) + # @test all(ser[At(Date(2001, 1))].prec .=== + # Raster(WorldClim{Weather}, :prec; date=DateTime(2001), missingval=NaN32)) + # end # end @testset "load WorldClim Climate" begin # Weather time-series - ser = RasterSeries(WorldClim{Climate}, :prec; res="10m", month=Jan:March, mappedcrs=EPSG(4326)) + ser = RasterSeries(WorldClim{Climate}, :prec; res="10m", month=Jan:March, mappedcrs=EPSG(4326), raw=true) # Select Australia, using regular lat/lon selectors A = ser[month=Jan] @test A isa Raster A[Y(Between(-10, -45)), X(Between(110, 160))] - st = RasterStack(WorldClim{Climate}, (:prec, :tmax); month=1) + st = RasterStack(WorldClim{Climate}, (:prec, :tmax); month=1, raw=true) @test st[:prec] == A @test missingval(st) == (prec=-32768, tmax=-3.4f38) @test st isa RasterStack{(:prec,:tmax),@NamedTuple{prec::Int16,tmax::Float32},2} @@ -28,15 +29,16 @@ end A = Raster(WorldClim{BioClim}, :Bio_1; mappedcrs=EPSG(4326)) A[Y(Between(-10, -45)), X(Between(110, 160))] @test A isa Raster + @test missingval(A) === missing st = RasterStack(WorldClim{BioClim}, (1, 2)) - st[:bio1] + @test all(st.bio1 .=== A) @test st isa RasterStack @test A isa Raster # Future Bioclim works st = RasterStack(WorldClim{Future{BioClim, CMIP6, GFDL_ESM4, SSP370}}, (1, 2); date = Date(2050), res = "10m", lazy=true, - missingval=Inf, + missingval=Inf32, crs=nothing, mappedcrs=EPSG(4326), ) @@ -53,19 +55,19 @@ end @test Rasters.name(A) == :bio1 st = RasterStack(CHELSA{BioClim}, (:bio1, :BIO2); lazy=true) @test keys(st) == (:bio1, :bio2) - @test A isa Raster + @test A isa Raster{Float64,2} @test st isa RasterStack - @test st[:bio2] isa Raster - # Allow forcing keywords - st = RasterStack(CHELSA{BioClim}, (1, 2); - lazy=true, - missingval= Int16(9999), - metadata=Rasters.NoMetadata(), - crs=nothing, - mappedcrs=EPSG(4326), - ) - @test missingval(st) === UInt16(9999) - @test missingval(st.bio1) === UInt16(9999) + @test st.bio2 isa Raster{Float64,2} + + A = Raster(CHELSA{BioClim}, 1; lazy=true, raw=true) + st = RasterStack(CHELSA{BioClim}, (:bio1, :BIO2); lazy=true, raw=true) + @test A isa Raster{UInt16,2} + @test st isa RasterStack{(:bio1, :bio2),@NamedTuple{bio1::UInt16, bio2::UInt16}} + @test st.bio2 isa Raster{UInt16,2} + + st = RasterStack(CHELSA{BioClim}, (1, 2); lazy=true) + + @test missingval(st) === missingval(st.bio1) === nothing @test metadata(st) == Rasters.NoMetadata() end @@ -98,7 +100,6 @@ end @test crs(st[:s0_pct]) == EPSG(4326) dates = DateTime(2019, 10, 19), DateTime(2021, 11, 20) s = RasterSeries(ALWB{Values,Day}, (:s0_pct, :ss_pct); date=dates, lazy=true) - s[1] @test A isa Raster @test st isa RasterStack @test s isa RasterSeries diff --git a/test/sources/zarr.jl b/test/sources/zarr.jl index 8075d0f94..c823136d8 100644 --- a/test/sources/zarr.jl +++ b/test/sources/zarr.jl @@ -1,4 +1,8 @@ using Rasters +using DimensionalData +using DimensionalData.Lookups +using DimensionalData.Dimensions +using Dates using ZarrDatasets using ZarrDatasets.Zarr using Rasters: FileArray, FileStack, Zarrsource, crs, bounds, name, trim @@ -6,11 +10,10 @@ using Rasters: FileArray, FileStack, Zarrsource, crs, bounds, name, trim path = "https://s3.bgc-jena.mpg.de:9000/esdl-esdc-v3.0.2/esdc-16d-2.5deg-46x72x1440-3.0.2.zarr" @testset "Zarr Raster open" begin - -zraster = Raster(path, name="air_temperature_2m") -lazyarray = Raster(path, lazy=true, name="air_temperature_2m") -eagerarray = Raster(path, lazy=false, name="air_temperature_2m") +zraster = Raster(path; name="air_temperature_2m") +lazyarray = Raster(path; lazy=true, name="air_temperature_2m") +eagerarray = Raster(path; lazy=false, name="air_temperature_2m") @test_throws ArgumentError Raster("notafile.zarr/") @testset "lazyness" begin @@ -19,14 +22,15 @@ eagerarray = Raster(path, lazy=false, name="air_temperature_2m") @test parent(lazyarray) isa FileArray @test parent(eagerarray) isa Array end + @testset "read" begin @time A = read(lazyarray); @test A isa Raster @test parent(A) isa Array A2 = copy(A) .= 0 - @time read!(ncarray, A2); + @time read!(zraster, A2); A3 = copy(A) .= 0 - @time read!(ncsingle, A3) + @time read!(zraster, A3) @test all(A .=== A2) @test all(A .=== A3) end @@ -37,24 +41,27 @@ end @test index(zraster,X) == collect(-178.75:2.5:178.75) # TODO the spatial bounds are strange, because the data is point data # We should find a dataset that has actual intervals + @test bounds(zraster) == ( (-178.75, 178.75), - (-88.75, 88,75), + (-88.75, 88.75), (DateTime("1979-01-09T00:00:00"), DateTime("2021-12-27T00:00:00")), ) end + @testset "dimensions" begin @test ndims(zraster) == 3 @test length.(dims(zraster)) == (144, 72, 989) @test dims(zraster) isa Tuple{<:X,<:Y,<:Ti} @test refdims(zraster) == () - @test val.(span(ncarray)) == (2.5, 2.5, (nothing, nothing)) - @test typeof(lookup(ncarray)) <: Tuple{<:Mapped,<:Mapped,<:Sampled} + @test val.(span(zraster)) == (2.5, 2.5, (nothing, nothing)) + @test typeof(lookup(zraster)) <: Tuple{<:Mapped,<:Mapped,<:Sampled} end + @testset "other fields" begin @test ismissing(missingval(zraster)) - @test metadata(r)["original_name"] == "t2m" - @test metadata(zraster) isa Metadata{<:Rasters.CDMsource, Dict{String, Any}} + @test metadata(zraster)["original_name"] == "t2m" + @test metadata(zraster) isa Rasters.Metadata{<:Rasters.CDMsource, Dict{String, Any}} @test name(zraster) == :air_temperature_2m end @@ -69,4 +76,66 @@ end @test zraster[Ti(At(DateTime(1979,1,9))), X(At(-178.75)), Y(At(-88.75))] == -28.866226f0 end +@testset "CF conventions" begin + path = tempname() * ".zarr" + global_attrs = Dict( + "Conventions" => "CF-1.7", + "_FillValue" => -999.0, + "units" => "K", + "project" => "GOES", + ) + zg = zgroup(path; attrs = global_attrs) + x_attrs = Dict( + "units" => "rad", + "add_offset" => 0, + "_ARRAY_DIMENSIONS" => ["x"], + "axis" => "X", + "long_name" => "GOES fixed grid projection x-coordinate", + "scale_factor" => 1, + "_Netcdf4Dimid" => 1, + "standard_name" => "projection_x_coordinate", + "NAME" => "x", + ) + xs = zcreate(Float64, zg, "x", 100; attrs = x_attrs) + xs .= LinRange(-5.434177815866809e6, 5.322929839864517e6, 100) + + y_attrs = Dict( + "units" => "rad", + "add_offset" => 5.258454335331338e6, + "_ARRAY_DIMENSIONS" => ["y"], + "axis" => "Y", + "long_name" => "GOES fixed grid projection y-coordinate", + "scale_factor" => -1.0625617981243342e7, + "_Netcdf4Dimid" => 1, + "standard_name" => "projection_y_coordinate", + "NAME" => "y", + ) + ys = zcreate(Float64, zg, "y", 100; attrs = y_attrs) + ys .= LinRange(0, 1, 100) + + val_attrs = Dict( + "_Netcdf4Dimid" => 0, + "scale_factor" => 2.0, + "add_offset" => 180.0, + "_FillValue" => -1, + "units" => "K", + "_ARRAY_DIMENSIONS" => ["y", "x"], + "grid_mapping" => "goes_imager_projection", + "valid_range" => [0, -6], + ) + vals = zcreate(Float64, zg, "values", 100, 100; attrs = val_attrs) + vals .= (data = rand(100, 100)) + vals[1, 1] = 1.0 + vals[end, end] = 0.0 + + ra = Raster(path) + + @test extrema(ra) == (180.0, 182.0) # test scale and offset + @test Rasters.GeoInterface.extent(ra) == Rasters.GeoInterface.Extent(X = (-5.434177815866809e6, 5.322929839864517e6), Y = (-5.367163645912004e6, 5.258454335331338e6)) + @test Rasters.isreverse(ra.dims[2]) + @test Rasters.isforward(ra.dims[1]) + @test extrema(ra.dims[1]) == extrema(xs) + @test extrema(ra.dims[2]) == reverse(extrema(ys)) .* y_attrs["scale_factor"] .+ y_attrs["add_offset"] +end + end diff --git a/test/warp.jl b/test/warp.jl index 760a1b620..a2e09dd27 100644 --- a/test/warp.jl +++ b/test/warp.jl @@ -9,17 +9,29 @@ gdalpath = maybedownload(url) # test that warp actually does *something* r = Raster(gdalpath) crs_ = crs(r).val - warped = warp(r, Dict(:t_srs => "EPSG:25832")) - @test warped isa Raster - @test size(warped) == (720,721) - # the crs is way off, the image is rotated - all four corners should be black - @test warped[1,1] == warped[1,end] == warped[end,1] == warped[end,end] == 0 - # now compute mean squared error of the back transformation - warped_back = warp(warped, Dict(:t_srs => crs_)) - # get rid of black border - ex = extrema(findall(>(0), warped_back)) - cropped = warped_back[ex[1]:ex[2]] - # subtracting UInts brings us into hell -> Int - diff_ = parent(Int.(cropped[2:end-1, 2:end-1])) .- parent(r) - @test sum(x->x^2, diff_) / prod(size(diff_)) < 600 + @testset "default missing" begin + warped = warp(r, Dict(:t_srs => "EPSG:25832")) + @test warped isa Raster + @test size(warped) == (720, 721) + # the crs is rotated so the image is rotated an all four corners should be missing + missingval(warped) === missing + @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === missing === missingval(warped) + end + + @testset "custom missing" begin + warped = warp(r, Dict(:t_srs => "EPSG:25832"); missingval=0xff => 0xff) + @test warped isa Raster + @test size(warped) == (720, 721) + # the crs is rotated so the image is rotated an all four corners should be missing + missingval(warped) === 0xff + @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === 0xff === missingval(warped) + # now compute mean squared error of the back transformation + res = map(step, lookup(r)) + warped_back = Rasters.trim(warp(warped, Dict(:t_srs => crs_); res, missingval=0xff)) + # subtracting UInts brings us into hell -> Int + # we also need to shrink the range because of some bleed during warp + diff_ = parent(warped_back[2:end-1, 2:end-1]) .- r + + @test sum(x -> x^2, diff_) / prod(size(diff_)) < 600 + end end From 991a147395ff1405d3a1b2a37d95848cd7f87519 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 21 Jan 2025 10:38:42 +0100 Subject: [PATCH 2/6] Breaking: Faster/better `aggregate` (#763) * optimise aggregate * specify DiskArrays.cache * cleanup * bugfix * bugfix * bugfix * tets more syntax * rework the meaning of Colon in aggregation * bugfix * comments * more aggregate tests * ismem is just the opposite of isdisk * fix ag test * fixes for disk * bugfix * bugfix gdal tests --- src/array.jl | 2 +- src/methods/aggregate.jl | 489 +++++++++++++++++++++++++-------------- src/modifieddiskarray.jl | 4 +- test/aggregate.jl | 48 +++- test/sources/gdal.jl | 25 +- 5 files changed, 381 insertions(+), 187 deletions(-) diff --git a/src/array.jl b/src/array.jl index 4e75fb72e..d32b22735 100644 --- a/src/array.jl +++ b/src/array.jl @@ -64,7 +64,7 @@ cleanreturn(x) = x isdisk(A::AbstractRaster) = parent(A) isa DiskArrays.AbstractDiskArray isdisk(x) = false -ismem(x) = !isdisk(x) +ismem(A::AbstractRaster) = !isdisk(A) function Base.:(==)(A::AbstractRaster{T,N}, B::AbstractRaster{T,N}) where {T,N} size(A) == size(B) && all(A .== B) diff --git a/src/methods/aggregate.jl b/src/methods/aggregate.jl index 91686fa83..84dfb080e 100644 --- a/src/methods/aggregate.jl +++ b/src/methods/aggregate.jl @@ -5,34 +5,50 @@ const IntOrIntTuple = Union{Int,Tuple{Vararg{Int}}} struct Ag end struct DisAg end +const SKIPMISSING_KEYWORD = """ +- `skipmissing`: if `true`, any `missingval` will be skipped during aggregation, so that + only areas of all missing values will be aggregated to `missingval(dst)`. If `false`, + aggregated areas containing one or more `missingval` will be assigned `missingval`. + `false` by default. `skipmissing` behaviour is independent of function `f`, which is + only applied to completely non-missing values. +""" +const METHOD_ARGUMENT = """ +- `method`: a function such as `mean` or `sum` that can combine the + value of multiple cells to generate the aggregated cell, or a [`Locus`]($DDlocusdocs) + like `Start()` or `Center()` that species where to sample from in the interval. +""" +const SCALE_ARGUMENT = """ +- `scale`: the aggregation factor, which can be an `Int`, a `Tuple` of `Int` + for each dimension, or a `:` colon to mean the whole dimension. + You can also use any `Dimension`, `Selector` or `Int` combination you can + usually use in `getindex`. `Tuple` of `Pair` or `NamedTuple` where keys are dimension names + will also work. Using a `Selector` will determine the scale by the distance from the start + of the index. Selectors will find the first offset and repeat the same aggregation size for the rest. """ - aggregate(method, object, scale; filename, progress, skipmissing) + +""" + aggregate(method, object, scale; kw...) Aggregate a `Raster`, or all arrays in a `RasterStack` or `RasterSeries`, by `scale` using `method`. # Arguments -- `method`: a function such as `mean` or `sum` that can combine the - value of multiple cells to generate the aggregated cell, or a [`Locus`]($DDlocusdocs) - like `Start()` or `Center()` that specifies where to sample from in the interval. +$METHOD_ARGUMENT - `object`: Object to aggregate, like `AbstractRasterSeries`, `AbstractStack`, `AbstractRaster` or `Dimension`. -- `scale`: the aggregation factor, which can be an integer, a tuple of integers - for each dimension, or any `Dimension`, `Selector` or `Int` combination you can - usually use in `getindex`. Using a `Selector` will determine the scale by the - distance from the start of the index. +$SCALE_ARGUMENT When the aggregation `scale` of is larger than the array axis, the length of the axis is used. # Keywords -- `skipmissingval`: if `true`, any `missingval` will be skipped during aggregation, so that - only areas of all missing values will be aggregated to `missingval`. If `false`, any - aggregated area containing a `missingval` will be assigned `missingval`. +$SKIPMISSING_KEYWORD $FILENAME_KEYWORD $SUFFIX_KEYWORD $PROGRESS_KEYWORD +$THREADED_KEYWORD +$VERBOSE_KEYWORD # Example @@ -40,7 +56,7 @@ $PROGRESS_KEYWORD using Rasters, RasterDataSources, Statistics, Plots import ArchGDAL using Rasters: Center -st = read(RasterStack(WorldClim{Climate}; month=1)) +st = RasterStack(WorldClim{Climate}; month=1) ag = aggregate(Center(), st, (Y(20), X(20)); skipmissingval=true, progress=false) plot(ag) savefig("build/aggregate_example.png"); nothing @@ -54,183 +70,201 @@ Note: currently it is faster to aggregate over memory-backed arrays. Use [`read`](@ref) on `src` before use where required. """ function aggregate end -function aggregate(method, series::AbstractRasterSeries, scale, args...; - progress=true, kw... +function aggregate(method, series::AbstractRasterSeries, scale; + progress=true, threaded=false, kw... ) - f(A) = aggregate(method, A, scale, args...; progress=false, kw...) - if progress - ProgressMeter.@showprogress "Aggregating series..." map(f, series) - else - map(f, series) + f(A) = aggregate(method, A, scale; progress=false, kw...) + A1 = f(first(series)) + dest = similar(series, typeof(A1)) + dest[1] = A1 + _run(eachindex(series), threaded, progress, "Aggregating series...") do i + dest[i] = f(series[i]) end -end -function aggregate(method, stack::AbstractRasterStack, scale; - keys=keys(stack), filename=nothing, suffix=keys, progress=true, kw... -) - f(A, suffix) = aggregate(method, A, scale; filename, suffix, kw...) - layers = if progress - ProgressMeter.@showprogress "Aggregating stack..." map(f, values(stack), Tuple(suffix)) - else - map(f, values(stack), Tuple(suffix)) + return dest +end +function aggregate(method, stack::AbstractRasterStack{K}, scale; + keys=keys(stack), filename=nothing, suffix=keys, progress=true, threaded=false, kw... +) where K + src = layers(stack) + dst_vec = Vector{Raster}(undef, length(K)) # Intentionally type unstable + _run(1:length(K), threaded, progress, "Aggregating stack...") do i + dst_vec[i] = aggregate(method, src[i], scale; filename, suffix=suffix[i], kw...) end + dst_tuple = ntuple(i -> dst_vec[i], Val{length(K)}()) - return DD.rebuild_from_arrays(stack, layers) + return DD.rebuild_from_arrays(stack, dst_tuple) end function aggregate(method, src::AbstractRaster, scale; - suffix=nothing, filename=nothing, kw... + suffix=nothing, filename=nothing, progress=true, kw... ) - dst = alloc_ag(method, src, scale; filename, suffix, kw...) - aggregate!(method, dst, src, scale; kw...) + return alloc_ag(method, src, scale; filename, suffix, kw...) do dst + aggregate!(method, dst, src, scale; progress, kw...) + end end aggregate(method, d::Dimension, scale) = rebuild(d, aggregate(method, lookup(d), scale)) -function aggregate(method, lookup::Lookup, scale) - intscale = _scale2int(Ag(), lookup, scale) - intscale == 1 && return lookup - start, stop = _endpoints(method, lookup, intscale) - newlookup = lookup[start:scale:stop] - if lookup isa AbstractSampled - sp = aggregate(method, span(lookup), scale) - return rebuild(newlookup; span=sp) +aggregate(method, l::Lookup, scale::Colon) = aggregate(method, l, length(l)) +aggregate(method, l::Lookup, scale::Nothing) = aggregate(method, l, 1) +function aggregate(method, l::Lookup, scale::Int) + intscale = _scale2int(Ag(), l, scale) + start, stop = _endpoints(method, l, intscale) + if issampled(l) && isordered(l) + newl = l[start:scale:stop] + sp = aggregate(method, span(l), scale) + return rebuild(newl; span=sp) else - return newlookup + # Categorical and Unordered lookups are just broken + # by aggregate, so use NoLookup + return NoLookup(Base.OneTo(length(start:scale:stop))) end end aggregate(method, span::Span, scale) = span aggregate(method, span::Regular, scale) = Regular(val(span) * scale) - """ - aggregate!(method, dst::AbstractRaster, src::AbstractRaster, scale; skipmissingval=false) + aggregate!(method, dst::AbstractRaster, src::AbstractRaster, scale; skipmissing=false) -Aggregate array `src` to array `dst` by `scale`, using `method`. +Aggregate raster `src` to raster `dst` by `scale`, using `method`. # Arguments -- `method`: a function such as `mean` or `sum` that can combine the - value of multiple cells to generate the aggregated cell, or a [`Locus`]($DDlocusdocs) - like `Start()` or `Center()` that species where to sample from in the interval. -- `scale`: the aggregation factor, which can be an integer, a tuple of integers - for each dimension, or any `Dimension`, `Selector` or `Int` combination you can - usually use in `getindex`. Using a `Selector` will determine the scale by the - distance from the start of the index in the `src` array. +$METHOD_ARGUMENT +$SCALE_ARGUMENT When the aggregation `scale` of is larger than the array axis, the length of the axis is used. # Keywords -- `progress`: show a progress bar. -- `skipmissingval`: if `true`, any `missingval` will be skipped during aggregation, so that - only areas of all missing values will be aggregated to `missingval`. If `false`, any - aggregated area containing a `missingval` will be assigned `missingval`. +$SKIPMISSING_KEYWORD +$PROGRESS_KEYWORD +$VERBOSE_KEYWORD -Note: currently it is _much_ faster to aggregate over memory-backed arrays. -Use [`read`](@ref) on `src` before use where required. +Note: currently it is _much_ faster to aggregate over memory-backed source +arrays. Use [`read`](@ref) on `src` before use where required. """ function aggregate!(locus::Locus, dst::AbstractRaster, src, scale; kw...) aggregate!((locus,), dst, src, scale) end -function aggregate!(loci::Tuple{Locus,Vararg}, dst::AbstractRaster, src, scale; kw...) - intscale = _scale2int(Ag(), dims(src), scale) +function aggregate!(loci::Tuple{Locus,Vararg}, dst::AbstractRaster, src, scale; + verbose=true, kw... +) + comparedims(dst, src; length=false) + intscale = _scale2int(Ag(), dims(src), scale; verbose) offsets = _agoffset.(loci, intscale) + # Cache the source if its a disk array + src1 = isdisk(src) ? DiskArrays.cache(src) : src + # Broadcast will make the dest arrays chunks when needed broadcast!(dst, CartesianIndices(dst)) do I - val = src[(upsample.(Tuple(I), intscale) .+ offsets)...] - val === missingval(src) ? missingval(dst) : val + # Upsample for each pixel. Possibly slighly inneficient + # for large aggregations but its simple + J = upsample.(Tuple(I), intscale) .+ offsets + val = src1[J...] + _ismissing(val, missingval(src)) ? missingval(dst) : val end + return dst end # Function/functor methods -function aggregate!(f, dst::AbstractRaster, src, scale; skipmissingval=false) - intscale = _scale2int(Ag(), dims(src), scale) - broadcast!(dst, CartesianIndices(dst)) do I - upper = upsample.(Tuple(I), intscale) - lower = upper .+ intscale .- 1 - block = if isdisk(src) - src[map(:, upper, lower)...] - else - view(src, map(:, upper, lower)...) - end - if skipmissingval - # All missing values return a missing value - if all(x -> x === missingval(src), block) - _missingval_or_missing(dst) - else - # Skip missing values - f((x for x in block if x !== missingval(src))) +function aggregate!(f, dst::AbstractRaster, src, scale; + skipmissingval=false, skipmissing=skipmissingval, progress=true, verbose=true +) + comparedims(dst, src; length=false) + all(Lookups.isaligned, lookup(src)) || + throw(ArgumentError("Currently only grid-alligned dimensions can be aggregated. Make a Rasters.jl Github issue if you need to aggregate with transformed dims")) + + intscale = _scale2int(Ag(), dims(src), scale; verbose) + l = upsample.(map(firstindex, axes(dst)), intscale) + u = upsample.(map(lastindex, axes(dst)), intscale) + checkbounds(src, l...) + checkbounds(src, u...) + # If a disk array, cache the src so we don't read too many times + # where src and dest chunks really don't align this may not be efficient + open(src) do src1 + src2 = isdisk(src1) ? DiskArrays.cache(src1) : src1 + # Broadcast will make the dest arrays chunks when needed + @inbounds broadcast!(dst, CartesianIndices(dst)) do I + # Upsample the lower bounds of the source range + lower = upsample.(Tuple(I), intscale) + upper = map(lower, intscale) do u, i + isnothing(i) ? u : u + i - 1 end - else - # Any missing values return a missing value - if any(x -> x === missingval(src), block) - _missingval_or_missing(dst) + I = map(:, lower, upper) + # Read a block from the source array to reduce + block = isdisk(src2) ? parent(src2)[I...] : Base.view(parent(src2), I...) + # Reduce with or without skipmissing + if skipmissing + # With skipmissing `f` will recieve an iterator + _reduce_skip(f, block, missingval(src), dst) else - f(block) + # Without skipmissing `f` will recieve a Raster + _reduce_noskip(f, block, missingval(src), dst) end end end + return dst end - """ - disaggregate(method, object, scale; filename, progress, keys) + disaggregate(object, scale; kw...) Disaggregate array, or all arrays in a stack or series, by some scale. # Arguments -- `method`: a function such as `mean` or `sum` that can combine the - value of multiple cells to generate the aggregated cell, or a [`Locus`]($DDlocusdocs) - like `Start()` or `Center()` that species where to sample from in the interval. - `object`: Object to aggregate, like `AbstractRasterSeries`, `AbstractStack`, - `AbstractRaster` or a `Dimension`. -- `scale`: the aggregation factor, which can be an integer, a tuple of integers - for each dimension, or any `Dimension`, `Selector` or `Int` combination you can - usually use in `getindex`. Using a `Selector` will determine the scale by the - distance from the start of the index. + `AbstractRaster`, `Dimension` or `Lookup`. +$SCALE_ARGUMENT # Keywords -- `progress`: show a progress bar. - -Note: currently it is faster to aggregate over memory-backed arrays. -Use [`read`](@ref) on `src` before use where required. +$FILENAME_KEYWORD +$SUFFIX_KEYWORD +$PROGRESS_KEYWORD +$THREADED_KEYWORD +Note: currently it is _much_ faster to disaggregate over a memory-backed +source array. Use [`read`](@ref) on `src` before use where required. """ function disaggregate end -function disaggregate(method, series::AbstractRasterSeries, scale; progress=true, kw...) - f = i -> disaggregate(method, series[i], scale; progress=false, kw...) - return if progress - ProgressMeter.@showprogress "Disaggregating series..." map(f, eachindex(series)) - else - map(f, eachindex(series)) +disaggregate(_, x, scale) = disaggregate(x, scale) # legacy +function disaggregate(series::AbstractRasterSeries, scale; + progress=true, threaded=false, kw... +) + A1 = disaggregate(series[1], scale; progress=false, kw...) + dst = similar(series, typeof(A1)) + dst[1] = A1 + _run(eachindex(series)[2:end], threaded, progress, "Disaggregating series...") do i + dst[i] = disaggregate(series[i], scale; progress=false, kw...) end + return dst end -function disaggregate(method, stack::AbstractRasterStack, scale; - keys=keys(stack), suffix=keys, filename=nothing, progress=true -) - f(A, suffix) = disaggregate(method, A, scale; filename, suffix) - - layers = if progress - ProgressMeter.@showprogress "Disaggregating stack..." map(f, values(stack), Tuple(suffix)) - else - map(f, values(stack), Tuple(suffix)) +function disaggregate(stack::AbstractRasterStack{K}, scale; + keys=keys(stack), suffix=keys, filename=nothing, progress=true, threaded=false +) where K + dst_vec = Vector{Raster}(undef, length(K)) + ls = layers(stack) + _run(1:length(K), threaded, progress, "Disaggregating stack...") do i + dst_vec[i] = disaggregate(ls[i], scale; filename, suffix=suffix[i]) end - return DD.rebuild_from_arrays(stack, layers) + dst_tuple = ntuple(i -> dst_vec[i], Val{length(K)}()) + return DD.rebuild_from_arrays(stack, dst_tuple) end -function disaggregate(method, src::AbstractRaster, scale; +function disaggregate(src::AbstractRaster, scale; suffix=nothing, filename=nothing, kw... ) - dst = alloc_disag(method, src, scale; filename, suffix, kw...) - disaggregate!(method, dst, src, scale) + return alloc_disag(Center(), src, scale; filename, suffix, kw...) do dst + disaggregate!(dst, src, scale) + end end -function disaggregate(locus::Locus, dim::Dimension, scale) +function disaggregate(dim::Dimension, scale) rebuild(dim, disaggregate(locus, lookup(dim), scale)) end -function disaggregate(locus, lookup::Lookup, scale) +function disaggregate(lookup::Lookup, scale) intscale = _scale2int(DisAg(), lookup, scale) intscale == 1 && return lookup len = length(lookup) * intscale step_ = step(lookup) / intscale - start = lookup[1] - _agoffset(locus, intscale) * step_ + start = lookup[1] - _agoffset(Start(), intscale) * step_ stop = start + (len - 1) * step_ index = LinRange(start, stop, len) if lookup isa AbstractSampled @@ -241,42 +275,42 @@ function disaggregate(locus, lookup::Lookup, scale) end end -disaggregate(method, span::Span, scale) = span -disaggregate(method, span::Regular, scale) = Regular(val(span) / scale) +disaggregate(span::Span, scale) = span +disaggregate(span::Regular, scale) = Regular(val(span) / scale) """ - disaggregate!(method, dst::AbstractRaster, src::AbstractRaster, filename, scale) + disaggregate!(dst::AbstractRaster, src::AbstractRaster, scale) -Disaggregate array `src` to array `dst` by some scale, using `method`. +Disaggregate array `src` to array `dst` by some scale. -- `method`: a function such as `mean` or `sum` that can combine the - value of multiple cells to generate the aggregated cell, or a [`Locus`]($DDlocusdocs) - like `Start()` or `Center()` that species where to sample from in the interval. -- `scale`: the aggregation factor, which can be an integer, a tuple of integers - for each dimension, or any `Dimension`, `Selector` or `Int` combination you can - usually use in `getindex`. Using a `Selector` will determine the scale by the - distance from the start of the index in the `src` array. - -Note: currently it is faster to aggregate over memory-backed arrays. -Use [`read`](@ref) on `src` before use where required. +$SCALE_ARGUMENT """ -function disaggregate!(locus::Locus, dst::AbstractRaster, src, scale) - disaggregate!((locus,), dst, src, scale) -end -function disaggregate!(loci::Tuple{Locus,Vararg}, dst::AbstractRaster, src, scale) +function disaggregate!(dst::AbstractRaster, src, scale) intscale = _scale2int(DisAg(), dims(src), scale) - broadcast!(dst, CartesianIndices(dst)) do I - val = src[(downsample.(Tuple(I), intscale))...] - val === missingval(src) ? missingval(dst) : val + # For now we just read a DiskArray + open(src) do src1 + src2 = isdisk(src) ? DiskArrays.cache(src1) : src1 + # Fast path for Array backed data + for I in CartesianIndices(src2) + lower = upsample.(Tuple(I), intscale) + upper = map(lower, intscale, size(dst)) do l, is, s + min(l + is - 1, s) + end + ranges = map(:, lower, upper) + val = src2[I] + val1 = _ismissing(src, val) ? missingval(dst) : val + dst[ranges...] .= (val1,) + end end + return dst end # Allocate an array of the correct size to aggregate `A` by `scale` -alloc_ag(method, A::AbstractRaster, scale; kw...) = alloc_ag((method,), A, scale; kw...) -function alloc_ag(method::Tuple, A::AbstractRaster, scale; - filename=nothing, suffix=nothing, skipmissingval=nothing +alloc_ag(f, method, A::AbstractRaster, scale; kw...) = alloc_ag(f, (method,), A, scale; kw...) +function alloc_ag(f, method::Tuple, A::AbstractRaster, scale; + filename=nokw, suffix=nokw, skipmissingval=false, skipmissing=false, progress=false, verbose=false ) - intscale = _scale2int(Ag(), dims(A), scale) + intscale = _scale2int(Ag(), dims(A), scale; verbose=false) # Aggregate the dimensions dims_ = aggregate.(method, dims(A), intscale) # Dim aggregation determines the array size @@ -289,23 +323,25 @@ function alloc_ag(method::Tuple, A::AbstractRaster, scale; T = promote_type(agT, typeof(missingval(A))) mv = convert(T, missingval(A)) end - return create(filename, T, dims_; name=name(A), suffix, missingval=mv) + return create(f, filename, T, dims_; name=name(A), suffix, missingval=mv) end # Allocate an array of the correct size to disaggregate `A` by `scale` -function alloc_disag(method, A::AbstractRaster, scale; kw...) - alloc_disag((method,), A, scale; kw...) +function alloc_disag(f, method, A::AbstractRaster, scale; kw...) + alloc_disag(f, (method,), A, scale; kw...) end -function alloc_disag(method::Tuple, A::AbstractRaster, scale; - filename=nothing, suffix=nothing +function alloc_disag(f, method::Tuple, A::AbstractRaster, scale; + filename=nokw, suffix=nokw ) - intscale = _scale2int(DisAg(), dims(A), scale) - dims_ = disaggregate.(method, dims(A), intscale) + intscale = _scale2int(DisAg(), dims(A), scale; verbose=false) + dims_ = map(dims(A), intscale) do d, i + disaggregate(method, d, i) + end # Dim aggregation determines the array size sze = map(length, dims_) T = ag_eltype(method, A) mv = missingval(A) isa Nothing ? nothing : convert(T, missingval(A)) - return create(filename, T, dims_; name=name(A), suffix, missingval=mv) + return create(f, filename, T, dims_; name=name(A), suffix, missingval=mv) end # Handle how methods like `mean` can change the type @@ -317,31 +353,144 @@ end # Convert indices from the aggregated array to the larger original array. upsample(index::Int, scale::Int) = (index - 1) * scale + 1 -upsample(index::Int, scale::Colon) = index +upsample(index::Int, scale::Nothing) = index # Convert indices from the original array to the aggregated array. downsample(index::Int, scale::Int) = (index - 1) ÷ scale + 1 -downsample(index::Int, scale::Colon) = index +downsample(index::Int, scale::Nothing) = index # Convert scale or tuple of scale to integer using dims2indices -function _scale2int(x, dims::DimTuple, scale::Tuple) - map((d, s) -> _scale2int(x, d, s), dims, DD.dims2indices(dims, scale)) +@inline function _scale2int(x, dims::DimTuple, scale::DimTuple; verbose=true) + map(dims, DD.sortdims(scale, dims)) do d, s + if isnothing(s) + 1 + else + i = dims2indices(d, s) + # Swap Colon as all to Colon as 1 (1 is the no-change option here) + s = i isa Colon ? length(d) : i + _scale2int(x, d, s) + end + end +end +@inline function _scale2int(x, dims::DimTuple, scale::Tuple; verbose=true) + map(dims, DD.dims2indices(dims, scale)) do d, s + _scale2int(x, d, s) + end +end +@inline _scale2int(x, dims::DimTuple, scale::Tuple{<:Pair,Vararg{Pair}}; verbose=true) = + _scale2int(x, dims, Dimensions.pairs2dims(scale...); verbose) +@inline _scale2int(x, dims::DimTuple, scale::NamedTuple; verbose=true) = + _scale2int(x, dims, Dimensions.kw2dims(scale); verbose) +@inline function _scale2int(x, dims::DimTuple, scale::Int; verbose=true) + # If there are other dimensions, we skip categorical dims + vals = map(dims) do d + if iscategorical(d) || !isordered(d) + name(d), nothing + else + name(d), _scale2int(x, d, scale) + end + end + nskipped = count(isnothing ∘ last, vals) + if nskipped == length(dims) + example = join(map(((n, d),) -> "$(name(d))=$(n + 1),", enumerate(dims)), ' ') + # If all dims are categorical we error + throw(ArgumentError("All dimensions are Categorical. To aggregate anyway, list scale explicity for each dimension, e.g. ($example)")) + end + if verbose && nskipped > 0 + scaleddims = join((v[1] for v in vals if v[2] isa Int), ", ", " and ") + skippeddims = join((v[1] for v in vals if isnothing(v[2])), ", ", " and ") + @info """ + Aggregating $scaleddims by $scale. $(skippeddims == "" ? "" : skippeddims) + skipped due to being `Categorical` or `Unordered`. + Specify all scales explicitly in a Tuple or NamedTuple to aggregate these anyway. + """ + end + return map(last, vals) end -_scale2int(x, dims::DimTuple, scale::Int) = map(d -> _scale2int(x, d, scale), dims) -_scale2int(x, dims::DimTuple, scale::Colon) = map(d -> _scale2int(x, d, scale), dims) -_scale2int(x, dim::Dimension, scale::Int) = _scale2int(x, lookup(dim), scale) -_scale2int(::Ag, l::Lookup, scale::Int) = scale > length(l) ? length(l) : scale -_scale2int(::DisAg, l::Lookup, scale::Int) = scale -_scale2int(x, dim::Dimension, scale::Colon) = 1 - -_agoffset(locus::Locus, l::Lookup, scale) = _agoffset(locus, scale) -_agoffset(method, l::Lookup, scale) = _agoffset(locus(l), scale) -_agoffset(locus::Start, scale) = 0 -_agoffset(locus::End, scale) = scale - 1 -_agoffset(locus::Center, scale) = scale ÷ 2 - -function _endpoints(method, l::Lookup, scale) +@inline _scale2int(x, dims::DimTuple, scale::Colon; verbose=true) = + _scale2int(x, dims, map(_ -> Colon(), dims)) +@inline _scale2int(x, d, scale::Colon) = length(d) +@inline _scale2int(x, dim::Dimension, scale::Int) = _scale2int(x, lookup(dim), scale) +@inline _scale2int(::Ag, l::Lookup, scale::Int) = scale > length(l) ? length(l) : scale +@inline _scale2int(::DisAg, l::Lookup, scale::Int) = scale + +_agoffset(locus::Locus, l::Lookup, scale::Int) = _agoffset(locus, scale) +_agoffset(method, l::Lookup, scale::Int) = _agoffset(locus(l), scale) +_agoffset(x, scale::Colon) = 0 +_agoffset(locus::Start, scale::Int) = 0 +_agoffset(locus::End, scale::Int) = scale - 1 +_agoffset(locus::Center, scale::Int) = scale ÷ 2 + +_endpoints(method, l::Lookup, scale::Colon) = firstindex(l), lastindex(l) +_endpoints(method, l::Lookup, scale::Nothing) = firstindex(l), lastindex(l) +function _endpoints(method, l::Lookup, scale::Int) start = firstindex(l) + _agoffset(method, l, scale) stop = (length(l) ÷ scale) * scale return start, stop end + +# Fallback iterator +@propagate_inbounds function _reduce_noskip(f, block, mv, dst) + for x in block + _ismissing(x, mv) && return _missingval_or_missing(dst) + end + return f(block) +end +# Specialised fast paths +@propagate_inbounds function _reduce_noskip(::typeof(count), block, mv, dst) + Missings.nonmissingtype(eltype(block)) <: Bool || throw(ArgumentError("`count` can only reduce rasters of Bool")) + return _reduce_noskip(sum, block, mv, dst) +end +@propagate_inbounds function _reduce_noskip(::typeof(sum), block, mv, dst) + agg = zero(eltype(block)) + for x in block + _ismissing(x, mv) && return _missingval_or_missing(dst) + agg += x + end + return agg +end +@propagate_inbounds function _reduce_noskip(::typeof(DD.Statistics.mean), block, mv, dst) + agg = zero(eltype(block)) + n = 0 + for x in block + _ismissing(x, mv) && return _missingval_or_missing(dst) + n += 1 + agg += x + end + return agg / n +end + +# Fallback iterator +@propagate_inbounds function _reduce_skip(f, block, mv, dst) + for x in block + _ismissing(x, mv) || return f((x for x in block if x !== mv)) + end + return _missingval_or_missing(dst) +end +# Specialised fast paths +@propagate_inbounds function _reduce_skip(::typeof(count), block, mv, dst) + Missings.nonmissingtype(eltype(block)) <: Bool || throw(ArgumentError("`count` can only reduce rasters of Bool")) + return _reduce_skip(sum, block, mv, dst) +end +@propagate_inbounds function _reduce_skip(::typeof(sum), block, mv, dst) + agg = zero(eltype(block)) + found = false + for x in block + _ismissing(x, mv) && continue + found = true + agg += x + end + return found ? agg : _missingval_or_missing(dst) +end +@propagate_inbounds function _reduce_skip(::typeof(DD.Statistics.mean), block, mv, dst) + agg = zero(eltype(block)) + found = false + n = 0 + for x in block + _ismissing(x, mv) && continue + found = true + n += 1 + agg += x + end + return found ? agg / n : _missingval_or_missing(dst) +end diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index 93bddd210..6b7ca7d7d 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -124,8 +124,8 @@ Base.@assume_effects :foldable function _applymod(x, m::Mod) end Base.@assume_effects :foldable _applymod(x, ::NoMod) = x -_ismissing(x, mv) = isequal(x, mv) -_ismissing(_, ::Nothing) = false +_ismissing(x, mv) = ismissing(x) || x === mv +_ismissing(x, ::Nothing) = ismissing(x) _scaleoffset(x, m::Mod) = _scaleoffset(x, m.scale, m.offset) _scaleoffset(x, scale, offset) = x * scale + offset diff --git a/test/aggregate.jl b/test/aggregate.jl index 977de8852..6b61ddbcf 100644 --- a/test/aggregate.jl +++ b/test/aggregate.jl @@ -26,8 +26,8 @@ data1 = [ 1 2 3 4 5 6 -1 data2 = 2 * data1 data3 = 3 * data1 data4 = 4 * data1 -dimz = X(Sampled([30., 40., 50.]; order=ForwardOrdered(), span=Regular(10.0), sampling=Points())), - Y(Sampled(LinRange(-10., 20., 7); order=ForwardOrdered(), span=Regular(5.0), sampling=Points())) +dimz = X(Projected([30., 40., 50.]; order=ForwardOrdered(), span=Regular(10.0), sampling=Points(), crs=EPSG(4326))), + Y(Projected(LinRange(-10., 20., 7); order=ForwardOrdered(), span=Regular(5.0), sampling=Points(), crs=EPSG(4326))) array1 = Raster(data1, dimz) array2 = Raster(data2, dimz) array1a = Raster(data3, dimz) @@ -37,7 +37,6 @@ stack2 = RasterStack(array1a, array2a; name=(:array1, :array2)) dates = DateTime(2017):Year(1):DateTime(2018) series = RasterSeries([stack1, stack2], (Ti(dates),)) - @testset "Aggregate a dimension" begin lat = Y(Sampled(LinRange(3, 13, 6), ForwardOrdered(), Regular(2.0), Intervals(Start()), NoMetadata())) aglat = aggregate(Start(), lat, 3) @@ -64,11 +63,11 @@ series = RasterSeries([stack1, stack2], (Ti(dates),)) end @testset "aggregate a single dim" begin - ag = aggregate(Start(), series, (X(3), )) + ag = aggregate(Start(), series, (X(3),)) @test size(first(ag)) == (1, 7) - ag = aggregate(Start(), series, (Y(5), )) + ag = aggregate(Start(), series, (Y(5),)) @test size(first(ag)) == (3, 1) - ag = aggregate(Start(), series, (Y(2), )) + ag = aggregate(Start(), series, (Y(2),)) @test size(first(ag)) == (3, 3) end @@ -143,12 +142,18 @@ end agg = aggregate(Start(), array1, (X(1), Y(Near(-4)))) @test agg == aggregate(Start(), array1, (1, 2)) @testset "scale 1 dims are unchanged" begin - @test dims(agg, X) === dims(array1, X) + @test dims(agg, X) == dims(array1, X) end end end +@testset "Aggregate with Colon" begin + @test aggregate(sum, array1, :) == [sum(array1);;] + @test aggregate(sum, array1, (1, :)) == sum(array1; dims=2) + @test aggregate(sum, array1, (X=1, Y=:)) == sum(array1; dims=2) +end + @testset "Aggregate with a function" begin @test aggregate(sum, array1, 3) == [72 99] @test aggregate(median, array1, 3) == [8 11] @@ -157,19 +162,28 @@ end @test length.(dims(A)) == size(A) end +@testset "test all scale modes" begin + @test aggregate(sum, array1, (3, 2)) == + aggregate(sum, array1, (Y(2), X(3))) == + aggregate(sum, array1, (Y=2, X=3)) == + aggregate(sum, array1, (Y=>2, X=>3)) == + aggregate(sum, array1, (:Y=>2, :X=>3)) == + [45 57 69] +end + @testset "Aggregate with a function with missing values" begin data_m = [ 1 2 3 4 5 6 -1 7 8 9 10 11 12 -1 13 14 15 16 missing 18 -1] array_m = Raster(data_m, dimz) @test all(aggregate(sum, array_m, 3) .=== [72 missing]) - @test all(aggregate(sum, array_m, 3; skipmissingval=true) .=== [72 82]) + @test all(aggregate(sum, array_m, 3; skipmissing=true) .=== [72 82]) data_m0 = [ 1 2 3 4 5 6 -1 7 8 0 10 11 12 -1 13 14 15 16 17 18 -1] array_m0 = Raster(data_m0, dimz; missingval=0) @test aggregate(sum, array_m0, 3) == [0 99] - @test aggregate(sum, array_m0, 3; skipmissingval=true) == [63 99] + @test aggregate(sum, array_m0, 3; skipmissing=true) == [63 99] @test all(aggregate(mean, array_m0, 3) .=== [0.0 11.0]) end @@ -183,9 +197,23 @@ end end @testset "Aggregate different index lookups" begin - dimz = Band(1:3), Dim{:category}([:a, :b, :c]), X([10, 20, 30, 40]) + dimz = Y([1, 3, 2]), Dim{:category}([:a, :b, :c]), X([10, 20, 30, 40]) a1 = [1 2 3; 4 5 6; 7 8 9] A = cat(a1, a1 .+ 10, a1 .+ 20, a1 .+ 30, dims=3) da = Raster(A, dimz) @test vec(aggregate(sum, da, (3, 2, 2))) == [114, 354] + @test size(aggregate(sum, da, 3)) == (3, 3, 1) +end + +@testset "Aggregate ignores categorical by default" begin + rast = Raster(rand(X(1:10), Y(1:10), Z([:a, :b, :c]))) + skipped_ag = @test_logs (:info,) aggregate(sum, rast, 2) + @test size(skipped_ag) == (5, 5, 3) + # Unless specified explicitly + explicit_ag = @test_nowarn aggregate(sum, rast, (X=5, Y=5, Z=3)) + @test size(explicit_ag) == (2, 2, 1) + # Aggregated Categorical become NoLookup + @test isnolookup(explicit_ag, Z) + @test_throws ArgumentError aggregate(sum, rast[X=1, Y=1], 2) end + diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index 9d7731f08..fc058d85b 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -259,14 +259,31 @@ gdalpath = maybedownload(url) @testset "aggregate" begin ag = aggregate(mean, gdalarray, 4) - @test ag == aggregate(mean, gdalarray, (X(4), Y(4), Band(1))) + @test ag == aggregate(mean, gdalarray, (X(4), Y(4))) + @test ag == aggregate(mean, lazyarray, 4; filename=tempname() * ".tif") + @time ag_disk = aggregate(mean, lazyarray, 4; filename=tempname() * ".tif") + @test ag_disk == ag tempfile = tempname() * ".tif" write(tempfile, ag) - open(Raster(tempfile); write=true) do dst + open(Raster(tempfile; lazy=true); write=true) do dst aggregate!(mean, dst, gdalarray, 4) end @test Raster(tempfile) == ag - rm(tempfile) + + disag = disaggregate(gdalarray, 2) + @test disag == disaggregate(lazyarray, 2) + @test disag == disaggregate(gdalarray, (X(2), Y(2))) + @test size(disag) == size(gdalarray) .* 2 + + @testset "disaggregate to file" begin + tempfile = tempname() * ".tif" + write(tempfile, disaggregate(gdalarray, 2)) + disaggregate(2 .* gdalarray, 2) + open(Raster(tempfile; lazy=true); write=true) do dst + disaggregate!(dst, 2 .* gdalarray, 2) + end + @test size(Raster(tempfile)) == 2 .* size(gdalarray) + end end @testset "mosaic" begin @@ -762,7 +779,7 @@ end output_crs = EPSG(4326) resample_method = "near" - ## Resample cea.tif manually with ArchGDAL + # Resample cea.tif manually with ArchGDAL wkt = convert(String, convert(WellKnownText, output_crs)) AG_output = ArchGDAL.read(gdalpath) do dataset ArchGDAL.gdalwarp([dataset], ["-t_srs", "$(wkt)", From aa2fd19c1579c7cc84e18bc297f44028cdd47b98 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 21 Jan 2025 11:28:27 +0100 Subject: [PATCH 3/6] Faster `mosaic` (#839) * faster mosaic * bugfix * remove comment * tweaks * cleanup and reorganise * add missing deps * Breaking: Line and Polygon `extract` optimisation (#824) * faster line extract * performance * extract in a separate file * flatten * bugfixes * some more comments * more perf tweaks * bugfix * fix extract ambiguities * more ambiguity * Bugfix geomtrait * arg fixes * add id column * tests and bugfixes * bugfix * set id default to _False() * fix tests * bugfix * bugfix * half fix ncdatasets mosaic * working LinerSolver with tests * fix tests * bugfix doctests * bugfix atol * bugfix create * bugfix tests * pad float intervals before view for fp error * bugfix lazy reorder * remove a # * bugfixes * remove show * fix example * fix examples * fix tests again * fix ambiguities --- .github/dependabot.yml | 11 + Project.toml | 2 +- ext/RastersStatsBaseExt/sample.jl | 1 + src/create.jl | 8 +- src/lookup.jl | 6 +- src/methods/burning/allocs.jl | 24 +- src/methods/burning/array_init.jl | 44 ++- src/methods/burning/geometry.jl | 26 +- src/methods/burning/line.jl | 114 +++--- src/methods/burning/polygon.jl | 14 +- src/methods/extract.jl | 564 +++++++++++++++++++++--------- src/methods/mask.jl | 6 +- src/methods/mosaic.jl | 251 +++++++++---- src/methods/rasterize.jl | 17 +- src/methods/reproject.jl | 10 +- src/methods/shared_docstrings.jl | 13 +- src/methods/zonal.jl | 11 +- src/sources/grd.jl | 2 +- src/utils.jl | 64 ++-- test/extract.jl | 506 +++++++++++++++++++++++++++ test/methods.jl | 237 +------------ test/mosaic.jl | 67 ++++ test/runtests.jl | 8 +- test/sources/gdal.jl | 24 +- test/sources/grd.jl | 1 - test/sources/ncdatasets.jl | 7 +- 26 files changed, 1383 insertions(+), 655 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 test/extract.jl create mode 100644 test/mosaic.jl diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..6867e71eb --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/Project.toml b/Project.toml index 8ef210f98..5b580da91 100644 --- a/Project.toml +++ b/Project.toml @@ -60,7 +60,7 @@ ConstructionBase = "1" CoordinateTransformations = "0.6.2" DataFrames = "1" DimensionalData = "0.29.4" -DiskArrays = "0.3, 0.4" +DiskArrays = "0.4" Extents = "0.1" FillArrays = "0.12, 0.13, 1" Flatten = "0.4" diff --git a/ext/RastersStatsBaseExt/sample.jl b/ext/RastersStatsBaseExt/sample.jl index f54f9d919..80180fa6f 100644 --- a/ext/RastersStatsBaseExt/sample.jl +++ b/ext/RastersStatsBaseExt/sample.jl @@ -52,6 +52,7 @@ function _getindex(::Type{T}, x::AbstractRasterStack{<:Any, NT}, dims, idx) wher RA._maybe_add_fields( T, NT(x[RA.commondims(idx, x)]), + nothing, DimPoints(dims)[RA.commondims(idx, dims)], val(idx) ) diff --git a/src/create.jl b/src/create.jl index 25cdba8ae..a321a9ad2 100644 --- a/src/create.jl +++ b/src/create.jl @@ -291,15 +291,15 @@ function create(filename::AbstractString, source::Source, ::Type{T}, dims::Tuple end # Create layers of zero arrays rast = Raster(A, dims; name, missingval=mv_inner) + # Write to disk Rasters.write(f, filename, source, rast; - eltype, chunks, metadata, scale, offset, missingval, verbose, force, coerce, write, kw... + eltype, chunks, metadata, scale, offset, missingval=mv_inner, verbose, force, coerce, write, kw... ) do W - # write returns a variable, wrap it as a Raster + # `write`` returns a variable, wrap it as a Raster and run f f(rebuild(rast; data=W)) end # Don't pass in `missingval`, read it again from disk in case it changed - r = Raster(filename; source, lazy, metadata, dropband, coerce, missingval=mv_outer) - return r + return Raster(filename; source, lazy, metadata, dropband, coerce, missingval=mv_outer) end # Create on-disk RasterStack from filename, source, type and dims function create(filename::AbstractString, source::Source, layertypes::NamedTuple, dims::Tuple; diff --git a/src/lookup.jl b/src/lookup.jl index 07d1a68e0..75fd24660 100644 --- a/src/lookup.jl +++ b/src/lookup.jl @@ -95,15 +95,15 @@ dim(lookup::Projected) = lookup.dim end @inline function LA.selectindices(l::Projected, sel::LA.Selector{<:AbstractVector}; kw...) selval = reproject(mappedcrs(l), crs(l), dim(l), val(sel)) - _selectvec(l, rebuild(sel; val=selval)sel; kw...) + LA._selectvec(l, rebuild(sel; val=selval); kw...) end @inline function LA.selectindices(l::Projected, sel::LA.IntSelector{<:Tuple}; kw...) selval = reproject(mappedcrs(l), crs(l), dim(l), val(sel)) - _selecttuple(l, rebuild(sel; val=selval); kw...) + LA._selecttuple(l, rebuild(sel; val=selval); kw...) end @inline LA.selectindices(l::Projected{<:Tuple}, sel::LA.IntSelector{<:Tuple}; kw...) = LA._selectindices(l, sel; kw...) @inline LA.selectindices(l::Projected{<:Tuple}, sel::LA.IntSelector{<:Tuple{<:Tuple,<:Tuple}}; kw...) = - _selecttuple(l, sel; kw...) + LA._selecttuple(l, sel; kw...) function LA.selectindices(l::Projected, sel::Between{<:Tuple}) selval = map(v -> reproject(mappedcrs(l), crs(l), dim(l), v), val(sel)) diff --git a/src/methods/burning/allocs.jl b/src/methods/burning/allocs.jl index 5d916cf76..7956e266f 100644 --- a/src/methods/burning/allocs.jl +++ b/src/methods/burning/allocs.jl @@ -11,22 +11,36 @@ function Allocs(buffer) return Allocs(buffer, edges, scratch, crossings) end -function _burning_allocs(x; nthreads=_nthreads(), threaded=true, kw...) - dims = commondims(x, DEFAULT_POINT_ORDER) +function _burning_allocs(x; + nthreads=_nthreads(), + threaded=true, + burncheck_metadata=Metadata(), + kw... +) if threaded - [Allocs(_init_bools(dims; metadata=Metadata())) for _ in 1:nthreads] + if isnothing(x) + [Allocs(nothing) for _ in 1:nthreads] + else + dims = commondims(x, DEFAULT_POINT_ORDER) + [Allocs(_init_bools(dims; metadata=deepcopy(burncheck_metadata))) for _ in 1:nthreads] + end else - Allocs(_init_bools(dims; metadata=Metadata())) + if isnothing(x) + Allocs() + else + dims = commondims(x, DEFAULT_POINT_ORDER) + Allocs(_init_bools(dims; metadata=burncheck_metadata)) + end end end _get_alloc(allocs::Vector{<:Allocs}) = _get_alloc(allocs[Threads.threadid()]) _get_alloc(allocs::Allocs) = allocs - # TODO include these in Allocs _alloc_burnchecks(n::Int) = fill(false, n) _alloc_burnchecks(x::AbstractArray) = _alloc_burnchecks(length(x)) + function _set_burnchecks(burnchecks, metadata::Metadata{<:Any,<:Dict}, verbose) metadata["missed_geometries"] = .!burnchecks verbose && _burncheck_info(burnchecks) diff --git a/src/methods/burning/array_init.jl b/src/methods/burning/array_init.jl index a700a6454..306a26e61 100644 --- a/src/methods/burning/array_init.jl +++ b/src/methods/burning/array_init.jl @@ -4,29 +4,22 @@ # TODO merge this with `create` somehow _init_bools(to; kw...) = _init_bools(to, BitArray; kw...) _init_bools(to, T::Type; kw...) = _init_bools(to, T, nothing; kw...) -_init_bools(to::AbstractRasterSeries, T::Type, data; kw...) = _init_bools(first(to), T, data; kw...) -_init_bools(to::AbstractRasterStack, T::Type, data; kw...) = _init_bools(first(to), T, data; kw...) -_init_bools(to::AbstractRaster, T::Type, data; kw...) = _init_bools(to, dims(to), T, data; kw...) -_init_bools(to::DimTuple, T::Type, data; kw...) = _init_bools(to, to, T, data; kw...) -function _init_bools(to::Nothing, T::Type, data; - geometrycolumn=nothing, - collapse=nokw, - res=nokw, - size=nokw, - kw... -) +_init_bools(to::AbstractRasterSeries, T::Type, data; kw...) = + _init_bools(dims(first(to)), T, data; kw...) +_init_bools(to::AbstractRasterStack, T::Type, data; kw...) = + _init_bools(dims(to), dims(to), T, data; kw...) +_init_bools(to::AbstractRaster, T::Type, data; kw...) = + _init_bools(to, dims(to), T, data; kw...) +_init_bools(to::DimTuple, T::Type, data; kw...) = + _init_bools(to, to, T, data; kw...) +function _init_bools(to::Nothing, T::Type, data; geometrycolumn=nothing, kw...) # Get the extent of the geometries ext = _extent(data; geometrycolumn) isnothing(ext) && throw(ArgumentError("no recognised dimensions, extent or geometry")) - return _init_bools(ext, T, data; collapse, res, size, kw...) -end -function _init_bools(to::Extents.Extent, T::Type, data; - collapse=nokw, size=nokw, res=nokw, sampling=nokw, kw... -) - # Convert the extent to dims (there must be `res` or `size` in `kw`) - dims = _extent2dims(to; size, res, sampling, kw...) - _init_bools(to, dims, T, data; collapse, kw...) + return _init_bools(ext, T, data; kw...) end +_init_bools(to::Extents.Extent, T::Type, data; kw...) = + _init_bools(to, _extent2dims(to; kw...), T, data; kw...) function _init_bools(to, dims::DimTuple, T::Type, data; collapse::Union{Bool,Nothing,NoKW}=nokw, kw... ) @@ -54,12 +47,12 @@ function _alloc_bools(to, dims::DimTuple, ::Type{<:Array{T}}; missingval=false, metadata=NoMetadata(), kw... ) where T # Use an Array - data = fill!(Raster{T}(undef, dims), missingval) + data = fill!(Raster{T}(undef, dims), missingval) return rebuild(data; missingval, metadata) end -function _prepare_for_burning(B, locus=Center()) - B1 = _forward_ordered(B) +function _prepare_for_burning(B; locus=Center(), order=ForwardOrdered()) + B1 = _maybe_lazy_reorder(order, B) start_dims = map(dims(B1, DEFAULT_POINT_ORDER)) do d # Shift lookup values to center of pixels d = DD.maybeshiftlocus(locus, d) @@ -69,9 +62,12 @@ function _prepare_for_burning(B, locus=Center()) end # Convert to Array if its not one already -_lookup_as_array(d::Dimension) = parent(lookup(d)) isa Array ? d : modify(Array, d) +_lookup_as_array(x) = setdims(x, _lookup_as_array(dims(x))) +_lookup_as_array(dims::Tuple) = map(_lookup_as_array, dims) +_lookup_as_array(d::Dimension) = parent(lookup(d)) isa Array ? d : modify(Array, d) -function _forward_ordered(B) +_maybe_lazy_reorder(::Nothing, B) = B +function _maybe_lazy_reorder(::ForwardOrdered, B) reduce(dims(B); init=B) do A, d if DD.order(d) isa ReverseOrdered A = view(A, rebuild(d, lastindex(d):-1:firstindex(d))) diff --git a/src/methods/burning/geometry.jl b/src/methods/burning/geometry.jl index fa61c1f24..2ee27211b 100644 --- a/src/methods/burning/geometry.jl +++ b/src/methods/burning/geometry.jl @@ -22,10 +22,11 @@ function _burn_geometry!(B::AbstractRaster, trait::Nothing, data; lock=Threads.SpinLock(), verbose=true, progress=true, - threaded=true, + threaded=false, fill=true, - allocs=_burning_allocs(B; threaded), geometrycolumn=nothing, + burncheck_metadata=Metadata(), + allocs=_burning_allocs(B; threaded, burncheck_metadata), kw... )::Bool geoms = _get_geometries(data, geometrycolumn) @@ -36,8 +37,8 @@ function _burn_geometry!(B::AbstractRaster, trait::Nothing, data; geom = _getgeom(geoms, i) ismissing(geom) && return nothing a = _get_alloc(allocs) - B1 = a.buffer - burnchecks[i] = _burn_geometry!(B1, geom; fill, allocs=a, lock, kw...) + buffer = a.buffer + burnchecks[i] = _burn_geometry!(buffer, geom; fill, allocs=a, lock, kw...) return nothing end if fill @@ -99,14 +100,17 @@ function _burn_geometry!(B::AbstractRaster, ::GI.AbstractGeometryTrait, geom; return false end # Take a view of the geometry extent - B1 = view(B, Touches(geomextent)) - buf1 = _init_bools(B1; missingval=false) + V = view(B, Touches(geomextent)) + buffer = _init_bools(V; missingval=false) # Burn the polygon into the buffer - allocs = isnothing(allocs) ? Allocs(B) : allocs - hasburned = _burn_polygon!(buf1, geom; shape, geomextent, allocs, boundary, kw...) - @inbounds for i in eachindex(B1) - if buf1[i] - B1[i] = fill + # We allocate a new bitarray for the view for performance + # and always fill with `true`. + allocs = isnothing(allocs) ? Allocs(nothing) : allocs + hasburned = _burn_polygon!(buffer, geom; shape, geomextent, allocs, boundary, kw...) + # We then transfer burned `true` values to B via the V view + @inbounds for i in eachindex(V) + if buffer[i] + V[i] = fill end end else diff --git a/src/methods/burning/line.jl b/src/methods/burning/line.jl index 9155479bb..24f924f6c 100644 --- a/src/methods/burning/line.jl +++ b/src/methods/burning/line.jl @@ -1,33 +1,53 @@ # _burn_lines! # Fill a raster with `fill` where pixels touch lines in a geom -# Separated for a type stability function barrier -function _burn_lines!(B::AbstractRaster, geom; fill=true, kw...) - _check_intervals(B) +# Usually `fill` is `true` of `false` +function _burn_lines!( + B::AbstractRaster, geom; fill=true, verbose=false, kw... +) + _check_intervals(B, verbose) B1 = _prepare_for_burning(B) - return _burn_lines!(B1, geom, fill) + + xdim, ydim = dims(B, DEFAULT_POINT_ORDER) + regular = map((xdim, ydim)) do d + # @assert (parent(lookup(d)) isa Array) + lookup(d) isa AbstractSampled && span(d) isa Regular + end + msg = """ + Can only fill lines where dimensions have `Regular` lookups. + Consider using `boundary=:center`, reprojecting the crs, + or make an issue in Rasters.jl on github if you need this to work. + """ + all(regular) || throw(ArgumentError(msg)) + + # Set indices of B as `fill` when a cell is found to burn. + _burn_lines!(identity, dims(B1), geom) do D + @inbounds B1[D] = fill + end end -_burn_lines!(B, geom, fill) = - _burn_lines!(B, GI.geomtrait(geom), geom, fill) -function _burn_lines!(B::AbstractArray, ::Union{GI.MultiLineStringTrait}, geom, fill) +_burn_lines!(f::F, c::C, dims::Tuple, geom) where {F<:Function,C<:Function} = + _burn_lines!(f, c, dims, GI.geomtrait(geom), geom) +function _burn_lines!( + f::F, c::C, dims::Tuple, ::Union{GI.MultiLineStringTrait}, geom +) where {F<:Function,C<:Function} n_on_line = 0 for linestring in GI.getlinestring(geom) - n_on_line += _burn_lines!(B, linestring, fill) + n_on_line += _burn_lines!(f, c, dims, linestring) end return n_on_line end function _burn_lines!( - B::AbstractArray, ::Union{GI.MultiPolygonTrait,GI.PolygonTrait}, geom, fill -) + f::F, c::C, dims::Tuple, ::Union{GI.MultiPolygonTrait,GI.PolygonTrait}, geom +) where {F<:Function,C<:Function} n_on_line = 0 for ring in GI.getring(geom) - n_on_line += _burn_lines!(B, ring, fill) + n_on_line += _burn_lines!(f, c, dims, ring) end return n_on_line end function _burn_lines!( - B::AbstractArray, ::GI.AbstractCurveTrait, linestring, fill -) + f::F, c::C, dims::Tuple, ::GI.AbstractCurveTrait, linestring +) where {F<:Function,C<:Function} isfirst = true local firstpoint, laststop n_on_line = 0 @@ -46,19 +66,19 @@ function _burn_lines!( stop=(x=GI.x(point), y=GI.y(point)), ) laststop = line.stop - n_on_line += _burn_line!(B, line, fill) + n_on_line += _burn_line!(f, c, dims, line) end return n_on_line end function _burn_lines!( - B::AbstractArray, t::GI.LineTrait, line, fill -) + f::F, c::C, dims::Tuple, t::GI.LineTrait, line +) where {F<:Function,C<:Function} p1, p2 = GI.getpoint(t, line) line1 = ( start=(x=GI.x(p1), y=GI.y(p1)), stop=(x=GI.x(p2), y=GI.y(p2)), ) - return _burn_line!(B, line1, fill) + return _burn_line!(f, c, dims, line1) end # _burn_line! @@ -66,26 +86,22 @@ end # Line-burning algorithm # Burns a single line into a raster with value where pixels touch a line # +# Function `f` does the actual work when passed a Dimension Tuple of a pixel to burn, +# and `c` is an initialisation callback that is passed the maximyum +# number of times `f` will be called. It may be called less than that. +# # TODO: generalise to Irregular spans? -function _burn_line!(A::AbstractRaster, line, fill) - - xdim, ydim = dims(A, DEFAULT_POINT_ORDER) - regular = map((xdim, ydim)) do d - @assert (parent(lookup(d)) isa Array) - lookup(d) isa AbstractSampled && span(d) isa Regular - end - msg = """ - Can only fill lines where dimensions have `Regular` lookups. - Consider using `boundary=:center`, reprojecting the crs, - or make an issue in Rasters.jl on github if you need this to work. - """ - all(regular) || throw(ArgumentError(msg)) +function _burn_line!(f::Function, c::Function, dims::Tuple, line::NamedTuple) + xdim, ydim = DD.dims(dims, DEFAULT_POINT_ORDER) + di = DimIndices(dims) + @assert xdim isa XDim + @assert ydim isa YDim @assert order(xdim) == order(ydim) == Lookups.ForwardOrdered() @assert locus(xdim) == locus(ydim) == Lookups.Center() - raster_x_step = abs(step(span(A, X))) - raster_y_step = abs(step(span(A, Y))) + raster_x_step = abs(step(span(xdim))) + raster_y_step = abs(step(span(ydim))) raster_x_offset = @inbounds xdim[1] - raster_x_step / 2 # Shift from center to start of pixel raster_y_offset = @inbounds ydim[1] - raster_y_step / 2 @@ -117,15 +133,18 @@ function _burn_line!(A::AbstractRaster, line, fill) # Int starting points for the line. +1 converts to julia indexing j, i = trunc(Int, relstart.x) + 1, trunc(Int, relstart.y) + 1 # Int - # For arbitrary dimension indexing - dimconstructors = map(DD.basetypeof, (xdim, ydim)) + n_on_line = 0 + + # Pass of number of runs of `f` to callback `c` + # This can help with e.g. allocating a vector + c(manhattan_distance + 1) if manhattan_distance == 0 - D = map((d, o) -> d(o), dimconstructors, (j, i)) - if checkbounds(Bool, A, D...) - @inbounds A[D...] = fill + D = map(rebuild, dims, (j, i)) + if checkbounds(Bool, di, D...) + f(D) + n_on_line += 1 end - n_on_line = 1 return n_on_line end @@ -153,18 +172,17 @@ function _burn_line!(A::AbstractRaster, line, fill) n_on_line = 0 countx = county = 0 - # Int steps to move allong the line step_j = signbit(diff_x) * -2 + 1 step_i = signbit(diff_y) * -2 + 1 # Travel one grid cell at a time. Start at zero for the current cell for _ in 0:manhattan_distance - D = map((d, o) -> d(o), dimconstructors, (j, i)) - if checkbounds(Bool, A, D...) - @inbounds A[D...] = fill + D = map(rebuild, dims, (j, i)) + if checkbounds(Bool, di, D...) + f(D) + n_on_line += 1 end - # Only move in either X or Y coordinates, not both. if abs(max_x) < abs(max_y) max_x += delta_x @@ -176,12 +194,6 @@ function _burn_line!(A::AbstractRaster, line, fill) county +=1 end end - return n_on_line -end -function _burn_line!(A::AbstractRaster, line, fill, order::Tuple{Vararg{Dimension}}) - msg = """" - Converting a `:line` geometry to raster is currently only implemented for 2d lines. - Make a Rasters.jl github issue if you need this for more dimensions. - """ - throw(ArgumentError(msg)) + + return n_on_line end diff --git a/src/methods/burning/polygon.jl b/src/methods/burning/polygon.jl index f6d4512e4..1ce6c48a9 100644 --- a/src/methods/burning/polygon.jl +++ b/src/methods/burning/polygon.jl @@ -24,14 +24,14 @@ function _burn_polygon!(B::AbstractDimArray, trait, geom; # Lines n_on_line = 0 if boundary !== :center - _check_intervals(B, boundary) + _check_intervals(B, boundary, verbose) if boundary === :touches - if _check_intervals(B, boundary) + if _check_intervals(B, boundary, verbose) # Add line pixels n_on_line = _burn_lines!(B, geom; fill)::Int end elseif boundary === :inside - if _check_intervals(B, boundary) + if _check_intervals(B, boundary, verbose) # Remove line pixels n_on_line = _burn_lines!(B, geom; fill=!fill)::Int end @@ -135,9 +135,9 @@ end const INTERVALS_INFO = "makes more sense on `Intervals` than `Points` and will have more correct results. You can construct dimensions with a `X(values; sampling=Intervals(Center()))` to achieve this" -@noinline _check_intervals(B) = - _chki(B) ? true : (@info "burning lines $INTERVALS_INFO"; false) -@noinline _check_intervals(B, boundary) = - _chki(B) ? true : (@info "`boundary=:$boundary` $INTERVALS_INFO"; false) +@noinline _check_intervals(B, verbose::Bool) = + _chki(B) ? true : (verbose && @info "burning lines $INTERVALS_INFO"; false) +@noinline _check_intervals(B, boundary, verbose::Bool) = + _chki(B) ? true : (verbose && @info "`boundary=:$boundary` $INTERVALS_INFO"; false) _chki(B) = all(map(s -> s isa Intervals, sampling(dims(B)))) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index feb0b6001..a34cbc92e 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -1,9 +1,93 @@ +# Object to hold extract keywords +struct Extractor{T,P,K,N<:NamedTuple{K},ID,G,I,S,F} + names::N + id::ID + geometry::G + index::I + skipmissing::S + flatten::F +end +Base.@constprop :aggressive @inline function Extractor(x, data; + name::NTuple{<:Any,Symbol}, + skipmissing, + flatten, + geometry, + id, + index, + kw... +) + nt = NamedTuple{name}(name) + id = _booltype(id) + g = _booltype(geometry) + i = _booltype(index) + sm = _booltype(skipmissing) + f = _booltype(flatten) + Extractor(x, data, nt, id, g, i, sm, f) +end +function Extractor( + x, data, nt::N, id::ID, g::G, i::I, sm::S, f::F +) where {N<:NamedTuple{K},ID,G,I,S,F} where K + P = _proptype(x; skipmissing=sm, names=nt) + T = _geom_rowtype(x, data; id, geometry=g, index=i, names=nt, skipmissing=sm) + Extractor{T,P,K,N,ID,G,I,S,F}(nt, id, g, i, sm, f) +end + +Base.eltype(::Extractor{T}) where T = T + +# This mutable object is passed into closures as +# fields are type-stable in clusores but variables are not +mutable struct LineRefs{T} + i::Int + prev::CartesianIndex{2} + rows::Vector{T} + function LineRefs{T}() where T + i = 1 + prev = CartesianIndex((typemin(Int), typemin(Int))) + new{T}(i, prev, Vector{T}()) + end +end + +function _initialise!(lr::LineRefs{T}) where T + lr.i = 1 + lr.prev = CartesianIndex((typemin(Int), typemin(Int))) + lr.rows = Vector{T}() +end + +_geom_rowtype(A, geom; kw...) = + _geom_rowtype(A, GI.trait(geom), geom; kw...) +_geom_rowtype(A, ::Nothing, geoms; kw...) = + _geom_rowtype(A, first(skipmissing(geoms)); kw...) +_geom_rowtype(A, ::GI.AbstractGeometryTrait, geom; kw...) = + _geom_rowtype(A, first(GI.getpoint(geom)); kw...) +_geom_rowtype(A, ::Nothing, geoms::Missing; kw...) = + _rowtype(A, missing; kw...) +function _geom_rowtype(A, ::GI.AbstractPointTrait, p; kw...) + tuplepoint = if GI.is3d(p) + (GI.x(p), GI.y(p), GI.z(p)) + else + (GI.x(p), GI.y(p)) + end + return _rowtype(A, tuplepoint; kw...) +end + +# skipinvalid: can G and I be missing. skipmissing: can nametypes be missing +function _proptype(x; + skipmissing, names::NamedTuple{K}, kw... +) where K + NamedTuple{K,Tuple{_nametypes(x, names, skipmissing)...}} +end + """ - extract(x, data; kw...) + extract(x, geometries; kw...) + +Extracts the value of `Raster` or `RasterStack` for the passed in geometries, +returning an `Vector{NamedTuple}` with properties for `:geometry` and `Raster` +or `RasterStack` layer values. + +For lines, linestrings and linear rings points are extracted for each pixel that +the line touches. -Extracts the value of `Raster` or `RasterStack` at given points, returning -an iterable of `NamedTuple` with properties for `:geometry` and raster or -stack layer values. +For polygons, all cells witih centers covered by the polygon are returned. Note that if objects have more dimensions than the length of the point tuples, sliced arrays or stacks will be returned instead of single values. @@ -15,13 +99,22 @@ $DATA_ARGUMENT # Keywords -- `geometry`: include `:geometry` in returned `NamedTuple`, `true` by default. -- `index`: include `:index` of the `CartesianIndex` in returned `NamedTuple`, `false` by default. -- `name`: a `Symbol` or `Tuple` of `Symbol` corresponding to layer/s of a `RasterStack` to extract. All layers by default. +- `geometry`: include a `:geometry` field in rows, which will be a + tuple point. Either the original point for points or the pixel + center point for line and polygon extract. `true` by default. +- `index`: include `:index` field of extracted points in rows, `false` by default. +- `name`: a `Symbol` or `Tuple` of `Symbol` corresponding to layer/s + of a `RasterStack` to extract. All layers are extracted by default. - `skipmissing`: skip missing points automatically. +- `flatten`: flatten extracted points from multiple + geometries into a single vector. `true` by default. + Unmixed point geometries are always flattened. + Flattening is slow and single threaded, `flatten=false` may be a + large performance improvement in combination with `threaded=true`. - `atol`: a tolerance for floating point lookup values for when the `Lookup` contains `Points`. `atol` is ignored for `Intervals`. --$GEOMETRYCOLUMN_KEYWORD +$BOUNDARY_KEYWORD +$GEOMETRYCOLUMN_KEYWORD # Example @@ -50,205 +143,353 @@ extract(st, obs; skipmissing=true) (geometry = (0.32, 40.24), bio1 = 16.321388f0, bio3 = 41.659454f0, bio5 = 30.029825f0, bio7 = 25.544561f0, bio12 = 480.0f0) ``` -Note: passing in arrays, geometry collections or feature collections +Note: passing in arrays, geometry collections or feature collections containing a mix of points and other geometries has undefined results. """ function extract end -@inline function extract(x::RasterStackOrArray, data; - names=_names(x), name=names, skipmissing=false, geometry=true, index=false, geometrycolumn=nothing, kw... +Base.@constprop :aggressive @inline function extract(x::RasterStackOrArray, data; + geometrycolumn=nothing, + names::NTuple{<:Any,Symbol}=_names(x), + name::NTuple{<:Any,Symbol}=names, + skipmissing=false, + flatten=true, + id=false, + geometry=true, + index=false, + kw... ) n = DD._astuple(name) - geoms = _get_geometries(data, geometrycolumn) - _extract(x, geoms; - dims=DD.dims(x, DEFAULT_POINT_ORDER), - names=NamedTuple{n}(n), - # These keywords are converted to _True/_False for type stability later on - # The @inline above helps constant propagation of the Bools - geometry=_booltype(geometry), - index=_booltype(index), - skipmissing=_booltype(skipmissing), - kw... - ) + g, g1 = if GI.isgeometry(data) + data, data + elseif GI.isfeature(data) + g = GI.geometry(data) + g, g + else + gs = _get_geometries(data, geometrycolumn) + gs, (isempty(gs) || all(ismissing, gs)) ? missing : first(Base.skipmissing(gs)) + end + + xp = _prepare_for_burning(x) + e = Extractor(xp, g1; name, skipmissing, flatten, id, geometry, index) + id_init = 1 + return _extract(xp, e, id_init, g; kw...) end -function _extract(A::RasterStackOrArray, geom::Missing, names, kw...) - T = _rowtype(A, geom; names, kw...) - [_maybe_add_fields(T, map(_ -> missing, names), missing, missing)] +# TODO use a GeometryOpsCore method like `applyreduce` here? +function _extract(A::RasterStackOrArray, e::Extractor{T}, id::Int, geom::Missing; kw...) where T + return if istrue(e.skipmissing) + T[] + else + T[_maybe_add_fields(T, map(_ -> missing, e.names), id, missing, missing)] + end end -function _extract(A::RasterStackOrArray, geom; names, kw...) - _extract(A, GI.geomtrait(geom), geom; names, kw...) +function _extract(A::RasterStackOrArray, e::Extractor, id::Int, geom; kw...) + _extract(A, e, id, GI.geomtrait(geom), geom; kw...) end -function _extract(A::RasterStackOrArray, ::Nothing, geoms; - names, skipmissing, kw... -) - T = _rowtype(A, eltype(geoms); names, skipmissing, kw...) +function _extract(A::RasterStackOrArray, e::Extractor{T}, id::Int, ::Nothing, geoms::AbstractArray; + threaded=false, progress=true, kw... +) where T # Handle empty / all missing cases - (length(geoms) > 0 && any(!ismissing, geoms)) || return T[] - - geom1 = first(Base.skipmissing(geoms)) + isempty(geoms) && return T[] + geom1 = all(ismissing, geoms) ? missing : first(Base.skipmissing(geoms)) trait1 = GI.trait(geom1) - # We need to split out points from other geoms - # TODO this will fail with mixed point/geom vectors + # We split out points from other geoms for performance if trait1 isa GI.PointTrait - rows = Vector{T}(undef, length(geoms)) - if istrue(skipmissing) - j = 1 - for i in eachindex(geoms) - g = geoms[i] - ismissing(g) && continue - e = _extract_point(T, A, g, skipmissing; names, kw...) - if !ismissing(e) - rows[j] = e - j += 1 + allpoints = true + rows = Vector{T}(undef, length(geoms)) + if istrue(e.skipmissing) + if threaded + nonmissing = Vector{Bool}(undef, length(geoms)) + _run(1:length(geoms), threaded, progress, "Extracting points...") do i + g = geoms[i] + GI.geomtrait(g) isa GI.PointTrait || return nothing + loc_id = id + i - 1 + nonmissing[i] = _extract_point!(rows, A, e, loc_id, g, i; kw...)::Bool + return nothing + end + # This could use less memory if we reuse `rows` and shorten it + rows = rows[nonmissing] + else + j_ref = Ref(1) + # For non-threaded be more memory efficient + _run(1:length(geoms), false, progress, "Extracting points...") do i + g = geoms[i] + loc_id = id + i - 1 + if GI.geomtrait(g) isa GI.PointTrait + j_ref[] += _extract_point!(rows, A, e, loc_id, g, j_ref[]; kw...)::Bool + end + return nothing end - nothing + deleteat!(rows, j_ref[]:length(rows)) end - deleteat!(rows, j:length(rows)) else - for i in eachindex(geoms) + _run(1:length(geoms), threaded, progress, "Extracting points...") do i g = geoms[i] - rows[i] = _extract_point(T, A, g, skipmissing; names, kw...)::T - nothing + loc_id = id + i - 1 + _extract_point!(rows, A, e, loc_id, g, i; kw...) + return nothing end end - return rows + # If we found a non-point geometry, + # ignore these rows and start again generically + allpoints && return rows + end + + row_vecs = Vector{Vector{T}}(undef, length(geoms)) + if threaded + thread_line_refs = [LineRefs{T}() for _ in 1:Threads.nthreads()] + _run(1:length(geoms), threaded, progress, "Extracting geometries...") do i + line_refs = thread_line_refs[Threads.threadid()] + loc_id = id + i - 1 + row_vecs[i] = _extract(A, e, loc_id, geoms[i]; line_refs, kw...) + end else - # This will be a list of vectors, we need to flatten it into one - rows = Iterators.flatten(_extract(A, g; names, skipmissing, kw...) for g in geoms) - if istrue(skipmissing) - return collect(_skip_missing_rows(rows, _missingval_or_missing(A), names)) - else - return collect(rows) + line_refs = LineRefs{T}() + _run(1:length(geoms), threaded, progress, "Extracting geometries...") do i + loc_id = id + i - 1 + row_vecs[i] = _extract(A, e, loc_id, geoms[i]; line_refs, kw...) end end + + # TODO this is a bit slow and only on one thread + if istrue(e.flatten) + n = sum(map(length, row_vecs)) + out_rows = Vector{T}(undef, n) + k::Int = 1 + for row_vec in row_vecs + for row in row_vec + out_rows[k] = row + k += 1 + end + end + return out_rows + else + return row_vecs + end end -function _extract(A::RasterStackOrArray, ::GI.AbstractFeatureTrait, feature; kw...) - _extract(A, GI.geometry(feature); kw...) -end -function _extract(A::RasterStackOrArray, ::GI.FeatureCollectionTrait, fc; kw...) - # Fall back to the Array/iterator method for feature collections - _extract(A, [GI.geometry(f) for f in GI.getfeature(fc)]; kw...) -end -function _extract(A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom; - skipmissing, kw... -) - T = _rowtype(A, GI.getpoint(geom, 1); names, skipmissing, kw...) - rows = (_extract_point(T, A, p, skipmissing; kw...) for p in GI.getpoint(geom)) - return skipmissing isa _True ? collect(_skip_missing_rows(rows, _missingval_or_missing(A), names)) : collect(rows) +function _extract( + A::RasterStackOrArray, e::Extractor{T}, id::Int, ::GI.AbstractMultiPointTrait, geom; kw... +)::Vector{T} where T + n = GI.npoint(geom) + rows = _init_rows(e, n) + i = 1 + for p in GI.getpoint(geom) + i += _extract_point!(rows, A, e, id, p, i; kw...)::Bool + end + if istrue(skipmissing) + # Remove excees rows where missing + deleteat!(rows, i:length(rows)) + end + return rows end -function _extract(A::RasterStackOrArray, ::GI.PointTrait, geom; - skipmissing, kw... -) - T = _rowtype(A, geom; names, skipmissing, kw...) - _extract_point(T, A, geom, skipmissing; kw...) +function _extract(A::RasterStackOrArray, e::Extractor{T}, id::Int, ::GI.PointTrait, p; kw...) where T + rows = _init_rows(e, 1) + _extract_point!(rows, A, e, id, p, 1; kw...) end -function _extract(A::RasterStackOrArray, t::GI.AbstractGeometryTrait, geom; - names, skipmissing, kw... -) - # Make a raster mask of the geometry - template = view(A, Touches(GI.extent(geom))) - ods = otherdims(A, DEFAULT_POINT_ORDER) - if length(ods) > 0 - template = view(template, map(d -> rebuild(d, firstindex(d)), ods)) +@noinline function _extract( + A::RasterStackOrArray, e::Extractor{T}, id::Int, ::GI.AbstractLineStringTrait, geom; + line_refs=LineRefs{T}(), + kw... +)::Vector{T} where T + _initialise!(line_refs) + # Subset/offst is not really needed for line buring + # But without it we can get different fp errors + # to polygons and eng up with lines in different + # places when they are right on the line. + dims, offset = _template(A, geom) + dp = DimPoints(A) + function _length_callback(n) + resize!(line_refs.rows, n) end - p = first(GI.getpoint(geom)) - tuplepoint = if GI.is3d(geom) - GI.x(p), GI.y(p), GI.z(p) - else - GI.x(p), GI.y(p) + + _burn_lines!(_length_callback, dims, geom) do D + I = CartesianIndex(map(val, D)) + # Avoid duplicates from adjacent line segments + line_refs.prev == I && return nothing + line_refs.prev = I + # Make sure we only hit this pixel once + # D is always inbounds + line_refs.i += _maybe_set_row!(line_refs.rows, e.skipmissing, e, id, A, dp, offset, I, line_refs.i) + return nothing end - T = _rowtype(A, tuplepoint; names, skipmissing, kw...) - B = boolmask(geom; to=template, kw...) - offset = CartesianIndex(map(x -> first(x) - 1, parentindices(parent(template)))) + deleteat!(line_refs.rows, line_refs.i:length(line_refs.rows)) + return line_refs.rows +end +@noinline function _extract( + A::RasterStackOrArray, e::Extractor{T}, id::Int, ::GI.AbstractGeometryTrait, geom; kw... +)::Vector{T} where T + # Make a raster mask of the geometry + dims, offset = _template(A, geom) + B = boolmask(geom; to=dims, burncheck_metadata=NoMetadata(), kw...) + n = count(B) + dp = DimPoints(A) + i = 1 + rows = _init_rows(e, n) # Add a row for each pixel that is `true` in the mask - rows = (_missing_or_fields(T, A, Tuple(I + offset), names, skipmissing) for I in CartesianIndices(B) if B[I]) - # Maybe skip missing rows - if istrue(skipmissing) - return collect(Base.skipmissing(rows)) + for I in CartesianIndices(B) + B[I] || continue + i += _maybe_set_row!(rows, e.skipmissing, e, id, A, dp, offset, I, i) + end + # Cleanup + deleteat!(rows, i:length(rows)) + return rows +end + +function _template(x, geom) + ods = otherdims(x, DEFAULT_POINT_ORDER) + # Build a dummy raster in case this is a stack + # views are just easier on an array than dims + t1 = Raster(FillArrays.Zeros(size(x)), dims(x)) + t2 = if length(ods) > 0 + view(t1, map(d -> rebuild(d, firstindex(d)), ods)) else - return collect(rows) + t1 end + I = dims2indices(dims(t2), Touches(GI.extent(geom))) + t3 = view(t2, I...) + offset = CartesianIndex(map(i -> first(i) - 1, I)) + return dims(t3), offset end -_extract(::Type{T}, A::RasterStackOrArray, trait::GI.PointTrait, point; skipmissing, kw...) where T = - _extract_point(T, A, point, skipmissing; kw...) -function _missing_or_fields(::Type{T}, A, I, names, skipmissing) where T - props = _prop_nt(A, I, names) - if istrue(skipmissing) && _ismissingval(A, props) - missing +Base.@assume_effects :foldable function _maybe_set_row!( + rows, skipmissing::_True, e, id, A, dp, offset, I, i; +) + props = _prop_nt(e, A, I, skipmissing) + return if ismissing(props) + 0 else - geom = DimPoints(A)[I...] - _maybe_add_fields(T, props, geom, I) + _maybe_set_row!(rows, _False(), e, id, A, dp, offset, I, i; props) end end +Base.@assume_effects :foldable function _maybe_set_row!( + rows::Vector{T}, skipmissing::_False, e, id, A, dp, offset, I, i; + props=_prop_nt(e, A, I, skipmissing) +) where T + Ioff = I + offset + geom = dp[Ioff] + rows[i] = _maybe_add_fields(T, props, id, geom, Tuple(Ioff)) + return 1 +end + +_init_rows(e::Extractor{T}, n=0) where T = Vector{T}(undef, n) -_ismissingval(A::Union{Raster,RasterStack}, props) = +Base.@assume_effects :foldable _ismissingval(A::Union{Raster,RasterStack}, props)::Bool = _ismissingval(missingval(A), props) -_ismissingval(A::Union{Raster,RasterStack}, props::NamedTuple) = +Base.@assume_effects :foldable _ismissingval(A::Union{Raster,RasterStack}, props::NamedTuple)::Bool = _ismissingval(missingval(A), props) -_ismissingval(mvs::NamedTuple, props::NamedTuple{K}) where K = - any(k -> ismissing(props[k]) || props[k] === mvs[k], K) -_ismissingval(mv, props::NamedTuple) = any(x -> ismissing(x) || x === mv, props) -_ismissingval(mv, prop) = (mv === prop) +Base.@assume_effects :foldable _ismissingval(mvs::NamedTuple{K}, props::NamedTuple{K}) where K = + any(DD.unrolled_map((p, mv) -> ismissing(p) || p === mv, props, mvs))::Bool +Base.@assume_effects :foldable _ismissingval(mvs::NamedTuple, props::NamedTuple{K}) where K = + _ismissingval(mvs[K], props)::Bool +Base.@assume_effects :foldable _ismissingval(mv, props::NamedTuple)::Bool = + any(DD.unrolled_map(p -> ismissing(p) || p === mv, props)) +Base.@assume_effects :foldable _ismissingval(mv, prop)::Bool = (mv === prop) -@inline _prop_nt(st::AbstractRasterStack, I, ::NamedTuple{K}) where K = st[I...][K] -@inline _prop_nt(A::AbstractRaster, I, ::NamedTuple{K}) where K = NamedTuple{K}((A[I...],)) +# We always return NamedTuple +Base.@assume_effects :foldable function _prop_nt( + ::Extractor{<:Any,P,K}, st::AbstractRasterStack, I, sm::_False +)::P where {P,K} + P(st[K][I]) +end +Base.@assume_effects :foldable function _prop_nt( + ::Extractor{<:Any,P,K}, st::AbstractRasterStack{K}, I, sm::_False +)::P where {P,K} + P(st[I]) +end +Base.@assume_effects :foldable function _prop_nt( + ::Extractor{<:Any,P,K}, A::AbstractRaster, I, sm::_False +)::P where {P,K} + P(NamedTuple{K}((A[I],))) +end +Base.@assume_effects :foldable function _prop_nt( + ::Extractor{<:Any,P,K}, st::AbstractRasterStack, I, sm::_True +)::Union{P,Missing} where {P,K} + x = st[K][I] + _ismissingval(A, x) ? missing : x::P +end +Base.@assume_effects :foldable function _prop_nt( + ::Extractor{<:Any,P,K}, st::AbstractRasterStack{K}, I, sm::_True +)::Union{P,Missing} where {P,K} + x = st[I] + _ismissingval(st, x) ? missing : x::P +end +Base.@assume_effects :foldable function _prop_nt( + ::Extractor{<:Any,P,K}, A::AbstractRaster, I, sm::_True +)::Union{P,Missing} where {P,K} + x = A[I] + _ismissingval(A, x) ? missing : NamedTuple{K}((x,))::P +end # Extract a single point -@inline function _extract_point(::Type, x::RasterStackOrArray, point::Missing, skipmissing::_True; kw...) - return missing +# Missing point to remove +@inline function _extract_point!(rows::Vector{T}, x::RasterStackOrArray, e::Extractor, id, point, i; kw...) where T + _extract_point!(rows, x, e, id, e.skipmissing, point, i; kw...) end -@inline function _extract_point(::Type{T}, x::RasterStackOrArray, point::Missing, skipmissing::_False; - names, kw... -) where T - props = map(_ -> missing, names) - geom = missing - I = missing - return _maybe_add_fields(T, props, geom, I) +@inline function _extract_point!( + rows::Vector{T}, x::RasterStackOrArray, e::Extractor{T,<:Any,K}, id, skipmissing::_True, point::Missing, i; + kw... +) where {T,K} + return false end -@inline function _extract_point(::Type{T}, x::RasterStackOrArray, point, skipmissing::_True; - dims, names::NamedTuple{K}, atol=nothing, kw... +# Normal point with missing / out of bounds data removed +@inline function _extract_point!( + rows::Vector{T}, x::RasterStackOrArray, e::Extractor{T,<:Any,K}, id, skipmissing::_True, point, i; + dims=DD.dims(x, DEFAULT_POINT_ORDER), + atol=nothing, + kw... ) where {T,K} # Get the actual dimensions available in the object - coords = map(DD.commondims(x, dims)) do d + coords = map(DD.dims(x, dims)) do d _dimcoord(d, point) end # If any are coordinates missing, also return missing for everything - if any(map(ismissing, coords)) - return missing - else + if !any(map(ismissing, coords)) selector_dims = map(dims, coords) do d, c _at_or_contains(d, c, atol) end selectors = map(val, DD.dims(selector_dims, DD.dims(x))) # Check the selector is in bounds / actually selectable I = DD.selectindices(DD.dims(x, dims), selectors; err=_False())::Union{Nothing,Tuple{Int,Int}} - if isnothing(I) - return missing - else + if !isnothing(I) D = map(rebuild, selector_dims, I) - if x isa Raster + if x isa Raster prop = x[D] - _ismissingval(missingval(x), prop) && return missing + _ismissingval(missingval(x), prop) && return false props = NamedTuple{K,Tuple{eltype(x)}}((prop,)) else props = x[D][K] - _ismissingval(missingval(x), props) && return missing + _ismissingval(missingval(x), props) && return false end - return _maybe_add_fields(T, props, point, I) + rows[i] = _maybe_add_fields(T, props, id, coords, I) + return true end end + return false end -@inline function _extract_point(::Type{T}, x::RasterStackOrArray, point, skipmissing::_False; - dims, names::NamedTuple{K}, atol=nothing, kw... -) where {T,K} +# Missing point to keep +@inline function _extract_point!( + rows::Vector{T}, x::RasterStackOrArray, e::Extractor{T,P,K}, id, skipmissing::_False, point::Missing, i; kw... +) where {T,P,K} + props = map(_ -> missing, e.names) + geom = missing + I = missing + rows[i] = _maybe_add_fields(T, props, id, geom, I) + return true +end +# Normal point with missing / out of bounds data kept with `missing` fields +@inline function _extract_point!( + rows::Vector{T}, x::RasterStackOrArray, e::Extractor{T,P,K}, id, skipmissing::_False, point, i; + dims=DD.dims(x, DEFAULT_POINT_ORDER), + atol=nothing, + kw... +) where {T,P,K} # Get the actual dimensions available in the object - coords = map(DD.commondims(x, dims)) do d + coords = map(dims) do d _dimcoord(d, point) end # If any are coordinates missing, also return missing for everything - if any(map(ismissing, coords)) - return _maybe_add_fields(T, map(_ -> missing, names), missing, missing) + rows[i] = if any(map(ismissing, coords)) + _maybe_add_fields(T, map(_ -> missing, names), id, missing, missing) else selector_dims = map(dims, coords) do d, c _at_or_contains(d, c, atol) @@ -257,40 +498,27 @@ end # Check the selector is in bounds / actually selectable I = DD.selectindices(DD.dims(x, dims), selectors; err=_False())::Union{Nothing,Tuple{Int,Int}} if isnothing(I) - return _maybe_add_fields(T, map(_ -> missing, names), point, missing) + _maybe_add_fields(T, map(_ -> missing, e.names), id, coords, missing) else D = map(rebuild, selector_dims, I) - props = if x isa Raster - NamedTuple{K,Tuple{eltype(x)}}((x[D],)) + props = if x isa Raster + P(NamedTuple{K,Tuple{eltype(x)}}((x[D],))) else - NamedTuple(x[D])[K] + P(NamedTuple(x[D])[K]) end - return _maybe_add_fields(T, props, point, I) + _maybe_add_fields(T, props, id, coords, I) end end + return 1 end # Maybe add optional fields -Base.@assume_effects :total function _maybe_add_fields(::Type{T}, props::NamedTuple, point, I)::T where {T<:NamedTuple{K}} where K - if :geometry in K - :index in K ? merge((; geometry=point, index=I), props) : merge((; geometry=point), props) - else - :index in K ? merge((; index=I), props) : props - end |> T -end - -@inline _skip_missing_rows(rows, ::Missing, names) = - Iterators.filter(row -> !any(ismissing, row), rows) -@inline _skip_missing_rows(rows, missingval, names) = - Iterators.filter(row -> !any(x -> ismissing(x) || x === missingval, row), rows) -@inline function _skip_missing_rows(rows, missingval::NamedTuple, names::NamedTuple{K}) where K - # first check if all fields are equal - if so just call with the first value - if Base.allequal(missingval) == 1 - return _skip_missing_rows(rows, first(missingval), names) - else - Iterators.filter(rows) do row - # rows may or may not contain a :geometry field, so map over keys instead - !any(key -> ismissing(row[key]) || row[key] === missingval[key], K) - end - end +# It is critically important for performance that this is type stable +Base.@assume_effects :total function _maybe_add_fields( + ::Type{T}, props::NamedTuple, id, point::Union{Tuple,Missing}, I +)::T where {T<:NamedTuple{K}} where K + row = :index in K ? merge((; index=I), props) : props + row = :geometry in K ? merge((; geometry=point), row) : row + row = :id in K ? merge((; id), row) : row + return T(row) end diff --git a/src/methods/mask.jl b/src/methods/mask.jl index 1401a990c..fc8d58da8 100644 --- a/src/methods/mask.jl +++ b/src/methods/mask.jl @@ -35,7 +35,8 @@ $SUFFIX_KEYWORD These can be used when `with` is a GeoInterface.jl compatible object: -$SHAPE_KEYWORDS +$SHAPE_KEYWORD +$BOUNDARY_KEYWORD $GEOMETRYCOLUMN_KEYWORD # Example @@ -332,7 +333,8 @@ function boolmask!(dest::AbstractRaster, data; lock=nothing, progress=true, threaded=false, - allocs=_burning_allocs(dest; threaded), + burncheck_metadata=Metadata(), + allocs=_burning_allocs(dest; threaded, burncheck_metadata), geometrycolumn=nothing, collapse=nokw, kw... diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index 2e9a3f5df..a823db4d2 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -1,3 +1,6 @@ +const RasterVecOrTuple = Union{Tuple{Vararg{AbstractRaster}},AbstractArray{<:AbstractRaster}} +const RasterStackVecOrTuple = Union{Tuple{Vararg{AbstractRasterStack}},AbstractArray{<:AbstractRasterStack}} + """ mosaic(f, regions...; missingval, atol) mosaic(f, regions; missingval, atol) @@ -28,42 +31,29 @@ If your mosaic has has apparent line errors, increase the `atol` value. Here we cut out Australia and Africa from a stack, and join them with `mosaic`. ```jldoctest -using Rasters, RasterDataSources, ArchGDAL, Plots -st = RasterStack(WorldClim{Climate}; month=1); - -africa = st[X(-20.0 .. 60.0), Y(-40.0 .. 35.0)] -a = plot(africa) - -aus = st[X(100.0 .. 160.0), Y(-50.0 .. -10.0)] -b = plot(aus) - -# Combine with mosaic -mos = mosaic(first, aus, africa) -c = plot(mos) +using Rasters, RasterDataSources, NaturalEarth, DataFrames +countries = naturalearth("admin_0_countries", 110) |> DataFrame +climate = RasterStack(WorldClim{Climate}, (:tmin, :tmax, :prec, :wind); month=July) +country_climates = map(("Norway", "Denmark", "Sweden")) do name + country = subset(countries, :NAME => ByRow(==("Norway"))) + trim(mask(climate; with=country); pad=10) +end +scandinavia_climate = trim(mosaic(first, country_climates)) +plot(scandinavia_climate) -savefig(a, "build/mosaic_example_africa.png") -savefig(b, "build/mosaic_example_aus.png") -savefig(c, "build/mosaic_example_combined.png") -nothing +savefig("build/mosaic_example_combined.png") # output ``` -### Individual continents - -![arica](mosaic_example_africa.png) - -![aus](mosaic_example_aus.png) - -### Mosaic of continents +### Mosaic of countries ![mosaic](mosaic_example_combined.png) $EXPERIMENTAL """ -function mosaic(f::Function, r1::RasterStackOrArray, rs::RasterStackOrArray...; kw...) +mosaic(f::Function, r1::RasterStackOrArray, rs::RasterStackOrArray...; kw...) = mosaic(f, (r1, rs...); kw...) -end mosaic(f::Function, regions; kw...) = _mosaic(f, first(regions), regions; kw...) function _mosaic(f::Function, A1::AbstractRaster, regions; missingval=nokw, @@ -74,23 +64,26 @@ function _mosaic(f::Function, A1::AbstractRaster, regions; force=false, kw... ) - isnothing(missingval) && throw(ArgumentError("missingval cannot be `nothing` for `mosaic`")) - missingval = if isnokw(missingval) + V = Vector{promote_type(map(Missings.nonmissingtype ∘ eltype, regions)...)} + T = Base.promote_op(f, V) + dims = _mosaic(Tuple(map(DD.dims, regions))) + l1 = first(regions) + + missingval = if isnothing(missingval) + throw(ArgumentError("missingval cannot be `nothing` for `mosaic`")) + elseif isnokw(missingval) mv = Rasters.missingval(first(regions)) isnokwornothing(mv) ? missing : mv else missingval end - missingval_pair = if !isnothing(filename) && (ismissing(missingval) || isnokwornothing(missingval)) - _type_missingval(eltype(A1)) => missing - elseif missingval isa Pair + missingval_pair = if missingval isa Pair missingval + elseif !isnothing(filename) && (ismissing(missingval) || isnokw(missingval)) + _type_missingval(eltype(A1)) => missing else missingval => missingval end - T = Base.promote_type(typeof(last(missingval_pair)), Base.promote_eltype(regions...)) - dims = _mosaic(Tuple(map(DD.dims, regions))) - l1 = first(regions) return create(filename, T, dims; name=name(l1), @@ -100,10 +93,10 @@ function _mosaic(f::Function, A1::AbstractRaster, regions; options, force ) do C - _mosaic!(f, C, regions; missingval, kw...) + mosaic!(f, C, regions; missingval, kw...) end end -function _mosaic(f::Function, ::AbstractRasterStack, regions; +function _mosaic(f::Function, r1::AbstractRasterStack, regions; filename=nothing, suffix=keys(first(regions)), kw... @@ -112,14 +105,14 @@ function _mosaic(f::Function, ::AbstractRasterStack, regions; layers = map(suffix, map(values, regions)...) do s, A... mosaic(f, A...; filename, suffix=s, kw...) end - return DD.rebuild_from_arrays(first(regions), Tuple(layers)) + return DD.rebuild_from_arrays(r1, layers) end """ mosaic!(f, x, regions...; missingval, atol) mosaic!(f, x, regions::Tuple; missingval, atol) -Combine `regions` in `x` using the function `f`. +Combine `regions` in `Raster` or `RasterStack` `x` using the function `f`. # Arguments @@ -143,18 +136,25 @@ Combine `regions` in `x` using the function `f`. # Example -Cut out Australia and Africa stacks, then combined them -into a single stack. +Cut out scandinavian countries and plot: ```jldoctest -using Rasters, RasterDataSources, ArchGDAL, Statistics, Plots -st = read(RasterStack(WorldClim{Climate}; month=1)) -aus = st[X=100.0 .. 160.0, Y=-50.0 .. -10.0] -africa = st[X=-20.0 .. 60.0, Y=-40.0 .. 35.0] -mosaic!(first, st, aus, africa) -plot(st) +using Rasters, RasterDataSources, NaturalEarth, DataFrames +# Get climate data form worldclim +climate = RasterStack(WorldClim{Climate}, (:tmin, :tmax, :prec, :wind); month=July) +# And country borders from natural earth +countries = naturalearth("admin_0_countries", 110) |> DataFrame +# Cut out each country +country_climates = map(("Norway", "Denmark", "Sweden")) do name + country = subset(countries, :NAME => ByRow(==(name))) + trim(mask(climate; with=country); pad=10) +end +# Mosaic together to a single raster +scandinavia_climate = mosaic(first, country_climates) +# And plot +plot(scandinavia_climate) + savefig("build/mosaic_bang_example.png") -nothing # output ``` @@ -164,49 +164,149 @@ nothing $EXPERIMENTAL """ mosaic!(f::Function, dest::RasterStackOrArray, regions::RasterStackOrArray...; kw...) = - _mosaic!(f, dest, regions; kw...) - -function _mosaic!(f::Function, A::AbstractRaster{T}, regions::Union{Tuple,AbstractArray}; + mosaic!(f, dest, regions; kw...) +function mosaic!( + f::Function, + dest::RasterStackOrArray, + regions::Union{Tuple,AbstractArray}; + op=_reduce_op(f, missingval(dest)), + kw... +) + # Centering avoids pixel edge floating point error + dest_centered = _prepare_for_burning(dest; order=nothing) + regions_centered = map(r -> _prepare_for_burning(r; order=nothing), regions) + _mosaic!(f, op, dest_centered, regions_centered; kw...) + return dest +end +function _mosaic!( + f::typeof(mean), op::Nothing, dest::Raster, regions::RasterVecOrTuple; + kw... +) + if length(regions) <= typemax(UInt8) + _mosaic_mean!(dest, UInt8, regions; kw...) + elseif length(regions) <= typemax(UInt16) + _mosaic_mean!(dest, UInt16, regions; kw...) + else + _mosaic_mean!(dest, UInt32, regions; kw...) + end +end +function _mosaic!( + f::typeof(length), op::Nothing, dest::AbstractRaster, regions::RasterVecOrTuple; + kw... +) + for region in regions + _count_region!(dest, region; kw...) + end + return dest +end +# Where there is a known reduction operator we can apply each region as a whole +function _mosaic!( + f::Function, op, dest::AbstractRaster, regions::RasterVecOrTuple; + kw... +) + for region in regions + _mosaic_region!(op, dest, region; kw...) + end + return dest +end +# Generic unknown functions +function _mosaic!( + f::Function, op::Nothing, A::AbstractRaster{T}, regions::RasterVecOrTuple; missingval=missingval(A), atol=nothing ) where T isnokwornothing(missingval) && throw(ArgumentError("destination array must have a `missingval`")) + R = promote_type(map(Missings.nonmissingtype ∘ eltype, regions)...) + buffer = Vector{R}(undef, length(regions)) _without_mapped_crs(A) do A1 broadcast!(A1, DimSelectors(A1; atol)) do ds # Get all the regions that have this point - ls = foldl(regions; init=()) do acc, l - if DD.hasselection(l, ds) - v = l[ds...] - (acc..., l) - else - acc - end - end - values = foldl(ls; init=()) do acc, l - v = l[ds...] - if isnothing(Rasters.missingval(l)) - (acc..., v) - elseif ismissing(Rasters.missingval(l)) - ismissing(v) ? acc : (acc..., v) - else - v === Rasters.missingval(l) ? acc : (acc..., v) + i = 0 + for r in regions + if DD.hasselection(r, ds) + x = r[ds...] + if x !== Rasters.missingval(r) + i += 1 + buffer[i] = x + end end end - if length(values) === 0 + if i === 0 missingval else - f(values) + f(view(buffer, 1:i)) end end end return A end -function _mosaic!(f::Function, st::AbstractRasterStack, regions::Union{Tuple,AbstractArray}; kw...) +function _mosaic!( + f::Function, + op, + st::AbstractRasterStack, + regions::RasterStackVecOrTuple; + kw... +) map(values(st), map(values, regions)...) do A, r... - mosaic!(f, A, r...; kw...) + _mosaic!(f, op, A, r; kw...) end return st end +function _mosaic_mean!(dest, ::Type{T}, regions; kw...) where T + # Note: sum and count are separate broadcasts because + # most disk formats don't support writing a tuple + + # Define a Raster to count into + counts = create(nothing, T, dest; missingval=zero(T)) + counts .= zero(T) + for region in regions + # Add region to dest + _mosaic_region!(Base.add_sum, dest, region; kw...) + # Count region + _count_region!(counts, region; kw...) + end + # Divide dest by counts + # Avoid divide by zero for missing values + dest .= ((d, c) -> d === missingval(dest) ? missingval(dest) : d / c).(dest, counts) + return dest +end + +function _mosaic_region!(op, dest, region; atol=nothing, kw...) + function skip_or_op(a, b) + if b === missingval(region) + a + elseif a === missingval(dest) + b + else + op(a, b) + end + end + ext = _maybe_pad_floats(extent(region), sampling(dest)) + selectors = map(sampling(dest)) do sa + ispoints(sa) ? At(; atol) : Contains() + end + ds = DimSelectors(view(dest, ext); selectors) + # `parent` needed to skip broadcast checks + dest[ext] .= skip_or_op.(parent(view(dest, ext)), parent(view(region, ds))) + return dest +end + +function _count_region!(count::AbstractRaster{T}, region::AbstractRaster; kw...) where T + function skip_or_count(a, b) + if b === missingval(region) + a + elseif a === missingval(count) + oneunit(Missings.nonmissingtype(T)) + else + a + oneunit(a) + end + end + ext = extent(region) + ds = DimSelectors(view(count, ext)) + view(count, ext) .= skip_or_count.(view(count, ext), view(region, ds)) + return count +end + _mosaic(alldims::Tuple{<:DimTuple,Vararg{DimTuple}}) = map(_mosaic, alldims...) function _mosaic(dims::Dimension...) map(dims) do d @@ -248,7 +348,6 @@ function _mosaic(span::Regular, lookup::AbstractSampled, lookups::LookupTuple) end return rebuild(lookup; data=newindex) end - function _mosaic(::Irregular, lookup::AbstractSampled, lookups::LookupTuple) newindex = sort(union(map(parent, lookups)...); order=LA.ordering(order(lookup))) return rebuild(lookup; data=newindex) @@ -264,3 +363,15 @@ function _mosaic(span::Explicit, lookup::AbstractSampled, lookups::LookupTuple) newbounds = vcat(permutedims(newlower), permutedims(newupper)) return rebuild(lookup; data=newindex, span=Explicit(newbounds)) end + +# Pad floats for intervals so that small floating point +# error doesn't exclude values in nealy matching lookups +function _maybe_pad_floats(ext::Extent{K}, sampling::Tuple) where K + map(values(Extents.bounds(ext)), sampling) do b, sa + if isintervals(sa) && eltype(first(b)) <: AbstractFloat + b[1] - 10eps(b[1]), b[2] + 10eps(b[2]) + else + b + end + end |> Extent{K} +end \ No newline at end of file diff --git a/src/methods/rasterize.jl b/src/methods/rasterize.jl index 2d9e278ec..1948a8e64 100644 --- a/src/methods/rasterize.jl +++ b/src/methods/rasterize.jl @@ -1,4 +1,4 @@ -struct _TakeFirst{MV} +struct _TakeFirst{MV} <: Function missingval::MV end (tf::_TakeFirst)(a, b) = a === tf.missingval ? b : a @@ -316,15 +316,10 @@ const RASTERIZE_KEYWORDS = """ - `op`: A reducing function that accepts two values and returns one, like `min` to `minimum`. For common methods this will be assigned for you, or is not required. But you can use it instead of a `reducer` as it will usually be faster. -- `shape`: force `data` to be treated as `:polygon`, `:line` or `:point`, where possible - Points can't be treated as lines or polygons, and lines may not work as polygons, but - an attempt will be made. -- `geometrycolumn`: `Symbol` to manually select the column the geometries are in - when `data` is a Tables.jl compatible table, or a tuple of `Symbol` for columns of - point coordinates. -- `progress`: show a progress bar, `true` by default, `false` to hide.. -- `verbose`: print information and warnings when there are problems with the rasterisation. - `true` by default. +$GEOM_KEYWORDS +$GEOMETRYCOLUMN_KEYWORD +$PROGRESS_KEYWORD +$VERBOSE_KEYWORD $THREADED_KEYWORD - `threadsafe`: specify that custom `reducer` and/or `op` functions are thread-safe, in that the order of operation or blocking does not matter. For example, @@ -360,8 +355,6 @@ $RASTERIZE_ARGUMENTS These are detected automatically from `data` where possible. -$GEOMETRYCOLUMN_KEYWORD -$GEOM_KEYWORDS $RASTERIZE_KEYWORDS $FILENAME_KEYWORD $SUFFIX_KEYWORD diff --git a/src/methods/reproject.jl b/src/methods/reproject.jl index e18910bb6..baad1cc04 100644 --- a/src/methods/reproject.jl +++ b/src/methods/reproject.jl @@ -1,4 +1,3 @@ - """ reproject(obj; crs) @@ -61,17 +60,14 @@ function _reproject(source::GeoFormat, target::GeoFormat, dim::Union{XDim,YDim}, # This is a dumb way to do this. But it save having to inspect crs, # and prevents reprojections that don't make sense from working. # A better method for this should be implemented in future. - return first(reproject(source, target, dim, [val])) + return first(_reproject(source, target, dim, [val])) end function _reproject(source::GeoFormat, target::GeoFormat, dim, vals::NTuple{N}) where N - reps = reproject(source, target, dim, [vals...]) + reps = _reproject(source, target, dim, [vals...]) return ntuple(x -> reps[x], N) end function _reproject(source::GeoFormat, target::GeoFormat, dim::Union{XDim,YDim}, vals::AbstractArray) - reshape(reproject(source, target, dim, vec(vals)), size(vals)) -end -function _reproject(source::Nothing, target::GeoFormat, dim::Union{XDim,YDim}, vals::AbstractArray) - reshape(reproject(source, target, dim, vec(vals)), size(vals)) + reshape(_reproject(source, target, dim, vec(vals)), size(vals)) end function _reproject(source::GeoFormat, target::GeoFormat, dim::Union{XDim,YDim}, vals::AbstractVector) throw_extension_error(reproject, "Proj", :RastersProjExt, (source, target, dim, vals)) diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index 88bb5437d..0790f50ab 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -30,15 +30,15 @@ const CRS_KEYWORD = """ - `crs`: a `crs` which will be attached to the resulting raster when `to` not passed or is an `Extent`. Otherwise the crs from `to` is used. """ - -const SHAPE_KEYWORDS = """ -- `shape`: Force `data` to be treated as `:polygon`, `:line` or `:point` geometries. - using points or lines as polygons may have unexpected results. +const BOUNDARY_KEYWORD = """ - `boundary`: for polygons, include pixels where the `:center` is inside the polygon, where the polygon `:touches` the pixel, or that are completely `:inside` the polygon. The default is `:center`. """ - +const SHAPE_KEYWORD = """ +- `shape`: Force `data` to be treated as `:polygon`, `:line` or `:point` geometries. + using points or lines as polygons may have unexpected results. +""" const THREADED_KEYWORD = """ - `threaded`: run operations in parallel, `false` by default. In some circumstances `threaded` can give large speedups over single-threaded operation. This can be true for complicated @@ -53,7 +53,8 @@ $TO_KEYWORD $RES_KEYWORD $SIZE_KEYWORD $CRS_KEYWORD -$SHAPE_KEYWORDS +$BOUNDARY_KEYWORD +$SHAPE_KEYWORD """ const DATA_ARGUMENT = """ diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index c08b62cc5..41fb4c5fb 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -72,16 +72,17 @@ insertcols!(january_stats, 1, :country => first.(split.(countries.ADMIN, r"[^A-Z 3 columns and 243 rows omitted ``` """ -zonal(f, x::RasterStackOrArray; of, kw...) = _zonal(f, x, of; kw...) +zonal(f, x::RasterStackOrArray; of, skipmissing=true, kw...) = + _zonal(f, _prepare_for_burning(x), of; skipmissing, kw...) _zonal(f, x::RasterStackOrArray, of::RasterStackOrArray; kw...) = _zonal(f, x, Extents.extent(of); kw...) _zonal(f, x::RasterStackOrArray, of::DimTuple; kw...) = _zonal(f, x, Extents.extent(of); kw...) # We don't need to `mask` with an extent, it's square so `crop` will do enough. -_zonal(f, x::Raster, of::Extents.Extent; skipmissing=true) = +_zonal(f, x::Raster, of::Extents.Extent; skipmissing) = _maybe_skipmissing_call(f, crop(x; to=of, touches=true), skipmissing) -function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing=true) +function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing) cropped = crop(x; to=ext, touches=true) length(cropped) > 0 || return missing return maplayers(cropped) do A @@ -96,7 +97,7 @@ _zonal(f, x, ::GI.AbstractFeatureCollectionTrait, fc; kw...) = _zonal(f, x::RasterStackOrArray, ::GI.AbstractFeatureTrait, feature; kw...) = _zonal(f, x, GI.geometry(feature); kw...) function _zonal(f, x::AbstractRaster, ::GI.AbstractGeometryTrait, geom; - skipmissing=true, kw... + skipmissing, kw... ) cropped = crop(x; to=geom, touches=true) length(cropped) > 0 || return missing @@ -104,7 +105,7 @@ function _zonal(f, x::AbstractRaster, ::GI.AbstractGeometryTrait, geom; return _maybe_skipmissing_call(f, masked, skipmissing) end function _zonal(f, st::AbstractRasterStack, ::GI.AbstractGeometryTrait, geom; - skipmissing=true, kw... + skipmissing, kw... ) cropped = crop(st; to=geom, touches=true) length(cropped) > 0 || return map(_ -> missing, st) diff --git a/src/sources/grd.jl b/src/sources/grd.jl index b7f7556ba..9f03e9148 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -218,7 +218,7 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; if write _mmapgrd(filename, source_eltype(mod), size(A); write=true) do M - f(rebuild(A, _maybe_modify(M, mod))) + f(_maybe_modify(M, mod)) end end diff --git a/src/utils.jl b/src/utils.jl index 41c11f609..84878461c 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -284,7 +284,6 @@ end # to distinguish between objects returned by _get_geometries and other objects struct IterableOfGeometries end - # Chunking # NoKW means true @@ -434,7 +433,7 @@ end # Other -_progress(args...; kw...) = ProgressMeter.Progress(args...; color=:blue, barlen=50, kw...) +_progress(args...; kw...) = ProgressMeter.Progress(args...; dt=0.1, color=:blue, barlen=50, kw...) # Function barrier for splatted vector broadcast @noinline _do_broadcast!(f, x, args...) = broadcast!(f, x, args...) @@ -525,47 +524,48 @@ _names(A::AbstractRaster) = (Symbol(name(A)),) _names(A::AbstractRasterStack) = keys(A) using DimensionalData.Lookups: _True, _False + +_booltype(::_True) = _True() +_booltype(::_False) = _False() _booltype(x) = x ? _True() : _False() + istrue(::_True) = true istrue(::_False) = false +istrue(::Type{_True}) = true +istrue(::Type{_False}) = false # skipinvalid: can G and I be missing. skipmissing: can nametypes be missing _rowtype(x, g, args...; kw...) = _rowtype(x, typeof(g), args...; kw...) function _rowtype( x, ::Type{G}, i::Type{I} = typeof(size(x)); - geometry, index, skipmissing, skipinvalid = skipmissing, names, kw... + id=_False(), geometry, index, skipmissing, skipinvalid=skipmissing, names, kw... ) where {G, I} _G = istrue(skipinvalid) ? nonmissingtype(G) : G _I = istrue(skipinvalid) ? I : Union{Missing, I} - keys = _rowkeys(geometry, index, names) - types = _rowtypes(x, _G, _I, geometry, index, skipmissing, names) + keys = _rowkeys(id, geometry, index, names) + types = _rowtypes(x, _G, _I, id, geometry, index, skipmissing, names) NamedTuple{keys,types} end - -function _rowtypes( - x, ::Type{G}, ::Type{I}, geometry::_True, index::_True, skipmissing, names::NamedTuple{Names} -) where {G,I,Names} - Tuple{G,I,_nametypes(x, names, skipmissing)...} -end -function _rowtypes( - x, ::Type{G}, ::Type{I}, geometry::_True, index::_False, skipmissing, names::NamedTuple{Names} +@generated function _rowtypes( + x, ::Type{G}, ::Type{I}, id, geometry, index, skipmissing, names::NamedTuple{Names} ) where {G,I,Names} - Tuple{G,_nametypes(x, names, skipmissing)...} -end -function _rowtypes( - x, ::Type{G}, ::Type{I}, geometry::_False, index::_True, skipmissing, names::NamedTuple{Names} -) where {G,I,Names} - Tuple{I,_nametypes(x, names, skipmissing)...} -end -function _rowtypes( - x, ::Type{G}, ::Type{I}, geometry::_False, index::_False, skipmissing, names::NamedTuple{Names} -) where {G,I,Names} - Tuple{_nametypes(x, names, skipmissing)...} + ts = Expr(:tuple) + istrue(id) && push!(ts.args, Int) + if istrue(skipmissing) + istrue(geometry) && push!(ts.args, G) + istrue(index) && push!(ts.args, I) + else + istrue(geometry) && push!(ts.args, Union{Missing,G}) + istrue(index) && push!(ts.args, Union{Missing,I}) + end + :(Tuple{$ts...,_nametypes(x, names, skipmissing)...}) end -@inline _nametypes(::Raster{T}, ::NamedTuple{Names}, skipmissing::_True) where {T,Names} = (nonmissingtype(T),) -@inline _nametypes(::Raster{T}, ::NamedTuple{Names}, skipmissing::_False) where {T,Names} = (Union{Missing,T},) +@inline _nametypes(::Raster{T}, ::NamedTuple{Names}, sm::_True) where {T,Names} = + (nonmissingtype(T),) +@inline _nametypes(::Raster{T}, ::NamedTuple{Names}, sm::_False) where {T,Names} = + (Union{Missing,T},) # This only compiles away when generated @generated function _nametypes( ::RasterStack{<:Any,T}, ::NamedTuple{PropNames}, skipmissing::_True @@ -580,7 +580,11 @@ end return values(nt[PropNames]) end -_rowkeys(geometry::_False, index::_False, names::NamedTuple{Names}) where Names = Names -_rowkeys(geometry::_True, index::_False, names::NamedTuple{Names}) where Names = (:geometry, Names...) -_rowkeys(geometry::_True, index::_True, names::NamedTuple{Names}) where Names = (:geometry, :index, Names...) -_rowkeys(geometry::_False, index::_True, names::NamedTuple{Names}) where Names = (:index, Names...) +_rowkeys(id::_True, geometry::_False, index::_False, names::NamedTuple{Names}) where Names = (:id, Names...) +_rowkeys(id::_True, geometry::_True, index::_False, names::NamedTuple{Names}) where Names = (:id, :geometry, Names...) +_rowkeys(id::_True, geometry::_True, index::_True, names::NamedTuple{Names}) where Names = (:id, :geometry, :index, Names...) +_rowkeys(id::_True, geometry::_False, index::_True, names::NamedTuple{Names}) where Names = (:id, :index, Names...) +_rowkeys(id::_False, geometry::_False, index::_False, names::NamedTuple{Names}) where Names = Names +_rowkeys(id::_False, geometry::_True, index::_False, names::NamedTuple{Names}) where Names = (:geometry, Names...) +_rowkeys(id::_False, geometry::_True, index::_True, names::NamedTuple{Names}) where Names = (:geometry, :index, Names...) +_rowkeys(id::_False, geometry::_False, index::_True, names::NamedTuple{Names}) where Names = (:index, Names...) \ No newline at end of file diff --git a/test/extract.jl b/test/extract.jl new file mode 100644 index 000000000..9247c3754 --- /dev/null +++ b/test/extract.jl @@ -0,0 +1,506 @@ +using Rasters, Test, DataFrames, Extents +import GeoInterface as GI +using Rasters.Lookups, Rasters.Dimensions + +include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) + +dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) +rast = Raster(Union{Int,Missing}[1 2; 3 4], dimz; name=:test, missingval=missing) +rast2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=5) +rast_m = Raster([1 2; 3 missing], dimz; name=:test, missingval=missing) +st = RasterStack(rast, rast2) + +pts = [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3), (10.0, 0.2)] +poly = GI.Polygon([[(8.0, 0.0), (11.0, 0.0), (11.0, 0.4), (8.0, 0.0)]]) +linestring = GI.LineString([(8.0, 0.0), (9.5, 0.0), (10.0, 0.4)]) +line = GI.Line([(8.0, 0.0), (12.0, 0.4)]) +table = (geometry=pts, foo=zeros(4)) + +@testset "Points" begin + @testset "From Raster" begin + @testset "skipmissing=false" begin + ex = extract(rast, pts; skipmissing=false) + T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64}} + @test eltype(ex) == T + @test all(ex .=== T[ + (geometry = missing, test = missing) + (geometry = (9.0, 0.1), test=1) + (geometry = (9.0, 0.2), test=2) + (geometry = (10.0, 0.3), test=missing) + (geometry = (10.0, 0.2), test=4) + ]) + end + @testset "skipmissing=true" begin + ex = extract(rast_m, pts; skipmissing=true); + T = @NamedTuple{geometry::Tuple{Float64, Float64}, test::Int64} + @test eltype(ex) == T + @test all(ex .=== T[(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2)]) + end + + @testset "skipmissing=true, geometry=false" begin + ex = extract(rast_m, pts; skipmissing=true, geometry=false) + T = @NamedTuple{test::Int64} + @test eltype(ex) == T + @test all(ex .=== T[(test = 1,), (test = 2,)]) + @test all(extract(rast_m, pts; skipmissing=true, geometry=false, index=true) .=== [ + (index = (1, 1), test = 1,) + (index = (1, 2), test = 2,) + ]) + end + @testset "reverse points" begin + # NamedTuple (reversed) points - tests a Table that iterates over points + T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64}} + @test all(extract(rast, [(Y=0.1, X=9.0), (Y=0.2, X=10.0), (Y=0.3, X=10.0)]) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.2), test = 4) + (geometry = (10.0, 0.3), test = missing) + ]) + end + @testset "Vector points" begin + @test extract(rast, [[9.0, 0.1], [10.0, 0.2]]) == [ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.2), test = 4) + ] + end + end + + @testset "From RasterStack" begin + T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64},test2::Union{Missing,Int64}} + @test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]) .=== T[ + (geometry = missing, test = missing, test2 = missing) + (geometry = (9.0, 0.1), test = 1, test2 = 5) + (geometry = (10.0, 0.2), test = 4, test2 = 8) + (geometry = (10.0, 0.3), test = missing, test2 = missing) + ]) + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true) == [ + (geometry = (10.0, 0.2), test = 4, test2 = 8) + ] + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false) == [ + (test = 4, test2 = 8) + ] + T = @NamedTuple{index::Union{Missing, Tuple{Int,Int}}, test::Union{Missing, Int64}, test2::Union{Missing, Int64}} + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false, index=true) == T[ + (index = (2, 2), test = 4, test2 = 8) + ] + # Subset with `names` + T = @NamedTuple{geometry::Union{Missing, Tuple{Float64, Float64}}, test2::Union{Missing, Int64}} + @test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; name=(:test2,)) .=== T[ + (geometry = missing, test2 = missing) + (geometry = (9.0, 0.1), test2 = 5) + (geometry = (10.0, 0.2), test2 = 8) + (geometry = (10.0, 0.3), test2 = missing) + ]) + # Subset with `names` and `skipmissing` with mixed missingvals + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; name=(:test2,), skipmissing=true) == [ + (geometry = (10.0, 0.2), test2 = 8) + ] + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; name=(:test,), skipmissing=true) == [ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.2), test = 4) + ] + end + + @testset "Errors" begin + # Missing coord errors + @test_throws ArgumentError extract(rast, [(0.0, missing), (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) + @test_throws ArgumentError extract(rast, [(9.0, 0.1), (0.0, missing), (9.0, 0.2), (10.0, 0.3)]) + @test_throws ArgumentError extract(rast, [(X=0.0, Y=missing), (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) + end +end + +@testset "Polygons" begin + # Extract a polygon + T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64}} + @test all(extract(rast_m, poly) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + # Test all the keyword combinations + @test all(extract(rast_m, poly) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + T = @NamedTuple{test::Union{Missing,Int64}} + @test all(extract(rast_m, poly; geometry=false) .=== T[ + (test = 1,) + (test = 3,) + (test = missing,) + ]) + T = @NamedTuple{index::Union{Missing,Tuple{Int,Int}},test::Union{Missing,Int64}} + @test all(extract(rast_m, poly; geometry=false, index=true) .=== T[ + (index = (1, 1), test = 1) + (index = (2, 1), test = 3) + (index = (2, 2), test = missing) + ]) + T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},index::Union{Missing,Tuple{Int,Int}},test::Union{Missing,Int64}} + @test all(extract(rast_m, poly; index=true) .=== T[ + (geometry = (9.0, 0.1), index = (1, 1), test = 1) + (geometry = (10.0, 0.1), index = (2, 1), test = 3) + (geometry = (10.0, 0.2), index = (2, 2), test = missing) + ]) + @test extract(rast_m, poly; skipmissing=true) == [ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + ] + @test extract(rast_m, poly; skipmissing=true, geometry=false) == [ + (test = 1,) + (test = 3,) + ] + @test extract(rast_m, poly; skipmissing=true, geometry=false, index=true) == [ + (index = (1, 1), test = 1) + (index = (2, 1), test = 3) + ] + @test extract(rast_m, poly; skipmissing=true, index=true) == [ + (geometry = (9.0, 0.1), index = (1, 1), test = 1) + (geometry = (10.0, 0.1), index = (2, 1), test = 3) + ] + @test extract(rast2, poly; skipmissing=true) == [ + (geometry = (10.0, 0.1), test2 = 7) + (geometry = (10.0, 0.2), test2 = 8) + ] + T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64}} + @test all(extract(rast_m, poly) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + @test extract(rast_m, poly; skipmissing=true, geometry=false, id=true) == [ + (id=1, test = 1,) + (id=1, test = 3,) + ] + @test extract(rast_m, [poly, poly]; skipmissing=true, geometry=false, id=true) == [ + (id=1, test = 1,) + (id=1, test = 3,) + (id=2, test = 1,) + (id=2, test = 3,) + ] + @test extract(rast_m, [poly, poly]; skipmissing=true, geometry=false, id=true, flatten=false) == [ + [(id=1, test = 1,), (id=1, test = 3,)], + [(id=2, test = 1,), (id=2, test = 3,)], + ] + + @testset "Vector of polygons" begin + ex = extract(rast_m, [poly, poly]) + @test eltype(ex) == T + @test all(ex .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + end +end + +@testset "Extract a linestring" begin + T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64}} + Tsm = @NamedTuple{geometry::Tuple{Float64,Float64},test::Int64} + linestrings = [linestring, linestring, linestring] + fc = GI.FeatureCollection(map(GI.Feature, linestrings)) + + # Single LineString + @test extract(rast, linestring) isa Vector{T} + @test all(extract(rast_m, linestring) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + @test all(extract(rast_m, linestring; skipmissing=true) .=== Tsm[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + ]) + + # Multiple linstrings + # Test all combinations of skipmissing, flatten and threaded + @test extract(rast_m, linestrings; skipmissing=true) isa Vector{Tsm} + @test extract(rast_m, fc; skipmissing=true) == + extract(rast_m, fc; skipmissing=true, threaded=true) == + extract(rast_m, linestrings; skipmissing=true) == + extract(rast_m, linestrings; skipmissing=true, threaded=true) == Tsm[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + ] + + @test extract(rast_m, linestrings; skipmissing=false) isa Vector{T} + @test all( + extract(rast_m, fc; skipmissing=false) .=== + extract(rast_m, fc; skipmissing=false, threaded=true) .=== + extract(rast_m, linestrings; skipmissing=false) .=== + extract(rast_m, linestrings; skipmissing=false, threaded=true) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + + @test extract(rast_m, linestrings; skipmissing=true, flatten=false) isa Vector{Vector{Tsm}} + @test extract(rast_m, linestrings; skipmissing=true, flatten=false) == + extract(rast_m, linestrings; skipmissing=true, flatten=false, threaded=true) == Vector{Tsm}[ + [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3)], + [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3)], + [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3)], + ] + + # Nested Vector holding missing needs special handling to check equality + @test extract(rast_m, linestrings; skipmissing=false, flatten=false) isa Vector{Vector{T}} + ref = Vector{T}[ + [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)] , + [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)], + [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)], + ] + matching(a, b) = all(map(===, a, b)) + @test all(map(matching, extract(rast_m, linestrings; skipmissing=false, flatten=false, threaded=false), ref)) + @test all(map(matching, extract(rast_m, linestrings; skipmissing=false, flatten=false, threaded=true), ref)) +end + +@testset "Extract a line" begin + T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64}} + Tsm = @NamedTuple{geometry::Tuple{Float64,Float64},test::Int64} + lines = [line, line] + fc = GI.FeatureCollection(map(GI.Feature, lines)) + + # Single LineString + @test extract(rast, line) isa Vector{T} + @test all(extract(rast_m, line) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (10.0, 0.2), test = missing) + ]) + @test all(extract(rast_m, line; skipmissing=true) .=== Tsm[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + ]) + + # Multiple linstrings + # Test all combinations of skipmissing, flatten and threaded + @test extract(rast_m, lines; skipmissing=true) isa Vector{Tsm} + @test extract(rast_m, fc; skipmissing=true) == + extract(rast_m, fc; skipmissing=true, threaded=true) == + extract(rast_m, lines; skipmissing=true) == + extract(rast_m, lines; skipmissing=true, threaded=true) == Tsm[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + ] + + @test extract(rast_m, lines; skipmissing=false) isa Vector{T} + @test all( + extract(rast_m, fc; skipmissing=false) .=== + extract(rast_m, fc; skipmissing=false, threaded=true) .=== + extract(rast_m, lines; skipmissing=false) .=== + extract(rast_m, lines; skipmissing=false, threaded=true) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (10.0, 0.2), test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (10.0, 0.2), test = missing) + ]) + + Tsm_i = @NamedTuple{id::Int,geometry::Tuple{Float64,Float64},test::Int64} + @test extract(rast, lines; skipmissing=true, id=true) isa Vector{Tsm_i} + @test all(extract(rast_m, fc; skipmissing=true, id=true) .=== + extract(rast_m, fc; skipmissing=true, threaded=true, id=true) .=== + extract(rast_m, lines; skipmissing=true, id=true) .=== + extract(rast_m, lines; skipmissing=true, threaded=true, id=true) .=== Tsm_i[ + (id=1, geometry = (9.0, 0.1), test = 1) + (id=1, geometry = (9.0, 0.2), test = 2) + (id=2, geometry = (9.0, 0.1), test = 1) + (id=2, geometry = (9.0, 0.2), test = 2) + ]) + + @test extract(rast_m, lines; skipmissing=true, flatten=false) isa Vector{Vector{Tsm}} + @test extract(rast_m, lines; skipmissing=true, flatten=false) == + extract(rast_m, lines; skipmissing=true, flatten=false, threaded=true) == Vector{Tsm}[ + [(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2)], + [(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2)], + ] + + # Nested Vector holding missing needs special handling to check equality + @test extract(rast_m, lines; skipmissing=false, flatten=false) isa Vector{Vector{T}} + ref = Vector{T}[ + [(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2), (geometry = (10.0, 0.2), test = missing)] , + [(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2), (geometry = (10.0, 0.2), test = missing)], + ] + matching(a, b) = all(map(===, a, b)) + @test all(map(matching, extract(rast_m, lines; skipmissing=false, flatten=false, threaded=false), ref)) + @test all(map(matching, extract(rast_m, lines; skipmissing=false, flatten=false, threaded=true), ref)) +end + +@testset "Table" begin + T = @NamedTuple{geometry::Union{Missing,Tuple{Float64, Float64}}, test::Union{Missing, Int64}} + @test all(extract(rast, table) .=== T[ + (geometry = missing, test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (10.0, 0.3), test = missing) + (geometry = (10.0, 0.2), test = 4) + ]) + @test extract(rast, table; skipmissing=true) == [ + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (10.0, 0.2), test = 4) + ] + @test extract(rast, table; skipmissing=true, geometry=false) == [ + (test = 1,) + (test = 2,) + (test = 4,) + ] + @test extract(rast, table; skipmissing=true, geometry=false, index=true) == [ + (index = (1, 1), test = 1,) + (index = (1, 2), test = 2,) + (index = (2, 2), test = 4,) + ] + @test extract(rast, table; skipmissing=true, geometry=false, id=true) == [ + (id=2, test = 1,) + (id=3, test = 2,) + (id=5, test = 4,) + ] + T = @NamedTuple{id::Int, test::Union{Missing, Int64}} + @test all(extract(rast, table; skipmissing=false, geometry=false, id=true) .=== T[ + (id=1, test = missing,) + (id=2, test = 1,) + (id=3, test = 2,) + (id=4, test = missing,) + (id=5, test = 4,) + ]) + @test_throws ArgumentError extract(rast, (foo = zeros(4),)) +end + +@testset "Empty geoms" begin + @test all(extract(rast, []) .=== @NamedTuple{geometry::Missing,test::Union{Missing,Int}}[]) + @test all(extract(rast, []; geometry=false) .=== @NamedTuple{test::Tuple{Missing}}[]) + @test all(extract(rast, [missing, missing]; geometry=false) .=== @NamedTuple{test::Union{Missing,Int}}[ + (test = missing,) + (test = missing,) + ]) + @test typeof(extract(rast, []; geometry=false, skipmissing=true)) == Vector{@NamedTuple{test::Int}} + @test typeof(extract(rast, [missing, missing]; geometry=false, skipmissing=true)) == Vector{@NamedTuple{test::Int}} +end + +#= +Some benchmark and plotting code + +using BenchmarkTools +using ProfileView +using Chairmarks +using Cthulhu +using Rasters +using DataFrames +import GeoInterface as GI +using Rasters.Lookups, Rasters.Dimensions + +dimz = (X(5.0:0.1:15.0; sampling=Intervals(Start())), Y(-0.1:0.01:0.5; sampling=Intervals(Start()))) +rast_m = Raster(rand([missing, 1, 2], dimz); name=:test, missingval=missing) +rast = Raster(rand(Int, dimz); name=:test2, missingval=5) +st = RasterStack((a=rast, b=rast, c=Float32.(rast))) +ks = ntuple(x -> gensym(), 100) +st100 = RasterStack(NamedTuple{ks}(ntuple(x -> rast, length(ks)))) + +poly = GI.Polygon([[(8.0, 0.0), (11.0, 0.0), (11.0, 0.4), (8.0, 0.0)]]) +linestring = GI.LineString([(8.0, 0.0), (11.0, 0.0), (11.0, 0.4), (8.0, 0.0)]) +line = GI.Line([(8.0, 0.0), (12.0, 4.0)]) +polys = [poly for i in 1:10000] +lines = [line for i in 1:100000] +linestrings = [linestring for i in 1:100000] + +extract(rast, linestring) +extract(rast, polys; threaded=false) +@time extract(rast, polys; geometry=false, threaded=true, flatten=true) |> length +@time extract(rast, lines; geometry=false, threaded=true, flatten=true) |> length +@time extract(st, polys; geometry=false, threaded=true, flatten=true) |> length +@time extract(st, lines; geometry=false, threaded=false, flatten=true) |> length + +@time extract(rast_m, lines; geometry=false, threaded=true, flatten=false) |> length +@time extract(rast, lines; geometry=false, threaded=true, flatten=false) |> length +@time extract(rast_m, lines; skipmissing=true, geometry=false, threaded=true, flatten=false) |> length +@time extract(rast, lines; skipmissing=true, geometry=false, threaded=true, flatten=false) |> length +@time extract(st, lines; skipmissing=true, geometry=false, threaded=true, flatten=false) |> length +@time extract(st100, lines; geometry=false, threaded=false, flatten=false) |> length +@time extract(st100, lines; geometry=false, threaded=true, flatten=false) |> length +@time extract(rast, lines; geometry=false, threaded=true, flatten=true) |> length +@time extract(rast, lines; threaded=true, flatten=true, progress=false) |> length +@time extract(rast, lines; threaded=false, flatten=false, progress=false) |> length +@benchmark extract(rast, lines; threaded=false, flatten=true) +@benchmark extract(rast, lines; threaded=true, flatten=true) +@benchmark extract(rast, lines; threaded=false, flatten=false) +@benchmark extract(rast, lines; threaded=true, flatten=false) +@time extract(rast, polys; flatten=false); +extract(rast, polys; flatten=true); +extract(st100, linestring) +extract(st100, poly) + +@profview extract(rast, lines) +@profview extract(rast, lines; threaded=true) +@profview extract(rast, linestrings) +@profview extract(rast_m, linestrings) +@profview extract(rast, polys) +@profview extract(rast, polys; flatten=false) +@profview extract(st, linestring) +@profview extract(st, poly) + +# Profile running one function a lot. +# People will do this +f(rast, ls, n; skipmissing=true) = for _ in 1:n extract(rast, ls; skipmissing) end +@profview f(rast, polies, 10; skipmissing=false) +@profview f(rast, poly, 10000; skipmissing=false) +@profview f(rast_m, poly, 10000; skipmissing=false) +@profview f(rast, linestring, 10000; skipmissing=false) +@profview f(rast_m, linestring, 10000; skipmissing=false) +@profview f(st, linestring, 10000; skipmissing=false) +@profview f(rast, linestring, 10000) +@profview f(st, linestring, 100000; skipmissing=false) +@profview f(st, linestring, 100000; skipmissing=true) +@profview f(st100, linestring, 1000) +@profview f(st100, line, 1000) +@profview f(st100, poly, 10000; skipmissing=true) + +@profview f(st100, linestring, 1000; skipmissing=true) +@profview f(st100, linestring, 1000; skipmissing=false) +@profview f(st100, linestring, 1000; names=skipmissing=false) +keys(st100)[1:50] + + + +# Demo plots +using GLMakie +using GeoInterfaceMakie + +Makie.plot(rast; colormap=:Reds) +Makie.plot!(rast[Touches(GI.extent(first(polys)))] ; colormap=:Greens) +Makie.plot!(polys; alpha=0.2) +ps = getindex.(extract(rast, poly; index=true, flatten=true), :geometry) +Makie.scatter!(ps; color=:yellow) + +Makie.plot(rast; colormap=:Reds) +Makie.plot!(rast[Touches(GI.extent(first(polys)))] ; colormap=:Greens) +Makie.plot!(poly; alpha=0.7) +ps = getindex.(extract(rast, poly; index=true, flatten=true, boundary=:touches), :geometry); +Makie.scatter!(ps; color=:pink) + +Makie.plot(rast; colormap=:Reds) +Makie.plot!(rast[Touches(GI.extent(first(polys)))] ; colormap=:Greens) +Makie.plot!(polys; alpha=0.7) +ps = getindex.(extract(rast, poly; index=true, flatten=true, boundary=:inside), :geometry) +Makie.scatter!(ps; color=:pink) + +Makie.plot(rast; colormap=:Reds) +Makie.plot!(rast[Touches(GI.extent(first(polys)))] ; colormap=:Greens) +Makie.plot!(linestring; color=:violet, linewidth=5) +ps = getindex.(extract(rast, linestring; index=true), :geometry); +Makie.scatter!(ps; color=:yellow) + +=# diff --git a/test/methods.jl b/test/methods.jl index c2e15124f..5ebfe1fc2 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -17,6 +17,8 @@ pointvec = [(-20.0, 30.0), (0.0, 10.0), (0.0, 30.0), (-20.0, 30.0)] + + vals = [1, 2, 3, 4, 5] polygon = ArchGDAL.createpolygon(pointvec) multi_polygon = ArchGDAL.createmultipolygon([[pointvec]]) @@ -409,190 +411,6 @@ end createpoint(args...) = ArchGDAL.createpoint(args...) -@testset "extract" begin - dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) - rast = Raster(Union{Int,Missing}[1 2; 3 4], dimz; name=:test, missingval=missing) - rast2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=5) - rast_m = Raster([1 2; 3 missing], dimz; name=:test, missingval=missing) - mypoints = [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3), (10.0, 0.2)] - table = (geometry=mypoints, foo=zeros(4)) - st = RasterStack(rast, rast2) - @testset "from Raster" begin - # Tuple points - ex = extract(rast, mypoints) - T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64}} - @test eltype(ex) == T - @test all(ex .=== T[ - (geometry = missing, test = missing) - (geometry = (9.0, 0.1), test=1) - (geometry = (9.0, 0.2), test=2) - (geometry = (10.0, 0.3), test=missing) - (geometry = (10.0, 0.2), test=4) - ]) - ex = extract(rast_m, mypoints; skipmissing=true) - T = @NamedTuple{geometry::Tuple{Float64, Float64}, test::Int64} - @test eltype(ex) == T - @test all(ex .=== T[(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2)]) - ex = extract(rast_m, mypoints; skipmissing=true, geometry=false) - T = @NamedTuple{test::Int64} - @test eltype(ex) == T - @test all(ex .=== T[(test = 1,), (test = 2,)]) - @test all(extract(rast_m, mypoints; skipmissing=true, geometry=false, index=true) .=== [ - (index = (1, 1), test = 1,) - (index = (1, 2), test = 2,) - ]) - # NamedTuple (reversed) points - tests a Table that iterates over points - T = @NamedTuple{geometry::Union{@NamedTuple{Y::Float64,X::Float64}},test::Union{Missing,Int64}} - @test all(extract(rast, [(Y=0.1, X=9.0), (Y=0.2, X=10.0), (Y=0.3, X=10.0)]) .=== T[ - (geometry = (Y = 0.1, X = 9.0), test = 1) - (geometry = (Y = 0.2, X = 10.0), test = 4) - (geometry = (Y = 0.3, X = 10.0), test = missing) - ]) - # Vector points - @test all(extract(rast, [[9.0, 0.1], [10.0, 0.2]]) .== [ - (geometry = [9.0, 0.1], test = 1) - (geometry = [10.0, 0.2], test = 4) - ]) - # Extract a polygon - p = ArchGDAL.createpolygon([[[8.0, 0.0], [11.0, 0.0], [11.0, 0.4], [8.0, 0.0]]]) - T = @NamedTuple{geometry::Union{Tuple{Float64,Float64}},test::Union{Missing,Int64}} - @test all(extract(rast_m, p) .=== T[ - (geometry = (9.0, 0.1), test = 1) - (geometry = (10.0, 0.1), test = 3) - (geometry = (10.0, 0.2), test = missing) - ]) - # Extract a vector of polygons - ex = extract(rast_m, [p, p]) - @test eltype(ex) == T - @test all(ex .=== T[ - (geometry = (9.0, 0.1), test = 1) - (geometry = (10.0, 0.1), test = 3) - (geometry = (10.0, 0.2), test = missing) - (geometry = (9.0, 0.1), test = 1) - (geometry = (10.0, 0.1), test = 3) - (geometry = (10.0, 0.2), test = missing) - ]) - # Test all the keyword combinations - @test all(extract(rast_m, p) .=== T[ - (geometry = (9.0, 0.1), test = 1) - (geometry = (10.0, 0.1), test = 3) - (geometry = (10.0, 0.2), test = missing) - ]) - T = @NamedTuple{test::Union{Missing,Int64}} - @test all(extract(rast_m, p; geometry=false) .=== T[ - (test = 1,) - (test = 3,) - (test = missing,) - ]) - T = @NamedTuple{index::Union{Missing,Tuple{Int,Int}},test::Union{Missing,Int64}} - @test all(extract(rast_m, p; geometry=false, index=true) .=== T[ - (index = (1, 1), test = 1) - (index = (2, 1), test = 3) - (index = (2, 2), test = missing) - ]) - T = @NamedTuple{geometry::Union{Tuple{Float64,Float64}},index::Union{Missing,Tuple{Int,Int}},test::Union{Missing,Int64}} - @test all(extract(rast_m, p; index=true) .=== T[ - (geometry = (9.0, 0.1), index = (1, 1), test = 1) - (geometry = (10.0, 0.1), index = (2, 1), test = 3) - (geometry = (10.0, 0.2), index = (2, 2), test = missing) - ]) - @test extract(rast_m, p; skipmissing=true) == [ - (geometry = (9.0, 0.1), test = 1) - (geometry = (10.0, 0.1), test = 3) - ] - @test extract(rast_m, p; skipmissing=true, geometry=false) == [ - (test = 1,) - (test = 3,) - ] - @test extract(rast_m, p; skipmissing=true, geometry=false, index=true) == [ - (index = (1, 1), test = 1) - (index = (2, 1), test = 3) - ] - @test extract(rast_m, p; skipmissing=true, index=true) == [ - (geometry = (9.0, 0.1), index = (1, 1), test = 1) - (geometry = (10.0, 0.1), index = (2, 1), test = 3) - ] - @test extract(rast2, p; skipmissing=true) == [ - (geometry = (10.0, 0.1), test2 = 7) - (geometry = (10.0, 0.2), test2 = 8) - ] - # Empty geoms - @test extract(rast, []) == NamedTuple{(:geometry, :test),Tuple{Missing,Missing}}[] - @test extract(rast, []; geometry=false) == NamedTuple{(:test,),Tuple{Missing}}[] - # Missing coord errors - @test_throws ArgumentError extract(rast, [(0.0, missing), (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) - @test_throws ArgumentError extract(rast, [(9.0, 0.1), (0.0, missing), (9.0, 0.2), (10.0, 0.3)]) - @test_throws ArgumentError extract(rast, [(X=0.0, Y=missing), (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) - end - - @testset "with table" begin - T = @NamedTuple{geometry::Union{Missing, Tuple{Float64, Float64}}, test::Union{Missing, Int64}} - @test all(extract(rast, table) .=== T[ - (geometry = missing, test = missing) - (geometry = (9.0, 0.1), test = 1) - (geometry = (9.0, 0.2), test = 2) - (geometry = (10.0, 0.3), test = missing) - (geometry = (10.0, 0.2), test = 4) - ]) - @test extract(rast, table; skipmissing=true) == [ - (geometry = (9.0, 0.1), test = 1) - (geometry = (9.0, 0.2), test = 2) - (geometry = (10.0, 0.2), test = 4) - ] - @test extract(rast, table; skipmissing=true, geometry=false) == [ - (test = 1,) - (test = 2,) - (test = 4,) - ] - @test extract(rast, table; skipmissing=true, geometry=false, index=true) == [ - (index = (1, 1), test = 1,) - (index = (1, 2), test = 2,) - (index = (2, 2), test = 4,) - ] - - @test_throws ArgumentError extract(rast, (foo = zeros(4),)) - end - - @testset "from stack" begin - T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64},test2::Union{Missing,Int64}} - @test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]) .=== T[ - (geometry = missing, test = missing, test2 = missing) - (geometry = (9.0, 0.1), test = 1, test2 = 5) - (geometry = (10.0, 0.2), test = 4, test2 = 8) - (geometry = (10.0, 0.3), test = missing, test2 = missing) - ]) - @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true) == [ - (geometry = (10.0, 0.2), test = 4, test2 = 8) - ] - @test extract(st2, [missing, (2, 2), (2, 1)]; skipmissing=true) == [ - (geometry = (2, 1), a = 7.0, b = 2.0) - ] - @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false) == [ - (test = 4, test2 = 8) - ] - T = @NamedTuple{index::Union{Missing, Tuple{Int,Int}}, test::Union{Missing, Int64}, test2::Union{Missing, Int64}} - @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false, index=true) == T[ - (index = (2, 2), test = 4, test2 = 8) - ] - # Subset with `names` - T = @NamedTuple{geometry::Union{Missing, Tuple{Float64, Float64}}, test2::Union{Missing, Int64}} - @test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; name=(:test2,)) .=== T[ - (geometry = missing, test2 = missing) - (geometry = (9.0, 0.1), test2 = 5) - (geometry = (10.0, 0.2), test2 = 8) - (geometry = (10.0, 0.3), test2 = missing) - ]) - # Subset with `names` and `skipmissing` with mixed missingvals - @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; name=(:test2,), skipmissing=true) == [ - (geometry = (10.0, 0.2), test2 = 8) - ] - @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; name=(:test,), skipmissing=true) == [ - (geometry = (9.0, 0.1), test = 1) - (geometry = (10.0, 0.2), test = 4) - ] - end -end - @testset "trim, crop, extend" begin A = [missing missing missing missing 2.0 0.5 @@ -676,51 +494,6 @@ end end -@testset "mosaic" begin - reg1 = Raster([0.1 0.2; 0.3 0.4], (X(2.0:1.0:3.0), Y(5.0:1.0:6.0))) - reg2 = Raster([1.1 1.2; 1.3 1.4], (X(3.0:1.0:4.0), Y(6.0:1.0:7.0))) - irreg1 = Raster([0.1 0.2; 0.3 0.4], (X([2.0, 3.0]), Y([5.0, 6.0]))) - irreg2 = Raster([1.1 1.2; 1.3 1.4], (X([3.0, 4.0]), Y([6.0, 7.0]))) - - span_x1 = Explicit(vcat((1.5:1.0:2.5)', (2.5:1.0:3.5)')) - span_x2 = Explicit(vcat((2.5:1.0:3.5)', (3.5:1.0:4.5)')) - exp1 = Raster([0.1 0.2; 0.3 0.4], (X(Sampled([2.0, 3.0]; span=span_x1)), Y([5.0, 6.0]))) - exp2 = Raster([1.1 1.2; 1.3 1.4], (X(Sampled([3.0, 4.0]; span=span_x2)), Y([6.0, 7.0]))) - @test val(span(mosaic(first, exp1, exp2), X)) == [1.5 2.5 3.5; 2.5 3.5 4.5] - @test all(mosaic(first, [reg1, reg2]) .=== - mosaic(first, irreg1, irreg2) .=== - mosaic(first, (irreg1, irreg2)) .=== - [0.1 0.2 missing; - 0.3 0.4 1.2; - missing 1.3 1.4]) - @test all(mosaic(last, reg1, reg2) .=== - mosaic(last, irreg1, irreg2) .=== - mosaic(last, exp1, exp2) .=== [0.1 0.2 missing; - 0.3 1.1 1.2; - missing 1.3 1.4]) - - @test all(mosaic(first, [reverse(reg2; dims=Y), reverse(reg1; dims=Y)]) .=== - [missing 0.2 0.1; - 1.2 1.1 0.3; - 1.4 1.3 missing] - ) - - # 3 dimensions - A1 = Raster(ones(2, 2, 2), (X(2.0:-1.0:1.0), Y(5.0:1.0:6.0), Ti(DateTime(2001):Year(1):DateTime(2002)))) - A2 = Raster(zeros(2, 2, 2), (X(3.0:-1.0:2.0), Y(4.0:1.0:5.0), Ti(DateTime(2002):Year(1):DateTime(2003)))) - @test all(mosaic(mean, A1, A2) |> parent .=== - mosaic(mean, RasterStack(A1), RasterStack(A2)).layer1 .=== - cat([missing missing missing - missing 1.0 1.0 - missing 1.0 1.0 ], - [0.0 0.0 missing - 0.0 0.5 1.0 - missing 1.0 1.0 ], - [0.0 0.0 missing - 0.0 0.0 missing - missing missing missing], dims=3)) -end - using StableRNGs, StatsBase test = rebuild(ga; name = :test) @testset "sample" begin @@ -754,9 +527,9 @@ test = rebuild(ga; name = :test) end @test all(Rasters.sample(StableRNG(123), st2, 2, name = (:a,)) .=== extract(st2, [(2,2), (1,2)], name = (:a,))) - # in this case extract and sample always return different types - @test eltype(Rasters.sample(StableRNG(123), test, 2, index = true)) != eltype(extract(test, [(2.0,1.0), (2.0,1.0)], index = true)) - @test eltype(Rasters.sample(StableRNG(123), st2, 2, index = true)) != eltype(extract(st2, [(2,1), (2,1)], index = true)) + # extract and sample return the same type + @test eltype(Rasters.sample(StableRNG(123), test, 2, index = true)) == eltype(extract(test, [(2.0,1.0), (2.0,1.0)], index = true)) + @test eltype(Rasters.sample(StableRNG(123), st2, 2, index = true)) == eltype(extract(st2, [(2,1), (2,1)], index = true)) @test all( Rasters.sample(StableRNG(123), test, 2, weights = DimArray([1,1000], X(1:2)), skipmissing = true) .=== diff --git a/test/mosaic.jl b/test/mosaic.jl new file mode 100644 index 000000000..b9242a976 --- /dev/null +++ b/test/mosaic.jl @@ -0,0 +1,67 @@ +using Rasters, Dates, Statistics, Test +using Rasters.Lookups, Rasters.Dimensions + +@testset "mosaic" begin + reg1 = Raster([0.1 0.2; 0.3 0.4], (X(2.0:1.0:3.0), Y(5.0:1.0:6.0))) + reg2 = Raster([1.1 1.2; 1.3 1.4], (X(3.0:1.0:4.0), Y(6.0:1.0:7.0))) + irreg1 = Raster([0.1 0.2; 0.3 0.4], (X([2.0, 3.0]), Y([5.0, 6.0]))) + irreg2 = Raster([1.1 1.2; 1.3 1.4], (X([3.0, 4.0]), Y([6.0, 7.0]))) + + span_x1 = Explicit(vcat((1.5:1.0:2.5)', (2.5:1.0:3.5)')) + span_x2 = Explicit(vcat((2.5:1.0:3.5)', (3.5:1.0:4.5)')) + exp1 = Raster([0.1 0.2; 0.3 0.4], (X(Sampled([2.0, 3.0]; span=span_x1)), Y([5.0, 6.0]))) + exp2 = Raster([1.1 1.2; 1.3 1.4], (X(Sampled([3.0, 4.0]; span=span_x2)), Y([6.0, 7.0]))) + @test val(span(mosaic(first, exp1, exp2), X)) == [1.5 2.5 3.5; 2.5 3.5 4.5] + @test all(mosaic(first, [reg1, reg2]) .=== + mosaic(first, irreg1, irreg2) .=== + mosaic(first, (irreg1, irreg2)) .=== + [0.1 0.2 missing; + 0.3 0.4 1.2; + missing 1.3 1.4]) + @test all(mosaic(last, reg1, reg2) .=== + mosaic(last, irreg1, irreg2) .=== + mosaic(last, exp1, exp2) .=== [0.1 0.2 missing; + 0.3 1.1 1.2; + missing 1.3 1.4]) + + @test all(mosaic(first, [reverse(reg2; dims=Y), reverse(reg1; dims=Y)]) .=== + [missing 0.2 0.1; + 1.2 1.1 0.3; + 1.4 1.3 missing] + ) + + @testset "Generic functions" begin + @test all(mosaic(xs -> count(x -> x > 0, xs), reg1, reg2) .=== + [1 1 missing + 1 2 1 + missing 1 1] + ) + end + + @testset "3 dimensions" begin + A1 = Raster(ones(2, 2, 2), (X(2.0:-1.0:1.0), Y(5.0:1.0:6.0), Ti(DateTime(2001):Year(1):DateTime(2002)))) + A2 = Raster(zeros(2, 2, 2), (X(3.0:-1.0:2.0), Y(4.0:1.0:5.0), Ti(DateTime(2002):Year(1):DateTime(2003)))) + mean_mos = cat([missing missing missing + missing 1.0 1.0 + missing 1.0 1.0 ], + [0.0 0.0 missing + 0.0 0.5 1.0 + missing 1.0 1.0 ], + [0.0 0.0 missing + 0.0 0.0 missing + missing missing missing], dims=3) + @test all(mosaic(mean, A1, A2) .=== + mosaic(mean, RasterStack(A1), RasterStack(A2)).layer1 .=== + mean_mos) + @test mosaic(length, A1, A2; missingval=0) == cat([0 0 0 + 0 1 1 + 0 1 1], + [1 1 0 + 1 2 1 + 0 1 1], + [1 1 0 + 1 1 0 + 0 0 0], dims=3) + end + +end diff --git a/test/runtests.jl b/test/runtests.jl index 052ba3137..a9ed7ea04 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,16 +10,18 @@ using Rasters, Test, Aqua, SafeTestsets end @time @safetestset "extensions" begin include("extensions.jl") end -@time @safetestset "methods" begin include("methods.jl") end @time @safetestset "array" begin include("array.jl") end @time @safetestset "stack" begin include("stack.jl") end @time @safetestset "series" begin include("series.jl") end @time @safetestset "create" begin include("create.jl") end @time @safetestset "utils" begin include("utils.jl") end @time @safetestset "set" begin include("set.jl") end +@time @safetestset "adapt" begin include("adapt.jl") end +@time @safetestset "methods" begin include("methods.jl") end @time @safetestset "aggregate" begin include("aggregate.jl") end +@time @safetestset "mosaic" begin include("mosaic.jl") end @time @safetestset "rasterize" begin include("rasterize.jl") end -@time @safetestset "adapt" begin include("adapt.jl") end +@time @safetestset "extract" begin include("extract.jl") end @time @safetestset "reproject" begin include("reproject.jl") end @time @safetestset "warp" begin include("warp.jl") end @time @safetestset "cellarea" begin include("cellarea.jl") end @@ -35,7 +37,7 @@ if !Sys.iswindows() @time @safetestset "gdal" begin include("sources/gdal.jl") end @time @safetestset "grd" begin include("sources/grd.jl") end end -# Only test SMAP locally for now, also RasterDataSources because CI downloads keep breaking +# Only test RasterDataSources locally for now, because CI downloads keep breaking if !haskey(ENV, "CI") @time @safetestset "rasterdatasources" begin include("sources/rasterdatasources.jl") end end diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index fc058d85b..aa4737723 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -187,7 +187,7 @@ gdalpath = maybedownload(url) @test gdalarray[Y(4.224e6..4.226e6), Band(1)] isa Raster end - @testset "methods" begin + @testset "methods" begin @testset "mean" begin @test all(mean(gdalarray; dims=Y) .=== mean(parent(gdalarray); dims=2)) end @@ -289,20 +289,26 @@ gdalpath = maybedownload(url) @testset "mosaic" begin @time gdalarray = Raster(gdalpath; name=:test) A1 = gdalarray[X(1:300), Y(1:200)] - A2 = gdalarray[X(57:500), Y(101:301)] + A2 = gdalarray[X(57:End()), Y(101:End())] tempfile1 = tempname() * ".tif" tempfile2 = tempname() * ".tif" tempfile3 = tempname() * ".tif" - Afile = mosaic(first, A1, A2; missingval=0xff, atol=1e-8, filename=tempfile1) + view(gdalarray, extent(A2)) + Afile = mosaic(first, A1, A2; missingval=0xff, atol=1e-1, filename=tempfile1) + @test missingval(Afile) === 0xff Afile2 = mosaic(first, A1, A2; atol=1e-8, filename=tempfile2) - collect(Afile) - collect(Afile2) @test missingval(Afile2) === missing - Amem = mosaic(first, A1, A2; missingval=0xff, atol=1e-8) - Atest = gdalarray[X(1:500), Y(1:301)] - Atest[X(1:56), Y(201:301)] .= 0xff - Atest[X(301:500), Y(1:100)] .= 0xff + Amem = mosaic(first, A1, A2; missingval=0xff, atol=1e-5) + + Atest = rebuild(gdalarray[Extents.union(extent(A1), extent(A2))]; missingval=0xff) + Atest .= 0xff + + Atest[DimSelectors(Atest[extent(A1)]; selectors=Contains())] .= A1 + Atest[DimSelectors(Atest[extent(A2)]; selectors=Contains())] .= A2 + + @test size(Amem) == size(gdalarray) @test all(Atest .=== Amem .=== Afile .=== replace_missing(Afile2, 0xff)) + filter(x -> !Bool(first(x)), tuple.((Atest .=== Amem), Atest, Amem)) end end # methods diff --git a/test/sources/grd.jl b/test/sources/grd.jl index ca3c72425..dc6a87b79 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -171,7 +171,6 @@ grdpath = stem * ".gri" A = Raster(tempgrd) @test count(==(100.0f0), A) + count(==(255.0f0), A) == length(A) end - @testset "mosaic" begin @time grdarray = Raster(grdpath) A1 = grdarray[X(1:40), Y(1:30)] diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 4c49ad607..bf9337429 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -210,12 +210,13 @@ end A1 = ncarray[X(1:80), Y(1:100)] A2 = ncarray[X(50:150), Y(90:150)] tempfile = tempname() * ".nc" - Afile = mosaic(first, read(A1), read(A2); atol=1e-7, filename=tempfile, force=true) + Afile = mosaic(first, A1, A2; + atol=1e-7, filename=tempfile, force=true + ) |> read Amem = mosaic(first, A1, A2; atol=1e-7) Atest = ncarray[X(1:150), Y(1:150)] Atest[X(1:49), Y(101:150)] .= missing Atest[X(81:150), Y(1:89)] .= missing - read(Afile) @test all(Atest .=== Afile .=== Amem) end @testset "slice" begin @@ -400,7 +401,7 @@ end B = rebuild(boolmask(ncarray) .* 1; missingval=nothing) write(filename, B) nomissing = Raster(filename) - @test missingval(nomissing) == nothing + @test missingval(nomissing) === nothing @test eltype(nomissing) == Int64 end From d4b2487e8dc0a22c3f1480c70f8934b48df536af Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 21 Jan 2025 11:56:12 +0100 Subject: [PATCH 4/6] using Dates in examples --- src/methods/mosaic.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index a823db4d2..506bf5bfd 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -31,7 +31,7 @@ If your mosaic has has apparent line errors, increase the `atol` value. Here we cut out Australia and Africa from a stack, and join them with `mosaic`. ```jldoctest -using Rasters, RasterDataSources, NaturalEarth, DataFrames +using Rasters, RasterDataSources, NaturalEarth, DataFrames, Dates countries = naturalearth("admin_0_countries", 110) |> DataFrame climate = RasterStack(WorldClim{Climate}, (:tmin, :tmax, :prec, :wind); month=July) country_climates = map(("Norway", "Denmark", "Sweden")) do name @@ -139,7 +139,7 @@ Combine `regions` in `Raster` or `RasterStack` `x` using the function `f`. Cut out scandinavian countries and plot: ```jldoctest -using Rasters, RasterDataSources, NaturalEarth, DataFrames +using Rasters, RasterDataSources, NaturalEarth, DataFrames, Dates # Get climate data form worldclim climate = RasterStack(WorldClim{Climate}, (:tmin, :tmax, :prec, :wind); month=July) # And country borders from natural earth From 36e25b0676639df5ae0f28eac8e7014261c5ee59 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 21 Jan 2025 13:09:43 +0100 Subject: [PATCH 5/6] plots dep in examples --- src/methods/mosaic.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index 506bf5bfd..e436ab310 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -31,7 +31,7 @@ If your mosaic has has apparent line errors, increase the `atol` value. Here we cut out Australia and Africa from a stack, and join them with `mosaic`. ```jldoctest -using Rasters, RasterDataSources, NaturalEarth, DataFrames, Dates +using Rasters, RasterDataSources, NaturalEarth, DataFrames, Dates, Plots countries = naturalearth("admin_0_countries", 110) |> DataFrame climate = RasterStack(WorldClim{Climate}, (:tmin, :tmax, :prec, :wind); month=July) country_climates = map(("Norway", "Denmark", "Sweden")) do name @@ -139,7 +139,7 @@ Combine `regions` in `Raster` or `RasterStack` `x` using the function `f`. Cut out scandinavian countries and plot: ```jldoctest -using Rasters, RasterDataSources, NaturalEarth, DataFrames, Dates +using Rasters, RasterDataSources, NaturalEarth, DataFrames, Dates, Plots # Get climate data form worldclim climate = RasterStack(WorldClim{Climate}, (:tmin, :tmax, :prec, :wind); month=July) # And country borders from natural earth From dfb69dbb17eac7c1b51fcecebe386b633ee46cc3 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 21 Jan 2025 13:20:49 +0100 Subject: [PATCH 6/6] fix docs fp changes --- src/methods/zonal.jl | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index 41fb4c5fb..a4368d36e 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -27,38 +27,26 @@ These can be used when `of` is or contains (a) GeoInterface.jl compatible object # Example ```jldoctest -using Rasters, RasterDataSources, ArchGDAL, Shapefile, DataFrames, Downloads, Statistics, Dates - -# Download a borders shapefile -ne_url = "https://github.com/nvkelso/natural-earth-vector/raw/master/10m_cultural/ne_10m_admin_0_countries" -shp_url, dbf_url = ne_url * ".shp", ne_url * ".dbf" -shp_name, dbf_name = "country_borders.shp", "country_borders.dbf" -isfile(shp_name) || Downloads.download(shp_url, shp_name) -isfile(dbf_url) || Downloads.download(dbf_url, dbf_name) - +using Rasters, RasterDataSources, ArchGDAL, DataFrames, Statistics, Dates, NaturalEarth +# Download borders +countries = naturalearth("admin_0_countries", 10) |> DataFrame # Download and read a raster stack from WorldClim -st = RasterStack(WorldClim{Climate}; month=Jan, lazy=false) - -# Load the shapes for world countries -countries = Shapefile.Table(shp_name) |> DataFrame - +st = RasterStack(WorldClim{Climate}; month=Jan) # Calculate the january mean of all climate variables for all countries january_stats = zonal(mean, st; of=countries, boundary=:touches, progress=false) |> DataFrame - # Add the country name column (natural earth has some string errors it seems) insertcols!(january_stats, 1, :country => first.(split.(countries.ADMIN, r"[^A-Za-z ]"))) - # output 258×8 DataFrame Row │ country tmin tmax tavg prec ⋯ │ SubStrin… Float32 Float32 Float32 Float64 ⋯ ─────┼────────────────────────────────────────────────────────────────────────── - 1 │ Indonesia 21.5447 29.1864 25.3656 271.063 ⋯ + 1 │ Indonesia 21.5447 29.1865 25.3656 271.063 ⋯ 2 │ Malaysia 21.3087 28.4291 24.8688 273.381 - 3 │ Chile 7.24534 17.9263 12.5858 78.1287 - 4 │ Bolivia 17.2065 27.7454 22.4759 192.542 - 5 │ Peru 15.0273 25.5504 20.2888 180.007 ⋯ - 6 │ Argentina 13.6751 27.6715 20.6732 67.1837 + 3 │ Chile 7.24534 17.9262 12.5858 78.1287 + 4 │ Bolivia 17.2065 27.7454 22.4758 192.542 + 5 │ Peru 15.0273 25.5504 20.2889 180.007 ⋯ + 6 │ Argentina 13.6751 27.6716 20.6732 67.1837 7 │ Dhekelia Sovereign Base Area 5.87126 15.8991 10.8868 76.25 8 │ Cyprus 5.65921 14.6665 10.1622 97.4474 ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱