stars
stars copied to clipboard
Inconsistent identification of regular coordinates by read_ncdf()
read_ncdf() appears to recognize regular coordinates correctly ("offset" and "delta") only if the netCDF file does not contain "axis" attributes (or if ignore_bounds is requested).
It seems that this is because ncmeta::nc_coord_var() does not find coordinate bounds if there are no "axis" attributes (or if the coordinate bounds are ignored because of ignore_bounds)
If coordinate bounds have been identified (that is, if an "axis" attribute is present), then stars:::.get_nc_dimensions() finds try_bounds to be TRUE
https://github.com/r-spatial/stars/blob/630071044c582b636951f9949a3847cab3619b8a/R/ncdf.R#L734
and continues to correctly assign offset and delta (the coordinate values are regular, is_reg is TRUE, and abs(v - u) < eps is TRUE)
https://github.com/r-spatial/stars/blob/630071044c582b636951f9949a3847cab3619b8a/R/ncdf.R#L758
In a next step, however, offset and delta are then erased because regular[i] & !try_bounds is FALSE.
https://github.com/r-spatial/stars/blob/630071044c582b636951f9949a3847cab3619b8a/R/ncdf.R#L786
read_mdim() works correctly whether or not the file contains "axis" attributes.
Many thanks!
Example code to illustrate the varying behavior:
getNamespaceVersion("stars")
#> version
#> "0.6-8"
getNamespaceVersion("ncmeta")
#> version
#> "0.4.0"
# Example netcd files
nLat <- 3
nLon <- 2
nVertical <- 9
tsl <- array(
data = rowSums(
expand.grid(
seq_len(nVertical),
100 * (seq_len(nLat) - 1)),
10 * (seq_len(nLon) - 1)
),
dim = c(nVertical, nLat, nLon)
)
dlat <- 5
crdsLat <- 40 + dlat * seq_len(nLat)
crdsLatBounds <- rbind(crdsLat - dlat / 2, crdsLat + dlat / 2)
dlon <- 5
crdsLon <- -100 + dlon * seq_len(nLon)
crdsLonBounds <- rbind(crdsLon - dlon / 2, crdsLon + dlon / 2)
tmp <- 10 * (1:nVertical) ^ 2
crdsVerticalBounds <- rbind(c(0, tmp[-length(tmp)]), tmp)
crdsVertical <- colMeans(crdsVerticalBounds) # not regular
# Create filename1
filename1 <- tempfile("tsl_example_", fileext = ".nc")
nc <- RNetCDF::create.nc(filename1)
id_bnds <- RNetCDF::dim.def.nc(nc, "bnds", 2)
id_lon <- RNetCDF::dim.def.nc(nc, "lon", nLon)
iv_lon <- RNetCDF::var.def.nc(nc, "lon", "NC_FLOAT", id_lon)
RNetCDF::var.put.nc(nc, "lon", crdsLon)
RNetCDF::att.put.nc(nc, "lon", "bounds", "NC_CHAR", "lon_bnds")
iv_blon <- RNetCDF::var.def.nc(nc, "lon_bnds", "NC_FLOAT", c(id_bnds, id_lon))
RNetCDF::var.put.nc(nc, "lon_bnds", crdsLonBounds)
id_lat <- RNetCDF::dim.def.nc(nc, "lat", nLat)
iv_lat <- RNetCDF::var.def.nc(nc, "lat", "NC_FLOAT", id_lat)
RNetCDF::var.put.nc(nc, "lat", crdsLat)
RNetCDF::att.put.nc(nc, "lat", "bounds", "NC_CHAR", "lat_bnds")
iv_blat <- RNetCDF::var.def.nc(nc, "lat_bnds", "NC_FLOAT", c(id_bnds, id_lat))
RNetCDF::var.put.nc(nc, "lat_bnds", crdsLatBounds)
id_vertical <- RNetCDF::dim.def.nc(nc, "vertical", nVertical)
iv_vertical <- RNetCDF::var.def.nc(nc, "vertical", "NC_INT", id_vertical)
RNetCDF::att.put.nc(nc, "vertical", "positive", "NC_CHAR", "down")
RNetCDF::var.put.nc(nc, "vertical", crdsVertical)
iv_tsl <- RNetCDF::var.def.nc(nc, "soil_variable", "NC_FLOAT", c(id_vertical, id_lat, id_lon))
RNetCDF::var.put.nc(nc, "soil_variable", tsl)
RNetCDF::close.nc(nc)
# filename2 is a copy of filename1 but it has X, Y, Z axis attributes
filename2 <- tempfile("tsl_example_", fileext = ".nc")
file.copy(from = filename1, to = filename2)
#> [1] TRUE
nc <- RNetCDF::open.nc(filename2, write = TRUE)
RNetCDF::att.put.nc(nc, "lon", "axis", "NC_CHAR", "X")
RNetCDF::att.put.nc(nc, "lat", "axis", "NC_CHAR", "Y")
RNetCDF::att.put.nc(nc, "vertical", "axis", "NC_CHAR", "Z")
RNetCDF::close.nc(nc)
# read_mdim() sets offset and delta correctly
(x1m <- stars::read_mdim(filename1)) # correct
#> stars object with 3 dimensions and 1 attribute
#> attribute(s):
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> soil_variable 1 7.25 105 105 202.75 209
#> dimension(s):
#> from to offset delta values x/y
#> vertical 1 9 NA NA [5,25),...,[725,885)
#> lat 1 3 42.5 5 NULL [y]
#> lon 1 2 -97.5 5 NULL [x]
(x2m <- stars::read_mdim(filename2)) # correct
#> stars object with 3 dimensions and 1 attribute
#> attribute(s):
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> soil_variable 1 7.25 105 105 202.75 209
#> dimension(s):
#> from to offset delta values x/y
#> vertical 1 9 NA NA [5,25),...,[725,885)
#> lat 1 3 42.5 5 NULL [y]
#> lon 1 2 -97.5 5 NULL [x]
# read_ncdf() sets offset and delta correctly only if there are no "axis" attributes
(x1n <- stars::read_ncdf(filename1, var = "soil_variable")) # correct
#> Will return stars object with 54 cells.
#> Warning in .get_nc_projection(meta$attribute, rep_var, cv): No projection
#> information found in nc file.
#> stars object with 3 dimensions and 1 attribute
#> attribute(s):
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> soil_variable 1 7.25 105 105 202.75 209
#> dimension(s):
#> from to offset delta values x/y
#> vertical 1 9 NA NA [9] 5,...,725 [x]
#> lat 1 3 42.5 5 NULL [y]
#> lon 1 2 -97.5 5 NULL
(x2n <- stars::read_ncdf(filename2, var = "soil_variable")) # no offset/delta
#> Warning in FUN(X[[i]], ...): Non-canonical axis order found, attempting to
#> correct.
#> Will return stars object with 54 cells.
#> Warning in .get_nc_projection(meta$attribute, rep_var, cv): No projection
#> information found in nc file.
#> stars object with 3 dimensions and 1 attribute
#> attribute(s):
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> soil_variable 1 7.25 105 105 202.75 209
#> dimension(s):
#> from to values x/y
#> lon 1 2 -95, -90 [x]
#> lat 1 3 45, 50, 55 [y]
#> vertical 1 9 [9] 5,...,725
(x2ni <- stars::read_ncdf(filename2, var = "soil_variable", ignore_bounds = TRUE)) # correct
#> Warning in FUN(X[[i]], ...): Non-canonical axis order found, attempting to
#> correct.
#> Will return stars object with 54 cells.
#> Warning in .get_nc_projection(meta$attribute, rep_var, cv): No projection
#> information found in nc file.
#> stars object with 3 dimensions and 1 attribute
#> attribute(s):
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> soil_variable 1 7.25 105 105 202.75 209
#> dimension(s):
#> from to offset delta values x/y
#> lon 1 2 -97.5 5 NULL [x]
#> lat 1 3 42.5 5 NULL [y]
#> vertical 1 9 NA NA [9] 5,...,725
# `ncmeta::nc_coord_var()` does not find coordinate bounds
# if there are no "axis" attributes
ncmeta::nc_coord_var(filename1) # bounds are not found
#> # A tibble: 2 × 6
#> variable X Y Z T bounds
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 vertical <NA> <NA> vertical <NA> <NA>
#> 2 soil_variable <NA> <NA> vertical <NA> <NA>
ncmeta::nc_coord_var(filename2) # bounds are found
#> # A tibble: 6 × 6
#> variable X Y Z T bounds
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 lon lon <NA> <NA> <NA> lon_bnds
#> 2 lon_bnds lon <NA> <NA> <NA> <NA>
#> 3 lat <NA> lat <NA> <NA> lat_bnds
#> 4 lat_bnds <NA> lat <NA> <NA> <NA>
#> 5 vertical <NA> <NA> vertical <NA> <NA>
#> 6 soil_variable lon lat vertical <NA> <NA>
# Clean up
unlink(filename1)
unlink(filename2)
Created on 2025-04-22 with reprex v2.1.1
@mdsumner
I have never seen a netcdf that uses bounds exclusively to specify a regular grid. that's cool, does it occur in the wild? can you point to a real example?
I expect any fix will be in ncmeta. (and, I'll have to tell the GeoZarr people about this)
also GDAL doesn't recognize this way of georeferencing, is it mentioned in CF anywhere? I've only ever seen bounds used to disambiguate the centre vs corner thing for (usually degenerate) rectilinear arrays. I'm not opposed to helping this but I just don't think it's a standard thing, is it really a problem? I usually just set the extent of a raster in R and move on. I guess if ncmeta supports this we'd have to derive a rectlinear 1D coordinate array from the bnds, which is trivially degenerate for regular rectilinear cases, and icky for truly non-regular coordinates - i.e. where is the centre of any interval?
Thanks much for looking into this! I'm ok to close this issue -- there are several options that work for my specific case: I can use stars::read_mdim(), use argument ignore_bounds or remove the bounds.
This was not a file completely from the "wild" (processed with some in-house code that's more geared towards non-regular grids and adds bounds) -- so it may be all due to our handling of the nc. However, we didn't interpret CF1-12 "7.1.1. Bounds for one-dimensional coordinate variables" and example 7.1 to mean that bounds on regular grids are illegal.
GDAL seems to handle both my example files (continued from above) -- if I correct the order of dimensions to lon, lat, vertical and use a negative delta for latitude:
tsl <- array(
data = rowSums(
expand.grid(
10 * (seq_len(nLon) - 1),
100 * (seq_len(nLat) - 1),
seq_len(nVertical)
)
),
dim = c(nLon, nLat, nVertical)
)
dlat <- -5
crdsLat <- 60 + dlat * seq_len(nLat)
iv_tsl <- RNetCDF::var.def.nc(nc, "soil_variable", "NC_FLOAT", c(id_lon, id_lat, id_vertical))
then with GDAL 3.10.3
ftif1 <- tempfile("tsl_example_", fileext = ".tif")
ftif2 <- tempfile("tsl_example_", fileext = ".tif")
system2("gdal_translate", args = c(filename1, ftif1))
system2("gdal_translate", args = c(filename2, ftif2))
# Compare values -- all are ok
all.equal(stars::read_mdim(filename1)[[1]], tsl, check.attributes = FALSE)
all.equal(stars::read_ncdf(filename1)[[1]], tsl)
all.equal(stars::read_stars(ftif1)[[1]], tsl, check.attributes = FALSE)
all.equal(stars::read_mdim(filename2)[[1]], tsl, check.attributes = FALSE)
all.equal(stars::read_ncdf(filename2, ignore_bounds = TRUE)[[1]], tsl)
all.equal(stars::read_stars(ftif2)[[1]], tsl, check.attributes = FALSE)
Again, thank you, I'm ok to close this issue