terra icon indicating copy to clipboard operation
terra copied to clipboard

`makeValid()` produces incorrect results when polygons have `NaN` coordinates

Open dfriend21 opened this issue 8 months ago • 1 comments

I've found that makeValid produces incorrect (and inconsistent) results for polygons when one feature's geometry consists of a single NaN point. The example below demonstrates the problem.

library(terra)
#> terra 1.7.78
# Note: also tested with dev version (1.7.79) - results are the same

# create a polygon dataset where one feature has NaN coordinates
geom <-rbind(
  c(1, 1, 0, 0, 0),
  c(1, 1, 0, 1, 0),
  c(1, 1, 1, 1, 0),
  c(2, 1, NaN, NaN, 0),
  c(3, 1, 2, 0, 0),
  c(3, 1, 2, 1, 0),
  c(3, 1, 1, 2, 0)
)

v1 <- vect(geom, type = "polygons")
v1$id <- seq_len(nrow(v1))
v1a <- makeValid(v1)

# both have three rows, but 'id' of v1a is incorrect
v1
#>  class       : SpatVector 
#>  geometry    : polygons 
#>  dimensions  : 3, 1  (geometries, attributes)
#>  extent      : 0, 2, 0, 2  (xmin, xmax, ymin, ymax)
#>  coord. ref. :  
#>  names       :    id
#>  type        : <int>
#>  values      :     1
#>                    2
#>                    3

v1a
#>  class       : SpatVector 
#>  geometry    : polygons 
#>  dimensions  : 3, 1  (geometries, attributes)
#>  extent      : 0, 2, 0, 2  (xmin, xmax, ymin, ymax)
#>  coord. ref. :  
#>  names       :    id
#>  type        : <int>
#>  values      :     1
#>                    3
#>                    1

# NaN feature hasn't been removed
geom(v1)
#>       geom part   x   y hole
#>  [1,]    1    1   0   0    0
#>  [2,]    1    1   0   1    0
#>  [3,]    1    1   1   1    0
#>  [4,]    1    1   0   0    0
#>  [5,]    2    1 NaN NaN    0
#>  [6,]    3    1   2   0    0
#>  [7,]    3    1   2   1    0
#>  [8,]    3    1   1   2    0
#>  [9,]    3    1   2   0    0

geom(v1a)
#>       geom part   x   y hole
#>  [1,]    1    1   0   0    0
#>  [2,]    1    1   0   1    0
#>  [3,]    1    1   1   1    0
#>  [4,]    1    1   0   0    0
#>  [5,]    2    1 NaN NaN    0
#>  [6,]    3    1   2   0    0
#>  [7,]    3    1   2   1    0
#>  [8,]    3    1   1   2    0
#>  [9,]    3    1   2   0    0

# running makeValid on only the NaN feature causes R to abort
# makeValid(v1[2]

# it behaves differently (but still incorrectly) if read from a shapefile

# write it to a shapefile and then read back in
tf <- tempfile(fileext = ".shp")
writeVector(v1, tf)

v2 <- vect(tf)
v2a <- makeValid(v2)

# v2a has one less feature - the NaN feature has been removed
v2
#>  class       : SpatVector 
#>  geometry    : polygons 
#>  dimensions  : 3, 1  (geometries, attributes)
#>  extent      : 0, 2, 0, 2  (xmin, xmax, ymin, ymax)
#>  source      : file2f70450a14ac.shp
#>  coord. ref. :  
#>  names       :    id
#>  type        : <int>
#>  values      :     1
#>                    2
#>                    3

v2a
#>  class       : SpatVector 
#>  geometry    : polygons 
#>  dimensions  : 2, 1  (geometries, attributes)
#>  extent      : 0, 2, 0, 2  (xmin, xmax, ymin, ymax)
#>  coord. ref. :  
#>  names       :    id
#>  type        : <int>
#>  values      :     1
#>                    3

geom(v2)
#>       geom part   x   y hole
#>  [1,]    1    1   0   0    0
#>  [2,]    1    1   0   1    0
#>  [3,]    1    1   1   1    0
#>  [4,]    1    1   0   0    0
#>  [5,]    2    1 NaN NaN    0
#>  [6,]    3    1   2   0    0
#>  [7,]    3    1   1   2    0
#>  [8,]    3    1   2   1    0
#>  [9,]    3    1   2   0    0

geom(v2a)
#>      geom part x y hole
#> [1,]    1    1 0 0    0
#> [2,]    1    1 0 1    0
#> [3,]    1    1 1 1    0
#> [4,]    1    1 0 0    0
#> [5,]    2    1 2 0    0
#> [6,]    2    1 1 2    0
#> [7,]    2    1 2 1    0
#> [8,]    2    1 2 0    0

# however, using as.data.frame on 'v2a' sometimes (but not always) returns 3 rows, not 2
as.data.frame(v2a)
#>   id
#> 1  1
#> 2  3

# trying to add a column fails sometimes, but not always
v2a$id <- 1
# when it does fail, we get the following error message
#> Error in `[[<-.data.frame`(`*tmp*`, name, value = c(1, 1)) : 
#>  replacement has 2 rows, data has 3

# if we wrap and unwrap the vector the results are the same as we saw with v1
v2b <- makeValid(unwrap(wrap(v2)))
as.data.frame(v2b)
#>   id
#> 1  1
#> 2  3
#> 3  1

Created on 2024-05-31 with reprex v2.1.0

Sys.info()[c('sysname', 'release')]
#>   sysname   release 
#> "Windows"  "10 x64"

R.Version()$version.string
#> [1] "R version 4.4.0 (2024-04-24 ucrt)"

packageVersion("terra")
#> [1] '1.7.78'

gdal(lib = "all")
#>     gdal     proj     geos 
#>  "3.8.4"  "9.3.1" "3.12.1"

Created on 2024-05-31 with reprex v2.1.0

dfriend21 avatar May 31 '24 20:05 dfriend21