TileDB-R icon indicating copy to clipboard operation
TileDB-R copied to clipboard

`tiledb_array_close()` does not reset S4 timestamp slots

Open cgiachalis opened this issue 4 months ago • 0 comments

Context

In R, TileDB arrays are represented by S4 classes, which have slots (OOP).

When creating a TileDB array object with tiledb_array(), one can set timestamps via the timestamp_start and timestamp_end arguments; these are then used by the [ method to materialise the array for the given time range.

Describe the issue

Closing the array does not reset the timestamp slots that were set. After re-opening it, [ therefore returns the same time-range snapshot, unless the slots are modified manually, e.g. arr@timestamp_start <- as.POSIXct(double()). As a result, a closed array that is passed around carries its previous timestamps, which causes issues.

Source helpers:


#' Create a small TileDB array and perform three timestamped writes
#'
#' Builds the schema from a one-row data frame, then, for each timestamp,
#' opens the array for writing at that time, writes one row, re-opens it at
#' the same time to attach a metadata entry, and closes it again.
#'
#' @param uri Location at which the array is created.
#' @return The `POSIXct` vector holding the three write timestamps.
write_array_tstamps <- function(uri) {

  stamps <- as.POSIXct(
    c("2025-08-18 16:12:50", "2025-08-18 16:12:55", "2025-08-18 16:13:01")
  )

  # Schema only: the actual rows are written in the loop below.
  template <- data.frame(id = 1L, val = 1.0)
  tiledb::fromDataFrame(template, uri, col_index = 1, mode = "schema_only")

  handle <- tiledb::tiledb_array(uri)

  for (k in seq_along(stamps)) {
    # Index with `[` so the element keeps its POSIXct class.
    when <- stamps[k]

    # Data write at `when`; `val` records which write produced the row.
    handle <- tiledb::tiledb_array_open_at(handle, "WRITE", timestamp = when)
    handle[] <- data.frame(id = 1, val = k)

    # Metadata write at the same timestamp.
    handle <- tiledb::tiledb_array_open_at(handle, "WRITE", timestamp = when)
    tiledb::tiledb_put_metadata(handle, paste0("key", k), as.character(when))

    handle <- tiledb::tiledb_array_close(handle)
  }

  stamps
}


#' Print the open timestamps reported by libtiledb
#'
#' Emits a blank line, a header, and one line each for the start and end
#' timestamps obtained from the internal
#' `libtiledb_array_open_timestamp_*()` accessors.
#'
#' @param arr Object whose `ptr` slot is handed to those accessors.
#' @return `NULL`, invisibly; called for its printed output.
print_open_timestamps <- function(arr) {
  writeLines(c("", "Array Open Timestamps (via libtiledb)"))

  opened_from <- tiledb:::libtiledb_array_open_timestamp_start(arr@ptr)
  writeLines(paste0("  Timestamp start: ", format(opened_from)))

  opened_to <- tiledb:::libtiledb_array_open_timestamp_end(arr@ptr)
  writeLines(paste0("  Timestamp end  : ", format(opened_to)))
}

#' Print the timestamps stored in the R object's slots
#'
#' Emits a blank line, a header, and one line each for the formatted
#' `timestamp_start` and `timestamp_end` slots of `arr`.
#'
#' @param arr Object with `timestamp_start` and `timestamp_end` slots.
#' @return `NULL`, invisibly; called for its printed output.
print_r_timestamps <- function(arr) {
  writeLines(c("", "Array User Timestamps"))

  slot_from <- format(arr@timestamp_start)
  writeLines(paste0("  Timestamp start: ", slot_from))

  slot_to <- format(arr@timestamp_end)
  writeLines(paste0("  Timestamp end  : ", slot_to))
}

#' Print both views of an array's timestamps
#'
#' Shows the R-level slot values first, then a `---` divider, then the
#' values reported by libtiledb, so the two can be compared side by side.
#'
#' @param arr Array object passed on to `print_r_timestamps()` and
#'   `print_open_timestamps()`.
#' @return The value of `print_open_timestamps()`; called for its printed
#'   output.
print_timestamps <- function(arr) {
  print_r_timestamps(arr)
  writeLines(c("", "  --- "))
  print_open_timestamps(arr)
}


Reprex:

# Setup: create the demo array and keep the three write timestamps in `out`.
library(tiledb)

# Query results further down print as data frames.
set_return_as_preference("data.frame")

# NOTE(review): tempdir() is the session temp directory itself, not a fresh
# path inside it; a tempfile() path may be the safer array location - confirm.
uri <- tempdir()
out <- write_array_tstamps(uri)

# Timestamps:
format(out)
#> [1] "2025-08-18 16:12:50" "2025-08-18 16:12:55"
#> [3] "2025-08-18 16:13:01"

# Baseline: no timestamps given, so `[` returns the rows from all three
# writes and both R-level timestamp slots print as empty.
arr <- tiledb_array(uri)
arr[]
#>   id val
#> 1  1   1
#> 2  1   2
#> 3  1   3

print_timestamps(arr)
#> 
#> Array User Timestamps
#>   Timestamp start: 
#>   Timestamp end  : 
#> 
#>   --- 
#> 
#> Array Open Timestamps (via libtiledb)
#>   Timestamp start: 1970-01-01 02:00:00
#>   Timestamp end  : 2025-08-19 14:18:22

# open with time range  "2025-08-18 16:12:55 / 2025-08-18 16:13:01"
# The R-level slots now hold the requested range. In the output below the
# libtiledb end timestamp is not the requested end, yet `[` still returns
# only the rows written at out[2] and out[3].
arr <- tiledb_array(uri, timestamp_start = out[2], timestamp_end = out[3])
print_timestamps(arr)
#> 
#> Array User Timestamps
#>   Timestamp start: 2025-08-18 16:12:55
#>   Timestamp end  : 2025-08-18 16:13:01
#> 
#>   --- 
#> 
#> Array Open Timestamps (via libtiledb)
#>   Timestamp start: 2025-08-18 16:12:55
#>   Timestamp end  : 2025-08-19 14:18:22

# ok
arr[]
#>   id val
#> 1  1   2
#> 2  1   3

# Close the array: the output below shows the R-level timestamp slots are
# left untouched by tiledb_array_close().
arr <- tiledb_array_close(arr)

print_timestamps(arr)
#> 
#> Array User Timestamps
#>   Timestamp start: 2025-08-18 16:12:55
#>   Timestamp end  : 2025-08-18 16:13:01
#> 
#>   --- 
#> 
#> Array Open Timestamps (via libtiledb)
#>   Timestamp start: 2025-08-18 16:12:55
#>   Timestamp end  : 2025-08-19 14:18:22

# Plain re-open, with no timestamps requested.
arr <- tiledb_array_open(arr)

# Oh, no
# The query is still restricted to the earlier range (rows 2 and 3 only).
arr[]
#>   id val
#> 1  1   2
#> 2  1   3

print_timestamps(arr)
#> 
#> Array User Timestamps
#>   Timestamp start: 2025-08-18 16:12:55
#>   Timestamp end  : 2025-08-18 16:13:01
#> 
#>   --- 
#> 
#> Array Open Timestamps (via libtiledb)
#>   Timestamp start: 2025-08-18 16:12:55
#>   Timestamp end  : 2025-08-18 16:13:01

# Open at array timestamp (no effect) -  "2025-08-18 16:12:50"
# Expectation: only the first write (val = 1) should be visible.
arr <- tiledb_array_close(arr)
arr <- tiledb_array_open_at(arr,
                            type = "READ",
                            timestamp = out[1])

# nope, tiledb_array_open_at does not set r timestamps for query method
# `[` still returns rows 2 and 3, matching the stale R-level slots rather
# than the libtiledb open timestamps printed further down.
arr[]
#>   id val
#> 1  1   2
#> 2  1   3

print_timestamps(arr)
#> 
#> Array User Timestamps
#>   Timestamp start: 2025-08-18 16:12:55 <- Nope
#>   Timestamp end  : 2025-08-18 16:13:01 <- Nope
#> 
#>   --- 
#> 
#> Array Open Timestamps (via libtiledb)
#>   Timestamp start: 1970-01-01 02:00:00
#>   Timestamp end  : 2025-08-18 16:12:50  <- That's correct


# reset directly
# Workaround: overwrite the S4 slots by hand so they match the timestamp
# the array was opened at (empty start, end = out[1]).
arr@timestamp_start <- as.POSIXct(double())
arr@timestamp_end <- out[1]

# Yeah!!
# With the slots corrected, `[` returns only the first write.
arr[]
#>   id val
#> 1  1   1

print_timestamps(arr)
#> 
#> Array User Timestamps
#>   Timestamp start: 
#>   Timestamp end  : 2025-08-18 16:12:50
#> 
#>   --- 
#> 
#> Array Open Timestamps (via libtiledb)
#>   Timestamp start: 1970-01-01 02:00:00
#>   Timestamp end  : 2025-08-18 16:12:50


# Closing again confirms the behaviour: the manually set slot values are
# still present on the closed array object.
arr <- tiledb_array_close(arr)

print_timestamps(arr)
#> 
#> Array User Timestamps
#>   Timestamp start: 
#>   Timestamp end  : 2025-08-18 16:12:50
#> 
#>   --- 
#> 
#> Array Open Timestamps (via libtiledb)
#>   Timestamp start: 1970-01-01 02:00:00
#>   Timestamp end  : 2025-08-18 16:12:50

cgiachalis avatar Aug 19 '25 11:08 cgiachalis