TileDB-R
TileDB-R copied to clipboard
`tiledb_query_import_buffer()` is not working with `Date` and `POSIXct`
`tiledb_query_import_buffer()` works for nanosecond-resolution timestamps but fails for `Date` and `POSIXct` objects. Reproducible example:
# Reproduction: importing a Date column and a POSIXct column into a WRITE
# query via tiledb_query_import_buffer() fails for both; the `#>` lines show
# the reported errors.
library(tiledb)
# Temporary path used as the array URI.
uri <- tempfile()
# One-row data frame: a Date column and a POSIXct column.
d1 <- data.frame(date32 = Sys.Date(),
datetime = Sys.time())
# Column names, used below as the buffer names for the import calls.
nms <- colnames(d1)
# Map to TileDB as DATETIME_DAY, DATETIME_MS
tiledb::fromDataFrame(d1, uri)
arr <- tiledb::tiledb_array(uri)
qry <- tiledb::tiledb_query(arr, "WRITE")
# Date
# Convert the Date column to a nanoarrow array with an Arrow date32 schema;
# per the error below, its format string is 'tdD'.
na1 <- nanoarrow::as_nanoarrow_array(d1[[1]], schema = arrow::date32())
qry <- tiledb::tiledb_query_import_buffer(qry, nms[1], na1)
#> Error: [TileDB-Arrow]: Unknown or unsupported Arrow format string 'tdD'
# POSIXct
# Convert the POSIXct column to a millisecond UTC timestamp array; per the
# error below, its format string is 'tsm:UTC'.
na2 <- nanoarrow::as_nanoarrow_array(d1[[2]], schema = arrow::timestamp("ms", "UTC"))
qry <- tiledb::tiledb_query_import_buffer(qry, nms[2], na2)
#> Error: [TileDB-Arrow]: Unknown or unsupported Arrow format string 'tsm:UTC'
Both errors above are raised in `arrow_type_to_tiledb`, which has no mapping for the Arrow format strings `tdD` (date32) and `tsm` (millisecond timestamp):
https://github.com/TileDB-Inc/TileDB-R/blob/f7792f1a90d0c1ea5d20c3e53da0266742ad8dfe/src/updated_arrow_io_impl.h#L307-L317
The equivalent nanosecond-resolution example works as expected:
# Control case: the same import path succeeds for nanosecond timestamps.
library(tiledb)

# Seed a temporary array with a single row, indexed by the nanotime column.
array_uri <- tempfile()
seed_rows <- data.frame(index = nanotime::as.nanotime(1), var = "a")
# The nanotime index column maps to TileDB DATETIME_NS
tiledb::fromDataFrame(seed_rows, array_uri, col_index = 1)

# Open the array and start a WRITE query for two additional rows.
write_arr <- tiledb::tiledb_array(array_uri)
write_qry <- tiledb::tiledb_query(write_arr, "WRITE")
new_rows <- data.frame(index = nanotime::as.nanotime(2:3), var = c("b", "c"))

# Build the index buffer through an arrow Array, because
# nanoarrow::as_nanoarrow_array does not handle the int64 -> timestamp
# mapping; see https://github.com/apache/arrow-nanoarrow/issues/811
ns_type <- arrow::timestamp("ns")
index_int64 <- bit64::as.integer64(new_rows[["index"]])
index_arrow <- arrow::arrow_array(index_int64)$cast(ns_type)
index_buf <- nanoarrow::as_nanoarrow_array(index_arrow)
write_qry <- tiledb::tiledb_query_import_buffer(write_qry, "index", index_buf)

# The character column converts directly.
var_buf <- nanoarrow::as_nanoarrow_array(new_rows[["var"]])
write_qry <- tiledb::tiledb_query_import_buffer(write_qry, "var", var_buf)

# Submit the write and close out the query.
tiledb::tiledb_query_set_layout(write_qry, "UNORDERED")
tiledb::tiledb_query_submit(write_qry)
tiledb::tiledb_query_finalize(write_qry)

# Read everything back as a data.table to confirm all three rows are present.
read_arr <- tiledb::tiledb_array(array_uri, return_as = "data.table")
read_arr[]
#> index var
#> <nanotime> <char>
#> 1: 1970-01-01T00:00:00.000000001+00:00 a
#> 2: 1970-01-01T00:00:00.000000002+00:00 b
#> 3: 1970-01-01T00:00:00.000000003+00:00 c