NAs causing failures in training combination models
I have a combination model that's running into some problems with NAs.
The data looks like this (see end of message for actual dput format to recreate the exact data set):
Browse[2]> summary(t1)
interval_time dalmp target
Min. :2023-08-29 23:00:00 Min. :21.89 Min. :-1.8695
1st Qu.:2023-08-30 06:15:00 1st Qu.:24.90 1st Qu.:-0.1650
Median :2023-08-30 13:30:00 Median :34.82 Median : 0.1793
Mean :2023-08-30 13:30:00 Mean :40.45 Mean : 0.2144
3rd Qu.:2023-08-30 20:45:00 3rd Qu.:53.67 3rd Qu.: 0.7105
Max. :2023-08-31 04:00:00 Max. :81.69 Max. : 1.5769
NA's :2
Browse[2]> t1
# A tsibble: 30 x 3 [1h] <UTC>
interval_time dalmp target
<dttm> <dbl> <dbl>
1 2023-08-29 23:00:00 64.8 1.58
2 2023-08-30 00:00:00 55.0 1.57
3 2023-08-30 01:00:00 49.6 NA
4 2023-08-30 02:00:00 37.0 NA
5 2023-08-30 03:00:00 34.1 0.217
6 2023-08-30 04:00:00 27.9 0.111
7 2023-08-30 05:00:00 24.8 -0.132
8 2023-08-30 06:00:00 24.6 -0.121
9 2023-08-30 07:00:00 22.8 -0.195
10 2023-08-30 08:00:00 22.1 -0.328
# ℹ 20 more rows
# ℹ Use `print(n = ...)` to see more rows
And the training goes like this:
# Fit one combined model: ARIMA and NNETAR both use dalmp as a regressor,
# AR is a pure order-2 autoregression; members are weighted by "inv_var".
rt_fit <- training |>
  fabletools::model(
    cmbn1 = combination_model(
      ARIMA(target ~ dalmp),
      NNETAR(target ~ dalmp),
      AR(target ~ order(2)),
      cmbn_args = list(weights = "inv_var")
    ),
    # Let fitting errors propagate instead of being captured.
    .safely = FALSE
  )
# Error in qr.default(XX) : NA/NaN/Inf in foreign function call (arg 1)
I thought I might be able to work around this by deleting the rows with NAs, but it doesn't seem happy with that either:
Error in check_gaps(.data) :
.data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.
Is there a problem in combination_model that's causing this? Any suggested workaround? Any help greatly appreciated!
I'm using:
> R.version
_
platform aarch64-apple-darwin20
arch aarch64
os darwin20
system aarch64, darwin20
status
major 4
minor 2.2
year 2022
month 10
day 31
svn rev 83211
language R
version.string R version 4.2.2 (2022-10-31)
nickname Innocent and Trusting
> packageVersion('fabletools')
[1] ‘0.3.3’
> packageVersion('fable')
[1] ‘0.3.3’
Here's the input data for reproduction purposes:
# Reproduction data (dput output): a 30-row hourly tsibble indexed by
# interval_time (UTC) with columns dalmp and target; target is NA in rows 3-4.
training <-
structure(list(interval_time = structure(c(1693350000, 1693353600,
1693357200, 1693360800, 1693364400, 1693368000, 1693371600, 1693375200,
1693378800, 1693382400, 1693386000, 1693389600, 1693393200, 1693396800,
1693400400, 1693404000, 1693407600, 1693411200, 1693414800, 1693418400,
1693422000, 1693425600, 1693429200, 1693432800, 1693436400, 1693440000,
1693443600, 1693447200, 1693450800, 1693454400), tzone = "UTC", class = c("POSIXct",
"POSIXt")), dalmp = c(64.772, 55.031, 49.5954, 37.0346, 34.122,
27.9416, 24.8358, 24.6128, 22.81, 22.1077, 21.8885, 21.9106,
22.9985, 25.0922, 25.7776, 30.3662, 32.1598, 35.5114, 39.2446,
44.2576, 59.0444, 65.3967, 76.7988, 81.6873, 73.1935, 64.3783,
45.2286, 36.5493, 26.4268, 22.6344), target = c(1.57691363525177,
1.56855930002442, NA, NA, 0.216786192700598, 0.110616995970005,
-0.131849633308032, -0.12072764832231, -0.195075151428118, -0.327716438359099,
-0.403658249907274, -0.361481112249875, -0.254285754950902, -0.154945697500086,
-0.142376837846178, 0.141843887915466, 0.376564223792779, 0.327934476018599,
0.875235233052582, 0.550416417555717, 0.682786557929835, 0.482133615075935,
0.952303345137791, 1.16381971509555, 0.793706899407923, 0.854539241873788,
0.239481464973916, -0.0814464383521666, -1.86950100289322, -0.867599159975814
)), class = c("tbl_ts", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-30L), key = structure(list(.rows = structure(list(1:30), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L)), index = structure("interval_time", ordered = TRUE), index2 = "interval_time", interval = structure(list(
year = 0, quarter = 0, month = 0, week = 0, day = 0, hour = 1,
minute = 0, second = 0, millisecond = 0, microsecond = 0,
nanosecond = 0, unit = 0), .regular = TRUE, class = c("interval",
"vctrs_rcrd", "vctrs_vctr")))
This is a limitation of the AR() model rather than of combination models. Minimal reproducible example (MRE):
# Same reproduction data as in the original report (dput output): a 30-row
# hourly tsibble indexed by interval_time (UTC); target is NA in rows 3-4.
training <-
structure(list(interval_time = structure(c(1693350000, 1693353600,
1693357200, 1693360800, 1693364400, 1693368000, 1693371600, 1693375200,
1693378800, 1693382400, 1693386000, 1693389600, 1693393200, 1693396800,
1693400400, 1693404000, 1693407600, 1693411200, 1693414800, 1693418400,
1693422000, 1693425600, 1693429200, 1693432800, 1693436400, 1693440000,
1693443600, 1693447200, 1693450800, 1693454400), tzone = "UTC", class = c("POSIXct",
"POSIXt")), dalmp = c(64.772, 55.031, 49.5954, 37.0346, 34.122,
27.9416, 24.8358, 24.6128, 22.81, 22.1077, 21.8885, 21.9106,
22.9985, 25.0922, 25.7776, 30.3662, 32.1598, 35.5114, 39.2446,
44.2576, 59.0444, 65.3967, 76.7988, 81.6873, 73.1935, 64.3783,
45.2286, 36.5493, 26.4268, 22.6344), target = c(1.57691363525177,
1.56855930002442, NA, NA, 0.216786192700598, 0.110616995970005,
-0.131849633308032, -0.12072764832231, -0.195075151428118, -0.327716438359099,
-0.403658249907274, -0.361481112249875, -0.254285754950902, -0.154945697500086,
-0.142376837846178, 0.141843887915466, 0.376564223792779, 0.327934476018599,
0.875235233052582, 0.550416417555717, 0.682786557929835, 0.482133615075935,
0.952303345137791, 1.16381971509555, 0.793706899407923, 0.854539241873788,
0.239481464973916, -0.0814464383521666, -1.86950100289322, -0.867599159975814
)), class = c("tbl_ts", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-30L), key = structure(list(.rows = structure(list(1:30), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L)), index = structure("interval_time", ordered = TRUE), index2 = "interval_time", interval = structure(list(
year = 0, quarter = 0, month = 0, week = 0, day = 0, hour = 1,
minute = 0, second = 0, millisecond = 0, microsecond = 0,
nanosecond = 0, unit = 0), .regular = TRUE, class = c("interval",
"vctrs_rcrd", "vctrs_vctr")))
library(fable)
#> Loading required package: fabletools
# Fit only the AR(2) model, without combination_model(): the same
# NA/NaN/Inf error is reported and a NULL model is returned.
training |>
model(AR(target ~ order(2)))
#> Warning: 1 error encountered for AR(target ~ order(2))
#> [1] NA/NaN/Inf in foreign function call (arg 1)
#> # A mable: 1 x 1
#> `AR(target ~ order(2))`
#> <model>
#> 1 <NULL model>
Created on 2024-02-09 with reprex v2.0.2
As such, I'm moving this issue to {fable}, where the AR model could perhaps be improved to handle missing values better.