scoringutils
scoringutils copied to clipboard
Use nice illustration of CRPS/DSS/Log score + the number of samples needed for scores to converge
We have this visualisation, which, among other things, makes the point that you need quite a lot of samples for the sample-based log score to be reliable.
It was previously used in the manuscript, but we're not using it anymore. I like it and think it makes an important point. It would be really nice to use this visualisation in a vignette explaining which score to use when.
This was the code that generated the plot:
library(data.table)
library(scoringutils)
library(ggplot2)
# ==============================================================================
# =================== Convergence of scores ====================================
# ==============================================================================
# Simulation parameters --------------------------------------------------
# sample sizes from 10 to 100k on a log-spaced grid
sample_sizes <- round(1 * 10^(seq(1, 5, 0.1)))
n_rep <- 500       # repetitions per sample size
true_value <- 0    # observed value all forecasts are scored against
sd <- 3            # sd of the predictive distribution N(mu, sd)
mu <- 2            # mean of the predictive distribution
# Analytical (closed-form) scores the sample-based scores should converge
# to. Use `true_value` instead of a hard-coded 0 so the observed value
# cannot drift out of sync with the simulation below.
true_crps <- scoringRules::crps(y = true_value, family = "normal", mean = mu, sd = sd)
true_logs <- scoringRules::logs(y = true_value, family = "normal", mean = mu, sd = sd)
true_dss <- scoringRules::dss_norm(y = true_value, mean = mu, sd = sd)
# Only run the (slow) simulation when no cached result exists on disk;
# otherwise load the cached scores.
if (!file.exists("inst/manuscript/output/sample-convergence.rds")) {
# one list entry per sample size, each a scored data.table
results <- list()
for (i in sample_sizes) {
# n_rep columns, each holding i draws from the predictive N(mu, sd)
samples <- as.data.table(
replicate(n_rep,
rnorm(n = i, mean = mu, sd = sd))
)
# name columns by repetition so melt() below yields a repetition id
setnames(samples, as.character(1:n_rep))
samples[, sample_id := 1:i]
# long format: one row per (sample_id, repetition)
samples <- melt(samples, id.vars = "sample_id",
variable.name = "repetition",
value.name = "predicted")
# every repetition is scored against the same fixed observation
samples[, observed := true_value]
samples <- as_forecast_sample(samples)
# score each repetition with the three sample-based metrics and record
# the sample size used so results can be combined later
results[[paste(i)]] <- score(
samples, get_metrics(samples, select = c("crps", "log_score", "dss"))
)[, n_samples := i]
}
# cache to disk so reruns skip the simulation
saveRDS(results, "inst/manuscript/output/sample-convergence.rds")
} else {
results <- readRDS("inst/manuscript/output/sample-convergence.rds")
}
# Stack the per-sample-size tables and reshape to long format:
# one row per (n_samples, repetition, score).
results <- melt(
  rbindlist(results),
  id.vars = c("n_samples", "repetition"),
  variable.name = "score"
)
# Axis-label helper: render values >= 1000 as "1k", "10k", ... so the
# log-scaled x axis stays readable.
# Returns a character vector for all inputs (the original returned the
# numeric input unchanged when no element reached 1000, giving an
# inconsistent return type).
label_fn <- function(x) {
  ifelse(x >= 1000,
         paste0(x / 1000, "k"),
         as.character(x))
}
# Summarise scores across repetitions: mean plus the bounds of the 50%
# (0.25/0.75) and 90% (0.05/0.95) intervals, per sample size and score.
df <- results[, .(mean = mean(value),
quantile_0.05 = quantile(value, 0.05),
quantile_0.25 = quantile(value, 0.25),
quantile_0.75 = quantile(value, 0.75),
quantile_0.95 = quantile(value, 0.95)),
by = c("n_samples", "score")]
# attach the analytical reference score for each metric (dashed line in
# the plot below)
df[score == "crps", true_score := true_crps]
df[score == "log_score", true_score := true_logs]
df[score == "dss", true_score := true_dss]
# human-readable facet labels
df[, score := ifelse(score == "dss", "DSS",
ifelse(score == "crps", "CRPS",
"Log score"))]
# Convergence figure, one facet per score with a log-scaled x axis.
# Solid line: mean score across repetitions; light ribbon: 90% interval;
# darker ribbon: 50% interval; dashed line: analytical score.
p_convergence <- ggplot(df, aes(x = n_samples)) +
  geom_line(aes(y = mean)) +
  geom_ribbon(aes(ymin = quantile_0.05, ymax = quantile_0.95), alpha = 0.1) +
  geom_ribbon(aes(ymin = quantile_0.25, ymax = quantile_0.75), alpha = 0.3) +
  geom_hline(aes(yintercept = true_score), linetype = "dashed") +
  facet_wrap(~ score, scales = "free") +
  scale_x_log10(labels = label_fn) +
  theme_scoringutils() +
  labs(x = "Number of samples", y = "Score based on samples")
# ==============================================================================
# =================== scores and outliers, sd ==================================
# ==============================================================================
library(scoringRules)
library(dplyr)
library(patchwork)
# define simulation parameters -------------------------------------------
n_steps <- 500    # number of predictive-sd values to evaluate
n_rep <- 5000     # observations drawn from the true distribution
true_mean <- 0
true_sd <- 5
# observations from the true data-generating distribution N(0, 5)
true_values <- rnorm(n = n_rep, mean = true_mean, sd = true_sd)
# candidate predictive sds (log-spaced) and means
sd <- 10^(seq(-1, 1.6, length.out = n_steps))
mu <- seq(0, 100, length.out = n_steps)
# look at effect of change in sd on score: predictions are centred on the
# true mean, only the predictive sd varies
res_sd <- data.table(sd = sd,
                     mu = true_mean)
# by = "sd" scores each candidate sd separately, averaging over all
# n_rep observations
res_sd[, `:=`(CRPS = mean(scoringRules::crps(y = true_values, family = "normal", mean = mu, sd = sd)),
              `Log score` = mean(scoringRules::logs(y = true_values, family = "normal", mean = mu, sd = sd)),
              DSS = mean(scoringRules::dss_norm(y = true_values, mean = mu, sd = sd))),
      by = "sd"]
# Score as a function of the predictive sd. The vertical marker and its
# annotation are derived from `true_sd` instead of a hard-coded 5, so the
# plot stays correct if the simulation parameters change.
deviation_sd <- res_sd |>
  melt(id.vars = c("sd", "mu"), value.name = "value", variable.name = "Score") |>
  ggplot(aes(x = sd, y = value, color = Score)) +
  geom_line() +
  scale_color_discrete(type = c("#E69F00", "#56B4E9", "#009E73")) +
  theme_scoringutils() +
  geom_vline(aes(xintercept = true_sd), linetype = "dashed") +
  coord_cartesian(ylim = c(0, 20)) +
  annotate(geom = "text", x = true_sd + 1, y = 12,
           label = paste0("Sd of true \ndata-generating \ndistribution: ", true_sd),
           color = "black", hjust = "left", size = 3) +
  labs(y = "Score", x = "Standard deviation of predictive distribution")
# define simulation parameters for the observed-value sweep ---------------
true_values <- seq(0, 4, length.out = 1000)
true_sd <- 1
true_mu <- 0
# Effect of the observed value on the score for a fixed N(0, 1) forecast.
# Scores are divided by 10 so they fit on the same y axis as the density
# overlay in the plot; the label function defined just below multiplies
# axis labels by 10 to undo this scaling.
res_mu2 <- data.table(true_value = true_values)
res_mu2[, `:=`(CRPS = scoringRules::crps(y = true_value, family = "normal", mean = true_mu, sd = true_sd) / 10,
               `Log score` = scoringRules::logs(y = true_value, family = "normal", mean = true_mu, sd = true_sd) / 10,
               DSS = scoringRules::dss_norm(y = true_value, mean = true_mu, sd = true_sd) / 10)]
# Undo the /10 scaling applied to the scores above: axis labels show the
# unscaled score values.
label_fn <- function(x) {
  paste0(10 * x)
}
# Score versus observed value for a fixed N(0, 1) forecast, with the
# predictive density drawn in grey behind the score curves.
outlier <- res_mu2 |>
  melt(id.vars = c("true_value"), value.name = "value", variable.name = "Score") |>
  ggplot(aes(x = true_value, y = value, color = Score)) +
  geom_line() +
  theme_scoringutils() +
  annotate(geom = "text", x = 0, y = .8,
           label = "Predictive distribution: \nN(0,1)",
           color = "black", hjust = "left", size = 3) +
  labs(y = "Score", x = "Observed value") +
  # geom_vline(aes(xintercept = 0), linetype = "dashed") +
  scale_color_discrete(type = c("#E69F00", "#56B4E9", "#009E73")) +
  # density of the N(0, 1) predictive distribution as a grey backdrop
  geom_area(stat = "function", fun = dnorm, color = "grey", fill = "grey",
            alpha = 0.5, xlim = c(0, 4)) +
  # `labels` spelled out in full (was `label`, which relied on partial
  # argument matching); the label function multiplies by 10 to undo the
  # /10 scaling applied to the scores
  scale_y_continuous(labels = label_fn)
# patchwork design string: panel A (convergence) spans the full top row,
# panels B and C share the bottom row
layout <- "
AA
BC
"
# NOTE(review): `/` binds tighter than `+`, so this composes as
# (p_convergence / deviation_sd) + outlier before plot_layout() applies
# the explicit `design`. This reproduced the manuscript figure as-is;
# confirm the resulting layout if this is reused in a vignette.
plot <- p_convergence /
deviation_sd + outlier +
plot_layout(guides = "collect",
design = layout) &
plot_annotation(tag_levels = "A") &
theme(legend.position = "bottom")
# write the combined figure to the manuscript output directory
ggsave("inst/manuscript/output/score-convergence-outliers.png", plot = plot,
height = 6, width = 8)
Related: #930, #929