autumn icon indicating copy to clipboard operation
autumn copied to clipboard

Error for missing levels in the data

Open Hadsga opened this issue 4 years ago • 0 comments

I have some variables in a data set that I want to weight. However, not all levels are present in the data set.

library(dplyr)
library(autumn)

harvest(d, weights)

So I get this error:

Error in check_any_data_issues(data, target, weights) : Errors detected in data. Some variables have values in the weight targets which are not present in the data:

Here is a dput of the quotas (the weight targets):

dput(weights)
list(Rec_Age = c(`1` = 0, `2` = 0.181, `3` = 0.2877, `4` = 0.3311, 
`5` = 0.2001), Rec_Income = c(`1` = 0.1105, `2` = 0.2852, `3` = 0.2343, 
`4` = 0.3699), Q6 = c(`1` = 0.067, `2` = 0.3409, `3` = 0.592), 
    RECQ5_1 = c(`1` = 0.4099, `2` = 0.5239, `3` = 0.0662), RECQ5_2 = c(`1` = 0.1621, 
    `2` = 0.3803, `3` = 0.4576), RECQ5_3 = c(`1` = 0.0508, `2` = 0.294, 
    `3` = 0.6551), RECQ5_4 = c(`1` = 0.103, `2` = 0.4864, `3` = 0.4106
    ))

and the data:


dput(d)
structure(list(RESPID = structure(c(459, 311, 223, 60, 613, 495, 
300, 273, 78, 170, 217, 61, 175, 619, 270, 218, 453, 492, 23, 
65, 33, 113, 532, 26, 119, 49, 208, 102, 200, 165, 435, 298, 
593, 220, 111, 53, 494, 271, 305, 420, 323, 607, 105, 19, 426, 
171, 330, 201, 332, 277), label = "RESPID - Respondent ID", format.spss = "F10.0", display_width = 0L), 
    Rec_Age = structure(c(4, 2, 4, 3, 4, 4, 4, 3, 2, 2, 3, 2, 
    3, 4, 4, 2, 4, 4, 2, 3, 2, 2, 2, 3, 3, 2, 2, 2, 2, 3, 2, 
    3, 2, 3, 4, 3, 4, 3, 2, 3, 3, 3, 4, 4, 4, 2, 2, 3, 4, 3), label = "Rec_Age - Recode Age", format.spss = "F1.0", display_width = 0L), 
    Rec_Income = structure(c(3, 1, 2, 1, 1, 2, 2, 3, 2, 1, 2, 
    2, 2, 1, 1, 2, 2, 3, 3, 2, 2, 2, 2, 3, 2, 3, 2, 2, 1, 2, 
    2, 2, 1, 3, 1, 1, 1, 1, 1, 3, 3, 2, 3, 3, 2, 2, 2, 2, 2, 
    2), label = "Rec_Income - Recode Income", format.spss = "F1.0", display_width = 0L), 
    Q6 = structure(c(2, 1, 2, 3, 2, 3, 2, 1, 3, 2, 2, 3, 3, 3, 
    2, 2, 3, 3, 2, 1, 2, 3, 3, 2, 2, 2, 1, 2, 1, 2, 2, 3, 3, 
    2, 3, 2, 3, 2, 2, 1, 3, 2, 2, 2, 3, 2, 2, 1, 3, 2), label = "Q6 - Wie stark interessieren Sie sich für Bekleidung und Mode?", format.spss = "F1.0", display_width = 0L), 
    RECQ5_1 = c(1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 
    1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 2, 2, 
    3, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1), RECQ5_2 = c(2, 
    2, 3, 3, 3, 2, 3, 1, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 1, 3, 
    2, 3, 2, 2, 2, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 2, 1, 
    1, 3, 3, 2, 1, 3, 1, 2, 1, 3, 2), RECQ5_3 = c(3, 1, 3, 3, 
    3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 1, 2, 2, 3, 3, 
    2, 2, 2, 1, 3, 3, 2, 2, 3, 2, 3, 3, 2, 3, 3, 2, 1, 3, 3, 
    3, 2, 3, 1, 3, 3, 3, 2), RECQ5_4 = c(1, 2, 2, 2, 2, 2, 1, 
    1, 3, 2, 2, 3, 3, 3, 1, 1, 2, 3, 1, 1, 1, 3, 2, 1, 2, 1, 
    1, 1, 1, 2, 1, 3, 3, 3, 2, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 
    1, 2, 3, 2, 2)), row.names = c(NA, -50L), class = "data.frame")

Hadsga avatar Feb 18 '20 15:02 Hadsga