dm
dm copied to clipboard
Full list of problematic values from `dm_examine_constraints()`
Is it possible to get the full list of problematic values from `dm_examine_constraints()`, instead of the truncated list (ending in `…`) shown below?
library(dm)
#>
#> Attaching package: 'dm'
#> The following object is masked from 'package:stats':
#>
#> filter
# Build the nycflights13 example dm, passing `cycle = TRUE`.
dm <- dm_nycflights13(cycle = TRUE)
# Examine the constraints of the dm and keep the result for inspection.
output <- dm_examine_constraints(dm)
# Print only the problem descriptions; long value lists are cut off with "…".
output$problem
#> [1] "values of `flights$dest` not in `airports$faa`: SJU (30), BQN (6), STT (4), PSE (2)"
#> [2] "values of `flights$tailnum` not in `planes$tailnum`: N725MQ (6), N537MQ (5), N722MQ (5), N730MQ (5), N736MQ (5), …"
#> [3] ""
#> [4] ""
#> [5] ""
#> [6] ""
#> [7] ""
#> [8] ""
#> [9] ""
Created on 2024-03-04 with reprex v2.1.0
This is not available as part of {dm}; you would need to create this list yourself from the information given in the output, e.g.:
suppressPackageStartupMessages({
library(dm)
library(tidyverse)
})
# Example dm built with `cycle = TRUE`, then checked for constraint problems.
dm <- dm_nycflights13(cycle = TRUE)
output <- dm_examine_constraints(dm)

# Convert the result to a plain tibble and keep only the rows where `is_key`
# is FALSE, i.e. the constraints that carry a problem description.
problematic <- filter(as_tibble(output), !is_key)
problematic
#> # A tibble: 2 × 6
#> table kind columns ref_table is_key problem
#> <chr> <chr> <keys> <chr> <lgl> <chr>
#> 1 flights FK dest airports FALSE values of `flights$dest` not in `airpo…
#> 2 flights FK tailnum planes FALSE values of `flights$tailnum` not in `pl…
# All rows of `flights` whose `dest` has no matching `faa` in `airports`:
dm$flights %>%
  anti_join(dm$airports, by = c(dest = "faa"))
#> # A tibble: 42 × 19
#> year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 2013 1 10 3 2359 4 426 437
#> 2 2013 1 10 16 2359 17 447 444
#> 3 2013 1 10 535 540 -5 1015 1017
#> 4 2013 1 10 610 610 0 1028 1051
#> 5 2013 1 10 633 630 3 1142 1140
#> 6 2013 1 10 705 710 -5 1158 1203
#> 7 2013 1 10 742 745 -3 1214 1234
#> 8 2013 1 10 807 810 -3 1247 1315
#> 9 2013 1 10 818 819 -1 1251 1307
#> 10 2013 1 10 828 830 -2 1258 1335
#> # ℹ 32 more rows
#> # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
#> # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
#> # hour <dbl>, minute <dbl>, time_hour <dttm>
# Number of unmatched rows per `dest`, most frequent first. These are the
# counts that `dm_examine_constraints()` shows in parentheses.
dm$flights %>%
  anti_join(dm$airports, by = c(dest = "faa")) %>%
  count(dest, sort = TRUE)
#> # A tibble: 4 × 2
#> dest n
#> <chr> <int>
#> 1 SJU 30
#> 2 BQN 6
#> 3 STT 4
#> 4 PSE 2
# All rows of `flights` whose `tailnum` has no match in `planes`.
# Rows with a missing `tailnum` are part of this result as well.
dm$flights %>%
  anti_join(dm$planes, by = "tailnum")
#> # A tibble: 282 × 19
#> year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 2013 1 10 531 540 -9 832 850
#> 2 2013 1 10 553 600 -7 837 910
#> 3 2013 1 10 555 600 -5 733 745
#> 4 2013 1 10 555 605 -10 746 805
#> 5 2013 1 10 556 600 -4 818 825
#> 6 2013 1 10 604 610 -6 851 910
#> 7 2013 1 10 604 610 -6 858 915
#> 8 2013 1 10 606 615 -9 740 750
#> 9 2013 1 10 619 630 -11 814 830
#> 10 2013 1 10 621 630 -9 823 830
#> # ℹ 272 more rows
#> # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
#> # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
#> # hour <dbl>, minute <dbl>, time_hour <dttm>
# Number of unmatched rows per `tailnum`, most frequent first.
# The `<NA>` row counts flights with a missing `tailnum`; that entry does not
# appear among the problem values listed by `dm_examine_constraints()`.
dm$flights %>%
  anti_join(dm$planes, by = "tailnum") %>%
  count(tailnum, sort = TRUE)
#> # A tibble: 167 × 2
#> tailnum n
#> <chr> <int>
#> 1 <NA> 9
#> 2 N725MQ 6
#> 3 N537MQ 5
#> 4 N722MQ 5
#> 5 N730MQ 5
#> 6 N736MQ 5
#> 7 N3GBAA 4
#> 8 N509MQ 4
#> 9 N719MQ 4
#> 10 N723MQ 4
#> # ℹ 157 more rows
Created on 2024-03-04 with reprex v2.0.2