r4ds-exercise-solutions
r4ds-exercise-solutions copied to clipboard
Exercise 13.3.3 - new code with {dm} package since {datamodelr} is no longer actively maintained
Brief summary: Since the {datamodelr} package is no longer actively maintained, here are the solutions to the same two exercises rewritten with {dm} syntax.
Link to page, and reference the exercise number (if applicable): https://jrnold.github.io/r4ds-exercise-solutions/relational-data.html#exercise-13.3.3
New code:
First exercise:
# dm1
# Exercise 1: data model for the Batting, People and Salaries tables.
# Load {dm} first: it supplies dm() and the dm_*() helpers, and makes the
# `%>%` pipe used throughout this script available.
library(dm)
# Data frames
# The Lahman tables are plain data frames; converting them to tibbles for
# display prints a short preview instead of flooding the console.
tibble::as_tibble(Lahman::Batting)
tibble::as_tibble(Lahman::People)
tibble::as_tibble(Lahman::Salaries)
# Adding tables
# Build a dm from the three tables; no keys are declared at this point.
lahman_dm_no_keys <- dm(
  Batting = Lahman::Batting,
  People = Lahman::People,
  Salaries = Lahman::Salaries
)
lahman_dm_no_keys
# A dm is a list
# It can be inspected with names(), `$` and `[` like a named list of tables.
names(lahman_dm_no_keys)
lahman_dm_no_keys$Batting
lahman_dm_no_keys[c("People", "Salaries")]
# Defining Keys
## Primary keys: enumerate the candidates for each table
lahman_dm_no_keys %>%
  dm_enum_pk_candidates(Batting)
lahman_dm_no_keys %>%
  dm_enum_pk_candidates(People)
lahman_dm_no_keys %>%
  dm_enum_pk_candidates(Salaries)
## Primary keys: declare one key per table (compound for Batting and Salaries)
lahman_dm_only_pks <- lahman_dm_no_keys %>%
  dm_add_pk(Batting, c(playerID, yearID, stint)) %>%
  dm_add_pk(table = People, columns = playerID) %>%
  dm_add_pk(table = Salaries, columns = c(yearID, teamID, playerID))
lahman_dm_only_pks
## Foreign keys: enumerate the candidates that reference People
lahman_dm_only_pks %>%
  dm_enum_fk_candidates(Batting, People)
lahman_dm_only_pks %>%
  dm_enum_fk_candidates(Salaries, People)
## Foreign keys: link Batting and Salaries to People through playerID
lahman_dm_all_keys <- lahman_dm_only_pks %>%
  dm_add_fk(Batting, playerID, People) %>%
  dm_add_fk(Salaries, playerID, People)
lahman_dm_all_keys
# Visualization
## Model without any keys, drawn with every column listed
dm_draw(lahman_dm_no_keys, rankdir = "TB", view_type = "all")
## Model with only the Batting primary key
lahman_dm_no_keys %>%
  dm_add_pk(table = Batting, columns = c(playerID, yearID, stint)) %>%
  dm_draw()
## Model with every primary key plus the Salaries -> People foreign key
lahman_dm_only_pks %>%
  dm_add_fk(table = Salaries, columns = playerID, ref_table = People) %>%
  dm_draw()
## Model with every primary and foreign key
dm_draw(lahman_dm_all_keys)
## Same model, with the remaining columns and the column types shown too
dm_draw(
  lahman_dm_all_keys,
  rankdir = "LR",
  view_type = "all",
  column_types = TRUE
)
# Integrity Checks
# Examine the declared constraints at each stage of the model:
# no keys, primary keys only, then primary and foreign keys.
dm_examine_constraints(lahman_dm_no_keys)
dm_examine_constraints(lahman_dm_only_pks)
dm_examine_constraints(lahman_dm_all_keys)
# Programming
# Primary keys of the model, returned as a data frame
dm_get_all_pks(lahman_dm_only_pks)
# Foreign keys of the model, returned as a data frame
dm_get_all_fks(lahman_dm_all_keys)
# Convert the constraint report to a tibble so that any unsatisfied
# constraints can be inspected programmatically
tibble::as_tibble(dm_examine_constraints(lahman_dm_all_keys))
Second exercise:
# dm2
# Exercise 2: data model for the People, Managers and AwardsManagers tables.
# Load {dm} so this snippet also runs on its own: it supplies dm() and the
# dm_*() helpers, and makes the `%>%` pipe used below available.
library(dm)
# Data frames
# The Lahman tables are plain data frames; converting them to tibbles for
# display prints a short preview instead of flooding the console.
tibble::as_tibble(Lahman::People)
tibble::as_tibble(Lahman::Managers)
tibble::as_tibble(Lahman::AwardsManagers)
# Adding tables
# Build a dm from the three tables; no keys are declared at this point.
lahman_dm2_no_keys <- dm(
  People = Lahman::People,
  Managers = Lahman::Managers,
  AwardsManagers = Lahman::AwardsManagers
)
lahman_dm2_no_keys
# A dm is a list
# It can be inspected with names(), `$` and `[` like a named list of tables.
names(lahman_dm2_no_keys)
lahman_dm2_no_keys$People
lahman_dm2_no_keys[c("Managers", "AwardsManagers")]
# Defining Keys
## Primary keys: enumerate the candidates for each table
lahman_dm2_no_keys %>%
  dm_enum_pk_candidates(People)
lahman_dm2_no_keys %>%
  dm_enum_pk_candidates(Managers)
lahman_dm2_no_keys %>%
  dm_enum_pk_candidates(AwardsManagers)
## Primary keys: declare one key per table (compound for the manager tables)
lahman_dm2_only_pks <- lahman_dm2_no_keys %>%
  dm_add_pk(People, playerID) %>%
  dm_add_pk(table = Managers, columns = c(yearID, teamID, inseason)) %>%
  dm_add_pk(table = AwardsManagers, columns = c(playerID, awardID, yearID))
lahman_dm2_only_pks
## Foreign keys: enumerate the candidates that reference People
lahman_dm2_only_pks %>%
  dm_enum_fk_candidates(Managers, People)
lahman_dm2_only_pks %>%
  dm_enum_fk_candidates(AwardsManagers, People)
## Foreign keys: link Managers and AwardsManagers to People through playerID
lahman_dm2_all_keys <- lahman_dm2_only_pks %>%
  dm_add_fk(Managers, playerID, People) %>%
  dm_add_fk(AwardsManagers, playerID, People)
lahman_dm2_all_keys
# Visualization
## Model without any keys, drawn with every column listed
dm_draw(lahman_dm2_no_keys, rankdir = "TB", view_type = "all")
## Model with only the Managers primary key
lahman_dm2_no_keys %>%
  dm_add_pk(table = Managers, columns = c(yearID, teamID, inseason)) %>%
  dm_draw()
## Model with every primary key plus the AwardsManagers -> People foreign key
lahman_dm2_only_pks %>%
  dm_add_fk(table = AwardsManagers, columns = playerID, ref_table = People) %>%
  dm_draw()
## Model with every primary and foreign key
dm_draw(lahman_dm2_all_keys)
## Same model, with the remaining columns and the column types shown too
dm_draw(
  lahman_dm2_all_keys,
  rankdir = "LR",
  view_type = "all",
  column_types = TRUE
)
# Integrity Checks
# Examine the declared constraints at each stage of the model:
# no keys, primary keys only, then primary and foreign keys.
dm_examine_constraints(lahman_dm2_no_keys)
dm_examine_constraints(lahman_dm2_only_pks)
dm_examine_constraints(lahman_dm2_all_keys)
# Programming
# Primary keys of the model, returned as a data frame
dm_get_all_pks(lahman_dm2_only_pks)
# Foreign keys of the model, returned as a data frame
dm_get_all_fks(lahman_dm2_all_keys)
# Convert the constraint report to a tibble so that any unsatisfied
# constraints can be inspected programmatically
tibble::as_tibble(dm_examine_constraints(lahman_dm2_all_keys))