gnomad_methods
gnomad_methods copied to clipboard
Add `assemble_constraint_context_ht` function to create a fully annotated context HT for computing constraint on
Also adds some functions to help transform the methylation data for annotation onto the context HT:
-
transform_methylation_level
-
transform_grch37_methylation
-
transform_grch38_methylation
Tested with:
from gnomad.resources.grch38.gnomad import coverage, all_sites_an, public_release
from gnomad.resources.grch38.reference_data import methylation_sites, vep_context
context_ht = vep_context.ht()
coverage_hts = {
"exomes": coverage("exomes").ht(),
"genomes": coverage("genomes").ht(),
}
an_hts = {
"exomes": all_sites_an("exomes").ht(),
"genomes": all_sites_an("genomes").ht(),
}
exome_ht = public_release("exomes").ht()
genomes_ht = public_release("genomes").ht()
freq_hts = {
"exomes": exome_ht.select("freq"),
"genomes": genomes_ht.select("freq"),
}
filter_hts = {
"exomes": exome_ht.select("filters"),
"genomes": genomes_ht.select("filters"),
}
methylation_ht = methylation_sites.ht()
gerp_ht = hl.experimental.load_dataset(name="gerp_scores", version="hg19", reference_genome="GRCh38")
annotated_context_ht = assemble_constraint_context_ht(
context_ht,
coverage_hts=coverage_hts,
an_hts=an_hts,
freq_hts=freq_hts,
filter_hts=filter_hts,
methylation_ht=methylation_ht,
gerp_ht=gerp_ht,
transformation_funcs=None,
)
annotated_context_ht.describe()
annotated_context_ht.show()
and
from gnomad.resources.grch37.gnomad import coverage, public_release
from gnomad.resources.grch37.reference_data import methylation_sites, vep_context
context_ht = vep_context.ht()
coverage_hts = {
"exomes": coverage("exomes").ht(),
"genomes": coverage("genomes").ht(),
}
an_hts = None
exome_ht = public_release("exomes").ht()
genomes_ht = public_release("genomes").ht()
freq_hts = {
"exomes": exome_ht.select("freq"),
"genomes": genomes_ht.select("freq"),
}
filter_hts = {
"exomes": exome_ht.select("filters"),
"genomes": genomes_ht.select("filters"),
}
methylation_ht = methylation_sites.ht()
gerp_ht = hl.experimental.load_dataset(name="gerp_scores", version="hg19", reference_genome="GRCh37")
annotated_context_ht = assemble_constraint_context_ht(
context_ht,
coverage_hts=coverage_hts,
an_hts=an_hts,
freq_hts=freq_hts,
filter_hts=filter_hts,
methylation_ht=methylation_ht,
gerp_ht=gerp_ht,
transformation_funcs=None,
)
annotated_context_ht.describe()
annotated_context_ht.show()