modkit
modkit copied to clipboard
Question in “DMR model and scoring details”
When I implemented the single-site DMR score algorithm using the documentation at https://nanoporetech.github.io/modkit/dmr_scoring_details.html, the results I calculated did not match those from the official tool.
import numpy as np
from scipy.special import betaln
def log_marginal_likelihood(x, n, alpha=0.5, beta=0.5):
"""
Compute the log marginal likelihood for binomial-beta model.
Parameters:
x : int
Number of successes (methylated counts).
n : int
Total number of trials (coverage).
alpha, beta : float
Prior parameters (Jeffreys prior: 0.5 by default).
Returns:
float
Log marginal likelihood.
"""
if x < 0 or n < x:
raise ValueError("Invalid count values.")
log_B_posterior = betaln(x + alpha, n - x + beta)
log_B_prior = betaln(alpha, beta)
return log_B_posterior - log_B_prior
def llr_score(xa, na, xb, nb, alpha=0.5, beta=0.5):
"""
Compute the LLR-based score using log marginal likelihoods.
Parameters:
xa, na : int
Counts and coverage in condition A.
xb, nb : int
Counts and coverage in condition B.
alpha, beta : float
Prior parameters (Jeffreys prior: 0.5 by default).
Returns:
float
The LLR-based score.
"""
log_la = log_marginal_likelihood(xa, na, alpha, beta)
log_lb = log_marginal_likelihood(xb, nb, alpha, beta)
x_total = xa + xb
n_total = na + nb
log_lab = log_marginal_likelihood(x_total, n_total, alpha, beta)
score = log_la + log_lb - log_lab
return score
# 示例用法:
if __name__ == "__main__":
xa, na = 100, 5084
xb, nb = 53, 2648
score = llr_score(xa, na, xb, nb)
print(f"LLR Score: {score:.2f}") # => LLR Score: -3.95
# But in modkit, it will output: -0.34073...
Hello @WeiHea,
Thanks for the question. I'll get your code running and see about the differences.