tfmodisco-lite icon indicating copy to clipboard operation
tfmodisco-lite copied to clipboard

`modisco meme` command does not return trimmed motifs and no negative patterns but only positive ones

Open ThorbenMaa opened this issue 1 year ago • 3 comments

This worked for me to get all motifs out:

import click
import h5py
import numpy as np

@click.command()
@click.option(
    "--reportFile",
    "report_file",
    required=True,
    multiple=False,
    type=str,
    default="modisco_resultshypothetical_contribution_scores_mean_diffTeloHEAC_CTRL_vs_6h.npz.h5",
    help="e.g. modico output",
)
@click.option(
    "--outputPWM",
    "out_pwm",
    required=True,
    multiple=False,
    type=str,
    help="output PWM file",
)
@click.option(
    "--outputHCWM",
    "out_hcwm",
    required=True,
    multiple=False,
    type=str,
    help="output PWM file",
)
def cli(report_file, out_pwm, out_hcwm):
    
    # read data base entries
    report=h5py.File(report_file , 'r')


    # trim pos patterns
    trimmed_ppms=[]
    trimmed_hcwms=[]
    for key1 in report.keys(): #pos patterns and neg patterns
        for key2 in report[key1].keys():  
            pwm=report[key1][key2]["sequence"][:]
            cwm=(report[key1][key2]["hypothetical_contribs"][:])

            score = np.sum(np.abs(cwm), axis=1)
            trim_thresh = np.max(score) * 0.2  # Cut off anything less than 30% of max score
            pass_inds = np.where(score >= trim_thresh)[0]
            trimmed = pwm[np.min(pass_inds): np.max(pass_inds) + 1]

            trimmed_ppms.append(trimmed)
    report.close()
    
    #write MEME file with trimmed PWM motifs
    background=[0.25, 0.25, 0.25, 0.25]
    f = open(out_pwm, 'w')
    f.write('MEME version 4\n\n')
    f.write('ALPHABET= ACGT\n\n')
    f.write('strands: + -\n\n')
    f.write('Background letter frequencies (from unknown source):\n')
    f.write('A %.3f C %.3f G %.3f T %.3f\n\n' % tuple(list(background)))

    for i in range (0, len(trimmed_ppms), 1):
        f.write('\nMOTIF '+ str(i) +'\n')
        f.write('letter-probability matrix: alength= 4 w= %d nsites= 1 E= 0e+0\n' % trimmed_ppms[i].shape[0])
        for s in trimmed_ppms[i]:
            f.write('%.5f %.5f %.5f %.5f\n' % tuple(s))
    f.close()


         
    


            
if __name__ == "__main__":
    cli()


ThorbenMaa avatar Nov 10 '23 11:11 ThorbenMaa