chanjo icon indicating copy to clipboard operation
chanjo copied to clipboard

Investigate GC content integration

Open robinandeer opened this issue 9 years ago • 0 comments

This is a first, very simple example:

def gc_content(self, query=None, gc_amount='high', gene_ids=None):
    """Build a query limited to genes with extreme GC content.

    Meant for estimating coverage performance. Unless ``gene_ids`` is
    supplied, a small built-in subset of genes with high/low GC content
    levels (BioMart) is used.

    Args:
        query: query to restrict; when falsy, ``self.average_metrics()``
            is used instead.
        gc_amount: ``'high'`` or ``'low'``; selects the built-in gene set.
        gene_ids: optional identifiers that replace the built-in set
            (ignored when empty/falsy).

    Returns:
        The result of ``query.filter(...)`` with ``SuperblockData.parent_id``
        constrained to the selected identifiers.

    Raises:
        ValueError: if ``gc_amount`` is neither ``'high'`` nor ``'low'``.
    """
    # fall back to the average metrics query when none was passed in
    if not query:
        query = self.average_metrics()

    # reject unknown GC levels, even when explicit gene ids were given
    if gc_amount not in ('high', 'low'):
        raise ValueError("'gc_amount' must be either 'high' or 'low'")

    if gene_ids:
        # caller-provided identifiers take precedence over the defaults
        selected = gene_ids
    elif gc_amount == 'high':
        # highest GC content supersets
        selected = [
            'UTF1', 'BHLHA9', 'C20orf201', 'LRRC26', 'HES4',
            'BHLHE23', 'C9orf172', 'NKX6-2', 'CITED4',
        ]
    else:
        # lowest GC content supersets
        selected = [
            'DEFB114', 'NTS', 'ANGPTL3', 'CYLC2', 'GPR22',
            'SI', 'CSN3', 'KLRC4', 'CSN1S1',
        ]

    # restrict the query to the chosen supersets and hand it back
    parent_filter = SuperblockData.parent_id.in_(selected)
    return query.filter(parent_filter)

robinandeer avatar Sep 04 '15 10:09 robinandeer