cutevariant icon indicating copy to clipboard operation
cutevariant copied to clipboard

[Import file] Importing VCF with variant-sample already existing

Open antonylebechec opened this issue 2 years ago • 1 comments

When importing a VCF which include an existing variant and sample association, fields count_var, count_het... are recalculated by incrementing number, which is not correct. Test by creating a db with a VCF, and import this VCF again

antonylebechec avatar Mar 10 '22 09:03 antonylebechec

Suggestion (case/control NOT tested):

# conn.execute(
    #     """
    #     CREATE TRIGGER count_homo AFTER INSERT ON sample_has_variant 
    #     WHEN new.gt = 2 BEGIN 
    #     UPDATE variants SET count_hom = count_hom + 1 WHERE variants.id = new.variant_id ;

    #     UPDATE variants SET
    #     case_count_hom = case_count_hom + (SELECT COUNT(*) FROM samples WHERE phenotype=2 and samples.id = new.sample_id)
    #     WHERE variants.id = new.variant_id ; 

    #     UPDATE variants SET
    #     control_count_hom = control_count_hom + (SELECT COUNT(*) FROM samples WHERE phenotype=1 and samples.id = new.sample_id)
    #     WHERE variants.id = new.variant_id ;

    #     END;"""
    # )

    conn.execute(
        """
        CREATE TRIGGER count_homo AFTER INSERT ON sample_has_variant 
        WHEN new.gt = 2 BEGIN

        UPDATE variants SET
        count_hom = (SELECT count(sample_has_variant.gt)
                        FROM sample_has_variant
                        WHERE sample_has_variant.variant_id=new.variant_id AND sample_has_variant.gt = 2)
        WHERE variants.id = new.variant_id ;

        UPDATE variants SET
        case_count_hom = (SELECT count(sample_has_variant.gt)
                            FROM samples
                            INNER JOIN sample_has_variant ON sample_has_variant.sample_id=samples.id 
                            WHERE phenotype=1 AND sample_has_variant.gt = 2)
        WHERE variants.id = new.variant_id ; 

        UPDATE variants SET
        control_count_hom = (SELECT count(sample_has_variant.gt)
                            FROM samples
                            INNER JOIN sample_has_variant ON sample_has_variant.sample_id=samples.id 
                            WHERE phenotype=0 AND sample_has_variant.gt = 2)
        WHERE variants.id = new.variant_id ;

        END;"""
    )

    # conn.execute(
    #     """
    #     CREATE TRIGGER count_var AFTER INSERT ON sample_has_variant 
    #     WHEN new.gt > 0 BEGIN 
    #     UPDATE variants SET count_var = count_var + 1 WHERE variants.id = new.variant_id ;
        
    #     END;"""
    # )

    conn.execute(
        """
        CREATE TRIGGER count_var AFTER INSERT ON sample_has_variant 
        WHEN new.gt > 0 BEGIN 

        UPDATE variants SET
        count_var = (SELECT count(sample_has_variant.gt)
                        FROM sample_has_variant
                        WHERE sample_has_variant.variant_id=new.variant_id AND sample_has_variant.gt > 0)
        WHERE variants.id = new.variant_id ;
        
        END;"""
    )

    # conn.execute(
    #     """
    #     CREATE TRIGGER count_het AFTER INSERT ON sample_has_variant 
    #     WHEN new.gt = 1 BEGIN 
    #     UPDATE variants SET count_het = count_het + 1 WHERE variants.id = new.variant_id ; 

    #     UPDATE variants SET
    #     case_count_het = case_count_het + (SELECT COUNT(*) FROM samples WHERE phenotype=2 and samples.id = new.sample_id)
    #     WHERE variants.id = new.variant_id ; 

    #     UPDATE variants SET
    #     control_count_het = control_count_het + (SELECT COUNT(*) FROM samples WHERE phenotype=1 and samples.id = new.sample_id)
    #     WHERE variants.id = new.variant_id ;

    #     END;"""
    # )

    conn.execute(
        """
        CREATE TRIGGER count_het AFTER INSERT ON sample_has_variant 
        WHEN new.gt = 1 BEGIN

        UPDATE variants SET
        count_het = (SELECT count(sample_has_variant.gt) FROM sample_has_variant WHERE sample_has_variant.variant_id=new.variant_id AND sample_has_variant.gt = 1) WHERE variants.id = new.variant_id ; 

        UPDATE variants SET
        case_count_het = (SELECT count(sample_has_variant.gt)
                            FROM samples
                            INNER JOIN sample_has_variant ON sample_has_variant.sample_id=samples.id 
                            WHERE phenotype=1 AND sample_has_variant.gt = 1)
        WHERE variants.id = new.variant_id ; 

        UPDATE variants SET
        control_count_het = (SELECT count(sample_has_variant.gt)
                            FROM samples
                            INNER JOIN sample_has_variant ON sample_has_variant.sample_id=samples.id 
                            WHERE phenotype=0 AND sample_has_variant.gt = 1)
        WHERE variants.id = new.variant_id ;

        END;"""
    )

    # conn.execute(
    #     """
    #     CREATE TRIGGER count_ref AFTER INSERT ON sample_has_variant 
    #     WHEN new.gt = 0 BEGIN 
    #     UPDATE variants SET count_ref = count_ref + 1 WHERE variants.id = new.variant_id ;
        
    #     UPDATE variants SET
    #     case_count_ref = case_count_ref + (SELECT COUNT(*) FROM samples WHERE phenotype=1 and samples.id = new.sample_id)
    #     WHERE variants.id = new.variant_id ; 

    #     UPDATE variants SET
    #     control_count_ref = control_count_ref + (SELECT COUNT(*) FROM samples WHERE phenotype=0 and samples.id = new.sample_id)
    #     WHERE variants.id = new.variant_id ; 

    #     END;"""
    # )

    conn.execute(
        """
        CREATE TRIGGER count_ref AFTER INSERT ON sample_has_variant 
        WHEN new.gt = 0 BEGIN

        UPDATE variants SET
        count_ref = (SELECT count(sample_has_variant.gt)
                            FROM sample_has_variant
                            WHERE sample_has_variant.variant_id=new.variant_id AND sample_has_variant.gt = 0)
        WHERE variants.id = new.variant_id ;
        
        UPDATE variants SET
        case_count_ref = (SELECT count(sample_has_variant.gt)
                            FROM samples
                            INNER JOIN sample_has_variant ON sample_has_variant.sample_id=samples.id 
                            WHERE phenotype=1 AND sample_has_variant.gt = 0)
        WHERE variants.id = new.variant_id ; 

        UPDATE variants SET
        control_count_ref = (SELECT count(sample_has_variant.gt)
                            FROM samples
                            INNER JOIN sample_has_variant ON sample_has_variant.sample_id=samples.id 
                            WHERE phenotype=0 AND sample_has_variant.gt = 0)
        WHERE variants.id = new.variant_id ; 

        END;"""
    )
    
    conn.execute(
        """
        CREATE TRIGGER freq_var_update AFTER UPDATE ON variants 
        WHEN old.count_hom <> new.count_hom
              OR old.count_het <> new.count_het
              OR old.count_var <> new.count_var
        BEGIN

        UPDATE variants SET
        freq_var = ( cast((new.count_hom*2 + new.count_het) as real) / (cast(new.count_var as real)*2) ) WHERE variants.id = new.id ;
        
        END;"""
    )

    conn.execute(
        """
        CREATE TRIGGER freq_var_full_update_on_variants AFTER UPDATE ON variants 
        WHEN old.count_hom <> new.count_hom
              OR old.count_het <> new.count_het
              OR old.count_var <> new.count_var
        BEGIN

        UPDATE variants SET
        freq_var_full = ( cast((new.count_hom*2 + new.count_het) as real) / ((SELECT COUNT(id) FROM samples)*2) ) WHERE variants.id = new.id ;
        
        END;"""
    )

    conn.execute(
        """
        CREATE TRIGGER freq_var_full_insert_on_sample AFTER INSERT ON samples 
        BEGIN

        UPDATE variants SET
        freq_var_full = ( cast((count_hom*2 + count_het) as real) / ((SELECT COUNT(id) FROM samples)*2) ) ;
        
        END;"""
    )

antonylebechec avatar Mar 10 '22 09:03 antonylebechec