giotto-ph icon indicating copy to clipboard operation
giotto-ph copied to clipboard

Generators of persistence diagrams do not correspond to longest edge in hole

Open longyuxi opened this issue 2 years ago • 0 comments

Hi,

If I am understanding it correctly, the generators of a persistence diagram returned by ripser_parallel should match up with each persistence, right? In other words, the distance between the two points that make up a generator should correspond to either the birth or the death filtration parameter of a homology class in the persistence, right? I have found the results of ripser_parallel to be inconsistent with this hypothesis:

import numpy as np
import gph

# Take any two protein atoms and any two ligand atoms and you will create a hole in the 1D homology that appears at the maximum distance between a protein atom and a ligand atom
# The one dimensional holes created by this metric all disappear only at r=infinity
def opposition_homology(protein_coords, ligand_coords, maxdim):
    # Append each coordinate with 1 for protein and 4 for ligand
    protein_coords = np.concatenate((protein_coords, np.ones((len(protein_coords), 1))), axis=1)
    ligand_coords = np.concatenate((ligand_coords, 4 * np.ones((len(ligand_coords), 1))), axis=1)

    def opposition_distance_metric(vec1, vec2):
        if np.abs(vec1[-1] - vec2[-1]) > 2:  # If the two atoms do not have the same affiliation
            return np.linalg.norm(vec1[:3] - vec2[:3])
        else:
            return np.Inf

    combined_coords = np.concatenate((protein_coords, ligand_coords), axis=0)

    if combined_coords.shape[0] == 0:
        return None

    if protein_coords is None or ligand_coords is None:
        return None

    output = gph.ripser_parallel(combined_coords, maxdim=maxdim, metric=opposition_distance_metric, thresh=50, n_threads=-1, return_generators=True)

    return output

protein_coords = 50 * np.random.rand(100, 3)
ligand_coords = 50 * np.random.rand(100, 3)

output = opposition_homology(protein_coords, ligand_coords, maxdim=1)

diagrams = output['dgms']
generators = output['gens']

specific_dim_diagrams = diagrams[1][:, 0]  # Just the birth times can encode all the information since death times are inf
specific_dim_generators = generators[3][0]

assert specific_dim_diagrams.shape[0] == specific_dim_generators.shape[0]  # It is expected that we have the same number of diagrams as generators, now let's check that the birth radius is the same as the length of the edge between the two atoms

combined_coords = np.concatenate((protein_coords, ligand_coords))


## This assertion will fail, because for some unknown reason the generator indices are not in order.
count = 0
for idx in range(specific_dim_diagrams.shape[0]):
    test_diagram = specific_dim_diagrams[idx]  # Just keeps track of the birth radius. e.g. 49.98954
    test_diagram_generator = specific_dim_generators[idx]  # Indices of the vertices of the edge with that length. e.g. [1038  416]
    try:
        assert np.abs(np.linalg.norm(combined_coords[test_diagram_generator[0]] - combined_coords[test_diagram_generator[1]]) - test_diagram) < 0.1
    except AssertionError:
        count += 1

print(f'Number of diagrams: {specific_dim_diagrams.shape[0]}')
print(f'Number of assertions that failed: {count}')

longyuxi avatar Aug 08 '23 15:08 longyuxi