Search code examples
pythonpersistencepoint-clouds

Python Ripser Get Vertices for Diagram Point


I am using the Python package ripser for persistence homology. I would like to leverage this to aide in segmenting 2D point clouds.

As an example, I am following Elizabeth Munch: Python Tutorial on Topological Data Analysis. Here, I take the DoubleAnnulus and increase the separation between the two:

import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import persim
import ripser
import teaspoon.MakeData.DynSysLib.DynSysLib as DSL
import teaspoon.MakeData.PointCloud as makePtCloud
import teaspoon.TDA.Draw as Draw
from teaspoon.parameter_selection.MsPE import MsPE_tau
from teaspoon.SP.network import ordinal_partition_graph
from teaspoon.SP.network_tools import make_network
from teaspoon.TDA.PHN import PH_network


def DoubleAnnulus(r1=1, R1=2, r2=0.8, R2=1.3, xshift=3):
    P = makePtCloud.Annulus(r=r1, R=R1)
    Q = makePtCloud.Annulus(r=r2, R=R2)

    Q[:, 0] = Q[:, 0] + xshift

    return np.concatenate((P, Q))


def drawTDAtutorial(P, diagrams, R=2):
    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 5))

    plt.sca(axes[0])
    plt.title("Point Cloud")
    plt.scatter(P[:, 0], P[:, 1])

    plt.sca(axes[1])
    plt.title("0-dim Diagram")
    Draw.drawDgm(diagrams[0])

    plt.sca(axes[2])
    plt.title("1-dim Diagram")
    Draw.drawDgm(diagrams[1])
    plt.axis([0, R, 0, R])

    plt.show()


if __name__ == "__main__":

    P = DoubleAnnulus(r1=1, R1=2, r2=0.5, R2=1.3, xshift=6)
    plt.scatter(*zip(*P))
    plt.show()

    diagrams = ripser.ripser(P)["dgms"]

    drawTDAtutorial(P, diagrams)

DoubleAnnulus

How can I grab the point cloud vertices corresponding to the largest 2 diagram points on the 1-dim Diagram?


Solution

  • K-means clustering would work nicely here (our data is fairly convex and the algorithm doesn't require labels). Since key features on persistence graphs have a large y-distance from the line y = x, we can generate a histogram of the 1-dim Diagram y-distances from the line y = x and use the number of points more than 3 standard deviations above the mean as our k-value.

    Code

    import pprint
    
    import matplotlib.gridspec as gridspec
    import matplotlib.pyplot as plt
    import networkx as nx
    import numpy as np
    import persim
    import ripser
    import teaspoon.MakeData.DynSysLib.DynSysLib as DSL
    import teaspoon.MakeData.PointCloud as makePtCloud
    import teaspoon.TDA.Draw as Draw
    from sklearn.cluster import KMeans
    from teaspoon.parameter_selection.MsPE import MsPE_tau
    from teaspoon.SP.network import ordinal_partition_graph
    from teaspoon.SP.network_tools import make_network
    from teaspoon.TDA.PHN import PH_network
    
    
    def DoubleAnnulus(r1=1, R1=2, r2=0.8, R2=1.3, xshift=3):
        P = makePtCloud.Annulus(r=r1, R=R1)
        Q = makePtCloud.Annulus(r=r2, R=R2)
    
        Q[:, 0] = Q[:, 0] + xshift
    
        return np.concatenate((P, Q))
    
    
    def drawTDAtutorial(P, diagrams, R=2):
        fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 5))
    
        plt.sca(axes[0])
        plt.title("Point Cloud")
        plt.scatter(P[:, 0], P[:, 1])
    
        plt.sca(axes[1])
        plt.title("0-dim Diagram")
        Draw.drawDgm(diagrams[0])
    
        plt.sca(axes[2])
        plt.title("1-dim Diagram")
        Draw.drawDgm(diagrams[1])
        plt.axis([0, R, 0, R])
    
        plt.show()
    
    
    if __name__ == "__main__":
    
        P = DoubleAnnulus(r1=1, R1=2, r2=0.5, R2=1.3, xshift=6)
        # plt.scatter(*zip(*P))
        # plt.show()
    
        result = ripser.ripser(P)
    
        diagrams = result["dgms"]
        # drawTDAtutorial(P, diagrams, R=3)
    
        loop_points = diagrams[1]  # H1 class
        y_heights = loop_points[:, 1] - loop_points[:, 0]
        features = y_heights[y_heights - np.mean(y_heights) >= 3 * np.std(y_heights)]
        k = len(features)
    
        print(f"Number of Features: {k}")
    
        kmeans = KMeans(n_clusters=k).fit(P)
        labels = kmeans.labels_
    
        points1 = P[labels == 0]
        points2 = P[labels == 1]
    
        fig, ax = plt.subplots()
    
        ax.scatter(*zip(*points1), color="red")
        ax.scatter(*zip(*points2), color="blue")
    
        plt.show()
    

    Output

    Number of Features: 2
    

    Clustered