Python Ripser Get Vertices for Diagram Point

I am using the Python package ripser for persistence homology. I would like to leverage this to aide in segmenting 2D point clouds.

As an example, I am following Elizabeth Munch: Python Tutorial on Topological Data Analysis. Here, I take the DoubleAnnulus and increase the separation between the two:

import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import persim
import ripser
import teaspoon.MakeData.DynSysLib.DynSysLib as DSL
import teaspoon.MakeData.PointCloud as makePtCloud
import teaspoon.TDA.Draw as Draw
from teaspoon.parameter_selection.MsPE import MsPE_tau
from teaspoon.SP.network import ordinal_partition_graph
from teaspoon.SP.network_tools import make_network
from teaspoon.TDA.PHN import PH_network


def DoubleAnnulus(r1=1, R1=2, r2=0.8, R2=1.3, xshift=3):
    P = makePtCloud.Annulus(r=r1, R=R1)
    Q = makePtCloud.Annulus(r=r2, R=R2)

    Q[:, 0] = Q[:, 0] + xshift

    return np.concatenate((P, Q))


def drawTDAtutorial(P, diagrams, R=2):
    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 5))

    plt.sca(axes[0])
    plt.title("Point Cloud")
    plt.scatter(P[:, 0], P[:, 1])

    plt.sca(axes[1])
    plt.title("0-dim Diagram")
    Draw.drawDgm(diagrams[0])

    plt.sca(axes[2])
    plt.title("1-dim Diagram")
    Draw.drawDgm(diagrams[1])
    plt.axis([0, R, 0, R])

    plt.show()


if __name__ == "__main__":

    P = DoubleAnnulus(r1=1, R1=2, r2=0.5, R2=1.3, xshift=6)
    plt.scatter(*zip(*P))
    plt.show()

    diagrams = ripser.ripser(P)["dgms"]

    drawTDAtutorial(P, diagrams)

How can I grab the point cloud vertices corresponding to the largest 2 diagram points on the 1-dim Diagram?

Solution

K-means clustering would work nicely here (our data is fairly convex and the algorithm doesn't require labels). Since key features on persistence graphs have a large y-distance from the line y = x, we can generate a histogram of the 1-dim Diagram y-distances from the line y = x and use the number of points more than 3 standard deviations above the mean as our k-value.

Code

import pprint

import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import persim
import ripser
import teaspoon.MakeData.DynSysLib.DynSysLib as DSL
import teaspoon.MakeData.PointCloud as makePtCloud
import teaspoon.TDA.Draw as Draw
from sklearn.cluster import KMeans
from teaspoon.parameter_selection.MsPE import MsPE_tau
from teaspoon.SP.network import ordinal_partition_graph
from teaspoon.SP.network_tools import make_network
from teaspoon.TDA.PHN import PH_network


def DoubleAnnulus(r1=1, R1=2, r2=0.8, R2=1.3, xshift=3):
    P = makePtCloud.Annulus(r=r1, R=R1)
    Q = makePtCloud.Annulus(r=r2, R=R2)

    Q[:, 0] = Q[:, 0] + xshift

    return np.concatenate((P, Q))


def drawTDAtutorial(P, diagrams, R=2):
    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 5))

    plt.sca(axes[0])
    plt.title("Point Cloud")
    plt.scatter(P[:, 0], P[:, 1])

    plt.sca(axes[1])
    plt.title("0-dim Diagram")
    Draw.drawDgm(diagrams[0])

    plt.sca(axes[2])
    plt.title("1-dim Diagram")
    Draw.drawDgm(diagrams[1])
    plt.axis([0, R, 0, R])

    plt.show()


if __name__ == "__main__":

    P = DoubleAnnulus(r1=1, R1=2, r2=0.5, R2=1.3, xshift=6)
    # plt.scatter(*zip(*P))
    # plt.show()

    result = ripser.ripser(P)

    diagrams = result["dgms"]
    # drawTDAtutorial(P, diagrams, R=3)

    loop_points = diagrams[1]  # H1 class
    y_heights = loop_points[:, 1] - loop_points[:, 0]
    features = y_heights[y_heights - np.mean(y_heights) >= 3 * np.std(y_heights)]
    k = len(features)

    print(f"Number of Features: {k}")

    kmeans = KMeans(n_clusters=k).fit(P)
    labels = kmeans.labels_

    points1 = P[labels == 0]
    points2 = P[labels == 1]

    fig, ax = plt.subplots()

    ax.scatter(*zip(*points1), color="red")
    ax.scatter(*zip(*points2), color="blue")

    plt.show()

Output

Number of Features: 2