I am using the Python package ripser
for persistent homology. I would like to leverage this to aid in segmenting 2D point clouds.
As an example, I am following Elizabeth Munch's Python Tutorial on Topological Data Analysis. Here, I take the DoubleAnnulus
example and increase the separation between the two annuli:
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import persim
import ripser
import teaspoon.MakeData.DynSysLib.DynSysLib as DSL
import teaspoon.MakeData.PointCloud as makePtCloud
import teaspoon.TDA.Draw as Draw
from teaspoon.parameter_selection.MsPE import MsPE_tau
from teaspoon.SP.network import ordinal_partition_graph
from teaspoon.SP.network_tools import make_network
from teaspoon.TDA.PHN import PH_network
def DoubleAnnulus(r1=1, R1=2, r2=0.8, R2=1.3, xshift=3):
    """Return one point cloud made of two annuli, the second shifted in x.

    ``r1``/``R1`` and ``r2``/``R2`` are forwarded as ``r`` and ``R`` to
    ``makePtCloud.Annulus`` (presumably inner and outer radius -- confirm
    against the teaspoon documentation). ``xshift`` is added to column 0 of
    the second annulus before both clouds are stacked.
    """
    first_ring = makePtCloud.Annulus(r=r1, R=R1)
    second_ring = makePtCloud.Annulus(r=r2, R=R2)

    # Move the second ring along the first coordinate so the rings separate.
    shifted_x = second_ring[:, 0] + xshift
    second_ring[:, 0] = shifted_x

    return np.concatenate((first_ring, second_ring))
def drawTDAtutorial(P, diagrams, R=2):
    """Show the point cloud beside its 0-dim and 1-dim persistence diagrams.

    ``P`` is indexed as ``P[:, 0]`` / ``P[:, 1]`` for the scatter plot.
    ``diagrams[0]`` and ``diagrams[1]`` are handed to ``Draw.drawDgm``.
    ``R`` sets the axis limits ``[0, R, 0, R]`` of the 1-dim panel only.
    """
    _, (cloud_ax, h0_ax, h1_ax) = plt.subplots(nrows=1, ncols=3, figsize=(20, 5))

    cloud_ax.set_title("Point Cloud")
    cloud_ax.scatter(P[:, 0], P[:, 1])

    # Draw.drawDgm receives no axes argument; presumably it draws on the
    # current axes, so the target panel is selected first.
    plt.sca(h0_ax)
    plt.title("0-dim Diagram")
    Draw.drawDgm(diagrams[0])

    plt.sca(h1_ax)
    plt.title("1-dim Diagram")
    Draw.drawDgm(diagrams[1])
    # Limits are applied after drawing and affect only this last panel.
    plt.axis([0, R, 0, R])

    plt.show()
if __name__ == "__main__":
    # Same double annulus as the tutorial, but with a smaller second ring
    # pushed 6 units along x.
    P = DoubleAnnulus(r1=1, R1=2, r2=0.5, R2=1.3, xshift=6)

    # Quick look at the raw cloud.
    plt.scatter(P[:, 0], P[:, 1])
    plt.show()

    # ripser returns a dict; "dgms" holds one persistence diagram per dimension.
    ripser_output = ripser.ripser(P)
    diagrams = ripser_output["dgms"]
    drawTDAtutorial(P, diagrams)
How can I grab the point cloud vertices corresponding to the two largest (most persistent) points on the 1-dim Diagram?
K-means clustering would work nicely here (our data is fairly convex and the algorithm doesn't require labels). Key features on a persistence diagram have a large y-distance from the line y = x, i.e. a large value of death − birth.
So we can generate a histogram of the 1-dim Diagram
y-distances from the line y = x
and use the number of points more than 3 standard deviations above the mean as our k-value
(the number of clusters passed to K-means).
import pprint
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import persim
import ripser
import teaspoon.MakeData.DynSysLib.DynSysLib as DSL
import teaspoon.MakeData.PointCloud as makePtCloud
import teaspoon.TDA.Draw as Draw
from sklearn.cluster import KMeans
from teaspoon.parameter_selection.MsPE import MsPE_tau
from teaspoon.SP.network import ordinal_partition_graph
from teaspoon.SP.network_tools import make_network
from teaspoon.TDA.PHN import PH_network
def DoubleAnnulus(r1=1, R1=2, r2=0.8, R2=1.3, xshift=3):
    """Build a single point cloud out of two annuli offset along x.

    The first annulus comes from ``makePtCloud.Annulus(r=r1, R=R1)`` and the
    second from ``makePtCloud.Annulus(r=r2, R=R2)`` (``r``/``R`` are presumably
    the inner and outer radii -- confirm against teaspoon). Column 0 of the
    second annulus is increased by ``xshift`` before the two are concatenated.
    """
    left = makePtCloud.Annulus(r=r1, R=R1)
    right = makePtCloud.Annulus(r=r2, R=R2)

    # Translate the second annulus along the first coordinate.
    moved_x = right[:, 0] + xshift
    right[:, 0] = moved_x

    return np.concatenate((left, right))
def drawTDAtutorial(P, diagrams, R=2):
    """Plot the point cloud and its 0-dim and 1-dim diagrams side by side.

    ``P`` supplies the scatter coordinates via ``P[:, 0]`` and ``P[:, 1]``.
    ``diagrams[0]`` and ``diagrams[1]`` are passed to ``Draw.drawDgm``.
    ``R`` is used for the ``[0, R, 0, R]`` axis limits of the 1-dim panel.
    """
    _, (cloud_ax, h0_ax, h1_ax) = plt.subplots(nrows=1, ncols=3, figsize=(20, 5))

    cloud_ax.set_title("Point Cloud")
    cloud_ax.scatter(P[:, 0], P[:, 1])

    # Draw.drawDgm is called without an axes argument; presumably it targets
    # the current axes, hence the explicit plt.sca before each call.
    plt.sca(h0_ax)
    plt.title("0-dim Diagram")
    Draw.drawDgm(diagrams[0])

    plt.sca(h1_ax)
    plt.title("1-dim Diagram")
    Draw.drawDgm(diagrams[1])
    # Only the 1-dim panel gets fixed limits.
    plt.axis([0, R, 0, R])

    plt.show()
if __name__ == "__main__":
    # Two annuli, the second smaller and shifted 6 units along x.
    P = DoubleAnnulus(r1=1, R1=2, r2=0.5, R2=1.3, xshift=6)

    result = ripser.ripser(P)
    diagrams = result["dgms"]

    # Each row of the 1-dim diagram is read as (column 0, column 1); the
    # difference is the point's vertical distance above the line y = x.
    loop_points = diagrams[1]  # H1 class
    y_heights = loop_points[:, 1] - loop_points[:, 0]

    # Keep the points whose height is at least 3 standard deviations above
    # the mean height; their count is used as the number of clusters.
    features = y_heights[y_heights - np.mean(y_heights) >= 3 * np.std(y_heights)]
    k = len(features)
    print(f"Number of Features: {k}")

    if k == 0:
        # KMeans cannot be fitted with zero clusters, so stop here instead of
        # letting it raise.
        print("No significant H1 features found; skipping clustering.")
    else:
        kmeans = KMeans(n_clusters=k).fit(P)
        labels = kmeans.labels_

        # The first two clusters keep the original red/blue colours; any
        # further clusters take matplotlib's default colour cycle
        # (color=None).
        base_colors = ["red", "blue"]

        fig, ax = plt.subplots()
        # Plot every cluster that was found, not just labels 0 and 1.
        for label in range(k):
            cluster = P[labels == label]
            color = base_colors[label] if label < len(base_colors) else None
            ax.scatter(cluster[:, 0], cluster[:, 1], color=color)
        plt.show()
Number of Features: 2