Search code examples
python, python-3.x, scikit-learn, multi-dimensional-scaling

Is it possible to find similar orders by multi-dimensional scaling in scikit learn?


I have several files, each containing the 3D positions of 10 points (plotted in the corresponding pictures). I would like to use multi-dimensional scaling to find which files contain the same ordering and to print out the distinct orderings. For example, here the orderings from text files 1, 2 and 4 are exactly the same, but they differ from the ordering in file 3.

import numpy as np

from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection

from sklearn import manifold
from sklearn.metrics import euclidean_distances
from sklearn.decomposition import PCA

A1=[[0.000, 0.000, 0.5],
[0.250, 0.000, 0.5],
[0.125, 0.250, 0.5],
[0.375, 0.250, 0.5],
[0.250, 0.500, 0.5],
[0.500, 0.500, 0.5],
[0.125, 0.750, 0.5],
[0.375, 0.750, 0.5],
[0.000, 1.000, 0.5],
[0.250, 1.000, 0.5]]
A2=[[0.500, 0.000, 0.5],
[0.750, 0.000, 0.5],
[0.375, 0.250, 0.5],
[0.625, 0.250, 0.5],
[0.250, 0.500, 0.5],
[0.500, 0.500, 0.5],
[0.375, 0.750, 0.5],
[0.625, 0.750, 0.5],
[0.500, 1.000, 0.5],
[0.750, 1.000, 0.5]]
A3=[[0.500, 0.000, 0.5],
[0.750, 0.000, 0.5],
[0.625, 0.250, 0.5],
[0.875, 0.250, 0.5],
[0.250, 0.500, 0.5],
[0.500, 0.500, 0.5],
[0.375, 0.750, 0.5],
[0.625, 0.750, 0.5],
[0.500, 1.000, 0.5],
[0.750, 1.000, 0.5]]
A4=[[0.250, 0.000, 0.5],
[0.500, 0.000, 0.5],
[0.375, 0.250, 0.5],
[0.625, 0.250, 0.5],
[0.500, 0.500, 0.5],
[0.750, 0.500, 0.5],
[0.375, 0.750, 0.5],
[0.625, 0.750, 0.5],
[0.250, 1.000, 0.5],
[0.500, 1.000, 0.5]]

# Report how many points each set holds (all four counts on one line).
point_sets = (A1, A2, A3, A4)
print(*map(len, point_sets))

# Pairwise Euclidean distance matrix of every point set.
a1, a2, a3, a4 = map(euclidean_distances, point_sets)
print(a1)

OUTPUT

Number of different orders: 2
A1
A3

enter image description here


Solution

  • Set up the data and libraries:

    import numpy as np
    import pandas as pd
    
    from matplotlib import pyplot as plt
    from matplotlib.collections import LineCollection
    
    from sklearn import manifold
    from sklearn.metrics import euclidean_distances
    from sklearn.decomposition import PCA
    
    A1=[[0.000, 0.000, 0.5],
    [0.250, 0.000, 0.5],
    [0.125, 0.250, 0.5],
    [0.375, 0.250, 0.5],
    [0.250, 0.500, 0.5],
    [0.500, 0.500, 0.5],
    [0.125, 0.750, 0.5],
    [0.375, 0.750, 0.5],
    [0.000, 1.000, 0.5],
    [0.250, 1.000, 0.5]]
    A2=[[0.500, 0.000, 0.5],
    [0.750, 0.000, 0.5],
    [0.375, 0.250, 0.5],
    [0.625, 0.250, 0.5],
    [0.250, 0.500, 0.5],
    [0.500, 0.500, 0.5],
    [0.375, 0.750, 0.5],
    [0.625, 0.750, 0.5],
    [0.500, 1.000, 0.5],
    [0.750, 1.000, 0.5]]
    A3=[[0.500, 0.000, 0.5],
    [0.750, 0.000, 0.5],
    [0.625, 0.250, 0.5],
    [0.875, 0.250, 0.5],
    [0.250, 0.500, 0.5],
    [0.500, 0.500, 0.5],
    [0.375, 0.750, 0.5],
    [0.625, 0.750, 0.5],
    [0.500, 1.000, 0.5],
    [0.750, 1.000, 0.5]]
    A4=[[0.250, 0.000, 0.5],
    [0.500, 0.000, 0.5],
    [0.375, 0.250, 0.5],
    [0.625, 0.250, 0.5],
    [0.500, 0.500, 0.5],
    [0.750, 0.500, 0.5],
    [0.375, 0.750, 0.5],
    [0.625, 0.750, 0.5],
    [0.250, 1.000, 0.5],
    [0.500, 1.000, 0.5]]
    

    Let's collect the point sets in a dictionary and, for each set, compute the sum of all its pairwise Euclidean distances.

    """"""
    # Map each point-set name to its coordinates so that every set can be
    # processed uniformly and reported by name.
    segments_dic = {
        'A1': A1,
        'A2': A2,
        'A3': A3,
        'A4': A4,
    }

    # Value used to classify each set: the sum of all entries of its pairwise
    # Euclidean distance matrix. Pairwise distances are unchanged when a set
    # is translated, rotated or mirrored, so such copies get the same sum.
    # Rounding to 3 decimals absorbs floating-point noise so that equal sums
    # compare equal. NOTE: equal sums are a heuristic; they do not strictly
    # guarantee that two arrangements are identical.
    # The list follows the key order of segments_dic.
    segments_distances = [
        round(euclidean_distances(points).sum(), 3)
        for points in segments_dic.values()
    ]
    

    Let's now check which groups of points have the same total pairwise distance:

    """Number of different elements / orders
    I will round the results to make them comparable"""
    # Each distinct rounded distance sum stands for one distinct ordering.
    different_elements = np.unique(segments_distances)
    print("number of different orders: ", len(different_elements))
    print("different orders: ", different_elements)

    # segments_distances was filled in the key order of segments_dic, so the
    # two can be paired up instead of recomputing every distance matrix again
    # for each distinct value.
    distance_by_name = dict(zip(segments_dic, segments_distances))

    # List, for every distinct value, the names of the sets that produced it.
    for i in different_elements:
        print("For element distance ",i," corresponding groups are: ")
        for j, distance in distance_by_name.items():
            if i == distance:
                print(j)
    

    This is what the output looks like:

    number of different orders:  2
    different orders:  [46.952 48.496]
    For element distance  46.952  corresponding groups are: 
    A1
    A2
    A4
    For element distance  48.496  corresponding groups are: 
    A3
    

    Let's see if we can validate these results with pictures.

    The plots in 2D:

    """Plots"""
    # Draw one 2D scatter plot per point set and save each as an SVG file.
    for name, points in segments_dic.items():
        # Project the 3D points onto their first two principal components.
        projected = pd.DataFrame(PCA(n_components=2).fit_transform(points))
        figure = plt.figure()
        plt.scatter(projected[0], projected[1])
        plt.title('{}'.format(name))
        figure.savefig('{}_representation.svg'.format(name))
    

    The resulting pictures:

    enter image description here enter image description here enter image description here enter image description here

    The pictures confirm the results: A1, A2 and A4 show the same arrangement, while A3 differs.