Search code examples
pythonpandasmatplotlibrdkit

Insert matplotlib images into a pandas dataframe


PURPOSE: I am currently working with rdkit to colour the structures of my molecules according to rdkit.Chem.Draw.SimilarityMaps. Now, I would like to take the matplotlib images produced by the SimilarityMaps functions, insert them into a pandas dataframe, and export this table as an html file.

CODE: I tried to do that with the following code

import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import SimilarityMaps
from rdkit.Chem.Draw import IPythonConsole #Needed to show molecules
from rdkit.Chem.Draw.MolDrawing import MolDrawing, DrawingOptions

# Input table: one SMILES string per molecule to be drawn.
df = pd.DataFrame({
    'smiles': [
        'Nc1nc(NC2CC2)c3ncn([C@@H]4C[C@H](CO)C=C4)c3n1',
        'CCCC(=O)Nc1ccc(OCC(O)CNC(C)C)c(c1)C(C)=O',
        'CCN(CC)CCNC(=O)C1=CC=C(C=C1)NC(=O)C',
        'CC(=O)NC1=CC=C(C=C1)O',
        'CC(=O)Nc1sc(nn1)[S](N)(=O)=O',
    ],
})

def getSim(smi):
    """Return the similarity-map figure for the molecule given as SMILES.

    The molecule parsed from ``smi`` is compared against a benzene
    reference ('c1ccccc1') using ``SimilarityMaps.GetMorganFingerprint``.
    Only the figure is returned; the maximum weight that
    ``GetSimilarityMapForFingerprint`` also returns is discarded.
    """
    mol = Chem.MolFromSmiles(smi)
    refmol = Chem.MolFromSmiles('c1ccccc1')
    # No separate fingerprint of ``mol`` is needed here: the fingerprint
    # function is passed in and applied by GetSimilarityMapForFingerprint.
    fig, _maxweight = SimilarityMaps.GetSimilarityMapForFingerprint(
        refmol, mol, SimilarityMaps.GetMorganFingerprint)
    return fig

# Store the object returned by getSim (a matplotlib figure) for every row.
df['map'] = df['smiles'].map(getSim)
# to_html writes the text representation of each cell, so the figures show up
# as text such as "Figure (200x200)" rather than as images.
df.to_html('/.../test.html')

When I open the file test.html, the map column only contains the text "Figure (200x200)". I checked that the map column of my dataframe contains the figure objects: it does in Python, but they are not rendered as images in the html file.

QUESTION: I'm not sure how to get a dataframe with images and I'd like to have the help of the community to clarify this subject.

Thanks in advance


Solution

  • What you see as Figure (200x200) is the __repr__ string of the matplotlib Figure class. It is the text representation of that python object (the same that you would see when doing print(fig)).

    What you want instead is to have an actual image in the table. An easy option would be to save the matplotlib figure as a png image, create an html tag for it, <img src="some.png" />, and put that tag into the table cell.

    import pandas as pd
    import numpy as np;np.random.seed(1)
    import matplotlib.pyplot as plt
    import matplotlib.colors
    
    # Demo table: ten random "info" values in 0-9 and ten random "status" values in 0-2.
    df = pd.DataFrame({"info" : np.random.randint(0,10,10), 
                       "status" : np.random.randint(0,3,10)})
    
    # One colour per status value; calling cmap(i) with an integer i picks the i-th colour.
    cmap = matplotlib.colors.ListedColormap(["crimson","orange","limegreen"])
    
    def createFigure(i):
        """Draw a small badge for ``i`` and return its matplotlib figure.

        The badge is a filled circle coloured with ``cmap(i)`` that has
        ``str(i)`` printed at its centre.
        """
        figure, axes = plt.subplots(figsize=(.4, .4))
        # Let the axes fill the whole canvas, with no margins.
        figure.subplots_adjust(left=0, bottom=0, right=1, top=1)
        axes.axis("off")
        axes.axis([0, 1, 0, 1])
        badge = plt.Circle((.5, .5), .4, color=cmap(i))
        axes.add_patch(badge)
        axes.text(.5, .5, str(i), ha="center", va="center")
        return figure
    
    def mapping(i):
        """Save the badge for ``i`` as a PNG and return an ``<img>`` tag for it.

        The image is written to ``data/map_<i>.png``; the returned tag uses
        that same relative path as its ``src``.
        """
        import os

        fname = "data/map_{}.png".format(i)
        # savefig does not create missing directories, so make sure "data/" exists.
        os.makedirs(os.path.dirname(fname), exist_ok=True)
        fig = createFigure(i)
        try:
            fig.savefig(fname)
        finally:
            # pyplot keeps every figure it creates alive until it is closed;
            # release it once it has been written to disk.
            plt.close(fig)
        imgstr = '<img src="{}" /> '.format(fname)
        return imgstr
    
    
    # Add an 'image' column holding the <img> tag built from each row's status.
    df['image'] = df['status'].map(mapping)
    # escape=False writes the <img> tags as markup instead of HTML-escaping them.
    df.to_html('test.html', escape=False)
    

    enter image description here

    The drawback of this is that you have a lot of images saved somewhere on disk. If this is not desired, you may store the image encoded as base64 directly in the html file, <img src="data:image/png;base64,..." />.

    import pandas as pd
    import numpy as np;np.random.seed(1)
    import matplotlib.pyplot as plt
    import matplotlib.colors
    from io import BytesIO
    import base64
    
    # Demo table: ten random "info" values in 0-9 and ten random "status" values in 0-2.
    df = pd.DataFrame({"info" : np.random.randint(0,10,10), 
                       "status" : np.random.randint(0,3,10)})
    
    # One colour per status value; calling cmap(i) with an integer i picks the i-th colour.
    cmap = matplotlib.colors.ListedColormap(["crimson","orange","limegreen"])
    
    def createFigure(i):
        """Draw a small badge for ``i`` and return its matplotlib figure.

        The badge is a filled circle coloured with ``cmap(i)`` that has
        ``str(i)`` printed at its centre.
        """
        figure, axes = plt.subplots(figsize=(.4, .4))
        # Let the axes fill the whole canvas, with no margins.
        figure.subplots_adjust(left=0, bottom=0, right=1, top=1)
        axes.axis("off")
        axes.axis([0, 1, 0, 1])
        badge = plt.Circle((.5, .5), .4, color=cmap(i))
        axes.add_patch(badge)
        axes.text(.5, .5, str(i), ha="center", va="center")
        return figure
    
    def fig2inlinehtml(fig, i=None):
        """Return an ``<img>`` tag that embeds ``fig`` as a base64 PNG data URI.

        ``fig`` must provide ``savefig(fileobj, format=...)``.
        ``i`` is not used; it is kept (and is now optional) so that existing
        two-argument calls keep working.
        """
        with BytesIO() as figfile:
            fig.savefig(figfile, format='png')
            # getvalue() returns the whole buffer regardless of the current
            # stream position, so no seek(0) is required first.
            png_bytes = figfile.getvalue()
        # b64encode yields bytes; decode to text so it can be formatted into
        # the tag (base64 output is pure ASCII).
        figdata_png = base64.b64encode(png_bytes).decode("ascii")
        imgstr = '<img src="data:image/png;base64,{}" />'.format(figdata_png)
        return imgstr
    
    def mapping(i):
        """Build the badge figure for ``i`` and return it as an inline ``<img>`` tag."""
        fig = createFigure(i)
        try:
            return fig2inlinehtml(fig,i)
        finally:
            # pyplot keeps a reference to every figure it creates; close this
            # one once it is encoded so figures do not pile up across calls.
            plt.close(fig)
    
    
    # None removes the column-width limit so the long base64 strings are not
    # truncated. The older -1 spelling was deprecated in pandas 1.0 and is
    # rejected by pandas 2.x (on pandas older than 1.0, use -1 instead).
    with pd.option_context('display.max_colwidth', None):
        # The formatter turns each value of the 'status' column into its <img> tag;
        # escape=False writes those tags as markup instead of HTML-escaping them.
        df.to_html('test.html', escape=False, formatters=dict(status=mapping))
    

    The output looks the same, but there are no images saved to disk.

    This also works nicely in a Jupyter Notebook, with a small modification,

    from IPython.display import HTML
    # ...
    # None removes the column-width limit so the long base64 strings are not
    # truncated. The older -1 spelling was deprecated in pandas 1.0 and is
    # rejected by pandas 2.x (on pandas older than 1.0, use -1 instead).
    pd.set_option('display.max_colwidth', None)
    HTML(df.to_html(escape=False, formatters=dict(status=mapping)))
    

    enter image description here