Search code examples
python pandas matplotlib-widget

using non-global variables inside onselect function of SpanSelector object


I am trying to slice and save data (a pandas.DataFrame with more than one column) into separate files, based on features visible in a plot, each time I select a slice. So far I have used the matplotlib SpanSelector with its onselect function. However, this only works with global variables, since there seems to be no easy way to pass the DataFrame into the function. Is there any way to avoid declaring a global variable each time?

The DataFrame itself comes from a program that reads an input file into a DataFrame.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.widgets import SpanSelector



def get_data():
    """Build a small synthetic DataFrame that stands in for real input.

    The index runs from 0 up to (but excluding) 100 in steps of 0.2.
    Column ``"a"`` holds the sine and column ``"b"`` the cosine of the
    index values. Real data is expected to come from input files.
    """
    sample_points = np.arange(0, 100, 0.2)
    columns = {"a": np.sin(sample_points), "b": np.cos(sample_points)}
    return pd.DataFrame(columns, index=sample_points)

def cut_data(data_frame):
    """Plot every column of ``data_frame`` and attach a horizontal SpanSelector.

    One subplot is created per column, stacked vertically with a shared
    x axis. The selector is attached to the only axes (single column) or to
    the topmost axes (several columns) and reports selections to the
    module-level ``onselect`` callback. The call blocks in ``plt.show()``.
    """
    n_columns = data_frame.shape[1]
    fig, axes = plt.subplots(n_columns, 1, sharex=True)

    if n_columns == 1:
        # With a 1x1 grid plt.subplots returns a bare Axes, not an array.
        axes.plot(data_frame)
        selector_axes = axes
    else:
        for axis, col in zip(axes, data_frame):
            axis.plot(data_frame[col])
        selector_axes = axes[0]

    # Keep a reference to the widget for as long as the figure is shown.
    # NOTE(review): `rectprops` / `span_stays` were renamed to `props` /
    # `interactive` in Matplotlib 3.5 -- confirm the targeted version.
    span = SpanSelector(
        selector_axes,
        onselect,
        'horizontal',
        useblit=True,
        rectprops={'alpha': 0.35, 'facecolor': 'red'},
        span_stays=True,
    )
    plt.show()

def onselect(xmin, xmax):
    """Placeholder selection callback; it currently does nothing.

    ``xmin`` and ``xmax`` are the two bounds handed over by the selector.
    """
    # TODO: get the indices of the x-values each time a subset of the data
    # is selected, then slice every column of the DataFrame and save the
    # result to a file as a new DataFrame.
    return None


# Script entry: build the example DataFrame and open the interactive plot.
cut_data(data_frame=get_data())

Solution

  • I wrote a callable object to circumvent the problem

    class OnselectObject(object):
        """Callable that carries a DataFrame into a selection callback.

        The callback is invoked with ``(xmin, xmax)`` only, so the DataFrame
        to cut from is stored on the instance instead of in a global
        variable. Pass an instance wherever a plain ``onselect`` function
        would otherwise be used.
        """

        def __init__(self, data_frame):
            """Remember the DataFrame that later selections are cut from."""
            self.data = data_frame

        def _cut(self, xmin, xmax):
            """Return the rows whose index labels lie between xmin and xmax.

            Uses explicit label-based slicing, so both bounds are inclusive
            and all columns are kept.
            """
            return self.data.loc[xmin:xmax]

        def __call__(self, xmin, xmax):
            """Cut the selected span out of the stored data and save it."""
            # NOTE(review): `save_cut_data` is not defined in this snippet;
            # it has to be provided elsewhere in the module.
            save_cut_data(self._cut(xmin, xmax))