I am trying to slice and save data (a pandas.DataFrame with more than one column) into separate files, based on features visible in a plot, each time I select a slice. So far I have used the matplotlib SpanSelector with its onselect function. However, this only works with global variables, since there seems to be no easy way to pass the DataFrame into the function. Is there any solution that avoids declaring a global variable each time?
The DataFrame itself comes from a program that reads an input file into a DataFrame.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.widgets import SpanSelector
def get_data():
    """Build the demonstration DataFrame that stands in for real input files.

    Returns:
        A DataFrame indexed by x = 0, 0.2, 0.4, ... (all values below 100)
        with column "a" holding sin(x) and column "b" holding cos(x).
    """
    # Example data only; real data will come from input files.
    x_values = np.arange(100, step=0.2)
    # One column per signal, in the same order as the column labels.
    signals = np.column_stack((np.sin(x_values), np.cos(x_values)))
    return pd.DataFrame(signals, index=x_values, columns=["a", "b"])
def _make_span_selector(ax, callback):
    """Create the red horizontal SpanSelector on *ax* for any matplotlib version."""
    style = dict(alpha=0.35, facecolor='red')
    try:
        # Keyword names used since matplotlib 3.5.
        return SpanSelector(ax, callback, 'horizontal', useblit=True,
                            props=style, interactive=True)
    except TypeError:
        # Older matplotlib only accepts the original keyword names.
        return SpanSelector(ax, callback, 'horizontal', useblit=True,
                            rectprops=style, span_stays=True)


def cut_data(data_frame, callback=None):
    """Plot every column of *data_frame* and let the user select x-ranges.

    One subplot is created per column (all sharing the x-axis) and a
    horizontal SpanSelector is attached to the first subplot.  The call
    blocks in ``plt.show()`` until the figure window is closed.

    Args:
        data_frame: DataFrame to plot; each column is drawn against the
            index in its own subplot.
        callback: Callable invoked as ``callback(xmin, xmax)`` for every
            selection.  Any callable works, including an object that holds
            the DataFrame, which avoids the need for a global variable.
            Defaults to the module-level ``onselect``.

    Raises:
        ValueError: If *data_frame* has no columns (raised by
            ``plt.subplots`` when asked for zero rows).
    """
    if callback is None:
        callback = onselect

    # squeeze=False always yields a 2-D array of axes, so one code path
    # serves both the single-column and the multi-column case.
    _, axes = plt.subplots(data_frame.shape[1], 1, sharex=True, squeeze=False)
    for ax, col in zip(axes[:, 0], data_frame):
        ax.plot(data_frame[col])

    # Keep a reference to the widget while the figure is shown; a
    # garbage-collected SpanSelector stops responding to the mouse.
    span = _make_span_selector(axes[0, 0], callback)
    plt.show()
def onselect(xmin, xmax):
    """Placeholder span-selection callback; currently does nothing.

    Intended behaviour (not implemented yet):
    - get the indices of the x-values each time a subset of the data is
      selected
    - slice every column in the DataFrame and save the result to file as
      a new DataFrame

    Args:
        xmin: Lower x-bound of the selected span.
        xmax: Upper x-bound of the selected span.
    """
# Run the example: build the demonstration data and open the interactive plot.
cut_data(get_data())
I wrote a callable object to circumvent the problem:
class OnselectObject(object):
    """Callable that carries a DataFrame into a SpanSelector callback.

    The matplotlib SpanSelector invokes its callback with only
    ``(xmin, xmax)``.  An instance of this class stores the DataFrame, so
    the callback can reach the data without a global variable.
    """

    def __init__(self, data_frame):
        """Store *data_frame* as ``self.data`` for later slicing."""
        self.data = data_frame

    def _select(self, xmin, xmax):
        """Return the rows whose index label lies between *xmin* and *xmax*."""
        # .loc slices by label and includes both end points, independent of
        # the index dtype; plain [] slicing switches between label-based and
        # positional behaviour depending on the index.
        return self.data.loc[xmin:xmax]

    def __call__(self, xmin, xmax):
        """Slice the stored DataFrame to the selected span and save it.

        Args:
            xmin: Lower x-bound of the selected span.
            xmax: Upper x-bound of the selected span.
        """
        selection = self._select(xmin, xmax)
        # NOTE(review): save_cut_data is not defined in the visible part of
        # this file; it is assumed to be provided elsewhere.
        save_cut_data(selection)