I am currently writing a SQL query interface through Python. When writing the data into a QTableView, I am using a QAbstractTableModel to write the query result.
This works fine for smaller queries, but becomes extremely slow when trying to present many rows and columns. Is there anyway to increase the speed that the dataframe is loaded into the QTableView?
Here is the code for my QAbtractTableModel class:
class SQLConnection_PandasModel(QtCore.QAbstractTableModel):
def __init__(self, reason, df, parent=None):
QtCore.QAbstractTableModel.__init__(self, parent)
self._df = df.copy()
self.original_df = df
self.reason = reason
# PyQt5 Slots and Signals
if self.reason == 'Read':
self.conSig = sqlWindow()
self.conSig.dataChanged.connect(self.conSig.sql_table_updated)
# set the shortcut ctrl+F for find in menu
self.find_list_row = []
# setup menu options
def toDataFrame(self):
return self._df.copy()
def headerData(self, section, orientation, role=QtCore.Qt.DisplayRole):
if role != QtCore.Qt.DisplayRole:
return QtCore.QVariant()
if orientation == QtCore.Qt.Horizontal:
try:
return self._df.columns.tolist()[section]
except (IndexError,):
return QtCore.QVariant()
elif orientation == QtCore.Qt.Vertical:
try:
# return self.df.index.tolist()
return self._df.index.tolist()[section]
except (IndexError,):
return QtCore.QVariant()
def data(self, index, role=QtCore.Qt.DisplayRole):
if not index.isValid():
return QtCore.QVariant()
if role == QtCore.Qt.DisplayRole or role == QtCore.Qt.EditRole:
#print(type(self._df.iloc[index.row(), index.column()]))
if isinstance(self._df.iloc[index.row(), index.column()], bytes):
return QtCore.QVariant('(BLOB)')
else:
return QtCore.QVariant(str(self._df.iloc[index.row(), index.column()]))
def setData(self, index, value, role):
row = self._df.index[index.row()]
col = self._df.columns[index.column()]
if hasattr(value, 'toPyObject'):
# PyQt4 gets a QVariant
value = value.toPyObject()
else:
# PySide gets an unicode
dtype = self._df[col].dtype
if dtype != object:
if ((np.issubdtype(dtype, np.integer)) or isinstance(dtype, int)) and value.isnumeric():
value = None if value == '' else dtype.type(value)
self._df.loc[row, col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
#print('Emitting 1',row , col)
self.conSig.dataChanged.emit(row, col, value)
return True
elif ((np.issubdtype(dtype, np.integer)) or isinstance(dtype, int)) and not(value.isnumeric()):
return False
else:
value = None if value == '' else dtype.type(value)
self._df.loc[row, col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
#print('Emitting 2', row, col)
self.conSig.dataChanged.emit(row, col, value)
return True
else:
self._df.loc[row, col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
#print('Emitting 3', row, col)
self.original_df.at[row, col] = value
#print(self.original_df)
self.conSig.dataChanged.emit(row, col, self.original_df)
return True
def rowCount(self, parent=QtCore.QModelIndex()):
return len(self._df.index)
def columnCount(self, parent=QtCore.QModelIndex()):
return len(self._df.columns)
def sort(self, column, order):
colname = self._df.columns.tolist()[column]
self.layoutAboutToBeChanged.emit()
self._df.sort_values(colname, ascending=order == QtCore.Qt.AscendingOrder, inplace=True)
self._df.reset_index(inplace=True, drop=True)
self.layoutChanged.emit()
def flags(self, index):
return Qt.ItemIsEnabled | Qt.ItemIsSelectable | Qt.ItemIsEditable
I would appreciate any assistance I could get in increasing the speed that the dataframe is loaded. Thank you.
Well I answered my own question. For those in the future that are running into an issue where you are trying to populate QTableViews using a QAbstractTableModel with large datasets...here you go:
class SQLConnection_PandasModel(QtCore.QAbstractTableModel):
def __init__(self, reason, df, parent=None):
QtCore.QAbstractTableModel.__init__(self, parent)
self._df = np.array(df.values)
self.original_df = df.copy()
self._cols = df.columns
self.r, self.c = np.shape(self._df)
self.reason = reason
# PyQt5 Slots and Signals
if self.reason == 'Read':
self.conSig = sqlWindow()
self.conSig.dataChanged.connect(self.conSig.sql_table_updated)
# set the shortcut ctrl+F for find in menu
self.find_list_row = []
def rowCount(self, parent=None):
return self.r
def columnCount(self, parent=None):
return self.c
def data(self, index, role=QtCore.Qt.DisplayRole):
if index.isValid():
if role == QtCore.Qt.DisplayRole:
if isinstance(self._df[index.row(), index.column()], bytes):
row = self._df[index.row()][0] - 1
col = self._df[index.column()][0] -1
self._df[row, col] = '(BLOB)'
return str('(BLOB)')
return str(self._df[index.row(),index.column()])
return None
def setData(self, index, value, role):
row = self._df[index.row()]
col = self._df[index.column()]
if hasattr(value, 'toPyObject'):
# PyQt4 gets a QVariant
value = value.toPyObject()
else:
# PySide gets an unicode
dtype = self._df.dtype
if dtype != object:
value = None if value == '' else dtype.type(value)
table_row = row[0]-1
table_col = col[0]-1
self._df[table_row, table_col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
column_name = self.original_df.columns[table_col]
self.original_df.loc[table_row ,column_name] = value
my_df = pd.DataFrame(self._df)
my_df.columns = self.original_df.columns
self.conSig.dataChanged.emit(table_row, table_col, my_df)
return True
def headerData(self, p_int, orientation, role):
if role == QtCore.Qt.DisplayRole:
if orientation == QtCore.Qt.Horizontal:
return self._cols[p_int]
elif orientation == QtCore.Qt.Vertical:
return p_int
return None
def sort(self, column, order):
colname = self._df.columns.tolist()[column]
self.layoutAboutToBeChanged.emit()
self._df.sort_values(colname, ascending=order == QtCore.Qt.AscendingOrder, inplace=True)
self._df.reset_index(inplace=True, drop=True)
self.layoutChanged.emit()
def flags(self, index):
return Qt.ItemIsEnabled | Qt.ItemIsSelectable | Qt.ItemIsEditable
I converted the pandas df into a numpy array and that really sped it up. Finally, I convert the numpy back to pandas when I need to print out. This increased the speed drastically.
Pandas loading time for 120k rows and 10 columns: ~280 seconds. Numpy loading time for 120k rows and 10 columns: 6.634 seconds.