How do I drop multiple columns from a pandas DataFrame in Python?
import pandas as pd
# Sample data: nine rows with a numeric source number and two string columns.
# Each key maps to one flat list; all lists have the same length.
df = pd.DataFrame({
    "source_number": [
        11199, 11328, 11287, 32345, 12342, 1232, 13456, 123244, 13456,
    ],
    "location": [
        "loc2", "loc1-loc3", "loc3", "loc1", "loc2-loc1", "loc2",
        "loc3-loc2", "loc2", "loc1",
    ],
    "category": [
        "cat1", "cat2", "cat1", "cat3", "cat3", "cat3", "cat2", "cat3",
        "cat2",
    ],
})
def remove_columns(dataset, cols):
    """Drop one or more columns from ``dataset`` in place and return it.

    Args:
        dataset: The pandas DataFrame to modify.
        cols: A single column name (string) or an iterable of column names.

    Returns:
        The same ``dataset`` object, with the requested columns removed.

    Raises:
        KeyError: If a requested column is not present in ``dataset``.
    """
    # A bare string is itself iterable, so looping over it would try to
    # delete one "column" per character; treat it as a single column name.
    labels = [cols] if isinstance(cols, str) else list(cols)
    dataset.drop(columns=labels, inplace=True)
    return dataset
# Drop the columns of df one at a time.
# Iterate over a snapshot of the labels, because df changes inside the loop.
for col in list(df.columns):
    # Pass the name as a one-element list so it is handled as a single
    # column label rather than as an iterable of characters.
    df = remove_columns(df, [col])
df.head()
In the code above the task is done and the columns are dropped.
However, I then tried this code in Streamlit, where the user selects multiple columns to remove from the dataframe.
The problem is that the system takes only the first element and not all the items in the list.
For example, if the user selects location and source_number, the col variable contains just location and the error below is displayed:
KeyError: 'location'
Traceback:
File "f:\aienv\lib\site-packages\streamlit\script_runner.py", line 333, in _run_script
exec(code, module.__dict__)
File "F:\AIenv\streamlit\app.py", line 373, in <module>
sidebars[y]=st.sidebar.multiselect('Filter '+y, df[y].unique(),key="1")
File "f:\aienv\lib\site-packages\pandas\core\frame.py", line 2902, in __getitem__
indexer = self.columns.get_loc(key)
File "f:\aienv\lib\site-packages\pandas\core\indexes\base.py", line 2893, in get_loc
raise KeyError(key) from err
import numpy as np
import pandas as pd
import streamlit as st
#function drop unwanted columns
def remove_columns(dataset, cols):
    """Drop one or more columns from ``dataset`` in place and return it.

    Args:
        dataset: The pandas DataFrame to modify.
        cols: A single column name (string) or an iterable of column names.

    Returns:
        The same ``dataset`` object, with the requested columns removed.

    Raises:
        KeyError: If a requested column is not present in ``dataset``.
    """
    # A bare string is itself iterable, so looping over it would try to
    # delete one "column" per character; treat it as a single column name.
    labels = [cols] if isinstance(cols, str) else list(cols)
    dataset.drop(columns=labels, inplace=True)
    return dataset
# Sample data: nine rows with a numeric source number and two string columns.
# Each key maps to one flat list; all lists have the same length.
df = pd.DataFrame({
    "source_number": [
        11199, 11328, 11287, 32345, 12342, 1232, 13456, 123244, 13456,
    ],
    "location": [
        "loc2", "loc1-loc3", "loc3", "loc1", "loc2-loc1", "loc2",
        "loc3-loc2", "loc2", "loc1",
    ],
    "category": [
        "cat1", "cat2", "cat1", "cat3", "cat3", "cat3", "cat2", "cat3",
        "cat2",
    ],
})
# Sidebar controls: a button that triggers the removal and a multiselect
# listing the columns of df to remove.
drop_button = st.sidebar.button("Remove")
columns = st.sidebar.multiselect("Select column/s", df.columns)
sidebars = {}
for y in columns:
    # Read the column's distinct values before it can be dropped below.
    ucolumns = list(df[y].unique())
    st.write(y)
    if drop_button:
        # drop(..., inplace=True) modifies df and returns None, so the result
        # is deliberately not assigned to a variable.
        df.drop(columns=y, inplace=True)
        print(y)
st.table(df)
Use `DataFrame.drop`:
def remove_columns(dataset, cols):
    """Return ``dataset`` without the column(s) named by ``cols``.

    ``cols`` is handed straight to ``DataFrame.drop``, so it may be a single
    label or a list of labels. The input frame is left unmodified; the
    result is a new DataFrame.
    """
    trimmed = dataset.drop(columns=cols)
    return trimmed
Then call the function without a loop - you can pass either a scalar or a list:
# Single column: pass the column name as a scalar.
df = remove_columns(df,'location')
# Several columns: pass a list of column names.
# NOTE(review): these two calls look like alternative examples rather than a
# sequence to run back to back - confirm before executing both.
df = remove_columns(df,['location','category'])
EDIT:
If you need to remove the columns selected in the multiselect, use:
drop_button = st.sidebar.button("Remove")

# `columns` holds the values currently selected in the multiselect.
columns = st.sidebar.multiselect("Select column/s", df.columns)
print(columns)

# When the button is pressed, remove the columns named in `columns` from df.
if drop_button:
    df.drop(columns=columns, inplace=True)

st.table(df)