Is it possible to copy a dataframe in the middle of a method chain to a new variable? Something like:
import pandas as pd
# Desired usage (does not run): capture the intermediate frame without leaving the chain.
df = (pd.DataFrame([[2, 4, 6],
[8, 10, 12],
[14, 16, 18],
])
.assign(something_else=100)
.div(2)
.copy_to_new_variable(df_imag) # Imaginary method that would copy the intermediate frame to df_imag.
.div(10)
)
print(df_imag)
This would then print:
0 1 2 something_else
0 1.0 2.0 3.0 50.0
1 4.0 5.0 6.0 50.0
2 7.0 8.0 9.0 50.0
`.copy_to_new_variable(df_imag)` could be replaced by `df_imag = df.copy()`, but that would break the method chain.
Creating variables dynamically is not a good idea, but you can easily take advantage of mutable objects like dictionaries.
You can add a new DataFrame method to do this seamlessly:
from pandas.core.base import PandasObject
### this only needs to be done once per session
def to_name(df, dic, name, copy=False):
    """Store ``df`` in ``dic`` under ``name`` and return ``df`` for chaining.

    Parameters
    ----------
    df
        Object being passed along the method chain.
    dic
        Mutable mapping that receives the stored object.
    name
        Key under which the object is stored in ``dic``.
    copy : bool, default False
        When true, ``df.copy()`` is stored instead of ``df`` itself.

    Returns
    -------
    The ``df`` that was passed in (never the copy), so the chain can continue.
    """
    dic[name] = df.copy() if copy else df
    return df


# Expose the helper as a method so it can be called inside a method chain.
PandasObject.to_name = to_name
###
# Dictionary that collects the objects captured inside the chain.
tmp = {}

df = (
    pd.DataFrame([[2, 4, 6], [8, 10, 12], [14, 16, 18]])
    .assign(something_else=100)
    .div(2)
    # Store a copy of the frame as it is at this step.
    .to_name(tmp, 'after_div2', copy=True)
    .div(10)
)

print(tmp['after_div2'])
print(df)
Output:
# tmp['after_div2']
0 1 2 something_else
0 1.0 2.0 3.0 50.0
1 4.0 5.0 6.0 50.0
2 7.0 8.0 9.0 50.0
# df
0 1 2 something_else
0 0.1 0.2 0.3 5.0
1 0.4 0.5 0.6 5.0
2 0.7 0.8 0.9 5.0
If you don't want to monkey-patch the DataFrame objects, use `pipe`:
def to_name(df, dic, name, copy=False):
    """Record ``df`` in ``dic[name]`` and hand ``df`` back unchanged.

    Intended to be passed to ``.pipe`` so an intermediate result can be
    captured without interrupting a method chain.

    Parameters
    ----------
    df
        Object flowing through the chain.
    dic
        Mutable mapping that receives the stored object.
    name
        Key used for the stored object.
    copy : bool, default False
        When true, ``df.copy()`` is stored instead of ``df`` itself.

    Returns
    -------
    The ``df`` that was passed in (never the copy).
    """
    dic[name] = df.copy() if copy else df
    return df
# Dictionary that collects the objects captured inside the chain.
tmp = {}

df = (
    pd.DataFrame([[2, 4, 6], [8, 10, 12], [14, 16, 18]])
    .assign(something_else=100)
    .div(2)
    # No monkey patching: hand the helper and its arguments to pipe.
    .pipe(to_name, tmp, 'after_div2')
    .div(10)
    # print() returns None, so `or frame` passes the frame on down the chain.
    .pipe(lambda frame: print('\nQuick alternative:', frame, sep='\n') or frame)
)

print(tmp['after_div2'])
Along the same lines, you can also add a chainable `print` method, or again use a lambda in `pipe`:
from pandas.core.base import PandasObject
### this only needs to be done once per session
def df_print(df, *args):
    """Print ``df`` (after an optional header line) and return it for chaining.

    Parameters
    ----------
    df
        Object flowing through the method chain.
    *args
        Optional values printed on their own line before ``df``; nothing
        extra is printed when no values are given.

    Returns
    -------
    The ``df`` that was passed in.
    """
    if args:
        print(*args)
    print(df)
    return df


# Expose the helper as a chainable ``.print`` method.
PandasObject.print = df_print
###
df = (
    pd.DataFrame([[2, 4, 6], [8, 10, 12], [14, 16, 18]])
    # Show the frame as constructed.
    .print()
    .assign(something_else=100)
    .div(2)
    # Show the frame again, preceded by a header line.
    .print('\nAfter 2:')
    .div(10)
    # print() returns None, so `or frame` passes the frame on down the chain.
    .pipe(lambda frame: print('\nQuick alternative:', frame, sep='\n') or frame)
)
Output:
0 1 2
0 2 4 6
1 8 10 12
2 14 16 18
After 2:
0 1 2 something_else
0 1.0 2.0 3.0 50.0
1 4.0 5.0 6.0 50.0
2 7.0 8.0 9.0 50.0
Quick alternative:
0 1 2 something_else
0 0.1 0.2 0.3 5.0
1 0.4 0.5 0.6 5.0
2 0.7 0.8 0.9 5.0
You could also create a module, `pandas_debug.py`:
from pandas.core.base import PandasObject
def df_print(df, *args):
    """Print optional header values, then ``df``, and return ``df``.

    Parameters
    ----------
    df
        Object flowing through the method chain.
    *args
        Optional values printed on one line before ``df``.

    Returns
    -------
    The ``df`` that was passed in, so the call can sit inside a chain.
    """
    if args:
        print(*args)
    print(df)
    return df


# Importing this module installs the chainable ``.print`` method.
PandasObject.print = df_print
def to_name(df, dic, name, copy=False):
    """Save ``df`` into ``dic`` under ``name`` and return ``df``.

    Parameters
    ----------
    df
        Object flowing through the method chain.
    dic
        Mutable mapping that receives the stored object.
    name
        Key under which the object is stored.
    copy : bool, default False
        When true, ``df.copy()`` is stored instead of ``df`` itself.

    Returns
    -------
    The ``df`` that was passed in (never the copy).
    """
    dic[name] = df.copy() if copy else df
    return df


# Importing this module installs the chainable ``.to_name`` method.
PandasObject.to_name = to_name
Then in your code:
import pandas as pd
import pandas_debug
# Dictionary that collects the objects captured inside the chain.
tmp = {}

df = (
    pd.DataFrame([[2, 4, 6], [8, 10, 12], [14, 16, 18]])
    .assign(something_else=100)
    .div(2)
    # Store the intermediate frame under 'after_div2'.
    .to_name(tmp, 'after_div2')
    .div(10)
    # Print the final frame without leaving the chain.
    .print()
)