I create a function like below:
import pandas as pd
DATAPATH = "path/"
df_orig = pd.read_csv(DATAPATH + "file.csv" , dtype=str, sep = "|")
def test(df = None, df2 = None):
if df == None:
pass
elif not isinstance(df, pd.DataFrame):
df_in = pd.read_csv(DATAPATH + df + ".csv", dtype=str, sep = "|")
print(f'============ Number of obs. in {df} ============\n{df_in.shape[0]:,}\n')
print(f'============ First 10 Records of {df}.csv ============\n{df_in.head(10)}\n\n\n')
print(f'============ Last 10 Records of {df}.csv ============\n{df_in.tail(10)}\n\n\n')
else:
df_in = df
print(f'============ Number of obs. ============\n{df_in.shape[0]:,}\n')
print(f'============ First 10 Records ============\n{df_in.head(10)}\n\n\n')
print(f'============ Last 10 Records ============\n{df_in.tail(10)}\n\n\n')
if df2 == None:
pass
elif not isinstance(df2 , pd.DataFrame):
df_in2 = pd.read_csv(DATAPATH + df2 + ".csv", dtype=str, sep = "|")
print(f'============ Number of obs. in {df2} ============\n{df_in2.shape[0]:,}\n')
print(f'============ First 10 Records of {df2}.csv ============\n{df_in2.head(10)}\n\n\n')
print(f'============ Last 10 Records of {df2}.csv ============\n{df_in2.tail(10)}\n\n\n')
else:
df_in2 = df2
print(f'============ Number of obs. ============\n{df_in2.shape[0]:,}\n')
print(f'============ First 10 Records ============\n{ddf_in2_in.head(10)}\n\n\n')
print(f'============ Last 10 Records ============\n{df_in2.tail(10)}\n\n\n')
test(df = df_orig, df2 = None)
However, I got the error:
Traceback (most recent call last):
File "test.py", line 21, in <module>
test(df = df_orig)
File "test.py", line 7, in test
if df == None:
File "/mypath/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py", line 1443, in __nonzero__
f"The truth value of a {type(self).__name__} is ambiguous. "
ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
Any suggestion on this?
Do I use the wrong logic for if df == None:
?
The official way of checking if DataFrames are empty:
if df.empty:
# do something
Another way is to check the length:
if len(df) == 0:
# do something
Edit: the comments lead me to believe that your order of operations is wrong here.
def test(df = None):
if not isinstance(df, pd.DataFrame):
pass # do whatever
elif df.empty:
pass # do whatever else
None
has no length, so len()
throws an error, while df.empty
will throw an error if df is None
as well. Simply check if df is a DatFrame first, this will take care of the None
values.