How do I find out which file is causing a KeyError in Python?

I have written a preprocessing script in Python that helps consolidate confidence. Below is my script:

import pandas as pd
import numpy as np
from pathlib import Path
import glob as glob


inp_dir = Path(r'C:/Users/jtharian/Desktop/bbc/')  # directory whose CSV files are rewritten in place

for file in inp_dir.glob('*.csv'):  # each path in inp_dir matching *.csv
    df = pd.read_csv(file, sep=',', quotechar='|',error_bad_lines=False)  # comma-separated, '|' used as the quote character
    df['confidence'] = df['confidence'].replace(np.nan, 0.01)  # NaN -> 0.01; raises KeyError if the column is absent
    df.to_csv(file,index=False)  # overwrite the same file, omitting the index

Error:

Traceback (most recent call last):

  File "C:\Users\jtharian\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 3080, in get_loc
    return self._engine.get_loc(casted_key)

  File "pandas\_libs\index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc

  File "pandas\_libs\index.pyx", line 101, in pandas._libs.index.IndexEngine.get_loc

  File "pandas\_libs\hashtable_class_helper.pxi", line 4554, in pandas._libs.hashtable.PyObjectHashTable.get_item

  File "pandas\_libs\hashtable_class_helper.pxi", line 4562, in pandas._libs.hashtable.PyObjectHashTable.get_item

KeyError: 'confidence'


The above exception was the direct cause of the following exception:

Traceback (most recent call last):

  File "<ipython-input-1-0cbf17caf540>", line 11, in <module>
    df['confidence'] = df['confidence'].replace(np.nan, 0.01)

  File "C:\Users\jtharian\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py", line 3024, in __getitem__
    indexer = self.columns.get_loc(key)

  File "C:\Users\jtharian\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 3082, in get_loc
    raise KeyError(key) from err

KeyError: 'confidence'

I understand that I am receiving this error because one of the files in my directory does not have the column 'confidence'. But how can I locate that file, or print its file name?

Solution

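Check whether 'confidence' is listed among the column names of each file and, if it is not, print the file name and break out of the loop. Note that `break` stops at the first such file; use `continue` instead if you want every file that lacks the column to be reported.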

import pandas as pd
import numpy as np
from pathlib import Path
import glob as glob


# Directory containing the CSV files to pre-process.
inp_dir = Path(r'C:/Users/jtharian/Desktop/bbc/')

# Walk the CSV files, filling missing confidence values with 0.01 and
# writing each file back in place. The loop stops at the first file that
# has no 'confidence' column, after printing that file's path.
for csv_path in inp_dir.glob('*.csv'):
    frame = pd.read_csv(
        csv_path,
        sep=',',
        quotechar='|',
        error_bad_lines=False,
    )

    has_confidence = 'confidence' in frame.columns
    if not has_confidence:
        # This is the file that would otherwise trigger the KeyError.
        print(f'filename: {csv_path}')
        break

    confidence = frame['confidence']
    frame['confidence'] = confidence.replace(np.nan, 0.01)
    frame.to_csv(csv_path, index=False)