I am trying to calculate feature importance in a Python file, which I run through spark-submit. Since our data nodes do not have the CatBoost libraries installed, I load them manually in Python.
Then I load the model file and try to calculate feature importance. At that point I get the following error:
abc = model.get_feature_importance(type=catBoost.EFstrType().FeatureImportance(), prettified=True, thread_count=-1, verbose=False)
TypeError: __call__() missing 1 required positional argument: 'value'
Please see the code I am running below.
def getCatBoostLibraries():
    """Unpack the bundled CatBoost archive and import it as the ``catboost`` package.

    The archive is extracted into the current working directory, the
    resulting ``catboost`` folder is appended to ``sys.path``, and the
    package is loaded from its ``__init__.py``.

    Returns:
        The imported ``catboost`` module object.

    Raises:
        ImportError: If no import spec/loader can be built for the
            extracted ``__init__.py``.
    """
    working_dir = os.getcwd()
    with zipfile.ZipFile(os.path.abspath("catboost zip file path"), 'r') as zip_ref:
        zip_ref.extractall(working_dir)

    catboostFldrPath = os.path.abspath(os.path.join(working_dir, "catboost"))
    # Avoid piling up duplicate entries when this function is called repeatedly.
    if catboostFldrPath not in sys.path:
        sys.path.append(catboostFldrPath)

    configFile = os.path.abspath(os.path.join(catboostFldrPath, "__init__.py"))
    spec = importlib.util.spec_from_file_location("catboost", configFile)
    if spec is None or spec.loader is None:
        raise ImportError("Cannot build an import spec for " + configFile)

    catBoost = importlib.util.module_from_spec(spec)
    # Register the package before executing it (the importlib recipe for
    # importing a source file directly); otherwise relative imports inside
    # __init__.py cannot resolve their parent package.
    sys.modules[spec.name] = catBoost
    try:
        spec.loader.exec_module(catBoost)
    except BaseException:
        # Do not leave a half-initialised module behind for later imports.
        sys.modules.pop(spec.name, None)
        raise
    return catBoost
def getFeatureImportance(modelPath):
    """Load a saved CatBoost regressor and return its feature importances.

    Args:
        modelPath: Path of the saved model file, passed to ``load_model``.

    Returns:
        A pandas DataFrame with the columns ``features`` and ``featr_imp``.
    """
    catBoost = getCatBoostLibraries()
    model = catBoost.CatBoostRegressor()
    model.load_model(modelPath)

    # EFstrType is an enum: its members are read as attributes. Calling the
    # class or the member -- EFstrType().FeatureImportance() -- raises
    # "TypeError: __call__() missing 1 required positional argument: 'value'".
    importances = model.get_feature_importance(
        type=catBoost.EFstrType.FeatureImportance,
        prettified=True,
        thread_count=-1,
        verbose=False,
    )

    column_names = ['features', 'featr_imp']
    if isinstance(importances, pd.DataFrame) and importances.shape[1] == len(column_names):
        # NOTE(review): recent CatBoost releases appear to return a DataFrame
        # with their own column labels when prettified=True -- confirm against
        # the installed version. Passing that to pd.DataFrame(..., columns=...)
        # would select columns that do not exist and produce NaNs, so relabel
        # the two columns by position instead.
        importance_score_df = importances.copy()
        importance_score_df.columns = column_names
        return importance_score_df

    # Sequence of (feature, importance) pairs: build the frame directly.
    importance_score_df = pd.DataFrame(importances, columns=column_names)
    return importance_score_df
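It worked after I removed the parentheses in the following line. `EFstrType` is an enum, so its members are accessed as attributes (`EFstrType.FeatureImportance`) rather than called: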
abc = model.get_feature_importance(type=catBoost.EFstrType.FeatureImportance, prettified=True, thread_count=-1, verbose=False)