I am using a hugging face transformer to do some image feature extraction to use later for some similarity search functionality. This is not working currently because after processing around 200 images too much memory is being used and crashes the system... what am I doing wrong? what can I change to fix this.
Here is my feature extraction class:
import numpy as np
from transformers import AutoProcessor, AutoModelForZeroShotImageClassification, AutoTokenizer, TFCLIPModel
def expand_greyscale_image_channels(grey_pil_image):
grey_image_arr = np.array(grey_pil_image)
grey_image_arr = np.expand_dims(grey_image_arr, -1)
grey_image_arr_3_channel = grey_image_arr.repeat(3, axis=-1)
return grey_image_arr_3_channel
def get_color_image(img):
img = img.resize((224, 224))
img = img.convert('RGB')
return img
def get_greyscale_image(img):
img = img.resize((224, 224))
img = img.convert('L')
img = expand_greyscale_image_channels(img)
return img
class FeatureExtractor:
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
pass
def __init__(self, processor=None, model=None, tokenizer=None, text_model=None):
self.processor = processor
self.model = model
self.tokenizer = tokenizer
self.text_model = text_model
def model(self):
return self.model
def processor(self):
return self.processor
def extract_features(self, img, grey=False):
"""
Extract a deep feature from an input image
Args:
img: from PIL.Image.open(path) or tensorflow.keras.preprocessing.image.load_img(path)
Returns:
feature (np.ndarray): deep feature with the shape=(4096, )
"""
try:
if grey:
img = get_greyscale_image(img)
else:
img = get_color_image(img)
inputs = self.processor(images=img, return_tensors="pt")
image_features = self.model.get_image_features(**inputs)
# Use tensor.detach().numpy() instead.
image_features /= image_features.norm(dim=-1, keepdim=True)
return image_features.detach().numpy() # Normalize
except Exception as e:
print(e)
def extract_text_features(self, text):
try:
inputs = self.tokenizer([text], padding=True, return_tensors="tf")
text_features = self.text_model.get_text_features(**inputs)
text_features = text_features / np.linalg.norm(text_features)
return text_features.numpy()
except Exception as e:
print(e)
Here is the function that I run in a loop over each image url:
fe = FeatureExtractor(processor, model, tokenizer, text_model)
def get_features_for_image(image_meta):
id = image_meta["id"]
image_url = image_meta["image_url"]
# get features for image
try:
# open image from url
image = get_pil_image_from_url(image_url)
# resize image
image = image.resize((224, 224))
# if file not in features folder
# extract features
if not os.path.exists("features/" + id + ".npy"):
# with FeatureExtractor(processor, model, tokenizer, text_model) as fe:
image_features = fe.extract_features(image)
np.save("features/" + id + ".npy", image_features)
del image_features
del image
gc.collect()
# write features to the json file
# save featuers under file features/id.npy
return True
except Exception as e:
print("Error extracting features for image ", id, " error: ", e)
where is the memory leak? how can I fix it?
Here is the image of cpu usage. It is doing fine per image, as the number of images that features are extracted for in total increases so does the cpu usage. even if the model uses a lot of memory, shouldn't it recover the memory after the feature of each image is done extracting?
Figured it out. Its the line image_features.detach.numpy() in extract_features the detach() creates a separate copy of the numpy array. It is not needed here and created the leak.