I'm making an inpainting app and I'm almost getting the desired result, except that the pipeline object outputs a 512×512 image no matter what resolution I pass in. I'm running this on the CPU; it's the ONNX-converted, AMD-friendly version of Stable Diffusion.
Here's the code I think is relevant:
class CustomDiffuser:
    """Thin wrapper around the ONNX Stable Diffusion pipelines.

    Supports CPU execution or DirectML (AMD-friendly) via the ONNX Runtime
    execution provider chosen at construction time.
    """

    def __init__(self, provider: Literal['CPUExecutionProvider', 'DmlExecutionProvider'] = 'CPUExecutionProvider'):
        # Pipelines are loaded lazily by the load_model_* methods.
        self.pipe_text2image = None
        self.pipe_inpaint = None
        self.image = None
        self.sam = None
        self.provider = provider

    def load_model_for_inpainting(
        self,
        path: str = '../stable_diffusion_onnx_inpainting',
        safety_checker=None
    ):
        """Load the ONNX inpainting pipeline from *path*.

        Passing safety_checker=None disables the NSFW filter.
        """
        self.pipe_inpaint = OnnxStableDiffusionInpaintPipeline.from_pretrained(
            path,
            provider=self.provider,
            revision='onnx',
            safety_checker=safety_checker,
        )

    def inpaint_with_prompt(
        self,
        image: cv2.typing.MatLike | Image.Image,
        mask: cv2.typing.MatLike | Image.Image,
        height: int,
        width: int,
        prompt: str = '',
        negative: str = '',
        steps: int = 10,
        cfg: float = 7.5,
        noise: float = 0.75
    ):
        """Inpaint the masked region of *image* guided by *prompt*.

        height/width should be multiples of 8 (the latent downscale factor).
        Returns the pipeline output object; the result image is in
        ``output.images[0]``.
        """
        pipe = self.pipe_inpaint
        # PIL's resize takes (width, height), not (height, width).
        image = image.resize((width, height))
        mask = mask.resize((width, height))
        # BUG FIX: height and width must be forwarded to the pipeline call —
        # they default to 512 inside the pipeline, which is why the output
        # was always 512x512 regardless of the resized inputs. Also forward
        # steps and the negative prompt, which were previously accepted by
        # this method but silently ignored.
        output_image = pipe(
            prompt,
            image,
            mask,
            height=height,
            width=width,
            num_inference_steps=steps,
            guidance_scale=cfg,
            negative_prompt=negative or None,
            # NOTE(review): `strength` (the `noise` parameter) is not a
            # supported argument of the non-legacy ONNX inpainting pipeline,
            # so it is intentionally not passed — confirm against the
            # installed diffusers version before re-enabling it.
        )
        return output_image
# Build the diffuser on the CPU provider and load the ONNX inpainting weights.
diffuser = CustomDiffuser('CPUExecutionProvider')
diffuser.load_model_for_inpainting('C:/path/to/repository/stable_diffusion_onnx_inpainting')

# Run inpainting at 384x576 (width x height); keywords make the
# (image, mask, height, width, ...) parameter order explicit.
output = diffuser.inpaint_with_prompt(
    image=Image.open(image_path),
    mask=Image.fromarray(headless_selfie_mask.astype(np.uint8)),
    height=576,
    width=384,
    prompt='a picture of a man dressed in a darth vader costume, full body shot, front view, light saber',
    negative='',
)
You need to pass the height and width as follows:
output_image = pipe(
    prompt,
    image,
    mask,
    height=height,
    width=width,
    #strength=noise,
    guidance_scale=cfg
)
If you check the source code, you'll see that `height` and `width` default to 512.