Search code examples
python · pytorch · huggingface-transformers · stable-diffusion

How to configure inference settings to generate images with the Stable Diffusion XL pipeline?


I'm working with the Stable Diffusion XL (SDXL) model from Hugging Face's diffusers library and I want to set these inference parameters:

  • width: Width of the image in pixels.
  • height: Height of the image in pixels.
  • steps: Number of inference steps performed during image generation.
  • cfg_scale: How strictly the diffusion process adheres to the prompt text (higher values keep your image closer to your prompt).

Here's a minimal example of my current implementation:

import os
import datetime

from diffusers import DiffusionPipeline
import torch

if __name__ == "__main__":
    # Make sure the destination folder exists before any image is generated.
    save_dir = "output_images"
    os.makedirs(save_dir, exist_ok=True)

    # https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
    model_id = "stabilityai/stable-diffusion-xl-base-1.0"
    load_options = {
        "torch_dtype": torch.float16,
        "use_safetensors": True,
        "variant": "fp16",
    }
    sdxl = DiffusionPipeline.from_pretrained(model_id, **load_options)
    sdxl.to("cuda")
    # xformers attention is switched on for memory efficiency
    sdxl.enable_xformers_memory_efficient_attention()

    text = "Extreme close up of a slice a lemon with splashing green cocktail, alcohol,  healthy food photography"

    # Only the prompt is passed here; every other setting is left to the pipeline.
    result = sdxl(prompt=text)
    first_image = result.images[0]

    # The file name carries the time at which generation finished.
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    image_path = os.path.join(output_dir := save_dir, f"output_{timestamp}.jpg")
    first_image.save(image_path)

    print(f"Image saved at: {image_path}")

How can I set the inference parameters?


Solution

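  • Here is my solution: pass the settings as keyword arguments when calling the pipeline. `width` and `height` keep their names, `steps` corresponds to `num_inference_steps`, and `cfg_scale` corresponds to `guidance_scale`.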

    import os
    import datetime
    
    from diffusers import DiffusionPipeline
    import torch
    
    if __name__ == "__main__":
        output_dir = "output_images"
        os.makedirs(output_dir, exist_ok=True)

        # Inference settings (example values; adjust to taste).
        # They are defined up front so the pipeline call below does not
        # reference undefined names.
        width = 1024                # Width of the image in pixels.
        height = 1024               # Height of the image in pixels.
        num_inference_steps = 50    # "steps": number of inference steps performed on image generation.
        guidance_scale = 5.0        # "cfg_scale": how strictly the diffusion process adheres to the
                                    # prompt text (higher values keep the image closer to the prompt).

        pipe = DiffusionPipeline.from_pretrained(
            # https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
            "stabilityai/stable-diffusion-xl-base-1.0",
            torch_dtype=torch.float16,
            use_safetensors=True,
            variant="fp16",
        )
        pipe.to("cuda")
        # enabling xformers for memory efficiency
        pipe.enable_xformers_memory_efficient_attention()

        prompt = "Extreme close up of a slice a lemon with splashing green cocktail, alcohol,  healthy food photography"

        # All inference settings are passed as keyword arguments to the pipeline call.
        images = pipe(
            prompt=prompt,
            negative_prompt='',
            width=width,
            height=height,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            num_images_per_prompt=1,
        ).images

        # Timestamped file name so successive runs do not overwrite each other
        # (at one-second resolution).
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        image_path = os.path.join(output_dir, f"output_{timestamp}.jpg")
        images[0].save(image_path)

        print(f"Image saved at: {image_path}")