rust graphics glsl vulkan compute-shader

Rust Vulkano: creating a compute pipeline never finishes and freezes my video card

I'm trying to load a shader onto an AMD video card. After all the buffers are created, I try to create a new compute pipeline. When I started debugging by printing messages, I found that "Finished Creating the compute pipeline" is never printed. When I run it with `cargo run --release`, it prints "Creating pipeline with shader", but after a few seconds my whole computer freezes and I have to turn it off and back on again...

My Vulkano version is: 0.32.1; My vulkano-shaders version is: 0.32.1; My Video Card is: AMD RX570 4GB

Vulkano Physical device properties:

buffer_image_granularity: 64,
compute_units_per_shader_array: Some(
    8,
),
conformance_version: Some(
    1.2.0,
),

Cargo.toml:

[package]
name = "vulkano_matrix"
version = "0.1.0"
edition = "2021"

[dependencies]
vulkano = "0.32.1"
vulkano-shaders = "0.32.0"
rand = "0.8.4"
nalgebra="0.31.4"
colored = "2.0.0"
bytemuck = "1.12.3"

// main.rs
extern crate nalgebra as na;
use bytemuck::{Pod, Zeroable};
use colored::Colorize;
use na::{dmatrix, DMatrix};
use std::{
    io::{stdin, stdout, Write},
    sync::Arc,
    time::Instant,
};
use vulkano::{
    buffer::{BufferUsage, CpuAccessibleBuffer, DeviceLocalBuffer},
    command_buffer::{
        allocator::{CommandBufferAllocator, StandardCommandBufferAllocator},
        AutoCommandBufferBuilder, PrimaryAutoCommandBuffer, PrimaryCommandBufferAbstract,
    },
    descriptor_set::{
        allocator::StandardDescriptorSetAllocator, PersistentDescriptorSet, WriteDescriptorSet,
    },
    device::{
        physical::PhysicalDevice, Device, DeviceCreateInfo, DeviceExtensions, Features,
        QueueCreateInfo, QueueFlags,
    },
    instance::{Instance, InstanceCreateInfo},
    memory::allocator::{MemoryAllocator, StandardMemoryAllocator},
    pipeline::Pipeline,
    pipeline::{ComputePipeline, PipelineBindPoint},
    sync::GpuFuture,
    VulkanLibrary,
};

/// Padding strategy applied around the input matrix before the convolution.
///
/// `main` resolves every variant to a concrete `(rows, columns)` padding pair.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Padding {
    /// No padding: resolves to `(0, 0)`.
    None,
    /// Explicit padding, given as `(rows, columns)`.
    Fixed(usize, usize),
    /// Padding derived from the stride, input shape and kernel shape.
    Same,
}

/// Shape of a matrix: row count, column count and channel count.
///
/// Derives `Pod`/`Zeroable` so the value can be copied into a GPU buffer as raw bytes.
///
/// NOTE(review): the shader's `Dimension` struct declares these three fields as `uint`
/// (32-bit), while `usize` is 64-bit on 64-bit targets, so the byte layout uploaded from
/// here does not match what the shader reads. Switching to `u32` also requires adapting
/// the callers that currently treat the fields as `usize`.
#[repr(C)]
#[derive(Default, Copy, Clone, Debug, Zeroable, Pod)]
struct Dimension {
    /// Number of rows.
    pub rows: usize,
    /// Number of columns.
    pub columns: usize,
    /// Number of channels.
    pub channels: usize,
}

impl Dimension {
    /// Builds a single-channel `Dimension` from the `(rows, columns)` shape of `mat`.
    pub fn from_matrix<T>(mat: &DMatrix<T>) -> Self {
        let (rows, columns) = mat.shape();
        Self {
            rows,
            columns,
            channels: 1,
        }
    }
}

/// Shapes of the three matrices taking part in the convolution.
///
/// Field order follows the shader's `Dimensions` block (set 0, binding 0):
/// input matrix, kernel, output matrix.
#[repr(C)]
#[derive(Default, Copy, Clone, Debug, Zeroable, Pod)]
struct BufferDimensions {
    /// Shape of the matrix being convolved.
    pub input_matrix: Dimension,
    /// Shape of the convolution kernel.
    pub kernel: Dimension,
    /// Shape of the result matrix.
    pub output_matrix: Dimension,
}

/// Convolution parameters uploaded to the GPU.
///
/// Field order follows the shader's `Options` block (set 0, binding 4):
/// an `ivec2` padding followed by a `uint` stride.
#[repr(C)]
#[derive(Default, Copy, Clone, Debug, Zeroable, Pod)]
struct ConvolutionOptions {
    /// Padding; `main` fills it as `[rows, columns]`.
    pub padding: [i32; 2],
    /// Step between consecutive kernel applications.
    pub stride: u32,
}

/// Prints `question` (bold cyan, followed by a space) and reads one line from stdin.
///
/// The returned string is exactly what `read_line` produced, so it still contains the
/// trailing line terminator.
///
/// # Panics
///
/// Panics if stdout cannot be flushed or stdin cannot be read.
fn input(question: impl Into<String>) -> String {
    let prompt: String = question.into();
    print!("{} ", prompt.bold().cyan());
    // `print!` does not flush; force the prompt out before blocking on stdin.
    stdout().flush().expect("Could not flush stdout");

    let mut answer = String::new();
    stdin()
        .read_line(&mut answer)
        .expect("Could not read stdin");
    answer
}

fn main() {
    let library = VulkanLibrary::new().expect("Could not find vulkan.dll");
    let instance =
        Instance::new(library, InstanceCreateInfo::default()).expect("Failed to Create Instance");

    println!("Available GPUs:");
    let physical_devices = instance
        .enumerate_physical_devices()
        .expect("Could not enumerate the physical devices")
        .enumerate()
        .map(|(i, physical)| {
            println!(
                "[{}]: \"{}\"; TYPE: \"{:?}\"; API_VERSION: \"{}\"",
                i.to_string().bold().bright_magenta(),
                physical.properties().device_name.to_string().bold().green(),
                physical.properties().device_type,
                physical.api_version()
            );
            physical
        })
        .collect::<Vec<Arc<PhysicalDevice>>>();

    let physical_index = input(format!("Type the chosen [{}]:", "index".bright_magenta()))
        .replace("\n", "")
        .parse::<usize>()
        .expect("Please type a number.");
    let physical = physical_devices[physical_index].clone();
    println!(
        "Using {}; TYPE: \"{:?}\"; \n\n {:?} \n\n {:#?}",
        physical.properties().device_name.to_string().bold().green(),
        physical.properties().device_type,
        physical.api_version(),
        physical.properties()
    );
    return;

    let queue_family_index = physical
        .queue_family_properties()
        .iter()
        .position(|q| {
            q.queue_flags.intersects(&QueueFlags {
                compute: true,
                ..QueueFlags::empty()
            })
        })
        .unwrap() as u32;

    let (device, mut queues) = Device::new(
        physical,
        DeviceCreateInfo {
            enabled_features: Features::empty(),
            queue_create_infos: vec![QueueCreateInfo {
                queue_family_index,
                ..Default::default()
            }],
            ..Default::default()
        },
    )
    .expect("Failed to create device");
    let queue = queues.next().unwrap();

    let memory_allocator = StandardMemoryAllocator::new_default(device.clone());
    let descriptor_set_allocator = StandardDescriptorSetAllocator::new(device.clone());
    let command_buffer_allocator =
        StandardCommandBufferAllocator::new(device.clone(), Default::default());

    let mut builder = AutoCommandBufferBuilder::primary(
        &command_buffer_allocator,
        queue.queue_family_index(),
        vulkano::command_buffer::CommandBufferUsage::OneTimeSubmit,
    )
    .unwrap();

    let stride = 1;
    let get_result_shape = |input_shape: usize, padding: usize, ker_shape: usize| {
        (input_shape + 2 * padding - ker_shape) / stride + 1
    };
    let padding = Padding::Same;

    let input_data = dmatrix![1.0f32, 2., 3.; 4., 5., 6.; 7., 8., 9.];
    let kernel_data = dmatrix![11.0f32, 19.; 31., 55.];
    let input_shape = Dimension::from_matrix(&input_data);
    let kernel_shape = Dimension::from_matrix(&kernel_data);

    let padding = match padding {
        Padding::None => (0, 0),
        Padding::Fixed(x_p, y_p) => (x_p, y_p),
        Padding::Same => {
            let get_padding = |input_shape: usize, ker_shape: usize| {
                (((stride - 1) as i64 * input_shape as i64 - stride as i64 + ker_shape as i64)
                    as f64
                    / 2.0)
                    .ceil() as usize
            };
            (
                /* rows */
                get_padding(input_shape.rows, kernel_shape.rows),
                /* columns */
                get_padding(input_shape.columns, kernel_shape.columns),
            )
        }
    };

    let dimensions = BufferDimensions {
        input_matrix: input_shape,
        kernel: kernel_shape,
        output_matrix: Dimension {
            rows: get_result_shape(input_shape.rows, padding.0, kernel_shape.rows),
            columns: get_result_shape(input_shape.columns, padding.1, kernel_shape.columns),
            channels: 1,
        },
    };
    let options = ConvolutionOptions {
        padding: [padding.0 as i32, padding.1 as i32],
        stride: stride as u32,
    };

    let dimensions_buffer = DeviceLocalBuffer::from_data(
        &memory_allocator,
        dimensions,
        BufferUsage {
            uniform_buffer: true,
            ..BufferUsage::empty()
        },
        &mut builder,
    )
    .expect("Failed to create uniform buffer.");
    let options_buffer = DeviceLocalBuffer::from_data(
        &memory_allocator,
        options,
        BufferUsage {
            uniform_buffer: true,
            ..BufferUsage::empty()
        },
        &mut builder,
    )
    .expect("Failed to create uniform buffer.");

    println!(
        "{:?} {:?} {:?} {:?}",
        input_data, dimensions, options, kernel_data
    );

    let input_buffer = DeviceLocalBuffer::from_iter(
        &memory_allocator,
        input_data.data.as_vec().to_owned(),
        BufferUsage {
            uniform_buffer: true,
            ..BufferUsage::empty()
        },
        &mut builder,
    )
    .expect("Failed to create uniform buffer.");
    let kernel_buffer = DeviceLocalBuffer::from_iter(
        &memory_allocator,
        kernel_data.data.as_vec().to_owned(),
        BufferUsage {
            uniform_buffer: true,
            ..BufferUsage::empty()
        },
        &mut builder,
    )
    .expect("Failed to create uniform buffer.");
    let output_buffer = CpuAccessibleBuffer::from_iter(
        &memory_allocator,
        BufferUsage {
            storage_buffer: true,
            ..BufferUsage::empty()
        },
        false,
        [0..(dimensions.output_matrix.channels
            * dimensions.output_matrix.rows
            * dimensions.output_matrix.columns)]
        .map(|__| 0.0f32)
        .to_owned(),
    )
    .expect("Failed to create storage buffer.");

    println!("Loading shader");
    
let cs = cs::load(device.clone()).unwrap();

println!("Creating pipeline with shader"); // This line prints just fine 
let compute_pipeline = ComputePipeline::new(
    device.clone(),
    cs.entry_point("main").unwrap(),
    &(),
    None,
    |_| {},
)
.expect("Failed to create compute shader");
println!("Finished Creating the compute pipeline"); // THIS LINE NEVER GETS RUN

}


pub mod cs {
    use vulkano_shaders::shader;

    shader! {
        ty: "compute",
        path: "./matrix_convolution.glsl"
    }
}

The shader is:

#version 450
#pragma shader_stage(compute)

// Work-group size. The product local_size_x * local_size_y * local_size_z must not
// exceed the device's max_compute_work_group_invocations. The previous 32 * 32 * 16
// (16384 invocations) was far above that limit and made pipeline creation hang.
// 8 * 8 * 1 = 64 stays below the minimum of 128 that the Vulkan specification guarantees.
layout(local_size_x=8, local_size_y=8, local_size_z=1) in;

// Shape of one matrix.
struct Dimension {
  uint rows;
  uint columns;
  uint channels;
};

// Shapes of the input matrix, the kernel and the output matrix.
layout(set=0, binding=0) buffer Dimensions {
  Dimension input_matrix;
  Dimension kernel;
  Dimension output_matrix;
} dims_buf;


// Input matrix values.
layout(set=0, binding=1) buffer readonly InputMatrix {
  float[] input_matrix;
};

// Kernel values.
layout(set=0, binding=2) buffer readonly Kernel {
  float[] kernel;
};


// Convolution result.
layout(set=0, binding=3) buffer writeonly OutputMatrix {
   float[] output_matrix;
}; 
// Padding and stride of the convolution.
layout(set=0, binding=4) buffer Options {
   ivec2 padding;
   uint stride;
} options_buf;

void main() {
  // One invocation per (row, column, channel) coordinate.
  const uint raw_row = gl_GlobalInvocationID.x;
  const uint raw_column = gl_GlobalInvocationID.y;
  const uint raw_channel = gl_GlobalInvocationID.z;
}

I tried to run similar programs with different shaders and it worked just fine.

Solution

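It turns out that the total number of invocations in a work group must not exceed a limit reported by the device.

Therefore: local_size_x * local_size_y * local_size_z must be less than or equal to max_compute_work_group_invocations. With 32 * 32 * 16 = 16384 invocations, the shader above is far beyond that limit. The limit can be read with:

physical.properties().max_compute_work_group_invocations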