MLIR for multiplying 2 matrices

I am trying to multiply two matrices using MLIR. However, the IR does not compile and gives this error:

./matmult.mlir:24:16: error: custom op 'linalg.fill' [parseNamedStructuredOpRegion] ods-gen generated region expects 2 args, got 0

The error happens on this line (linalg.fill(%A, %cf1) : memref<2048x2048xf64>, f64). Although two arguments are provided to linalg.fill, the error says it got none. I am using LLVM 16 and this command to compile (mlir-opt -convert-linalg-to-loops ./matmult.mlir)

MLIR file:

    // C += A * B.
func.func @matmul(%A: memref<2048x2048xf64>, %B: memref<2048x2048xf64>, %C: memref<2048x2048xf64>) {  // accumulates into %C in place; returns no value
    affine.for %arg3 = 0 to 2048 {  // %arg3: first index of %A and %C
        affine.for %arg4 = 0 to 2048 {  // %arg4: second index of %B and %C
            affine.for %arg5 = 0 to 2048 {  // %arg5: reduction index shared by %A and %B
                %a = affine.load %A[%arg3, %arg5] : memref<2048x2048xf64>
                %b = affine.load %B[%arg5, %arg4] : memref<2048x2048xf64>
                %ci = affine.load %C[%arg3, %arg4] : memref<2048x2048xf64>  // current partial sum, re-read on every iteration
                %p = arith.mulf %a, %b : f64
                %co = arith.addf %ci, %p : f64
                affine.store %co, %C[%arg3, %arg4] : memref<2048x2048xf64>  // write the updated sum back to %C
            }
        }
    }
return
}

func.func @main() {  // driver: allocate three buffers, fill each with 1.0, run @matmul, print %C
    %A = memref.alloc() : memref<2048x2048xf64>
    %B = memref.alloc() : memref<2048x2048xf64>
    %C = memref.alloc() : memref<2048x2048xf64>  // NOTE(review): none of the three buffers is deallocated in this function
    %cf1 = llvm.mlir.constant(1.00000e+00 : f64) : f64  // the fill value, 1.0

    linalg.fill(%A, %cf1) : memref<2048x2048xf64>, f64  // the line the question reports as failing to parse (matmult.mlir:24:16)
    linalg.fill(%B, %cf1) : memref<2048x2048xf64>, f64
    linalg.fill(%C, %cf1) : memref<2048x2048xf64>, f64  // %C starts at 1.0, not 0, and @matmul adds into it
    call @matmul(%A, %B, %C) : (memref<2048x2048xf64>, memref<2048x2048xf64>, memref<2048x2048xf64>) -> ()
    call @print_memref_2d_f64(%C): (memref<2048x2048xf64>) -> ()  // callee is only declared (no body) after this function
    return
}

func.func @print_memref_2d_f64(memref<2048x2048xf64>)  // bodiless declaration; NOTE(review): MLIR normally requires `private` visibility on declarations - confirm

Solution

The syntax of this op seems to have changed. Replacing all of those linalg.fill lines with

// The fill value goes in ins(...), the destination memref in outs(...); the question's form was linalg.fill(%A, %cf1).
linalg.fill ins(%cf1 : f64) outs(%A : memref<2048x2048xf64>)
linalg.fill ins(%cf1 : f64) outs(%B : memref<2048x2048xf64>)
linalg.fill ins(%cf1 : f64) outs(%C : memref<2048x2048xf64>)

Produces:

module {
  // Same loop nest as the input @matmul; the named values appear here as %arg0..%arg2 and %0..%4.
  func.func @matmul(%arg0: memref<2048x2048xf64>, %arg1: memref<2048x2048xf64>, %arg2: memref<2048x2048xf64>) {
    affine.for %arg3 = 0 to 2048 {
      affine.for %arg4 = 0 to 2048 {
        affine.for %arg5 = 0 to 2048 {
          %0 = affine.load %arg0[%arg3, %arg5] : memref<2048x2048xf64>
          %1 = affine.load %arg1[%arg5, %arg4] : memref<2048x2048xf64>
          %2 = affine.load %arg2[%arg3, %arg4] : memref<2048x2048xf64>
          %3 = arith.mulf %0, %1 : f64
          %4 = arith.addf %2, %3 : f64
          affine.store %4, %arg2[%arg3, %arg4] : memref<2048x2048xf64>
        }
      }
    }
    return
  }
  func.func @main() {
    // Loop bounds and step used by the scf.for nests below.
    %c2048 = arith.constant 2048 : index
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %0 = llvm.mlir.constant(1.000000e+00 : f64) : f64
    %1 = memref.alloc() : memref<2048x2048xf64>
    %2 = memref.alloc() : memref<2048x2048xf64>
    %3 = memref.alloc() : memref<2048x2048xf64>
    // Each of the three fills appears as a two-level scf.for nest that stores %0 (1.0) into every element.
    scf.for %arg0 = %c0 to %c2048 step %c1 {
      scf.for %arg1 = %c0 to %c2048 step %c1 {
        memref.store %0, %1[%arg0, %arg1] : memref<2048x2048xf64>
      }
    }
    scf.for %arg0 = %c0 to %c2048 step %c1 {
      scf.for %arg1 = %c0 to %c2048 step %c1 {
        memref.store %0, %2[%arg0, %arg1] : memref<2048x2048xf64>
      }
    }
    scf.for %arg0 = %c0 to %c2048 step %c1 {
      scf.for %arg1 = %c0 to %c2048 step %c1 {
        memref.store %0, %3[%arg0, %arg1] : memref<2048x2048xf64>
      }
    }
    // There is no call to @print_memref_2d_f64 in this listing; only @matmul is invoked.
    call @matmul(%1, %2, %3) : (memref<2048x2048xf64>, memref<2048x2048xf64>, memref<2048x2048xf64>) -> ()
    return
  }
}

(I commented out the call to the print_memref_2d_f64 function because its definition is missing.)

I took the usage syntax from here.