I was looking for an implementation of Strassen's algorithm in C, and I found the code listed at the end of this question. It is used through the multiply function:

void multiply(int n, matrix a, matrix b, matrix c, matrix d);

which multiplies the two n-by-n matrices a and b and puts the result in c (d is an intermediate scratch matrix). Matrices a and b should have the following type:
typedef union _matrix {
    double **d;
    union _matrix **p;
} *matrix;
I have dynamically allocated four matrices a, b, c, d (two-dimensional arrays of double) and assigned their addresses to the field _matrix.d:
#include "strassen.h"
#define SIZE 50
int main(int argc, char *argv[])
{
double ** matA, ** matB, ** matC, ** matD;
union _matrix ma, mb, mc, md;
int i = 0, j = 0, n;
matA = (double **) malloc(sizeof(double *) * SIZE);
for (i = 0; i < SIZE; i++)
matA[i] = (double *) malloc(sizeof(double) * SIZE);
// Do the same for matB, matC, matD.
ma.d = matA;
mb.d = matB;
mc.d = matC;
md.d = matD;
// Initialize matC and matD to 0.
// Read n.
// Read matA and matB.
multiply(n, &ma, &mb, &mc, &md);
return 0;
}
This code compiles successfully but crashes whenever n > BREAK.
strassen.c:

#include "strassen.h"

/* c = a * b */
void multiply(int n, matrix a, matrix b, matrix c, matrix d)
{
    if (n <= BREAK) {
        /* Base case: plain cubic multiplication on the leaf blocks. */
        double sum, **p = a->d, **q = b->d, **r = c->d;
        int i, j, k;

        for (i = 0; i < n; i++)
            for (j = 0; j < n; j++) {
                for (sum = 0., k = 0; k < n; k++)
                    sum += p[i][k] * q[k][j];
                r[i][j] = sum;
            }
    } else {
        /* Strassen recursion on half-size quadrants: the seven recursive
         * products are accumulated directly into the quadrants of c,
         * while d supplies the scratch submatrices. */
        n /= 2;
        sub(n, a12, a22, d11);
        add(n, b21, b22, d12);
        multiply(n, d11, d12, c11, d21);   /* c11 = (A12-A22)(B21+B22) */
        sub(n, a21, a11, d11);
        add(n, b11, b12, d12);
        multiply(n, d11, d12, c22, d21);   /* c22 = (A21-A11)(B11+B12) */
        add(n, a11, a12, d11);
        multiply(n, d11, b22, c12, d12);   /* c12 = (A11+A12)B22 */
        sub(n, c11, c12, c11);
        sub(n, b21, b11, d11);
        multiply(n, a22, d11, c21, d12);   /* c21 = A22(B21-B11) */
        add(n, c21, c11, c11);
        sub(n, b12, b22, d11);
        multiply(n, a11, d11, d12, d21);   /* d12 = A11(B12-B22) */
        add(n, d12, c12, c12);
        add(n, d12, c22, c22);
        add(n, a21, a22, d11);
        multiply(n, d11, b11, d12, d21);   /* d12 = (A21+A22)B11 */
        add(n, d12, c21, c21);
        sub(n, c22, d12, c22);
        add(n, a11, a22, d11);
        add(n, b11, b22, d12);
        multiply(n, d11, d12, d21, d22);   /* d21 = (A11+A22)(B11+B22) */
        add(n, d21, c11, c11);
        add(n, d21, c22, c22);
    }
}
/* c = a + b */
void add(int n, matrix a, matrix b, matrix c)
{
    if (n <= BREAK) {
        double **p = a->d, **q = b->d, **r = c->d;
        int i, j;

        for (i = 0; i < n; i++)
            for (j = 0; j < n; j++)
                r[i][j] = p[i][j] + q[i][j];
    } else {
        n /= 2;
        add(n, a11, b11, c11);
        add(n, a12, b12, c12);
        add(n, a21, b21, c21);
        add(n, a22, b22, c22);
    }
}

/* c = a - b */
void sub(int n, matrix a, matrix b, matrix c)
{
    if (n <= BREAK) {
        double **p = a->d, **q = b->d, **r = c->d;
        int i, j;

        for (i = 0; i < n; i++)
            for (j = 0; j < n; j++)
                r[i][j] = p[i][j] - q[i][j];
    } else {
        n /= 2;
        sub(n, a11, b11, c11);
        sub(n, a12, b12, c12);
        sub(n, a21, b21, c21);
        sub(n, a22, b22, c22);
    }
}
strassen.h:
#define BREAK 8

typedef union _matrix {
    double **d;
    union _matrix **p;
} *matrix;

void multiply(int n, matrix a, matrix b, matrix c, matrix d);
void add(int n, matrix a, matrix b, matrix c);
void sub(int n, matrix a, matrix b, matrix c);
/* Notational shorthand to access submatrices for matrices named a, b, c, d */
#define a11 a->p[0]
#define a12 a->p[1]
#define a21 a->p[2]
#define a22 a->p[3]
#define b11 b->p[0]
#define b12 b->p[1]
#define b21 b->p[2]
#define b22 b->p[3]
#define c11 c->p[0]
#define c12 c->p[1]
#define c21 c->p[2]
#define c22 c->p[3]
#define d11 d->p[0]
#define d12 d->p[1]
#define d21 d->p[2]
#define d22 d->p[3]
My question is how to use the multiply function correctly (that is, how to build the matrix arguments it expects).
Like Atom said, you need to correctly initialize matrix.p for both matrices.

1) First of all, your matrix is a union, so p essentially becomes d reinterpreted as union _matrix **, which doesn't make sense here - that's why it crashes. You probably need to make matrix a struct instead.
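
To make the aliasing concrete, here is roughly what happens with the union exactly as posted (matA is the array from the main() above):

union _matrix ma;
ma.d = matA;                     /* the union has a single storage slot      */
/* ma.p is now the same pointer reinterpreted as union _matrix **, so for    */
/* n > BREAK the macro a11 (a->p[0]) yields the row pointer matA[0] treated  */
/* as a matrix node, and its doubles get dereferenced as pointers - crash.   */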
Finally, p is by definition an array of submatrices, so it should be a struct _matrix * (and you'll need to malloc the actual array when needed) or a struct _matrix p[4] array (which is impossible, since a struct cannot contain an array of its own, still-incomplete type :) ).
typedef struct _matrix {
    double **d;
    struct _matrix *p;
} *matrix;
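
Note that with this struct, a->p[k] is a structure rather than a pointer, so the shorthand macros in strassen.h would need a matching tweak - for example (a sketch, assuming the struct above):

#define a11 (&a->p[0])
#define a12 (&a->p[1])
#define a21 (&a->p[2])
#define a22 (&a->p[3])
/* ...and likewise for b, c and d. */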
2) Now, let's see what p should be.
                          │
A.d -> d1 -> a[1,1] a[1,2]│a[1,3] a[1,4]
       d2 -> a[2,1] a[2,2]│a[2,3] a[2,4]
       ───────────────────┼─────────────
       d3 -> a[3,1] a[3,2]│a[3,3] a[3,4]
       d4 -> a[4,1] a[4,2]│a[4,3] a[4,4]
                          │
p points to an array of matrix structures! The trick is to make the d members of those structures point into A in such a way that (p[k].d)[i][j] is the corresponding element of the k-th submatrix:
p[0].d -> p01 -> a[1,1]     p[1].d -> p11 -> a[1,3]
          p02 -> a[2,1]               p12 -> a[2,3]

p[2].d -> p21 -> a[3,1]     p[3].d -> p31 -> a[3,3]
          p22 -> a[4,1]               p32 -> a[4,3]
Can you now deduce the algorithm to initialize p for a square A of an arbitrary even size? And WHEN to initialize it in the first place? ;)
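
If you want something to check your answer against, here is a minimal sketch of one possible initialization, assuming the struct variant and the adjusted macros above, and assuming n halves cleanly down to BREAK (e.g. n = BREAK * 2^k). The helper name build_tree and its parameters are hypothetical, not part of the original code:

#include <stdlib.h>
#include "strassen.h"   /* assumes the struct variant of _matrix above */

/* Build the quadtree node describing the n-by-n submatrix whose top-left
 * element is rows[row0][col0], where "rows" are the row pointers of the
 * full array allocated in main(). */
static void build_tree(struct _matrix *node, double **rows,
                       int row0, int col0, int n)
{
    if (n <= BREAK) {
        int i;
        /* Leaf: node->d[i][j] must address element (row0 + i, col0 + j). */
        node->d = malloc(n * sizeof(double *));
        for (i = 0; i < n; i++)
            node->d[i] = rows[row0 + i] + col0;
        node->p = NULL;
    } else {
        int h = n / 2;
        /* Internal node: four half-size quadrants in row-major order. */
        node->p = malloc(4 * sizeof(struct _matrix));
        build_tree(&node->p[0], rows, row0,     col0,     h);  /* 11 */
        build_tree(&node->p[1], rows, row0,     col0 + h, h);  /* 12 */
        build_tree(&node->p[2], rows, row0 + h, col0,     h);  /* 21 */
        build_tree(&node->p[3], rows, row0 + h, col0 + h, h);  /* 22 */
        node->d = NULL;
    }
}

And as for the WHEN: once per matrix in main(), after the double ** arrays are allocated and before the first call to multiply, roughly like this:

struct _matrix ma, mb, mc, md;
build_tree(&ma, matA, 0, 0, n);
build_tree(&mb, matB, 0, 0, n);
build_tree(&mc, matC, 0, 0, n);
build_tree(&md, matD, 0, 0, n);
multiply(n, &ma, &mb, &mc, &md);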