Search code examples
c99 simd variable-length-array restrict-qualifier auto-vectorization

using restrict qualifier with C99 variable length arrays (VLAs)


I am exploring how different implementations of simple loops in C99 auto-vectorize based upon the function signature.

Here is my code:

/* Hook for an optional vectorization pragma; expands to nothing here.
 * Alternative definition: #define PRAGMA_SIMD _Pragma("simd") */
#define PRAGMA_SIMD

/* Alignment hint: the Intel compiler gets __assume_aligned with a 64-byte
 * alignment; on any other compiler the macro expands to nothing. */
#if defined(__INTEL_COMPILER)
#  define ASSUME_ALIGNED(a) __assume_aligned(a,64)
#else
#  define ASSUME_ALIGNED(a)
#endif

/* Qualifier applied to the array parameters further down; empty unless it
 * is supplied from outside, e.g. -DARRAY_RESTRICT=restrict. */
#if !defined(ARRAY_RESTRICT)
#  define ARRAY_RESTRICT
#endif

/*
 * foo1 - restrict-qualified pointers, compile-time trip count of 2048,
 * selection expressed as an if/else statement.
 *
 * For every index, a[] takes the value of b[] when c[] is greater than
 * zero and 0.0 otherwise.
 */
void foo1(double * restrict a, const double * restrict b, const double * restrict c)
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < 2048; idx++)
    {
        if (c[idx] > 0)
        {
            a[idx] = b[idx];
        }
        else
        {
            a[idx] = 0.0;
        }
    }
}

/*
 * foo2 - restrict-qualified pointers, compile-time trip count of 2048,
 * selection expressed as a conditional (?:) expression.
 *
 * For every index, a[] takes the value of b[] when c[] is greater than
 * zero and 0.0 otherwise.
 */
void foo2(double * restrict a, const double * restrict b, const double * restrict c)
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < 2048; idx++)
    {
        a[idx] = (c[idx] > 0) ? b[idx] : 0.0;
    }
}

/* Undetermined size version */

/*
 * foo3 - restrict-qualified pointers, trip count n supplied by the caller,
 * selection expressed as an if/else statement.
 *
 * For every index below n, a[] takes the value of b[] when c[] is greater
 * than zero and 0.0 otherwise.
 */
void foo3(int n, double * restrict a, const double * restrict b, const double * restrict c)
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < n; idx++)
    {
        if (c[idx] > 0)
        {
            a[idx] = b[idx];
        }
        else
        {
            a[idx] = 0.0;
        }
    }
}

/*
 * foo4 - restrict-qualified pointers, trip count n supplied by the caller,
 * selection expressed as a conditional (?:) expression.
 *
 * For every index below n, a[] takes the value of b[] when c[] is greater
 * than zero and 0.0 otherwise.
 */
void foo4(int n, double * restrict a, const double * restrict b, const double * restrict c)
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < n; idx++)
    {
        a[idx] = (c[idx] > 0) ? b[idx] : 0.0;
    }
}

/* Static array versions */

/*
 * foo5 - parameters declared as arrays of 2048 doubles, selection
 * expressed as an if/else statement.
 *
 * ARRAY_RESTRICT sits between the element type and the parameter name; it
 * expands to nothing unless it is defined from outside this file.
 *
 * For every index, a[] takes the value of b[] when c[] is greater than
 * zero and 0.0 otherwise.
 */
void foo5(double ARRAY_RESTRICT a[2048], const double ARRAY_RESTRICT b[2048], const double ARRAY_RESTRICT c[2048])
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < 2048; idx++)
    {
        if (c[idx] > 0)
        {
            a[idx] = b[idx];
        }
        else
        {
            a[idx] = 0.0;
        }
    }
}

/*
 * foo6 - parameters declared as arrays of 2048 doubles, selection
 * expressed as a conditional (?:) expression.
 *
 * ARRAY_RESTRICT sits between the element type and the parameter name; it
 * expands to nothing unless it is defined from outside this file.
 *
 * For every index, a[] takes the value of b[] when c[] is greater than
 * zero and 0.0 otherwise.
 */
void foo6(double ARRAY_RESTRICT a[2048], const double ARRAY_RESTRICT b[2048], const double ARRAY_RESTRICT c[2048])
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < 2048; idx++)
    {
        a[idx] = (c[idx] > 0) ? b[idx] : 0.0;
    }
}

/* VLA versions */

/*
 * foo7 - parameters declared as variable length arrays of n doubles,
 * selection expressed as an if/else statement.
 *
 * ARRAY_RESTRICT sits between the element type and the parameter name; it
 * expands to nothing unless it is defined from outside this file.
 *
 * For every index below n, a[] takes the value of b[] when c[] is greater
 * than zero and 0.0 otherwise.
 */
void foo7(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n], const double ARRAY_RESTRICT c[n])
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < n; idx++)
    {
        if (c[idx] > 0)
        {
            a[idx] = b[idx];
        }
        else
        {
            a[idx] = 0.0;
        }
    }
}

/*
 * foo8 - parameters declared as variable length arrays of n doubles,
 * selection expressed as a conditional (?:) expression.
 *
 * ARRAY_RESTRICT sits between the element type and the parameter name; it
 * expands to nothing unless it is defined from outside this file.
 *
 * For every index below n, a[] takes the value of b[] when c[] is greater
 * than zero and 0.0 otherwise.
 */
void foo8(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n], const double ARRAY_RESTRICT c[n])
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < n; idx++)
    {
        a[idx] = (c[idx] > 0) ? b[idx] : 0.0;
    }
}

When I compile with

$ icc -O3 -std=c99 -opt-report5 -mavx -S foo.c 
icc: remark #10397: optimization reports are generated in *.optrpt files in the output location

I see that the VLA cases are not auto-vectorized, but they are once I add the -fno-alias flag, which asserts that no aliasing occurs. I therefore conclude that I should express this no-aliasing guarantee in the source itself, so I attempt to do that by compiling with

$ icc -O3 -std=c99 -opt-report5 -mavx -DARRAY_RESTRICT=restrict -S foo.c 
icc: remark #10397: optimization reports are generated in *.optrpt files in the output location

The compiler error output includes

foo.c(98): error: "restrict" is not allowed
void foo7(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n], 
const double ARRAY_RESTRICT c[n]) 

             ^

but as you can see, restrict is not allowed on my VLA arguments.

So my question is: is there no way to assert no aliasing of VLA in ISO C?

Note that I can assert no aliasing in the source code using pragmas - e.g. simd, omp simd, ivdep etc. - and get the auto-vectorization that I want but these aren't ISO C.

In this context, ISO C means the most recent version of C, which of course is C11 as of the writing of this post.


Solution

  • Your original code fails nicely for me with messages such as:

     void foo7(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n], const double ARRAY_RESTRICT c[n])
     ^
    restrict.c:126:1: error: invalid use of ‘restrict’
    restrict.c:126:1: error: invalid use of ‘restrict’
    restrict.c:145:1: error: invalid use of ‘restrict’
    

    Transferring selected parts of the comments

    §6.7.6.3 Function declarators (including prototypes) has Example 5 which says the following function prototype declarators are equivalent:

    void f(double (* restrict a)[5]);
    void f(double a[restrict][5]);
    void f(double a[restrict 3][5]);
    void f(double a[restrict static 3][5]);
    

    This is the only place in the standard where restrict appears associated directly with array types. §6.7.6 is on declarators generally, and §6.7.6.2 on array declarators, and it looks to me as though the restrict has to appear inside the square brackets of the first (outermost) array dimension, ahead of the size expression; there it qualifies the pointer that the array parameter is adjusted to. In your context, it should be:

    void foo7(int n, double a[ARRAY_RESTRICT n],
               const double b[ARRAY_RESTRICT n],
               const double c[ARRAY_RESTRICT n])
    

    I wouldn't have believed that notation without seeing the examples in the standard and you asking the question! Note that this applies to fixed-size array parameters as well as to VLA parameters.

    This revised code, based on the commentary, compiles cleanly under the same compilation options:

    gcc -g -O3 -std=c11 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes \
        -Wold-style-definition -Wold-style-declaration -Werror -c new.restrict.c
    

    The compilation options demand prior declarations of non-static functions, hence the declarations at the top of the file. I also forced #define ARRAY_RESTRICT restrict in the source, rather than leaving it as a compilation option.

    The compiler is GCC 4.9.2 running on an Ubuntu 14.04 derivative.

    File new.restrict.c:

    /* Hook for an optional vectorization pragma; expands to nothing here.
     * Alternative definition: #define PRAGMA_SIMD _Pragma("simd") */
    #define PRAGMA_SIMD

    /* Alignment hint: the Intel compiler gets __assume_aligned with a 64-byte
     * alignment; on any other compiler the macro expands to nothing. */
    #if defined(__INTEL_COMPILER)
    #  define ASSUME_ALIGNED(a) __assume_aligned(a, 64)
    #else
    #  define ASSUME_ALIGNED(a)
    #endif

    /* Force the qualifier in the source instead of passing it with -D. */
    #define ARRAY_RESTRICT restrict

    /* Empty fallback; not taken in this file because ARRAY_RESTRICT is
     * defined unconditionally just above. */
    #if !defined(ARRAY_RESTRICT)
    #  define ARRAY_RESTRICT
    #endif
    
    /* Prior declarations for the non-static functions defined below
     * (the build flags include -Wmissing-prototypes). */
    void foo1(double *restrict a,
              const double *restrict b,
              const double *restrict c);
    void foo2(double *restrict a,
              const double *restrict b,
              const double *restrict c);
    void foo3(int n,
              double *restrict a,
              const double *restrict b,
              const double *restrict c);
    void foo4(int n,
              double *restrict a,
              const double *restrict b,
              const double *restrict c);
    void foo5(double a[ARRAY_RESTRICT 2048],
              const double b[ARRAY_RESTRICT 2048],
              const double c[ARRAY_RESTRICT 2048]);
    void foo6(double a[ARRAY_RESTRICT 2048],
              const double b[ARRAY_RESTRICT 2048],
              const double c[ARRAY_RESTRICT 2048]);
    void foo7(int n,
              double a[ARRAY_RESTRICT n],
              const double b[ARRAY_RESTRICT n],
              const double c[ARRAY_RESTRICT n]);
    void foo8(int n,
              double a[ARRAY_RESTRICT n],
              const double b[ARRAY_RESTRICT n],
              const double c[ARRAY_RESTRICT n]);
    
    /*
     * foo1 - restrict-qualified pointers, compile-time trip count of 2048,
     * selection expressed as an if/else statement.
     *
     * For every index, a[] takes the value of b[] when c[] is greater than
     * zero and 0.0 otherwise.
     */
    void foo1(double *restrict a, const double *restrict b, const double *restrict c)
    {
        ASSUME_ALIGNED(a);
        ASSUME_ALIGNED(b);
        ASSUME_ALIGNED(c);
        PRAGMA_SIMD
        for (int k = 0; k < 2048; k++) {
            if (c[k] > 0) {
                a[k] = b[k];
            } else {
                a[k] = 0.0;
            }
        }
    }
    
    /*
     * foo2 - restrict-qualified pointers, compile-time trip count of 2048,
     * selection expressed as a conditional (?:) expression.
     *
     * For every index, a[] takes the value of b[] when c[] is greater than
     * zero and 0.0 otherwise.
     */
    void foo2(double *restrict a, const double *restrict b, const double *restrict c)
    {
        ASSUME_ALIGNED(a);
        ASSUME_ALIGNED(b);
        ASSUME_ALIGNED(c);
        PRAGMA_SIMD
        for (int k = 0; k < 2048; k++) {
            a[k] = (c[k] > 0) ? b[k] : 0.0;
        }
    }
    
    /* Undetermined size version */
    
    /*
     * foo3 - restrict-qualified pointers, trip count n supplied by the caller,
     * selection expressed as an if/else statement.
     *
     * For every index below n, a[] takes the value of b[] when c[] is greater
     * than zero and 0.0 otherwise.
     */
    void foo3(int n, double *restrict a, const double *restrict b, const double *restrict c)
    {
        ASSUME_ALIGNED(a);
        ASSUME_ALIGNED(b);
        ASSUME_ALIGNED(c);
        PRAGMA_SIMD
        for (int k = 0; k < n; k++) {
            if (c[k] > 0) {
                a[k] = b[k];
            } else {
                a[k] = 0.0;
            }
        }
    }
    
    /*
     * foo4 - restrict-qualified pointers, trip count n supplied by the caller,
     * selection expressed as a conditional (?:) expression.
     *
     * For every index below n, a[] takes the value of b[] when c[] is greater
     * than zero and 0.0 otherwise.
     */
    void foo4(int n, double *restrict a, const double *restrict b, const double *restrict c)
    {
        ASSUME_ALIGNED(a);
        ASSUME_ALIGNED(b);
        ASSUME_ALIGNED(c);
        PRAGMA_SIMD
        for (int k = 0; k < n; k++) {
            a[k] = (c[k] > 0) ? b[k] : 0.0;
        }
    }
    
    /* Static array versions */
    
    /*
     * foo5 - parameters declared as arrays of 2048 doubles, selection
     * expressed as an if/else statement.
     *
     * ARRAY_RESTRICT is written inside the brackets, ahead of the size, so
     * the qualifier applies to the pointer each array parameter is adjusted
     * to.
     *
     * For every index, a[] takes the value of b[] when c[] is greater than
     * zero and 0.0 otherwise.
     */
    void foo5(double a[ARRAY_RESTRICT 2048], const double b[ARRAY_RESTRICT 2048], const double c[ARRAY_RESTRICT 2048])
    {
        ASSUME_ALIGNED(a);
        ASSUME_ALIGNED(b);
        ASSUME_ALIGNED(c);
        PRAGMA_SIMD
        for (int k = 0; k < 2048; k++) {
            if (c[k] > 0) {
                a[k] = b[k];
            } else {
                a[k] = 0.0;
            }
        }
    }
    
    /*
     * foo6 - parameters declared as arrays of 2048 doubles, selection
     * expressed as a conditional (?:) expression.
     *
     * ARRAY_RESTRICT is written inside the brackets, ahead of the size, so
     * the qualifier applies to the pointer each array parameter is adjusted
     * to.
     *
     * For every index, a[] takes the value of b[] when c[] is greater than
     * zero and 0.0 otherwise.
     */
    void foo6(double a[ARRAY_RESTRICT 2048], const double b[ARRAY_RESTRICT 2048], const double c[ARRAY_RESTRICT 2048])
    {
        ASSUME_ALIGNED(a);
        ASSUME_ALIGNED(b);
        ASSUME_ALIGNED(c);
        PRAGMA_SIMD
        for (int k = 0; k < 2048; k++) {
            a[k] = (c[k] > 0) ? b[k] : 0.0;
        }
    }
    
    /* VLA versions */
    
    /*
     * foo7 - parameters declared as variable length arrays of n doubles,
     * selection expressed as an if/else statement.
     *
     * ARRAY_RESTRICT is written inside the brackets, ahead of the size, so
     * the qualifier applies to the pointer each array parameter is adjusted
     * to.
     *
     * For every index below n, a[] takes the value of b[] when c[] is greater
     * than zero and 0.0 otherwise.
     */
    void foo7(int n, double a[ARRAY_RESTRICT n], const double b[ARRAY_RESTRICT n], const double c[ARRAY_RESTRICT n])
    {
        ASSUME_ALIGNED(a);
        ASSUME_ALIGNED(b);
        ASSUME_ALIGNED(c);
        PRAGMA_SIMD
        for (int k = 0; k < n; k++) {
            if (c[k] > 0) {
                a[k] = b[k];
            } else {
                a[k] = 0.0;
            }
        }
    }
    
    /*
     * foo8 - parameters declared as variable length arrays of n doubles,
     * selection expressed as a conditional (?:) expression.
     *
     * ARRAY_RESTRICT is written inside the brackets, ahead of the size, so
     * the qualifier applies to the pointer each array parameter is adjusted
     * to.
     *
     * For every index below n, a[] takes the value of b[] when c[] is greater
     * than zero and 0.0 otherwise.
     */
    void foo8(int n, double a[ARRAY_RESTRICT n], const double b[ARRAY_RESTRICT n], const double c[ARRAY_RESTRICT n])
    {
        ASSUME_ALIGNED(a);
        ASSUME_ALIGNED(b);
        ASSUME_ALIGNED(c);
        PRAGMA_SIMD
        for (int k = 0; k < n; k++) {
            a[k] = (c[k] > 0) ? b[k] : 0.0;
        }
    }