Search code examples
c++ opengl cuda interop vao

My VAO doesn't work, and how do I change it with Cuda?


I'm trying to draw a bunch of dots on screen with CUDA 10.1 and OpenGL 4.6 interop. For now, though, I am just loading a triangle from the CPU for testing. I am new to OpenGL and this is my first time writing a Vertex Array Object, so my first questions are: What is wrong with my VAO code? Why is my triangle not drawing? I have tried to figure it out to the best of my ability. My second question: If I use CUDA to change the data inside the two VBOs associated with the VAO, will the VAO still update and draw the change?

Here is my code (sorry that it's uncommented, I'm on a time crunch):

GPUmain.cuh:

#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/remove.h>
#include <curand.h>
#include <GL/glew.h>
#include <SDL_opengl.h>
#include <cuda_gl_interop.h>

// Turns the byte offset `i` into a `char *` by adding it to a null pointer.
// Presumably intended for GL calls that take a buffer offset through a pointer
// parameter; nothing in the code shown here uses it.
// NOTE(review): arithmetic on a null pointer is formally undefined behaviour
// in C++ -- confirm before relying on this macro.
#define BUFFER_OFFSET(i) ((char *)NULL + (i))

// Vertex position: three GLint components (x, y, z).
// This is the element type uploaded to the position buffer in GPU::init().
struct pos {

    GLint x, y, z;

};

// Vertex colour: four GLubyte channels (r, g, b, a).
// This is the element type uploaded to the colour buffer in GPU::init().
struct col {

    GLubyte r, g, b, a;

};

// Two double-precision values, `spd` and `dir`.
// Presumably per-particle speed and direction -- TODO confirm; nothing in the
// code shown here reads or writes this struct.
struct phy {

    GLdouble spd;

    GLdouble dir;

};

// One vertex: a position `p` followed by a colour `c`.
// Element type of GPU::verts and of the array the kernels operate on.
struct ver {

    pos p;

    col c;

};

// Static-only holder for the OpenGL objects, the CUDA interop handles and the
// device-side vertex data used for rendering. Every member is static, so the
// class is used through GPU::name without creating an instance.
class GPU {

public:

    // Vertex count. In the code shown it is only assigned inside the
    // commented-out section of init().
    static int nParticles;

    // Buffer object that receives the position data (array of pos).
    static GLuint vboidP;

    // Buffer object that receives the colour data (array of col).
    static GLuint vboidC;

    // Vertex array object that records both attribute bindings.
    static GLuint vaoid;

    // Vertex shader object.
    static GLuint vrshaderid;

    // Fragment shader object.
    static GLuint frshaderid;

    // Linked shader program made of the two shaders above.
    static GLuint lkshaderid;

    // CUDA graphics resource for vboidP; only referenced in commented-out code.
    static cudaGraphicsResource *CGRp;

    // CUDA graphics resource for vboidC; only referenced in commented-out code.
    static cudaGraphicsResource *CGRc;

    // GLSL sources: [0] is the vertex shader, [1] the fragment shader.
    static const char* shaders[2];

    // Device-side array of vertices.
    static thrust::device_vector<ver> verts;

    // Compiles and links the shaders and creates the VAO and both VBOs.
    // w and h are only used by the commented-out particle setup.
    static void init(int w, int h);

    // Currently has an empty body.
    static void compute();

    // Clears the framebuffer and draws three vertices from the VAO as a triangle.
    static void render();

    // Calls compute() and then render().
    static void GPUmain();

    // Deletes the vertex array and buffer objects and releases the storage
    // held by verts.
    static void free();

};

GPUmain.cu:

#include "GPUmain.cuh"

// Splits the interleaved vertex array `ve` into a position array `po` and a
// colour array `co`; each thread copies exactly one element.
//
// Launch layout: 1D grid of 1D blocks, thread i handles element i.
// Shared memory: none.
//
// ve    - input vertices (device pointer)
// po    - output positions (device pointer)
// co    - output colours (device pointer)
// count - number of elements in the arrays. Threads whose index is >= count
//         return without touching memory, so the grid may be rounded up to a
//         whole number of blocks. A negative value (the default) disables the
//         check; the caller must then launch no more threads than there are
//         elements, exactly as the previous three-argument form required.
__global__ void uploadVerts(ver *ve, pos *po, col *co, int count = -1) {
    int id = threadIdx.x + (blockDim.x * blockIdx.x);

    // Tail guard: keeps surplus threads of the last block inside the arrays.
    if (count >= 0 && id >= count) {
        return;
    }

    // Member-wise struct copies; same result as copying x/y/z and r/g/b/a one by one.
    po[id] = ve[id].p;
    co[id] = ve[id].c;
}

// Fills `v` with a regular grid of vertices: element i gets position
// (i % width, i / width, 0) and the fixed colour (127, 255, 0, 255).
//
// Launch layout: 1D grid of 1D blocks, thread i writes element i.
// Shared memory: none.
//
// v     - output vertices (device pointer)
// width - number of vertices per grid row; must be > 0. Defaults to 1920,
//         the value that used to be hard-coded.
// count - number of elements in `v`. Threads whose index is >= count return
//         without writing. A negative value (the default) disables the check;
//         the caller must then launch no more threads than `v` has elements.
__global__ void genGrid(ver *v, int width = 1920, int count = -1) {
    int i = threadIdx.x + (blockDim.x * blockIdx.x);

    // Tail guard: keeps surplus threads of the last block inside the array.
    if (count >= 0 && i >= count) {
        return;
    }

    // Row-major layout: column from the remainder, row from the quotient.
    v[i].p.x = i % width;
    v[i].p.y = i / width;
    v[i].p.z = 0;

    v[i].c.r = 127;
    v[i].c.g = 255;
    v[i].c.b = 0;
    v[i].c.a = 255;
}

// Out-of-class definitions of GPU's static data members declared in the
// header. None of them is given an explicit initializer here; the GL ids and
// shader ids are assigned in GPU::init().
int GPU::nParticles;

GLuint GPU::vboidP;

GLuint GPU::vboidC;

GLuint GPU::vaoid;

GLuint GPU::vrshaderid;

GLuint GPU::frshaderid;

GLuint GPU::lkshaderid;

// Interop handles; only referenced from commented-out code in this file.
cudaGraphicsResource *GPU::CGRp;

cudaGraphicsResource *GPU::CGRc;

// GLSL source strings handed to glShaderSource() in GPU::init().
// Each element is built from adjacent string literals; only the "#version"
// line ends with a newline, the remaining statements share one source line.
const char* GPU::shaders[2] = {
    // [0] Vertex shader: reads a vec3 position (location 0) and a vec4 colour
    // (location 1), forwards the colour, and writes the position to
    // gl_Position with w = 1.0 and no transformation.
    "#version 460\n"
    "layout(location = 0) in vec3 vertex_position;"
    "layout(location = 1) in vec4 vertex_colour;"
    "out vec4 colour;"
    "void main() {"
    "   colour = vertex_colour;"
    "   gl_Position = vec4(vertex_position, 1.0);"
    "}"
    ,
    // [1] Fragment shader: writes the interpolated colour unchanged.
    "#version 460\n"
    "in vec4 colour;"
    "out vec4 frag_colour;"
    "void main() {"
    "   frag_colour = colour;"
    "}"
};

// Collection of vertices to be simulated and rendered, stored in device memory.
// It is only resized and filled inside commented-out code in GPU::init();
// GPU::free() swaps it with an empty vector.
thrust::device_vector<ver> GPU::verts;



void GPU::init(int w, int h)
{

    /*nParticles = w * h;
    verts.resize(nParticles, ver{ pos{0,0,0}, col{255,0,0,255} });
    genGrid<<<nParticles/1024,1024>>>(thrust::raw_pointer_cast(&verts[0]));
    cudaDeviceSynchronize();*/

    pos vp[3] = {
        pos{0,0,0},
        pos{200,0,4},
        pos{100,200,3}

    };
    col vc[3] = {
        col{255,0,0,255},
        col{0,255,0,255},
        col{0,0,255,255}
    };

    vrshaderid = glCreateShader(GL_VERTEX_SHADER);
    glShaderSource(vrshaderid, 1, &shaders[0], NULL);
    glCompileShader(vrshaderid);
    GLint success;
    GLchar infoLog[512];
    glGetShaderiv(vrshaderid, GL_COMPILE_STATUS, &success);
    if (!success)
    {
        glGetShaderInfoLog(vrshaderid, 512, NULL, infoLog);
        std::cout << "ERROR::SHADER::VERTEX::COMPILATION_FAILED\n" << infoLog << std::endl;
    }
    frshaderid = glCreateShader(GL_FRAGMENT_SHADER);
    glShaderSource(frshaderid, 1, &shaders[1], NULL);
    glCompileShader(frshaderid);
    glGetShaderiv(frshaderid, GL_COMPILE_STATUS, &success);
    if (!success)
    {
        glGetShaderInfoLog(frshaderid, 512, NULL, infoLog);
        std::cout << "ERROR::SHADER::FRAGMENT::COMPILATION_FAILED\n" << infoLog << std::endl;
    }

    lkshaderid = glCreateProgram();
    glAttachShader(lkshaderid, vrshaderid);
    glAttachShader(lkshaderid, frshaderid);
    glLinkProgram(lkshaderid);
    glGetProgramiv(lkshaderid, GL_LINK_STATUS, &success);
    if (!success) {
        glGetProgramInfoLog(lkshaderid, 512, NULL, infoLog);
        std::cout << "ERROR::SHADER::PROGRAM::LINKING_FAILED\n" << infoLog << std::endl;
    }

    glGenVertexArrays(1, &vaoid);
    glGenBuffers(1,&vboidP);
    glGenBuffers(1, &vboidC);
    glBindVertexArray(vaoid);

    glBindBuffer(GL_ARRAY_BUFFER,vboidP);
    glBufferData(GL_ARRAY_BUFFER,3*sizeof(pos),vp,GL_DYNAMIC_DRAW);
    glVertexAttribPointer(0, 3, GL_INT, GL_TRUE, 3 * sizeof(pos), NULL);
    glEnableVertexAttribArray(0);
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    glBindBuffer(GL_ARRAY_BUFFER, vboidC);
    glBufferData(GL_ARRAY_BUFFER,3*sizeof(col),vc, GL_DYNAMIC_DRAW);
    glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, GL_TRUE, 3 * sizeof(col), NULL);
    glEnableVertexAttribArray(1);
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    /*cudaGraphicsGLRegisterBuffer(&CGRp,vboidP,cudaGraphicsMapFlagsWriteDiscard);
    cudaGraphicsGLRegisterBuffer(&CGRc,vboidC, cudaGraphicsMapFlagsWriteDiscard);*/

    glBindVertexArray(0);

}

// Currently has an empty body. It is called by GPU::GPUmain() before render().
void GPU::compute()
{

}

// Clears the colour and depth buffers, then draws the first three vertices of
// the VAO created in GPU::init() as one triangle using the linked program.
// The commented-out section maps both VBOs into CUDA, runs uploadVerts to
// copy GPU::verts into them, and unmaps them again; it is currently disabled.
void GPU::render()
{
    /*pos *posi;
    col *cols;

    size_t sizep;
    size_t sizec;

    cudaGraphicsMapResources(1, &CGRp, 0);
    cudaGraphicsMapResources(1, &CGRc, 0);

    cudaGraphicsResourceGetMappedPointer((void**)&posi, &sizep, CGRp);
    cudaGraphicsResourceGetMappedPointer((void**)&cols, &sizec, CGRc);

    uploadVerts<<<nParticles/1024, 1024>>>(thrust::raw_pointer_cast(&verts[0]), posi, cols);
    cudaDeviceSynchronize();

    cudaGraphicsUnmapResources(1, &CGRp, 0);
    cudaGraphicsUnmapResources(1, &CGRc, 0);*/

    glClearColor(0, 0, 0, 0); // we clear the screen with black (else, frames would overlay...)
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // clear the buffer

    // Select the shader program linked in init().
    glUseProgram(lkshaderid);

    // Binding the VAO restores the attribute layout recorded in init().
    glBindVertexArray(vaoid);

    // Three vertices starting at index 0 -> exactly one triangle.
    glDrawArrays(GL_TRIANGLES,0,3);

    glBindVertexArray(0);
}

// Runs one frame: compute() first, then render().
void GPU::GPUmain()
{

    compute();

    render();

}

void GPU::free()
{
    /*cudaGraphicsUnregisterResource(CGRp);
    cudaGraphicsUnregisterResource(CGRc);*/
    glDeleteVertexArrays(1,&vaoid);
    glDeleteBuffers(1, &vboidP);
    glDeleteBuffers(1, &vboidC);
    verts.clear();
    thrust::device_vector<ver>().swap(verts);
}

window.cpp:

// Initializes SDL, creates the window and an OpenGL 4.6 core-profile context,
// loads the GL entry points through GLEW, sets the basic render state and
// finally calls GPU::init().
//
// Returns true on success. On failure a message is written to `log` and
// false is returned; nothing that was already created is torn down here.
bool Window::init()
{
    //initialize SDL
    if (SDL_Init(SDL_INIT_EVERYTHING) != 0) {

        log << "Failed to initialize SDL!\n";
        return false;

    }

    //set window attributes (must happen before the window is created)
    SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);

    SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
    SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 6);

    SDL_GL_SetAttribute(SDL_GL_STENCIL_SIZE, 8);

    SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);



    //create window
    window = SDL_CreateWindow(
        name.c_str(),
        SDL_WINDOWPOS_CENTERED,
        SDL_WINDOWPOS_CENTERED,
        width,
        height,
        SDL_WINDOW_OPENGL

    );

    //check if the window was created -- before it is used to create the context
    if (window == nullptr) {

        log << "Failed to create window!\n";
        return false;

    }

    //create opengl context in the window
    glcontext = SDL_GL_CreateContext(window);

    //context creation fails e.g. when the requested GL version is unavailable
    if (!glcontext) {

        log << "Failed to create OpenGL context!\n";
        return false;

    }

    SDL_GL_SetSwapInterval(1);

    //turn on experimental features
    glewExperimental = GL_TRUE;

    //initialize glew
    if (glewInit() != GLEW_OK) {

        log << "Failed to Init GLEW";

        return false;

    }



    //set drawing parameters
    glViewport(0, 0, width, height);
    // The former glMatrixMode / glLoadIdentity / glOrtho calls were removed:
    // the fixed-function matrix stack does not exist in the core profile
    // requested above, so they only raised GL errors. A projection has to be
    // passed to the shaders as a uniform instead.
    glPointSize(1);
    glEnable(GL_BLEND);                                // Allow Transparency
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);  // how transparency acts

    // Debug output: size of one vertex in bytes.
    std::cout << sizeof(ver);

    GPU::init(width, height);

    return true;
}

// Draws one frame and presents it.
// Calls GPU::render() directly, so GPU::compute() is not run on this path
// (GPU::GPUmain() would run both).
void Window::renderFrame()
{

    GPU::render();

    SDL_GL_SwapWindow(window); //swap buffers
}

Solution

  • The type of the attributes are integral data types:

    struct pos {
        GLint x, y, z;
    };
    
    struct col {
        GLubyte r, g, b, a;
    };
    

    So you have to use glVertexAttribIPointer (note the I) rather than glVertexAttribPointer when you set up the arrays of generic vertex attribute data.
    The data type of the vertex shader attributes has to be an integral data type, too, so the first pair of declarations below has to be replaced by the second pair:

    layout(location = 0) in vec3 vertex_position
    layout(location = 1) in vec4 vertex_colour;

    layout(location = 0) in ivec3 vertex_position;
    layout(location = 1) in ivec4 vertex_colour;
    

    The stride parameter of glVertexAttribIPointer/glVertexAttribPointer is the byte offset between consecutive generic vertex attributes. So it has to be sizeof(pos) and sizeof(col), respectively, rather than 3*sizeof(pos) and 3*sizeof(col).
    If the generic vertex attributes are tightly packed, then stride can be set to 0. This is a special case, where the stride is automatically calculated from the size and type parameters:

    glBindBuffer(GL_ARRAY_BUFFER,vboidP);
    // [...]
    glVertexAttribIPointer(0, 3, GL_INT, 0, NULL);
    // [...]
    
    glBindBuffer(GL_ARRAY_BUFFER, vboidC);
    // [...]
    glVertexAttribIPointer(1, 4, GL_UNSIGNED_BYTE, 0, NULL);
    // [...]
    

    A core profile context (SDL_GL_CONTEXT_PROFILE_CORE) doesn't support the fixed function matrix stack.

    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    glOrtho(0, width, 0, height, -255, 0);
    

    See Fixed Function Pipeline and Legacy OpenGL.
    The answer to "OpenGL gluLookat not working with shaders on" may help, too.

    I recommend using a library like OpenGL Mathematics (GLM) to calculate the projection matrix with ortho() and passing it to the shader through a uniform variable:

    // Vertex shader excerpt: integer position input plus a projection matrix
    // supplied by the application at uniform location 7.
    #version 460
    layout(location = 0) in ivec3 vertex_position;
    
    layout(location = 7) uniform mat4 prj_matrix;
    
    void main()
    {  
        // [...]
    
        // Convert the integer position to float and project it into clip space.
        gl_Position = prj_matrix * vec4(vertex_position, 1.0);
    }
    
    #include <glm/glm.hpp>
    #include <glm/gtc/matrix_transform.hpp>
    #include <glm/gtc/type_ptr.hpp>
    
    // [...]
    
    // Excerpt of GPU::render() showing where the projection matrix is uploaded.
    // NOTE(review): `width` and `height` are not members of GPU in the class
    // shown in the question (init() receives them as w and h); they have to
    // be stored or passed in for this to compile -- confirm.
    void GPU::render()
    {
        // [...]
    
        glUseProgram(lkshaderid);
    
        // Orthographic projection; must be set after glUseProgram because
        // glUniform* writes to the currently installed program.
        glm::mat4 prj = glm::ortho(0.0f, (float)width, 0.0f, (float)height, -255.0f, 0.0f);
        // 7 is the explicit uniform location declared in the vertex shader.
        glUniformMatrix4fv(7, 1, GL_FALSE, glm::value_ptr(prj));
    
        // [...]
    }
    

    The uniform location is set explicitly by a layout qualifier (location = 7).
    glUniformMatrix4fv sets the value of the uniform at the specified location in the default uniform block. This has to be done after the program was installed by glUseProgram.


    Full shader code, using Raw string literal:

    // Complete shader sources as raw string literals.
    // [0] vertex shader: reads integer position (location 0) and integer colour
    //     (location 1), divides the colour by 255.0 and multiplies the position
    //     by the prj_matrix uniform (location 7).
    // [1] fragment shader: writes the interpolated colour unchanged.
    const char* GPU::shaders[2] = {
    R"(
    #version 460
    
    layout(location = 0) in ivec3 vertex_position;
    layout(location = 1) in ivec4 vertex_colour;
    
    layout(location = 7) uniform mat4 prj_matrix;
    
    out vec4 colour;
    
    void main() {
        colour = vec4(vertex_colour) / 255.0;
        gl_Position = prj_matrix * vec4(vertex_position, 1.0);
    }
    )"
    ,
    R"(
    #version 460
    
    in vec4 colour;
    
    out vec4 frag_colour;
    
    void main() {
       frag_colour = colour;
    }
    )"
    };
    

    If you apply the suggested changes, then you'll see the following triangle: