I'm trying to draw a bunch of dots on screen with Cuda 10.1 and OpenGL 4.6 interop. However, right now I am just loading a triangle from CPU for testing. However, I am new to OpenGL and this is my first time writing a Vertex Array Object. So I guess the first questions I have are: What is wrong with my VAO code? Why is my triangle not drawing? I have tried to figure it out to the best of my ability. My second questions: If I change the data inside the two VBOs associated with the VAO with Cuda, will the VAO still update, and draw the change?
Here is my code (sorry that its uncommented, I'm on a time crunch):
GPUmain.h:
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/remove.h>
#include <curand.h>
#include <GL/glew.h>
#include <SDL_opengl.h>
#include <cuda_gl_interop.h>
#define BUFFER_OFFSET(i) ((char *)NULL + (i))
struct pos {
GLint x, y, z;
};
struct col {
GLubyte r, g, b, a;
};
struct phy {
GLdouble spd;
GLdouble dir;
};
struct ver {
pos p;
col c;
};
class GPU {
public:
static int nParticles;
static GLuint vboidP;
static GLuint vboidC;
static GLuint vaoid;
static GLuint vrshaderid;
static GLuint frshaderid;
static GLuint lkshaderid;
static cudaGraphicsResource *CGRp;
static cudaGraphicsResource *CGRc;
static const char* shaders[2];
static thrust::device_vector<ver> verts;
static void init(int w, int h);
static void compute();
static void render();
static void GPUmain();
static void free();
};
GPUmain.cu:
#include "GPUmain.cuh"
__global__ void uploadVerts(ver *ve, pos *po, col *co) {
int id = threadIdx.x + (blockDim.x * blockIdx.x);
po[id].x = ve[id].p.x;
po[id].y = ve[id].p.y;
po[id].z = ve[id].p.z;
co[id].r = ve[id].c.r;
co[id].g = ve[id].c.g;
co[id].b = ve[id].c.b;
co[id].a = ve[id].c.a;
}
__global__ void genGrid(ver *v) {
int i = threadIdx.x + (blockDim.x * blockIdx.x);
int x = i % 1920;
int y = i / 1920;
v[i].p.x = x;
v[i].p.y = y;
v[i].p.z = 0;
v[i].c.r = 127;
v[i].c.g = 255;
v[i].c.b = 0;
v[i].c.a = 255;
}
int GPU::nParticles;
GLuint GPU::vboidP;
GLuint GPU::vboidC;
GLuint GPU::vaoid;
GLuint GPU::vrshaderid;
GLuint GPU::frshaderid;
GLuint GPU::lkshaderid;
cudaGraphicsResource *GPU::CGRp;
cudaGraphicsResource *GPU::CGRc;
const char* GPU::shaders[2] = {
"#version 460\n"
"layout(location = 0) in vec3 vertex_position;"
"layout(location = 1) in vec4 vertex_colour;"
"out vec4 colour;"
"void main() {"
" colour = vertex_colour;"
" gl_Position = vec4(vertex_position, 1.0);"
"}"
,
"#version 460\n"
"in vec4 colour;"
"out vec4 frag_colour;"
"void main() {"
" frag_colour = colour;"
"}"
};
//collection of vertices to be simulated and rendered
thrust::device_vector<ver> GPU::verts;
void GPU::init(int w, int h)
{
/*nParticles = w * h;
verts.resize(nParticles, ver{ pos{0,0,0}, col{255,0,0,255} });
genGrid<<<nParticles/1024,1024>>>(thrust::raw_pointer_cast(&verts[0]));
cudaDeviceSynchronize();*/
pos vp[3] = {
pos{0,0,0},
pos{200,0,4},
pos{100,200,3}
};
col vc[3] = {
col{255,0,0,255},
col{0,255,0,255},
col{0,0,255,255}
};
vrshaderid = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vrshaderid, 1, &shaders[0], NULL);
glCompileShader(vrshaderid);
GLint success;
GLchar infoLog[512];
glGetShaderiv(vrshaderid, GL_COMPILE_STATUS, &success);
if (!success)
{
glGetShaderInfoLog(vrshaderid, 512, NULL, infoLog);
std::cout << "ERROR::SHADER::VERTEX::COMPILATION_FAILED\n" << infoLog << std::endl;
}
frshaderid = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(frshaderid, 1, &shaders[1], NULL);
glCompileShader(frshaderid);
glGetShaderiv(frshaderid, GL_COMPILE_STATUS, &success);
if (!success)
{
glGetShaderInfoLog(frshaderid, 512, NULL, infoLog);
std::cout << "ERROR::SHADER::FRAGMENT::COMPILATION_FAILED\n" << infoLog << std::endl;
}
lkshaderid = glCreateProgram();
glAttachShader(lkshaderid, vrshaderid);
glAttachShader(lkshaderid, frshaderid);
glLinkProgram(lkshaderid);
glGetProgramiv(lkshaderid, GL_LINK_STATUS, &success);
if (!success) {
glGetProgramInfoLog(lkshaderid, 512, NULL, infoLog);
std::cout << "ERROR::SHADER::PROGRAM::LINKING_FAILED\n" << infoLog << std::endl;
}
glGenVertexArrays(1, &vaoid);
glGenBuffers(1,&vboidP);
glGenBuffers(1, &vboidC);
glBindVertexArray(vaoid);
glBindBuffer(GL_ARRAY_BUFFER,vboidP);
glBufferData(GL_ARRAY_BUFFER,3*sizeof(pos),vp,GL_DYNAMIC_DRAW);
glVertexAttribPointer(0, 3, GL_INT, GL_TRUE, 3 * sizeof(pos), NULL);
glEnableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_ARRAY_BUFFER, vboidC);
glBufferData(GL_ARRAY_BUFFER,3*sizeof(col),vc, GL_DYNAMIC_DRAW);
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, GL_TRUE, 3 * sizeof(col), NULL);
glEnableVertexAttribArray(1);
glBindBuffer(GL_ARRAY_BUFFER, 0);
/*cudaGraphicsGLRegisterBuffer(&CGRp,vboidP,cudaGraphicsMapFlagsWriteDiscard);
cudaGraphicsGLRegisterBuffer(&CGRc,vboidC, cudaGraphicsMapFlagsWriteDiscard);*/
glBindVertexArray(0);
}
void GPU::compute()
{
}
void GPU::render()
{
/*pos *posi;
col *cols;
size_t sizep;
size_t sizec;
cudaGraphicsMapResources(1, &CGRp, 0);
cudaGraphicsMapResources(1, &CGRc, 0);
cudaGraphicsResourceGetMappedPointer((void**)&posi, &sizep, CGRp);
cudaGraphicsResourceGetMappedPointer((void**)&cols, &sizec, CGRc);
uploadVerts<<<nParticles/1024, 1024>>>(thrust::raw_pointer_cast(&verts[0]), posi, cols);
cudaDeviceSynchronize();
cudaGraphicsUnmapResources(1, &CGRp, 0);
cudaGraphicsUnmapResources(1, &CGRc, 0);*/
glClearColor(0, 0, 0, 0); // we clear the screen with black (else, frames would overlay...)
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // clear the buffer
glUseProgram(lkshaderid);
glBindVertexArray(vaoid);
glDrawArrays(GL_TRIANGLES,0,3);
glBindVertexArray(0);
}
void GPU::GPUmain()
{
compute();
render();
}
void GPU::free()
{
/*cudaGraphicsUnregisterResource(CGRp);
cudaGraphicsUnregisterResource(CGRc);*/
glDeleteVertexArrays(1,&vaoid);
glDeleteBuffers(1, &vboidP);
glDeleteBuffers(1, &vboidC);
verts.clear();
thrust::device_vector<ver>().swap(verts);
}
window.cpp:
bool Window::init()
{
//initialize SDL
if (SDL_Init(SDL_INIT_EVERYTHING) != 0) {
log << "Failed to initialize SDL!\n";
return false;
}
//set window atributes
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 6);
SDL_GL_SetAttribute(SDL_GL_STENCIL_SIZE, 8);
SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
//creat window
window = SDL_CreateWindow(
name.c_str(),
SDL_WINDOWPOS_CENTERED,
SDL_WINDOWPOS_CENTERED,
width,
height,
SDL_WINDOW_OPENGL
);
//create opengl context in the window
glcontext = SDL_GL_CreateContext(window);
SDL_GL_SetSwapInterval(1);
//check if the window was created
if (window == nullptr) {
log << "Failed to create window!\n";
return false;
}
//turn on experimental features
glewExperimental = GL_TRUE;
//initiallize glew
if (glewInit() != GLEW_OK) {
log << "Failed to Init GLEW";
return false;
}
//set drawing parameters
glViewport(0, 0, width, height);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho(0, width, 0, height, -255, 0);
glPointSize(1);
glEnable(GL_BLEND); // Allow Transparency
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); // how transparency acts
std::cout << sizeof(ver);
GPU::init(width, height);
return true;
}
void Window::renderFrame()
{
GPU::render();
SDL_GL_SwapWindow(window); //swap buffers
}
The type of the attributes are integral data types:
struct pos { GLint x, y, z; }; struct col { GLubyte r, g, b, a; };
So you ve to use glVertexAttribIPointer
(focus on I
), when you set up the arrays of generic vertex attribute data, rather than glVertexAttribPointer
.
The data type of vertex shader attributes has to be an integral data type, too:
layout(location = 0) in vec3 vertex_position
layout(location = 1) in vec4 vertex_colour;
layout(location = 0) in ivec3 vertex_position;
layout(location = 1) in ivec4 vertex_colour;
The stride
parameter of glVertexAttribIPointer
/glVertexAttribPointer
the byte offset between consecutive generic vertex attributes. So it has to be sizeof(pos)
respectively sizeof(col)
rather than 3*sizeof(pos)
and 3*sizeof(col)
.
If the the generic vertex attributes are tightly packed, then stride
can be set 0. This is a special case, where the stride is automatically calculated by the size
and type
parameter:
glBindBuffer(GL_ARRAY_BUFFER,vboidP);
// [...]
glVertexAttribIPointer(0, 3, GL_INT, 0, NULL);
// [...]
glBindBuffer(GL_ARRAY_BUFFER, vboidC);
// [...]
glVertexAttribIPointer(1, 4, GL_UNSIGNED_BYTE, 0, NULL);
// [...]
A core profile context (SDL_GL_CONTEXT_PROFILE_CORE
) doesn't support the fixed function matrix stack.
glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(0, width, 0, height, -255, 0);
See Fixed Function Pipeline and Legacy OpenGL.
The answer to OpenGL gluLookat not working with shaders on may help, too.
I recommend to use a library like OpenGL Mathematics to calculate the view matrix by ortho()
and a uniform variable:
version 460
layout(location = 0) in ivec3 vertex_position;
layout(location = 7) uniform mat4 prj_matrix;
void main()
{
// [...]
gl_Position = prj_matrix * vec4(vertex_position, 1.0);"
}
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
// [...]
void GPU::render()
{
// [...]
glUseProgram(lkshaderid);
glm::mat4 prj = glm::ortho(0.0f, (float)width, 0.0f, (float)height, -255.0f, 0.0f);
glUniformMatrix4fv(7, 1, GL_FALSE, glm::value_ptr(prj));
// [...]
}
The uniform location is set explicite by a Layout qualifier (location = 7
).
glUniformMatrix4fv
sets the value of the uniform at the specified location in the default uniform block. This has to be done after the progroam was installed by glUseProgram
.
Full shader code, using Raw string literal:
const char* GPU::shaders[2] = {
R"(
#version 460
layout(location = 0) in ivec3 vertex_position;
layout(location = 1) in ivec4 vertex_colour;
layout(location = 7) uniform mat4 prj_matrix;
out vec4 colour;
void main() {
colour = vec4(vertex_colour) / 255.0;
gl_Position = prj_matrix * vec4(vertex_position, 1.0);
}
)"
,
R"(
#version 460
in vec4 colour;
out vec4 frag_colour;
void main() {
frag_colour = colour;
}
)"
};
If you apply the suggested changes, then you'll see the following triangle: