Search code examples
c++direct3d11

instancing in directx 11


I am trying to do tiled 2D graphics in Direct3D 11. Tiled 2D graphics is basically just a whole lot of textured quads, so it seems natural to use instancing instead of drawing every quad individually. Drawing a single quad works, but if I try to use instancing to draw 2 quads, I get a blank screen. If I understand it all correctly, the idea is that you create an input layout that has the vertex buffer layout in slot 0 and the instance buffer layout in slot 1. You create a vertex buffer with the vertex data and an instance buffer with the instance data, and you put those in an array. This is an array of pointers (is that true?), and the index in this array corresponds to the slot number in the layout. You receive the vertex data and the instance data in the vertex shader and do your thing with it, and then you draw it with DrawIndexedInstanced. As usual in DirectX, understanding it conceptually is the easy bit; making it actually happen is the hard bit.

Here is the code (I know the naming style is not entirely consistent, part is copied from a tutorial).

Definition of the layout:

    // Input layout for instanced drawing: input slot 0 supplies per-vertex data,
    // input slot 1 supplies per-instance data that advances once per instance
    // (InstanceDataStepRate == 1).
    D3D11_INPUT_ELEMENT_DESC inputElementDesc[] =
    {
        // SemanticName, SemanticIndex, Format, InputSlot, AlignedByteOffset, InputSlotClass, InstanceDataStepRate
        { "POS", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
        { "TEX", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
        { "INSTANCEPOS", 0, DXGI_FORMAT_R32G32_FLOAT, 1, 0, D3D11_INPUT_PER_INSTANCE_DATA, 1 },
        { "INSTANCETEX", 0, DXGI_FORMAT_R32G32_FLOAT, 1, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_INSTANCE_DATA, 1 }
    };

    // The layout is created against the vertex shader bytecode passed in here.
    HRESULT hResult = m_device->CreateInputLayout(inputElementDesc, ARRAYSIZE(inputElementDesc), vs_bytecode->GetBufferPointer(), vs_bytecode->GetBufferSize(), &m_layout);
    assert(SUCCEEDED(hResult));
    // This snippet does not use the bytecode blob after this point, so it is released.
    vs_bytecode->Release();

Binding of the layout:

// Select the triangle-list primitive topology for subsequent draw calls.
m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
// Make the input layout current on the input-assembler stage.
m_context->IASetInputLayout(m_layout.Get());  // m_layout is a ComPtr

Vertex buffer header file:


#pragma once

#include "bindable.h"
#include "base_types.h"

#include <wrl.h>
#include <DirectXMath.h>

#include <array>

// One element of the per-vertex buffer: a 2D position followed by a 2D texture
// coordinate, in the same order as the POS / TEX entries of the input layout.
struct Vertex
{
    DirectX::XMFLOAT2 pos;         // position of this quad corner
    DirectX::XMFLOAT2 tex_coords;  // texture coordinate at this quad corner
};

// One element of the per-instance buffer: two 2D values in the same order as
// the INSTANCEPOS / INSTANCETEX entries of the input layout.
struct Instance_data
{
    DirectX::XMFLOAT2 instance_pos;  // per-instance position value
    DirectX::XMFLOAT2 instance_tex;  // per-instance texture-coordinate value
};


{
public:
class Vertex_buffer : public Bindable
    Vertex_buffer(ID3D11DeviceContext1* context, ID3D11Device1* device, Rect vertex_pos, Rect texture_pos);
    void bind() noexcept override;
    int add_instance(Vec2 pos, Vec2 texture_coords);
private:
    inline constexpr static size_t num_instances{ 2 };
    std::array<Instance_data, num_instances> m_instance_data;
    std::array<ID3D11Buffer*, 2> m_buffers;
    std::array<UINT, 2> m_strides{ sizeof(Vertex), sizeof(Instance_data) };
    std::array<UINT, 2> m_offsets{ 0u, 0u };
    size_t next_index{ 0 };
};    
  


Vertex buffer c++ file

#include "vertex_buffer.h"

#include <array>
#include <vector>

// Creates the two GPU buffers used for instanced quad drawing.
//   context, device : forwarded to the Bindable base class.
//   vertex_pos      : rectangle of the quad; x/y is the top-left corner and
//                     height is subtracted from y to reach the bottom edge.
//   texture_pos     : rectangle of the texture region; height is added to y.
// m_buffers[0] receives the four quad vertices, m_buffers[1] receives the
// current contents of m_instance_data. Instances stored later through
// add_instance() are not part of this initial upload.
Vertex_buffer::Vertex_buffer(ID3D11DeviceContext1* context, ID3D11Device1* device, Rect vertex_pos, Rect texture_pos) : Bindable{ context, device }
{
    // Edges of the quad and of the texture region.
    const auto left = vertex_pos.x;
    const auto right = vertex_pos.x + vertex_pos.width;
    const auto top = vertex_pos.y;
    const auto bottom = vertex_pos.y - vertex_pos.height;

    const auto tex_left = texture_pos.x;
    const auto tex_right = texture_pos.x + texture_pos.width;
    const auto tex_top = texture_pos.y;
    const auto tex_bottom = texture_pos.y + texture_pos.height;

    // Vertex order is kept exactly as before because the index buffer
    // (defined elsewhere) refers to the corners by position in this array.
    const std::array<Vertex, 4> vertices{ {
        Vertex{ DirectX::XMFLOAT2{ left, top }, DirectX::XMFLOAT2{ tex_left, tex_top } },
        Vertex{ DirectX::XMFLOAT2{ right, bottom }, DirectX::XMFLOAT2{ tex_right, tex_bottom } },
        Vertex{ DirectX::XMFLOAT2{ left, bottom }, DirectX::XMFLOAT2{ tex_left, tex_bottom } },
        Vertex{ DirectX::XMFLOAT2{ right, top }, DirectX::XMFLOAT2{ tex_right, tex_top } }
    } };

    D3D11_BUFFER_DESC vertexBufferDesc = {};
    vertexBufferDesc.ByteWidth = static_cast<UINT>(sizeof(vertices));
    vertexBufferDesc.Usage = D3D11_USAGE_DEFAULT;
    vertexBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;

    D3D11_SUBRESOURCE_DATA vertexSubresourceData = {};
    vertexSubresourceData.pSysMem = vertices.data();

    HRESULT hResult = device->CreateBuffer(&vertexBufferDesc, &vertexSubresourceData, &m_buffers[0]);
    assert(SUCCEEDED(hResult));

    // No instance has been added yet; zero the CPU-side array so the initial
    // upload below never reads indeterminate memory.
    m_instance_data.fill(Instance_data{});

    D3D11_BUFFER_DESC instance_buffer_desc{};
    instance_buffer_desc.Usage = D3D11_USAGE_DEFAULT;
    instance_buffer_desc.ByteWidth = static_cast<UINT>(sizeof(Instance_data) * num_instances);
    instance_buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
    instance_buffer_desc.CPUAccessFlags = 0;
    instance_buffer_desc.MiscFlags = 0;

    D3D11_SUBRESOURCE_DATA subres_data{};
    subres_data.pSysMem = m_instance_data.data();

    hResult = device->CreateBuffer(&instance_buffer_desc, &subres_data, &m_buffers[1]);
    assert(SUCCEEDED(hResult));
}

void Vertex_buffer::bind() noexcept
{
    m_context->IASetVertexBuffers(0u, 2u, m_buffers.data(), m_strides.data(), m_offsets.data());
}

// Stores one instance record and pushes the instance array to the GPU buffer.
//   pos            : value stored in Instance_data::instance_pos.
//   texture_coords : value stored in Instance_data::instance_tex.
// Returns the index the record was stored at, or -1 when all num_instances
// entries are already in use (nothing is modified in that case).
int Vertex_buffer::add_instance(Vec2 pos, Vec2 texture_coords)
{
    // Guard against writing past the end of the fixed-size array.
    if (next_index >= num_instances)
    {
        return -1;
    }

    m_instance_data[next_index] = Instance_data{ {pos.x, pos.y}, {texture_coords.x, texture_coords.y} };

    // The instance buffer is created with D3D11_USAGE_DEFAULT and no CPU
    // access, so the CPU-side array has to be copied into it explicitly;
    // otherwise the GPU keeps the contents it was created with.
    if (m_buffers[1] != nullptr)
    {
        m_context->UpdateSubresource(m_buffers[1], 0u, nullptr, m_instance_data.data(), 0u, 0u);
    }

    return static_cast<int>(next_index++);
}

shader file

// Vertex shader input. The semantic names match the entries of the input
// layout: POS / TEX are per-vertex, INSTANCEPOS / INSTANCETEX are per-instance.
struct VS_Input {
    float2 pos : POS;
    float2 uv : TEX;
    float2 instancepos : INSTANCEPOS;
    float2 instancetex : INSTANCETEX;
};

// Vertex shader output, which is also the pixel shader input.
struct VS_Output {
    float4 pos : SV_POSITION;
    float2 uv : TEXCOORD;
};

// Texture and sampler read by ps_main, bound to texture register 0 and
// sampler register 0 respectively.
Texture2D    mytexture : register(t0);
SamplerState mysampler : register(s0);

// Vertex shader entry point.
// NOTE(review): only the per-instance inputs are read here; input.pos and
// input.uv are never used. Every vertex of an instance therefore gets the same
// output position and texture coordinate, so the triangles of an instance
// collapse to a single point.
VS_Output vs_main(VS_Input input)
{
    VS_Output output;
    output.pos = float4(input.instancepos, 0.0f, 1.0f);
    output.uv = input.instancetex;
    return output;
}

// Pixel shader entry point: returns the colour sampled from mytexture at
// input.uv using mysampler.
float4 ps_main(VS_Output input) : SV_Target
{
    const float4 texel = mytexture.Sample(mysampler, input.uv);
    return texel;
}

In the main function (there is of course a lot of code that creates the window and does stuff that I know that works)


Vertex_buffer vertex_buffer{ d3d11DeviceContext, d3d11Device, vertex_pos, texture_pos }; // calculated vertex_pos in -1..1 range and texture_pos in 0..1 range

// do the instance data

constexpr unsigned int num_instances{ 2u };
struct Instance_data   // yes, this is defined in 2 places, that should be improved
{
    DirectX::XMFLOAT2 instance_pos;
    DirectX::XMFLOAT2 instance_tex;
};
std::array<Instance_data, num_instances> instance_data;
Rect v1 = calc_vertex_pos(3.0f, 32.0f, 32.0f, screenwidth, screenheight, 4, 3); // these functions that calculate vertex and texture coordinates work correctly
Rect v2 = calc_vertex_pos(3.0f, 32.0f, 32.0f, screenwidth, screenheight, 3, 2);
Rect t1 = calc_texture_pos(32.0f, 32.0f, 10, 3, 8);
Rect t2 = calc_texture_pos(32.0f, 32.0f, 10, 3, 15);
// Each instance pairs its own position rectangle with its own texture
// rectangle: v1 with t1, v2 with t2.
auto inst0 = vertex_buffer.add_instance({ v1.x, v1.y }, { t1.x, t1.y });
auto inst1 = vertex_buffer.add_instance({ v2.x, v2.y }, { t2.x, t2.y });

in the main loop:


// bind all the things

// ...
layout.bind();
vertex_buffer.bind();
index_buffer.bind();

// Draw index_buffer.count() indices per instance, for 2 instances, starting at
// index 0, base vertex 0 and instance 0.
d3d11DeviceContext->DrawIndexedInstanced(index_buffer.count(), 2u, 0u, 0u, 0u);
// Present the frame with a sync interval of 1 and no flags.
d3d11SwapChain->Present(1, 0);

The result is a blank screen. It all compiles and runs, no assertions are hit. I suspect that for some reason the data doesn't get into the shader correctly but I am not aware of any way to debug that.

A few related questions:
1- I assume that binding stuff includes sending the associated data to the graphics hardware. Is it efficient to bind everything every frame? (I assume not.) Would it work if I only bind things when there are changes?
2- The instancebuffer description takes the size of the buffer in bytes. Of course it is just a C array underneath. That means that the size is essentially fixed. But the number of objects on the screen can vary: enemies, treasure, bullets and the like can spawn and get destroyed. How do you do that with instancing? Instance only the fixed objects like walls and floors? Or recreate the instance buffer when the number of instances changes? Is recreating the buffer expensive or cheap?
3- Are these semantics names in the shader case-insensitive? I sometimes see SV_POSITION and sometimes SV_Position. Would sv_position also work?

Fiddled around with it quite a bit. Expected two textured quads, got a blank screen.

Solution

  • OK, got a brainwave and got it working! With the instance buffer.

    The trick is that all instances have access to the vertex buffer data, so every vertex knows its relative position within the shape defined in the vertex buffer. In the instance buffer you store the translation of the entire shape, and you add that to all vertex coordinates.

    So, the recipe is: for the vertex buffer, define the correct shape, but based in the top left corner of the window (-1, 1). For every instance, calculate the translation vector (how much you have to add to the x and y coordinates to get to the new position). In the shader, do that addition. For texture coordinates, it is much the same: put the coordinates of the top left texture in the vertex buffer (here, to keep things inconsistent, the coordinates go from (0,0) to (1,1)). In the instance data, put the translation vector. In the shader, add the translation vector to the coordinates from the vertex buffer.

    The shader will now look like this:

    // Vertex shader input: per-vertex POS / TEX, per-instance INSTANCEPOS /
    // INSTANCETEX, plus the system-generated instance index.
    struct VS_Input 
    {
        float2 pos : POS;
        float2 uv : TEX;
        float2 instancepos : INSTANCEPOS;
        float2 instancetex : INSTANCETEX;
        uint instance_id : SV_InstanceID;  // not read by the vs_main shown below
    };
    
    // Vertex shader output: clip-space position and texture coordinate.
    struct VS_Output 
    {
        float4 pos : SV_POSITION;
        float2 uv : TEXCOORD;
    };
    
    // Vertex shader entry point: moves the shared quad vertex by the
    // per-instance translation and shifts its texture coordinate by the
    // per-instance texture offset.
    VS_Output vs_main(VS_Input input)
    {
        // The instance x offset is added; the instance y offset is subtracted.
        const float2 translation = float2(input.instancepos.x, -input.instancepos.y);

        VS_Output result;
        result.pos = float4(input.pos + translation, 0.0f, 1.0f);
        result.uv = input.uv + input.instancetex;
        return result;
    }