I have a DirectX 12 desktop project on Windows 11 that implements post-processing using a combination of DXTK post-process effects.
The aim of the post-proc sequence is to end up with individual bloom and blur textures (along with a depth texture rendered in a depth pass) which are sampled in a 'big triangle' pixel shader to achieve a depth of field effect for the final backbuffer screen image.
The DXTK PostProcesses operate on the full-size (1920x1080) screen texture. Presently this isn't impacting performance (benchmarked at 60fps), but I imagine it could be an issue when I eventually want to support 4K resolutions in future, where full-size image post-processing could be expensive.
Since the recommended best practice is to operate on a scaled down copy of the source image, I hoped to achieve this by using half-size (i.e. quarter resolution) working textures with the DownScale_2x2 BasicPostProcess option. But after several attempts experimenting with the effect, only the top-left quarter of the original source image is being rendered to the downsized texture... not the full image as expected per the documentation:
DownScale_2x2: Downscales each 2x2 block of pixels to an average. This is intended to write to a render target that is half the size of the source texture in each dimension.
Other points of note:
The following code snippets show how I'm implementing the DownScale_2x2 shader into my post-process. Hopefully it's enough to resolve the issue and I can update with more info if necessary.
Resource initialization under CreateDeviceDependentResources():
namespace GameConstants {
constexpr DXGI_FORMAT BACKBUFFERFORMAT(DXGI_FORMAT_R10G10B10A2_UNORM); // back buffer to support hdr rendering
constexpr DXGI_FORMAT HDRFORMAT(DXGI_FORMAT_R16G16B16A16_FLOAT); // format for hdr render targets
constexpr DXGI_FORMAT DEPTHFORMAT(DXGI_FORMAT_D32_FLOAT); // format for render target depth buffer
constexpr UINT MSAACOUNT(4u); // requested multisample count
}
...
//
// Render targets
//
mMsaaHelper = std::make_unique<MSAAHelper>(GameConstants::HDRFORMAT, GameConstants::DEPTHFORMAT, GameConstants::MSAACOUNT);
mMsaaHelper->SetClearColor(GameConstants::CLEARCOLOR);
mDistortionRenderTex = std::make_unique<RenderTexture>(GameConstants::BACKBUFFERFORMAT);
mHdrRenderTex = std::make_unique<RenderTexture>(GameConstants::HDRFORMAT);
mPass1RenderTex = std::make_unique<RenderTexture>(GameConstants::HDRFORMAT);
mPass2RenderTex = std::make_unique<RenderTexture>(GameConstants::HDRFORMAT);
mBloomRenderTex = std::make_unique<RenderTexture>(GameConstants::HDRFORMAT);
mBlurRenderTex = std::make_unique<RenderTexture>(GameConstants::HDRFORMAT);
mDistortionRenderTex->SetClearColor(GameConstants::CLEARCOLOR);
mHdrRenderTex->SetClearColor(GameConstants::CLEARCOLOR);
mPass1RenderTex->SetClearColor(GameConstants::CLEARCOLOR);
mPass2RenderTex->SetClearColor(GameConstants::CLEARCOLOR);
mBloomRenderTex->SetClearColor(GameConstants::CLEARCOLOR);
mBlurRenderTex->SetClearColor(GameConstants::CLEARCOLOR);
mMsaaHelper->SetDevice(device); // Set the MSAA device. Note this updates GetSampleCount.
mDistortionRenderTex->SetDevice(device,
mPostProcSrvDescHeap->GetCpuHandle(SRV_PostProcDescriptors::DistortionMaskSRV),
mRtvDescHeap->GetCpuHandle(RTV_Descriptors::DistortionMaskRTV));
mHdrRenderTex->SetDevice(device,
mPostProcSrvDescHeap->GetCpuHandle(SRV_PostProcDescriptors::HdrSRV),
mRtvDescHeap->GetCpuHandle(RTV_Descriptors::HdrRTV));
mPass1RenderTex->SetDevice(device,
mPostProcSrvDescHeap->GetCpuHandle(SRV_PostProcDescriptors::Pass1SRV),
mRtvDescHeap->GetCpuHandle(RTV_Descriptors::Pass1RTV));
mPass2RenderTex->SetDevice(device,
mPostProcSrvDescHeap->GetCpuHandle(SRV_PostProcDescriptors::Pass2SRV),
mRtvDescHeap->GetCpuHandle(RTV_Descriptors::Pass2RTV));
mBloomRenderTex->SetDevice(device,
mPostProcSrvDescHeap->GetCpuHandle(SRV_PostProcDescriptors::BloomSRV),
mRtvDescHeap->GetCpuHandle(RTV_Descriptors::BloomRTV));
mBlurRenderTex->SetDevice(device,
mPostProcSrvDescHeap->GetCpuHandle(SRV_PostProcDescriptors::BlurSRV),
mRtvDescHeap->GetCpuHandle(RTV_Descriptors::BlurRTV));
...
RenderTargetState ppState(GameConstants::HDRFORMAT, DXGI_FORMAT_UNKNOWN); // 2d postproc rendering
...
// Set other postprocessing effects
mBloomExtract = std::make_unique<BasicPostProcess>(device, ppState, BasicPostProcess::BloomExtract);
mBloomPass = std::make_unique<BasicPostProcess>(device, ppState, BasicPostProcess::BloomBlur);
mBloomCombine = std::make_unique<DualPostProcess>(device, ppState, DualPostProcess::BloomCombine);
mGaussBlurPass = std::make_unique<BasicPostProcess>(device, ppState, BasicPostProcess::GaussianBlur_5x5);
mDownScalePass = std::make_unique<BasicPostProcess>(device, ppState, BasicPostProcess::DownScale_2x2);
Resource resizing under CreateWindowSizeDependentResources():
// Get current backbuffer dimensions
CD3DX12_RECT outputRect(mDeviceResources->GetOutputSize());
// Determine the render target size in pixels
mBackbufferSize.x = std::max<UINT>(outputRect.right - outputRect.left, 1u);
mBackbufferSize.y = std::max<UINT>(outputRect.bottom - outputRect.top, 1u);
...
mMsaaHelper->SetWindow(outputRect);
XMUINT2 halfSize(mBackbufferSize.x / 2u, mBackbufferSize.y / 2u);
mBloomRenderTex->SetWindow(outputRect);
mBlurRenderTex->SetWindow(outputRect);
mDistortionRenderTex->SetWindow(outputRect);
mHdrRenderTex->SetWindow(outputRect);
mPass1RenderTex->SizeResources(halfSize.x, halfSize.y);
mPass2RenderTex->SizeResources(halfSize.x, halfSize.y);
Post-processing implementation:
mMsaaHelper->Prepare(commandList);
Clear(commandList);
// Render 3d scene
mMsaaHelper->Resolve(commandList, mHdrRenderTex->GetResource(),
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_RENDER_TARGET);
//
// Postprocessing
//
// Set texture descriptor heap in prep for postprocessing if necessary.
// Unbind dsv for postprocess textures and sprites.
ID3D12DescriptorHeap* postProcHeap[] = { mPostProcSrvDescHeap->Heap() };
commandList->SetDescriptorHeaps(UINT(std::size(postProcHeap)), postProcHeap);
// downscale pass
CD3DX12_CPU_DESCRIPTOR_HANDLE rtvDownScaleDescriptor(mRtvDescHeap->GetCpuHandle(RTV_Descriptors::Pass1RTV));
commandList->OMSetRenderTargets(1u, &rtvDownScaleDescriptor, FALSE, nullptr);
mPass1RenderTex->BeginScene(commandList); // transition to render target state
mDownScalePass->SetSourceTexture(mPostProcSrvDescHeap->GetGpuHandle(SRV_PostProcDescriptors::HdrSRV), mHdrRenderTex->GetResource());
mDownScalePass->Process(commandList);
mPass1RenderTex->EndScene(commandList); // transition to pixel shader resource state
// blur horizontal pass
commandList->OMSetRenderTargets(1u, &rtvPass2Descriptor, FALSE, nullptr);
mPass2RenderTex->BeginScene(commandList); // transition to render target state
mGaussBlurPass->SetSourceTexture(mPostProcSrvDescHeap->GetGpuHandle(SRV_PostProcDescriptors::Pass1SRV), mPass1RenderTex->GetResource());
//mGaussBlurPass->SetSourceTexture(mPostProcSrvDescHeap->GetGpuHandle(SRV_PostProcDescriptors::HdrSRV), mHdrRenderTex->GetResource());
mGaussBlurPass->SetGaussianParameter(1.f);
mGaussBlurPass->SetBloomBlurParameters(TRUE, 4.f, 1.f); // horizontal blur
mGaussBlurPass->Process(commandList);
mPass2RenderTex->EndScene(commandList); // transition to pixel shader resource
// blur vertical pass
CD3DX12_CPU_DESCRIPTOR_HANDLE rtvBlurDescriptor(mRtvDescHeap->GetCpuHandle(RTV_Descriptors::BlurRTV));
commandList->OMSetRenderTargets(1u, &rtvBlurDescriptor, FALSE, nullptr);
mBlurRenderTex->BeginScene(commandList); // transition to render target state
mGaussBlurPass->SetSourceTexture(mPostProcSrvDescHeap->GetGpuHandle(SRV_PostProcDescriptors::Pass2SRV), mPass2RenderTex->GetResource());
mGaussBlurPass->SetBloomBlurParameters(FALSE, 4.f, 1.f); // vertical blur
mGaussBlurPass->Process(commandList);
mBlurRenderTex->EndScene(commandList); // transition to pixel shader resource
// render the final image to hdr texture
CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHdrDescriptor(mRtvDescHeap->GetCpuHandle(RTV_Descriptors::HdrRTV));
commandList->OMSetRenderTargets(1u, &rtvHdrDescriptor, FALSE, nullptr);
//mHdrRenderTex->BeginScene(commandList); // transition to render target state
commandList->SetGraphicsRootSignature(mRootSig.Get()); // bind root signature
commandList->SetPipelineState(mPsoDepthOfField.Get()); // set PSO
...
commandList->SetGraphicsRootConstantBufferView(RootParameterIndex::PSDofCB, psDofCB.GpuAddress());
commandList->SetGraphicsRootDescriptorTable(RootParameterIndex::PostProcDT, mPostProcSrvDescHeap->GetFirstGpuHandle());
// use the big triangle optimization to draw a fullscreen quad
commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
commandList->DrawInstanced(3u, 1u, 0u, 0u);
...
PIXBeginEvent(commandList, PIX_COLOR_DEFAULT, L"Tone Map");
// Set swapchain backbuffer as the tonemapping render target and unbind depth/stencil for sprites (UI)
CD3DX12_CPU_DESCRIPTOR_HANDLE rtvDescriptor(mDeviceResources->GetRenderTargetView());
commandList->OMSetRenderTargets(1u, &rtvDescriptor, FALSE, nullptr);
CD3DX12_GPU_DESCRIPTOR_HANDLE postProcTexture(mPostProcSrvDescHeap->GetGpuHandle(SRV_PostProcDescriptors::HdrSRV));
ApplyToneMapping(commandList, postProcTexture);
Vertex shader:
/*
We use the 'big triangle' optimization that only requires three vertices to completely
cover the full screen area.
v0 v1 ID NDC UV
*____* -- ------- ----
| | / 0 (-1,+1) (0,0)
|_|/ 1 (+3,+1) (2,0)
| / 2 (-1,-3) (0,2)
|/
*
v2
*/
TexCoordVertexOut VS(uint id : SV_VertexID)
{
TexCoordVertexOut vout;
vout.texCoord = float2((id << 1u) & 2u, id & 2u);
// See Luna p.687
float x = vout.texCoord.x * 2.f - 1.f;
float y = -vout.texCoord.y * 2.f + 1.f;
// Procedurally generate each NDC vertex.
// The big triangle produces a quad covering the screen in NDC space.
vout.posH = float4(x, y, 0.f, 1.f);
// Transform quad corners to view space near plane.
float4 ph = mul(vout.posH, InvProj);
vout.posV = ph.xyz / ph.w;
return vout;
}
Pixel shader:
float4 PS(TexCoordVertexOut pin) : SV_TARGET
//float4 PS(float2 texCoord : TEXCOORD0) : SV_TARGET
{
...
// Get downscale texture sample
float3 colorDownScale = Pass1Tex.Sample(PointSampler, pin.texCoord).rgb;
...
return float4(colorDownScale, 1.f); // only top-quarter of source input is rendered!
//return float4(colorOutput, 1.f);
//return float4(distortCoords, 0.f, 1.f);
//return float4(colorHDR, 1.f);
//return float4(colorBlurred, 1.f);
//return float4(colorBloom, 1.f);
//return float4((p.z * 0.01f).rrr, 1.f); // multiply by a contrast factor
}
The PostProcess
class uses a 'full-screen quad' rendering model. Since we can rely on Direct3D 10.0 or later class hardware, it makes use of the 'self-generating quad' model to avoid the need for a VB.
As such, the self-generating quad is going to be positioned wherever you have the viewport set. The scissors settings are also needed since it uses the "big-triangle" optimization to avoid having a diagonal seam across the image IF you have the viewport positioned anywhere except the full render target.
I have this detail in the Writing custom shaders tutorial, but I forgot to replicate it in the PostProcess docs on the wiki.
TL;DR: When you go to render to the smaller render target, use:
auto vp = m_deviceResources->GetScreenViewport();
Viewport halfvp(vp);
halfvp.height /= 2.f;
halfvp.width /= 2.f;
commandList->RSSetViewports(1, halfvp.Get12());
Then when we switch back to your full-size rendertarget, use:
commandList->RSSetViewports(1, &vp);
Updated the wiki page.