Search code examples
xnahlslxna-4.0xbox360pixel-shader

Pixel Shader performance on xbox


I've got a pixel shader (below) that I'm using with XNA. On my laptop (crappy graphics card) it runs a little jerkily, but OK. I've just tried running it on the Xbox and it's horrible!

There's nothing to the game (it's just a fractal renderer), so it has to be the pixel shader causing the issues. I also think it's the pixel shader code because when I lower the iteration count it's OK. I've also checked, and the GC delta is zero.

Are there any HLSL functions that are no-nos on the Xbox? I must be doing something wrong here; performance can't be that bad!

#include "FractalBase.fxh"

// Exponent passed to ComPow on every iteration; ZPower * ZPower is also used as the bailout threshold.
float ZPower;

// Per-channel tint and scale multiplied into the RGB output of FractalPixelShader.
float3 Colour;
float3 ColourScale;

// Modulus |Arg| of a complex number stored as (x = real, y = imaginary).
float ComAbs(float2 Arg)
{
    // dot(Arg, Arg) is x*x + y*y, i.e. the squared modulus.
    float squaredLength = dot(Arg, Arg);
    return sqrt(squaredLength);
}

// Raises the complex number Arg (x = real, y = imaginary) to a real Power using
// polar form: the modulus is raised to Power and the argument is multiplied by Power.
float2 ComPow(float2 Arg, float Power)
{
    // |Arg|^Power, computed from the squared modulus with half the exponent.
    float radius = pow(dot(Arg, Arg), Power / 2);
    float theta = Power * atan2(Arg.y, Arg.x);

    float sinTheta;
    float cosTheta;
    sincos(theta, sinTheta, cosTheta);

    return radius * float2(cosTheta, sinTheta);
}

// Escape-time fractal pixel shader.
//   texCoord   - used as the complex constant c.
//   Iterations - maximum number of loop passes (supplied by the technique).
// Iterates z = z^ZPower + c starting from z = 0 and leaves the loop as soon as
// |z|^2 reaches ZPower * ZPower. Pixels whose iteration count is Iterations - 1
// or more are drawn opaque black; all others get a shade built from the
// normalised iteration count plus a fractional term, tinted by Colour * ColourScale.
float4 FractalPixelShader(float2 texCoord : TEXCOORD0, uniform float Iterations) : COLOR0
{
    float escapeLimit = ZPower * ZPower;

    float2 seed = texCoord.xy;
    float2 orbit = 0;

    float prevMagSq = 0;
    float magSq = 0;

    // The counter is declared outside the loop because it is read afterwards.
    float count;
    for (count = 0; count < Iterations; count++)
    {
        orbit = ComPow(orbit, ZPower) + seed;
        magSq = dot(orbit, orbit);

        if (magSq >= escapeLimit)
        {
            break;
        }

        prevMagSq = magSq;
    }

    // Ratio of the last step's growth in |z|^2 to the distance from the previous
    // |z|^2 to the limit; its reciprocal is the fractional part of the shade.
    float overshoot = (magSq - prevMagSq) / (escapeLimit - prevMagSq);
    float shade = count / Iterations + (1 / overshoot / Iterations);

    float4 interior = float4(0.0, 0.0, 0.0, 1.0);
    float4 escaped = float4(shade * Colour * ColourScale, 1);

    return (count >= Iterations - 1) ? interior : escaped;
}

// Single-pass technique: SpriteVertexShader plus FractalPixelShader, both compiled
// for shader model 3.0, with the pixel shader's Iterations argument fixed at 128.
technique Technique1
{
    pass
    {
        VertexShader = compile vs_3_0 SpriteVertexShader();
        PixelShader = compile ps_3_0 FractalPixelShader(128);
    }
}

Below is FractalBase.fxh:

// Transform applied to the incoming vertex position; bound to vertex-shader constant register c0.
float4x4 MatrixTransform : register(vs, c0);

// View parameters used by SpriteVertexShader to turn the transformed position into complex-plane coordinates.
float2 Pan;     // offset subtracted after scaling (its y component is negated first)
float Zoom;     // scale applied to the position on both axes
float Aspect;   // additional scale applied to the y axis only

// Vertex shader shared by the fractal effects.
// Transforms the vertex position by MatrixTransform, then overwrites texCoord with
// the complex-plane coordinate for that position. Colour passes through unchanged.
void SpriteVertexShader(inout float4 Colour    : COLOR0,
                        inout float2 texCoord : TEXCOORD0,
                        inout float4 position : SV_Position)
{
    position = mul(position, MatrixTransform);

    // Convert the transformed position from screen space into complex coordinates:
    // scale by Zoom (and by Aspect on y), then shift by the pan offset.
    float2 axisScale = float2(1, Aspect);
    float2 panOffset = float2(Pan.x, -Pan.y);

    texCoord = position.xy * Zoom * axisScale - panOffset;
}

EDIT: I did try removing the conditional by using lots of lerps; however, when I did that I got loads of artifacts (and not the kind that "belong in a museum"!). I changed things around and fixed a few logic errors, but the key was to multiply the GreaterThan result by 1 + epsilon, to account for rounding errors that turn 0.9999 into 0 when the value is converted to an integer. See the fixed code below:

#include "FractalBase.fxh"

// Exponent passed to ComPow on every iteration; ZPower * ZPower is also used as the bailout threshold.
float ZPower;

// Per-channel tint and scale multiplied into the RGB output of FractalPixelShader.
float3 Colour;
float3 ColourScale;

// Modulus |Arg| of a complex number stored as (x = real, y = imaginary).
float ComAbs(float2 Arg)
{
    // dot(Arg, Arg) is x*x + y*y, i.e. the squared modulus.
    float squaredLength = dot(Arg, Arg);
    return sqrt(squaredLength);
}

// Raises the complex number Arg (x = real, y = imaginary) to a real Power using
// polar form: the modulus is raised to Power and the argument is multiplied by Power.
float2 ComPow(float2 Arg, float Power)
{
    // |Arg|^Power, computed from the squared modulus with half the exponent.
    float radius = pow(dot(Arg, Arg), Power / 2);
    float theta = Power * atan2(Arg.y, Arg.x);

    float sinTheta;
    float cosTheta;
    sincos(theta, sinTheta, cosTheta);

    return radius * float2(cosTheta, sinTheta);
}

// Comparison mask for the lerp-based iteration loop.
// Returns 1.001 when x >= y and 0 otherwise. The "true" value sits slightly above 1
// so that it still becomes 1 after being stored in an int.
//
// An arithmetic formulation such as (x - y) / (2 * abs(x - y)) + 0.5 evaluates
// 0 / 0 when x == y; the comparison below is defined for every input and treats
// equality as "greater", the same as the >= bailout test in the branching loop.
// HLSL's ?: evaluates both operands (it does not short-circuit), so this is
// expected to compile to a select rather than a dynamic branch.
float GreaterThan(float x, float y)
{
    return (x >= y) ? 1.001f : 0.0f;
}

// Variant of the fractal pixel shader that has no early `break`: every pixel runs
// all `Iterations` loop passes and lerp weights are used to freeze the state once
// the bailout condition has been seen.
//   texCoord   - used as the complex constant c.
//   Iterations - number of loop passes (Technique1 passes 128).
// Returns opaque black when DoneIterations >= Iterations - 1; otherwise a shade
// built from DoneIterations / Iterations plus a fractional term, tinted by
// Colour * ColourScale.
float4 FractalPixelShader(float2 texCoord : TEXCOORD0, uniform float Iterations) : COLOR0
{
    float2 c = texCoord.xy;
    float2 z = 0;

    int i;

    float oldBailoutTest = 0;
    float bailoutTest = 0;

    // 1 until bailout is detected, then 0. Used as the lerp weight that decides
    // whether z, bailoutTest and oldBailoutTest are still updated.
    int KeepGoing = 1;

    // Stays at Iterations unless a bailout records a smaller loop index.
    int DoneIterations = Iterations;

    // 0 until bailout is detected, then 1. Declared int, so the float lerp result
    // assigned to it below is converted to an integer.
    int Bailout = 0;

    for(i = 0; i < Iterations; i++)
    {
        // Advance z only while KeepGoing is 1; with weight 0 the old value is kept.
        z = lerp(z, ComPow(z, ZPower) + c, KeepGoing);

        bailoutTest = lerp(bailoutTest, z.x * z.x + z.y * z.y, KeepGoing);

        // The weight -abs(Bailout) + 1 is 1 while Bailout is 0 and 0 once it is 1,
        // so after the flag has been set it is never overwritten again.
        Bailout = lerp(Bailout, GreaterThan(bailoutTest, ZPower * ZPower), -abs(Bailout) + 1);

        KeepGoing = lerp(KeepGoing, 0.0, Bailout);
        // With Bailout at 1 this keeps the smallest i seen so far, i.e. the index
        // of the iteration on which bailout was first detected.
        DoneIterations = lerp(DoneIterations, min(i, DoneIterations), Bailout);

        // KeepGoing has already been cleared on the bailout iteration, so
        // oldBailoutTest stays at the value from the iteration before it.
        oldBailoutTest = lerp(oldBailoutTest, bailoutTest, KeepGoing);
    }

    float normalisedIterations = DoneIterations / Iterations;
    float factor = (bailoutTest - oldBailoutTest) / (ZPower * ZPower - oldBailoutTest);

    // The scalar is broadcast to all four components of Result.
    float4 Result = normalisedIterations + (1 / factor / Iterations);

    Result = (DoneIterations >= Iterations - 1) ? float4(0.0, 0.0, 0.0, 1.0) : float4(Result.x * Colour.r * ColourScale.x, Result.y * Colour.g * ColourScale.y, Result.z * Colour.b * ColourScale.z, 1);

    return Result;
}

// Single-pass technique: SpriteVertexShader plus FractalPixelShader, both compiled
// for shader model 3.0, with the pixel shader's Iterations argument fixed at 128.
technique Technique1
{
    pass
    {
        VertexShader = compile vs_3_0 SpriteVertexShader();
        PixelShader = compile ps_3_0 FractalPixelShader(128);
    }
}

Solution

  • The Xbox has a pretty large block size, so branching on the Xbox isn't always so great. Also, the compiler isn't always the most effective at emitting dynamic branches, which your code seems to use.

    Look into the branch attribute: http://msdn.microsoft.com/en-us/library/bb313972%28v=xnagamestudio.31%29.aspx

    Also, if you remove the early bailout, does the PC's performance become more similar to the Xbox's?

    Keep in mind that modern graphics cards are actually quite a bit faster than the Xenon unit by now.