I have been testing moving a lot of objects in Unity through normal C# code and through HLSL shaders. However, there is no difference in speed. FPS remains the same. Different perlin noise is used to change the position. The C# code uses the standard Mathf.PerlinNoise, while the HLSL uses a custom noise function.
Object spawn:
[SerializeField] private GameObject prefab;

/// <summary>
/// Spawns a 50 x 50 grid of prefab instances on the XZ plane, one unit apart,
/// each at a random height in [-1, 1].
/// </summary>
private void Start()
{
    const int gridSize = 50;

    // A single flat counter walks the grid in the same row-major order
    // (x outer, z inner) as a pair of nested loops would.
    for (int cell = 0; cell < gridSize * gridSize; cell++)
    {
        int row = cell / gridSize;
        int column = cell % gridSize;

        GameObject spawned = Instantiate(prefab);
        float height = Random.Range(-1f, 1f);
        spawned.transform.position = new Vector3(row * 1f, height, column * 1f);
    }
}
Code to move an object via C#. This script is added to each created object:
// Cached world position; only its Y component is rewritten each frame.
private Vector3 position;

/// <summary>
/// Captures the object's spawn X/Z and seeds Y with a Perlin sample taken at the current time.
/// </summary>
private void Start()
{
    Vector3 spawnPoint = transform.position;
    float now = Time.time;
    float initialHeight = Mathf.PerlinNoise(now, now);

    position = new Vector3(spawnPoint.x, initialHeight, spawnPoint.z);
}
/// <summary>
/// Recomputes this object's height from Perlin noise sampled at its X/Z
/// (scaled by 1/20 and scrolled by time) and writes the cached position back.
/// </summary>
private void Update()
{
    Vector3 current = transform.position;
    float now = Time.time;

    float sampleX = current.x / 20f + now;
    float sampleZ = current.z / 20f + now;

    position.y = Mathf.PerlinNoise(sampleX, sampleZ) * 5f;
    transform.position = position;
}
Part 1: C# client code
Object spawn, running the calculation on the shader and assigning the resulting value to the objects:
/// <summary>
/// CPU-side mirror of the compute shader's <c>Particle</c> struct.
/// </summary>
/// <remarks>
/// The field order and sizes must match the HLSL declaration
/// (<c>float3 position; float4 color;</c>) and the ComputeBuffer stride of
/// <c>sizeof(float) * 7</c> (28 bytes). With only <c>position</c> present the
/// managed element is 12 bytes, so SetData/GetData would pack and unpack the
/// array at the wrong offsets for every element after the first.
/// </remarks>
public struct Particle
{
    // float3 on the GPU side (12 bytes).
    public Vector3 position;

    // float4 on the GPU side (16 bytes); not used by the C# code shown here,
    // but required so the managed layout matches the 28-byte buffer stride.
    public Vector4 color;
}
// Prefab instantiated once per particle (serialized; presumably assigned in the Inspector).
[SerializeField]
private GameObject prefab;
// Compute shader asset containing the "CSMain" kernel that is dispatched every frame.
[SerializeField]
private ComputeShader computeShader;
// Spawned GameObjects, stored in the same order as particlesDataArray.
private List<GameObject> particlesList = new List<GameObject>();
// CPU-side copy of the particle data: uploaded once with SetData, refreshed each frame with GetData.
private Particle[] particlesDataArray;
/// <summary>Builds the particle grid and its GPU buffer once at startup.</summary>
private void Start() => CreateParticles();
/// <summary>Runs the compute kernel and applies its results every frame.</summary>
private void Update() => UpdateParticlePosition();
// GPU buffer holding one Particle per spawned object; created in CreateParticles
// and released in OnDestroy.
private ComputeBuffer computeBuffer;

/// <summary>
/// Spawns a 50 x 50 grid of prefab instances, records their positions in
/// <c>particlesDataArray</c>, uploads that array to a new ComputeBuffer and
/// binds the buffer to the "CSMain" kernel as "particles".
/// </summary>
private void CreateParticles()
{
    const int gridSize = 50;

    // Must equal the size of the HLSL Particle struct (float3 position + float4 color).
    // The managed Particle struct has to be the same size for SetData/GetData to line up.
    const int particleStride = sizeof(float) * 7;

    // Fill the array directly instead of building a temporary List and copying it.
    particlesDataArray = new Particle[gridSize * gridSize];
    int index = 0;

    for (int i = 0; i < gridSize; i++)
    {
        for (int j = 0; j < gridSize; j++)
        {
            GameObject createdParticle = Instantiate(prefab);
            createdParticle.transform.position = new Vector3(i * 1f, Random.Range(-1f, 1f), j * 1f);
            particlesList.Add(createdParticle);

            Particle particle = new Particle();
            particle.position = createdParticle.transform.position;
            particlesDataArray[index++] = particle;
        }
    }

    computeBuffer = new ComputeBuffer(particlesDataArray.Length, particleStride);
    computeBuffer.SetData(particlesDataArray);

    // Look the kernel up by name (as the Dispatch call does) rather than assuming index 0.
    int kernel = computeShader.FindKernel("CSMain");
    computeShader.SetBuffer(kernel, "particles", computeBuffer);
}

/// <summary>
/// Releases the GPU buffer. ComputeBuffers are not reclaimed automatically and
/// Unity reports a leak if one is left for the garbage collector.
/// </summary>
private void OnDestroy()
{
    if (computeBuffer != null)
    {
        computeBuffer.Release();
        computeBuffer = null;
    }
}
/// <summary>
/// Dispatches the "CSMain" kernel with the current time, reads the particle
/// buffer back to the CPU and copies each computed height onto its GameObject.
/// </summary>
private void UpdateParticlePosition()
{
    computeShader.SetFloat("time", Time.time);

    // One thread group per 10 particles, matching the kernel's [numthreads(10, 1, 1)].
    int kernelIndex = computeShader.FindKernel("CSMain");
    int groupCount = particlesDataArray.Length / 10;
    computeShader.Dispatch(kernelIndex, groupCount, 1, 1);

    // Synchronous read-back: the CPU waits here until the GPU has finished the dispatch.
    computeBuffer.GetData(particlesDataArray);

    for (int index = 0; index < particlesDataArray.Length; index++)
    {
        Transform particleTransform = particlesList[index].transform;

        Vector3 updated = particleTransform.position;
        updated.y = particlesDataArray[index].position.y;
        particleTransform.position = updated;
    }
}
Part 2: Compute kernel (GPGPU)
// Declares CSMain as a compute kernel; the C# side looks it up with FindKernel("CSMain").
#pragma kernel CSMain
// Per-particle record stored in the structured buffer.
// Layout is float3 + float4 = 7 floats (28 bytes), which matches the
// ComputeBuffer stride of sizeof(float) * 7 used when the buffer is created in C#.
struct Particle {
// Particle position; CSMain rewrites only the y component.
float3 position;
// Carried through unchanged by CSMain (read and written back as part of the struct).
float4 color;
};
// Read/write particle buffer, bound from C# under the name "particles".
RWStructuredBuffer<Particle> particles;
// Current time, set from C# every frame via SetFloat("time", ...).
float time;
// Floored modulo: x - y * floor(x / y). Unlike a truncating remainder, the
// result takes the sign of y (e.g. mod(-1, 3) == 2).
float mod(float x, float y)
{
    float wholeSteps = floor(x / y);
    return x - y * wholeSteps;
}
// Permutation polynomial ((34 * x + 1) * x) wrapped into [0, 289) with a floored modulo.
//
// The wrap is written out inline as v - 289 * floor(v / 289) so that the vector
// overloads operate component-wise. Routing a float3/float4 through a scalar-only
// mod(float, float) implicitly truncates the argument to its x component and
// splats the scalar result, so every lane would receive permute(x.x).
//
// As in the original, only the scalar overload applies an extra floor() to the result.
float permute(float x)
{
    float v = ((x * 34.0) + 1.0) * x;
    return floor(v - 289.0 * floor(v / 289.0));
}
float3 permute(float3 x)
{
    float3 v = ((x * 34.0) + 1.0) * x;
    return v - 289.0 * floor(v / 289.0);
}
float4 permute(float4 x)
{
    float4 v = ((x * 34.0) + 1.0) * x;
    return v - 289.0 * floor(v / 289.0);
}
// Linear expression 1.79284291400159 - 0.85373472095314 * r
// (by its name, a first-order approximation of an inverse square root).
float taylorInvSqrt(float r)
{
    const float offset = 1.79284291400159;
    const float slope = 0.85373472095314;
    return offset - slope * r;
}
// Component-wise version of the scalar overload.
float4 taylorInvSqrt(float4 r)
{
    const float offset = 1.79284291400159;
    const float slope = 0.85373472095314;
    return offset - slope * r;
}
// Hashes a 3D coordinate to a pseudo-random vector with each component in [-0.5, 0.5).
// A single sine-based hash value is scaled down by 1/8 twice; the fractional part of
// 512x each stage supplies one component (z first, then x, then y).
float3 rand3(float3 c) {
    float stage0 = 4096.0 * sin(dot(c, float3(17.0, 59.4, 15.0)));
    float stage1 = stage0 * .125;
    float stage2 = stage1 * .125;

    float3 fractions = float3(frac(512.0 * stage1),
                              frac(512.0 * stage2),
                              frac(512.0 * stage0));
    return fractions - 0.5;
}
// 3D gradient noise evaluated on a skewed (simplex-style) grid.
// Sums the contributions of four cell corners around p; each contribution is a
// pseudo-random gradient from rand3() dotted with the offset to that corner and
// weighted by a radial falloff. The exact output range is not established here
// (presumably about [-1, 1] given the final scale of 52 — TODO confirm).
float _snoise(float3 p) {
// Skew (F3 = 1/3) and unskew (G3 = 1/6) factors for the 3D grid.
const float F3 = 0.3333333;
const float G3 = 0.1666667;
// Skew p and floor it to get the integer coordinates of the containing cell.
float3 s = floor(p + dot(p, float3(F3, F3, F3)));
// Offset from the cell's (unskewed) origin corner to p.
float3 x = p - s + dot(s, float3(G3, G3, G3));
// Component-wise ordering tests (x >= y, y >= z, z >= x) used to pick the
// second and third corners.
float3 e = step(float3(0.0, 0.0, 0.0), x - x.yzx);
// Integer offsets of the second (i1) and third (i2) corners from the cell origin.
float3 i1 = e * (1.0 - e.zxy);
float3 i2 = 1.0 - e.zxy * (1.0 - e);
// Offsets from p to the second, third and last (origin + 1) corners.
float3 x1 = x - i1 + G3;
float3 x2 = x - i2 + 2.0 * G3;
float3 x3 = x - 1.0 + 3.0 * G3;
// w: per-corner falloff weights, d: per-corner gradient contributions.
float4 w, d;
// Squared distance from p to each corner.
w.x = dot(x, x);
w.y = dot(x1, x1);
w.z = dot(x2, x2);
w.w = dot(x3, x3);
// Falloff: zero for corners whose squared distance is 0.6 or more.
w = max(0.6 - w, 0.0);
// Pseudo-random gradient at each corner dotted with the offset to that corner.
d.x = dot(rand3(s), x);
d.y = dot(rand3(s + i1), x1);
d.z = dot(rand3(s + i2), x2);
d.w = dot(rand3(s + 1.0), x3);
// Raise the falloff to the fourth power (squared twice).
w *= w;
w *= w;
d *= w;
// Sum the four weighted contributions, scaled by 52.
return dot(d, float4(52.0, 52.0, 52.0, 52.0));
}
// Kernel entry point: one thread per particle, 10 threads per group.
// Replaces the particle's height with noise sampled at its X/Z (scaled by 1/20
// and scrolled by time / 5), scaled by 5; x, z and color are written back unchanged.
[numthreads(10, 1, 1)]
void CSMain(uint3 id : SV_DispatchThreadID)
{
    Particle current = particles[id.x];

    float scroll = time / 5.0;
    float sampleX = current.position.x / 20.0 + scroll;
    float sampleZ = current.position.z / 20.0 + scroll;

    current.position.y = _snoise(float3(sampleX, 0.0, sampleZ)) * 5.0;

    particles[id.x] = current;
}
What am I doing wrong, why is there no increase in calculation speed? :)
Thanks in advance!
TL;DR: your GPGPU (compute shader) scenario is unoptimized, which skews your results. Consider binding a material to the computeBuffer and rendering via Graphics.DrawProcedural. That way everything stays on the GPU.
OP:
What am I doing wrong, why is there no increase in calculation speed?
Essentially, there are two parts to your problem.
With most things GPU-related, you generally want to avoid reading from the GPU since it will block the CPU. This is true also for GPGPU scenarios.
If I were to hazard a guess, the main culprit is the GPGPU (compute shader) call computeBuffer.GetData(), shown below:
private void Update()
{
UpdateParticlePosition();
}
private void UpdateParticlePosition()
{
.
.
.
computeBuffer.GetData(particlesDataArray); // <----- OUCH!
Unity (my emphasis):
ComputeBuffer.GetData
Read data values from the buffer into an array...
Note that this function reads the data back from the GPU, which can be slow...If any GPU work has been submitted that writes to this buffer, Unity waits for the tasks to complete before it retrieves the requested data. Tell me more...
I can see you are creating 2,500 "particles" where each particle is attached to a GameObject. If the intent is just to draw a simple quad, then it's more efficient to create an array of structs, each containing a Vector3 position, and then perform a batched render call to draw all the particles in one go.
Proof: see the video below of an n-Body simulation, which runs at 60+ FPS on a 2014-era NVIDIA card.
For example, in my GPGPU n-Body Galaxy Simulation I do just that. Pay attention to the StarMaterial.SetBuffer("stars", _starsBuffer) call during actual rendering. That tells the GPU to use the buffer that already exists on the GPU, the very same buffer that the compute shader used to move the star positions. The CPU never reads from the GPU here.
// Excerpt of a MonoBehaviour that moves stars with a compute shader and draws them
// procedurally from the same GPU buffer, with no GPU-to-CPU read-back in the code shown.
// NOTE(review): _updateParticlesKernel, GroupSize, QuadStride, _manager, Star and Constants
// are not declared in this excerpt; presumably they live in the elided section or elsewhere.
// NOTE(review): no ComputeBuffer.Release() call is visible here; presumably handled in the
// elided code (e.g. OnDestroy) — confirm.
public class Galaxy1Controller : MonoBehaviour
{
// Texture bound to the compute kernel as "hueTexture".
public Texture2D HueTexture;
public int NumStars = 10000; // That's right! 10,000 stars!
// Compute shader containing the "UpdateStars" kernel.
public ComputeShader StarCompute;
// Material whose shader reads the "stars" and "quadPoints" buffers when drawing.
public Material StarMaterial;
// GPU buffer with 6 elements, used as the quad drawn for each star.
private ComputeBuffer _quadPoints;
// CPU-side initial star data, uploaded once in Start.
private Star[] _stars;
// GPU buffer of star data, shared by the compute kernel and the material.
private ComputeBuffer _starsBuffer;
.
.
.
// Looks up the kernel, creates both GPU buffers and uploads their initial contents.
private void Start()
{
_updateParticlesKernel = StarCompute.FindKernel("UpdateStars");
_starsBuffer = new ComputeBuffer(NumStars, Constants.StarsStride);
_stars = new Star[NumStars];
// Create initial positions for stars here (not shown)
_starsBuffer.SetData(_stars);
_quadPoints = new ComputeBuffer(6, QuadStride);
_quadPoints.SetData(...); // star quad
}
// Binds the kernel's inputs and dispatches it once per frame; nothing is read back to the CPU.
private void Update()
{
// bind resources to compute shader
StarCompute.SetBuffer(_updateParticlesKernel, "stars", _starsBuffer);
StarCompute.SetFloat("deltaTime", Time.deltaTime*_manager.MasterSpeed);
StarCompute.SetTexture(_updateParticlesKernel, "hueTexture", HueTexture);
// dispatch, launch threads on GPU
// Round up so a star count that is not a multiple of GroupSize still gets enough groups.
var numberOfGroups = Mathf.CeilToInt((float) NumStars/GroupSize);
StarCompute.Dispatch(_updateParticlesKernel, numberOfGroups, 1, 1);
// "Look Ma, no reading from the GPU!"
}
// Draws the stars straight from the GPU buffers that the compute kernel updated.
private void OnRenderObject()
{
// bind resources to material
StarMaterial.SetBuffer("stars", _starsBuffer);
StarMaterial.SetBuffer("quadPoints", _quadPoints);
// set the pass
StarMaterial.SetPass(0);
// draw
// 6 vertices (two triangles) per instance, NumStars instances.
// NOTE(review): this three-argument overload appears to be the legacy immediate-mode call;
// newer Unity versions name it Graphics.DrawProceduralNow — confirm against the target version.
Graphics.DrawProcedural(MeshTopology.Triangles, 6, NumStars);
}
}
n-Body galaxy simulation of 10,000 stars:
I think everyone can agree that Microsoft's GPGPU documentation is pretty sparse so your best bet is to check out examples scattered around the interwebs. One that comes to mind is the excellent "GPU Ray Tracing in Unity" series over at Three Eyed Games. See the link below.