ComputeShader学习
基础
- Compute Shaders是在GPU运行却又在普通渲染管线之外的程序。用于运行GPGPU program。
- 平行算法被拆分成很多线程组,而线程组包含很多线程。例如一个线程处理一个像素点,而一定要注意这种处理是无序的随机的,并不一定是固定的处理顺序,例如不一定是从左到右挨个处理像素点。
线程组
A Thread Group 运行在一个GPU单元 (a single multiprocessor) 上,如果GPU有16个multiprocessor,那么程序至少要分成16个 Thread Group,使得每个multiprocessor都参与计算。
组之间不分享内存。
线程
一个线程组包含n个线程,每32个thread称为一个warp(nvidia:warp=32 ,ati:wavefront=64,因此未来此数字可能会更高)。
从效率考虑,一个线程组包含的线程数最好是warp的倍数,256是一个比较合适的数字。
语法
numthreads中定义单个线程组,例如numthreads(8,8,1)表示这个线程组为三维线程矩阵,共8*8*1个线程
在外面调用computeShader.Dispatch(2,2,1),表示定义2*2*1个线程组
SV_DispatchThreadID表示当前线程Id,取值范围为(0,0,0)~(threadx*thread_groupx-1,thready*thread_groupy-1,threadz*thread_groupz-1)
因此在使用时根据实际处理图片等分辨率,来写thread以及thread_group,使SV_DispatchThreadID的xy值可以覆盖图片分辨率,这样可以做到处理每个像素
打个比方:图片像素为512*512,那么thread写成(8,8,1), thread_group写成(512/8,512/8,1)
ComputeShader如下,相当于对图片每个像素写入红色,因为Id.xy的范围在(0~511,0~511)
一维调度:
DispatchIndex = DispatchThreadID.x + DispatchThreadID.y * numthreads.x * Dispatch.x + DispatchThreadID.z * numthreads.x * Dispatch.x * numthreads.y * Dispatch.y
1 | #pragma kernel FillWithRed |
测试代码
Shader
Shader "Unlit/ComputTest"
{
Properties
{
_Color ("Color", Color) = (1,1,1,1)
}
SubShader
{
Tags { "RenderType" = "Opaque" "RenderPipeline" = "UniversalPipeline"}
Blend SrcAlpha OneMinusSrcAlpha
HLSLINCLUDE
// Declarations placed here are shared by every pass of this SubShader.
#include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl"
#include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Lighting.hlsl"
// Per-material constants. _Color backs the "Color" property declared above;
// the pass below does not read it (the output color comes from the buffer).
CBUFFER_START(UnityPerMaterial)
float4 _Color;
CBUFFER_END
// One particle record. Field order and types mirror ParticleData in the
// compute shader and the 7-float stride used by the C# script.
struct data
{
float3 position;
float4 color;
};
// Read-only view of the particle buffer; the C# script binds it with
// material.SetBuffer("Result", ...).
StructuredBuffer<data> Result;
ENDHLSL
Pass
{
Blend SrcAlpha OneMinusSrcAlpha
HLSLPROGRAM
#pragma vertex vert
#pragma fragment frag

// Values passed from the vertex stage to the fragment stage.
struct v2f
{
    float4 vertex : SV_POSITION; // clip-space position
    float4 color : COLOR;        // per-particle color, passed through unchanged
};

// Vertex stage for procedural drawing: there is no vertex buffer, so the
// vertex index is used directly as the index into the particle buffer.
v2f vert (uint id : SV_VertexID)
{
    v2f o;
    // TransformObjectToHClip takes a float3 object-space position; pass the
    // stored float3 as-is instead of widening it to float4 first, which only
    // caused an implicit truncation back to float3.
    o.vertex = TransformObjectToHClip(Result[id].position);
    o.color = Result[id].color;
    return o;
}

// Fragment stage: output the particle color as-is.
float4 frag (v2f i) : SV_Target
{
    return i.color;
}
ENDHLSL
}
}
}
C#
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
// CPU-side mirror of one element of the particle buffer.
// The field order (position, then color) and total size (3 + 4 floats) must
// stay in sync with the stride computed in ComputeTest.Start and with the
// ParticleData struct declared in the compute shader.
public struct ParticleData
{
// Particle position; counted as 3 floats in the buffer stride.
Vector3 position;
// Particle color; counted as 4 floats in the buffer stride.
Color color;
}
public class ComputeTest : MonoBehaviour
{
public ComputeShader computeShader;
ParticleData[] particleDatas;
public int count = 64;
public Color color;
public int size = 1;
public Material material;
private ComputeBuffer ComputeBuffer;
private int id;
// Start is called before the first frame update.
// Creates the particle buffer, uploads zero-initialised particles, and binds
// the buffer to both the compute kernel and the render material.
void Start()
{
    // One element = Vector3 position + Color, i.e. 3 + 4 floats.
    const int floatsPerParticle = 3 + 4;
    int strideBytes = sizeof(float) * floatsPerParticle;
    ComputeBuffer = new ComputeBuffer(count, strideBytes);

    id = computeShader.FindKernel("CSMain");

    // Start every particle from default (all-zero) data; the kernel fills in
    // the real values when it is dispatched.
    particleDatas = new ParticleData[count];
    for (int slot = 0; slot < count; ++slot)
    {
        particleDatas[slot] = default(ParticleData);
    }
    ComputeBuffer.SetData(particleDatas);

    // The same buffer is written by the compute shader and read by the
    // material's vertex stage, both under the name "Result".
    computeShader.SetBuffer(id, "Result", ComputeBuffer);
    material.SetBuffer("Result", ComputeBuffer);
}
// Unity's OnRenderObject message (not Update): re-runs the particle kernel
// and then draws the buffer contents as points with the assigned material.
void OnRenderObject()
{
    // Nothing to simulate or draw if the buffer was never created.
    if (ComputeBuffer == null)
    {
        return;
    }

    computeShader.SetFloat("time", Time.time);
    computeShader.SetFloat("size", size);
    computeShader.SetVector("color", color);

    // Size the dispatch from the buffer instead of a fixed 10 x 10 x 100 grid:
    // that grid launched 10,000,000 threads regardless of `count`, and every
    // thread beyond the buffer length indexed past its end.
    uint threadsX, threadsY, threadsZ;
    computeShader.GetKernelThreadGroupSizes(id, out threadsX, out threadsY, out threadsZ);
    int threadsPerGroup = (int)(threadsX * threadsY * threadsZ);
    int groupsX = (ComputeBuffer.count + threadsPerGroup - 1) / threadsPerGroup; // ceil

    // The kernel flattens its 3D thread id using the group counts passed in
    // _threadGroup, so this vector must match the Dispatch arguments exactly.
    // NOTE(review): the kernel has no bounds check, so the threads of the
    // last, partially used group still index past `count`; guard in the
    // kernel or pad the buffer if the target graphics API requires it.
    computeShader.SetVector("_threadGroup", new Vector3(groupsX, 1, 1));
    computeShader.Dispatch(id, groupsX, 1, 1);

    material.SetPass(0);
    Graphics.DrawProceduralNow(MeshTopology.Points, ComputeBuffer.count);
}
// Frees the GPU buffer when this component is destroyed.
private void OnDestroy()
{
    // The buffer is only created in Start; if that never ran (or failed
    // before the allocation) there is nothing to free.
    if (ComputeBuffer == null)
    {
        return;
    }

    // A single Release is sufficient; the original also called Dispose,
    // which releases the same buffer a second time.
    ComputeBuffer.Release();
    ComputeBuffer = null;
}
}
CS
// Each #kernel tells which function to compile; you can have many kernels
#pragma kernel CSMain
// Uniforms set from the C# script each time the kernel is dispatched.
float time;
float4 color;
float size;
// One particle record; mirrors the C# ParticleData struct (float3 + float4).
struct ParticleData
{
float3 position;
float4 color;
};
// Read-write particle buffer, bound from C# under the name "Result".
RWStructuredBuffer<ParticleData> Result;
// Number of thread groups dispatched along x/y/z; must match the arguments
// passed to Dispatch, because CSMain uses it to flatten the 3D thread id.
float3 _threadGroup;
[numthreads(10,10,10)]
void CSMain (uint3 id : SV_DispatchThreadID)
{
    // Flatten the 3D dispatch-thread id into a 1D particle index:
    //   x + y * (threads per row) + z * (threads per slice)
    // where the literal 10s are the numthreads dimensions declared above.
    float rowOffset = id.y * 10 * _threadGroup.x;
    float sliceOffset = id.z * 10 * 10 * _threadGroup.x * _threadGroup.y;
    int DispatchIndex = id.x + rowOffset + sliceOffset;

    // Angle around the Y axis for this particle; advances with time.
    float phase = DispatchIndex + time;
    // Height grows linearly with the particle index.
    float height = DispatchIndex * 0.002;
    // Scale factor applied to x and y, derived from the unscaled height.
    float squash = abs(sin(height + time * 0.3));

    // Both fields are overwritten, so the record is built from scratch
    // rather than read back from the buffer first.
    ParticleData data;
    data.color = color;
    data.position = float3(size * sin(phase) * squash, height * squash, size * cos(phase));
    Result[DispatchIndex] = data;
}
作用
只要有涉及大量数据的处理都可以放在ComputeShader中计算
- Unity新版VFX
- 布料/头发模拟
- 光追
- 后处理
参考链接
Compute Shader介绍(一)
Compute Shader介绍(二)
初识ComputeShader
Shader第二十八讲 Compute Shaders