基础

  • Compute Shaders是在GPU运行却又在普通渲染管线之外的程序。用于运行GPGPU program。
  • 并行算法被拆分成很多线程组,而每个线程组又包含很多线程。例如一个线程处理一个像素点。一定要注意:这种处理是无序的,执行顺序并不固定,例如不一定是从左到右挨个处理像素点。

线程组

一个线程组(Thread Group)运行在一个GPU计算单元(a single multiprocessor)上。如果GPU有16个
multiprocessor,那么程序至少要分成16个线程组,才能使每个multiprocessor都参与计算。
线程组之间不共享内存。

线程

一个线程组包含n个线程,每32个thread称为一个warp(nvidia:warp=32 ,ati:wavefront=64,因此未来此数字可能会更高)。
从效率考虑,一个线程组包含的线程数最好是warp的倍数,256是一个比较合适的数字。

语法

  • numthreads中定义单个线程组,这个线程组为三维线程矩阵,共8*8*1个线程

  • 在外面调用computeShader.Dispatch(2,2,1),表示定义2*2*1个线程组

  • SV_DispatchThreadID表示当前线程Id,取值范围为(0,0,0)~(threadx*thread_groupx-1, thready*thread_groupy-1, threadz*thread_groupz-1)

  • 因此在使用时,要根据实际处理的图片等的分辨率来设置thread以及thread_group,使SV_DispatchThreadID的xy值可以覆盖整个图片分辨率,这样才能处理到每个像素
    打个比方:图片像素为512*512,那么thread写成(8,8,1), thread_group写成(512/8,512/8,1)
    ComputeShader如下,相当于对图片每个像素写入红色,因为id.xy的范围是(0~511, 0~511)

  • 一维调度:
    DispatchIndex = DispatchThreadID.x + DispatchThreadID.y * (numthreads.x * Dispatch.x) + DispatchThreadID.z * (numthreads.x * Dispatch.x) * (numthreads.y * Dispatch.y)

1
2
3
4
5
6
7
8
9
10
11
12
#pragma kernel FillWithRed
// Read-write 2D texture the kernel writes into; bound from the host side.
RWTexture2D< float4 > res;

// numthreads declares a single thread group: a 3D block of 8*8*1 threads.
// Calling computeShader.Dispatch(2,2,1) from the host launches 2*2*1 such thread groups.
// SV_DispatchThreadID is the ID of the current thread; it ranges over
// (0,0,0) ~ (threadx*thread_groupx-1, thready*thread_groupy-1, threadz*thread_groupz-1).
[numthreads(8,8,1)]
void FillWithRed (uint3 id : SV_DispatchThreadID)
{
// Each thread writes opaque red to the texel addressed by its xy dispatch ID.
res[id.xy] = float4(1,0,0,1);
}

测试代码

  • Shader

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    // Renders the particles produced by the compute kernel: each vertex ID is
    // used as an index into the `Result` structured buffer, so no mesh data is
    // required (the draw is issued procedurally from script).
    Shader "Unlit/ComputTest"
    {
        Properties
        {
            _Color ("Color", Color) = (1,1,1,1)
        }
        SubShader
        {
            Tags { "RenderType" = "Opaque" "RenderPipeline" = "UniversalPipeline"}

            // Alpha blending; declared once here so it applies to every pass.
            Blend SrcAlpha OneMinusSrcAlpha

            HLSLINCLUDE

            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl"
            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Lighting.hlsl"

            CBUFFER_START(UnityPerMaterial)
            float4 _Color;
            CBUFFER_END

            // One element of the particle buffer. The field order must match the
            // struct written by the compute kernel and the one uploaded from C#.
            struct data
            {
                float3 position;
                float4 color;
            };

            // Bound from script with material.SetBuffer("Result", ...).
            StructuredBuffer<data> Result;

            ENDHLSL

            Pass
            {
                HLSLPROGRAM
                #pragma vertex vert
                #pragma fragment frag

                struct v2f
                {
                    float4 vertex : SV_POSITION;
                    float4 color : COLOR;
                };

                // Fetches particle `id` from the buffer and projects it to clip space.
                v2f vert (uint id : SV_VertexID)
                {
                    data particle = Result[id];

                    v2f o;
                    // TransformObjectToHClip takes a float3 object-space position;
                    // pass the position directly instead of a float4 that would be
                    // implicitly truncated.
                    o.vertex = TransformObjectToHClip(particle.position);
                    o.color = particle.color;
                    return o;
                }

                // Outputs the per-particle color unchanged.
                float4 frag (v2f i) : SV_Target
                {
                    return i.color;
                }
                ENDHLSL
            }
        }
    }
  • C#

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;

    // CPU-side element type of the array uploaded with ComputeBuffer.SetData.
    // The field order (position, then color) mirrors the float3 position /
    // float4 color structs declared in the shaders, and the buffer stride is
    // computed in ComputeTest.Start as 3 + 4 floats, so do not reorder or add
    // fields here without updating both.
    public struct ParticleData
    {
    Vector3 position;
    Color color;
    }
    /// <summary>
    /// Fills a particle buffer on the GPU with the "CSMain" compute kernel and
    /// draws its contents as points through <see cref="material"/>.
    /// </summary>
    public class ComputeTest : MonoBehaviour
    {
        // Threads per group, as declared by [numthreads(10,10,10)] on CSMain.
        private const int ThreadsPerGroup = 10 * 10 * 10;

        public ComputeShader computeShader;

        ParticleData[] particleDatas;

        public int count = 64;
        public Color color;
        public int size = 1;

        public Material material;

        private ComputeBuffer particleBuffer;
        private int id;
        // Number of thread groups dispatched along x; y and z use a single group.
        private int groupCount;

        // Start is called before the first frame update
        void Start()
        {
            int vec3Stride = sizeof(float) * 3;
            int colorStride = sizeof(float) * 4;
            particleBuffer = new ComputeBuffer(count, vec3Stride + colorStride);
            id = computeShader.FindKernel("CSMain");

            // Round up so that every one of the `count` elements is covered by a
            // thread; the last group may contain threads whose index lies past
            // the end of the buffer.
            groupCount = (count + ThreadsPerGroup - 1) / ThreadsPerGroup;

            // A freshly allocated struct array is already default-initialized,
            // so no per-element construction is needed before the upload.
            particleDatas = new ParticleData[count];
            particleBuffer.SetData(particleDatas);

            computeShader.SetBuffer(id, "Result", particleBuffer);
            material.SetBuffer("Result", particleBuffer);
        }

        // OnRenderObject is called after a camera has rendered the scene.
        void OnRenderObject()
        {
            // Nothing to simulate or draw if the buffer was never created
            // (or has already been released).
            if (particleBuffer == null)
            {
                return;
            }

            computeShader.SetFloat("time", Time.time);
            computeShader.SetFloat("size", size);
            computeShader.SetVector("color", color);

            // The kernel flattens its 3D thread ID using the group counts, so the
            // value sent in "_threadGroup" must match the Dispatch arguments.
            computeShader.SetVector("_threadGroup", new Vector3(groupCount, 1, 1));
            computeShader.Dispatch(id, groupCount, 1, 1);

            material.SetPass(0);
            Graphics.DrawProceduralNow(MeshTopology.Points, particleBuffer.count);
        }

        private void OnDestroy()
        {
            // Release the GPU buffer exactly once; Dispose() would release it again.
            if (particleBuffer != null)
            {
                particleBuffer.Release();
                particleBuffer = null;
            }
        }
    }
  • CS

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    // Each #kernel tells which function to compile; you can have many kernels
    #pragma kernel CSMain

    // Per-dispatch parameters set from script.
    float time;
    float4 color;
    float size;

    // One particle; the field order must match the buffer element layout used
    // by the script and by the rendering shader.
    struct ParticleData
    {
        float3 position;
        float4 color;
    };

    RWStructuredBuffer<ParticleData> Result;

    // Number of thread groups dispatched along x/y/z (the Dispatch arguments),
    // needed to flatten the 3D thread ID into a buffer index.
    float3 _threadGroup;

    [numthreads(10,10,10)]
    void CSMain (uint3 id : SV_DispatchThreadID)
    {
        // Flatten the 3D dispatch ID in integer arithmetic:
        //   index = x + y * rowLength + z * sliceLength
        // where a row holds 10 * groups.x threads and a slice holds
        // 10 * groups.y rows (10 is the numthreads size on each axis).
        uint rowLength = 10 * (uint)_threadGroup.x;
        uint sliceLength = rowLength * 10 * (uint)_threadGroup.y;
        uint DispatchIndex = id.x + id.y * rowLength + id.z * sliceLength;

        // The dispatch may launch more threads than the buffer has elements;
        // those threads must not touch the buffer.
        uint elementCount;
        uint elementStride;
        Result.GetDimensions(elementCount, elementStride);
        if (DispatchIndex >= elementCount)
        {
            return;
        }

        // Both fields are overwritten, so the previous element is not read back.
        ParticleData data;
        data.color = color;
        data.position = float3(size * sin(DispatchIndex + time), DispatchIndex * 0.002, size * cos(DispatchIndex + time));
        data.position.xy *= abs(sin(data.position.y + time * 0.3));
        Result[DispatchIndex] = data;
    }

作用

只要有涉及大量数据的处理都可以放在ComputeShader中计算

  • Unity新版VFX
  • 布料/头发模拟
  • 光追
  • 后处理

参考链接

Compute Shader介绍(一)
Compute Shader介绍(二)
初识ComputeShader
Shader第二十八讲 Compute Shaders