Unity中的Compute Shader

发表于2018-10-18
评论0 2.4k浏览

想免费获取内部独家PPT资料库?观看行业大牛直播?点击加入腾讯游戏学院游戏程序行业精英群

711501594
CPU在面对大规模个体时,计算能力就显得不足了。我们知道,GPU天生的并行架构给它带来了强大的计算能力,那么我们能否用GPU去分担CPU的压力,从而摆脱计算的瓶颈呢?答案是肯定的,那就是下面要介绍的主角“Compute Shader”。

官方链接:https://docs.unity3d.com/Manual/ComputeShaders.html

看下官网给的定义:
Compute shaders:
    Compute shaders are programs that run on the graphics card, outside of the normal rendering pipeline.
    They can be used for massively parallel GPGPU algorithms,or to accelerate parts of game rendering.
    In order to efficiently use them, an in-depth knowledge of GPU architectures and parallel algorithms is often needed;
    as well as knowledge of DirectCompute, OpenGL Compute, CUDA, or OpenCL.

大概的意思:
Compute Shaders是运行在GPU(显卡)上的程序,独立于常规的渲染流水线之外。它们可以用来执行大规模并行的图形处理器通用计算(GPGPU)算法,也可以用来加速游戏渲染中的部分环节。要高效地使用它们,通常需要深入了解GPU架构和并行算法。

常用的计算架构有DirectCompute、OpenGL Compute、CUDA和OpenCL。
GPGPU – General-purpose computing on graphics processing units,图形处理器通用计算
Unity的ComputeShader十分接近DirectCompute(微软推出的,随DirectX11一起发布)

Unity引入的ComputeShader支持如下平台:
Windows and Windows Store, with a DirectX 11 or DirectX 12 graphics API and Shader Model 5.0 GPU
macOS and iOS using Metal graphics API
Android, Linux and Windows platforms with Vulkan API
Modern OpenGL platforms (OpenGL 4.3 on Linux or Windows; OpenGL ES 3.1 on Android). Note that Mac OS X does not support OpenGL 4.3
Modern consoles (Sony PS4 and Microsoft Xbox One)

例子

具体的解释,看程序中的注释即可。
Compute Shader
        //  Implements flocking (boids) behaviour on the GPU.
        //  Entry point (kernel) of this compute shader.
        #pragma kernel CSMain
        //  Everything needed to simulate a single boid.
        //  The C# struct GPUBoid declares the same fields in the same order,
        //  so the two layouts must be kept in sync.
        struct Boid
        {
            // Position of the boid; CSMain advances it by rot * speed * deltaTime.
            float3 pos;
            // Heading of the boid; CSMain blends it towards the steering direction.
            float3 rot;
            // Position of the flock object; CSMain uses it as the base of the cohesion target.
            float3 flockPos;
            // Movement speed applied along rot.
            float speed;
            // Distance below which another boid counts as a neighbour.
            float nearbyDis;
            // Number of boids in the buffer; stored as float and cast to int for the loop bound.
            float boidsCount;
        };
        //  Data the compute shader reads and writes: one Boid per simulated entity.
        RWStructuredBuffer<Boid> boidBuffer;
        //  Frame time supplied by the host script through SetFloat("deltaTime", ...).
        float deltaTime;
        // Returns v scaled to unit length, or the zero vector when v is (almost) zero.
        // A plain normalize() of a zero vector produces NaN, which would then be written
        // back into the boid and corrupt it for every following frame.
        float3 NormalizeOrZero(float3 v)
        {
            float lenSq = dot(v, v);
            if (lenSq > 1e-12)
            {
                return v * rsqrt(lenSq);
            }
            return float3(0.0, 0.0, 0.0);
        }

        // A compute shader runs in thread groups, each containing several threads. A newly
        // created compute shader defaults to [numthreads(8,8,1)], i.e. 8*8*1 = 64 threads per
        // group, which could equally be written as [numthreads(64,1,1)]. The boids live in a
        // one-dimensional buffer, so a flat group of 128 threads is used here.
        // One thread updates one boid: it accumulates separation, alignment and cohesion
        // from all boids closer than nearbyDis, then steers and moves the boid.
        [numthreads(128,1,1)]
        void CSMain (uint3 id : SV_DispatchThreadID)
        {
            // The host script writes the same boidsCount into every element, so element 0
            // tells us how many boids exist. Threads past the end of the buffer (the last
            // group is usually only partly filled) must not touch the buffer at all.
            uint boidTotal = uint(boidBuffer[0].boidsCount);
            if (id.x >= boidTotal)
            {
                return;
            }

            Boid boid = boidBuffer[id.x];

            // separation: sum of offsets pointing away from each neighbour
            float3 separation = float3(0.0, 0.0, 0.0);
            // alignment: average heading of the neighbours
            float3 alignment = float3(0.0, 0.0, 0.0);
            // cohesion: flock position plus the neighbours' centroid
            // NOTE(review): adding a centroid to flockPos (instead of averaging the two)
            // is kept exactly as in the original; confirm this is the intended target.
            float3 cohesion = boid.flockPos;
            float3 neighbourCentre = float3(0.0, 0.0, 0.0);
            uint nearbyCount = 0;

            // NOTE: neighbours are read from the same buffer that other threads are writing
            // in this dispatch, so a neighbour may be seen before or after its own update.
            [loop]
            for (uint i = 0; i < boidTotal; i++)
            {
                if (i == id.x)
                {
                    continue;
                }
                Boid other = boidBuffer[i];
                float3 away = boid.pos - other.pos;
                if (length(away) < boid.nearbyDis)
                {
                    separation += away;
                    alignment += other.rot;
                    neighbourCentre += other.pos;
                    nearbyCount++;
                }
            }

            if (nearbyCount > 0)
            {
                // Must be a floating-point division: "1 / nearbyCount" with a uint count is
                // integer division and evaluates to 0 for every count greater than one.
                float invCount = 1.0 / float(nearbyCount);
                alignment *= invCount;
                neighbourCentre *= invCount;
            }
            cohesion += neighbourCentre;

            float3 direction = alignment + separation + NormalizeOrZero(cohesion - boid.pos);
            boid.rot = lerp(boid.rot, NormalizeOrZero(direction), deltaTime * 4);
            boid.pos += boid.rot * boid.speed * deltaTime;
            boidBuffer[id.x] = boid;
        }

C#
        /// <summary>
        /// CPU-side copy of one boid as it is uploaded to and read back from the compute shader.
        /// The fields are declared in the same order and with matching types as the shader's
        /// <c>Boid</c> struct (three float3 values followed by three floats), so do not reorder them.
        /// </summary>
        public struct GPUBoid
        {
            /// <summary>Position of the boid.</summary>
            public Vector3 pos;
            /// <summary>Heading of the boid as written by the shader.</summary>
            public Vector3 rot;
            /// <summary>Position of the flock object the boid belongs to.</summary>
            public Vector3 flockPos;
            /// <summary>Movement speed of the boid.</summary>
            public float speed;
            /// <summary>Distance below which another boid counts as a neighbour.</summary>
            public float nearbyDis;
            /// <summary>Total number of boids; kept as float to match the shader struct.</summary>
            public float boidsCount;
        }
    //-------------------------------------------------------------------------------------------------------
        using System.Collections;
        using System.Collections.Generic;
        using UnityEngine;
        /// <summary>
        /// Spawns <see cref="boidsCount"/> boid objects and, every frame, lets the "CSMain"
        /// kernel of <see cref="cshader"/> compute their new positions and headings on the GPU.
        /// The results are read back and applied to the spawned GameObjects.
        /// </summary>
        public class GPUFlock : MonoBehaviour {
            #region Fields
            public ComputeShader cshader;
            public GameObject boidPrefab;
            public int boidsCount;
            public float spawnRadius;
            public GameObject[] boidsGo;
            public GPUBoid[] boidsData;
            public float flockSpeed;
            public float nearbyDis;
            private Vector3 targetPos = Vector3.zero;
            private int kernelHandle;

            // Size in bytes of one GPUBoid: 3 Vector3 (9 floats) + 3 floats = 12 floats.
            // The buffer stride has to equal the size of the shader-side struct.
            private const int BoidStride = sizeof(float) * 12;
            // GPU buffer holding all boids; created once in Start, released in OnDestroy.
            private ComputeBuffer boidBuffer;
            // Number of thread groups needed so that every boid gets one thread.
            private int threadGroupCount;
            #endregion
            #region Methods
            /// <summary>
            /// Creates the boid data and GameObjects, then allocates the GPU buffer and
            /// works out how many thread groups a dispatch needs.
            /// </summary>
            void Start()
            {
                this.boidsGo = new GameObject[this.boidsCount];
                this.boidsData = new GPUBoid[this.boidsCount];
                this.kernelHandle = cshader.FindKernel("CSMain");
                for (int i = 0; i < this.boidsCount; i++)
                {
                    this.boidsData[i] = this.CreateBoidData();
                    this.boidsGo[i] = Instantiate(boidPrefab, this.boidsData[i].pos, Quaternion.Euler(this.boidsData[i].rot)) as GameObject;
                    // From here on rot holds a direction vector, which is what the shader expects.
                    this.boidsData[i].rot = this.boidsGo[i].transform.forward;
                }

                // A ComputeBuffer cannot be created with zero elements, so an empty flock
                // simply gets no buffer and Update skips the GPU work.
                if (this.boidsData.Length == 0)
                {
                    return;
                }
                this.boidBuffer = new ComputeBuffer(this.boidsData.Length, BoidStride);

                // Dispatch takes a number of thread GROUPS, not threads. Ask the kernel for
                // its group size and round up so the last, partly filled group is included.
                uint groupSizeX;
                uint groupSizeY;
                uint groupSizeZ;
                cshader.GetKernelThreadGroupSizes(this.kernelHandle, out groupSizeX, out groupSizeY, out groupSizeZ);
                int groupSize = Mathf.Max(1, (int)groupSizeX);
                this.threadGroupCount = (this.boidsData.Length + groupSize - 1) / groupSize;
            }

            /// <summary>
            /// Builds the initial data of one boid: a random position inside the spawn sphere,
            /// a random-ish orientation (as Euler angles) and a slightly randomised speed.
            /// </summary>
            /// <returns>The initialised boid data.</returns>
            GPUBoid CreateBoidData()
            {
                GPUBoid boidData = new GPUBoid();
                Vector3 pos = transform.position + Random.insideUnitSphere * spawnRadius;
                Quaternion rot = Quaternion.Slerp(transform.rotation, Random.rotation, 0.3f);
                boidData.pos = pos;
                // Stored as Euler angles because Start feeds this value to Quaternion.Euler
                // when instantiating the prefab. The original computed rot but never used it.
                boidData.rot = rot.eulerAngles;
                boidData.flockPos = transform.position;
                boidData.boidsCount = this.boidsCount;
                boidData.nearbyDis = this.nearbyDis;
                boidData.speed = this.flockSpeed + Random.Range(-0.5f, 0.5f);
                return boidData;
            }

            /// <summary>
            /// Moves the flock object, runs one simulation step on the GPU and applies the
            /// results to the boid GameObjects.
            /// </summary>
            void Update()
            {
                this.targetPos += new Vector3(2f, 5f, 3f);
                this.transform.localPosition += new Vector3(
                    (Mathf.Sin(Mathf.Deg2Rad * this.targetPos.x) * -0.2f),
                    (Mathf.Sin(Mathf.Deg2Rad * this.targetPos.y) * 0.2f),
                    (Mathf.Sin(Mathf.Deg2Rad * this.targetPos.z) * 0.2f)
                );

                if (this.boidBuffer == null)
                {
                    return;
                }

                for (int i = 0; i < this.boidsData.Length; i++)
                {
                    this.boidsData[i].flockPos = this.transform.position;
                }
                // Note: unlike ordinary shaders, a compute shader is not tied to graphics, so
                // no mesh or material is involved. It is configured and launched from C#:
                // upload the data
                this.boidBuffer.SetData(this.boidsData);
                // bind the buffer and pass the parameters
                cshader.SetBuffer(this.kernelHandle, "boidBuffer", this.boidBuffer);
                cshader.SetFloat("deltaTime", Time.deltaTime);
                // launch enough thread groups to cover every boid
                cshader.Dispatch(this.kernelHandle, this.threadGroupCount, 1, 1);
                // Copy the results back from GPU to CPU. This has to wait for the GPU to
                // finish, which makes it the expensive part of the frame.
                this.boidBuffer.GetData(this.boidsData);

                for (int i = 0; i < this.boidsData.Length; i++)
                {
                    this.boidsGo[i].transform.localPosition = this.boidsData[i].pos;
                    // LookRotation cannot build a rotation from a zero direction.
                    if(!this.boidsData[i].rot.Equals(Vector3.zero))
                    {
                        this.boidsGo[i].transform.rotation = Quaternion.LookRotation(this.boidsData[i].rot);
                    }
                }
            }

            /// <summary>
            /// Releases the GPU buffer; compute buffers are not freed automatically.
            /// </summary>
            void OnDestroy()
            {
                if (this.boidBuffer != null)
                {
                    this.boidBuffer.Release();
                    this.boidBuffer = null;
                }
            }
            #endregion
        }
工程链接http://download.csdn.net/download/a958832776/9984053
来自:https://blog.csdn.net/a958832776/article/details/78022291

原文链接

著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

如社区发表内容存在侵权行为,您可以点击这里查看侵权投诉指引

游戏学院公众号二维码
腾讯游戏学院
微信公众号

提供更专业的游戏知识学习平台