这是模板特化还是声明(或者完全是别的东西)?

Is this template specialization or a declaration (or something else altogether)?

我在 Nvidia 的 CUDA Samples n-body 模拟中看到这段代码:

// Explicit specializations that define the static data member m_singleton for
// NBodyDemo<double> and NBodyDemo<float>; each pointer is initialized to 0 (null).
template <> NBodyDemo<double> *NBodyDemo<double>::m_singleton = 0;
template <> NBodyDemo<float> *NBodyDemo<float>::m_singleton = 0;

这段代码是在声明指针吗?如果是,那为什么要加 template <>?我不明白这两行是做什么用的。此外,该 class 的构造函数(ctor)和析构函数(dtor)都是私有的;要实例化这个 class,需要调用 Create() 方法。以我的经验来看,这种做法很不寻常,我希望能深入了解这种编码风格背后的原因。

版权声明

   /*
     * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
     *
     * Please refer to the NVIDIA end user license agreement (EULA) associated
     * with this source code for terms and conditions that govern your use of
     * this software. Any use, reproduction, disclosure, or distribution of
     * this software and related documentation outside the terms of the EULA
     * is strictly prohibited.
     *
     */

Class定义

// NBodyDemo<T> is a singleton front end for the n-body demo, parameterized on
// the scalar type T used for positions and velocities.
//
// All public entry points are static and forward to the single instance held
// in m_singleton, so Create() must have been called before any of them is
// used.  The constructor and destructor are private; instances are only made
// through Create() and released through Destroy().
//
// Many names used below (activeParams, useHostMem, useCpu, benchmark,
// compareToCPU, numBodies, fp64, tipsyFile, displayMode, the timers and the
// CUDA events, ...) are not declared in this class; they are resolved from the
// enclosing scope.
template <typename T>
    class NBodyDemo
    {
        public:
            // Allocates the singleton instance.
            // NOTE(review): calling Create() twice without an intervening
            // Destroy() overwrites m_singleton and leaks the earlier instance.
            static void Create()
            {
                m_singleton = new NBodyDemo;
            }

            // Deletes the singleton instance (a no-op when none exists) and
            // clears the pointer so that a repeated Destroy() is harmless and
            // the pointer never dangles.
            static void Destroy()
            {
                delete m_singleton;
                m_singleton = 0;
            }

            // Forwards to _init() on the singleton; see _init() for details.
            static void init(int numBodies, int numDevices, int blockSize,
                             bool usePBO, bool useHostMem, bool useCpu)
            {
                m_singleton->_init(numBodies, numDevices, blockSize, usePBO, useHostMem, useCpu);
            }

            // Forwards to _reset() on the singleton.
            static void reset(int numBodies, NBodyConfig config)
            {
                m_singleton->_reset(numBodies, config);
            }

            // Forwards to _selectDemo() on the singleton.
            static void selectDemo(int index)
            {
                m_singleton->_selectDemo(index);
            }

            // Forwards to _compareResults() on the singleton and returns its result.
            static bool compareResults(int numBodies)
            {
                return m_singleton->_compareResults(numBodies);
            }

            // Forwards to _runBenchmark() on the singleton.
            static void runBenchmark(int iterations)
            {
                m_singleton->_runBenchmark(iterations);
            }

            // Pushes the softening and damping values of activeParams into the
            // active body system.
            static void updateParams()
            {
                m_singleton->m_nbody->setSoftening(activeParams.m_softening);
                m_singleton->m_nbody->setDamping(activeParams.m_damping);
            }

            // Advances the active body system by activeParams.m_timestep.
            static void updateSimulation()
            {
                m_singleton->m_nbody->update(activeParams.m_timestep);
            }

            // Hands the current positions to the renderer and draws them.
            // Requires m_renderer to have been created by _init().
            static void display()
            {
                m_singleton->m_renderer->setSpriteSize(activeParams.m_pointSize);

                if (useHostMem)
                {
                    // This event sync is required because we are rendering from the host memory that CUDA is
                    // writing.  If we don't wait until CUDA is done updating it, we will render partially
                    // updated data, resulting in a jerky frame rate.
                    if (!useCpu)
                    {
                        cudaEventSynchronize(hostMemSyncEvent);
                    }

                    m_singleton->m_renderer->setPositions(
                        m_singleton->m_nbody->getArray(BODYSYSTEM_POSITION),
                        m_singleton->m_nbody->getNumBodies());
                }
                else
                {
                    // The last argument is true when T is wider than 4 bytes
                    // (presumably a "double precision" flag; confirm against
                    // ParticleRenderer::setPBO).
                    m_singleton->m_renderer->setPBO(m_singleton->m_nbody->getCurrentReadBuffer(),
                                                    m_singleton->m_nbody->getNumBodies(),
                                                    (sizeof(T) > 4));
                }

                // display particles
                m_singleton->m_renderer->display(displayMode);
            }

            // Copies the body system's position and velocity arrays into the
            // caller-supplied buffers.  Each buffer must hold at least
            // getNumBodies() * 4 elements of T.
            static void getArrays(T *pos, T *vel)
            {
                T *_pos = m_singleton->m_nbody->getArray(BODYSYSTEM_POSITION);
                T *_vel = m_singleton->m_nbody->getArray(BODYSYSTEM_VELOCITY);
                memcpy(pos, _pos, m_singleton->m_nbody->getNumBodies() * 4 * sizeof(T));
                memcpy(vel, _vel, m_singleton->m_nbody->getNumBodies() * 4 * sizeof(T));
            }

            // Loads new positions and velocities into the body system.
            // The data is first staged in the host buffers m_hPos / m_hVel
            // (numBodies * 4 elements each, where numBodies is the variable
            // from the enclosing scope); the copy is skipped when the caller
            // passes those very buffers.
            static void setArrays(const T *pos, const T *vel)
            {
                if (pos != m_singleton->m_hPos)
                {
                    memcpy(m_singleton->m_hPos, pos, numBodies * 4 * sizeof(T));
                }

                if (vel != m_singleton->m_hVel)
                {
                    memcpy(m_singleton->m_hVel, vel, numBodies * 4 * sizeof(T));
                }

                m_singleton->m_nbody->setArray(BODYSYSTEM_POSITION, m_singleton->m_hPos);
                m_singleton->m_nbody->setArray(BODYSYSTEM_VELOCITY, m_singleton->m_hVel);

                // A renderer only exists in interactive mode; refresh its
                // colors and sprite size there.
                if (!benchmark && !useCpu && !compareToCPU)
                {
                    m_singleton->_resetRenderer();
                }
            }

        private:
            // The one instance per T.  Defined outside the class, once per
            // instantiated type.
            static NBodyDemo *m_singleton;

            // m_nbody aliases whichever of m_nbodyCuda / m_nbodyCpu was
            // selected in _init(); it is never deleted on its own.
            BodySystem<T>     *m_nbody;
            BodySystemCUDA<T> *m_nbodyCuda;
            BodySystemCPU<T>  *m_nbodyCpu;

            // Created in _init() only when neither benchmark nor compareToCPU is set.
            ParticleRenderer *m_renderer;

            // Host-side staging buffers, numBodies * 4 elements each.
            T *m_hPos;
            T *m_hVel;
            float *m_hColor;

        private:
            // The instance owns raw pointers that the destructor frees, so a
            // copy would lead to a double delete.  Copying is disabled by
            // declaring these members private and leaving them undefined.
            NBodyDemo(const NBodyDemo &);
            NBodyDemo &operator=(const NBodyDemo &);

            // Starts with every pointer null; the real setup happens in _init().
            NBodyDemo()
                : m_nbody(0),
                  m_nbodyCuda(0),
                  m_nbodyCpu(0),
                  m_renderer(0),
                  m_hPos(0),
                  m_hVel(0),
                  m_hColor(0)
            {

            }

            // Releases the body systems, the host buffers, the demo timer and,
            // in interactive mode, the renderer.
            ~NBodyDemo()
            {
                if (m_nbodyCpu)
                {
                    delete m_nbodyCpu;
                }

                if (m_nbodyCuda)
                {
                    delete m_nbodyCuda;
                }

                if (m_hPos)
                {
                    delete [] m_hPos;
                }

                if (m_hVel)
                {
                    delete [] m_hVel;
                }

                if (m_hColor)
                {
                    delete [] m_hColor;
                }

                sdkDeleteTimer(&demoTimer);

                // Mirrors the condition under which _init() created the renderer.
                if (!benchmark && !compareToCPU)
                    delete m_renderer;
            }

            // Builds the simulation back end and supporting resources.
            //   numBodies  - number of bodies to simulate
            //   numDevices - forwarded to BodySystemCUDA (unused on the CPU path)
            //   blockSize  - forwarded to BodySystemCUDA (unused on the CPU path)
            //   bUsePBO    - forwarded to BodySystemCUDA (unused on the CPU path)
            //   useHostMem - forwarded to BodySystemCUDA (unused on the CPU path)
            //   useCpu     - true selects BodySystemCPU, false BodySystemCUDA
            // The useHostMem / useCpu parameters shadow the same-named outer
            // variables inside this function.
            void _init(int numBodies, int numDevices, int blockSize,
                       bool bUsePBO, bool useHostMem, bool useCpu)
            {
                if (useCpu)
                {
                    m_nbodyCpu = new BodySystemCPU<T>(numBodies);
                    m_nbody = m_nbodyCpu;
                    m_nbodyCuda = 0;
                }
                else
                {
                    m_nbodyCuda = new BodySystemCUDA<T>(numBodies, numDevices, blockSize, bUsePBO, useHostMem);
                    m_nbody = m_nbodyCuda;
                    m_nbodyCpu = 0;
                }

                // allocate host memory
                m_hPos = new T[numBodies*4];
                m_hVel = new T[numBodies*4];
                m_hColor = new float[numBodies*4];

                m_nbody->setSoftening(activeParams.m_softening);
                m_nbody->setDamping(activeParams.m_damping);

                // Timing uses an SDK timer on the CPU path and CUDA events otherwise.
                if (useCpu)
                {
                    sdkCreateTimer(&timer);
                    sdkStartTimer(&timer);
                }
                else
                {
                    checkCudaErrors(cudaEventCreate(&startEvent));
                    checkCudaErrors(cudaEventCreate(&stopEvent));
                    checkCudaErrors(cudaEventCreate(&hostMemSyncEvent));
                }

                // The renderer is only needed for interactive runs.
                if (!benchmark && !compareToCPU)
                {
                    m_renderer = new ParticleRenderer;
                    _resetRenderer();
                }

                sdkCreateTimer(&demoTimer);
                sdkStartTimer(&demoTimer);
            }

            // Re-initializes the bodies: randomized according to config when no
            // tipsy file is set, otherwise loaded from tipsyFile (which also
            // updates the outer numBodies from the loaded system).
            void _reset(int numBodies, NBodyConfig config)
            {
                if (tipsyFile == "")
                {
                    randomizeBodies(config, m_hPos, m_hVel, m_hColor,
                                    activeParams.m_clusterScale,
                                    activeParams.m_velocityScale,
                                    numBodies, true);
                    setArrays(m_hPos, m_hVel);
                }
                else
                {
                    m_nbody->loadTipsyFile(tipsyFile);
                    ::numBodies = m_nbody->getNumBodies();
                }
            }

            // Sets the renderer's base color (one color when fp64 is set,
            // another otherwise), per-body colors and sprite size.
            void _resetRenderer()
            {
                if (fp64)
                {
                    float color[4] = { 0.4f, 0.8f, 0.1f, 1.0f};
                    m_renderer->setBaseColor(color);
                }
                else
                {
                    float color[4] = { 1.0f, 0.6f, 0.3f, 1.0f};
                    m_renderer->setBaseColor(color);
                }

                m_renderer->setColors(m_hColor, m_nbody->getNumBodies());
                m_renderer->setSpriteSize(activeParams.m_pointSize);
            }

            // Activates demoParams[index]: copies it into activeParams, snaps
            // the camera to the demo's position, resets the bodies and restarts
            // the demo timer.
            void _selectDemo(int index)
            {
                // index is used to subscript demoParams, so reject negative
                // values as well as values past the end.
                assert(index >= 0 && index < numDemos);

                activeParams = demoParams[index];
                camera_trans[0] = camera_trans_lag[0] = activeParams.m_x;
                camera_trans[1] = camera_trans_lag[1] = activeParams.m_y;
                camera_trans[2] = camera_trans_lag[2] = activeParams.m_z;
                reset(numBodies, NBODY_CONFIG_SHELL);
                sdkResetTimer(&demoTimer);
            }

            // Runs one step on the CUDA system and one on a freshly built CPU
            // reference system seeded from m_hPos / m_hVel, then compares the
            // resulting positions.  Prints each mismatch and returns true only
            // when every compared element is within tolerance.
            // Requires the CUDA back end (m_nbodyCuda) to be active.
            bool _compareResults(int numBodies)
            {
                assert(m_nbodyCuda);

                bool passed = true;

                m_nbody->update(0.001f);

                {
                    // Release the reference system left over from an earlier
                    // call so repeated comparisons do not leak.  The guard
                    // keeps m_nbody valid should it alias the CPU system.
                    if (m_nbodyCpu != m_nbody)
                    {
                        delete m_nbodyCpu;
                        m_nbodyCpu = 0;
                    }

                    m_nbodyCpu = new BodySystemCPU<T>(numBodies);

                    m_nbodyCpu->setArray(BODYSYSTEM_POSITION, m_hPos);
                    m_nbodyCpu->setArray(BODYSYSTEM_VELOCITY, m_hVel);

                    m_nbodyCpu->update(0.001f);

                    T *cudaPos = m_nbodyCuda->getArray(BODYSYSTEM_POSITION);
                    T *cpuPos  = m_nbodyCpu->getArray(BODYSYSTEM_POSITION);

                    T tolerance = 0.0005f;

                    // NOTE(review): only the first numBodies scalars are
                    // compared, while the host buffers hold numBodies * 4
                    // scalars - confirm whether the full array was intended.
                    for (int i = 0; i < numBodies; i++)
                    {
                        if (fabs(cpuPos[i] - cudaPos[i]) > tolerance)
                        {
                            passed = false;
                            printf("Error: (host)%f != (device)%f\n", cpuPos[i], cudaPos[i]);
                        }
                    }
                }
                if (passed)
                {
                    printf("  OK\n");
                }
                return passed;
            }

            // Times `iterations` simulation steps and prints the elapsed time,
            // interaction rate and GFLOP/s figures.
            void _runBenchmark(int iterations)
            {
                // once without timing to prime the device
                if (!useCpu)
                {
                    m_nbody->update(activeParams.m_timestep);
                }

                if (useCpu)
                {
                    // NOTE(review): _init() already calls sdkCreateTimer(&timer)
                    // on the CPU path; creating it again here may leak the
                    // earlier timer - confirm sdkCreateTimer semantics.
                    sdkCreateTimer(&timer);
                    sdkStartTimer(&timer);
                }
                else
                {
                    checkCudaErrors(cudaEventRecord(startEvent, 0));
                }

                for (int i = 0; i < iterations; ++i)
                {
                    m_nbody->update(activeParams.m_timestep);
                }

                float milliseconds = 0;

                if (useCpu)
                {
                    sdkStopTimer(&timer);
                    milliseconds = sdkGetTimerValue(&timer);
                    sdkStartTimer(&timer);
                }
                else
                {
                    // Wait for the stop event so the elapsed time covers all
                    // work recorded between the two events.
                    checkCudaErrors(cudaEventRecord(stopEvent, 0));
                    checkCudaErrors(cudaEventSynchronize(stopEvent));
                    checkCudaErrors(cudaEventElapsedTime(&milliseconds, startEvent, stopEvent));
                }

                double interactionsPerSecond = 0;
                double gflops = 0;
                computePerfStats(interactionsPerSecond, gflops, milliseconds, iterations);

                printf("%d bodies, total time for %d iterations: %.3f ms\n",
                       numBodies, iterations, milliseconds);
                printf("= %.3f billion interactions per second\n", interactionsPerSecond);
                printf("= %.3f %s-precision GFLOP/s at %d flops per interaction\n", gflops,
                       (sizeof(T) > 4) ? "double" : "single", flopsPerInteraction);
            }
    };

NBodyDemo 是一个 class 模板,因为它在某些成员和函数中使用了参数 T,例如 BodySystem<T> *m_nbody; 和 setArrays(const T *pos, ...)。

m_singleton 是一个指向 NBodyDemo 类型的 static 指针。"static" 表示它将被具有相同参数 T 的所有实例共享。

// Defines the static data member NBodyDemo<double>::m_singleton and initializes it to 0 (null).
template <> NBodyDemo<double> *NBodyDemo<double>::m_singleton = 0;

为 "T = double" 这一特化初始化 m_singleton。
对于在声明范围之外的模板,template <> 是必需的。