这是模板特化、声明,还是别的什么?
Is this template specialization or a declaration (or something else altogether)?
我在 Nvidia 的 CUDA Samples n-body 模拟中看到这段代码:
// Explicit specializations that define the static data member m_singleton
// for T = double and T = float; each pointer starts out null.
template <> NBodyDemo<double> *NBodyDemo<double>::m_singleton = 0;
template <> NBodyDemo<float> *NBodyDemo<float>::m_singleton = 0;
这段代码是在声明指针吗?如果是这样,那为什么前面要写 template <> 呢?
我无法弄清楚这两行是干什么用的。
此外,这个 class 的构造函数(ctor)和析构函数(dtor)都是私有的;要实例化这个 class,必须调用 Create() 方法。就我的经验而言,这种写法非常少见,我希望能深入了解这种编码风格背后的原因。
版权声明
/*
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
Class定义
template <typename T>
class NBodyDemo
{
public:
static void Create()
{
m_singleton = new NBodyDemo;
}
static void Destroy()
{
delete m_singleton;
}
static void init(int numBodies, int numDevices, int blockSize,
bool usePBO, bool useHostMem, bool useCpu)
{
m_singleton->_init(numBodies, numDevices, blockSize, usePBO, useHostMem, useCpu);
}
static void reset(int numBodies, NBodyConfig config)
{
m_singleton->_reset(numBodies, config);
}
static void selectDemo(int index)
{
m_singleton->_selectDemo(index);
}
static bool compareResults(int numBodies)
{
return m_singleton->_compareResults(numBodies);
}
static void runBenchmark(int iterations)
{
m_singleton->_runBenchmark(iterations);
}
static void updateParams()
{
m_singleton->m_nbody->setSoftening(activeParams.m_softening);
m_singleton->m_nbody->setDamping(activeParams.m_damping);
}
static void updateSimulation()
{
m_singleton->m_nbody->update(activeParams.m_timestep);
}
static void display()
{
m_singleton->m_renderer->setSpriteSize(activeParams.m_pointSize);
if (useHostMem)
{
// This event sync is required because we are rendering from the host memory that CUDA is
// writing. If we don't wait until CUDA is done updating it, we will render partially
// updated data, resulting in a jerky frame rate.
if (!useCpu)
{
cudaEventSynchronize(hostMemSyncEvent);
}
m_singleton->m_renderer->setPositions(
m_singleton->m_nbody->getArray(BODYSYSTEM_POSITION),
m_singleton->m_nbody->getNumBodies());
}
else
{
m_singleton->m_renderer->setPBO(m_singleton->m_nbody->getCurrentReadBuffer(),
m_singleton->m_nbody->getNumBodies(),
(sizeof(T) > 4));
}
// display particles
m_singleton->m_renderer->display(displayMode);
}
static void getArrays(T *pos, T *vel)
{
T *_pos = m_singleton->m_nbody->getArray(BODYSYSTEM_POSITION);
T *_vel = m_singleton->m_nbody->getArray(BODYSYSTEM_VELOCITY);
memcpy(pos, _pos, m_singleton->m_nbody->getNumBodies() * 4 * sizeof(T));
memcpy(vel, _vel, m_singleton->m_nbody->getNumBodies() * 4 * sizeof(T));
}
static void setArrays(const T *pos, const T *vel)
{
if (pos != m_singleton->m_hPos)
{
memcpy(m_singleton->m_hPos, pos, numBodies * 4 * sizeof(T));
}
if (vel != m_singleton->m_hVel)
{
memcpy(m_singleton->m_hVel, vel, numBodies * 4 * sizeof(T));
}
m_singleton->m_nbody->setArray(BODYSYSTEM_POSITION, m_singleton->m_hPos);
m_singleton->m_nbody->setArray(BODYSYSTEM_VELOCITY, m_singleton->m_hVel);
if (!benchmark && !useCpu && !compareToCPU)
{
m_singleton->_resetRenderer();
}
}
private:
static NBodyDemo *m_singleton;
BodySystem<T> *m_nbody;
BodySystemCUDA<T> *m_nbodyCuda;
BodySystemCPU<T> *m_nbodyCpu;
ParticleRenderer *m_renderer;
T *m_hPos;
T *m_hVel;
float *m_hColor;
private:
NBodyDemo()
: m_nbody(0),
m_nbodyCuda(0),
m_nbodyCpu(0),
m_renderer(0),
m_hPos(0),
m_hVel(0),
m_hColor(0)
{
}
~NBodyDemo()
{
if (m_nbodyCpu)
{
delete m_nbodyCpu;
}
if (m_nbodyCuda)
{
delete m_nbodyCuda;
}
if (m_hPos)
{
delete [] m_hPos;
}
if (m_hVel)
{
delete [] m_hVel;
}
if (m_hColor)
{
delete [] m_hColor;
}
sdkDeleteTimer(&demoTimer);
if (!benchmark && !compareToCPU)
delete m_renderer;
}
void _init(int numBodies, int numDevices, int blockSize,
bool bUsePBO, bool useHostMem, bool useCpu)
{
if (useCpu)
{
m_nbodyCpu = new BodySystemCPU<T>(numBodies);
m_nbody = m_nbodyCpu;
m_nbodyCuda = 0;
}
else
{
m_nbodyCuda = new BodySystemCUDA<T>(numBodies, numDevices, blockSize, bUsePBO, useHostMem);
m_nbody = m_nbodyCuda;
m_nbodyCpu = 0;
}
// allocate host memory
m_hPos = new T[numBodies*4];
m_hVel = new T[numBodies*4];
m_hColor = new float[numBodies*4];
m_nbody->setSoftening(activeParams.m_softening);
m_nbody->setDamping(activeParams.m_damping);
if (useCpu)
{
sdkCreateTimer(&timer);
sdkStartTimer(&timer);
}
else
{
checkCudaErrors(cudaEventCreate(&startEvent));
checkCudaErrors(cudaEventCreate(&stopEvent));
checkCudaErrors(cudaEventCreate(&hostMemSyncEvent));
}
if (!benchmark && !compareToCPU)
{
m_renderer = new ParticleRenderer;
_resetRenderer();
}
sdkCreateTimer(&demoTimer);
sdkStartTimer(&demoTimer);
}
void _reset(int numBodies, NBodyConfig config)
{
if (tipsyFile == "")
{
randomizeBodies(config, m_hPos, m_hVel, m_hColor,
activeParams.m_clusterScale,
activeParams.m_velocityScale,
numBodies, true);
setArrays(m_hPos, m_hVel);
}
else
{
m_nbody->loadTipsyFile(tipsyFile);
::numBodies = m_nbody->getNumBodies();
}
}
void _resetRenderer()
{
if (fp64)
{
float color[4] = { 0.4f, 0.8f, 0.1f, 1.0f};
m_renderer->setBaseColor(color);
}
else
{
float color[4] = { 1.0f, 0.6f, 0.3f, 1.0f};
m_renderer->setBaseColor(color);
}
m_renderer->setColors(m_hColor, m_nbody->getNumBodies());
m_renderer->setSpriteSize(activeParams.m_pointSize);
}
void _selectDemo(int index)
{
assert(index < numDemos);
activeParams = demoParams[index];
camera_trans[0] = camera_trans_lag[0] = activeParams.m_x;
camera_trans[1] = camera_trans_lag[1] = activeParams.m_y;
camera_trans[2] = camera_trans_lag[2] = activeParams.m_z;
reset(numBodies, NBODY_CONFIG_SHELL);
sdkResetTimer(&demoTimer);
}
bool _compareResults(int numBodies)
{
assert(m_nbodyCuda);
bool passed = true;
m_nbody->update(0.001f);
{
m_nbodyCpu = new BodySystemCPU<T>(numBodies);
m_nbodyCpu->setArray(BODYSYSTEM_POSITION, m_hPos);
m_nbodyCpu->setArray(BODYSYSTEM_VELOCITY, m_hVel);
m_nbodyCpu->update(0.001f);
T *cudaPos = m_nbodyCuda->getArray(BODYSYSTEM_POSITION);
T *cpuPos = m_nbodyCpu->getArray(BODYSYSTEM_POSITION);
T tolerance = 0.0005f;
for (int i = 0; i < numBodies; i++)
{
if (fabs(cpuPos[i] - cudaPos[i]) > tolerance)
{
passed = false;
printf("Error: (host)%f != (device)%f\n", cpuPos[i], cudaPos[i]);
}
}
}
if (passed)
{
printf(" OK\n");
}
return passed;
}
void _runBenchmark(int iterations)
{
// once without timing to prime the device
if (!useCpu)
{
m_nbody->update(activeParams.m_timestep);
}
if (useCpu)
{
sdkCreateTimer(&timer);
sdkStartTimer(&timer);
}
else
{
checkCudaErrors(cudaEventRecord(startEvent, 0));
}
for (int i = 0; i < iterations; ++i)
{
m_nbody->update(activeParams.m_timestep);
}
float milliseconds = 0;
if (useCpu)
{
sdkStopTimer(&timer);
milliseconds = sdkGetTimerValue(&timer);
sdkStartTimer(&timer);
}
else
{
checkCudaErrors(cudaEventRecord(stopEvent, 0));
checkCudaErrors(cudaEventSynchronize(stopEvent));
checkCudaErrors(cudaEventElapsedTime(&milliseconds, startEvent, stopEvent));
}
double interactionsPerSecond = 0;
double gflops = 0;
computePerfStats(interactionsPerSecond, gflops, milliseconds, iterations);
printf("%d bodies, total time for %d iterations: %.3f ms\n",
numBodies, iterations, milliseconds);
printf("= %.3f billion interactions per second\n", interactionsPerSecond);
printf("= %.3f %s-precision GFLOP/s at %d flops per interaction\n", gflops,
(sizeof(T) > 4) ? "double" : "single", flopsPerInteraction);
}
};
NBodyDemo 是一个 class 模板,因为它的某些成员和函数使用了模板参数 T,例如 BodySystem<T> *m_nbody; 或 setArrays(const T *pos,...)。
m_singleton 是一个指向 NBodyDemo 的 static 指针。"static" 表示它被所有具有相同模板参数 T 的实例共享。
template <> NBodyDemo<double> *NBodyDemo<double>::m_singleton = 0;
这一行为 "T = double" 的特化定义并初始化 m_singleton。
在模板的声明范围之外定义其特化成员时,必须写上 template <>。
我在 Nvidia 的 CUDA Samples n-body 模拟中看到这段代码:
// Explicit specializations that define the static data member m_singleton
// for T = double and T = float; each pointer starts out null.
template <> NBodyDemo<double> *NBodyDemo<double>::m_singleton = 0;
template <> NBodyDemo<float> *NBodyDemo<float>::m_singleton = 0;
这段代码是在声明指针吗?如果是这样,那为什么前面要写 template <> 呢?我无法弄清楚这两行是干什么用的。此外,这个 class 的构造函数(ctor)和析构函数(dtor)都是私有的;要实例化这个 class,必须调用 Create() 方法。就我的经验而言,这种写法非常少见,我希望能深入了解这种编码风格背后的原因。
版权声明
/*
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
Class定义
template <typename T>
class NBodyDemo
{
public:
static void Create()
{
m_singleton = new NBodyDemo;
}
static void Destroy()
{
delete m_singleton;
}
static void init(int numBodies, int numDevices, int blockSize,
bool usePBO, bool useHostMem, bool useCpu)
{
m_singleton->_init(numBodies, numDevices, blockSize, usePBO, useHostMem, useCpu);
}
static void reset(int numBodies, NBodyConfig config)
{
m_singleton->_reset(numBodies, config);
}
static void selectDemo(int index)
{
m_singleton->_selectDemo(index);
}
static bool compareResults(int numBodies)
{
return m_singleton->_compareResults(numBodies);
}
static void runBenchmark(int iterations)
{
m_singleton->_runBenchmark(iterations);
}
static void updateParams()
{
m_singleton->m_nbody->setSoftening(activeParams.m_softening);
m_singleton->m_nbody->setDamping(activeParams.m_damping);
}
static void updateSimulation()
{
m_singleton->m_nbody->update(activeParams.m_timestep);
}
static void display()
{
m_singleton->m_renderer->setSpriteSize(activeParams.m_pointSize);
if (useHostMem)
{
// This event sync is required because we are rendering from the host memory that CUDA is
// writing. If we don't wait until CUDA is done updating it, we will render partially
// updated data, resulting in a jerky frame rate.
if (!useCpu)
{
cudaEventSynchronize(hostMemSyncEvent);
}
m_singleton->m_renderer->setPositions(
m_singleton->m_nbody->getArray(BODYSYSTEM_POSITION),
m_singleton->m_nbody->getNumBodies());
}
else
{
m_singleton->m_renderer->setPBO(m_singleton->m_nbody->getCurrentReadBuffer(),
m_singleton->m_nbody->getNumBodies(),
(sizeof(T) > 4));
}
// display particles
m_singleton->m_renderer->display(displayMode);
}
static void getArrays(T *pos, T *vel)
{
T *_pos = m_singleton->m_nbody->getArray(BODYSYSTEM_POSITION);
T *_vel = m_singleton->m_nbody->getArray(BODYSYSTEM_VELOCITY);
memcpy(pos, _pos, m_singleton->m_nbody->getNumBodies() * 4 * sizeof(T));
memcpy(vel, _vel, m_singleton->m_nbody->getNumBodies() * 4 * sizeof(T));
}
static void setArrays(const T *pos, const T *vel)
{
if (pos != m_singleton->m_hPos)
{
memcpy(m_singleton->m_hPos, pos, numBodies * 4 * sizeof(T));
}
if (vel != m_singleton->m_hVel)
{
memcpy(m_singleton->m_hVel, vel, numBodies * 4 * sizeof(T));
}
m_singleton->m_nbody->setArray(BODYSYSTEM_POSITION, m_singleton->m_hPos);
m_singleton->m_nbody->setArray(BODYSYSTEM_VELOCITY, m_singleton->m_hVel);
if (!benchmark && !useCpu && !compareToCPU)
{
m_singleton->_resetRenderer();
}
}
private:
static NBodyDemo *m_singleton;
BodySystem<T> *m_nbody;
BodySystemCUDA<T> *m_nbodyCuda;
BodySystemCPU<T> *m_nbodyCpu;
ParticleRenderer *m_renderer;
T *m_hPos;
T *m_hVel;
float *m_hColor;
private:
NBodyDemo()
: m_nbody(0),
m_nbodyCuda(0),
m_nbodyCpu(0),
m_renderer(0),
m_hPos(0),
m_hVel(0),
m_hColor(0)
{
}
~NBodyDemo()
{
if (m_nbodyCpu)
{
delete m_nbodyCpu;
}
if (m_nbodyCuda)
{
delete m_nbodyCuda;
}
if (m_hPos)
{
delete [] m_hPos;
}
if (m_hVel)
{
delete [] m_hVel;
}
if (m_hColor)
{
delete [] m_hColor;
}
sdkDeleteTimer(&demoTimer);
if (!benchmark && !compareToCPU)
delete m_renderer;
}
void _init(int numBodies, int numDevices, int blockSize,
bool bUsePBO, bool useHostMem, bool useCpu)
{
if (useCpu)
{
m_nbodyCpu = new BodySystemCPU<T>(numBodies);
m_nbody = m_nbodyCpu;
m_nbodyCuda = 0;
}
else
{
m_nbodyCuda = new BodySystemCUDA<T>(numBodies, numDevices, blockSize, bUsePBO, useHostMem);
m_nbody = m_nbodyCuda;
m_nbodyCpu = 0;
}
// allocate host memory
m_hPos = new T[numBodies*4];
m_hVel = new T[numBodies*4];
m_hColor = new float[numBodies*4];
m_nbody->setSoftening(activeParams.m_softening);
m_nbody->setDamping(activeParams.m_damping);
if (useCpu)
{
sdkCreateTimer(&timer);
sdkStartTimer(&timer);
}
else
{
checkCudaErrors(cudaEventCreate(&startEvent));
checkCudaErrors(cudaEventCreate(&stopEvent));
checkCudaErrors(cudaEventCreate(&hostMemSyncEvent));
}
if (!benchmark && !compareToCPU)
{
m_renderer = new ParticleRenderer;
_resetRenderer();
}
sdkCreateTimer(&demoTimer);
sdkStartTimer(&demoTimer);
}
void _reset(int numBodies, NBodyConfig config)
{
if (tipsyFile == "")
{
randomizeBodies(config, m_hPos, m_hVel, m_hColor,
activeParams.m_clusterScale,
activeParams.m_velocityScale,
numBodies, true);
setArrays(m_hPos, m_hVel);
}
else
{
m_nbody->loadTipsyFile(tipsyFile);
::numBodies = m_nbody->getNumBodies();
}
}
void _resetRenderer()
{
if (fp64)
{
float color[4] = { 0.4f, 0.8f, 0.1f, 1.0f};
m_renderer->setBaseColor(color);
}
else
{
float color[4] = { 1.0f, 0.6f, 0.3f, 1.0f};
m_renderer->setBaseColor(color);
}
m_renderer->setColors(m_hColor, m_nbody->getNumBodies());
m_renderer->setSpriteSize(activeParams.m_pointSize);
}
void _selectDemo(int index)
{
assert(index < numDemos);
activeParams = demoParams[index];
camera_trans[0] = camera_trans_lag[0] = activeParams.m_x;
camera_trans[1] = camera_trans_lag[1] = activeParams.m_y;
camera_trans[2] = camera_trans_lag[2] = activeParams.m_z;
reset(numBodies, NBODY_CONFIG_SHELL);
sdkResetTimer(&demoTimer);
}
bool _compareResults(int numBodies)
{
assert(m_nbodyCuda);
bool passed = true;
m_nbody->update(0.001f);
{
m_nbodyCpu = new BodySystemCPU<T>(numBodies);
m_nbodyCpu->setArray(BODYSYSTEM_POSITION, m_hPos);
m_nbodyCpu->setArray(BODYSYSTEM_VELOCITY, m_hVel);
m_nbodyCpu->update(0.001f);
T *cudaPos = m_nbodyCuda->getArray(BODYSYSTEM_POSITION);
T *cpuPos = m_nbodyCpu->getArray(BODYSYSTEM_POSITION);
T tolerance = 0.0005f;
for (int i = 0; i < numBodies; i++)
{
if (fabs(cpuPos[i] - cudaPos[i]) > tolerance)
{
passed = false;
printf("Error: (host)%f != (device)%f\n", cpuPos[i], cudaPos[i]);
}
}
}
if (passed)
{
printf(" OK\n");
}
return passed;
}
void _runBenchmark(int iterations)
{
// once without timing to prime the device
if (!useCpu)
{
m_nbody->update(activeParams.m_timestep);
}
if (useCpu)
{
sdkCreateTimer(&timer);
sdkStartTimer(&timer);
}
else
{
checkCudaErrors(cudaEventRecord(startEvent, 0));
}
for (int i = 0; i < iterations; ++i)
{
m_nbody->update(activeParams.m_timestep);
}
float milliseconds = 0;
if (useCpu)
{
sdkStopTimer(&timer);
milliseconds = sdkGetTimerValue(&timer);
sdkStartTimer(&timer);
}
else
{
checkCudaErrors(cudaEventRecord(stopEvent, 0));
checkCudaErrors(cudaEventSynchronize(stopEvent));
checkCudaErrors(cudaEventElapsedTime(&milliseconds, startEvent, stopEvent));
}
double interactionsPerSecond = 0;
double gflops = 0;
computePerfStats(interactionsPerSecond, gflops, milliseconds, iterations);
printf("%d bodies, total time for %d iterations: %.3f ms\n",
numBodies, iterations, milliseconds);
printf("= %.3f billion interactions per second\n", interactionsPerSecond);
printf("= %.3f %s-precision GFLOP/s at %d flops per interaction\n", gflops,
(sizeof(T) > 4) ? "double" : "single", flopsPerInteraction);
}
};
NBodyDemo 是一个 class 模板,因为它的某些成员和函数使用了模板参数 T,例如 BodySystem<T> *m_nbody; 或 setArrays(const T *pos,...)。
m_singleton 是一个指向 NBodyDemo 的 static 指针。"static" 表示它被所有具有相同模板参数 T 的实例共享。
template <> NBodyDemo<double> *NBodyDemo<double>::m_singleton = 0;
这一行为 "T = double" 的特化定义并初始化 m_singleton。
在模板的声明范围之外定义其特化成员时,必须写上 template <>。