在 OSX 上调用 glDrawElements 时 CPU 使用率很高
glDrawElements on OSX has high cpu usage
我主要使用 SDL2 和 OpenGL 3.3 在 Linux (Mint) 和 Windows 上进行开发,在绘图对象方面几乎没有问题。 CPU 使用率从未真正超过 ~40%。
直到我尝试把现有的成果移植到 OSX (Sierra),情况才发生了变化。
使用的着色器和代码与在 Linux 和 Windows 上运行良好的完全相同,但在 OSX 上 CPU 使用率持续飙升至约 99%。
起初,我认为这是一个批处理问题,所以我将我的绘图调用批处理在一起以尽量减少对 glDrawElements 的调用次数,但这没有用。
然后,我认为这是一个涉及在 vertex/fragment 着色器中不使用属性的问题(例如:OpenGL core profile incredible slowdown on OS X)
此外,我将帧率保持在 60 fps。
清理之后,仍然没有改善。我尽可能记录了一切,但 glGetError() 和着色器日志都没有报告任何问题。
于是我从 vertex/fragment 着色器中一点一点地删除代码,看看究竟是什么拖慢了我的绘制调用。我最终把问题缩小到:在我的 vertex/fragment 着色器中对 texture() 函数的任何调用都会导致 CPU 使用率很高。
纹理加载代码:
// Texture loading
/**
 * Creates a GL_TEXTURE_2D texture from mSurface and registers it with the
 * manager under aName.
 *
 * Filtering and wrapping are read from the "OpenGLMinFilter",
 * "OpenGLMagFilter", "OpenGLWrapModeS" and "OpenGLWrapModeT" constants.
 * Only the values "GL_NEAREST" (filters) and "GL_CLAMP_TO_EDGE" (wrap modes)
 * are recognised; anything else keeps GL_LINEAR / GL_REPEAT.
 *
 * @param aName Key under which the texture id and surface size are registered.
 */
void PCShaderSurface::AddTexturePairing(HashString const &aName)
{
  // Resolve the sampling configuration; each lookup overrides its default
  // only on an exact string match.
  GLint const minFilter = (Constants::GetString("OpenGLMinFilter") == "GL_NEAREST") ? GL_NEAREST : GL_LINEAR;
  GLint const magFilter = (Constants::GetString("OpenGLMagFilter") == "GL_NEAREST") ? GL_NEAREST : GL_LINEAR;
  GLint const wrapS = (Constants::GetString("OpenGLWrapModeS") == "GL_CLAMP_TO_EDGE") ? GL_CLAMP_TO_EDGE : GL_REPEAT;
  GLint const wrapT = (Constants::GetString("OpenGLWrapModeT") == "GL_CLAMP_TO_EDGE") ? GL_CLAMP_TO_EDGE : GL_REPEAT;

  // Create the texture object and make it current so the parameter and
  // upload calls below apply to it.
  glGenTextures(1, &mTextureID);
  glBindTexture(GL_TEXTURE_2D, mTextureID);

  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, minFilter);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, magFilter);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, wrapS);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, wrapT);

  // Upload level 0 from the surface pixels: stored as GL_RGBA, sourced as
  // unsigned bytes laid out according to mTextureFormat.
  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, mSurface->w, mSurface->h, 0, mTextureFormat, GL_UNSIGNED_BYTE, mSurface->pixels);

  // Publish the texture id together with the surface dimensions.
  GetManager()->AddTexturePairing(aName, TextureData(mTextureID, mSurface->w, mSurface->h));
}
绘制代码:
// Draws one batch: objects that use the same program and texture id are
// submitted together in a single glDrawElements call.
glUseProgram(program);
// Always sample through texture unit 0. Picking a different unit per batch
// (previously `texture % mMaxTextures`) was found to push CPU usage to ~99%
// on OSX; the reported explanation is that the driver may recompile the
// shader whenever the sampler's unit changes.
int const activeTexture = 0;
// Attribute locations are looked up by name; BindAttributeV3 skips a
// location of -1 (BindAttributeV4 / DisableVertexAttribArray are assumed to
// do the same — TODO confirm).
int vertexPosLocation = glGetAttribLocation(program, "vertexPos");
int texCoordPosLocation = glGetAttribLocation(program, "texCoord");
int objectPosLocation = glGetAttribLocation(program, "objectPos");
int colorPosLocation = glGetAttribLocation(program, "primaryColor");
// Calculate matrices and push vertex, color, position, texCoord data
// ...
// Bind the VAO, bind the batch texture on the fixed unit and set uniforms.
glBindVertexArray(mVertexArrayObjectID);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, texture);
glUniform1i(glGetUniformLocation(program, "textureUnit"), activeTexture);
glUniform3f(glGetUniformLocation(program, "cameraDiff"), cameraTranslation.x, cameraTranslation.y, cameraTranslation.z);
glUniform3f(glGetUniformLocation(program, "cameraSize"), cameraSize.x, cameraSize.y, cameraSize.z);
// The camera matrix is uploaded with transpose = GL_TRUE.
glUniformMatrix3fv(glGetUniformLocation(program, "cameraTransform"), 1, GL_TRUE, cameraMatrix);
// Set shader properties. Due to batching, done on a per surface / shader basis.
// Shader uniforms are reset upon relinking.
SetShaderProperties(surface, true);
// Set VBO and buffer data. The VAO is bound again before the attribute
// setup; SetShaderProperties is opaque from here, so the re-bind is kept.
glBindVertexArray(mVertexArrayObjectID);
BindAttributeV3(GL_ARRAY_BUFFER, mVertexBufferID, vertexPosLocation, vertexData);
BindAttributeV3(GL_ARRAY_BUFFER, mTextureBufferID, texCoordPosLocation, textureData);
BindAttributeV3(GL_ARRAY_BUFFER, mPositionBufferID, objectPosLocation, positionData);
BindAttributeV4(GL_ARRAY_BUFFER, mColorBufferID, colorPosLocation, colorData);
// Set index data
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBufferID);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLuint) * indices.size(), &indices[0], GL_DYNAMIC_DRAW);
// Draw and disable
// NOTE(review): glDrawElements' count is the number of indices to render,
// but vertexData.size() is passed — confirm it always equals indices.size().
glDrawElements(GL_TRIANGLES, static_cast<unsigned>(vertexData.size()), GL_UNSIGNED_INT, 0);
DisableVertexAttribArray(vertexPosLocation);
DisableVertexAttribArray(texCoordPosLocation);
DisableVertexAttribArray(objectPosLocation);
DisableVertexAttribArray(colorPosLocation);
// Reset shader property values.
SetShaderProperties(surface, false);
// Reset to default texture and unbind everything this batch bound.
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
glUseProgram(0);
示例绑定代码:
/**
 * Uploads aData into the buffer aBufferID and points the vertex attribute
 * aAttributeLocation at it as three floats per element.
 *
 * @param aTarget Buffer target passed to glBindBuffer / glBufferData.
 * @param aBufferID Buffer object that receives the data.
 * @param aAttributeLocation Attribute location to enable and configure;
 *        -1 makes the call a no-op.
 * @param aData Elements to upload (GL_DYNAMIC_DRAW); may be empty.
 */
void PCShaderScreen::BindAttributeV3(GLenum aTarget, int const aBufferID, int const aAttributeLocation, std::vector<Vector3> &aData)
{
  // Nothing is enabled, bound or uploaded for a location of -1.
  if(aAttributeLocation == -1)
  {
    return;
  }

  glEnableVertexAttribArray(aAttributeLocation);
  glBindBuffer(aTarget, aBufferID);
  // data() is well-defined for an empty vector, whereas &aData[0] is not.
  glBufferData(aTarget, sizeof(Vector3) * aData.size(), aData.data(), GL_DYNAMIC_DRAW);
  // Tightly packed Vector3 elements, starting at offset 0 of the bound buffer.
  glVertexAttribPointer(aAttributeLocation, 3, GL_FLOAT, GL_FALSE, sizeof(Vector3), 0);
  glBindBuffer(aTarget, 0);
}
VS 代码:
#version 330
// Vertex shader: offsets each vertex by its object position, applies the
// camera transform and translation, then scales and shifts x/y.

// Per-vertex inputs (only .xyz / .xy of the position and UV inputs are read).
in vec4 vertexPos;
in vec4 texCoord;
in vec4 objectPos;
in vec4 primaryColor;

// Camera state supplied by the application.
uniform vec3 cameraDiff;
uniform vec3 cameraSize;
uniform mat3 cameraTransform;

// Values handed to the fragment stage.
out vec2 texValues;
out vec4 texColor;

void main()
{
  // Pass the UVs and the color straight through.
  texValues = texCoord.xy;
  texColor = primaryColor;

  // Transform the offset vertex and subtract the camera translation.
  vec3 pos = (cameraTransform * (vertexPos.xyz + objectPos.xyz)) - cameraDiff;

  // Divide x by cameraSize.x and y by -cameraSize.y, then shift by (-1, +1).
  pos.xy = pos.xy / vec2(cameraSize.x, -cameraSize.y) + vec2(-1.0, 1.0);

  gl_Position = vec4(pos, 1.0);
}
FS代码:
#version 330
// Fragment shader used to demonstrate the slowdown. main() holds two
// alternative outputs one after the other; as written, the second
// assignment overwrites the first.
// Sampler whose texture unit is set by the application via glUniform1i.
uniform sampler2D textureUnit;
// Interpolated UVs and color from the vertex shader.
in vec2 texValues;
in vec4 texColor;
out vec4 fragColor;
void main()
{
// Variant that samples the texture and tints it with the vertex color.
// Slow, 99% CPU usage on OSX only
fragColor = texture(textureUnit, texValues) * texColor;
// Variant that writes constant white without touching the sampler.
// Fine on everything
fragColor = vec4(1,1,1,1);
}
我在这里真的没有想法,我什至尽我所能地遵循了 Apple 的最佳实践 (https://developer.apple.com/library/content/documentation/GraphicsImaging/Conceptual/OpenGL-MacProgGuide/opengl_texturedata/opengl_texturedata.html),但没有运气。
我使用的 Windows 和 Linux 驱动程序是否只是给了我某种我不知道的宽容? OSX 驱动程序真的那么敏感吗?我肯定漏掉了什么。任何帮助和见解我都将不胜感激。感谢您阅读我冗长的叙述。
您在绘图代码中做了很多 gl-calls:绑定缓冲区、将数据上传到缓冲区等。其中大部分最好在准备或上传数据时完成。
我更喜欢在绘制代码中做:
- glUseProgram(程序);
- 通过 glBindVertexArray 启用 VAO
- 传递 uniform 变量
- 通过 glActiveTexture 激活纹理单元
- glDrawXXX 命令
- glUseProgram(0);
- 禁用 VAO
感谢 @keltar 发现了这一点:我的问题出在 glActiveTexture 调用上。
我将调用从 glActiveTexture(GL_TEXTURE0 + activeTexture) 更改为仅 glActiveTexture(GL_TEXTURE0).
解释@keltar:"Constantly changing the texture slot number might force driver to recompile shader each time. I don't think it matters which exact value it would be, as long as it doesn't change (and within GL limits). I suppose hardware that you use can't effectively (or at all) sample texture from any slot specified by uniform variable - but GL implies so. On some hardware e.g. fetching vertex attributes is internally part of shader too. When state changes, driver attempts to patch shader, but if change is too big (or driver don't know how to patch) - it falls to recompilation. Sadly OSX graphics drivers aren't known to be good, to my knowledge."
我主要使用 SDL2 和 OpenGL 3.3 在 Linux (Mint) 和 Windows 上进行开发,在绘图对象方面几乎没有问题。 CPU 使用率从未真正超过 ~40%。
直到我尝试把现有的成果移植到 OSX (Sierra),情况才发生了变化。 使用的着色器和代码与在 Linux 和 Windows 上运行良好的完全相同,但在 OSX 上 CPU 使用率持续飙升至约 99%。
起初,我认为这是一个批处理问题,所以我将我的绘图调用批处理在一起以尽量减少对 glDrawElements 的调用次数,但这没有用。
然后,我认为这是一个涉及在 vertex/fragment 着色器中不使用属性的问题(例如:OpenGL core profile incredible slowdown on OS X)
此外,我将帧率保持在 60 fps。
清理之后,仍然没有改善。我尽可能记录了一切,但 glGetError() 和着色器日志都没有报告任何问题。
于是我从 vertex/fragment 着色器中一点一点地删除代码,看看究竟是什么拖慢了我的绘制调用。我最终把问题缩小到:在我的 vertex/fragment 着色器中对 texture() 函数的任何调用都会导致 CPU 使用率很高。
纹理加载代码:
// Texture loading
/**
 * Creates a GL_TEXTURE_2D texture from mSurface and registers it with the
 * manager under aName.
 *
 * Filtering and wrapping are read from the "OpenGLMinFilter",
 * "OpenGLMagFilter", "OpenGLWrapModeS" and "OpenGLWrapModeT" constants.
 * Only the values "GL_NEAREST" (filters) and "GL_CLAMP_TO_EDGE" (wrap modes)
 * are recognised; anything else keeps GL_LINEAR / GL_REPEAT.
 *
 * @param aName Key under which the texture id and surface size are registered.
 */
void PCShaderSurface::AddTexturePairing(HashString const &aName)
{
  // Resolve the sampling configuration; each lookup overrides its default
  // only on an exact string match.
  GLint const minFilter = (Constants::GetString("OpenGLMinFilter") == "GL_NEAREST") ? GL_NEAREST : GL_LINEAR;
  GLint const magFilter = (Constants::GetString("OpenGLMagFilter") == "GL_NEAREST") ? GL_NEAREST : GL_LINEAR;
  GLint const wrapS = (Constants::GetString("OpenGLWrapModeS") == "GL_CLAMP_TO_EDGE") ? GL_CLAMP_TO_EDGE : GL_REPEAT;
  GLint const wrapT = (Constants::GetString("OpenGLWrapModeT") == "GL_CLAMP_TO_EDGE") ? GL_CLAMP_TO_EDGE : GL_REPEAT;

  // Create the texture object and make it current so the parameter and
  // upload calls below apply to it.
  glGenTextures(1, &mTextureID);
  glBindTexture(GL_TEXTURE_2D, mTextureID);

  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, minFilter);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, magFilter);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, wrapS);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, wrapT);

  // Upload level 0 from the surface pixels: stored as GL_RGBA, sourced as
  // unsigned bytes laid out according to mTextureFormat.
  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, mSurface->w, mSurface->h, 0, mTextureFormat, GL_UNSIGNED_BYTE, mSurface->pixels);

  // Publish the texture id together with the surface dimensions.
  GetManager()->AddTexturePairing(aName, TextureData(mTextureID, mSurface->w, mSurface->h));
}
绘制代码:
// Draws one batch: objects that use the same program and texture id are
// submitted together in a single glDrawElements call.
glUseProgram(program);
// Always sample through texture unit 0. Picking a different unit per batch
// (previously `texture % mMaxTextures`) was found to push CPU usage to ~99%
// on OSX; the reported explanation is that the driver may recompile the
// shader whenever the sampler's unit changes.
int const activeTexture = 0;
// Attribute locations are looked up by name; BindAttributeV3 skips a
// location of -1 (BindAttributeV4 / DisableVertexAttribArray are assumed to
// do the same — TODO confirm).
int vertexPosLocation = glGetAttribLocation(program, "vertexPos");
int texCoordPosLocation = glGetAttribLocation(program, "texCoord");
int objectPosLocation = glGetAttribLocation(program, "objectPos");
int colorPosLocation = glGetAttribLocation(program, "primaryColor");
// Calculate matrices and push vertex, color, position, texCoord data
// ...
// Bind the VAO, bind the batch texture on the fixed unit and set uniforms.
glBindVertexArray(mVertexArrayObjectID);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, texture);
glUniform1i(glGetUniformLocation(program, "textureUnit"), activeTexture);
glUniform3f(glGetUniformLocation(program, "cameraDiff"), cameraTranslation.x, cameraTranslation.y, cameraTranslation.z);
glUniform3f(glGetUniformLocation(program, "cameraSize"), cameraSize.x, cameraSize.y, cameraSize.z);
// The camera matrix is uploaded with transpose = GL_TRUE.
glUniformMatrix3fv(glGetUniformLocation(program, "cameraTransform"), 1, GL_TRUE, cameraMatrix);
// Set shader properties. Due to batching, done on a per surface / shader basis.
// Shader uniforms are reset upon relinking.
SetShaderProperties(surface, true);
// Set VBO and buffer data. The VAO is bound again before the attribute
// setup; SetShaderProperties is opaque from here, so the re-bind is kept.
glBindVertexArray(mVertexArrayObjectID);
BindAttributeV3(GL_ARRAY_BUFFER, mVertexBufferID, vertexPosLocation, vertexData);
BindAttributeV3(GL_ARRAY_BUFFER, mTextureBufferID, texCoordPosLocation, textureData);
BindAttributeV3(GL_ARRAY_BUFFER, mPositionBufferID, objectPosLocation, positionData);
BindAttributeV4(GL_ARRAY_BUFFER, mColorBufferID, colorPosLocation, colorData);
// Set index data
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBufferID);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLuint) * indices.size(), &indices[0], GL_DYNAMIC_DRAW);
// Draw and disable
// NOTE(review): glDrawElements' count is the number of indices to render,
// but vertexData.size() is passed — confirm it always equals indices.size().
glDrawElements(GL_TRIANGLES, static_cast<unsigned>(vertexData.size()), GL_UNSIGNED_INT, 0);
DisableVertexAttribArray(vertexPosLocation);
DisableVertexAttribArray(texCoordPosLocation);
DisableVertexAttribArray(objectPosLocation);
DisableVertexAttribArray(colorPosLocation);
// Reset shader property values.
SetShaderProperties(surface, false);
// Reset to default texture and unbind everything this batch bound.
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
glUseProgram(0);
示例绑定代码:
/**
 * Uploads aData into the buffer aBufferID and points the vertex attribute
 * aAttributeLocation at it as three floats per element.
 *
 * @param aTarget Buffer target passed to glBindBuffer / glBufferData.
 * @param aBufferID Buffer object that receives the data.
 * @param aAttributeLocation Attribute location to enable and configure;
 *        -1 makes the call a no-op.
 * @param aData Elements to upload (GL_DYNAMIC_DRAW); may be empty.
 */
void PCShaderScreen::BindAttributeV3(GLenum aTarget, int const aBufferID, int const aAttributeLocation, std::vector<Vector3> &aData)
{
  // Nothing is enabled, bound or uploaded for a location of -1.
  if(aAttributeLocation == -1)
  {
    return;
  }

  glEnableVertexAttribArray(aAttributeLocation);
  glBindBuffer(aTarget, aBufferID);
  // data() is well-defined for an empty vector, whereas &aData[0] is not.
  glBufferData(aTarget, sizeof(Vector3) * aData.size(), aData.data(), GL_DYNAMIC_DRAW);
  // Tightly packed Vector3 elements, starting at offset 0 of the bound buffer.
  glVertexAttribPointer(aAttributeLocation, 3, GL_FLOAT, GL_FALSE, sizeof(Vector3), 0);
  glBindBuffer(aTarget, 0);
}
VS 代码:
#version 330
// Vertex shader: offsets each vertex by its object position, applies the
// camera transform and translation, then scales and shifts x/y.

// Per-vertex inputs (only .xyz / .xy of the position and UV inputs are read).
in vec4 vertexPos;
in vec4 texCoord;
in vec4 objectPos;
in vec4 primaryColor;

// Camera state supplied by the application.
uniform vec3 cameraDiff;
uniform vec3 cameraSize;
uniform mat3 cameraTransform;

// Values handed to the fragment stage.
out vec2 texValues;
out vec4 texColor;

void main()
{
  // Pass the UVs and the color straight through.
  texValues = texCoord.xy;
  texColor = primaryColor;

  // Transform the offset vertex and subtract the camera translation.
  vec3 pos = (cameraTransform * (vertexPos.xyz + objectPos.xyz)) - cameraDiff;

  // Divide x by cameraSize.x and y by -cameraSize.y, then shift by (-1, +1).
  pos.xy = pos.xy / vec2(cameraSize.x, -cameraSize.y) + vec2(-1.0, 1.0);

  gl_Position = vec4(pos, 1.0);
}
FS代码:
#version 330
// Fragment shader used to demonstrate the slowdown. main() holds two
// alternative outputs one after the other; as written, the second
// assignment overwrites the first.
// Sampler whose texture unit is set by the application via glUniform1i.
uniform sampler2D textureUnit;
// Interpolated UVs and color from the vertex shader.
in vec2 texValues;
in vec4 texColor;
out vec4 fragColor;
void main()
{
// Variant that samples the texture and tints it with the vertex color.
// Slow, 99% CPU usage on OSX only
fragColor = texture(textureUnit, texValues) * texColor;
// Variant that writes constant white without touching the sampler.
// Fine on everything
fragColor = vec4(1,1,1,1);
}
我在这里真的没有想法,我什至尽我所能地遵循了 Apple 的最佳实践 (https://developer.apple.com/library/content/documentation/GraphicsImaging/Conceptual/OpenGL-MacProgGuide/opengl_texturedata/opengl_texturedata.html),但没有运气。
我使用的 Windows 和 Linux 驱动程序是否只是给了我某种我不知道的宽容? OSX 驱动程序真的那么敏感吗?我肯定漏掉了什么。任何帮助和见解我都将不胜感激。感谢您阅读我冗长的叙述。
您在绘图代码中做了很多 gl-calls:绑定缓冲区、将数据上传到缓冲区等。其中大部分最好在准备或上传数据时完成。
我更喜欢在绘制代码中做:
- glUseProgram(程序);
- 通过 glBindVertexArray 启用 VAO
- 传递 uniform 变量
- 通过 glActiveTexture 激活纹理单元
- glDrawXXX 命令
- glUseProgram(0);
- 禁用 VAO
感谢 @keltar 发现了这一点:我的问题出在 glActiveTexture 调用上。
我将调用从 glActiveTexture(GL_TEXTURE0 + activeTexture) 更改为仅 glActiveTexture(GL_TEXTURE0).
解释@keltar:"Constantly changing the texture slot number might force driver to recompile shader each time. I don't think it matters which exact value it would be, as long as it doesn't change (and within GL limits). I suppose hardware that you use can't effectively (or at all) sample texture from any slot specified by uniform variable - but GL implies so. On some hardware e.g. fetching vertex attributes is internally part of shader too. When state changes, driver attempts to patch shader, but if change is too big (or driver don't know how to patch) - it falls to recompilation. Sadly OSX graphics drivers aren't known to be good, to my knowledge."