Are there time limits on fragment shaders? iOS using filters with GPUImage from Brad Larson
I'm using Brad Larson's excellent GPUImage library on iOS 8, and I've run into a problem with a filter that only completes about 3/4 of its task/fragments on a single image run through multiple filters:
From the testing I've been doing, it seems I'm violating either a time limit or a buffer size; oddly enough, it looks more like a time limit, though that may not be the case and it's more likely I'm overflowing something somewhere.
In the image below, I'm running the picture through a few ordinary GPUImage filters and then applying a new filter I created at the end, which takes 5 to 8 seconds to complete. I can adjust the number of loops inside the new filter's fragment shader and see it run faster, and then the last filter completes, even though it takes up the same amount of buffer space (I believe).
(Also see the fragment shader code below.)
If I leave the filter the way I want it, the image below is the result when it stops about 3/4 of the way through; oddly, what you see is the output of the third-to-last filter (a GPUImageDirectionalSobelEdgeDetectionFilter) rather than the second-to-last filter (a GPUImageDirectionalNonMaximumSuppressionFilter).
I can't find any "limits" in Brad Larson's code that cap buffers or time.
Does it look like I'm overflowing a buffer or running into some other limit? Keep in mind that I can get this filter to complete simply by reducing some of the loops in the last fragment shader, without changing anything else; and the loops don't fill any buffers, they just compute some floats and vecs (possibly overflowing one somehow?).
(Edit: could some buffer/image space be getting deallocated or freed because the process takes so long that there's time for it to be released?)
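One quick way to test the time-limit theory is to force the final filter to render at a smaller size, so each draw call covers far fewer fragments; if the filter then completes, that points at time rather than buffer space. A minimal sketch using GPUImage's forceProcessingAtSize:, assuming theNewFilter is the custom filter shown in the code below (the half-size value is just an example):
// Render the custom filter at half resolution as an experiment; if the
// shader completes at this size, the failure is likely time-related.
[theNewFilter forceProcessingAtSize:CGSizeMake(1024.0, 768.0)]; // assumed to be half the source size
[sourcePicture processImage];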
Below is some debug output from Brad's code showing the link and compile times for the programs/filters:
Core Graphics draw time: 731.258035
GLProgram compiled in 5.171001 ms
GLProgram compiled in 2.515018 ms
GLProgram linked in 5.878985 ms
GLProgram compiled in 0.092983 ms
GLProgram compiled in 0.181973 ms
GLProgram linked in 1.731992 ms
GLProgram compiled in 0.275016 ms
GLProgram compiled in 0.414014 ms
GLProgram linked in 1.176000 ms
GLProgram compiled in 0.074029 ms
GLProgram compiled in 0.380039 ms
GLProgram linked in 0.957966 ms
GLProgram compiled in 0.078022 ms
GLProgram compiled in 1.359999 ms
GLProgram linked in 5.873978 ms
Here is part of the fragment shader. There are several ways I can tweak the loop sections to make it take less time, and the filter then completes; the parts I've left out (the etc.... etc....) are more of the same kind of code inside the loops of this new filter's fragment shader:
[sourcePicture addTarget:theNewFilter];      // feed the source image into the new filter
[theNewFilter useNextFrameForImageCapture];  // hold onto the framebuffer so it can be read back
[sourcePicture processImage];                // run the chain
UIImage *currentFilteredVideoFrame = [theNewFilter imageFromCurrentFramebuffer];
[self.zoomView setImage:currentFilteredVideoFrame];  // display the result
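For reference, the new filter itself is created from the shader source below in the standard GPUImage way; this is only a sketch, with kNewFilterShaderString standing in for the SHADER_STRING(...) that follows, and the texel values shown are assumptions:
// Hypothetical setup for the custom filter. kNewFilterShaderString is the
// SHADER_STRING(...) below; the uniform values are examples only.
GPUImageFilter *theNewFilter = [[GPUImageFilter alloc] initWithFragmentShaderFromString:kNewFilterShaderString];
[theNewFilter setFloat:(1.0 / 2048.0) forUniformName:@"texelWidth"];   // 1 / image width (assumed)
[theNewFilter setFloat:(2.0 / 2048.0) forUniformName:@"texelWidthX2"]; // double texel step
// the int sAMPLES[] array and the vec3 weight arrays need their own uniform setters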
And the fragment shader:
(
precision mediump float;
uniform sampler2D inputImageTexture;
varying mediump vec2 textureCoordinate;
uniform mediump float texelWidth;
uniform mediump float texelHeight;
uniform mediump float texelWidthX2;
uniform mediump float texelHeightX2;
const int numOfConvolutions = 7;
uniform int sAMPLES[numOfConvolutions];
const int sAMPLES0 = 17;
const int sAMPLES1 = 32;
const int sAMPLES2 = 30;
const int sAMPLES3 = 32;
const int sAMPLES4 = 32;
const int sAMPLES5 = 32;
const int sAMPLES6 = 32;
uniform mediump float convolutionCriteria[numOfConvolutions];
uniform mediump vec3 pos0Weight[sAMPLES0];
uniform mediump vec3 pos1Weight[sAMPLES1];
uniform mediump vec3 pos2Weight[sAMPLES2];
uniform mediump vec3 pos3Weight[sAMPLES3];
uniform mediump vec3 pos4Weight[sAMPLES4];
uniform mediump vec3 pos5Weight[sAMPLES5];
uniform mediump vec3 pos6Weight[sAMPLES6];
void main()
{
    mediump vec4 textureColor = texture2D(inputImageTexture, textureCoordinate);
    mediump vec3 weightStep;
    mediump vec2 currentStep1;
    mediump vec2 currentStep2;
    mediump vec2 sideStepRight;
    mediump vec2 sideStepLeft;
    mediump vec2 bottomStep;
    mediump vec2 topStep;
    mediump float currentColorf;
    mediump float finalColorf1 = 0.0;
    mediump float finalColorf2 = 0.0;
    mediump float totalColor1f = 0.0;
    mediump float totalColor2f = 0.0;
    mediump float rightSideColorBotf;
    mediump float rightSideColorTopf;
    mediump float leftSideColorBotf;
    mediump float leftSideColorTopf;
    mediump float bottomRightSideColorf;
    mediump float topRightSideColorf;
    mediump float bottomLeftSideColorf;
    mediump float topLeftSideColorf;
    mediump vec2 currentCoordinate;

    if (textureColor.r > 0.02)
    {
        for (int j = 0; j < (numOfConvolutions - 1); j++)
        {
            totalColor2f = 0.0;
            totalColor1f = 0.0;
            for (int i = 2; i < sAMPLES[j]; i++)
            {
                if (j == 0) weightStep = pos0Weight[i];
                else if (j == 1) weightStep = pos1Weight[i];
                else if (j == 2) weightStep = pos2Weight[i];
                else if (j == 3) weightStep = pos3Weight[i];
                else if (j == 4) weightStep = pos4Weight[i];
                else if (j == 5) weightStep = pos5Weight[i];

                sideStepLeft  = vec2(weightStep.x - texelWidthX2, weightStep.y);
                currentStep1  = vec2(weightStep.x, weightStep.y);
                sideStepRight = vec2(weightStep.x + texelWidthX2, weightStep.y);
                topStep       = vec2(weightStep.y, -weightStep.x - texelHeightX2);
                currentStep2  = vec2(weightStep.y, -weightStep.x);
                bottomStep    = vec2(weightStep.y, -weightStep.x + texelHeightX2);

                //------------ Bottom first arm Side step right ---------------
                currentCoordinate = textureCoordinate.xy + sideStepRight;
                rightSideColorBotf = texture2D(inputImageTexture, currentCoordinate).r * weightStep.z;
                //------------ top half first arm Side step right ---------------
                currentCoordinate = textureCoordinate.xy - sideStepRight;
                rightSideColorTopf = texture2D(inputImageTexture, currentCoordinate).r * weightStep.z;
                //------------ Bottom first arm Side step left ----------
                // etc.... etc.... etc.....
                //------------ left half second arm ---------------
                currentCoordinate = textureCoordinate.xy - currentStep2;
                currentColorf = texture2D(inputImageTexture, currentCoordinate).r * weightStep.z;
                totalColor2f += currentColorf - (bottomLeftSideColorf + topLeftSideColorf);
            }
            if (totalColor2f > convolutionCriteria[j]) { finalColorf2 = totalColor2f; break; }
            else if (totalColor1f > convolutionCriteria[j]) { finalColorf1 = totalColor1f; break; }
        }

        if ((finalColorf2 < 0.01) && (finalColorf1 < 0.01))
        {
            for (int j = 1; j < (numOfConvolutions - 1); j++)
            {
                totalColor2f = 0.0;
                totalColor1f = 0.0;
                for (int i = 2; i < sAMPLES[j]; i++)
                {
                    if (j == 1) weightStep = pos1Weight[i];
                    else if (j == 2) weightStep = pos2Weight[i];
                    else if (j == 3) weightStep = pos3Weight[i];
                    else if (j == 4) weightStep = pos4Weight[i];
                    else if (j == 5) weightStep = pos5Weight[i];

                    sideStepLeft  = vec2(-weightStep.x - texelWidthX2, weightStep.y);
                    currentStep1  = vec2(-weightStep.x, weightStep.y);
                    sideStepRight = vec2(-weightStep.x + texelWidthX2, weightStep.y);
                    topStep       = vec2(weightStep.y, weightStep.x - texelHeightX2);
                    currentStep2  = vec2(weightStep.y, weightStep.x);
                    bottomStep    = vec2(weightStep.y, weightStep.x + texelHeightX2);

                    //------------ Bottom first arm Side step right ---------------
                    currentCoordinate = textureCoordinate.xy + sideStepRight;
                    rightSideColorBotf = texture2D(inputImageTexture, currentCoordinate).r * weightStep.z;
                    //------------ top half first arm Side step right ---------------
                    currentCoordinate = textureCoordinate.xy - sideStepRight;
                    rightSideColorTopf = texture2D(inputImageTexture, currentCoordinate).r * weightStep.z;
                    //------------ Bottom first arm Side step left ---------------
                    // etc.......etc......etc.....
                    //------------ left half second arm ---------------
                    currentCoordinate = textureCoordinate.xy - currentStep2;
                    currentColorf = texture2D(inputImageTexture, currentCoordinate).r * weightStep.z;
                    totalColor2f += currentColorf - (bottomLeftSideColorf + topLeftSideColorf);
                }
                if (totalColor2f > convolutionCriteria[j]) { finalColorf2 = totalColor2f; break; }
                else if (totalColor1f > convolutionCriteria[j]) { finalColorf1 = totalColor1f; break; }
            }
        }
    }

    if (finalColorf2 > 0.01)
    {
        gl_FragColor = vec4(textureColor.r * 1.6, 0.0, 0.0, 1.0);
    } else if (finalColorf1 > 0.01) {
        gl_FragColor = vec4(0.0, 0.0, textureColor.r * 1.6, 1.0);
    } else {
        gl_FragColor = textureColor;
    }
}
);
Well, I finally determined that it's some kind of hardware limit, something like GL_MAX_FRAGMENT_UNIFORM_COMPONENTS, though not that one. There are too many variations/combinations for me to pin down which limit it is, but the fragment shader never failed to compile on the device; I had wrongly assumed that's what would happen if such a limit were exceeded.
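On OpenGL ES 2.0 the analogous fragment-uniform limit is GL_MAX_FRAGMENT_UNIFORM_VECTORS, and these implementation limits can be read back at runtime with glGetIntegerv. A minimal sketch for comparing the two iPads (the calls need a current EAGLContext; which limit, if any, is actually being hit here is still unclear):
// Query a few of the implementation limits that seemed like candidates.
GLint maxFragmentUniformVectors = 0, maxVaryingVectors = 0, maxTextureSize = 0;
glGetIntegerv(GL_MAX_FRAGMENT_UNIFORM_VECTORS, &maxFragmentUniformVectors); // uniform storage available to fragment shaders
glGetIntegerv(GL_MAX_VARYING_VECTORS, &maxVaryingVectors);                  // varyings between vertex and fragment stages
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);                        // largest texture dimension
NSLog(@"fragment uniform vectors: %d, varying vectors: %d, max texture size: %d",
      maxFragmentUniformVectors, maxVaryingVectors, maxTextureSize);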
The way I determined that this is the problem: I ran exactly the same code on an iPad mini 2 with retina display and on an iPad mini 1. The iPad mini 2 completed the fragment shader without a problem, and even handled a photo twice the size of the image above with no issues and no code changes. It looks to me like I'm going to have to restrict the hardware the app can run on.
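One crude way to do that gating at runtime (my own workaround sketch, not a GPUImage feature) is to check the GL renderer string: A7-class GPUs report as "Apple ..." while the iPad mini 1 class reports as "PowerVR SGX ...", so the heavy filter can be enabled only on the former. Assumes a GL context is current:
const GLubyte *renderer = glGetString(GL_RENDERER);
NSString *rendererString = [NSString stringWithUTF8String:(const char *)renderer];
// Heuristic: enable the expensive filter only on Apple-designed GPUs (A7 and later).
BOOL heavyFilterSupported = [rendererString hasPrefix:@"Apple"];
NSLog(@"GL_RENDERER: %@ (heavy filter enabled: %d)", rendererString, heavyFilterSupported);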