简单 Metal 程序中的内存泄漏
Memory Leak in a Simple Metal Program
我正在尝试学习 Metal 以进行科学计算编程。我尝试编写一个执行形态学膨胀的简单内核。我遇到的问题是,每次对图像调用 dilate 时,内存似乎都会增加几 KB。
我在 for 循环中将 dilate 方法运行了 10000 次,以此验证内存泄漏:在 Xcode 的调试导航器中,已分配的内存从 16 MB 增长到了 17 MB。
您在我的代码中看到任何会导致内存泄漏的内容吗?如果有帮助,我还将项目推送到 Github。
/// Metal-backed implementation of the Morphology interface.
///
/// The constructor creates the device, command queue, a small buffer holding
/// the kernel size, an input and an output R8Uint texture of the requested
/// dimensions, and a compute pipeline state. dilate() uploads an image into
/// the input texture, runs the pipeline, and returns a copy of the output
/// texture.
///
/// NOTE(review): the Objective-C members are never released explicitly; this
/// presumably relies on ARC being enabled for this translation unit - confirm.
class MorphologyIOS : public Morphology
{
public:
    /// @param kernel  Kernel size; copied into a GPU buffer bound at buffer index 0.
    /// @param width   Width in pixels of the input and output textures.
    /// @param height  Height in pixels of the input and output textures.
    MorphologyIOS(
        const uint kernel,
        const uint width,
        const uint height
    ) {
        device_ = MTLCreateSystemDefaultDevice();
        kernelSize_ = kernel;
        // The options argument is an MTLResourceOptions value, so use the
        // MTLResource* constant rather than the MTLStorageMode one.
        buffer_ = [device_ newBufferWithBytes:&kernelSize_
                                       length:sizeof(kernelSize_)
                                      options:MTLResourceStorageModeShared];
        library_ = [device_ newDefaultLibrary];
        commandQueue_ = [device_ newCommandQueue];
        // NOTE(review): the pipeline is built from a function named "identity",
        // not from a dilation kernel - confirm this is intended.
        identityFunction_ = [library_ newFunctionWithName:@"identity"];

        MTLTextureDescriptor* readDesc =
            [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatR8Uint
                                                               width:width
                                                              height:height
                                                           mipmapped:NO];
        MTLTextureDescriptor* writeDesc =
            [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatR8Uint
                                                               width:width
                                                              height:height
                                                           mipmapped:NO];
        writeDesc.usage = MTLTextureUsageShaderWrite;
        inTexture_ = [device_ newTextureWithDescriptor:readDesc];
        outTexture_ = [device_ newTextureWithDescriptor:writeDesc];
        entireImage_ = MTLRegionMake2D(0, 0, width, height);

        // Capture the error instead of discarding it, so a failed pipeline
        // build is visible rather than surfacing later as a nil pipeline.
        NSError* error = nil;
        pipelineState_ = [device_ newComputePipelineStateWithFunction:identityFunction_
                                                                error:&error];
        if (pipelineState_ == nil) {
            NSLog(@"MorphologyIOS: failed to create compute pipeline state: %@", error);
        }
    }

    virtual ~MorphologyIOS() override {}

    /// Runs the compute pipeline over `inImage` and returns the result.
    ///
    /// @param inImage  Source pixels, one byte per pixel, tightly packed with a
    ///                 row pitch equal to the texture width. Assumed to hold at
    ///                 least width * height bytes - TODO confirm with callers.
    /// @return A buffer of width * height bytes read back from the output
    ///         texture, released with free() when the last owner goes away.
    ///         An empty pointer is returned if `inImage` is null, the pipeline
    ///         state could not be created, or the allocation fails.
    virtual std::shared_ptr<unsigned char> dilate(
        const std::shared_ptr<unsigned char>& inImage
    ) override {
        std::shared_ptr<unsigned char> outImage;
        if (!inImage || pipelineState_ == nil) {
            return outImage;
        }

        // Both textures are R8Uint (one byte per pixel), so the row pitch in
        // bytes equals the texture width.
        const NSUInteger bytesPerRow = outTexture_.width;
        unsigned char* result =
            static_cast<unsigned char*>(malloc(bytesPerRow * outTexture_.height));
        if (result == nullptr) {
            return outImage;
        }
        // Take ownership right away and pair malloc() with free(): the default
        // shared_ptr deleter uses `delete`, which must not be applied to
        // memory obtained from malloc().
        outImage.reset(result, free);

        @autoreleasepool
        {
            // The command buffer and encoder are per-call objects; keeping them
            // as locals avoids holding on to them between calls.
            id<MTLCommandBuffer> commandBuffer = [commandQueue_ commandBuffer];
            id<MTLComputeCommandEncoder> commandEncoder = [commandBuffer computeCommandEncoder];
            [commandEncoder setComputePipelineState:pipelineState_];

            [inTexture_ replaceRegion:entireImage_
                          mipmapLevel:0
                            withBytes:inImage.get()
                          bytesPerRow:inTexture_.width];
            [commandEncoder setTexture:inTexture_ atIndex:0];
            [commandEncoder setTexture:outTexture_ atIndex:1];
            [commandEncoder setBuffer:buffer_ offset:0 atIndex:0];

            // NOTE(review): integer division drops the remainder, so when the
            // width or height is not a multiple of 10 the trailing columns/rows
            // are not dispatched. Rounding up would require the bound kernel to
            // ignore out-of-range thread positions, which cannot be verified
            // from here.
            MTLSize threadGroupCount = MTLSizeMake(10, 10, 1);
            MTLSize threadGroups = MTLSizeMake(inTexture_.width / threadGroupCount.width,
                                               inTexture_.height / threadGroupCount.height,
                                               1);
            [commandEncoder dispatchThreadgroups:threadGroups
                           threadsPerThreadgroup:threadGroupCount];
            [commandEncoder endEncoding];

            [commandBuffer commit];
            // Block until the GPU has finished so the read-back below sees the
            // completed output.
            [commandBuffer waitUntilCompleted];

            [outTexture_ getBytes:result
                      bytesPerRow:bytesPerRow
                       fromRegion:entireImage_
                      mipmapLevel:0];
        }
        return outImage;
    }

private:
    id<MTLDevice> device_;
    uint kernelSize_;
    id<MTLBuffer> buffer_;
    id<MTLLibrary> library_;
    id<MTLComputePipelineState> pipelineState_;
    id<MTLCommandQueue> commandQueue_;
    id<MTLFunction> identityFunction_;
    id<MTLTexture> inTexture_;
    id<MTLTexture> outTexture_;
    MTLRegion entireImage_;
};
我的内核是这样的:
/// Morphological dilation: each output texel receives the maximum of the first
/// channel of the input texels in a square window centred on it.
///
/// @param inTexture   Source texture (read-only), bound at texture index 0.
/// @param outTexture  Destination texture (write-only), bound at texture index 1.
/// @param kernelSize  kernelSize[0] is the window size; the window extends
///                    kernelSize[0] / 2 texels on each side of the centre.
/// @param gid         Position of this thread in the dispatch grid; used as the
///                    texel coordinate.
kernel void dilation(
    texture2d<uint, access::read> inTexture [[texture(0)]],
    texture2d<uint, access::write> outTexture [[texture(1)]],
    device uint *kernelSize [[buffer(0)]],
    uint2 gid [[thread_position_in_grid]]
) {
    const uint width = inTexture.get_width();
    const uint height = inTexture.get_height();

    // Threads outside the image have nothing to do. This also guarantees
    // width >= 1 and height >= 1 below.
    if (gid.x >= width || gid.y >= height)
        return;

    const uint halfKernel = kernelSize[0] / 2;
    const uint lastX = width - 1;
    const uint lastY = height - 1;

    // Clamp the window to the valid texel range [0, width-1] x [0, height-1].
    // The upper bounds are inclusive, so they must stop at the last valid
    // index rather than at width/height. Comparing against the remaining
    // distance to the edge avoids overflow in gid + halfKernel.
    const uint minX = gid.x >= halfKernel ? gid.x - halfKernel : 0;
    const uint minY = gid.y >= halfKernel ? gid.y - halfKernel : 0;
    const uint maxX = halfKernel < lastX - gid.x ? gid.x + halfKernel : lastX;
    const uint maxY = halfKernel < lastY - gid.y ? gid.y + halfKernel : lastY;

    uint maxValue = 0;
    for (uint i = minX; i <= maxX; i++)
    {
        for (uint j = minY; j <= maxY; j++)
        {
            const uint4 value = inTexture.read(uint2(i, j));
            if (maxValue < value[0])
                maxValue = value[0];
        }
    }
    outTexture.write(maxValue, gid);
}
与其说这是一个 bug,不如说它是 Metal 的捕获/验证(capture/validation)层替您做簿记工作时产生的副作用。由于它在实际使用中不会出现,因此大概无需担心。
我正在尝试学习 Metal 以进行科学计算编程。我尝试编写一个执行形态学膨胀的简单内核。我遇到的问题是,每次对图像调用 dilate 时,内存似乎都会增加几 KB。
我在 for 循环中将 dilate 方法运行了 10000 次,以此验证内存泄漏:在 Xcode 的调试导航器中,已分配的内存从 16 MB 增长到了 17 MB。
您在我的代码中看到任何会导致内存泄漏的内容吗?如果有帮助,我还将项目推送到 Github。
/// Metal-backed implementation of the Morphology interface.
///
/// The constructor creates the device, command queue, a small buffer holding
/// the kernel size, an input and an output R8Uint texture of the requested
/// dimensions, and a compute pipeline state. dilate() uploads an image into
/// the input texture, runs the pipeline, and returns a copy of the output
/// texture.
///
/// NOTE(review): the Objective-C members are never released explicitly; this
/// presumably relies on ARC being enabled for this translation unit - confirm.
class MorphologyIOS : public Morphology
{
public:
    /// @param kernel  Kernel size; copied into a GPU buffer bound at buffer index 0.
    /// @param width   Width in pixels of the input and output textures.
    /// @param height  Height in pixels of the input and output textures.
    MorphologyIOS(
        const uint kernel,
        const uint width,
        const uint height
    ) {
        device_ = MTLCreateSystemDefaultDevice();
        kernelSize_ = kernel;
        // The options argument is an MTLResourceOptions value, so use the
        // MTLResource* constant rather than the MTLStorageMode one.
        buffer_ = [device_ newBufferWithBytes:&kernelSize_
                                       length:sizeof(kernelSize_)
                                      options:MTLResourceStorageModeShared];
        library_ = [device_ newDefaultLibrary];
        commandQueue_ = [device_ newCommandQueue];
        // NOTE(review): the pipeline is built from a function named "identity",
        // not from a dilation kernel - confirm this is intended.
        identityFunction_ = [library_ newFunctionWithName:@"identity"];

        MTLTextureDescriptor* readDesc =
            [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatR8Uint
                                                               width:width
                                                              height:height
                                                           mipmapped:NO];
        MTLTextureDescriptor* writeDesc =
            [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatR8Uint
                                                               width:width
                                                              height:height
                                                           mipmapped:NO];
        writeDesc.usage = MTLTextureUsageShaderWrite;
        inTexture_ = [device_ newTextureWithDescriptor:readDesc];
        outTexture_ = [device_ newTextureWithDescriptor:writeDesc];
        entireImage_ = MTLRegionMake2D(0, 0, width, height);

        // Capture the error instead of discarding it, so a failed pipeline
        // build is visible rather than surfacing later as a nil pipeline.
        NSError* error = nil;
        pipelineState_ = [device_ newComputePipelineStateWithFunction:identityFunction_
                                                                error:&error];
        if (pipelineState_ == nil) {
            NSLog(@"MorphologyIOS: failed to create compute pipeline state: %@", error);
        }
    }

    virtual ~MorphologyIOS() override {}

    /// Runs the compute pipeline over `inImage` and returns the result.
    ///
    /// @param inImage  Source pixels, one byte per pixel, tightly packed with a
    ///                 row pitch equal to the texture width. Assumed to hold at
    ///                 least width * height bytes - TODO confirm with callers.
    /// @return A buffer of width * height bytes read back from the output
    ///         texture, released with free() when the last owner goes away.
    ///         An empty pointer is returned if `inImage` is null, the pipeline
    ///         state could not be created, or the allocation fails.
    virtual std::shared_ptr<unsigned char> dilate(
        const std::shared_ptr<unsigned char>& inImage
    ) override {
        std::shared_ptr<unsigned char> outImage;
        if (!inImage || pipelineState_ == nil) {
            return outImage;
        }

        // Both textures are R8Uint (one byte per pixel), so the row pitch in
        // bytes equals the texture width.
        const NSUInteger bytesPerRow = outTexture_.width;
        unsigned char* result =
            static_cast<unsigned char*>(malloc(bytesPerRow * outTexture_.height));
        if (result == nullptr) {
            return outImage;
        }
        // Take ownership right away and pair malloc() with free(): the default
        // shared_ptr deleter uses `delete`, which must not be applied to
        // memory obtained from malloc().
        outImage.reset(result, free);

        @autoreleasepool
        {
            // The command buffer and encoder are per-call objects; keeping them
            // as locals avoids holding on to them between calls.
            id<MTLCommandBuffer> commandBuffer = [commandQueue_ commandBuffer];
            id<MTLComputeCommandEncoder> commandEncoder = [commandBuffer computeCommandEncoder];
            [commandEncoder setComputePipelineState:pipelineState_];

            [inTexture_ replaceRegion:entireImage_
                          mipmapLevel:0
                            withBytes:inImage.get()
                          bytesPerRow:inTexture_.width];
            [commandEncoder setTexture:inTexture_ atIndex:0];
            [commandEncoder setTexture:outTexture_ atIndex:1];
            [commandEncoder setBuffer:buffer_ offset:0 atIndex:0];

            // NOTE(review): integer division drops the remainder, so when the
            // width or height is not a multiple of 10 the trailing columns/rows
            // are not dispatched. Rounding up would require the bound kernel to
            // ignore out-of-range thread positions, which cannot be verified
            // from here.
            MTLSize threadGroupCount = MTLSizeMake(10, 10, 1);
            MTLSize threadGroups = MTLSizeMake(inTexture_.width / threadGroupCount.width,
                                               inTexture_.height / threadGroupCount.height,
                                               1);
            [commandEncoder dispatchThreadgroups:threadGroups
                           threadsPerThreadgroup:threadGroupCount];
            [commandEncoder endEncoding];

            [commandBuffer commit];
            // Block until the GPU has finished so the read-back below sees the
            // completed output.
            [commandBuffer waitUntilCompleted];

            [outTexture_ getBytes:result
                      bytesPerRow:bytesPerRow
                       fromRegion:entireImage_
                      mipmapLevel:0];
        }
        return outImage;
    }

private:
    id<MTLDevice> device_;
    uint kernelSize_;
    id<MTLBuffer> buffer_;
    id<MTLLibrary> library_;
    id<MTLComputePipelineState> pipelineState_;
    id<MTLCommandQueue> commandQueue_;
    id<MTLFunction> identityFunction_;
    id<MTLTexture> inTexture_;
    id<MTLTexture> outTexture_;
    MTLRegion entireImage_;
};
我的内核是这样的:
/// Morphological dilation: each output texel receives the maximum of the first
/// channel of the input texels in a square window centred on it.
///
/// @param inTexture   Source texture (read-only), bound at texture index 0.
/// @param outTexture  Destination texture (write-only), bound at texture index 1.
/// @param kernelSize  kernelSize[0] is the window size; the window extends
///                    kernelSize[0] / 2 texels on each side of the centre.
/// @param gid         Position of this thread in the dispatch grid; used as the
///                    texel coordinate.
kernel void dilation(
    texture2d<uint, access::read> inTexture [[texture(0)]],
    texture2d<uint, access::write> outTexture [[texture(1)]],
    device uint *kernelSize [[buffer(0)]],
    uint2 gid [[thread_position_in_grid]]
) {
    const uint width = inTexture.get_width();
    const uint height = inTexture.get_height();

    // Threads outside the image have nothing to do. This also guarantees
    // width >= 1 and height >= 1 below.
    if (gid.x >= width || gid.y >= height)
        return;

    const uint halfKernel = kernelSize[0] / 2;
    const uint lastX = width - 1;
    const uint lastY = height - 1;

    // Clamp the window to the valid texel range [0, width-1] x [0, height-1].
    // The upper bounds are inclusive, so they must stop at the last valid
    // index rather than at width/height. Comparing against the remaining
    // distance to the edge avoids overflow in gid + halfKernel.
    const uint minX = gid.x >= halfKernel ? gid.x - halfKernel : 0;
    const uint minY = gid.y >= halfKernel ? gid.y - halfKernel : 0;
    const uint maxX = halfKernel < lastX - gid.x ? gid.x + halfKernel : lastX;
    const uint maxY = halfKernel < lastY - gid.y ? gid.y + halfKernel : lastY;

    uint maxValue = 0;
    for (uint i = minX; i <= maxX; i++)
    {
        for (uint j = minY; j <= maxY; j++)
        {
            const uint4 value = inTexture.read(uint2(i, j));
            if (maxValue < value[0])
                maxValue = value[0];
        }
    }
    outTexture.write(maxValue, gid);
}
与其说这是一个 bug,不如说它是 Metal 的捕获/验证(capture/validation)层替您做簿记工作时产生的副作用。由于它在实际使用中不会出现,因此大概无需担心。