简单 Metal 程序中的内存泄漏

Memory Leak in a Simple Metal Program

我正在尝试学习 Metal 以进行科学编程。我尝试创建一个进行形态学膨胀的简单内核。我面临的问题是,每次我在图像上调用 dilate 时,内存似乎增加了几 KB。

我在 for 循环中将 dilate 方法运行了 10000 次迭代,以此验证了内存泄漏:在 Xcode 的调试导航器中,已分配的内存从 16 MB 增长到了 17 MB。

您在我的代码中看到任何可能导致内存泄漏的地方吗?如果有帮助的话,我也已将该项目推送到了 GitHub。

// Metal-backed implementation of Morphology.
//
// The constructor creates the device, command queue, compute pipeline and a
// pair of single-channel 8-bit textures (input and output) of the requested
// size. dilate() uploads an image into the input texture, runs the compute
// pipeline once, and reads the output texture back into a newly allocated
// buffer.
class MorphologyIOS : public Morphology
{
public:
    // kernel: kernel size, copied into a shared buffer bound at buffer index 0.
    // width/height: dimensions (in pixels) of the textures to allocate.
    MorphologyIOS(
        const uint kernel,
        const uint width,
        const uint height
    ) {
        device_ = MTLCreateSystemDefaultDevice();
        kernelSize_ = kernel;
        // newBufferWithBytes:length:options: takes MTLResourceOptions, so the
        // MTLResource* constant is used rather than the MTLStorageMode enum.
        buffer_ = [device_ newBufferWithBytes:&kernelSize_
                                       length:sizeof(kernelSize_)
                                      options:MTLResourceStorageModeShared];
        library_ = [device_ newDefaultLibrary];
        commandQueue_ = [device_ newCommandQueue];
        // NOTE(review): the pipeline is built from a function named "identity";
        // confirm this is intended rather than the dilation kernel.
        identityFunction_ = [library_ newFunctionWithName:@"identity"];

        MTLTextureDescriptor* readDesc =
            [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatR8Uint
                                                               width:width
                                                              height:height
                                                           mipmapped:NO];

        MTLTextureDescriptor* writeDesc =
            [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatR8Uint
                                                               width:width
                                                              height:height
                                                           mipmapped:NO];

        [writeDesc setUsage:MTLTextureUsageShaderWrite];

        inTexture_ = [device_ newTextureWithDescriptor:readDesc];
        outTexture_ = [device_ newTextureWithDescriptor:writeDesc];

        entireImage_ = MTLRegionMake2D(0, 0, width, height);

        // Capture the error instead of discarding it so a failed pipeline
        // build is visible; dilate() refuses to run without a pipeline.
        NSError* error = nil;
        pipelineState_ = [device_ newComputePipelineStateWithFunction:identityFunction_
                                                                error:&error];
        if (pipelineState_ == nil) {
            NSLog(@"MorphologyIOS: failed to create compute pipeline state: %@", error);
        }
    }

    virtual ~MorphologyIOS() override {}

    // Runs the compute pipeline over inImage and returns the contents of the
    // output texture.
    //
    // inImage: tightly packed 8-bit pixels, one byte per pixel, with the same
    //          width and height the object was constructed with.
    // Returns a buffer of width * height bytes that is released with free(),
    // or an empty pointer if inImage is null, the pipeline could not be
    // created, or the allocation failed.
    virtual std::shared_ptr<unsigned char> dilate(
        const std::shared_ptr<unsigned char>& inImage
    ) override {
        if (!inImage || pipelineState_ == nil) {
            return std::shared_ptr<unsigned char>();
        }

        const NSUInteger outBytesPerRow = outTexture_.width;
        void* result = malloc(outBytesPerRow * outTexture_.height);
        if (result == NULL) {
            return std::shared_ptr<unsigned char>();
        }

        // The buffer comes from malloc, so it must be released with free();
        // shared_ptr's default deleter would call delete on it.
        std::shared_ptr<unsigned char> outImage(static_cast<unsigned char*>(result), free);

        @autoreleasepool
        {
            // The command buffer and encoder are per-call objects; keeping
            // them local lets them be released when this pool drains.
            id<MTLCommandBuffer> commandBuffer = [commandQueue_ commandBuffer];
            id<MTLComputeCommandEncoder> commandEncoder = [commandBuffer computeCommandEncoder];
            [commandEncoder setComputePipelineState:pipelineState_];

            [inTexture_ replaceRegion:entireImage_
                          mipmapLevel:0
                            withBytes:inImage.get()
                          bytesPerRow:inTexture_.width];

            [commandEncoder setTexture:inTexture_ atIndex:0];
            [commandEncoder setTexture:outTexture_ atIndex:1];
            [commandEncoder setBuffer:buffer_ offset:0 atIndex:0];

            // NOTE(review): the integer division truncates, so when the width
            // or height is not a multiple of 10 the trailing rows/columns are
            // not dispatched. Rounding up is only safe if the kernel bound to
            // the pipeline checks its thread position against the texture
            // size — confirm before changing.
            MTLSize threadGroupCount = MTLSizeMake(10, 10, 1);
            MTLSize threadGroups = MTLSizeMake(inTexture_.width / threadGroupCount.width,
                inTexture_.height / threadGroupCount.height, 1);

            [commandEncoder dispatchThreadgroups:threadGroups threadsPerThreadgroup:threadGroupCount];
            [commandEncoder endEncoding];
            [commandBuffer commit];
            // Block until the GPU has finished so the read-back below sees
            // the results of this dispatch.
            [commandBuffer waitUntilCompleted];

            [outTexture_ getBytes:result
                      bytesPerRow:outBytesPerRow
                       fromRegion:entireImage_
                      mipmapLevel:0];
        }

        return outImage;
    }
private:
    id<MTLDevice> device_;
    uint kernelSize_;
    id<MTLBuffer> buffer_;
    id<MTLLibrary> library_;
    id<MTLComputePipelineState> pipelineState_;
    id<MTLCommandQueue> commandQueue_;
    id<MTLFunction> identityFunction_;
    id<MTLTexture> inTexture_;
    id<MTLTexture> outTexture_;
    MTLRegion entireImage_;
};

我的内核是这样的:

// Morphological dilation: each thread writes to outTexture the maximum value
// of the first channel of inTexture over a square window centred on its own
// grid position.
//
// inTexture  - source image, read-only (texture index 0).
// outTexture - destination image, write-only (texture index 1).
// kernelSize - kernelSize[0] is the window size; the window extends
//              kernelSize[0] / 2 texels on each side (buffer index 0).
// gid        - this thread's position in the dispatch grid.
kernel void dilation(
    texture2d<uint, access::read> inTexture [[texture(0)]],
    texture2d<uint, access::write> outTexture [[texture(1)]],
    device uint *kernelSize [[buffer(0)]],
    uint2 gid [[thread_position_in_grid]]
) {
    const uint width = inTexture.get_width();
    const uint height = inTexture.get_height();

    // Threads that fall outside either texture have nothing to read or write.
    if (gid.x >= width || gid.y >= height ||
        gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height())
    {
        return;
    }

    const uint halfKernel = kernelSize[0] / 2;

    // Clamp the window to the valid texel range [0, width - 1] x [0, height - 1].
    // The upper bounds are inclusive (the loops below use <=), so they must
    // stop at the last valid index rather than at the texture dimension.
    // The comparisons are written so that they cannot overflow.
    const uint minX = gid.x >= halfKernel ? gid.x - halfKernel : 0;
    const uint minY = gid.y >= halfKernel ? gid.y - halfKernel : 0;
    const uint maxX = (width - 1 - gid.x) > halfKernel ? gid.x + halfKernel : width - 1;
    const uint maxY = (height - 1 - gid.y) > halfKernel ? gid.y + halfKernel : height - 1;

    uint maxValue = 0;
    for (uint i = minX; i <= maxX; i++)
    {
        for (uint j = minY; j <= maxY; j++)
        {
            uint4 value = inTexture.read(uint2(i, j));
            if (maxValue < value[0])
                maxValue = value[0];
        }
    }
    outTexture.write(maxValue, gid);
}

与其说这是一个错误,不如说它是 Metal 的 capture/validation(捕获/验证)层替您做一些簿记工作所产生的现象。由于它在实际使用中不会发生,因此大概无需担心。