使用 boost 反序列化字符串数组时,得到的结果比预期的大(数据由 cv::cuda::GpuMat 序列化而来)

Deserializing array of string with boost and are bigger than expected (after serializing from cv::cuda::GpuMat)

我正在尝试序列化和反序列化 cv::cuda::GpuMat 的数组(即带行间距 pitch 的 byte / uchar 数组,pitched array)。在反序列化时,我不需要恢复出 GpuMat,只需要得到一个 byte 数组。

我使用了下面这两个类:

#pragma once
#include <fstream>
#include <string>
#include <boost/archive/text_oarchive.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <opencv2/core/cuda.hpp>

using namespace std;
using namespace cv;
using namespace cuda;

#ifndef uchar
#define uchar unsigned char
#endif

// Serializable holder for two byte sequences (flowX, flowY) that are filled
// from a caller-supplied array of two GpuMat planes.
// Only flowX and flowY take part in serialization; FlowPlanes does not.
class SerieFlow
{
    // Lets Boost.Serialization call the private serialize() below.
    friend class boost::serialization::access;
    
    // Single entry point used for both saving and loading: the archive's
    // operator& writes or reads flowX, then flowY. `version` is not used.
    template <class Archive>
    void serialize(Archive& ar, const unsigned int version)
    {
        ar & flowX;
        ar & flowY;
    }

    // Non-owning pointer to the caller's planes; indexed as [0] and [1] in
    // PrepData(). Null when the object is default-constructed.
    GpuMat* FlowPlanes{}; // GpuMat[2]
public:
    string flowX{}; // bytes taken from FlowPlanes[0]
    string flowY{}; // bytes taken from FlowPlanes[1]

    // Fills flowX / flowY from the first column of each plane.
    // Dereferences FlowPlanes, so it must not be called on a
    // default-constructed object.
    void PrepData()
    {
        // Build a cv::Mat from each plane (presumably a device-to-host copy
        // -- confirm against the OpenCV version in use).
        Mat matX = Mat(FlowPlanes[0]);
        Mat matY = Mat(FlowPlanes[1]);
        // NOTE: string(const char*) takes no length, so it reads until the
        // first '\0' byte, which may lie beyond the matrix data.
        // NOTE: `*new string(...)` copies from a heap object that is never
        // deleted, i.e. it leaks on every call.
        flowX = *new string((const char*)matX.col(0).data);
        flowY = *new string((const char*)matY.col(0).data);
        matX.release();
        matY.release();
    }
    
    // Default construction leaves FlowPlanes null and both strings empty;
    // this is the form used when loading from an archive.
    SerieFlow() = default;
    // Stores the pointer and immediately converts both planes via PrepData().
    // `flowPlanes` is indexed at [0] and [1].
    SerieFlow(GpuMat* flowPlanes) : FlowPlanes(flowPlanes)
    {
        PrepData();
    }
};

// Saves and loads SerieFlow objects as Boost text archives.
class SerieFlowFile
{
public:
    // Writes `content` to the file `filename` as a Boost text archive.
    // Both arguments are taken by const reference so that neither the flow
    // data nor the file name is copied just to make the call.
    void Save(const SerieFlow& content, const std::string& filename) const
    {
        std::ofstream stream(filename);
        // `archive` is declared after `stream`, so it is destroyed first and
        // finishes writing while the stream is still open.
        boost::archive::text_oarchive archive(stream);
        archive << content;
    }

    // Reads a SerieFlow back from the Boost text archive stored in `filename`
    // and returns it by value.
    SerieFlow Open(const std::string& filename) const
    {
        std::ifstream stream(filename);
        boost::archive::text_iarchive archive(stream);
        SerieFlow content;
        archive >> content;
        return content;
    }
};

我用以下方法对结果进行单元测试:

// Round-trip test: builds two identical planes from eight known bytes, saves
// them through SerieFlowFile, loads them back, logs what came back and checks
// it against the original bytes.
TEST_METHOD(ReadDeserializeTest)
{
    // Remove any archive left over from a previous run.
    const string archivePath{ "WriteRead.sflow" };
    if (filesystem::exists(archivePath))
    {
        std::remove(archivePath.c_str());
    }

    // Eight source bytes; the explicit length keeps exactly these eight.
    unsigned char rawBytes[8] = { 7, 4, 2, 6, 7, 18, 29, 111 };
    const string rawAsString(reinterpret_cast<const char*>(rawBytes), 8);
    const vector<unsigned char> expectedData{ rawAsString.begin(), rawAsString.end() };

    // Both planes are built from the same 8x1 single-channel host matrix.
    Mat hostMat{ 8, 1, CV_8UC1, rawBytes };
    GpuMat planes[2] = { GpuMat(hostMat), GpuMat(hostMat) };

    // Save ...
    SerieFlow original(planes);
    SerieFlowFile writer{};
    writer.Save(original, archivePath);

    // ... and load again through a second, independent instance.
    SerieFlowFile reader{};
    const auto restored = reader.Open(archivePath);
    const vector<unsigned char> resultFlowX(restored.flowX.begin(), restored.flowX.end());
    const vector<unsigned char> resultFlowY(restored.flowY.begin(), restored.flowY.end());

    // First log line: the two sizes.
    stringstream sizes{};
    sizes << "flowX size: " << resultFlowX.size()
          << " flowY size: " << resultFlowY.size() << '\n';
    Logger::WriteMessage(sizes.str().c_str());

    // Then one log line per flow: every byte as a decimal number followed by
    // a single space.
    const auto logBytes = [](const vector<unsigned char>& bytes)
    {
        stringstream line{};
        for (const unsigned char value : bytes)
        {
            line << static_cast<int>(value) << " ";
        }
        Logger::WriteMessage(line.str().c_str());
    };
    logBytes(resultFlowX);
    logBytes(resultFlowY);

    Assert::IsTrue(resultFlowX == resultFlowY, L"flowX and flowY are not the same.");
    Assert::IsTrue(expectedData == resultFlowX, L"resultFlowX is not correct.");
    Assert::IsTrue(expectedData == resultFlowY, L"resultFlowY is not correct.");
}

然而我得到:

flowX size: 52 flowY size: 36

7 4 2 6 7 18 29 111 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 253 253 253 253

7 4 2 6 7 18 29 111 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 205 253 253 253 253

我期望两者的大小都是 8,并且内容为:

7 4 2 6 7 18 29 111

填充这么多是否正常?

我正在使用 string 作为操作字节数组、比较等的便捷方式

我怀疑 string 有问题,因为我的文件中的原始数组不是空终止的;我应该如何序列化和反序列化这些数组?如果我在序列化时空终止字符串 (string dataAsString((const char*)data, 8);) 为什么会出现问题?

我使用 boost 1.72 作为 nuget 包和 opencv 4.51,并在 x64 中使用 VS2019 工具集 1.42 进行测试。

PrepData中,字符串在没有给定长度的情况下被初始化,导致任意长字符串(直到达到空'\0')。

正确的代码是:

string strX((const char*)matX.col(0).data, matX.rows);
string strY((const char*)matY.col(0).data, matY.rows);
  1. 不要使用内存泄漏运算符:

     X a = *new X(); // guaranteed unrecoverable memory leak¹
    
  2. 你使用它的地方在很多方面都有问题:

        flowX = *new string((const char*)matX.col(0).data);
        flowY = *new string((const char*)matY.col(0).data);
    
  3. 首先,(const char*)data 是一次 reinterpret_cast(重新解释类型转换),这种转换很少是安全的。

    flowX = std::string(reinterpret_cast<const char*>(matX.col(0).data), length);
    flowY = std::string(reinterpret_cast<const char*>(matY.col(0).data), length);
    
  4. 您将其解释为原始的 C 风格字符串。 C 字符串需要以 NUL 结尾。没有理由假设他们是。指定长度。

    flowX = std::string(reinterpret_cast<const char*>(matX.col(0).data), length);
    flowY = std::string(reinterpret_cast<const char*>(matY.col(0).data), length);
    

    或者实际上,使用 flowX.assign

    flowX.assign(reinterpret_cast<const char*>(matX.col(0).data), length);
    flowY.assign(reinterpret_cast<const char*>(matY.col(0).data), length);
    
  5. 这一行

    cv::Mat matX = Mat(FlowPlanes[0]);
    

    不必要的复制构造

    cv::Mat matX(FlowPlanes[0]); // direct-initialization, no temporary cv::Mat
    
  6. 反正复制矩阵好像也没有理由

    // Fills flowX and flowY straight from the first column of each plane,
    // without an intermediate cv::Mat.
    // `length` is not declared in this snippet; it stands for the number of
    // bytes to copy from each plane.
    // NOTE(review): FlowPlanes is a GpuMat array in the question; for a real
    // cv::cuda::GpuMat `data` presumably points to device memory, so reading
    // it on the host may require a download first -- confirm.
    void PrepData() {
        flowX.assign(reinterpret_cast<const char*>(FlowPlanes[0].col(0).data), length);
        flowY.assign(reinterpret_cast<const char*>(FlowPlanes[1].col(0).data), length);
    }
    
  7. 事实上,因为没有其他东西使用flowPlanes,就像往常一样在构造函数中初始化:

    std::string flowX;
    std::string flowY;
    
    SerieFlow() = default;
    // Builds flowX and flowY directly in the member-initializer list from the
    // first column of flowPlanes[0] and flowPlanes[1]; the pointer itself is
    // not stored.
    // `length` is not declared in this snippet; it stands for the number of
    // bytes to copy from each plane.
    // NOTE(review): for a real cv::cuda::GpuMat `data` presumably points to
    // device memory -- confirm it can be read on the host like this.
    explicit SerieFlow(cv::cuda::GpuMat* flowPlanes)
        : flowX(reinterpret_cast<const char*>(flowPlanes[0].col(0).data), length),
          flowY(reinterpret_cast<const char*>(flowPlanes[1].col(0).data), length)
    { }
    
  8. 许多其他转换混淆可以简化:

    unsigned char data[8] = { 7, 4, 2, 6, 7, 18, 29, 111 };
    std::string dataAsString(reinterpret_cast<const char*>(data), 8);
    

    std::string dataAsString { 7, 4, 2, 6, 7, 18, 29, 111 };
    

    std::string dataAsString { 7, 4, 2, 6, 7, 18, 29, 111 };
    std::vector<unsigned char> expectedData{ dataAsString.begin(),
                                             dataAsString.end() };
    

    简单地...

    std::vector<unsigned char> data { 7, 4, 2, 6, 7, 18, 29, 111 };
    cv::Mat mat(data.size(), 1, CV_8UC1, data.data());
    
  9. 不需要临时创建数组:

    cv::cuda::GpuMat gpuMat1(mat);
    cv::cuda::GpuMat gpuMat2(mat);
    cv::cuda::GpuMat gpuMatArray[2] = { gpuMat1, gpuMat2 };
    

    可以

    GpuMat gpuMatArray[2] = { GpuMat{mat}, GpuMat{mat} };
    
  10. 一般来说,整体转换成 std::string 是不必要的,而且还会带来 uchar 与 char 不兼容的问题。为什么不直接保留为 vector<uchar>?

    using cv::cuda::GpuMat;
    using uchar = std::uint8_t;
    
    using TwoMat = std::array<GpuMat, 2>;
    
    class SerieFlow {
        friend class boost::serialization::access;
        template <class Archive> void serialize(Archive& ar, unsigned /*unused*/) {
            ar& flowX & flowY;
        }
    
        static auto to_vector(GpuMat const& mat) {
            assert(mat.elemSize() == 1);
            auto n = mat.size().area() * mat.elemSize();
            return std::vector<uchar>(mat.data, mat.data+n);
        }
      public:
        std::vector<uchar> flowX, flowY;
    
        SerieFlow() = default;
        explicit SerieFlow(TwoMat const planes)
          : flowX(to_vector(planes[0])),
            flowY(to_vector(planes[1])) {}
    };
    
    struct SerieFlowFile {
        static void Save(const SerieFlow& content, const std::string& filename) {
            std::ofstream stream(filename);
            boost::archive::text_oarchive archive(stream);
            archive << content;
        }
    
        static SerieFlow Open(const std::string& filename) {
            std::ifstream stream(filename);
            boost::archive::text_iarchive archive(stream);
            SerieFlow content;
            archive >> content;
            return content;
        }
    };
    
    /// Round-trip check: saves two identical planes built from eight known
    /// bytes, loads them back and prints, for each flow, its size, its bytes
    /// and whether it equals the original data.
    void ReadDeserializeTest() {
        const std::filesystem::path filename = "WriteRead.sflow";
        // remove() simply returns false when the file does not exist, so no
        // separate exists() check is needed.
        std::filesystem::remove(filename);

        std::vector<unsigned char> data { 7, 4, 2, 6, 7, 18, 29, 111 };
        cv::Mat mat(static_cast<int>(data.size()), 1, CV_8UC1, data.data());
        TwoMat gpuMatArray { GpuMat{mat}, GpuMat{mat} };

        SerieFlow sflow(gpuMatArray);
        // Save/Open take std::string; path -> std::string is only implicit
        // where the native path character type is char, so convert explicitly.
        SerieFlowFile::Save(sflow, filename.string());

        auto roundtrip = SerieFlowFile::Open(filename.string());

        auto check = [&data](const auto& flow, const char* label) {
            // Separate the size from the first byte; without the separator the
            // output reads e.g. "size: 87 4 ..." for size 8, first byte 7.
            std::cout << label << " size: " << flow.size() << " data: ";
            std::copy(flow.begin(), flow.end(), std::ostream_iterator<int>(std::cout, " "));
            std::cout << '\n' << (data == flow ? "CORRECT" : "INCORRECT") << '\n';
        };

        check(roundtrip.flowX, "flowX");
        check(roundtrip.flowY, "flowY");
    }
    
    // Program entry point: runs the single round-trip check.
    int main() {
        ReadDeserializeTest();
    }
    

跳出固有思路:直接序列化 GpuMat

为什么不直接为GpuMat类型添加序列化?

#include <boost/serialization/array.hpp>
#include <boost/serialization/array_wrapper.hpp>

namespace boost::serialization {
    /// Saves a matrix as three items: row count, column count, and then
    /// rows * cols raw bytes starting at mat.data[0].
    template <typename Ar> void save(Ar& ar, GpuMat const& mat, unsigned /*version*/) {
        int rows = mat.rows;
        int cols = mat.cols;
        ar & rows;
        ar & cols;
        ar & make_array(&mat.data[0], rows * cols);
    }

    /// Loads a matrix written by save(): reads the two dimensions, replaces
    /// `mat` with a fresh rows x cols CV_8UC1 matrix and reads rows * cols
    /// bytes into it.
    template <typename Ar> void load(Ar& ar, GpuMat& mat, unsigned /*version*/) {
        int rows = 0;
        int cols = 0;
        ar & rows;
        ar & cols;
        mat = GpuMat(rows, cols, CV_8UC1);
        ar & make_array(&mat.data[0], rows * cols);
    }
}

BOOST_SERIALIZATION_SPLIT_FREE(GpuMat)

这样可以去掉所有的复制,并且顺带解决了此前没有处理矩阵实际运行时形状的问题(你只是假设它总是 1 行)。

现在整个事情可以实现为

using Flows = std::array<GpuMat, 2>;

struct SerieFlowFile {
    static void Save(const Flows& content, const std::string& filename) {
        std::ofstream stream(filename);
        boost::archive::text_oarchive archive(stream);
        archive << content;
    }

    static Flows Open(const std::string& filename) {
        std::ifstream stream(filename);
        boost::archive::text_iarchive archive(stream);
        Flows content;
        archive >> content;
        return content;
    }
};

配合下面的测试主程序:

void ReadDeserializeTest() {
    std::vector<uchar> data { 7, 4, 2, 6, 7, 18, 29, 111 };

    Flows gpuMatArray {
        GpuMat (8, 1, CV_8UC1, data.data()),
        GpuMat (4, 1, CV_8UC1, data.data()),
    };

    SerieFlowFile::Save(gpuMatArray, "WriteRead.sflow");
    auto roundtrip = SerieFlowFile::Open("WriteRead.sflow");

    static auto as_vec = [](GpuMat const& mat) {
        return std::vector(&mat.data[0], &mat.data[mat.cols * mat.rows]);
    };

    auto check = [&] (int index) {
        auto const& v = as_vec(roundtrip[index]);
        auto eq = boost::equal(as_vec(gpuMatArray[index]), v);
        fmt::print("#{} size: {} {} {}\n", index, v.size(), v, (eq? "CORRECT":"INCORRECT"));
    };

    check(0);
    check(1);
}

参见 Compiler Explorer 上的在线演示(Live On Compiler Explorer)

//#pragma once
#include <array>
#include <cstdint>

using uchar = std::uint8_t;

#ifndef NO_OPENCV
    #include <opencv2/core/cuda.hpp>
    //using cv::cuda::GpuMat;
    using GpuMat = cv::Mat;
#else
    #include <algorithm>
    #include <cstddef>
    #include <memory>
    namespace {
        enum {CV_8UC1};

        /// Minimal host-memory stand-in for the matrix type, used when OpenCV
        /// is not available. It owns a rows * cols byte buffer and exposes the
        /// `rows`, `cols` and `data` members that the serialization code uses.
        struct FakeMat {
            /// Element type; the same type as the file-level `uchar` alias.
            using value_type = std::uint8_t;

            /// Creates an r x c matrix with zero-initialised bytes.
            /// @param r     number of rows
            /// @param c     number of columns
            /// @param init  optional source of r * c bytes copied into the
            ///              new buffer; ignored when null
            /// The third (unnamed) parameter is the element type tag and is
            /// ignored. No buffer is allocated unless both dimensions are
            /// positive.
            FakeMat(int r=1, int c=1, int=CV_8UC1, void* init = nullptr)
                : rows(r), cols(c), data(allocate(r, c))
            {
                if (init && data) {
                    std::copy_n(static_cast<value_type const*>(init), count(r, c), data.get());
                }
            }

            /// Deep copy: the new object gets its own buffer.
            FakeMat(FakeMat const& rhs)
                : rows(rhs.rows), cols(rhs.cols),
                  data(rhs.data ? allocate(rhs.rows, rhs.cols) : nullptr)
            {
                if (data) {
                    std::copy_n(rhs.data.get(), count(rows, cols), data.get());
                }
            }

            /// Deep copy assignment. Declaring the move operations would
            /// otherwise leave copy assignment deleted, although the type is
            /// copy-constructible.
            FakeMat& operator=(FakeMat const& rhs) {
                if (this != &rhs) {
                    *this = FakeMat(rhs); // copy first, then move-assign
                }
                return *this;
            }

            FakeMat(FakeMat&&) = default;
            FakeMat& operator=(FakeMat&&) = default;

            int rows, cols;
            std::unique_ptr<value_type[]> data;

        private:
            /// Number of bytes in an r x c matrix, computed in std::size_t so
            /// the product cannot overflow int; 0 unless both are positive.
            static std::size_t count(int r, int c) {
                return (r > 0 && c > 0)
                    ? static_cast<std::size_t>(r) * static_cast<std::size_t>(c)
                    : 0;
            }

            /// Zero-initialised buffer for an r x c matrix, or null when the
            /// matrix has no elements.
            static std::unique_ptr<value_type[]> allocate(int r, int c) {
                const std::size_t n = count(r, c);
                return n ? std::make_unique<value_type[]>(n) : nullptr;
            }
        };
    }

    using GpuMat = FakeMat;
#endif

#include <boost/serialization/array.hpp>
#include <boost/serialization/array_wrapper.hpp>

namespace boost::serialization {
    /// Saves a matrix as three items: row count, column count, and then
    /// rows * cols raw bytes starting at mat.data[0].
    template <typename Ar> void save(Ar& ar, GpuMat const& mat, unsigned /*version*/) {
        int rows = mat.rows;
        int cols = mat.cols;
        ar & rows;
        ar & cols;
        ar & make_array(&mat.data[0], rows * cols);
    }

    /// Loads a matrix written by save(): reads the two dimensions, replaces
    /// `mat` with a fresh rows x cols CV_8UC1 matrix and reads rows * cols
    /// bytes into it.
    template <typename Ar> void load(Ar& ar, GpuMat& mat, unsigned /*version*/) {
        int rows = 0;
        int cols = 0;
        ar & rows;
        ar & cols;
        mat = GpuMat(rows, cols, CV_8UC1);
        ar & make_array(&mat.data[0], rows * cols);
    }
}

BOOST_SERIALIZATION_SPLIT_FREE(GpuMat)

#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp>
#include <fstream>

using Flows = std::array<GpuMat, 2>;

struct SerieFlowFile {
    static void Save(const Flows& content, const std::string& filename) {
        std::ofstream stream(filename);
        boost::archive::text_oarchive archive(stream);
        archive << content;
    }

    static Flows Open(const std::string& filename) {
        std::ifstream stream(filename);
        boost::archive::text_iarchive archive(stream);
        Flows content;
        archive >> content;
        return content;
    }
};

#include <boost/range.hpp>
#include <fmt/ranges.h>
void ReadDeserializeTest() {
    std::vector<uchar> data { 7, 4, 2, 6, 7, 18, 29, 111 };

    Flows gpuMatArray {
        GpuMat (8, 1, CV_8UC1, data.data()),
        GpuMat (4, 1, CV_8UC1, data.data()+2),
    };

    SerieFlowFile::Save(gpuMatArray, "WriteRead.sflow");
    auto roundtrip = SerieFlowFile::Open("WriteRead.sflow");

    static auto as_vec = [](GpuMat const& mat) {
        return std::vector(&mat.data[0], &mat.data[mat.cols * mat.rows]);
    };

    auto check = [&] (int index) {
        auto const& v = as_vec(roundtrip[index]);
        auto eq = boost::equal(as_vec(gpuMatArray[index]), v);
        fmt::print("#{} size: {} {} {}\n", index, v.size(), v, (eq? "CORRECT":"INCORRECT"));
    };

    check(0);
    check(1);
}

// Program entry point: runs the single round-trip check.
int main() {
    ReadDeserializeTest();
}

输出

#0 size: 8 {7, 4, 2, 6, 7, 18, 29, 111} CORRECT
#1 size: 4 {2, 6, 7, 18} CORRECT