向量的向量的快速分配
Fast allocation of vectors of vectors
我将 3D 数据存储在基于 std::vector 的结构中:
// Shared handle to the 3D data, held as three levels of nested std::vector.
std::shared_ptr<std::vector<std::vector<std::vector<Type>>>> data;
我在遍历向量时通过调用 resize 来分配这个结构:
arraydata.reset(new std::vector< std::vector< std::vector< Type > > >());
arraydata->resize(m_width);
for (unsigned int col_index = 0; col_index < m_width; ++col_index)
{
(*arraydata)[col_index].resize(m_height);
for (unsigned int line_index = 0; line_index < m_height; ++line_index)
{
(*arraydata)[col_index][line_index].resize(m_nbbands);
}
}
但是当维度很大时,这种分配会花费很多时间...
有没有办法通过一次操作分配所需的全部空间(例如 malloc(m_width*m_height*m_nbbands*sizeof(Type))),然后把这块全局空间中各自对应的数据区域分配给每个向量?这样会更高效吗?
编辑:我测试了@justin-time 的想法
arraydata.reset(new std::vector< std::vector< std::vector< T > > >(m_width,
std::vector< std::vector< T > >(m_height, std::vector< T > (m_nbbands))));
其执行时间与原始代码相当: 分配大约 4.9 秒,释放大约 40 秒 ???
这可以在内存管理器中看到:
我未能成功测试基于 malloc 的分配方式,下面的代码在 std::vector< T > tmp(datptr, (T*)(datptr+arraySize)); 这一行失败:
unsigned int arraySize = m_nbbands*sizeof(T);
T *datptr = (T*)malloc(m_width*m_height*arraySize);
arraydata.reset(new std::vector< std::vector< std::vector< T > > >(m_width));
for (unsigned int col_index = 0; col_index < m_width; ++col_index)
{
(*arraydata)[col_index].resize(m_height);
for (unsigned int line_index = 0; line_index < m_height; ++line_index)
{
std::vector< T > tmp(datptr, (T*)(datptr+arraySize));
(*arraydata)[col_index][line_index].swap(tmp);
// also tested with same results:
//(*arraydata)[col_index][line_index] =
// std::vector< T >(datptr, (T*)(datptr+arraySize));
datptr += arraySize;
}
}
不要使用向量的向量的向量。改用一个内部持有单个数组的 class,并由它提供访问元素的方法。例如:
/// Dense 3D array stored in one contiguous std::vector.
///
/// Element (x, y, z) lives at linear index x + y*xmax + z*xmax*ymax, so x is
/// the fastest-varying coordinate. Indices are not range-checked.
template <typename T>
class vec3d {
    // The extents are declared before the storage so that declaration order
    // matches the constructor's initializer list (members are always
    // initialized in declaration order).
    size_t xmax, ymax, zmax;
    std::vector<T> data;

    /// Maps a 3D coordinate to its position in the flat storage.
    size_t index(size_t x, size_t y, size_t z) const
    { return x + y*xmax + z*xmax*ymax; }
public:
    /// Creates an x * y * z array of value-initialized elements.
    vec3d(size_t x, size_t y, size_t z)
        : xmax(x), ymax(y), zmax(z), data(x*y*z) {}

    /// Mutable access to element (x, y, z).
    T& operator()(size_t x, size_t y, size_t z)
    { return data[index(x, y, z)]; }
    /// Read-only access to element (x, y, z). The const qualifier is what
    /// distinguishes this overload; overloads that differ only in their
    /// return type do not compile.
    const T& operator()(size_t x, size_t y, size_t z) const
    { return data[index(x, y, z)]; }

    /// Named equivalents of operator(), convenient through a pointer:
    /// p->v(x, y, z).
    T& v(size_t x, size_t y, size_t z) { return (*this)(x,y,z); }
    const T& v(size_t x, size_t y, size_t z) const { return (*this)(x,y,z); }
};
访问就像
shared_ptr<vec3d<int>> p = make_shared<vec3d<int>>(10, 20, 30);
p->v(5,6,7) = 14;
或
// The element type has to be named explicitly: it cannot be deduced from the
// three size arguments.
vec3d<float> vec(5,6,7);
vec(1,2,4) = 16.0f; // Fortran-style indexing.
您可能还需要更多成员,以支持迭代、查询维度等。因为只有一次内存分配,所以速度会快很多。
根据 Martin Bonner 的回答,我得出了以下解决方案,分配和释放时间不到一秒。
可以使用 arraydata[x][y][z] 访问数据 (arraydata 为 shared_ptr 时写作 (*arraydata)[x][y][z])。
// Create the shared image storage from the current dimensions.
arraydata = std::make_shared<CImageStorage<T>>(
    m_width, m_height, m_nbbands, bppixel);
与
/// Innermost indexing proxy: wraps a raw pointer together with an element
/// count. The pointer is only stored and dereferenced, never freed here.
template<class T>
class CImageStorage1D
{
public:
    /// @param p        first element of the run (not owned)
    /// @param nbbands  value reported by size()
    CImageStorage1D(T* p, unsigned int nbbands)
        : data(p), m_nbbands(nbbands)
    {
    }

    /// Element access by band index; no range check is performed.
    T& operator[](unsigned int band) { return data[band]; }
    const T& operator[](unsigned int band) const { return data[band]; }

    /// Same element access as operator[], under a named member.
    T& DataPtr(unsigned int band) { return (*this)[band]; }
    const T& DataPtr(unsigned int band) const { return (*this)[band]; }

    /// Element count passed at construction.
    unsigned int size() const { return m_nbbands; }

private:
    T* data;
    unsigned int m_nbbands;
};
/// Middle indexing proxy: a non-owning view over consecutive rows of
/// m_nbbands elements each, laid out back to back starting at `data`.
template<class T>
class CImageStorage2D
{
    T* data;
    unsigned int m_height, m_nbbands, m_bppixel;

    /// Address of the first element of row `height`. The offset depends on the
    /// requested row (not on the row count m_height) and is computed in size_t
    /// so that it does not wrap around in unsigned int arithmetic.
    T* RowPtr(unsigned int height) const {
        return data + static_cast<size_t>(height) * m_nbbands; }
public:
    /// @param p            first element of the block (not owned)
    /// @param height       value reported by size()
    /// @param nbbands      number of T elements per row
    /// @param bppixel      stored but not used by this class
    /// @param heightShift  ignored; defaulted so that the four-argument
    ///                     construction used by CImageStorage::operator[]
    ///                     compiles
    CImageStorage2D(T* p, unsigned int height, unsigned int nbbands,
        unsigned int bppixel,
        std::shared_ptr< std::vector<int> > heightShift = nullptr) :
        data(p), m_height(height), m_nbbands(nbbands), m_bppixel(bppixel) {}
    /// Pointer to the first element of row `height`; no range check.
    T* DataPtr(unsigned int height) { return RowPtr(height); }
    const T* DataPtr(unsigned int height) const { return RowPtr(height); }
    /// View of row `height`, so that view[row][band] addresses one element.
    CImageStorage1D<T> operator[](unsigned int height) {
        return CImageStorage1D<T>(RowPtr(height), m_nbbands); }
    const CImageStorage1D<T> operator[] (unsigned int height) const {
        return CImageStorage1D<T>(RowPtr(height), m_nbbands); }
    /// Row count passed at construction.
    unsigned int size()const { return m_height; }
};
/// Owner of a single malloc'ed buffer that is addressed as
/// storage[width][height][band] through the CImageStorage2D proxy.
template<class T>
class CImageStorage
{
    T* data;
    unsigned int m_width, m_height, m_nbbands, m_bppixel;

    /// Address of the first element of column `width`. The offset is computed
    /// in size_t so that it does not wrap around in unsigned int arithmetic.
    T* ColumnPtr(unsigned int width) const {
        return data + static_cast<size_t>(width) * m_height * m_nbbands; }
public:
    /// Allocates width * height * nbbands * bppixel bytes. The contents are
    /// left uninitialized; call IsValid() to find out whether malloc succeeded.
    CImageStorage(unsigned int width, unsigned int height, unsigned int nbbands,
        unsigned int bppixel) :
        data(nullptr),
        m_width(width), m_height(height), m_nbbands(nbbands), m_bppixel(bppixel)
    {
        // The byte count is computed in size_t: the product of four unsigned
        // ints can exceed what unsigned int holds for large images.
        // NOTE(review): indexing steps in units of T, so bppixel presumably
        // has to be at least sizeof(T) - confirm against the callers.
        data = static_cast<T*>(malloc(
            static_cast<size_t>(m_width) * m_height * m_nbbands * m_bppixel));
    }
    ~CImageStorage() { free(data); }
    // The buffer has exactly one owner: a copy would free() it a second time.
    CImageStorage(const CImageStorage&) = delete;
    CImageStorage& operator=(const CImageStorage&) = delete;
    /// True when the allocation in the constructor returned a non-null pointer.
    bool IsValid() const { return (data != nullptr); }
    /// Address of column `width`, reinterpreted as T**.
    /// NOTE(review): the buffer holds T elements, not T* pointers; the T**
    /// return type is kept only so existing callers keep compiling.
    T** DataPtr(unsigned int width) {
        return reinterpret_cast<T**>(ColumnPtr(width)); }
    const T** DataPtr(unsigned int width) const {
        return reinterpret_cast<const T**>(ColumnPtr(width)); }
    /// View of column `width`, so that storage[w][h][b] addresses one element.
    CImageStorage2D<T> operator[](unsigned int width) {
        return CImageStorage2D<T>(ColumnPtr(width),
            m_height, m_nbbands, m_bppixel); }
    const CImageStorage2D<T> operator[] (unsigned int width) const {
        return CImageStorage2D<T>(ColumnPtr(width),
            m_height, m_nbbands, m_bppixel); }
    /// Column count passed at construction.
    unsigned int size()const { return m_width; }
};
我将 3D 数据存储在基于 std::vector 的结构中:
// Shared handle to the 3D data, held as three levels of nested std::vector.
std::shared_ptr<std::vector<std::vector<std::vector<Type>>>> data;
我在遍历向量时通过调用 resize 来分配这个结构:
arraydata.reset(new std::vector< std::vector< std::vector< Type > > >());
arraydata->resize(m_width);
for (unsigned int col_index = 0; col_index < m_width; ++col_index)
{
(*arraydata)[col_index].resize(m_height);
for (unsigned int line_index = 0; line_index < m_height; ++line_index)
{
(*arraydata)[col_index][line_index].resize(m_nbbands);
}
}
但是当维度很大时,这种分配会花费很多时间...
有没有办法通过一次操作分配所需的全部空间(例如 malloc(m_width*m_height*m_nbbands*sizeof(Type))),然后把这块全局空间中各自对应的数据区域分配给每个向量?这样会更高效吗?
编辑:我测试了@justin-time 的想法
arraydata.reset(new std::vector< std::vector< std::vector< T > > >(m_width,
std::vector< std::vector< T > >(m_height, std::vector< T > (m_nbbands))));
其执行时间与原始代码相当: 分配大约 4.9 秒,释放大约 40 秒 ???
这可以在内存管理器中看到:
我未能成功测试基于 malloc 的分配方式,下面的代码在 std::vector< T > tmp(datptr, (T*)(datptr+arraySize)); 这一行失败:
unsigned int arraySize = m_nbbands*sizeof(T);
T *datptr = (T*)malloc(m_width*m_height*arraySize);
arraydata.reset(new std::vector< std::vector< std::vector< T > > >(m_width));
for (unsigned int col_index = 0; col_index < m_width; ++col_index)
{
(*arraydata)[col_index].resize(m_height);
for (unsigned int line_index = 0; line_index < m_height; ++line_index)
{
std::vector< T > tmp(datptr, (T*)(datptr+arraySize));
(*arraydata)[col_index][line_index].swap(tmp);
// also tested with same results:
//(*arraydata)[col_index][line_index] =
// std::vector< T >(datptr, (T*)(datptr+arraySize));
datptr += arraySize;
}
}
不要使用向量的向量的向量。改用一个内部持有单个数组的 class,并由它提供访问元素的方法。例如:
/// Dense 3D array stored in one contiguous std::vector.
///
/// Element (x, y, z) lives at linear index x + y*xmax + z*xmax*ymax, so x is
/// the fastest-varying coordinate. Indices are not range-checked.
template <typename T>
class vec3d {
    // The extents are declared before the storage so that declaration order
    // matches the constructor's initializer list (members are always
    // initialized in declaration order).
    size_t xmax, ymax, zmax;
    std::vector<T> data;

    /// Maps a 3D coordinate to its position in the flat storage.
    size_t index(size_t x, size_t y, size_t z) const
    { return x + y*xmax + z*xmax*ymax; }
public:
    /// Creates an x * y * z array of value-initialized elements.
    vec3d(size_t x, size_t y, size_t z)
        : xmax(x), ymax(y), zmax(z), data(x*y*z) {}

    /// Mutable access to element (x, y, z).
    T& operator()(size_t x, size_t y, size_t z)
    { return data[index(x, y, z)]; }
    /// Read-only access to element (x, y, z). The const qualifier is what
    /// distinguishes this overload; overloads that differ only in their
    /// return type do not compile.
    const T& operator()(size_t x, size_t y, size_t z) const
    { return data[index(x, y, z)]; }

    /// Named equivalents of operator(), convenient through a pointer:
    /// p->v(x, y, z).
    T& v(size_t x, size_t y, size_t z) { return (*this)(x,y,z); }
    const T& v(size_t x, size_t y, size_t z) const { return (*this)(x,y,z); }
};
访问就像
shared_ptr<vec3d<int>> p = make_shared<vec3d<int>>(10, 20, 30);
p->v(5,6,7) = 14;
或
// The element type has to be named explicitly: it cannot be deduced from the
// three size arguments.
vec3d<float> vec(5,6,7);
vec(1,2,4) = 16.0f; // Fortran-style indexing.
您可能还需要更多成员,以支持迭代、查询维度等。因为只有一次内存分配,所以速度会快很多。
根据 Martin Bonner 的回答,我得出了以下解决方案,分配和释放时间不到一秒。
可以使用 arraydata[x][y][z] 访问数据 (arraydata 为 shared_ptr 时写作 (*arraydata)[x][y][z])。
// Create the shared image storage from the current dimensions.
arraydata = std::make_shared<CImageStorage<T>>(
    m_width, m_height, m_nbbands, bppixel);
与
/// Innermost indexing proxy: wraps a raw pointer together with an element
/// count. The pointer is only stored and dereferenced, never freed here.
template<class T>
class CImageStorage1D
{
public:
    /// @param p        first element of the run (not owned)
    /// @param nbbands  value reported by size()
    CImageStorage1D(T* p, unsigned int nbbands)
        : data(p), m_nbbands(nbbands)
    {
    }

    /// Element access by band index; no range check is performed.
    T& operator[](unsigned int band) { return data[band]; }
    const T& operator[](unsigned int band) const { return data[band]; }

    /// Same element access as operator[], under a named member.
    T& DataPtr(unsigned int band) { return (*this)[band]; }
    const T& DataPtr(unsigned int band) const { return (*this)[band]; }

    /// Element count passed at construction.
    unsigned int size() const { return m_nbbands; }

private:
    T* data;
    unsigned int m_nbbands;
};
/// Middle indexing proxy: a non-owning view over consecutive rows of
/// m_nbbands elements each, laid out back to back starting at `data`.
template<class T>
class CImageStorage2D
{
    T* data;
    unsigned int m_height, m_nbbands, m_bppixel;

    /// Address of the first element of row `height`. The offset depends on the
    /// requested row (not on the row count m_height) and is computed in size_t
    /// so that it does not wrap around in unsigned int arithmetic.
    T* RowPtr(unsigned int height) const {
        return data + static_cast<size_t>(height) * m_nbbands; }
public:
    /// @param p            first element of the block (not owned)
    /// @param height       value reported by size()
    /// @param nbbands      number of T elements per row
    /// @param bppixel      stored but not used by this class
    /// @param heightShift  ignored; defaulted so that the four-argument
    ///                     construction used by CImageStorage::operator[]
    ///                     compiles
    CImageStorage2D(T* p, unsigned int height, unsigned int nbbands,
        unsigned int bppixel,
        std::shared_ptr< std::vector<int> > heightShift = nullptr) :
        data(p), m_height(height), m_nbbands(nbbands), m_bppixel(bppixel) {}
    /// Pointer to the first element of row `height`; no range check.
    T* DataPtr(unsigned int height) { return RowPtr(height); }
    const T* DataPtr(unsigned int height) const { return RowPtr(height); }
    /// View of row `height`, so that view[row][band] addresses one element.
    CImageStorage1D<T> operator[](unsigned int height) {
        return CImageStorage1D<T>(RowPtr(height), m_nbbands); }
    const CImageStorage1D<T> operator[] (unsigned int height) const {
        return CImageStorage1D<T>(RowPtr(height), m_nbbands); }
    /// Row count passed at construction.
    unsigned int size()const { return m_height; }
};
/// Owner of a single malloc'ed buffer that is addressed as
/// storage[width][height][band] through the CImageStorage2D proxy.
template<class T>
class CImageStorage
{
    T* data;
    unsigned int m_width, m_height, m_nbbands, m_bppixel;

    /// Address of the first element of column `width`. The offset is computed
    /// in size_t so that it does not wrap around in unsigned int arithmetic.
    T* ColumnPtr(unsigned int width) const {
        return data + static_cast<size_t>(width) * m_height * m_nbbands; }
public:
    /// Allocates width * height * nbbands * bppixel bytes. The contents are
    /// left uninitialized; call IsValid() to find out whether malloc succeeded.
    CImageStorage(unsigned int width, unsigned int height, unsigned int nbbands,
        unsigned int bppixel) :
        data(nullptr),
        m_width(width), m_height(height), m_nbbands(nbbands), m_bppixel(bppixel)
    {
        // The byte count is computed in size_t: the product of four unsigned
        // ints can exceed what unsigned int holds for large images.
        // NOTE(review): indexing steps in units of T, so bppixel presumably
        // has to be at least sizeof(T) - confirm against the callers.
        data = static_cast<T*>(malloc(
            static_cast<size_t>(m_width) * m_height * m_nbbands * m_bppixel));
    }
    ~CImageStorage() { free(data); }
    // The buffer has exactly one owner: a copy would free() it a second time.
    CImageStorage(const CImageStorage&) = delete;
    CImageStorage& operator=(const CImageStorage&) = delete;
    /// True when the allocation in the constructor returned a non-null pointer.
    bool IsValid() const { return (data != nullptr); }
    /// Address of column `width`, reinterpreted as T**.
    /// NOTE(review): the buffer holds T elements, not T* pointers; the T**
    /// return type is kept only so existing callers keep compiling.
    T** DataPtr(unsigned int width) {
        return reinterpret_cast<T**>(ColumnPtr(width)); }
    const T** DataPtr(unsigned int width) const {
        return reinterpret_cast<const T**>(ColumnPtr(width)); }
    /// View of column `width`, so that storage[w][h][b] addresses one element.
    CImageStorage2D<T> operator[](unsigned int width) {
        return CImageStorage2D<T>(ColumnPtr(width),
            m_height, m_nbbands, m_bppixel); }
    const CImageStorage2D<T> operator[] (unsigned int width) const {
        return CImageStorage2D<T>(ColumnPtr(width),
            m_height, m_nbbands, m_bppixel); }
    /// Column count passed at construction.
    unsigned int size()const { return m_width; }
};