在 C 中为不同数据类型分配内存的正确方法是什么?

What is the proper way to allocate memory in C for different data types?

我正在将图像读取到动态分配的数组中,图像像素类型可以是以下任何类型:

/* Pixel storage formats an image can use. */
typedef enum {
    Byte    = 1,  /* unsigned char */
    Int32   = 2,  /* int */
    Float32 = 3,  /* float */
    Float64 = 4   /* double */
} DataType;

我正在考虑使用 switch 块在 void 指针上分配内存:

int NumPix = GetImageSize(Image);
DataType theDataType  = GetImageDataType(Image);
void *data;
switch (theDataType)
{
    case Byte:
      data = (unsigned char *)malloc(NumPix * sizeof(unsigned char));
      break;
    case Int32:
     data = (int *)malloc(NumPix * sizeof(int));
     break;
    case Float32:
     data = (float *)malloc(NumPix * sizeof(float));
     break;
    case Float64 :
     data = (double*)malloc(NumPix * sizeof(double));
     break; 


}
// do something with data
free(data);

这种方式合法吗?有没有另一种方法可以用更少的代码和更通用的方法来做到这一点?

您会在很多地方用到像素大小,因此最好定义一个计算像素大小的函数,每次需要时调用它。

// Size in bytes of one pixel of the given type.
// Returns 0 when `type` is not one of the DataType enumerators, so the
// function never reaches its end without returning a value.
size_t pixel_size(DataType type) {
    switch (type) {
    case Byte: return sizeof(unsigned char);
    // Same size as int; a fixed-width int32_t/uint32_t would pin it to 4 bytes.
    case Int32: return sizeof(unsigned int);
    case Float32: return sizeof(float);
    case Float64: return sizeof(double);
    }
    // No `default` inside the switch, so compilers can still warn when a
    // new enumerator is added without a matching case.
    return 0;
}

// Allocate an image with indeterminate content.
// Return NULL if the allocation fails, if the total byte count does not
// fit in size_t, or if pixel_size() reports 0 bytes per pixel.
// The caller owns the returned buffer and releases it with free().
void *allocate_indeterminate_image(DataType type, size_t x, size_t y) {
    size_t bytes_per_pixel = pixel_size(type);
    if (bytes_per_pixel == 0) {
        return NULL;
    }
    // Unsigned multiplication wraps silently; dividing back detects the wrap.
    size_t pixel_count = x * y;
    if (x != 0 && pixel_count / x != y) {
        return NULL;
    }
    size_t byte_count = pixel_count * bytes_per_pixel;
    if (byte_count / bytes_per_pixel != pixel_count) {
        return NULL;
    }
    return malloc(byte_count);
}

// Allocate an image with all-bits-zero. On almost every platform,
// this means that each pixel has the value 0 (+0.0 for floats).
// Return NULL if the allocation fails or if x * y does not fit in size_t.
// The caller owns the returned buffer and releases it with free().
void *allocate_blank_image(DataType type, size_t x, size_t y) {
    // Unsigned multiplication wraps silently; dividing back detects the wrap.
    size_t pixel_count = x * y;
    if (x != 0 && pixel_count / x != y) {
        return NULL;
    }
    size_t bytes_per_pixel = pixel_size(type);
    // calloc is handed count and element size separately, so the second
    // multiplication is left to the library.
    return calloc(pixel_count, bytes_per_pixel);
}

我对你的程序做了一些改动,以我在这里用于其他事情的方式,我将把它留在这里作为你的案例可能有用的例子,以及测试数据

ImageData 结构

typedef struct
{
    uint32_t  _limit;  // number of Record* slots currently allocated in _imgV
    uint32_t  _imgC;   // number of slots in use (plays the role of argc)
    Record  **_imgV;   // array of Record pointers (plays the role of argv)
} ImageData;
  • ImageData 这是一个动态结构,容量为 _limit Record 个数据结构。
  • imgC是实际使用的记录数
  • Record** 是指向 Record 结构指针数组的指针,长度可变为 1、2、4 或 8 字节像素数据。这样它可以同时保存任意数量任意大小的像素数据
  • 这与 main() 中的 argc/argv 这一对参数完全相同

Record 结构

typedef void Data;      // pixel payload is handled as untyped bytes

typedef struct
{
    uint8_t   _id;      // bytes per pixel: 1, 2, 4 or 8
    uint32_t  _NumPix;  // number of pixels in this record
    Data     *_data;    // pixel bytes belonging to this record
} Record;               // one record of image data
  • _id为像素数据长度
  • _NumPix是像素的总和,如你所呈现的那样
  • 这里的情况与你的程序不同:像素数据在这里分配,并且是一个 _id * _NumPix 字节的区域,像素数据按顺序排列,就像在我的位图构建程序中我改为使用您的数据 ;)
  • 像素数据在这里打包成 _id 组,并根据需要解包,只使用 void* 指针。

例子

我使用了随机数据,并构建了一个返回随机 1、2、4 或 8 字节图像的“工厂”函数

像素工厂函数

// Image factory: builds one random "image".
// _id is picked from {1, 2, 4, 8}, _NumPix is 1 to 8, and every byte of
// the pixel area is set to the same random value in 0..254.
// Returns NULL if either allocation fails; otherwise the caller owns the
// Record and its _data buffer and must free() both.
Record* getImage()
{
    const uint8_t len[4] = { 1,2,4,8 };
    Record* one = malloc(sizeof *one);
    if (one == NULL)
    {
        return NULL;
    }
    one->_id = len[rand() % 4];
    one->_NumPix = 1 + rand() % 8;
    // here the actual size of EACH record is computed
    uint32_t total = one->_NumPix * one->_id;
    one->_data = malloc(total);
    if (one->_data == NULL)
    {
        free(one);
        return NULL;
    }
    // here you get the actual pixel data: one random byte value repeated
    uint8_t value = rand() % 255;
    memset(one->_data, value, total);
    return one;
}

程序逻辑

  • 开头的3个常量定义了运行:
// Number of Record* slots allocated at first and added on each extension.
#define     _BLOCK_SIZE_ 50
// Number of random images the demo builds.
#define     _IMAGES_     5
// Value passed to srand().
#define     _SEED_       201001
  • 图像指针的内存以 _BLOCK_SIZE_ 个 Record* 指针为一块进行分配。在数据采集阶段结束时,数组会被裁剪到实际使用的大小。

    • _IMAGES_是要构建的图像数量,构建ImageData数组时显示所有图像。

    • _SEED 是随机数据的种子

  • 数组创建完成后,末尾未使用的空间会被释放,因此 ImageData 中只保留与图像数据对应的记录。

  • 程序随后显示一些随机记录,这次形成ImageData数组,以便我们可以将其与构建阶段显示的数据进行比较。

输出示例

DumpImage(Sample 0): 8 8-pixels images [64 bytes]:

     0: EC EC EC EC EC EC EC EC
     1: EC EC EC EC EC EC EC EC
     2: EC EC EC EC EC EC EC EC
     3: EC EC EC EC EC EC EC EC
     4: EC EC EC EC EC EC EC EC
     5: EC EC EC EC EC EC EC EC
     6: EC EC EC EC EC EC EC EC
     7: EC EC EC EC EC EC EC EC

DumpImage(Sample 1): 4 2-pixels images [8 bytes]:

     0: 02 02
     1: 02 02
     2: 02 02
     3: 02 02

DumpImage(Sample 2): 3 2-pixels images [6 bytes]:

     0: DC DC
     1: DC DC
     2: DC DC

DumpImage(Sample 3): 5 2-pixels images [10 bytes]:

     0: 75 75
     1: 75 75
     2: 75 75
     3: 75 75
     4: 75 75

DumpImage(Sample 4): 7 4-pixels images [28 bytes]:

     0: 54 54 54 54
     1: 54 54 54 54
     2: 54 54 54 54
     3: 54 54 54 54
     4: 54 54 54 54
     5: 54 54 54 54
     6: 54 54 54 54



5 Test images loaded
now shows 2 random images from list


DumpImage(From Array: image 3): 5 2-pixels images [10 bytes]:

     0: 75 75
     1: 75 75
     2: 75 75
     3: 75 75
     4: 75 75

DumpImage(From Array: image 0): 8 8-pixels images [64 bytes]:

     0: EC EC EC EC EC EC EC EC
     1: EC EC EC EC EC EC EC EC
     2: EC EC EC EC EC EC EC EC
     3: EC EC EC EC EC EC EC EC
     4: EC EC EC EC EC EC EC EC
     5: EC EC EC EC EC EC EC EC
     6: EC EC EC EC EC EC EC EC
     7: EC EC EC EC EC EC EC EC

        50 pointers allocated
        5 arguments read
        45 pointers to free
        Block size trimmed for a total of 5 pointers
        Image array is ready for use

示例程序到此结束。我没有移植内存的释放,那是微不足道的,并且没有使用数据写任何东西,因为我对此一无所知:)

我也没怎么测试。请不要在这里进行“宗教式”的风格争论:是的,我在结构字段的开头使用了下划线。我目前只用 Microsoft 编译器 CL 16.7.4 编译并运行过它。

我把这里的数量设为 5 条记录,这样就可以贴出全部输出数据。但由于内存中只保存像素数据,元数据只有几个字,所以它同样可以处理数千条记录,而且速度很快。

对于大数据集,最好将 _BLOCK_SIZE_ 改为更大的数字

示例代码

// NOTE(review): names that start with an underscore followed by an
// uppercase letter are reserved for the implementation in C.
// Number of Record* slots allocated at first and added on each extension.
#define     _BLOCK_SIZE_ 50
// Number of random images the demo builds.
#define     _IMAGES_     5
// Value passed to srand().
#define     _SEED_       201001

#include <math.h>
#include <memory.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef void Data;      // pixel payload is handled as untyped bytes

typedef struct
{
    uint8_t   _id;      // bytes per pixel: 1, 2, 4 or 8
    uint32_t  _NumPix;  // number of pixels in this record
    Data     *_data;    // _id * _NumPix bytes of pixel data
} Record;               // one record of image data

typedef struct
{
    uint32_t  _limit;  // number of Record* slots currently allocated in _imgV
    uint32_t  _imgC;   // number of slots in use (plays the role of argc)
    Record  **_imgV;   // array of Record pointers (plays the role of argv)
} ImageData;

// Prints one record as rows of hex bytes, prefixed by the given label.
int         dumpImage(const char*,Record*);
// Builds one random Record on the heap.
Record*     getImage();


int main(void)
{
    srand(_SEED_);
    char message[30];
    ImageData   image;
    image._imgC = 0;
    image._limit = _BLOCK_SIZE_;
    image._imgV = (Record**)malloc( image._limit * sizeof(Record*) );

    Record*      sandbox;
    for (int i = 0; i < _IMAGES_; i += 1)
    {
        // gets an image and a Record to hold it
        sprintf(message, "Sample %d", i);
        sandbox = getImage(); // gets a random image
        dumpImage(message, sandbox);


        Record* target = (Record*)malloc(sizeof(Record)); // new one
        target->_id = sandbox->_id;
        target->_NumPix = sandbox->_NumPix;
        uint32_t total = sandbox->_NumPix * sandbox->_id;
        target->_data = (void*)malloc(total);

        memcpy(target->_data, sandbox->_data, total);
        image._imgV[image._imgC] = target;

        // expands the block if there is no space left
        if (image._imgC >= image._limit)
        {   // block is full
            image._limit += _BLOCK_SIZE_;
            char* new_block = realloc(image._imgV, (image._limit * sizeof(char*)));
            printf("Block extended for a total of %d pointers\n", image._limit);
            image._imgV = (Record**)new_block;
        };  // if()

        image._imgC += 1;

    };  // for

    printf("\n\n%d Test images loaded\n", image._imgC);
    printf("now shows %d random images from list\n\n\n", _IMAGES_ / 2 );

    for (int i = 0; i < _IMAGES_ / 2; i += 1)
    {
        int j = rand() % _IMAGES_;
        sprintf(message, "From Array: image %d", j);
        sandbox = getImage(); // gets a random image
        dumpImage(message,image._imgV[j]);
    };  // for()


    // now trims the end of the block
    // allocated: _limit
    // used: argc
    printf("\t%d pointers allocated\n", image._limit);
    printf("\t%d arguments read\n", image._imgC);
    if (image._limit == image._imgC)
        printf("\tNothing to free()\n");
    else
    {
        printf("\t%d pointers to free\n", image._limit - image._imgC);
        char* new_size = realloc(image._imgV, (image._imgC * sizeof(char*)));
        printf("\tBlock size trimmed for a total of %d pointers\n", image._imgC);
        image._imgV = (Record**)new_size;
    };

    printf("\tImage array is ready for use\n");

    return 0;
};  // main()


// Prints one record to stdout: a header line labelled with msg, then one
// row per pixel showing that pixel's _id bytes as two-digit hex values.
// Returns the size of the pixel area in bytes (_NumPix * _id).
int         dumpImage(const char* msg, Record* img)
{
    uint32_t total = img->_NumPix * img->_id;
    const uint8_t* p = img->_data;
    // casts keep the arguments in step with the %u conversions
    printf("DumpImage(%s): %u %u-pixels images [%u bytes]:\n\n", msg,
           (unsigned)img->_NumPix, (unsigned)img->_id, (unsigned)total);
    for (uint32_t i = 0; i < img->_NumPix; i += 1)
    {
        printf("%6u: ", (unsigned)i);
        for (int j = 0; j < img->_id; j++)
        {
            // step through the buffer so every byte is shown, not only the first
            printf("%02X ", (unsigned)*p++);
        }
        printf("\n");
    }
    printf("\n");
    return (int)total;
}

//
// Image factory: builds one random "image".
// _id is picked from {1, 2, 4, 8}, _NumPix is 1 to 8, and every byte of
// the pixel area is set to the same random value in 0..254.
// Returns NULL if either allocation fails; otherwise the caller owns the
// Record and its _data buffer and must free() both.
//
Record* getImage()
{
    const uint8_t len[4] = { 1,2,4,8 };
    Record* one = malloc(sizeof *one);
    if (one == NULL)
    {
        return NULL;
    }
    one->_id = len[rand() % 4];
    one->_NumPix = 1 + rand() % 8;
    // size of the pixel area of this record, in bytes
    uint32_t total = one->_NumPix * one->_id;
    one->_data = malloc(total);
    if (one->_data == NULL)
    {
        free(one);
        return NULL;
    }
    // fill the pixel area with one random byte value
    uint8_t value = rand() % 255;
    memset(one->_data, value, total);
    return one;
}

Is this way OK in the sense of being legit?

当然可以。顺便提一句,您可以把大小抽象出来:写一个根据数据类型返回 sizeof 的函数,然后执行 malloc(NumPix * data_get_size_of_object(DataType))。Real world example - a big function that returns sizeof(type) in a big switch.

Is there another way doing this with less code and more generic approach?

嗯,不是“更少的代码”。

你最终会得到很多我所说的“fat switches”。您将拥有的每个功能都将是一个很大的 switch(type) { case sometype: do_something_with_that_type((cast_to_type*)pointer); case someothertype: etc. } ,这将变成 1000 行长的不可读的混乱开关。它不会灵活。添加新功能会很慢。维护会很慢。因为您每次都必须考虑每种情况,所以会出现错误。颠倒思路——应该知道如何处理类型的不是函数,type 本身应该知道操作。因此术语 面向对象 编程。创建一个 interface 并创建实现该接口的 objects - 这样对象就可以跟踪发生在它们身上的事情。 Real life example - struct file_operations from linux kernel.

我最近的经历让我相信,人们对函数指针存在一些恐惧。函数指针可以用来一次性选定要做什么,之后再根据先前的选择执行不同的操作:用一个 switch 给函数指针赋值,之后只需一次跳转即可调用先前选定的函数,无需每次都重新选择。一种带有虚表(virtual table)和接口的设计可能如下所示:

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <errno.h>

/* Forward declaration so the operation table can refer to the object type. */
typedef struct dataif_s dataif_t;

/* Operation table: one function pointer per operation a data object supports. */
struct dataif_vtable_s {
    /* destructor */
    void (*free)(dataif_t *t);
    /* read data from file */
    int (*read_data)(dataif_t *t, FILE *from);
    /* serialize data to file */
    int (*save)(dataif_t *t, FILE *to);
    /* print in human readable format */
    int (*fprint)(dataif_t *t, FILE *to);
    /* add integer to each number */
    int (*add)(dataif_t *t, int a);
};

/* Object handle: pairs an operation table with type-specific state. */
struct dataif_s {
    const struct dataif_vtable_s *vtable;  /* table that implements the operations */
    void *priv;                            /* the object's own data */
};

// shortcut accessors
/* Forwards to the read_data entry of the object's operation table and
 * returns whatever that entry returns. */
int dataif_read_data(dataif_t *t, FILE *from) {
     /* Possible hardening: assert(t != NULL), or return -ENOSYS when the
      * read_data entry is NULL. */
     const struct dataif_vtable_s *ops = t->vtable;
     return ops->read_data(t, from);
}

/* Prints the object to stdout through the fprint entry of its table. */
int dataif_print(dataif_t *t) {
    FILE *out = stdout;
    return t->vtable->fprint(t, out);
}

/* Destroys the object through the free entry of its operation table. */
void dataif_free(dataif_t *t) {
     const struct dataif_vtable_s *ops = t->vtable;
     ops->free(t);
}
// etc. for each function

/* ------------------------------------------ */

// private data implementing unsigned char operations
struct bytedata_s {
    size_t count;         /* number of values in data */
    unsigned char *data;  /* the values themselves */
};

/* Declared ahead of the table so its initializer can name them. */
void bytedata_free(dataif_t *t);
int bytedata_read_data(dataif_t *t, FILE *from);

/* The virtual table for unsigned char data.
 * It is static const, so the toolchain is free to keep it in read-only
 * storage instead of RAM. */
static const struct dataif_vtable_s bytedata_vtable = {
     .read_data = bytedata_read_data,
     .free = bytedata_free,
     /* etc. fill with custom function pointers */
};

// Constructor for data of unsigned chars.
// Allocates the private bytedata_s plus room for NumPix values, then wires
// t->priv and t->vtable. The values themselves are left uninitialized.
// Returns 0 on success, or -ENOMEM when an allocation fails (t is then
// left untouched).
int bytedata_init(dataif_t *t, size_t NumPix) {
     // construct bytedata object
     struct bytedata_s *p = malloc(sizeof(*p));
     if (!p) goto ERR_p;
     p->data = malloc(NumPix * sizeof(*p->data));
     // malloc(0) may return NULL without failing, so only a non-empty
     // request counts as out of memory
     if (!p->data && NumPix != 0) goto ERR_data;
     p->count = NumPix;

     // create the interface
     t->priv = p;
     t->vtable = &bytedata_vtable;

     return 0;

     // error unwinding: each label releases what was acquired before it
     ERR_data:
     free(p);
     ERR_p:
     return -ENOMEM;
}

// reading values
/* Reads `count` unsigned decimal values from `from` into the object's
 * buffer. Returns 0 when all values were read, -1 at the first value
 * that fscanf could not convert. */
int bytedata_read_data(dataif_t *t, FILE *from) {
    struct bytedata_s *self = t->priv; /* the object's private state */
    unsigned char *cursor = self->data;
    for (size_t remaining = self->count; remaining > 0; --remaining, ++cursor) {
        int converted = fscanf(from, "%hhu", cursor);
        if (converted != 1) {
            return -1;
        }
    }
    return 0;
}

/* Releases the value buffer and then the private struct itself. */
void bytedata_free(dataif_t *t) {
     struct bytedata_s *self = t->priv;
     unsigned char *values = self->data;
     free(values);
     free(self);
}

/* ---------------------------------------------- */

// Create the same interface for each remaining pixel type.
// Note: only one function per type (its constructor) is visible
// externally; all other functions are reached through the virtual table.
// NOTE(review): only declarations appear here; the definitions are not shown.
int int32data_init(dataif_t *t, size_t NumPix);
int float32data_init(dataif_t *t, size_t NumPix);
int float64data_init(dataif_t *t, size_t NumPix);

/* ---------------------------------------------- */

// map value types to constructors, ie. "object factory"
typedef enum {
    DATATYPE_BYTE,     /* unsigned char */
    DATATYPE_INT32,    /* int */
    DATATYPE_FLOAT32,  /* float */
    DATATYPE_FLOAT64,  /* double */
} datatype_t; /* deliberately not UpperCamelCase */

// Map of datatypes to constructors, indexed by datatype_t.
// `const` sits after the `*` so that the array elements are read-only;
// the constructors themselves return a plain int.
static int (*const dataif_inits[])(dataif_t *t, size_t NumPix) = {
    [DATATYPE_BYTE] = bytedata_init,
    [DATATYPE_INT32] = int32data_init,
    [DATATYPE_FLOAT32] = float32data_init,
    [DATATYPE_FLOAT64] = float64data_init,
};

// Object factory: initializes *t with the constructor registered for
// `datatype`.
// Returns the constructor's result, or -EINVAL when `datatype` has no
// entry in dataif_inits.
int dataif_init(dataif_t *t, datatype_t datatype, size_t NumPix) {
    const size_t count = sizeof(dataif_inits) / sizeof(*dataif_inits);
    // Compared as size_t: a negative value converts to a huge index and is
    // rejected by the same test. `>=` because `count` itself is one past
    // the last valid index.
    const size_t index = (size_t)datatype;
    if (index >= count || dataif_inits[index] == NULL) {
        // the datatype not found in the array
        return -EINVAL;
    }
    // note how the switch... doesn't exist anymore.
    return dataif_inits[index](t, NumPix);
}

# if 0
// Disabled alternative for enums whose values don't start from zero:
// keep (datatype, constructor) pairs in a table and search it linearly.
struct datatype_to_init_s {
     datatype_t t;
     int (*init)(dataif_t *t, size_t NumPix);
};
static const struct datatype_to_init_s datatype_to_init[] = {
     { DATATYPE_BYTE, bytedata_init },
     /* etc. */
};
// Returns the matching constructor's result, or -EINVAL when the table
// has no entry for `datatype`.
int dataif_init(dataif_t *t, datatype_t datatype, size_t NumPix) {
    // find datatype in datatype_to_init
    for (size_t i = 0; i < sizeof(datatype_to_init)/sizeof(*datatype_to_init); ++i) {
         if (datatype_to_init[i].t == datatype) {
             // found it? create the object; the constructor takes the
             // object to initialize as well as the element count
             return datatype_to_init[i].init(t, NumPix);
         }
    }
    // the datatype not found in the array
    return -EINVAL;
}
# endif

/* ---------------------------------------------- */

// Helpers that take the image path; only declared here, no definition is
// visible in this snippet.
size_t image_get_size(const char *);
datatype_t image_get_datatype(const char *);
// Path of the input file used by main().
static const char *image = "/tmp/a.png";

// Example driver: opens the image file, builds the data object that
// matches its datatype, reads the values, processes and prints them.
// Returns 0 on success, -1 when the file cannot be opened, otherwise the
// error code of the step that failed. Resources are released in reverse
// order of acquisition.
int main() {
     const size_t numpix = image_get_size(image);
     const datatype_t thedatatype = image_get_datatype(image);

     FILE *some_file = fopen(image, "r");
     if (some_file == NULL) {
          return -1;
     }

     dataif_t data;
     // initializes data depending on datatype
     int err = dataif_init(&data, thedatatype, numpix);
     if (err == 0) {
          err = dataif_read_data(&data, some_file);
          if (err == 0) {
               dataif_do_something(&data);
               dataif_print(&data);
          }
          // the object exists once init succeeded, whether or not reading did
          dataif_free(&data);
     }
     // a friendly error message could be added for the init failure

     fclose(some_file);
     return err;
}