为什么 valgrind 在重用分配的内存时会抱怨大小为 1 的无效读写?

Why would valgrind complain about invalid read and writes of size 1 on reusing allocated memory?

我写了一个函数来比较 c 版本的字符串,如 "1.2.3""1.123.9"

int compare_versions(char* first, char* second)
{
  size_t first_last_dot1 = 0;
  size_t first_last_dot2 = strcspn(first, ".");
  size_t second_last_dot1 = 0;
  size_t second_last_dot2 = strcspn(second, ".");

  while(first_last_dot2 || second_last_dot2)
  {
    if(first_last_dot2 && !second_last_dot2) return 1;
    if(!first_last_dot2 && second_last_dot2) return -1;

    char* first_c = (char*)calloc(first_last_dot2 + 1, sizeof(char));
    strncat(first_c, first + first_last_dot1, first_last_dot2);
    int first_n = atoi(first_c);
    first_last_dot1 += first_last_dot2 + 1;
    first_last_dot2 = strcspn(first + first_last_dot1, ".");
    free(first_c);

    char* second_c = (char*)calloc(second_last_dot2 + 1, sizeof(char));
    strncat(second_c, second + second_last_dot1, second_last_dot2);
    int second_n = atoi(second_c);
    second_last_dot1 += second_last_dot2 + 1;
    second_last_dot2 = strcspn(second + second_last_dot1, ".");
    free(second_c);

    if (first_n != second_n)
    {
      if(first_n < second_n) return -1;
      return 1;
    }
  }

  return 0;
}

而 valgrind 根本不会抱怨我的 'tests':

assert(0 == compare_versions("1.12345678.2", "1.12345678.2"));

valgrind 输出:

==24002== Memcheck, a memory error detector
==24002== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==24002== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==24002== Command: ./a.out
==24002== 
==24002== 
==24002== HEAP SUMMARY:
==24002==     in use at exit: 0 bytes in 0 blocks
==24002==   total heap usage: 10 allocs, 10 frees, 48 bytes allocated
==24002== 
==24002== All heap blocks were freed -- no leaks are possible
==24002== 
==24002== For counts of detected and suppressed errors, rerun with: -v
==24002== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 6 from 6)

现在我想重新使用分配的内存,即:当我分配一个大小为 4 的内存块时,我可以将它重新用于所有大小为 4(包括终止空字符)的 c 字符串。

所以我写了这个功能完全相同的版本:

int compare_versions(char* first, char* second)
{
  size_t first_last_dot1 = 0;
  size_t first_last_dot2 = strcspn(first, ".");
  size_t second_last_dot1 = 0;
  size_t second_last_dot2 = strcspn(second, ".");

  char* first_c = (char*)calloc(first_last_dot2 + 1, sizeof(char));
  char* second_c = (char*)calloc(second_last_dot2 + 1, sizeof(char));
  int first_max = first_last_dot2;
  int second_max = second_last_dot2;

  while(first_last_dot2 || second_last_dot2)
  {
    // first longer than second ( different only by last segment )
    if(first_last_dot2 && !second_last_dot2) {
      free(first_c);
      free(second_c);
      return 1;
    }
    // second longer than first ( different only by last segment )
    if(!first_last_dot2 && second_last_dot2) {
      free(first_c);
      free(second_c);
      return -1;
    }

    if(first_last_dot2 > first_max) {
      first_max = first_last_dot2;
      first_c = (char*)realloc(first_c, first_last_dot2 + 1);
      memset(first_c, 0, first_last_dot2);
    }
    strncat(first_c, first + first_last_dot1, first_last_dot2);
    int first_n = atoi(first_c);
    first_last_dot1 += first_last_dot2 + 1;
    first_last_dot2 = strcspn(first + first_last_dot1, ".");

    if(second_last_dot2 > second_max) {
      second_max = second_last_dot2;
      second_c = (char*)realloc(second_c, second_last_dot2 + 1);
      memset(second_c, 0, second_last_dot2);
    }
    strncat(second_c, second + second_last_dot1, second_last_dot2);
    int second_n = atoi(second_c);
    second_last_dot1 += second_last_dot2 + 1;
    second_last_dot2 = strcspn(second + second_last_dot1, ".");

    if (first_n != second_n)
    {
      free(first_c);
      free(second_c);
      if(first_n < second_n) return -1;
      return 1;
    }
  }

  free(first_c);
  free(second_c);
  return 0;                                                                                                                                                                                                                                                                       
}

但随后我将从 valgrind 获得以下输出:

==24039== Memcheck, a memory error detector
==24039== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==24039== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==24039== Command: ./a.out
==24039== 
==24039== Invalid write of size 1
==24039==    at 0x4A07E83: strncat (mc_replace_strmem.c:304)
==24039==    by 0x400828: compare_versions (main.c:46)
==24039==    by 0x400986: main (main.c:94)
==24039==  Address 0x4c330e9 is 0 bytes after a block of size 9 alloc'd
==24039==    at 0x4A06C20: realloc (vg_replace_malloc.c:662)
==24039==    by 0x4007F1: compare_versions (main.c:43)
==24039==    by 0x400986: main (main.c:94)
==24039== 
==24039== Invalid read of size 1
==24039==    at 0x39AA436FF3: ____strtol_l_internal (strtol_l.c:438)
==24039==    by 0x39AA433C5F: atoi (atoi.c:28)
==24039==    by 0x400834: compare_versions (main.c:47)
==24039==    by 0x400986: main (main.c:94)
==24039==  Address 0x4c330e9 is 0 bytes after a block of size 9 alloc'd
==24039==    at 0x4A06C20: realloc (vg_replace_malloc.c:662)
==24039==    by 0x4007F1: compare_versions (main.c:43)
==24039==    by 0x400986: main (main.c:94)
==24039== 
...
==24039== 
==24039== HEAP SUMMARY:
==24039==     in use at exit: 0 bytes in 0 blocks
==24039==   total heap usage: 4 allocs, 4 frees, 22 bytes allocated
==24039== 
==24039== All heap blocks were freed -- no leaks are possible
==24039== 
==24039== For counts of detected and suppressed errors, rerun with: -v
==24039== ERROR SUMMARY: 34 errors from 8 contexts (suppressed: 6 from 6)

这里有什么问题?

Valgrind 说明了一切 - 我只看初始错误:

==24039== Invalid write of size 1
==24039==    at 0x4A07E83: strncat (mc_replace_strmem.c:304)
==24039==    by 0x400828: compare_versions (main.c:46)
==24039==    by 0x400986: main (main.c:94)
==24039==  Address 0x4c330e9 is 0 bytes after a block of size 9 alloc'd
==24039==    at 0x4A06C20: realloc (vg_replace_malloc.c:662)
==24039==    by 0x4007F1: compare_versions (main.c:43)
==24039==    by 0x400986: main (main.c:94)

compare_versions (main.c:46)行是:

    strncat(first_c, first + first_last_dot1, first_last_dot2);

compare_versions (main.c:43)行是:

      first_c = (char*)realloc(first_c, first_last_dot2 + 1);

错误发生在第三个循环周期。 first_last_dot2 已经设置在行

的第二个循环周期
    first_last_dot2 = strcspn(first + first_last_dot1, ".");

到值1(版本字符串最后一部分的长度,即2);之前 first_last_dot2 已在第二个循环周期中设置为值 8(版本字符串中间部分的长度,即 12345678),因此realloc 分配大小 8 + 1 = 9。由于现在 if 条件 (first_last_dot2 > first_max) 不成立,因此 realloc 以及

      memset(first_c, 0, first_last_dot2);

被跳过;因为没有执行后者,first_c 仍然包含 12345678 并且 strncat 附加 2 到它,将终止空字节写入 first_c[9],其中 是大小为 9 的块分配后的 0 字节
如果您将两个 memset(也是 second_c 的那个)移动到 if 块之后,则没有错误。