fscanf 写入字符串数组错误

fscanf writing to string array wrong

我正在使用 fscanf 逐字读取文件并将它们写入 char** 数组。

如果我在循环中每读入一个单词就立即打印当前索引处的内容,它工作正常;但在全部写入完成之后再打印整个数组,输出就是错误的。

/*
 * Snippet from the question: tries to read whitespace-separated words from
 * "englishstopwords.txt" into an array of strings.
 *
 * Only the array of 1000 `char *` slots is allocated here; no storage for
 * the characters of each word is allocated anywhere in this snippet.
 */
char **stop_words = (char**)malloc(1000*sizeof(char*));

FILE *fp;
fp = fopen("englishstopwords.txt", "r");

int i = 0;
/*
 * feof() only reports end-of-file after a read has already hit it, and the
 * return value of fscanf() is not checked. `i` is also never compared against
 * the 1000-slot capacity.
 */
while(!feof(fp)) {
    /*
     * `&stop_words[i]` is the address of the i-th pointer slot (type char **),
     * not a char buffer. `%s` therefore stores the word's bytes inside the
     * pointer array itself, starting at slot i. Slots are only
     * sizeof(char *) bytes apart, so a word longer than that spills into the
     * following slots and is then partly overwritten by the next read.
     */
    fscanf(fp,"%s\n", &stop_words[i]);
    // printf("%s\n", &stop_words[i]); //this works fine
    i++;
}
// for (int i = 0; i < 1000; i++) { //this works buggy
    // printf("%s\n", &stop_words[i]);
// }
fclose(fp); 

出错时的打印结果如下:
即时重要索引

正常时的打印结果如下:
立即
重要性
重要
指数

它们有什么区别?

问题

你的内存分配从根本上是错误的。

char **stop_words = (char**)malloc(1000*sizeof(char*));只分配了一块可以存储1000个指针的内存。

stop_words[0] 到 stop_words[999] 的内容是未定义的,它们都是 malloc() 返回之后留在内存里的垃圾值。

有时写入 stop_words[i] 看起来没有问题,但那只是运气好:垃圾值恰好是一个指向已映射内存的指针(这仍然很糟糕,你可能因此造成内存损坏)。

解决这个问题的方法很简单,就是分配另一个内存块来包含文件中的数据。


错误的目标缓冲区

这部分

fscanf(fp,"%s\n", &stop_words[i]);

会把数据直接写入你用 malloc() 分配的指针数组本身。表达式 &stop_words[i] 的类型(char **)与 %s 所要求的 char * 不匹配;你真的应该启用编译器警告选项,一个好的编译器默认就会对此发出警告。

潜在的缓冲区溢出

你读取一行的方法很危险,因为 fscanf 的 %s 并不关心你的缓冲区有多大,你的程序因此很容易发生缓冲区溢出。

解决此问题的方法是您可以使用 fgets 并指定缓冲区的大小。

如果某一行的长度超过了已为缓冲区分配的大小,可以用 realloc() 扩容。要检测这种情况,可以检查 fgets 读回的字符串的最后一个字符:如果是换行符,说明已经读到行尾;否则要么是到了文件末尾,要么是该行的字符数超过了缓冲区大小(这时你可以决定重新分配)。


解决此问题


englishstopwords.txt(用于测试的示例文件)

i
me
my
myself
we
our
test_long_line_123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123
ours
ourselves


test.c


#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>

#define MAX_WORDS   (1000u)  /* capacity of the stop_words pointer array */
#define INIT_ALLOC  (128u)   /* initial line-buffer size and fgets() chunk size */

/*
 * Read "englishstopwords.txt" line by line into a heap-allocated array of
 * heap-allocated strings, print every stored line, then release everything.
 *
 * Each line starts in an INIT_ALLOC-byte buffer that is grown with realloc()
 * while fgets() has not yet delivered the terminating newline, so lines of
 * any length are supported. At most MAX_WORDS lines are stored; the rest of
 * the file is ignored.
 *
 * Returns EXIT_SUCCESS (0) when the file was read and printed, EXIT_FAILURE
 * when an allocation fails, the file cannot be opened, or a read error
 * occurs. Nothing is printed to stdout on failure.
 */
int main(void)
{
    static const char path[] = "englishstopwords.txt";
    size_t i, total_words = 0;
    int rc = EXIT_FAILURE;
    FILE *fp;
    char **stop_words = malloc(MAX_WORDS * sizeof(*stop_words));

    if (stop_words == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }

    fp = fopen(path, "r");
    if (fp == NULL) {
        perror(path);
        free(stop_words);
        return EXIT_FAILURE;
    }

    while (total_words < MAX_WORDS) {
        bool eof = false;
        size_t len = 0;     /* characters of the current line read so far */
        char *buf = malloc(INIT_ALLOC * sizeof(*buf));

        if (buf == NULL) {
            perror("malloc");
            goto out;
        }

        /* Append fgets() chunks until the line is complete or input ends. */
        for (;;) {
            char *grown;

            if (fgets(buf + len, INIT_ALLOC, fp) == NULL) {
                /* End of file or read error; told apart via ferror(). */
                eof = true;
                break;
            }

            len += strlen(buf + len);
            if (len > 0 && buf[len - 1] == '\n') {
                /* Full line read: trim the LF ... */
                buf[--len] = '\0';
                /* ... and the CR of a CRLF line ending, if present. */
                if (len > 0 && buf[len - 1] == '\r')
                    buf[--len] = '\0';
                break;
            }

            /*
             * No LF yet: the line is longer than what has been read so
             * far, or this is the last line of a file that lacks a final
             * newline. Make room for one more chunk after the current
             * end of string. A temporary keeps `buf` valid (and
             * freeable) if realloc() fails.
             */
            grown = realloc(buf, (len + 1 + INIT_ALLOC) * sizeof(*buf));
            if (grown == NULL) {
                perror("realloc");
                free(buf);
                goto out;
            }
            buf = grown;
        }

        if (eof && (len == 0 || ferror(fp))) {
            /*
             * Nothing usable in this buffer: either it was never filled,
             * or a read error left its contents indeterminate.
             */
            free(buf);
            break;
        }

        stop_words[total_words++] = buf;
        if (eof) {
            /* That was the final line, which had no trailing newline. */
            break;
        }
    }

    if (ferror(fp)) {
        fprintf(stderr, "%s: read error\n", path);
        goto out;
    }

    for (i = 0; i < total_words; i++)
        printf("%s\n", stop_words[i]);

    rc = EXIT_SUCCESS;

out:
    fclose(fp);

    for (i = 0; i < total_words; i++)
        free(stop_words[i]);

    free(stop_words);
    return rc;
}


编译并运行

ammarfaizi2@integral:/tmp$ cat englishstopwords.txt 
i
me
my
myself
we
our
test_long_line_123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123
ours
ourselves
ammarfaizi2@integral:/tmp$ gcc -ggdb3 -Wall -Wextra -pedantic-errors test.c -o test
ammarfaizi2@integral:/tmp$ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --track-fds=yes --error-exitcode=99 -s ./test
==503906== Memcheck, a memory error detector
==503906== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==503906== Using Valgrind-3.17.0 and LibVEX; rerun with -h for copyright info
==503906== Command: ./test
==503906== 
i
me
my
myself
we
our
test_long_line_123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123
ours
ourselves
==503906== 
==503906== FILE DESCRIPTORS: 3 open (3 std) at exit.
==503906== 
==503906== HEAP SUMMARY:
==503906==     in use at exit: 0 bytes in 0 blocks
==503906==   total heap usage: 22 allocs, 22 frees, 20,476 bytes allocated
==503906== 
==503906== All heap blocks were freed -- no leaks are possible
==503906== 
==503906== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
ammarfaizi2@integral:/tmp$