fscanf 写入字符串数组错误
fscanf writing to string array wrong
我正在使用 fscanf 逐个单词地读取文件,并将这些单词写入 char** 数组。
如果在读取的同时立即打印当前索引处的内容,一切正常;但在全部写入完成之后再打印整个数组,输出就是错误的。
/*
 * Snippet from the question, kept exactly as posted. It reads
 * whitespace-delimited words from englishstopwords.txt with fscanf and
 * contains the defects analysed in the answer that follows.
 */
/*
 * Allocates room for 1000 `char *` slots only. The slots are left
 * uninitialized and no storage for the characters of any word is allocated.
 */
char **stop_words = (char**)malloc(1000*sizeof(char*));
FILE *fp;
/* The result of fopen is not checked for NULL before it is used. */
fp = fopen("englishstopwords.txt", "r");
int i = 0;
/* Nothing bounds i, so more than 1000 words would index past the array. */
while(!feof(fp)) {
/*
 * &stop_words[i] has type `char **`, not the `char *` that %s expects, so
 * the characters are written into the pointer array itself. Word i + 1
 * starts only sizeof(char *) bytes after word i and therefore overwrites
 * the tail (and terminator) of any longer word, which is why printing
 * after the loop shows merged words. fscanf's return value is ignored.
 */
fscanf(fp,"%s\n", &stop_words[i]);
// printf("%s\n", &stop_words[i]); //this works fine
i++;
}
// for (int i = 0; i < 1000; i++) { //this works buggy
// printf("%s\n", &stop_words[i]);
// }
fclose(fp);
错误的打印输出如下:
即时重要索引
正常的打印输出如下:
立即
重要性
重要
指数
它们有什么区别?
问题
你的内存分配从根本上是错误的。
char **stop_words = (char**)malloc(1000*sizeof(char*));
只分配了一块可以存储1000个指针的内存。
stop_words[0]
到stop_words[999]
的内容未定义,都是malloc()
返回后留下的垃圾值。
有时写入 stop_words[i]
看起来没有问题,但那只是运气好:垃圾值恰好是一个指向已映射内存的指针(即便如此这仍然很糟糕,您可能因此造成内存损坏)。
解决这个问题的方法很简单,就是分配另一个内存块来包含文件中的数据。
错误的目标缓冲区
这部分
fscanf(fp,"%s\n", &stop_words[i]);
写入您用malloc()
分配的指针数组。表达式类型 &stop_words[i]
本身与 %s
不匹配,你真的应该激活警告标志,一个好的编译器应该默认警告你。
潜在的缓冲区溢出
你读取一行的方法很危险,因为 fscanf
和 %s
不关心你的缓冲区有多大,你的程序因此很容易发生缓冲区溢出。
解决此问题的方法是您可以使用 fgets
并指定缓冲区的大小。
如果某行的长度超过为缓冲区分配的大小,则可以 realloc()
。要检测这种情况,可以检查读到的字符串的最后一个字符:如果是换行符,说明已读到行尾;否则要么已到文件末尾,要么该行的字符数超过了缓冲区大小(此时可以决定重新分配)。
解决此问题
englishstopwords.txt(用于测试的示例文件)
i
me
my
myself
we
our
test_long_line_123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123
ours
ourselves
test.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#define MAX_WORDS (1000u)
#define INIT_ALLOC (128u)
/*
 * Read the next line from `fp` into a freshly allocated, NUL-terminated
 * buffer. The buffer starts at INIT_ALLOC bytes and grows by INIT_ALLOC
 * each time a chunk comes back without a line feed, so lines of any length
 * fit. A trailing LF, if present, is removed.
 *
 * Returns the line (the caller must free() it). Returns NULL when nothing
 * could be read before end-of-file or a read error, or when an allocation
 * failed; `*oom` is set to true only in the allocation-failure case.
 *
 * Lengths are measured with strlen(), so input containing NUL bytes is not
 * preserved faithfully (but never causes an out-of-bounds access).
 */
static char *read_line(FILE *fp, bool *oom)
{
    size_t len = 0;
    char *buf = malloc(INIT_ALLOC);

    *oom = false;
    if (buf == NULL) {
        *oom = true;
        return NULL;
    }

    /* Invariant: at least INIT_ALLOC bytes are available at buf + len. */
    while (fgets(buf + len, (int)INIT_ALLOC, fp) != NULL) {
        char *grown;

        /* Only scan the chunk that was just read, not the whole line. */
        len += strlen(buf + len);

        if (len > 0 && buf[len - 1] == '\n') {
            /* TODO: Trim CR on Windows platform */
            /* Trim the LF */
            buf[len - 1] = '\0';
            return buf;
        }

        /*
         * No LF seen: either the line is longer than one chunk or this is
         * the last line of a file without a trailing newline. Make room
         * for one more chunk after the text read so far.
         */
        grown = realloc(buf, len + 1 + INIT_ALLOC);
        if (grown == NULL) {
            /* realloc left the old block valid; release it ourselves. */
            free(buf);
            *oom = true;
            return NULL;
        }
        buf = grown;
    }

    if (len == 0) {
        /* End of file (or error) with nothing read: buffer is unused. */
        free(buf);
        return NULL;
    }

    /*
     * Last line without a trailing LF. Re-terminate explicitly because the
     * buffer contents are indeterminate after a failed fgets().
     */
    buf[len] = '\0';
    return buf;
}

/*
 * Load up to MAX_WORDS lines from "englishstopwords.txt", print each one on
 * its own line, then release everything.
 *
 * Returns EXIT_SUCCESS (0) on success and EXIT_FAILURE if the file cannot
 * be opened, a read error occurs, or memory runs out.
 */
int main(void)
{
    int status = EXIT_FAILURE;
    size_t i, total_words = 0;
    FILE *fp = NULL;
    char **stop_words = malloc(MAX_WORDS * sizeof(*stop_words));

    if (stop_words == NULL) {
        perror("malloc");
        goto out;
    }

    fp = fopen("englishstopwords.txt", "r");
    if (fp == NULL) {
        perror("englishstopwords.txt");
        goto out;
    }

    /*
     * TODO: You can do realloc(stop_words, ...) if you want to accept more
     * than MAX_WORDS lines; for now extra lines are ignored.
     */
    while (total_words < MAX_WORDS) {
        bool oom;
        char *line = read_line(fp, &oom);

        if (line == NULL) {
            if (oom) {
                fputs("out of memory while reading a line\n", stderr);
                goto out;
            }
            break;
        }
        stop_words[total_words++] = line;
    }

    if (ferror(fp)) {
        perror("englishstopwords.txt");
        goto out;
    }

    for (i = 0; i < total_words; i++)
        printf("%s\n", stop_words[i]);

    status = EXIT_SUCCESS;

out:
    if (fp != NULL)
        fclose(fp);
    /* total_words is 0 whenever stop_words is NULL, so this is safe. */
    for (i = 0; i < total_words; i++)
        free(stop_words[i]);
    free(stop_words);
    return status;
}
编译并运行
ammarfaizi2@integral:/tmp$ cat englishstopwords.txt
i
me
my
myself
we
our
test_long_line_123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123
ours
ourselves
ammarfaizi2@integral:/tmp$ gcc -ggdb3 -Wall -Wextra -pedantic-errors test.c -o test
ammarfaizi2@integral:/tmp$ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --track-fds=yes --error-exitcode=99 -s ./test
==503906== Memcheck, a memory error detector
==503906== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==503906== Using Valgrind-3.17.0 and LibVEX; rerun with -h for copyright info
==503906== Command: ./test
==503906==
i
me
my
myself
we
our
test_long_line_123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123
ours
ourselves
==503906==
==503906== FILE DESCRIPTORS: 3 open (3 std) at exit.
==503906==
==503906== HEAP SUMMARY:
==503906== in use at exit: 0 bytes in 0 blocks
==503906== total heap usage: 22 allocs, 22 frees, 20,476 bytes allocated
==503906==
==503906== All heap blocks were freed -- no leaks are possible
==503906==
==503906== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
ammarfaizi2@integral:/tmp$
我正在使用 fscanf 逐个单词地读取文件,并将这些单词写入 char** 数组。
如果在读取的同时立即打印当前索引处的内容,一切正常;但在全部写入完成之后再打印整个数组,输出就是错误的。
/*
 * Snippet from the question, kept exactly as posted. It reads
 * whitespace-delimited words from englishstopwords.txt with fscanf and
 * contains the defects analysed in the answer that follows.
 */
/*
 * Allocates room for 1000 `char *` slots only. The slots are left
 * uninitialized and no storage for the characters of any word is allocated.
 */
char **stop_words = (char**)malloc(1000*sizeof(char*));
FILE *fp;
/* The result of fopen is not checked for NULL before it is used. */
fp = fopen("englishstopwords.txt", "r");
int i = 0;
/* Nothing bounds i, so more than 1000 words would index past the array. */
while(!feof(fp)) {
/*
 * &stop_words[i] has type `char **`, not the `char *` that %s expects, so
 * the characters are written into the pointer array itself. Word i + 1
 * starts only sizeof(char *) bytes after word i and therefore overwrites
 * the tail (and terminator) of any longer word, which is why printing
 * after the loop shows merged words. fscanf's return value is ignored.
 */
fscanf(fp,"%s\n", &stop_words[i]);
// printf("%s\n", &stop_words[i]); //this works fine
i++;
}
// for (int i = 0; i < 1000; i++) { //this works buggy
// printf("%s\n", &stop_words[i]);
// }
fclose(fp);
错误的打印输出如下:
即时重要索引
正常的打印输出如下:
立即
重要性
重要
指数
它们有什么区别?
问题
你的内存分配从根本上是错误的。
char **stop_words = (char**)malloc(1000*sizeof(char*));
只分配了一块可以存储1000个指针的内存。
stop_words[0]
到stop_words[999]
的内容未定义,都是malloc()
返回后留下的垃圾值。
有时写入 stop_words[i]
看起来没有问题,但那只是运气好:垃圾值恰好是一个指向已映射内存的指针(即便如此这仍然很糟糕,您可能因此造成内存损坏)。
解决这个问题的方法很简单,就是分配另一个内存块来包含文件中的数据。
错误的目标缓冲区
这部分
fscanf(fp,"%s\n", &stop_words[i]);
写入您用malloc()
分配的指针数组。表达式类型 &stop_words[i]
本身与 %s
不匹配,你真的应该激活警告标志,一个好的编译器应该默认警告你。
潜在的缓冲区溢出
你读取一行的方法很危险,因为 fscanf
和 %s
不关心你的缓冲区有多大,你的程序因此很容易发生缓冲区溢出。
解决此问题的方法是您可以使用 fgets
并指定缓冲区的大小。
如果某行的长度超过为缓冲区分配的大小,则可以 realloc()
。要检测这种情况,可以检查读到的字符串的最后一个字符:如果是换行符,说明已读到行尾;否则要么已到文件末尾,要么该行的字符数超过了缓冲区大小(此时可以决定重新分配)。
解决此问题
englishstopwords.txt(用于测试的示例文件)
i
me
my
myself
we
our
test_long_line_123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123
ours
ourselves
test.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#define MAX_WORDS (1000u)
#define INIT_ALLOC (128u)
/*
 * Read the next line from `fp` into a freshly allocated, NUL-terminated
 * buffer. The buffer starts at INIT_ALLOC bytes and grows by INIT_ALLOC
 * each time a chunk comes back without a line feed, so lines of any length
 * fit. A trailing LF, if present, is removed.
 *
 * Returns the line (the caller must free() it). Returns NULL when nothing
 * could be read before end-of-file or a read error, or when an allocation
 * failed; `*oom` is set to true only in the allocation-failure case.
 *
 * Lengths are measured with strlen(), so input containing NUL bytes is not
 * preserved faithfully (but never causes an out-of-bounds access).
 */
static char *read_line(FILE *fp, bool *oom)
{
    size_t len = 0;
    char *buf = malloc(INIT_ALLOC);

    *oom = false;
    if (buf == NULL) {
        *oom = true;
        return NULL;
    }

    /* Invariant: at least INIT_ALLOC bytes are available at buf + len. */
    while (fgets(buf + len, (int)INIT_ALLOC, fp) != NULL) {
        char *grown;

        /* Only scan the chunk that was just read, not the whole line. */
        len += strlen(buf + len);

        if (len > 0 && buf[len - 1] == '\n') {
            /* TODO: Trim CR on Windows platform */
            /* Trim the LF */
            buf[len - 1] = '\0';
            return buf;
        }

        /*
         * No LF seen: either the line is longer than one chunk or this is
         * the last line of a file without a trailing newline. Make room
         * for one more chunk after the text read so far.
         */
        grown = realloc(buf, len + 1 + INIT_ALLOC);
        if (grown == NULL) {
            /* realloc left the old block valid; release it ourselves. */
            free(buf);
            *oom = true;
            return NULL;
        }
        buf = grown;
    }

    if (len == 0) {
        /* End of file (or error) with nothing read: buffer is unused. */
        free(buf);
        return NULL;
    }

    /*
     * Last line without a trailing LF. Re-terminate explicitly because the
     * buffer contents are indeterminate after a failed fgets().
     */
    buf[len] = '\0';
    return buf;
}

/*
 * Load up to MAX_WORDS lines from "englishstopwords.txt", print each one on
 * its own line, then release everything.
 *
 * Returns EXIT_SUCCESS (0) on success and EXIT_FAILURE if the file cannot
 * be opened, a read error occurs, or memory runs out.
 */
int main(void)
{
    int status = EXIT_FAILURE;
    size_t i, total_words = 0;
    FILE *fp = NULL;
    char **stop_words = malloc(MAX_WORDS * sizeof(*stop_words));

    if (stop_words == NULL) {
        perror("malloc");
        goto out;
    }

    fp = fopen("englishstopwords.txt", "r");
    if (fp == NULL) {
        perror("englishstopwords.txt");
        goto out;
    }

    /*
     * TODO: You can do realloc(stop_words, ...) if you want to accept more
     * than MAX_WORDS lines; for now extra lines are ignored.
     */
    while (total_words < MAX_WORDS) {
        bool oom;
        char *line = read_line(fp, &oom);

        if (line == NULL) {
            if (oom) {
                fputs("out of memory while reading a line\n", stderr);
                goto out;
            }
            break;
        }
        stop_words[total_words++] = line;
    }

    if (ferror(fp)) {
        perror("englishstopwords.txt");
        goto out;
    }

    for (i = 0; i < total_words; i++)
        printf("%s\n", stop_words[i]);

    status = EXIT_SUCCESS;

out:
    if (fp != NULL)
        fclose(fp);
    /* total_words is 0 whenever stop_words is NULL, so this is safe. */
    for (i = 0; i < total_words; i++)
        free(stop_words[i]);
    free(stop_words);
    return status;
}
编译并运行
ammarfaizi2@integral:/tmp$ cat englishstopwords.txt
i
me
my
myself
we
our
test_long_line_123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123
ours
ourselves
ammarfaizi2@integral:/tmp$ gcc -ggdb3 -Wall -Wextra -pedantic-errors test.c -o test
ammarfaizi2@integral:/tmp$ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --track-fds=yes --error-exitcode=99 -s ./test
==503906== Memcheck, a memory error detector
==503906== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==503906== Using Valgrind-3.17.0 and LibVEX; rerun with -h for copyright info
==503906== Command: ./test
==503906==
i
me
my
myself
we
our
test_long_line_123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123
ours
ourselves
==503906==
==503906== FILE DESCRIPTORS: 3 open (3 std) at exit.
==503906==
==503906== HEAP SUMMARY:
==503906== in use at exit: 0 bytes in 0 blocks
==503906== total heap usage: 22 allocs, 22 frees, 20,476 bytes allocated
==503906==
==503906== All heap blocks were freed -- no leaks are possible
==503906==
==503906== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
ammarfaizi2@integral:/tmp$