在 C 中处理从文件 I/O 存储的字符串时出现问题
Problems Manipulating strings stored from File I/O in C
我的方法是从文件中逐个读取字符并计数:每遇到一个非法字符(分隔符),就记录当前字符串的长度,以及该长度的字符串已经出现了多少个。现在我想用读入的字符构建字符串,并把它们存储在一个数组中。程序基本可以工作,但当读入的两个字符串长度相同时,我尝试把这两个字符串拼接在一起,程序就会中止(abort)或出现段错误。如果您不介意给我一些反馈,我已在代码中用注释标出了出问题的那一行(原始代码的第 129 行)……我希望在处理完成后打印出每种长度的字符串。
这是我用来测试的文本文件:
Tomorrow, and tomorrow, and tomorrow,
To the last syllable of recorded time;
源代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
*this program reads in a text file from the command line
*then counts and stores the number of words of all lengths
*/
/* Size limits shared by every table below. */
#define LENGTH 34        /* slots in the per-length tables, bytes in `strings` */
#define WORD_BUFFER 750  /* first dimension of `array` */

/* strLengths[n] is incremented by main() each time a word of n characters ends. */
int strLengths[LENGTH];
/* Scratch copy of strLengths that sort() is free to overwrite. */
int lengthsCopy[LENGTH];
/* Table of char pointers; main() only ever uses column 0 of a row. */
char *array[WORD_BUFFER][LENGTH];
/* Buffer holding the characters of the word currently being read. */
char strings[LENGTH];
/* Number of characters seen so far in the current word. */
int counter = 0;
/* Most recent fgetc() result. */
int ch;
/* Not referenced by the code visible here. */
int tester;
/*
 * Prints the word-length counts held in strLengths, highest count first.
 * All work is done on lengthsCopy, so strLengths itself is left untouched.
 * Slot 0 takes part in the search for the maximum but is never printed.
 */
void sort()
{
    int peak = 0;

    /* snapshot the counts and remember the largest one in a single pass */
    for (int k = 0; k < LENGTH; k++) {
        lengthsCopy[k] = strLengths[k];
        if (lengthsCopy[k] > peak)
            peak = lengthsCopy[k];
    }
    printf("max val in the array is %d\n", peak);

    /* walk the count downwards, reporting every slot that holds it */
    for (; peak != 0; peak--) {
        for (int slot = LENGTH - 1; slot > 0; slot--) {
            if (lengthsCopy[slot] != peak)
                continue;
            printf("Count[%02d]=%02d;\n", slot, peak);
            /* report the other slots with the same count and clear them so
               the enclosing scan does not report them a second time */
            for (int other = LENGTH - 1; other > 0; other--) {
                if (other == slot || lengthsCopy[other] != peak)
                    continue;
                printf("Count[%02d]=%02d;\n", other, peak);
                lengthsCopy[other] = 0;
            }
        }
    }
}
/*
 * Prints "Count[len]=count;" for every word length that occurred at least
 * once, shortest length first. Slot 0 is skipped.
 */
void printList()
{
    /* strLengths has LENGTH elements, so the last valid index is LENGTH - 1;
       the loop must stop before LENGTH to stay inside the array */
    for (int i = 1; i < LENGTH; i++) {
        if (strLengths[i] > 0)
            printf("Count[%02d]=%02d;\n", i, strLengths[i]);
    }
}
/*
 * Reads the file named by argv[1] one character at a time and counts how
 * many words of each length it contains. A word ends at a space, comma,
 * parenthesis, semicolon or newline. The per-length counts are then printed
 * by printList() and sort(). Exits with status 1 when no file name is given
 * or the file cannot be opened, otherwise with status 0.
 *
 * NOTE(review): `counter` indexes strLengths and array, and `strPoint`
 * indexes strings, without any bounds check; a sufficiently long word
 * would index outside those arrays.
 */
int main (int argc, char *argv[])
{
    //error message if input file is not passed
    if(argc < 2)
    {
        printf("You have to give me a file!\n");
        exit(1);
    }
    FILE *text = fopen(argv[1], "r");
    //error message if the file could not be opened
    if(text == NULL)
    {
        printf("No content to read in %s. \n", argv[1]);
        exit(1);
    }
    //iterate through text until end of file
    ch = fgetc(text);
    int strPoint =0;
    while(ch != EOF)
    {
        //if illegal char is met, add a count to the array value of current counter
        //set counter back to 0
        //scan next char
        if(ch==' '||ch==','||ch=='('||ch==')'||ch==';'||ch=='\n')
        {
            if(array[counter][0] == NULL)//if length not defined yet
            {
                /* NOTE(review): this stores the address of the one shared
                   `strings` buffer, not a copy of the word just read */
                array[counter][0] = strings;//add current string build to the array
                printf("%s\n",array[counter][0] );
            }
            /* the '\0' test skips the append when no characters were collected */
            else if(array[counter][0] != NULL && strings[0] != '\0')
            {//else length is defined add to text bank
                printf("else if reached\n");
                printf("%s\n",strings );
                printf("%lu\n",strlen(array[counter][0]) );
                int arrayptr = strlen(*array[counter]);
                printf("ptr %d",arrayptr);
                /* next line aborts / seg_faults */
                /* NOTE(review): array[counter][0] was set to `strings` in the
                   branch above, so destination and source are the same buffer */
                strncat(*array[counter],strings,strlen(strings));
            }
            strLengths[counter]++;
            counter = 0;
            ch = fgetc(text);
            memset(strings, 0, sizeof(strings));//clear stringBuild
            strPoint =0;
        }
        //else a legal character, increase counter, scan next char
        else
        {
            strings[strPoint] = ch;
            printf("string build %c\n",strings[strPoint]);
            counter++;
            strPoint++;
            ch = fgetc(text);
        }
    }
    fclose(text);
    /* NOTE(review): array[3][0] is still NULL if no 3-character word was read */
    printf("stored string %s\n",array[3][0] );
    printList();
    //call sort
    sort();
    exit(0);
}
从你的代码中我可以看出,你的主要问题是你对发生的事情有误解:
array[counter][0] = strings;//add current string build to the array
您是在把指针 array[counter][0] 设置为 strings 的地址。您只有一个 strings 变量,因此每个 array[counter][0] 都指向同一块内存(也就是说,array 中每一行指向的都是 strings 里最后存入的那个字符串)。
您把 strncat 当作 strcpy 来用(借助 strncat 总会追加 nul 终止符的特性),这种做法本身没有错,但要意识到,对于较长的缓冲区这可能带来性能损失。您可能还有其他逻辑问题,但它们被代码笨拙的布局,以及对“指向 char 的指针数组”的非标准用法掩盖了。
意见反馈
尝试简化您的实现。如果您主要关心的是存储从文件中读取的单词以及每个单词的长度以便排序,那么可以简单地把单词存储在 char 的二维数组中,每次需要长度时调用 strlen;或者,以每个单词多占一个 int 的空间为代价,用一个简单的结构把每个单词的长度与单词本身关联起来,例如:
/* One stored word together with its cached length. */
typedef struct {
    char word[LENGTH];  /* characters of the word */
    int  len;           /* number of characters stored in word */
} wordinfo;
然后您只需创建一个结构数组(例如 wordinfo words[WORD_BUFFER];),把单词存储在 words[x].word 中,把长度存储在 words[x].len 中。如果您不想使用结构,只需声明一个二维数组(例如 char words[WORD_BUFFER][LENGTH];)并把单词存储在其中即可。(使用结构时每个单词多占 4 个字节;如果存储空间不成问题,保存读取字符时已经得到的长度,就可以省去反复调用 strlen 的开销。)
您还可以声明一个指向 char[LENGTH] 数组的指针(例如 char (*array)[LENGTH];),并使用 malloc 为其中的 WORD_BUFFER 个元素动态分配存储空间(也可以使用 calloc,把分配的所有字节初始化为零)。这是一个不错的选择,但动态分配似乎并不是您的目标。
此外,避免使用全局变量。它们几乎从不 需要并且会增加名称冲突和值覆盖的风险。将您的变量声明为 main()
的本地变量,并根据需要将它们作为参数传递。例如,使用 struct 实现,您可以编写按长度排序和打印如下,使用指向您的 struct 数组的指针和填充的数字作为参数:
/*
 * Insertion sort of the first n elements of a by len, largest first.
 * Elements with equal len keep their relative order.
 */
void sort (wordinfo *a, int n)
{
    for (int pos = 1; pos < n; pos++) {
        wordinfo key = a[pos];
        int slot = pos;

        /* shift shorter words one place right until key fits */
        for (; slot > 0 && a[slot - 1].len < key.len; slot--)
            a[slot] = a[slot - 1];
        a[slot] = key;
    }
}
/* Prints the first n entries of a, one per line: the word left-aligned in a
 * 34-column field followed by its stored length. */
void printlist (wordinfo *a, int n)
{
    int row = 0;

    while (row < n) {
        wordinfo *entry = &a[row];
        printf (" %-34s (%d-chars)\n", entry->word, entry->len);
        row++;
    }
}
(注意: 除非作业要求,否则不要自己编写或使用自己的排序函数。C 标准库提供的 qsort 效率高得多,而且经过了充分测试;您只需编写一个 compare 函数,用来比较待排序内容的两个元素,剩下的工作交给 qsort 即可。)
最后,从文件中读取每个字符的逻辑一点都不复杂。简单地阅读字符,检查它,然后采取任何适当的行动。唯一增加的复杂性来自测试,以确保您保持在 LENGTH
个字符和 WORD_BUFFER
个单词以内,以防止覆盖存储范围。即使使用结构实现,声明和初始化为:
int c;           /* character most recently read */
int len = 0;     /* characters stored so far in the current word */
int maxndx = 0;  /* index of the longest word seen so far */
int ndx = 0;     /* index of the word being filled */
/* every element starts out as an empty string with length 0 */
wordinfo words[WORD_BUFFER] = {{ .word = "", .len = 0 }};
您可以将 main
中的读取逻辑简化为:
/* read until EOF or until the word table is full */
while (ndx < WORD_BUFFER) {
    c = fgetc (fp);
    /* EOF ends the current word just like a separator does, so a final
       word that is not followed by a separator is still stored */
    int sep = c==EOF || c==' ' || c==',' || c=='(' || c==')' || c==';' || c=='\n';

    if (!sep && len + 1 < LENGTH) {     /* room left: append c to the word */
        words[ndx].word[len++] = (char)c;
        continue;
    }
    if (len) {                          /* if we started a word */
        if (len > words[maxndx].len)    /* check if longest */
            maxndx = ndx;               /* update max index */
        words[ndx].len = len;           /* set words[x].len */
        words[ndx++].word[len] = 0;     /* nul-terminate word */
        len = 0;                        /* reset length */
    }
    if (c == EOF)
        break;
    /* the word was full and c is not a separator: c begins the next word
       instead of being thrown away */
    if (!sep && ndx < WORD_BUFFER && len + 1 < LENGTH)
        words[ndx].word[len++] = (char)c;
}
(注意: maxndx 只是保存最长单词所在结构的索引 (ndx);如果有多个单词具有相同的最大长度,则保存其中第一个的索引。)
总而言之,您可以将代码归结为:
#include <stdio.h>
/* Capacity limits for the word table. */
#define LENGTH      34   /* bytes reserved for each stored word */
#define WORD_BUFFER 750  /* maximum number of words kept */

/* One stored word together with its cached length. */
typedef struct {
    char word[LENGTH];  /* characters of the word */
    int  len;           /* number of characters stored in word */
} wordinfo;
/*
 * Insertion sort of the first n elements of a by len, largest first.
 * Elements with equal len keep their relative order.
 */
void sort (wordinfo *a, int n)
{
    for (int pos = 1; pos < n; pos++) {
        wordinfo key = a[pos];
        int slot = pos;

        /* shift shorter words one place right until key fits */
        for (; slot > 0 && a[slot - 1].len < key.len; slot--)
            a[slot] = a[slot - 1];
        a[slot] = key;
    }
}
/* Prints the first n entries of a, one per line: the word left-aligned in a
 * 34-column field followed by its stored length. */
void printlist (wordinfo *a, int n)
{
    int row = 0;

    while (row < n) {
        wordinfo *entry = &a[row];
        printf (" %-34s (%d-chars)\n", entry->word, entry->len);
        row++;
    }
}
/*
 * Reads words from the file named by argv[1] (or from stdin when no
 * argument is given), then prints the longest word, the words in the order
 * read, and the words sorted by descending length.
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main (int argc, char **argv) {
    int c, len = 0, maxndx = 0, ndx = 0;
    wordinfo words[WORD_BUFFER] = {{ .word = "", .len = 0 }};
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    /* read each char and store in words[x].word, up to WORD_BUFFER words.
     * save the length of each word in words[x].len.
     */
    while (ndx < WORD_BUFFER) {
        c = fgetc (fp);
        /* EOF ends the current word just like a separator does, so a final
           word that is not followed by a separator is still stored */
        int sep = c==EOF || c==' ' || c==',' || c=='(' || c==')' || c==';' || c=='\n';

        if (!sep && len + 1 < LENGTH) {     /* room left: append c to the word */
            words[ndx].word[len++] = (char)c;
            continue;
        }
        if (len) {                          /* if we started a word */
            if (len > words[maxndx].len)    /* check if longest */
                maxndx = ndx;               /* update max index */
            words[ndx].len = len;           /* set words[x].len */
            words[ndx++].word[len] = 0;     /* nul-terminate word */
            len = 0;                        /* reset length */
        }
        if (c == EOF)
            break;
        /* the word was full and c is not a separator: c begins the next
           word instead of being thrown away */
        if (!sep && ndx < WORD_BUFFER && len + 1 < LENGTH)
            words[ndx].word[len++] = (char)c;
    }
    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    printf ("\nlongest word: '%s' (%d-chars)\n\n",
            words[maxndx].word, words[maxndx].len);
    printf ("words read from file:\n\n");
    printlist (words, ndx);     /* print words in order read */
    sort (words, ndx);
    printf ("\nwords sorted by length:\n\n");
    printlist (words, ndx);     /* print words sorted by length */
    return 0;
}
(注意: 程序希望文件名作为第一个参数读取,否则如果没有给出参数,它将从 stdin
(默认情况下)读取)
使用示例/输出
$ ./bin/rdstrings3 <dat/tomorrow.txt
longest word: 'Tomorrow' (8-chars)
words read from file:
Tomorrow (8-chars)
and (3-chars)
tomorrow (8-chars)
and (3-chars)
tomorrow (8-chars)
To (2-chars)
the (3-chars)
last (4-chars)
syllable (8-chars)
of (2-chars)
recorded (8-chars)
time (4-chars)
words sorted by length:
Tomorrow (8-chars)
tomorrow (8-chars)
tomorrow (8-chars)
syllable (8-chars)
recorded (8-chars)
last (4-chars)
time (4-chars)
and (3-chars)
and (3-chars)
the (3-chars)
To (2-chars)
of (2-chars)
检查一下,如果您有任何问题,请告诉我。选择使用结构并存储 len
还是只在需要的地方调用 strlen
完全取决于您。
我的方法是从文件中逐个读取字符并计数:每遇到一个非法字符(分隔符),就记录当前字符串的长度,以及该长度的字符串已经出现了多少个。现在我想用读入的字符构建字符串,并把它们存储在一个数组中。程序基本可以工作,但当读入的两个字符串长度相同时,我尝试把这两个字符串拼接在一起,程序就会中止(abort)或出现段错误。如果您不介意给我一些反馈,我已在代码中用注释标出了出问题的那一行(原始代码的第 129 行)……我希望在处理完成后打印出每种长度的字符串。
这是我用来测试的文本文件:
Tomorrow, and tomorrow, and tomorrow,
To the last syllable of recorded time;
源代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
*this program reads in a text file from the command line
*then counts and stores the number of words of all lengths
*/
/* Size limits shared by every table below. */
#define LENGTH 34        /* slots in the per-length tables, bytes in `strings` */
#define WORD_BUFFER 750  /* first dimension of `array` */

/* strLengths[n] is incremented by main() each time a word of n characters ends. */
int strLengths[LENGTH];
/* Scratch copy of strLengths that sort() is free to overwrite. */
int lengthsCopy[LENGTH];
/* Table of char pointers; main() only ever uses column 0 of a row. */
char *array[WORD_BUFFER][LENGTH];
/* Buffer holding the characters of the word currently being read. */
char strings[LENGTH];
/* Number of characters seen so far in the current word. */
int counter = 0;
/* Most recent fgetc() result. */
int ch;
/* Not referenced by the code visible here. */
int tester;
/*
 * Prints the word-length counts held in strLengths, highest count first.
 * All work is done on lengthsCopy, so strLengths itself is left untouched.
 * Slot 0 takes part in the search for the maximum but is never printed.
 */
void sort()
{
    int peak = 0;

    /* snapshot the counts and remember the largest one in a single pass */
    for (int k = 0; k < LENGTH; k++) {
        lengthsCopy[k] = strLengths[k];
        if (lengthsCopy[k] > peak)
            peak = lengthsCopy[k];
    }
    printf("max val in the array is %d\n", peak);

    /* walk the count downwards, reporting every slot that holds it */
    for (; peak != 0; peak--) {
        for (int slot = LENGTH - 1; slot > 0; slot--) {
            if (lengthsCopy[slot] != peak)
                continue;
            printf("Count[%02d]=%02d;\n", slot, peak);
            /* report the other slots with the same count and clear them so
               the enclosing scan does not report them a second time */
            for (int other = LENGTH - 1; other > 0; other--) {
                if (other == slot || lengthsCopy[other] != peak)
                    continue;
                printf("Count[%02d]=%02d;\n", other, peak);
                lengthsCopy[other] = 0;
            }
        }
    }
}
/*
 * Prints "Count[len]=count;" for every word length that occurred at least
 * once, shortest length first. Slot 0 is skipped.
 */
void printList()
{
    /* strLengths has LENGTH elements, so the last valid index is LENGTH - 1;
       the loop must stop before LENGTH to stay inside the array */
    for (int i = 1; i < LENGTH; i++) {
        if (strLengths[i] > 0)
            printf("Count[%02d]=%02d;\n", i, strLengths[i]);
    }
}
/*
 * Reads the file named by argv[1] one character at a time and counts how
 * many words of each length it contains. A word ends at a space, comma,
 * parenthesis, semicolon or newline. The per-length counts are then printed
 * by printList() and sort(). Exits with status 1 when no file name is given
 * or the file cannot be opened, otherwise with status 0.
 *
 * NOTE(review): `counter` indexes strLengths and array, and `strPoint`
 * indexes strings, without any bounds check; a sufficiently long word
 * would index outside those arrays.
 */
int main (int argc, char *argv[])
{
    //error message if input file is not passed
    if(argc < 2)
    {
        printf("You have to give me a file!\n");
        exit(1);
    }
    FILE *text = fopen(argv[1], "r");
    //error message if the file could not be opened
    if(text == NULL)
    {
        printf("No content to read in %s. \n", argv[1]);
        exit(1);
    }
    //iterate through text until end of file
    ch = fgetc(text);
    int strPoint =0;
    while(ch != EOF)
    {
        //if illegal char is met, add a count to the array value of current counter
        //set counter back to 0
        //scan next char
        if(ch==' '||ch==','||ch=='('||ch==')'||ch==';'||ch=='\n')
        {
            if(array[counter][0] == NULL)//if length not defined yet
            {
                /* NOTE(review): this stores the address of the one shared
                   `strings` buffer, not a copy of the word just read */
                array[counter][0] = strings;//add current string build to the array
                printf("%s\n",array[counter][0] );
            }
            /* the '\0' test skips the append when no characters were collected */
            else if(array[counter][0] != NULL && strings[0] != '\0')
            {//else length is defined add to text bank
                printf("else if reached\n");
                printf("%s\n",strings );
                printf("%lu\n",strlen(array[counter][0]) );
                int arrayptr = strlen(*array[counter]);
                printf("ptr %d",arrayptr);
                /* next line aborts / seg_faults */
                /* NOTE(review): array[counter][0] was set to `strings` in the
                   branch above, so destination and source are the same buffer */
                strncat(*array[counter],strings,strlen(strings));
            }
            strLengths[counter]++;
            counter = 0;
            ch = fgetc(text);
            memset(strings, 0, sizeof(strings));//clear stringBuild
            strPoint =0;
        }
        //else a legal character, increase counter, scan next char
        else
        {
            strings[strPoint] = ch;
            printf("string build %c\n",strings[strPoint]);
            counter++;
            strPoint++;
            ch = fgetc(text);
        }
    }
    fclose(text);
    /* NOTE(review): array[3][0] is still NULL if no 3-character word was read */
    printf("stored string %s\n",array[3][0] );
    printList();
    //call sort
    sort();
    exit(0);
}
从你的代码中我可以看出,你的主要问题是你对发生的事情有误解:
array[counter][0] = strings;//add current string build to the array
您是在把指针 array[counter][0] 设置为 strings 的地址。您只有一个 strings 变量,因此每个 array[counter][0] 都指向同一块内存(也就是说,array 中每一行指向的都是 strings 里最后存入的那个字符串)。
您把 strncat 当作 strcpy 来用(借助 strncat 总会追加 nul 终止符的特性),这种做法本身没有错,但要意识到,对于较长的缓冲区这可能带来性能损失。您可能还有其他逻辑问题,但它们被代码笨拙的布局,以及对“指向 char 的指针数组”的非标准用法掩盖了。
意见反馈
尝试简化您的实现。如果您主要关心的是存储从文件中读取的单词以及每个单词的长度以便排序,那么可以简单地把单词存储在 char 的二维数组中,每次需要长度时调用 strlen;或者,以每个单词多占一个 int 的空间为代价,用一个简单的结构把每个单词的长度与单词本身关联起来,例如:
/* One stored word together with its cached length. */
typedef struct {
    char word[LENGTH];  /* characters of the word */
    int  len;           /* number of characters stored in word */
} wordinfo;
然后您只需创建一个结构数组(例如 wordinfo words[WORD_BUFFER];),把单词存储在 words[x].word 中,把长度存储在 words[x].len 中。如果您不想使用结构,只需声明一个二维数组(例如 char words[WORD_BUFFER][LENGTH];)并把单词存储在其中即可。(使用结构时每个单词多占 4 个字节;如果存储空间不成问题,保存读取字符时已经得到的长度,就可以省去反复调用 strlen 的开销。)
您还可以声明一个指向 char[LENGTH] 数组的指针(例如 char (*array)[LENGTH];),并使用 malloc 为其中的 WORD_BUFFER 个元素动态分配存储空间(也可以使用 calloc,把分配的所有字节初始化为零)。这是一个不错的选择,但动态分配似乎并不是您的目标。
此外,避免使用全局变量。它们几乎从不 需要并且会增加名称冲突和值覆盖的风险。将您的变量声明为 main()
的本地变量,并根据需要将它们作为参数传递。例如,使用 struct 实现,您可以编写按长度排序和打印如下,使用指向您的 struct 数组的指针和填充的数字作为参数:
/*
 * Insertion sort of the first n elements of a by len, largest first.
 * Elements with equal len keep their relative order.
 */
void sort (wordinfo *a, int n)
{
    for (int pos = 1; pos < n; pos++) {
        wordinfo key = a[pos];
        int slot = pos;

        /* shift shorter words one place right until key fits */
        for (; slot > 0 && a[slot - 1].len < key.len; slot--)
            a[slot] = a[slot - 1];
        a[slot] = key;
    }
}
/* Prints the first n entries of a, one per line: the word left-aligned in a
 * 34-column field followed by its stored length. */
void printlist (wordinfo *a, int n)
{
    int row = 0;

    while (row < n) {
        wordinfo *entry = &a[row];
        printf (" %-34s (%d-chars)\n", entry->word, entry->len);
        row++;
    }
}
(注意: 除非作业要求,否则不要自己编写或使用自己的排序函数。C 标准库提供的 qsort 效率高得多,而且经过了充分测试;您只需编写一个 compare 函数,用来比较待排序内容的两个元素,剩下的工作交给 qsort 即可。)
最后,从文件中读取每个字符的逻辑一点都不复杂。简单地阅读字符,检查它,然后采取任何适当的行动。唯一增加的复杂性来自测试,以确保您保持在 LENGTH
个字符和 WORD_BUFFER
个单词以内,以防止覆盖存储范围。即使使用结构实现,声明和初始化为:
int c;           /* character most recently read */
int len = 0;     /* characters stored so far in the current word */
int maxndx = 0;  /* index of the longest word seen so far */
int ndx = 0;     /* index of the word being filled */
/* every element starts out as an empty string with length 0 */
wordinfo words[WORD_BUFFER] = {{ .word = "", .len = 0 }};
您可以将 main
中的读取逻辑简化为:
/* read until EOF or until the word table is full */
while (ndx < WORD_BUFFER) {
    c = fgetc (fp);
    /* EOF ends the current word just like a separator does, so a final
       word that is not followed by a separator is still stored */
    int sep = c==EOF || c==' ' || c==',' || c=='(' || c==')' || c==';' || c=='\n';

    if (!sep && len + 1 < LENGTH) {     /* room left: append c to the word */
        words[ndx].word[len++] = (char)c;
        continue;
    }
    if (len) {                          /* if we started a word */
        if (len > words[maxndx].len)    /* check if longest */
            maxndx = ndx;               /* update max index */
        words[ndx].len = len;           /* set words[x].len */
        words[ndx++].word[len] = 0;     /* nul-terminate word */
        len = 0;                        /* reset length */
    }
    if (c == EOF)
        break;
    /* the word was full and c is not a separator: c begins the next word
       instead of being thrown away */
    if (!sep && ndx < WORD_BUFFER && len + 1 < LENGTH)
        words[ndx].word[len++] = (char)c;
}
(注意: maxndx 只是保存最长单词所在结构的索引 (ndx);如果有多个单词具有相同的最大长度,则保存其中第一个的索引。)
总而言之,您可以将代码归结为:
#include <stdio.h>
/* Capacity limits for the word table. */
#define LENGTH      34   /* bytes reserved for each stored word */
#define WORD_BUFFER 750  /* maximum number of words kept */

/* One stored word together with its cached length. */
typedef struct {
    char word[LENGTH];  /* characters of the word */
    int  len;           /* number of characters stored in word */
} wordinfo;
/*
 * Insertion sort of the first n elements of a by len, largest first.
 * Elements with equal len keep their relative order.
 */
void sort (wordinfo *a, int n)
{
    for (int pos = 1; pos < n; pos++) {
        wordinfo key = a[pos];
        int slot = pos;

        /* shift shorter words one place right until key fits */
        for (; slot > 0 && a[slot - 1].len < key.len; slot--)
            a[slot] = a[slot - 1];
        a[slot] = key;
    }
}
/* Prints the first n entries of a, one per line: the word left-aligned in a
 * 34-column field followed by its stored length. */
void printlist (wordinfo *a, int n)
{
    int row = 0;

    while (row < n) {
        wordinfo *entry = &a[row];
        printf (" %-34s (%d-chars)\n", entry->word, entry->len);
        row++;
    }
}
/*
 * Reads words from the file named by argv[1] (or from stdin when no
 * argument is given), then prints the longest word, the words in the order
 * read, and the words sorted by descending length.
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main (int argc, char **argv) {
    int c, len = 0, maxndx = 0, ndx = 0;
    wordinfo words[WORD_BUFFER] = {{ .word = "", .len = 0 }};
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    /* read each char and store in words[x].word, up to WORD_BUFFER words.
     * save the length of each word in words[x].len.
     */
    while (ndx < WORD_BUFFER) {
        c = fgetc (fp);
        /* EOF ends the current word just like a separator does, so a final
           word that is not followed by a separator is still stored */
        int sep = c==EOF || c==' ' || c==',' || c=='(' || c==')' || c==';' || c=='\n';

        if (!sep && len + 1 < LENGTH) {     /* room left: append c to the word */
            words[ndx].word[len++] = (char)c;
            continue;
        }
        if (len) {                          /* if we started a word */
            if (len > words[maxndx].len)    /* check if longest */
                maxndx = ndx;               /* update max index */
            words[ndx].len = len;           /* set words[x].len */
            words[ndx++].word[len] = 0;     /* nul-terminate word */
            len = 0;                        /* reset length */
        }
        if (c == EOF)
            break;
        /* the word was full and c is not a separator: c begins the next
           word instead of being thrown away */
        if (!sep && ndx < WORD_BUFFER && len + 1 < LENGTH)
            words[ndx].word[len++] = (char)c;
    }
    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    printf ("\nlongest word: '%s' (%d-chars)\n\n",
            words[maxndx].word, words[maxndx].len);
    printf ("words read from file:\n\n");
    printlist (words, ndx);     /* print words in order read */
    sort (words, ndx);
    printf ("\nwords sorted by length:\n\n");
    printlist (words, ndx);     /* print words sorted by length */
    return 0;
}
(注意: 程序希望文件名作为第一个参数读取,否则如果没有给出参数,它将从 stdin
(默认情况下)读取)
使用示例/输出
$ ./bin/rdstrings3 <dat/tomorrow.txt
longest word: 'Tomorrow' (8-chars)
words read from file:
Tomorrow (8-chars)
and (3-chars)
tomorrow (8-chars)
and (3-chars)
tomorrow (8-chars)
To (2-chars)
the (3-chars)
last (4-chars)
syllable (8-chars)
of (2-chars)
recorded (8-chars)
time (4-chars)
words sorted by length:
Tomorrow (8-chars)
tomorrow (8-chars)
tomorrow (8-chars)
syllable (8-chars)
recorded (8-chars)
last (4-chars)
time (4-chars)
and (3-chars)
and (3-chars)
the (3-chars)
To (2-chars)
of (2-chars)
检查一下,如果您有任何问题,请告诉我。选择使用结构并存储 len
还是只在需要的地方调用 strlen
完全取决于您。