在 C 中处理从文件 I/O 存储的字符串时出现问题

Problems Manipulating strings stored from File I/O in C

我的方法是从文件中逐个读取字符并计数,这样当遇到非法字符时,我就能记录当前字符串的长度,以及已经遇到了多少个该长度的字符串。现在我试图用读入的字符构建字符串,并将它们存储在一个数组中。它几乎可以正常工作,但是当读入的两个字符串长度相同时,我尝试把这 2 个字符串拼接在一起,程序就会中止(abort)并出现段错误。如果您不介意给我一些反馈,我已在代码的第 129 行标记了出问题的地方……我希望在完成后能按长度打印每个字符串。

这是我用来测试的文本文件:

Tomorrow, and tomorrow, and tomorrow,
To the last syllable of recorded time;

源代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 *this program reads in a text file from the command line
 *then counts and stores the number of words of all lengths
 */
/* size of the per-word character buffer and of the per-length count tables */
#define LENGTH 34
/* number of rows in the table of stored string pointers */
#define WORD_BUFFER 750

/* strLengths[n] counts the words of length n; lengthsCopy is the scratch
 * copy that sort() works on */
int strLengths[LENGTH],lengthsCopy[LENGTH];
/* 2D table of char pointers; main() only uses column 0, indexed by word length */
char *array[WORD_BUFFER][LENGTH];
/* the single buffer in which the current word is built */
char strings[LENGTH];
/* length of the word currently being read */
int counter = 0;
/* ch holds the last fgetc() result; tester is not used in the code shown */
int ch,tester;

 /*
  * Print the word-length counts ordered by count, largest count first.
  * Works on lengthsCopy (a fresh copy of strLengths). Within one count value
  * the lengths are listed from the highest index down; index 0 is never listed.
  * Every duplicate of a count after the first one printed is zeroed in
  * lengthsCopy.
  */
 void sort()
 {
    int peak = 0;
    int idx, level;

    /* snapshot the counts and find the largest one in the same pass */
    for (idx = 0; idx < LENGTH; idx++)
    {
        lengthsCopy[idx] = strLengths[idx];
        if (lengthsCopy[idx] > peak)
            peak = lengthsCopy[idx];
    }

    printf("max val in the array is %d\n",peak);

    /* walk the count values downwards, listing every length with that count */
    for (level = peak; level != 0; level--)
    {
        int seen_first = 0;

        for (idx = LENGTH-1; idx > 0; idx--)
        {
            if (lengthsCopy[idx] != level)
                continue;

            printf("Count[%02d]=%02d;\n",idx,level);

            /* only the first match keeps its value; later ones are erased */
            if (seen_first)
                lengthsCopy[idx] = 0;
            seen_first = 1;
        }
    }
}

/*
 * Print the number of words recorded for every word length that occurred
 * at least once, in ascending order of length. Index 0 is skipped.
 */
void printList()
{
    int i;
    /* strLengths has LENGTH elements, so the last valid index is LENGTH-1 */
    for(i=1; i<LENGTH; i++)
    {
        if(strLengths[i] > 0)
        {
            printf("Count[%02d]=%02d;\n",i,strLengths[i]);
        }
    }
}

/*
 * Reads the file named by argv[1] one character at a time, builds each word
 * in the global buffer `strings`, counts words per length in strLengths and
 * tries to collect the words of each length in array[length][0].
 * Exits with status 1 when no file name is given or the file cannot be opened.
 *
 * NOTE: the strncat call below is the failure the question is about and is
 * intentionally left as written.
 * NOTE(review): neither strings[strPoint] nor strLengths[counter] is bounds
 * checked, so a word of LENGTH or more characters writes past both arrays.
 */
int main (int argc, char *argv[])
{
    //error message if input file is not passed
    if(argc < 2)
    {
        printf("You have to give me a file!\n");
        exit(1);
    }
    FILE *text = fopen(argv[1], "r");
    //error message if the file could not be opened
    if(text == NULL)
    {
        printf("No content to read in %s. \n", argv[1]);
        exit(1);
    }
    //iterate through text until end of file
    ch = fgetc(text);
    int strPoint =0;
    while(ch != EOF)
    {
        //if illegal char is met, add a count to the array value of current counter
        //set counter back to 0
        //scan next char
        if(ch==' '||ch==','||ch=='('||ch==')'||ch==';'||ch=='\n')
        {

            if(array[counter][0] == NULL)//if length not defined yet
            {
                //stores the ADDRESS of the shared buffer, not a copy of its text
                array[counter][0] = strings;//add current string build to the array
                printf("%s\n",array[counter][0] );
            }
            else if(array[counter][0] != NULL && strings[0] != '\0')
            {//else length is defined add to text bank
                printf("else if reached\n");
                printf("%s\n",strings );
                printf("%zu\n",strlen(array[counter][0]) );
                int arrayptr = strlen(*array[counter]);
                printf("ptr %d",arrayptr);
                /* next line aborts / seg_faults */
                /* array[counter][0] is `strings` itself, so source and
                 * destination overlap, which strncat does not permit */
                strncat(*array[counter],strings,strlen(strings)); 
            }

            strLengths[counter]++;
            counter = 0;
            ch = fgetc(text);
            memset(strings, 0, sizeof(strings));//clear stringBuild
            strPoint =0;
        }
        //else a legal character, increase counter, scan next char
        else
        {
            strings[strPoint] = ch;
            printf("string build %c\n",strings[strPoint]);
            counter++;
            strPoint++;
            ch = fgetc(text);
        }
    }
    fclose(text);
    printf("stored string %s\n",array[3][0] );

    printList();
    //call sort
    sort();

    exit(0);
}

从你的代码中我可以看出,你的主要问题是你对发生的事情有误解:

array[counter][0] = strings;//add current string build to the array

您正在将指针 array[counter][0] 设置为 strings 的地址。你只有一个 strings 变量,所以每个 array[counter][0] 都指向同一块内存(因此 array 中每一行指向的内容,都是 strings 中最后保存的那个字符串)。

你把 strncat 当作 strcpy 来用,并依靠 strncat 的行为来保证以 nul 结尾,这本身没有错,但要意识到这对较长的缓冲区可能带来性能损失。您可能还有其他逻辑问题,但它们被代码的笨拙布局以及对指向 char 的指针数组的非标准用法所掩盖。

意见反馈

尝试简化您的实现。如果您主要关心的是存储从文件中读取的单词以及每个单词的长度以便排序,那么您可以简单地将单词存储在 char 的二维数组中,并在每次需要长度时调用 strlen;或者以每个单词多占一个 int 的代价,使用一个简单的结构体将每个单词的长度与单词本身关联起来,例如

/* one stored word together with its cached length */
typedef struct {
    char word[LENGTH];  /* word text; holds at most LENGTH-1 chars plus the nul */
    int len;            /* number of characters stored in word */
} wordinfo;

然后您只需创建一个结构体数组(例如 wordinfo words[WORD_BUFFER];),将单词存储在 words[x].word 中,将长度存储在 words[x].len 中。如果您想放弃使用结构体,则只需声明一个二维数组(例如 char words[WORD_BUFFER][LENGTH];)并将单词存储在那里。(以每个单词 4 字节的代价——如果存储空间不是问题——保存读取字符时已经得到的长度,可以省去反复调用 strlen 的开销。)

您还可以声明一个指向 LENGTH 个 char 的数组的指针(例如 char (*array)[LENGTH];),并使用 malloc 为其中的 WORD_BUFFER 个动态分配存储空间(您也可以使用 calloc,将分配的所有字节初始化为零)。这是一个不错的选择,但动态分配似乎不是您的目标。

此外,避免使用全局变量。它们几乎从不需要,并且会增加名称冲突和值被覆盖的风险。将您的变量声明为 main() 的局部变量,并根据需要将它们作为参数传递。例如,使用 struct 实现时,您可以像下面这样编写按长度排序和打印的函数,以指向 struct 数组的指针和已填充的元素个数作为参数:

/* insertion sort of the first n entries of a by len, longest first;
 * entries of equal len keep their original relative order */
void sort (wordinfo *a, int n)
{
    for (int pos = 1; pos < n; pos++) {
        wordinfo pending = a[pos];      /* element being placed */
        int slot;

        /* shift every strictly shorter predecessor one place to the right */
        for (slot = pos; slot > 0; slot--) {
            if (a[slot - 1].len >= pending.len)
                break;
            a[slot] = a[slot - 1];
        }
        a[slot] = pending;
    }
}

/* tabular print of the first n words in a, one per line, each followed by
 * its stored length; the word column is LENGTH wide so it always fits the
 * longest word the buffer can hold */
void printlist (wordinfo *a, int n)
{
    int i;
    for (i = 0; i < n; i++)
        printf ("  %-*s  (%d-chars)\n", LENGTH, a[i].word, a[i].len);
}

(注意: 除非作业要求,否则不要自己编写或使用自己的排序。C 提供的 qsort 效率高得多且经过充分测试,您只需编写一个 compare 函数来比较待排序内容的两个元素,然后让 qsort 完成工作)

最后,从文件中读取每个字符的逻辑一点都不复杂。只需读取字符,检查它,然后采取相应的操作。唯一增加的复杂性来自边界检查,以确保您保持在 LENGTH 个字符和 WORD_BUFFER 个单词以内,防止写到存储范围之外。即使使用结构体实现,声明并初始化为:

    /* c: character just read; len: length of the word being built;
     * maxndx: index of the longest word so far; ndx: number of words stored */
    int c, len = 0, maxndx = 0, ndx = 0;
    /* elements without an explicit initializer are zero-initialized as well */
    wordinfo words[WORD_BUFFER] = {{ .word = "", .len = 0 }};

您可以将 main 中的读取逻辑简化为:

    while (ndx < WORD_BUFFER && (c = fgetc (fp)) != EOF) {
        if (len + 1 == LENGTH ||        /* check if full or c matches */
            c==' ' || c==',' || c=='(' || c==')' || c==';' || c=='\n') {
            if (len) {                          /* if we started a word */
                if (len > words[maxndx].len)    /* check if longest  */
                    maxndx = ndx;               /* update max index  */
                words[ndx].len = len;           /* set words[x].len  */
                words[ndx++].word[len] = 0;     /* nul-terminat word */
                len = 0;                        /* reset length */
            }
        }
        else
            words[ndx].word[len++] = c; /* assign c to words[x].word[len] */
    }

(注意: maxndx 只是保存最长单词所在结构体的索引 (ndx);如果有多个单词具有相同的最大长度,则保存其中第一个的索引)

总而言之,您可以将代码归结为:

#include <stdio.h>

#define LENGTH 34
#define WORD_BUFFER 750

/* one stored word together with its cached length */
typedef struct {
    char word[LENGTH];  /* word text; holds at most LENGTH-1 chars plus the nul */
    int len;            /* number of characters stored in word */
} wordinfo;

/* insertion sort of the first n entries of a by len, longest first;
 * entries of equal len keep their original relative order */
void sort (wordinfo *a, int n)
{
    for (int pos = 1; pos < n; pos++) {
        wordinfo pending = a[pos];      /* element being placed */
        int slot;

        /* shift every strictly shorter predecessor one place to the right */
        for (slot = pos; slot > 0; slot--) {
            if (a[slot - 1].len >= pending.len)
                break;
            a[slot] = a[slot - 1];
        }
        a[slot] = pending;
    }
}

/* tabular print of the first n words in a, one per line, each followed by
 * its stored length; the word column is LENGTH wide so it always fits the
 * longest word the buffer can hold */
void printlist (wordinfo *a, int n)
{
    int i;
    for (i = 0; i < n; i++)
        printf ("  %-*s  (%d-chars)\n", LENGTH, a[i].word, a[i].len);
}

/*
 * Read words from the file named by argv[1] (or from stdin when no argument
 * is given), then print the longest word, the words in the order read, and
 * the words sorted by descending length.
 * Returns 0 on success and 1 when the file cannot be opened.
 */
int main (int argc, char **argv) {

    int c, len = 0, maxndx = 0, ndx = 0;
    wordinfo words[WORD_BUFFER] = {{ .word = "", .len = 0 }};
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    /* read each char and store in words[x].word up to WORD_BUFFER words.
     * save the length of each word in words[x].len.
     */
    for (;;) {
        int delim;

        c = fgetc (fp);
        /* EOF ends the current word just like a separator does, so the
         * final word is kept even when the input has no trailing separator */
        delim = c == EOF ||
                c==' ' || c==',' || c=='(' || c==')' || c==';' || c=='\n';

        if (!delim)
            words[ndx].word[len++] = (char)c;   /* append c to current word */

        /* a word ends at a separator or once its buffer is full; the
         * character that filled the buffer has already been stored */
        if (len && (delim || len + 1 == LENGTH)) {
            if (len > words[maxndx].len)    /* check if longest  */
                maxndx = ndx;               /* update max index  */
            words[ndx].len = len;           /* set words[x].len  */
            words[ndx++].word[len] = 0;     /* nul-terminate word */
            len = 0;                        /* reset length */
        }

        if (c == EOF || ndx == WORD_BUFFER)
            break;
    }
    if (fp != stdin) fclose (fp);       /* close file if not stdin */

    printf ("\nlongest word: '%s'  (%d-chars)\n\n", 
            words[maxndx].word, words[maxndx].len);

    printf ("words read from file:\n\n");
    printlist (words, ndx);     /* print words in order read */

    sort (words, ndx);

    printf ("\nwords sorted by length:\n\n");
    printlist (words, ndx);     /* print words sorted by length */

    return 0;
}

注意: 程序期望将要读取的文件名作为第一个参数;如果没有给出参数,则(默认)从 stdin 读取。

使用示例/输出

$ ./bin/rdstrings3 <dat/tomorrow.txt

longest word: 'Tomorrow'  (8-chars)

words read from file:

  Tomorrow                            (8-chars)
  and                                 (3-chars)
  tomorrow                            (8-chars)
  and                                 (3-chars)
  tomorrow                            (8-chars)
  To                                  (2-chars)
  the                                 (3-chars)
  last                                (4-chars)
  syllable                            (8-chars)
  of                                  (2-chars)
  recorded                            (8-chars)
  time                                (4-chars)

words sorted by length:

  Tomorrow                            (8-chars)
  tomorrow                            (8-chars)
  tomorrow                            (8-chars)
  syllable                            (8-chars)
  recorded                            (8-chars)
  last                                (4-chars)
  time                                (4-chars)
  and                                 (3-chars)
  and                                 (3-chars)
  the                                 (3-chars)
  To                                  (2-chars)
  of                                  (2-chars)

检查一下,如果您有任何问题,请告诉我。选择使用结构并存储 len 还是只在需要的地方调用 strlen 完全取决于您。