删除结构节点会导致另一个问题

remove a structure node cause another problem

我正在尝试把两个文件(以后可能更多)中的单词添加到一个结构中。这部分可以正常工作。但我还提供了一个选项,可以从结构中删除某些单词(列在 stop.txt 中)。在启用该选项并且读取了两个文件的情况下,输出不正确。

例如 test.txt 我有一些随机字符串:

kiio
luio
kiio
ohaio
lol

test1.txt:

vola
kiio
kiio
haio
lol

stop.txt:

luio
kiio

激活remove_word时的输出:

lol     test.txt        [1] {5}                                                                                                
lol     %~      [1] {5}                                                                                                        
lol     %~      [1] {5}                                                                                                        
luio    test.txt        [1] {2}                                                                                                
ohaio   test.txt        [1] {4}                                                                                                
vola    test1.txt       [1] {1}  

如果不激活 remove_word,输出为:

kiio    test.txt        [2] {1,3}     I need to have two nodes with the same word but different `fileno`                                                                                         
kiio    test1.txt       [2] {2,3}                                                                                              
lol     test.txt        [1] {5}                                                                                                
lol     test1.txt       [1] {5}                                                                                                
luio    test.txt        [1] {2}                                                                                                
ohaio   test.txt        [1] {4}                                                                                                
vola    test1.txt       [1] {1}                                                                                                
haio    test1.txt       [1] {4}    

我认为问题出在函数 remove_word 但我不确定,因为它只对一个文件有效(删除单词)。

结构定义如下:

/*
 * One entry of the word list.
 * The s, line_numbers and fileno members are released with free() by
 * remove_word and by the cleanup loop in main.
 */
typedef struct _word {
    char *s;                /* the word */
    int count;              /* number of times word occurs */
    int *line_numbers;      // Array of line numbers
    int num_line_numbers;   // Size of the array of line numbers
    char *fileno;           // presumably the name of the file the word came from
                            // (insert_word is passed "test.txt" / "test1.txt") -- TODO confirm
} word;
// Container for the whole list: a heap-allocated array of word entries
// together with its current length.
typedef struct {
    word *words;      // The array of word structs
    int num_words;    // The size of the array
} word_list;

remove_word函数:

/*
 * Remove entries whose text equals word_to_delete from the list.
 *
 * For each matching entry the function frees its s, line_numbers and
 * fileno members, copies the entries that follow it one slot to the left
 * and shrinks the array by one element with realloc. If realloc returns
 * NULL the process terminates through exit(0).
 *
 * NOTE(review): after a removal the loop still increments i, so the entry
 * that was just moved into slot i is never compared; two adjacent entries
 * with the same text are therefore not both removed.
 * NOTE(review): source and destination of the memcpy overlap whenever more
 * than one entry follows the removed one; memmove is the call specified
 * for overlapping regions.
 */
void remove_word(word_list *words, const char *word_to_delete) {
    for (int i = 0; i < words->num_words; i++) {
        if (0 == strcmp(words->words[i].s, word_to_delete)) {
            // TODO: handle special case where there is only 1 word in list
            // (the realloc below is then asked for 0 bytes and may return
            // NULL, which triggers the exit(0))

            // Number of entries located after the one being removed
            int number_of_words_to_right = words->num_words - i - 1;
            // Release the memory owned by the removed entry
            free(words->words[i].s);
            free(words->words[i].line_numbers);
            free(words->words[i].fileno);

            // Close the gap: move the following entries one slot to the left
            memcpy(&words->words[i], &words->words[i + 1], sizeof(word) * number_of_words_to_right);
            // Resize the array (technically not required);
            // num_words is decremented here, as part of the size expression
            word *tmp = realloc(words->words, sizeof(word) * --words->num_words);
            if (NULL == tmp) exit(0);
            words->words = tmp;
        }
    }
    return;
}

主要:

/*
 * Build a word list from test.txt and test1.txt, optionally remove the
 * words listed in stop.txt, print the list and release all memory.
 */
int main() {
    // NOTE(review): i, n and m are never read in this function
    int i, n, m;
    // option == 0 enables the stop-word removal step further down
    int option = 0;
    n = 0;

    // NOTE(review): the result of fopen is not checked before use
    FILE *file = fopen("test.txt", "r"); 

    // Allocate the list header and zero it (no entries, num_words == 0)
    word_list *words = malloc(sizeof(word_list));
    if (NULL == words)
        exit(0);
    memset(words, 0, sizeof(word_list));

    // First file: split every line on spaces and insert each token
    char s[1000];
    int line_number = 1;
    while (fgets(s, sizeof(s), file)) {
        char *word = strtok(s, " ");
        while (word != NULL) {
            // fgets keeps the newline; strip it from the last token of the line
            size_t len = strlen(word);
            if (len > 0 && word[len - 1] == '\n')
                word[--len] = 0;
            insert_word(words, word, line_number, "test.txt");
            word = strtok(NULL, " ");
        }
        line_number += 1;
    }
    fclose(file);

    FILE *file1 = fopen("test1.txt", "r"); 

    // Second file: same tokenizing loop, line numbers restart at 1.
    // NOTE(review): this loop reads from `file`, which was closed above,
    // instead of `file1`.
    line_number = 1;
    while (fgets(s, sizeof(s), file)) {
        char *word = strtok(s, " ");
        while (word != NULL) {
            size_t len = strlen(word);
            if (len > 0 && word[len - 1] == '\n')
                word[--len] = 0;
            insert_word(words, word, line_number, "test1.txt");
            word = strtok(NULL, " ");
        }
        line_number += 1;
    }
    fclose(file1);

     // Stop-word removal: each line of stop.txt is passed to remove_word.
     // NOTE(review): the line is passed as read by fgets, i.e. with its
     // trailing newline still attached.
     if (option == 0) {
         FILE *stopfile = fopen("stop.txt", "r"); /* should check the result */
         char fline[256];

         while (fgets(fline, sizeof(fline), stopfile)) {
             remove_word(words, fline);
         }
         fclose(stopfile);
    }

    printlist(words);

    // Release every remaining entry, then the array and the list header
    for (int i = 0; i < words->num_words; i++) {
        free(words->words[i].s);
        free(words->words[i].line_numbers);
        free(words->words[i].fileno);
    }
    free(words->words);
    free(words);
}

我发现了一些东西,不确定这是否足以修复代码:从数组中删除单词时必须减少 num_words,尝试添加:

words->num_words--;

到函数 remove_word 中 if 块的末尾,即 words->words = tmp; 这一行的下方。

我们无法测试您的程序,因为您没有提供 insert_word 和 printlist 的源代码。

但是发布的代码中存在多个问题:

  • 在 remove_word 函数中,删除一个单词后,它后面的元素会整体前移一位,因此您应该递减 i,使循环在下一次迭代中重新检查同一索引;否则当两个文件中都存在相同的词时,紧随其后的那个条目会被跳过。

  • 在第二个读取循环中,您从 file 读取但此 FILE* 已关闭并且您使用了不同的 FILE 指针 file1打开第二个文件test1.txt。这具有未定义的行为。你可能很幸运, file1 可能会偶然指向内存中与 file 相同的位置。只需对所有文件使用相同的变量 file,或者更好:使用单独的函数从作为参数给定的文件中读取单词。

  • 您不会从传递给 remove_word 的单词中删除尾随换行符,因此不会从字典中删除任何内容。

这是您的程序的修改版本:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * One dictionary entry: a word as it occurs in one particular file.
 * insert_word creates a separate entry for each (word, file) pair.
 * s, line_numbers and fileno are heap allocations owned by the entry
 * and released with free().
 */
typedef struct _word {
    char *s;                /* the word */
    int count;              /* number of times word occurs in that file */
    int *line_numbers;      // Array of the distinct line numbers where the word occurs
    int num_line_numbers;   // Size of the array of line numbers
    char *fileno;           // Name of the file the word was read from (a string, not a descriptor)
} word;

// The dictionary itself: a heap-allocated array of entries plus its length.
// insert_word keeps the entries in lexicographical order of their word.
typedef struct {
    word *words;      // The array of word structs (NULL when the list is empty)
    int num_words;    // The size of the array
} word_list;

/* Convert the NUL-terminated string s to lower case in place.
 * Each character goes through tolower() as an unsigned char.
 * Returns s so the call can be nested inside another expression.
 */
char *strlower(char *s) {
    char *cursor = s;
    while (*cursor != '\0') {
        *cursor = (char)tolower((unsigned char)*cursor);
        cursor++;
    }
    return s;
}

/* Print the dictionary to stdout.
 * Consecutive entries holding the same word (one per file) form a group.
 * The word and the group size are printed once, followed by one line per
 * entry with the file name, the occurrence count and the line numbers.
 * Lines after the first of a group are indented by an extra tab.
 */
void printlist(const word_list *words) {
    int start = 0;
    while (start < words->num_words) {
        const char *text = words->words[start].s;

        /* find the end of the run of entries sharing this word */
        int end = start + 1;
        while (end < words->num_words && strcmp(text, words->words[end].s) == 0) {
            end++;
        }

        printf("%s\t[%d]", text, end - start);
        for (int idx = start; idx < end; idx++) {
            const word *entry = &words->words[idx];
            printf("%s\t%s\t[%d]\t{%d", idx == start ? "" : "\t",
                   entry->fileno, entry->count, entry->line_numbers[0]);
            for (int k = 1; k < entry->num_line_numbers; k++) {
                printf(",%d", entry->line_numbers[k]);
            }
            printf("}\n");
        }
        start = end;
    }
}

/* Record one occurrence of word s found at line_number of filename.
 * Entries are kept in lexicographical order of their word; entries with
 * the same word (one per file) stay in the order in which they were
 * first inserted.
 * If an entry for (s, filename) already exists, its count is incremented
 * and line_number is appended unless it is already recorded.
 * Otherwise a new entry is created with copies of s and filename.
 * Returns 0 on success, 1 if a memory allocation fails.
 */
int insert_word(word_list *words, const char *s, int line_number, const char *filename) {
    /* binary search for the first entry that does not sort before s */
    int lo = 0;
    int hi = words->num_words;
    while (lo < hi) {
        int mid = lo + (hi - lo) / 2;
        if (strcmp(words->words[mid].s, s) < 0) {
            lo = mid + 1;
        } else {
            hi = mid;
        }
    }

    /* walk the run of entries equal to s, looking for one from filename */
    int pos = lo;
    while (pos < words->num_words && strcmp(words->words[pos].s, s) == 0) {
        word *entry = &words->words[pos];
        if (strcmp(entry->fileno, filename) == 0) {
            entry->count++;

            /* is this line number already recorded for the entry? */
            int known = 0;
            for (int k = 0; k < entry->num_line_numbers; k++) {
                if (entry->line_numbers[k] == line_number) {
                    known = 1;
                    break;
                }
            }
            if (!known) {
                int *grown_lines = realloc(entry->line_numbers,
                                           (entry->num_line_numbers + 1) * sizeof(*entry->line_numbers));
                if (grown_lines == NULL) {
                    return 1;
                }
                entry->line_numbers = grown_lines;
                entry->line_numbers[entry->num_line_numbers++] = line_number;
            }
            return 0;
        }
        pos++;
    }

    /* No entry for this (word, file) pair: build one to insert at pos.
     * Everything is allocated up front so a failure leaves the list untouched.
     */
    char *text_copy = strdup(s);
    char *file_copy = strdup(filename);
    int *lines = malloc(sizeof(*lines));
    word *grown = NULL;
    if (text_copy != NULL && file_copy != NULL && lines != NULL) {
        grown = realloc(words->words, (words->num_words + 1) * sizeof(*words->words));
    }
    if (grown == NULL) {
        free(text_copy);
        free(file_copy);
        free(lines);
        return 1;
    }
    words->words = grown;

    /* open a gap at pos by shifting the tail one slot to the right */
    word *slot = &grown[pos];
    memmove(slot + 1, slot, (words->num_words - pos) * sizeof(*slot));

    lines[0] = line_number;
    *slot = (word){
        .s = text_copy,
        .count = 1,
        .line_numbers = lines,
        .num_line_numbers = 1,
        .fileno = file_copy,
    };
    words->num_words++;
    return 0;
}

/* Remove every entry whose word equals word_to_delete (there can be one
 * per input file).
 *
 * The array is compacted in a single pass: matching entries have their
 * s, line_numbers and fileno released, and every other entry is moved
 * down to the next free slot, preserving order. This avoids copying
 * overlapping regions with memcpy and needs only one realloc per call.
 *
 * If the list becomes empty the array is freed and words->words is set
 * to NULL. Otherwise the array is shrunk; a failed shrink is harmless
 * because the original, larger block stays valid.
 *
 * Returns the number of entries removed (0 if the word was not present).
 */
int remove_word(word_list *words, const char *word_to_delete) {
    const int total = words->num_words;
    int kept = 0;

    for (int i = 0; i < total; i++) {
        word *entry = &words->words[i];
        if (strcmp(entry->s, word_to_delete) == 0) {
            free(entry->s);
            free(entry->line_numbers);
            free(entry->fileno);
        } else {
            if (kept != i) {
                words->words[kept] = *entry;
            }
            kept++;
        }
    }

    const int found = total - kept;
    if (found == 0) {
        return 0;
    }

    words->num_words = kept;
    if (kept == 0) {
        free(words->words);
        words->words = NULL;
    } else {
        // Resize the array (technically not required)
        word *tmp = realloc(words->words, sizeof(*tmp) * (size_t)kept);
        if (tmp != NULL) {
            words->words = tmp;
        }
    }
    return found;
}

/* Read filename line by line and insert every word into the word_list.
 * Words are separated by spaces or newlines and are lower-cased before
 * insertion; line numbers start at 1.
 * Returns 0 on success, 1 if the file cannot be opened or an insertion
 * fails (a message is written to stderr in both cases).
 */
int read_file(word_list *words, const char *filename) {
    FILE *in = fopen(filename, "r");
    if (in == NULL) {
        fprintf(stderr, "cannot open %s\n", filename);
        return 1;
    }

    int status = 0;
    char buf[1000];
    for (int lineno = 1; status == 0 && fgets(buf, sizeof(buf), in) != NULL; lineno++) {
        for (char *tok = strtok(buf, " \n"); tok != NULL; tok = strtok(NULL, " \n")) {
            if (insert_word(words, strlower(tok), lineno, filename) != 0) {
                fprintf(stderr, "error inserting from %s at line %d\n", filename, lineno);
                status = 1;
                break;   /* stop reading; the outer loop ends on status */
            }
        }
    }
    fclose(in);
    return status;
}

int main() {
    int option = 1;
    word_list *words = calloc(sizeof(word_list), 1);
    if (words == NULL) {
        fprintf(stderr, "cannot allocate memory\n");
        return 1;
    }
    read_file(words, "test.txt");
    read_file(words, "test1.txt");

    if (option != 0) {
        char s[1000];
        FILE *file = fopen("stop.txt", "r"); /* should check the result */
        if (file == NULL) {
            fprintf(stderr, "cannot open %s\n", "stop.txt");
        } else {
            while (fgets(s, sizeof(s), file)) {
                char *word = strtok(s, " \n");
                while (word != NULL) {
                    remove_word(words, strlower(word));
                    word = strtok(NULL, " \n");
                }
            }
            fclose(file);
        }
    }
    printlist(words);

    for (int i = 0; i < words->num_words; i++) {
        free(words->words[i].s);
        free(words->words[i].line_numbers);
        free(words->words[i].fileno);
    }
    free(words->words);
    free(words);
    return 0;
}

输出:

haio    [1]     test1.txt       [1]     {4}
lol     [2]     test.txt        [1]     {5}
                test1.txt       [1]     {5}
ohaio   [1]     test.txt        [1]     {4}
vola    [1]     test1.txt       [1]     {1}