为什么我的 hash table 程序总是崩溃？

Question

我正在尝试创建一个程序来读取字典，然后将单词存储到散列中 table，然后读取另一个文件检查该文件的每个单词是否在散列中 [=14] =] 如果不是，那么它将作为拼写错误的单词输出。我首先尝试检查是否可以将字典文件加载到我的散列 table 中，然后输出散列 table 中的单词，但每当我尝试运行时，我的代码似乎都会崩溃它。我使用的哈希函数是从网上拿来的。我对数据结构还是很陌生，很难理解。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// file to read
#define dictionary "dictionary.txt"
// No. of buckets
const unsigned int N = 10;

typedef struct node
{
    char* word;
    struct node *next;
}
node;

node *table[10];

// hash function
unsigned int hash(char *word)
{
// TODO
    unsigned int hash = 5381;
    int c = 0;

    while (c == *word++)
        hash = ((hash << 5) + hash) + c;

    return hash % 10;
}

int main(void)
{
    // initialize array heads to NULL
    for (int i = 0; i < N; i++)
    {
        table[i] = NULL;
    }

    // Open file to read
    FILE *indata = fopen(dictionary, "r");   
    if (indata == NULL)
    {
        printf("cant open\n");
        return 1;
    }

    // variable to store words read from the file
    char *words = malloc(sizeof(char) * 20);
    if (words == NULL)
    {
        printf("no memory\n");
        return 1;
    }

    // While loop to read through the file
    while (fgets(words, 20, indata))
    {
        // get the index of the word using hash function
        int index = hash(words);

        // create new node
        node *newNode = malloc(sizeof(node));
        if (newNode == NULL)
        {
            printf("here\n");
            return 1;
        }

        // make the new node the new head of the list
        strcpy(newNode->word, words);
        newNode->next = table[index];
        table[index] = newNode;

        // free memory
        free(newNode);
    }
    // free memory
    free(words);
    // loop to print out the values of the hash table
    for (int i = 0; i < N; i++)
    {
        node *tmp = table[i];
        while (tmp->next != NULL)
        {
            printf("%s\n", tmp->word);
            tmp = tmp->next;
        }
    }

    // loop to free all memory of the hash table
    for (int i = 0; i < N; i++)
    {
        if (table[i] != NULL)
        {
            node *tmp = table[i]->next;
            free(table[i]);
            table[i] = tmp;
        }
    }

    // close the file
    fclose(indata);
}

Answer 1

粗略地看了一下，我发现了两个问题：

您没有为节点中的单词分配space；你只需将 strcopy 这个词变成一个未定义的指针。您可能想改用 strdup。
将节点添加到列表后释放节点的内存。 table 是一个指针数组，所以你将这个点存储在 table 中，然后丢弃它指向的内存。

哦，三：在最后一个循环中，您再次释放未分配的内存...

Answer 2

至少三个独立导致段错误的错误：

首先，newNode->word被使用unitialized，所以它指向随机内存，所以strcpy会出现段错误。最好使用 strdup

此外，在将 newNode 放入 table 后，您会 free(newNode) 使其指向的内容无效。这导致第二个循环出现段错误

第三，在第二个循环中，如果table[i]为null，则while (tmp->next != NULL)会出现段错误

我已经注释并更正了您的代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// file to read
#define dictionary "dictionary.txt"

// No. of buckets
const unsigned int N = 10;

typedef struct node {
    char *word;
    struct node *next;
} node;

node *table[10];

// hash function
unsigned int
hash(char *word)
{
// TODO
    unsigned int hash = 5381;
    int c = 0;

    while (c == *word++)
        hash = ((hash << 5) + hash) + c;

// NOTE: not a bug but probably better
#if 0
    return hash % 10;
#else
    return hash % N;
#endif
}

int
main(void)
{
    // initialize array heads to NULL
    for (int i = 0; i < N; i++) {
        table[i] = NULL;
    }

    // Open file to read
    FILE *indata = fopen(dictionary, "r");

    if (indata == NULL) {
        printf("cant open\n");
        return 1;
    }

    // variable to store words read from the file
    char *words = malloc(sizeof(char) * 20);

    if (words == NULL) {
        printf("no memory\n");
        return 1;
    }

    // While loop to read through the file
    while (fgets(words, 20, indata)) {
        // get the index of the word using hash function
        int index = hash(words);

        // create new node
        node *newNode = malloc(sizeof(node));

        if (newNode == NULL) {
            printf("here\n");
            return 1;
        }

        // make the new node the new head of the list
// NOTE/BUG: word is never set to anything valid -- possible segfault here
#if 0
        strcpy(newNode->word, words);
#else
        newNode->word = strdup(words);
#endif
        newNode->next = table[index];
        table[index] = newNode;

        // free memory
// NOTE/BUG: this will cause the _next_ loop to segfault -- don't deallocate
// the node you just added to the table
#if 0
        free(newNode);
#endif
    }

    // free memory
    free(words);

    // loop to print out the values of the hash table
    for (int i = 0; i < N; i++) {
        node *tmp = table[i];
// NOTE/BUG: this test fails if the tmp is originally NULL (i.e. no entries
// in the given hash index)
#if 0
        while (tmp->next != NULL) {
#else
        while (tmp != NULL) {
#endif
            printf("%s\n", tmp->word);
            tmp = tmp->next;
        }
    }

    // loop to free all memory of the hash table
    for (int i = 0; i < N; i++) {
        if (table[i] != NULL) {
            node *tmp = table[i]->next;

            free(table[i]);
            table[i] = tmp;
        }
    }

    // close the file
    fclose(indata);
}

更新：

I made a linked list program before that stores an integer in the list, int number; struct node *next; and I used newNode->number = 5; and it worked, why is it in this case it doesn't?? Is it because I am working with strings here??

区别在于word是一个指针。它必须先赋值才能使用。 strcpy 不会给 word 赋值。它试图使用 word 的内容作为副本的目标地址。

但是，无论 word 是 char * 还是 number 是 int.

，其他两个错误都会发生

If 你定义了 word not 作为一个指针，而是作为一个固定的数组 [在这方面不太好用法]，strcpy 会起作用。也就是说，如果您已经完成（例如）char word[5];

，而不是 char *word;

但是，strdup 更改后您所做的更好，除非您可以保证 word 的长度可以容纳输入。 strdup 将保证。

但是，请注意我[故意]使 word 只有五个字符来说明问题。这意味着要添加的单词的长度只能是 4 个字符[我们需要一个额外的字节来作为 nul 终止符]。您需要使用 strncpy 而不是 strcpy 但是 strncpy 有问题 [它 not 保证在末尾添加 nul char 如果源长度太大了。

巧合的是，今天还有一个问题的答案可能有助于进一步阐明 word 结构成员的差异：

为什么我的 hash table 程序总是崩溃？

Why does my hash table program keep crashing?

c

hash

hashtable