为什么 printf 函数会影响我的拼写程序？

Question

我正在使用来自 Internet 的散列函数，当我在 return 语句之前使用打印函数时，它使我的程序正确，但如果我删除它，它又会出现错误......就像字面上令人沮丧，因为我可以做 printf("asfasfnasfnk\n"); 并且它会正确输出但是当我删除 printf 函数时它再次出现错误...

unsigned int hash(const char *word)
{
    /* credits to...
     *https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned long hash = 0;
    int n  = strlen(word);

    for (int i = 0; i < n; i++)
    {
        hash = (hash << 2) ^ word[i];
    }
    return hash % N;
}

输出：

MISSPELLED WORDS

A
is
not
a
caterpillar

WORDS MISSPELLED:     5
WORDS IN DICTIONARY:  2
WORDS IN TEXT:        6
TIME IN load:         0.00
TIME IN check:        0.00
TIME IN size:         0.00
TIME IN unload:       0.00
TIME IN TOTAL:        0.00

unsigned int hash(const char *word)
{
    /* credits to...
     *https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned long hash = 0;
    int n  = strlen(word);

    for (int i = 0; i < n; i++)
    {
        hash = (hash << 2) ^ word[i];
    }
    printf("%s -> %lu\n", word, hash%N);
    return hash % N;
}

输出：

cat -> 1984
caterpillar -> 109622

MISSPELLED WORDS

a -> 97
A
cat -> 1984
is -> 471
is
not -> 1832
not
a -> 97
a
caterpillar -> 109622

WORDS MISSPELLED:     4
WORDS IN DICTIONARY:  2
WORDS IN TEXT:        6
TIME IN load:         0.00
TIME IN check:        0.00
TIME IN size:         0.00
TIME IN unload:       0.00
TIME IN TOTAL:        0.00

词典中的单词是猫和毛毛虫，正文中的单词是"A cat is not a caterpillar"

函数：

// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#include "dictionary.h"

// Represents a node in a hash table
typedef struct node
{
    char word[LENGTH + 1];
    struct node *next;
}
node;

// Number of buckets in hash table
const unsigned int N = 200000;

// Hash table
node *table[N];

// Returns true if word is in dictionary else false
bool check(const char *word)
{
    // TODO
    int len = strlen(word);
    char *copy = malloc(sizeof(char) * len + 1);
    // change into lowercase the word
    for (int i = 0; i < len; i++)
    {
            copy[i] = tolower(word[i]);
    }
    // get the index by using the hash function
    int index = hash(copy);

    node *tmp = table[index];
    // check if the word is in the hash table
    while (tmp != NULL)
    {
        if (strcmp(tmp->word, copy) == 0)
        {
            free(copy);
            return true;
        }

        tmp = tmp->next;
    }

    free(copy);
    return false;
}

// Hashes word to a number
unsigned int hash(const char *word)
{
    /* credits to...
     *https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned long hash = 0;
    int n  = strlen(word);

    for (int i = 0; i < n; i++)
    {
        hash = (hash << 2) ^ word[i];
    }
    return hash % N;
}

// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
    // TODO
    char *words = malloc(sizeof(char) * (LENGTH + 1));
    if (words == NULL)
    {
        return 1;
    }
    // initialize the hash table to NULL
    for (int i = 0; i < N; i++)
    {
        table[i] = NULL;
    }

    // open dictionary file
    FILE *indata = fopen(dictionary, "r");


    // 1 character for '[=14=]' and another for '\n' because fgets takes a trailing new line
    // when it reads 'man' the value of words will be "man\n[=14=]" so meaning 2 extra characters
    while (fgets(words, LENGTH + 2, indata) != NULL)
    {
        // get rid of the trailing new line from fgets
        words[strlen(words) - 1] = '[=14=]';

        // allocate memory for the newNode
        node *newNode = malloc(sizeof(node));
        if (newNode == NULL)
        {
            return false;
        }

        // get the index by using the hash function
        int index = hash(words);

        strcpy(newNode->word, words);
        // make the newNode the head of the list
        newNode->next = table[index];
        table[index] = newNode;

    }

    // free memory and close the opened file
    free(words);
    fclose(indata);
    return true;
}

// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
    // TODO
    // counter of words loaded
    unsigned int counter = 0;
    // loop through the hash table
    for (int i = 0; i < N; i++)
    {
        node *tmp = table[i];

        while (tmp != NULL)
        {
            counter++;
            tmp = tmp->next;
        }
    }
    return counter;
}

// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
    // TODO
    // loop through the whole hash table
    for (int i = 0; i < N; i++)
    {
        while (table[i] != NULL)
        {
            node *tmp = table[i]->next;
            free(table[i]);
            table[i] = tmp;
        }
    }
    return true;
}

Answer 1

// TODO
int len = strlen(word);
char *copy = malloc(sizeof(char) * len + 1);
// change into lowercase the word
for (int i = 0; i < len; i++)
{
        copy[i] = tolower(word[i]);
}
// get the index by using the hash function
int index = hash(copy);

请注意 malloc 中的 + 1。为什么在那里？它允许 space 作为标记字符串结尾的终止零字节。

假设字符串是 "test"。然后 strlen 将 return 4. 你的循环将从 0 到 3 迭代，复制单词中的四个字母。

但是您不会复制字符串末尾的终止零字节。当 hash 在 copy 上调用 strlen 时，谁知道它会得到什么值，因为你传递给它的不是合法的字符串。

将 for 循环中的条件更改为 i <= len。

为什么 printf 函数会影响我的拼写程序？

Why does the printf function affect my speller program?

c

cs50