为什么 printf 函数会影响我的拼写程序?
Why does the printf function affect my speller program?
我正在使用一个来自互联网的散列函数。当我在 return 语句之前加上一条 printf 语句时,程序的结果是正确的;但只要删除它,结果就又出错了……这真的很令人沮丧,因为哪怕只是写一句 printf("asfasfnasfnk\n");
程序也会正确输出,而一旦删除这条 printf,错误就再次出现……
unsigned int hash(const char *word)
{
    /*
     * Shift-and-XOR string hash, reduced modulo the bucket count N.
     * credits to...
     * https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned long acc = 0;

    // Fold in every character up to (not including) the terminating '\0'
    for (const char *p = word; *p != '\0'; p++)
    {
        acc = (acc << 2) ^ *p;
    }

    return acc % N;
}
输出:
MISSPELLED WORDS
A
is
not
a
caterpillar
WORDS MISSPELLED: 5
WORDS IN DICTIONARY: 2
WORDS IN TEXT: 6
TIME IN load: 0.00
TIME IN check: 0.00
TIME IN size: 0.00
TIME IN unload: 0.00
TIME IN TOTAL: 0.00
unsigned int hash(const char *word)
{
    /*
     * Shift-and-XOR string hash, reduced modulo the bucket count N.
     * This variant also prints the word and the bucket it maps to.
     * credits to...
     * https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned long acc = 0;

    // Fold in every character up to (not including) the terminating '\0'
    for (const char *p = word; *p != '\0'; p++)
    {
        acc = (acc << 2) ^ *p;
    }

    // Reduce once, then report and return the same bucket number
    unsigned long bucket = acc % N;
    printf("%s -> %lu\n", word, bucket);
    return bucket;
}
输出:
cat -> 1984
caterpillar -> 109622
MISSPELLED WORDS
a -> 97
A
cat -> 1984
is -> 471
is
not -> 1832
not
a -> 97
a
caterpillar -> 109622
WORDS MISSPELLED: 4
WORDS IN DICTIONARY: 2
WORDS IN TEXT: 6
TIME IN load: 0.00
TIME IN check: 0.00
TIME IN size: 0.00
TIME IN unload: 0.00
TIME IN TOTAL: 0.00
词典中的单词是 cat 和 caterpillar,正文内容是 "A cat is not a caterpillar"。
函数:
// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
// Represents a node in a hash table: one stored word plus a link to the
// next node kept in the same bucket (a singly linked chain).
typedef struct node
{
// The stored word, as a NUL-terminated string
// (LENGTH comes from dictionary.h; presumably the maximum word length,
// with the + 1 leaving room for the terminating '\0' — confirm there)
char word[LENGTH + 1];
// Next node in this bucket's chain, or NULL at the end of the chain
struct node *next;
}
node;
// Number of buckets in hash table; hash() reduces its result modulo N,
// so every index it returns is in the range [0, N)
const unsigned int N = 200000;
// Hash table: table[i] is the head of the linked list of nodes whose word
// hashes to i, or NULL when that bucket is empty
// NOTE(review): N is a const variable, not an integer constant expression,
// so this file-scope array depends on the compiler folding it to a constant
// (clang accepts this as an extension) — confirm before switching toolchains
node *table[N];
// Returns true if word is in dictionary else false
bool check(const char *word)
{
// TODO
int len = strlen(word);
char *copy = malloc(sizeof(char) * len + 1);
// change into lowercase the word
for (int i = 0; i < len; i++)
{
copy[i] = tolower(word[i]);
}
// get the index by using the hash function
int index = hash(copy);
node *tmp = table[index];
// check if the word is in the hash table
while (tmp != NULL)
{
if (strcmp(tmp->word, copy) == 0)
{
free(copy);
return true;
}
tmp = tmp->next;
}
free(copy);
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
    /*
     * Shift-and-XOR string hash, reduced modulo the bucket count N.
     * credits to...
     * https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned long acc = 0;

    // Fold in every character up to (not including) the terminating '\0'
    for (const char *p = word; *p != '\0'; p++)
    {
        acc = (acc << 2) ^ *p;
    }

    return acc % N;
}
// Loads dictionary into memory, returning true if successful else false
//
// Reads the file named by dictionary one line at a time and inserts each
// non-empty line as a word at the head of its bucket's chain in the table.
// Returns false if the file cannot be opened or a node cannot be allocated;
// in the latter case the nodes inserted so far stay in the table.
bool load(const char *dictionary)
{
    // Line buffer: a word of up to LENGTH characters, plus the '\n' that
    // fgets keeps, plus the terminating '\0'
    char line[LENGTH + 2];

    // Start from an empty table
    for (unsigned int i = 0; i < N; i++)
    {
        table[i] = NULL;
    }

    // Open the dictionary file; without it there is nothing to load
    FILE *indata = fopen(dictionary, "r");
    if (indata == NULL)
    {
        return false;
    }

    while (fgets(line, sizeof line, indata) != NULL)
    {
        // Cut the line at its newline, if it has one (the last line of a
        // file may not), instead of blindly dropping the final character
        line[strcspn(line, "\n")] = '\0';

        // A blank line carries no word to store
        if (line[0] == '\0')
        {
            continue;
        }

        node *newNode = malloc(sizeof *newNode);
        if (newNode == NULL)
        {
            fclose(indata);
            return false;
        }

        // Bounded copy: never writes past newNode->word, always terminates
        snprintf(newNode->word, sizeof newNode->word, "%s", line);

        // Hash the stored word and make the new node the head of its chain
        unsigned int index = hash(newNode->word);
        newNode->next = table[index];
        table[index] = newNode;
    }

    fclose(indata);
    return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
//
// The count is obtained by walking every bucket's chain in the table.
unsigned int size(void)
{
    unsigned int total = 0;

    for (int bucket = 0; bucket < N; bucket++)
    {
        // One node per stored word
        for (node *cur = table[bucket]; cur != NULL; cur = cur->next)
        {
            total++;
        }
    }

    return total;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
// TODO
// loop through the whole hash table
for (int i = 0; i < N; i++)
{
while (table[i] != NULL)
{
node *tmp = table[i]->next;
free(table[i]);
table[i] = tmp;
}
}
return true;
}
// TODO
int len = strlen(word);
// len + 1 bytes: the extra byte is room for the terminating '\0'
char *copy = malloc(sizeof(char) * len + 1);
// change into lowercase the word
// NOTE: i stops at len - 1, so copy[len] is never written and copy is not
// NUL-terminated when it is passed to hash() below
for (int i = 0; i < len; i++)
{
copy[i] = tolower(word[i]);
}
// get the index by using the hash function
int index = hash(copy);
请注意 malloc 调用中的 + 1。为什么要加它?它是为标记字符串结尾的终止零字节('\0')预留的空间。
假设字符串是 "test",那么 strlen 将返回 4。你的循环会从 0 迭代到 3,复制单词中的四个字母。
但是你并没有复制字符串末尾的终止零字节。当 hash 对 copy 调用 strlen 时,没人知道它会得到什么值,因为你传给它的并不是一个合法的字符串。
将 for 循环中的条件更改为 i <= len,这样终止零字节也会被一并复制(tolower 对 '\0' 的结果仍然是 '\0')。
我正在使用一个来自互联网的散列函数。当我在 return 语句之前加上一条 printf 语句时,程序的结果是正确的;但只要删除它,结果就又出错了……这真的很令人沮丧,因为哪怕只是写一句 printf("asfasfnasfnk\n");
程序也会正确输出,而一旦删除这条 printf,错误就再次出现……
unsigned int hash(const char *word)
{
    /*
     * Shift-and-XOR string hash, reduced modulo the bucket count N.
     * credits to...
     * https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned long acc = 0;

    // Fold in every character up to (not including) the terminating '\0'
    for (const char *p = word; *p != '\0'; p++)
    {
        acc = (acc << 2) ^ *p;
    }

    return acc % N;
}
输出:
MISSPELLED WORDS
A
is
not
a
caterpillar
WORDS MISSPELLED: 5
WORDS IN DICTIONARY: 2
WORDS IN TEXT: 6
TIME IN load: 0.00
TIME IN check: 0.00
TIME IN size: 0.00
TIME IN unload: 0.00
TIME IN TOTAL: 0.00
unsigned int hash(const char *word)
{
    /*
     * Shift-and-XOR string hash, reduced modulo the bucket count N.
     * This variant also prints the word and the bucket it maps to.
     * credits to...
     * https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned long acc = 0;

    // Fold in every character up to (not including) the terminating '\0'
    for (const char *p = word; *p != '\0'; p++)
    {
        acc = (acc << 2) ^ *p;
    }

    // Reduce once, then report and return the same bucket number
    unsigned long bucket = acc % N;
    printf("%s -> %lu\n", word, bucket);
    return bucket;
}
输出:
cat -> 1984
caterpillar -> 109622
MISSPELLED WORDS
a -> 97
A
cat -> 1984
is -> 471
is
not -> 1832
not
a -> 97
a
caterpillar -> 109622
WORDS MISSPELLED: 4
WORDS IN DICTIONARY: 2
WORDS IN TEXT: 6
TIME IN load: 0.00
TIME IN check: 0.00
TIME IN size: 0.00
TIME IN unload: 0.00
TIME IN TOTAL: 0.00
词典中的单词是 cat 和 caterpillar,正文内容是 "A cat is not a caterpillar"。
函数:
// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
// Represents a node in a hash table: one stored word plus a link to the
// next node kept in the same bucket (a singly linked chain).
typedef struct node
{
// The stored word, as a NUL-terminated string
// (LENGTH comes from dictionary.h; presumably the maximum word length,
// with the + 1 leaving room for the terminating '\0' — confirm there)
char word[LENGTH + 1];
// Next node in this bucket's chain, or NULL at the end of the chain
struct node *next;
}
node;
// Number of buckets in hash table; hash() reduces its result modulo N,
// so every index it returns is in the range [0, N)
const unsigned int N = 200000;
// Hash table: table[i] is the head of the linked list of nodes whose word
// hashes to i, or NULL when that bucket is empty
// NOTE(review): N is a const variable, not an integer constant expression,
// so this file-scope array depends on the compiler folding it to a constant
// (clang accepts this as an extension) — confirm before switching toolchains
node *table[N];
// Returns true if word is in dictionary else false
bool check(const char *word)
{
// TODO
int len = strlen(word);
char *copy = malloc(sizeof(char) * len + 1);
// change into lowercase the word
for (int i = 0; i < len; i++)
{
copy[i] = tolower(word[i]);
}
// get the index by using the hash function
int index = hash(copy);
node *tmp = table[index];
// check if the word is in the hash table
while (tmp != NULL)
{
if (strcmp(tmp->word, copy) == 0)
{
free(copy);
return true;
}
tmp = tmp->next;
}
free(copy);
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
    /*
     * Shift-and-XOR string hash, reduced modulo the bucket count N.
     * credits to...
     * https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned long acc = 0;

    // Fold in every character up to (not including) the terminating '\0'
    for (const char *p = word; *p != '\0'; p++)
    {
        acc = (acc << 2) ^ *p;
    }

    return acc % N;
}
// Loads dictionary into memory, returning true if successful else false
//
// Reads the file named by dictionary one line at a time and inserts each
// non-empty line as a word at the head of its bucket's chain in the table.
// Returns false if the file cannot be opened or a node cannot be allocated;
// in the latter case the nodes inserted so far stay in the table.
bool load(const char *dictionary)
{
    // Line buffer: a word of up to LENGTH characters, plus the '\n' that
    // fgets keeps, plus the terminating '\0'
    char line[LENGTH + 2];

    // Start from an empty table
    for (unsigned int i = 0; i < N; i++)
    {
        table[i] = NULL;
    }

    // Open the dictionary file; without it there is nothing to load
    FILE *indata = fopen(dictionary, "r");
    if (indata == NULL)
    {
        return false;
    }

    while (fgets(line, sizeof line, indata) != NULL)
    {
        // Cut the line at its newline, if it has one (the last line of a
        // file may not), instead of blindly dropping the final character
        line[strcspn(line, "\n")] = '\0';

        // A blank line carries no word to store
        if (line[0] == '\0')
        {
            continue;
        }

        node *newNode = malloc(sizeof *newNode);
        if (newNode == NULL)
        {
            fclose(indata);
            return false;
        }

        // Bounded copy: never writes past newNode->word, always terminates
        snprintf(newNode->word, sizeof newNode->word, "%s", line);

        // Hash the stored word and make the new node the head of its chain
        unsigned int index = hash(newNode->word);
        newNode->next = table[index];
        table[index] = newNode;
    }

    fclose(indata);
    return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
//
// The count is obtained by walking every bucket's chain in the table.
unsigned int size(void)
{
    unsigned int total = 0;

    for (int bucket = 0; bucket < N; bucket++)
    {
        // One node per stored word
        for (node *cur = table[bucket]; cur != NULL; cur = cur->next)
        {
            total++;
        }
    }

    return total;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
// TODO
// loop through the whole hash table
for (int i = 0; i < N; i++)
{
while (table[i] != NULL)
{
node *tmp = table[i]->next;
free(table[i]);
table[i] = tmp;
}
}
return true;
}
// TODO
int len = strlen(word);
// len + 1 bytes: the extra byte is room for the terminating '\0'
char *copy = malloc(sizeof(char) * len + 1);
// change into lowercase the word
// NOTE: i stops at len - 1, so copy[len] is never written and copy is not
// NUL-terminated when it is passed to hash() below
for (int i = 0; i < len; i++)
{
copy[i] = tolower(word[i]);
}
// get the index by using the hash function
int index = hash(copy);
请注意 malloc 调用中的 + 1。为什么要加它?它是为标记字符串结尾的终止零字节('\0')预留的空间。
假设字符串是 "test",那么 strlen 将返回 4。你的循环会从 0 迭代到 3,复制单词中的四个字母。
但是你并没有复制字符串末尾的终止零字节。当 hash 对 copy 调用 strlen 时,没人知道它会得到什么值,因为你传给它的并不是一个合法的字符串。
将 for 循环中的条件更改为 i <= len,这样终止零字节也会被一并复制(tolower 对 '\0' 的结果仍然是 '\0')。