将节点添加到链表的功能不起作用| C
Function to add a node to linked list not working | c
我有以下程序,它从文本文件中读取单词并创建一个链表,每个节点包含:单词、计数、下一个。
当一个词已经存在于链表中时更新计数,否则,在链表的末尾创建一个节点。
我的所有功能都有效,除了我在链表末尾添加单词的功能。节点链接可能有错误?
使用我的以下文本文件:第 1 行“我的名字是娜塔莉”,第 2 行“我的狗是 niko”
我应该得到以下输出:my(2), name(1), is(2), natalie(1), dog(1), niko(1)
但我得到:my(2), dog(2), s(1), iko(1), is(1), niko(1)
我的错误在哪里?
//function to add word to linked list
struct WordNode *addWord(char* word, struct WordNode *wordListHead){
//create new node
struct WordNode *current = malloc(sizeof(struct WordNode));
current->word = word;
current->count = 1;
current->next = NULL;
//
while(1){
if((wordListHead != NULL)&&(wordListHead->next ==NULL)){
//connect
wordListHead->next=current;
break;
}
wordListHead = wordListHead->next;
}
}
主要调用这里:
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
printf("%s\n", filename);
if (fp == NULL){
printf("Error: unable to open the file ");
printf("%s", filename);
return 1;
}
else {
char *delim = " \t\n,:;'\".?!#$-><(){}[]|\/*&^%@!~+=_"; // These are our word delimiters.
char line[1000];
char * token;
int count = 0;//count used so that first word of the doc can be created as head node
//create head pointer
struct WordNode *wordListHead = NULL;
//create current pointer
struct WordNode *current = NULL;
//iterate through each line in file
while(fgets(line, 1000, fp)){
//seperate each word
//first word of line
token = strtok(line, delim);
printf("%s\n", token);
if(count == 0){
//first word of document, create first wordnode (head node)
wordListHead = malloc(sizeof(struct WordNode));
wordListHead->word = token;
wordListHead->count = 1;
wordListHead->next = NULL;
}
else{
//check if first word of line exists in linked list
if((doesWordExist(token, wordListHead)) == 0){
//update count
updateCount(token, wordListHead);
}
else{
//create node
addWord(token, wordListHead);
}
}
//iterate through all the other words of line
while ((token=strtok(NULL, delim)) != NULL){
printf("%s\n", token);
//check if name is in linked list
if((doesWordExist(token, wordListHead)) == 0){
//update count
updateCount(token, wordListHead);
}
else{
//create node
addWord(token, wordListHead);
}
}
count++;
}
printWordList(wordListHead);
}
}
此处定义的结构:
//structure definition of linked list node
#ifndef WORDLISTH
#define WORDLISTH
struct WordNode{
char *word;
unsigned long count;
struct WordNode *next;
};
void printWordList( struct WordNode *wordListHead);
struct WordNode *addWord(char* word , struct WordNode *wordListead);
#endif
其他功能供参考:
//function to check if word is in linked list
bool doesWordExist(char* myword, struct WordNode *wordListHead){
while (wordListHead != NULL){
if(strcmp(wordListHead->word, myword) == 0){
return 0;
}
wordListHead= wordListHead-> next;
}
return 1;
}
//function to update the count of word
void updateCount(char* myword, struct WordNode *wordListHead){
while (wordListHead != NULL){
if(strcmp(wordListHead->word, myword) == 0){
//update count value
//capture current count and add 1
int curcount = wordListHead->count;
int newcount = curcount + 1;
wordListHead->count = newcount;
//printf("%d\n", newcount);
}
wordListHead= wordListHead-> next;
}
}
//function to print word list
//takes head node as argument
void printWordList( struct WordNode *wordListHead){
//WordNode *toyptr = wordListHead;
while (wordListHead != NULL){
printf("%s\n", wordListHead->word);
printf("%ld\n", wordListHead->count);
wordListHead = wordListHead -> next;
}
}
当您将 token
存储到结构中时,您使用的是作为输入缓冲区一部分的指针。
在新的输入行上,从前行收集的标记将是corrupted/trashed。
您需要分配 space 以将令牌存储在堆上的结构中。为此使用 strdup
。
因此,在 addWord
中,您需要:
current->word = strdup(word);
而在 main
中,您需要:
wordListHead->word = strdup(token);
更新:
这是首要问题。但是,您的代码做了一堆不必要的复制。
addWord
不处理空列表。但是,如果确实如此,main
就不需要为行中的第一个单词和后续单词单独 [复制] 代码。
可以将strcmp
合并到addWord
中,可以“一举多得”。 (即 单次 扫描列表)
对于 doesWordExist
,它 returns 匹配 bool
。如果它返回指向匹配元素的指针,updateCount
将只需要增加计数 [并且 而不是 重新扫描列表]。我已经相应地更新了这些功能,但由于 addWord
的更改,它们不再需要
以下是我将如何简化和重构代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef int bool;
#ifdef DEBUG
#define dbgprt(_fmt...) \
printf(_fmt)
#else
#define dbgprt(_fmt...) do { } while (0)
#endif
//structure definition of linked list node
#ifndef WORDLISTH
#define WORDLISTH
struct WordNode {
char *word;
unsigned long count;
struct WordNode *next;
};
void printWordList(struct WordNode *wordListHead);
#endif
//function to add word to linked list
struct WordNode *
addWord(char *word, struct WordNode **list)
{
struct WordNode *curr;
struct WordNode *prev = NULL;
struct WordNode *newnode = NULL;
for (curr = *list; curr != NULL; curr = curr->next) {
if (strcmp(curr->word,word) == 0) {
newnode = curr;
break;
}
prev = curr;
}
// create new node
do {
// word already exists
if (newnode != NULL)
break;
newnode = malloc(sizeof(struct WordNode));
newnode->word = strdup(word);
newnode->count = 0;
newnode->next = NULL;
// attach to tail of list
if (prev != NULL) {
prev->next = newnode;
break;
}
// first node -- set list pointer
*list = newnode;
} while (0);
// increment the count
newnode->count += 1;
return newnode;
}
//function to check if word is in linked list
struct WordNode *
findWord(char *myword, struct WordNode *head)
{
struct WordNode *curr;
for (curr = head; curr != NULL; curr = curr->next) {
if (strcmp(curr->word,myword) == 0)
break;
}
return curr;
}
//function to update the count of word
void
updateCount(char *myword, struct WordNode *head)
{
struct WordNode *match;
match = findWord(myword,head);
if (match != NULL)
match->count += 1;
}
//function to print word list
//takes head node as argument
void
printWordList(struct WordNode *head)
{
struct WordNode *curr;
for (curr = head; curr != NULL; curr = curr->next) {
printf("%s", curr->word);
printf(" %ld\n", curr->count);
}
}
int
main(int argc, char **argv)
{
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
printf("FILE: %s\n", filename);
if (fp == NULL) {
printf("Error: unable to open the file ");
printf("%s", filename);
return 1;
}
// These are our word delimiters.
char *delim = " \t\n,:;'\".?!#$-><(){}[]|\/*&^%@!~+=_";
char line[1000];
char *token;
// create head pointer
struct WordNode *wordListHead = NULL;
// iterate through each line in file
while (fgets(line, sizeof(line), fp)) {
// seperate each word
// first word of line
char *bp = line;
while (1) {
token = strtok(bp, delim);
bp = NULL;
if (token == NULL)
break;
dbgprt("TOKEN1: %s\n", token);
addWord(token,&wordListHead);
}
}
printWordList(wordListHead);
return 0;
}
更新#2:
请注意 addWord
和 findWord
复制代码。 addWord
的第一部分是 [本质上] 复制 findWord
所做的。
但是,addWord
不能 只使用 findWord
[这是可取的] 因为 findWord
,如果找不到匹配 returns NULL
。在那种情况下,它 addWord
需要追加到的最后一个元素(即列表的“尾部”)[没有办法]进行通信。
虽然我们可以向 findWord
添加一个额外的参数来传播这个值,但更好的解决方案是创建一个不同的结构来定义“列表”。
在现有代码中,我们使用“双星”指针作为“列表”指向词头节点。使用单独的结构 更干净 并且有一些额外的优势。
我们可以传递一个指向列表的简单指针。我们不再需要担心我们是否应该传递双星指针。
虽然我们只使用了一个单链表,但是如果我们希望将链表转换为双[=110=,一个单独的列表结构会有所帮助] 链表 [稍后].
我们只是传递一个指向列表的指针,列表可以跟踪列表的头部、列表的尾部以及列表中元素的数量。
链接列表很适合使用 mergesort
进行排序。列表计数有助于提高效率,因为它更容易找到列表的“中点”[合并排序需要知道]。
为了显示 doubly 链表的开头,我在单词结构中添加了一个 prev
元素。这当前未使用,但它暗示了双向链接版本。
我修改了所有函数以获取指向列表的指针,而不是指向头节点的指针。
因为列表结构有一个 tail
指针,addWord
现在可以调用 findWord
。如果 findWord
没有 找到匹配项,addWord
可以简单地使用列表中的 head/tail
指针来找到正确的插入点。
为了进一步简化,我更改了单词节点 [和列表结构] 以使用一些 typedef
语句。
此外,更 usual/idiomatic 将主导结构指针作为第一个参数,因此我颠倒了某些函数的参数顺序。
无论如何,这是 [进一步] 重构的代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef int bool;
#ifdef DEBUG
#define dbgprt(_fmt...) \
printf(_fmt)
#else
#define dbgprt(_fmt...) do { } while (0)
#endif
//structure definition of linked list node
#ifndef WORDLISTH
#define WORDLISTH
// word frequency control
typedef struct WordNode Word_t;
struct WordNode {
const char *word;
unsigned long count;
Word_t *next;
Word_t *prev;
};
// word list control
typedef struct {
Word_t *head;
Word_t *tail;
unsigned long count;
} List_t;
void printWordList(List_t *list);
#endif
// create a list
List_t *
newList(void)
{
List_t *list;
list = calloc(1,sizeof(*list));
return list;
}
// function to check if word is in linked list
Word_t *
findWord(List_t *list,const char *myword)
{
Word_t *curr;
for (curr = list->head; curr != NULL; curr = curr->next) {
if (strcmp(curr->word,myword) == 0)
break;
}
return curr;
}
//function to add word to linked list
Word_t *
addWord(List_t *list,const char *word)
{
Word_t *match;
do {
// try to find existing word
match = findWord(list,word);
// word already exists
if (match != NULL)
break;
// create new node
match = malloc(sizeof(*match));
match->word = strdup(word);
match->count = 0;
match->next = NULL;
// attach to head of list
if (list->head == NULL)
list->head = match;
// append to tail of list
else
list->tail->next = match;
// set new tail of list
list->tail = match;
// advance list count
list->count += 1;
} while (0);
// increment the word frequency count
match->count += 1;
return match;
}
//function to update the count of word
void
updateCount(List_t *list,const char *myword)
{
Word_t *match;
match = findWord(list,myword);
if (match != NULL)
match->count += 1;
}
//function to print word list
//takes head node as argument
void
printWordList(List_t *list)
{
Word_t *curr;
for (curr = list->head; curr != NULL; curr = curr->next) {
printf("%s", curr->word);
printf(" %ld\n", curr->count);
}
}
int
main(int argc, char **argv)
{
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
printf("FILE: %s\n", filename);
if (fp == NULL) {
printf("Error: unable to open the file ");
printf("%s", filename);
return 1;
}
// These are our word delimiters.
char *delim = " \t\n,:;'\".?!#$-><(){}[]|\/*&^%@!~+=_";
char line[1000];
char *token;
// create list/head pointer
List_t *list = newList();
// iterate through each line in file
while (fgets(line, sizeof(line), fp) != NULL) {
// seperate each word
// first word of line
char *bp = line;
while (1) {
token = strtok(bp, delim);
bp = NULL;
if (token == NULL)
break;
dbgprt("TOKEN1: %s\n", token);
addWord(list,token);
}
}
printWordList(list);
return 0;
}
@Craig Estey 为您提供了一个很好的答案,所以不要更改您的答案选择,而不是只在评论中给您留下 link,这里有几个重要的方法查看可能有帮助的列表操作,您必须使用 memory/error 检查程序来验证您对分配内存的使用,尤其是在处理列表操作时。
取一个包含字符串的节点,该字符串的引用计数是该字符串出现的额外次数,如您的情况,例如
typedef struct node_t { /* list node */
char *s;
size_t refcnt;
struct node_t *next;
} node_t;
迭代节点地址和节点指针消除特殊情况
在 Linus on Understanding Pointers 中讨论了同时使用节点地址和指向节点的指针。
例如,您需要检查 if (list->head == NULL)
以将第一个节点添加到列表中,如果同时使用节点地址和指向节点的指针进行迭代,您只需将分配的指针分配给新节点node 到当前节点的地址。无论它是第一个、中间还是最后一个节点,这都有效。它还消除了从列表中删除节点时不必担心前一个节点是什么的问题。在要删除的节点上,您只需将下一个节点的内容分配给当前地址并释放原来存在的节点。这会将您的添加节点功能减少到类似于:
/** add node in sort order to list.
* returns pointer to new node on success, NULL otherwise.
*/
node_t *add_node (node_t **head, const char *s)
{
node_t **ppn = head, /* address of current node */
*pn = *head; /* pointer to current node */
while (pn) { /* iterate to last node */
if (strcmp (pn->s, s) == 0) { /* compare node string with string */
pn->refcnt += 1; /* increment ref count */
return *ppn;
}
ppn = &pn->next; /* ppn to address of next */
pn = pn->next; /* advance pointer to next */
}
return *ppn = create_node (s); /* allocate and return node */
}
(注意: 通过延迟新节点和字符串的分配(上面的 create_node (s)
),在知道需要添加字符串之前避免分配 --简化内存处理)
正如上面评论中提到的,这将您对列表的 doesWordExist()
遍历和您对列表的 addWord()
遍历找到结束合并为一个遍历。如果你的列表中有几十万个节点,你不想多次遍历列表。
使用 strdup()
很好,但知道它 POSIX 不是标准 C
strdup()
用于复制字符串并将结果分配给新指针很方便,但 strdup()
由 POSIX 提供,因此并非所有实现都会提供它。此外,strdup()
分配内存,因此就像任何分配内存的函数一样,在使用 returned 的指针之前,您必须检查结果是否不是 NULL
。您可以通过编写一个简短的等价物来避免潜在的可移植性问题。例如在上面显示的 create_node()
中,它确实:
/** helper function to allocate node, and storage for string.
* copies string to node and initializes node->next pointer NULL
* and node->refcnt zero. returns pointer to allocated node on success,
* NULL otherwise.
*/
node_t *create_node (const char *s)
{
size_t len = strlen (s); /* get string length */
node_t *node = malloc (sizeof *node); /* allocate node */
if (!node) { /* validate EVERY allocation */
perror ("create_node: malloc node");
return NULL;
}
if (!(node->s = malloc (len + 1))) { /* allocate for string */
perror ("create_node: malloc node->s");
free (node); /* on failure, free node before returning NULL */
return NULL;
}
memcpy (node->s, s, len+1); /* copy string to node */
node->refcnt = 0; /* initialize ref count */
node->next = NULL; /* initialze next NULL */
return node; /* return allocated & initialized node */
}
释放分配的内存
任何时候您编写代码创建数据结构时,如果您要删除单个节点或使用列表完成操作,它们都需要能够自行清理。当您创建仅在 main()
下面的函数中声明和使用的列表等时,这变得势在必行,因为函数 return 上没有释放内存。使用 main()
,在 return 程序退出并将释放所有分配的内存。但是,如果仅在 main()
以下创建和使用列表,并且在 return 之前未释放列表内存,则每次调用该函数时都会导致内存泄漏。释放所有内存的函数简短易写,例如
/** delete all nodes in list */
void del_list (node_t *head)
{
node_t *pn = head; /* pointer to iterate */
while (pn) { /* iterate over each node */
node_t *victim = pn; /* set victim to current */
pn = pn->next; /* advance pointer to next */
free (victim->s); /* free current string */
free (victim); /* free current node */
}
}
(**无需担心 refcnt
,因为您正在删除列表)
一个包含所有这些要点的简短示例,以及一个从列表中删除单个节点的函数 del_node()
(或者如果 [=31] 不删除节点,则减少 refcnt
=] 非零)可以是:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
typedef struct node_t { /* list node */
char *s;
size_t refcnt;
struct node_t *next;
} node_t;
/** helper function to allocate node, and storage for string.
* copies string to node and initializes node->next pointer NULL
* and node->refcnt zero. returns pointer to allocated node on success,
* NULL otherwise.
*/
node_t *create_node (const char *s)
{
size_t len = strlen (s); /* get string length */
node_t *node = malloc (sizeof *node); /* allocate node */
if (!node) { /* validate EVERY allocation */
perror ("create_node: malloc node");
return NULL;
}
if (!(node->s = malloc (len + 1))) { /* allocate for string */
perror ("create_node: malloc node->s");
free (node); /* on failure, free node before returning NULL */
return NULL;
}
memcpy (node->s, s, len+1); /* copy string to node */
node->refcnt = 0; /* initialize ref count */
node->next = NULL; /* initialze next NULL */
return node; /* return allocated & initialized node */
}
/** add node in sort order to list.
* returns pointer to new node on success, NULL otherwise.
*/
node_t *add_node (node_t **head, const char *s)
{
node_t **ppn = head, /* address of current node */
*pn = *head; /* pointer to current node */
while (pn) { /* iterate to last node */
if (strcmp (pn->s, s) == 0) { /* compare node string with string */
pn->refcnt += 1; /* increment ref count */
return *ppn;
}
ppn = &pn->next; /* ppn to address of next */
pn = pn->next; /* advance pointer to next */
}
return *ppn = create_node (s); /* allocate and return node */
}
/** print all nodes in list */
void prn_list (node_t *head)
{
if (!head) { /* check if list is empty */
puts ("list-empty");
return;
}
for (node_t *pn = head; pn; pn = pn->next) /* iterate over each node */
printf ("%-24s %4zu\n", pn->s, pn->refcnt); /* print string an refcount */
}
/** delete node with string s from list (for loop) */
void del_node (node_t **head, const char *s)
{
node_t **ppn = head; /* address of node */
node_t *pn = *head; /* pointer to node */
for (; pn; ppn = &pn->next, pn = pn->next) {
if (strcmp (pn->s, s) == 0) { /* does string match */
if (pn->refcnt) { /* ref count > 0 */
pn->refcnt -= 1; /* decrement ref count */
return; /* done */
}
*ppn = pn->next; /* set content at address to next */
free (pn); /* free original pointer */
break;
}
}
}
/** delete all nodes in list */
void del_list (node_t *head)
{
node_t *pn = head; /* pointer to iterate */
while (pn) { /* iterate over each node */
node_t *victim = pn; /* set victim to current */
pn = pn->next; /* advance pointer to next */
free (victim->s); /* free current string */
free (victim); /* free current node */
}
}
int main (int argc, char **argv) {
char buf[MAXC]; /* read buffer */
const char *delim = " \t\n.,;?!()"; /* strtok delimiters */
node_t *list = NULL; /* pointer to list (must initialize NULL */
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
while (fgets (buf, MAXC, fp)) /* read all lines in file */
/* tokenize line based on delim */
for (char *p = strtok (buf, delim); p; p = strtok (NULL, delim)) {
if (ispunct(*p)) /* if word is punctionation, skip */
continue;
add_node (&list, p); /* add node or increment refcnt */
}
if (fp != stdin) /* close file if not stdin */
fclose (fp);
puts ("string refcnt\n" /* heading */
"-------------------------------");
prn_list (list); /* print contents of list */
del_list (list); /* free all list memory */
return 0;
}
查看 delim
中包含的字符以与 strtok()
一起使用,以及使用 ispunct()
跳过以标点符号结尾的标记(允许连字符单词,但跳过单独用作句子延续等的连字符....)
示例输入文件
$ cat dat/tr_dec_3_1907.txt
No man is above the law and no man is below it;
nor do we ask any man's permission when we require him to obey it.
Obedience to the law is demanded as a right; not asked as a favor.
(Theodore Roosevelt - December 3, 1907)
示例Use/Output
$ ./bin/lls_string_nosort_refcnt dat/tr_dec_3_1907.txt
string refcnt
-------------------------------
No 0
man 1
is 2
above 0
the 1
law 1
and 0
no 0
below 0
it 1
nor 0
do 0
we 1
ask 0
any 0
man's 0
permission 0
when 0
require 0
him 0
to 1
obey 0
Obedience 0
demanded 0
as 1
a 1
right 0
not 0
asked 0
favor 0
Theodore 0
Roosevelt 0
December 0
3 0
1907 0
内存Use/Error检查
在您编写的任何动态分配内存的代码中,您对分配的任何内存块负有 2 责任:(1) 始终保留指向内存块的起始地址 因此,(2) 当不再需要它时可以释放。
您必须使用内存错误检查程序来确保您不会尝试访问内存或写入 beyond/outside 您分配的块的边界,尝试读取或基于未初始化的条件跳转值,最后,确认您释放了所有已分配的内存。
对于Linux valgrind
是正常的选择。每个平台都有类似的内存检查器。它们都很简单易用,只需运行你的程序就可以了。
$ valgrind ./bin/lls_string_nosort_refcnt dat/tr_dec_3_1907.txt
==8528== Memcheck, a memory error detector
==8528== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==8528== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==8528== Command: ./bin/lls_string_nosort_refcnt dat/tr_dec_3_1907.txt
==8528==
string refcnt
-------------------------------
No 0
man 1
is 2
above 0
the 1
law 1
and 0
no 0
below 0
it 1
nor 0
do 0
we 1
ask 0
any 0
man's 0
permission 0
when 0
require 0
him 0
to 1
obey 0
Obedience 0
demanded 0
as 1
a 1
right 0
not 0
asked 0
favor 0
Theodore 0
Roosevelt 0
December 0
3 0
1907 0
==8528==
==8528== HEAP SUMMARY:
==8528== in use at exit: 0 bytes in 0 blocks
==8528== total heap usage: 73 allocs, 73 frees, 6,693 bytes allocated
==8528==
==8528== All heap blocks were freed -- no leaks are possible
==8528==
==8528== For counts of detected and suppressed errors, rerun with: -v
始终确认您已释放所有分配的内存并且没有内存错误。
您已经有了解决眼前问题的方法,但在您的项目中,请考虑上面的一些技巧,以消除列表的多次遍历,以及围绕 strdup()
的可移植性(和验证)问题。祝你编码顺利。
我有以下程序,它从文本文件中读取单词并创建一个链表,每个节点包含:单词、计数、下一个。
当一个词已经存在于链表中时更新计数,否则,在链表的末尾创建一个节点。
我的所有功能都有效,除了我在链表末尾添加单词的功能。节点链接可能有错误?
使用我的以下文本文件:第 1 行“我的名字是娜塔莉”,第 2 行“我的狗是 niko” 我应该得到以下输出:my(2), name(1), is(2), natalie(1), dog(1), niko(1) 但我得到:my(2), dog(2), s(1), iko(1), is(1), niko(1)
我的错误在哪里?
//function to add word to linked list
struct WordNode *addWord(char* word, struct WordNode *wordListHead){
//create new node
struct WordNode *current = malloc(sizeof(struct WordNode));
current->word = word;
current->count = 1;
current->next = NULL;
//
while(1){
if((wordListHead != NULL)&&(wordListHead->next ==NULL)){
//connect
wordListHead->next=current;
break;
}
wordListHead = wordListHead->next;
}
}
主要调用这里:
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
printf("%s\n", filename);
if (fp == NULL){
printf("Error: unable to open the file ");
printf("%s", filename);
return 1;
}
else {
char *delim = " \t\n,:;'\".?!#$-><(){}[]|\/*&^%@!~+=_"; // These are our word delimiters.
char line[1000];
char * token;
int count = 0;//count used so that first word of the doc can be created as head node
//create head pointer
struct WordNode *wordListHead = NULL;
//create current pointer
struct WordNode *current = NULL;
//iterate through each line in file
while(fgets(line, 1000, fp)){
//seperate each word
//first word of line
token = strtok(line, delim);
printf("%s\n", token);
if(count == 0){
//first word of document, create first wordnode (head node)
wordListHead = malloc(sizeof(struct WordNode));
wordListHead->word = token;
wordListHead->count = 1;
wordListHead->next = NULL;
}
else{
//check if first word of line exists in linked list
if((doesWordExist(token, wordListHead)) == 0){
//update count
updateCount(token, wordListHead);
}
else{
//create node
addWord(token, wordListHead);
}
}
//iterate through all the other words of line
while ((token=strtok(NULL, delim)) != NULL){
printf("%s\n", token);
//check if name is in linked list
if((doesWordExist(token, wordListHead)) == 0){
//update count
updateCount(token, wordListHead);
}
else{
//create node
addWord(token, wordListHead);
}
}
count++;
}
printWordList(wordListHead);
}
}
此处定义的结构:
//structure definition of linked list node
#ifndef WORDLISTH
#define WORDLISTH
struct WordNode{
char *word;
unsigned long count;
struct WordNode *next;
};
void printWordList( struct WordNode *wordListHead);
struct WordNode *addWord(char* word , struct WordNode *wordListead);
#endif
其他功能供参考:
//function to check if word is in linked list
bool doesWordExist(char* myword, struct WordNode *wordListHead){
while (wordListHead != NULL){
if(strcmp(wordListHead->word, myword) == 0){
return 0;
}
wordListHead= wordListHead-> next;
}
return 1;
}
//function to update the count of word
void updateCount(char* myword, struct WordNode *wordListHead){
while (wordListHead != NULL){
if(strcmp(wordListHead->word, myword) == 0){
//update count value
//capture current count and add 1
int curcount = wordListHead->count;
int newcount = curcount + 1;
wordListHead->count = newcount;
//printf("%d\n", newcount);
}
wordListHead= wordListHead-> next;
}
}
//function to print word list
//takes head node as argument
void printWordList( struct WordNode *wordListHead){
//WordNode *toyptr = wordListHead;
while (wordListHead != NULL){
printf("%s\n", wordListHead->word);
printf("%ld\n", wordListHead->count);
wordListHead = wordListHead -> next;
}
}
当您将 token
存储到结构中时,您使用的是作为输入缓冲区一部分的指针。
在新的输入行上,从前行收集的标记将是corrupted/trashed。
您需要分配 space 以将令牌存储在堆上的结构中。为此使用 strdup
。
因此,在 addWord
中,您需要:
current->word = strdup(word);
而在 main
中,您需要:
wordListHead->word = strdup(token);
更新:
这是首要问题。但是,您的代码做了一堆不必要的复制。
addWord
不处理空列表。但是,如果确实如此,main
就不需要为行中的第一个单词和后续单词单独 [复制] 代码。
可以将strcmp
合并到addWord
中,可以“一举多得”。 (即 单次 扫描列表)
对于 doesWordExist
,它 returns 匹配 bool
。如果它返回指向匹配元素的指针,updateCount
将只需要增加计数 [并且 而不是 重新扫描列表]。我已经相应地更新了这些功能,但由于 addWord
以下是我将如何简化和重构代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef int bool;
#ifdef DEBUG
#define dbgprt(_fmt...) \
printf(_fmt)
#else
#define dbgprt(_fmt...) do { } while (0)
#endif
//structure definition of linked list node
#ifndef WORDLISTH
#define WORDLISTH
struct WordNode {
char *word;
unsigned long count;
struct WordNode *next;
};
void printWordList(struct WordNode *wordListHead);
#endif
//function to add word to linked list
struct WordNode *
addWord(char *word, struct WordNode **list)
{
struct WordNode *curr;
struct WordNode *prev = NULL;
struct WordNode *newnode = NULL;
for (curr = *list; curr != NULL; curr = curr->next) {
if (strcmp(curr->word,word) == 0) {
newnode = curr;
break;
}
prev = curr;
}
// create new node
do {
// word already exists
if (newnode != NULL)
break;
newnode = malloc(sizeof(struct WordNode));
newnode->word = strdup(word);
newnode->count = 0;
newnode->next = NULL;
// attach to tail of list
if (prev != NULL) {
prev->next = newnode;
break;
}
// first node -- set list pointer
*list = newnode;
} while (0);
// increment the count
newnode->count += 1;
return newnode;
}
//function to check if word is in linked list
struct WordNode *
findWord(char *myword, struct WordNode *head)
{
struct WordNode *curr;
for (curr = head; curr != NULL; curr = curr->next) {
if (strcmp(curr->word,myword) == 0)
break;
}
return curr;
}
//function to update the count of word
void
updateCount(char *myword, struct WordNode *head)
{
struct WordNode *match;
match = findWord(myword,head);
if (match != NULL)
match->count += 1;
}
//function to print word list
//takes head node as argument
void
printWordList(struct WordNode *head)
{
struct WordNode *curr;
for (curr = head; curr != NULL; curr = curr->next) {
printf("%s", curr->word);
printf(" %ld\n", curr->count);
}
}
int
main(int argc, char **argv)
{
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
printf("FILE: %s\n", filename);
if (fp == NULL) {
printf("Error: unable to open the file ");
printf("%s", filename);
return 1;
}
// These are our word delimiters.
char *delim = " \t\n,:;'\".?!#$-><(){}[]|\/*&^%@!~+=_";
char line[1000];
char *token;
// create head pointer
struct WordNode *wordListHead = NULL;
// iterate through each line in file
while (fgets(line, sizeof(line), fp)) {
// seperate each word
// first word of line
char *bp = line;
while (1) {
token = strtok(bp, delim);
bp = NULL;
if (token == NULL)
break;
dbgprt("TOKEN1: %s\n", token);
addWord(token,&wordListHead);
}
}
printWordList(wordListHead);
return 0;
}
更新#2:
请注意 addWord
和 findWord
复制代码。 addWord
的第一部分是 [本质上] 复制 findWord
所做的。
但是,addWord
不能 只使用 findWord
[这是可取的] 因为 findWord
,如果找不到匹配 returns NULL
。在那种情况下,它 addWord
需要追加到的最后一个元素(即列表的“尾部”)[没有办法]进行通信。
虽然我们可以向 findWord
添加一个额外的参数来传播这个值,但更好的解决方案是创建一个不同的结构来定义“列表”。
在现有代码中,我们使用“双星”指针作为“列表”指向词头节点。使用单独的结构 更干净 并且有一些额外的优势。
我们可以传递一个指向列表的简单指针。我们不再需要担心我们是否应该传递双星指针。
虽然我们只使用了一个单链表,但是如果我们希望将链表转换为双[=110=,一个单独的列表结构会有所帮助] 链表 [稍后].
我们只是传递一个指向列表的指针,列表可以跟踪列表的头部、列表的尾部以及列表中元素的数量。
链接列表很适合使用 mergesort
进行排序。列表计数有助于提高效率,因为它更容易找到列表的“中点”[合并排序需要知道]。
为了显示 doubly 链表的开头,我在单词结构中添加了一个 prev
元素。这当前未使用,但它暗示了双向链接版本。
我修改了所有函数以获取指向列表的指针,而不是指向头节点的指针。
因为列表结构有一个 tail
指针,addWord
现在可以调用 findWord
。如果 findWord
没有 找到匹配项,addWord
可以简单地使用列表中的 head/tail
指针来找到正确的插入点。
为了进一步简化,我更改了单词节点 [和列表结构] 以使用一些 typedef
语句。
此外,更 usual/idiomatic 将主导结构指针作为第一个参数,因此我颠倒了某些函数的参数顺序。
无论如何,这是 [进一步] 重构的代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef int bool;
#ifdef DEBUG
#define dbgprt(_fmt...) \
printf(_fmt)
#else
#define dbgprt(_fmt...) do { } while (0)
#endif
//structure definition of linked list node
#ifndef WORDLISTH
#define WORDLISTH
// word frequency control
typedef struct WordNode Word_t;
struct WordNode {
const char *word;
unsigned long count;
Word_t *next;
Word_t *prev;
};
// word list control
typedef struct {
Word_t *head;
Word_t *tail;
unsigned long count;
} List_t;
void printWordList(List_t *list);
#endif
// create a list
List_t *
newList(void)
{
List_t *list;
list = calloc(1,sizeof(*list));
return list;
}
// function to check if word is in linked list
Word_t *
findWord(List_t *list,const char *myword)
{
Word_t *curr;
for (curr = list->head; curr != NULL; curr = curr->next) {
if (strcmp(curr->word,myword) == 0)
break;
}
return curr;
}
//function to add word to linked list
Word_t *
addWord(List_t *list,const char *word)
{
Word_t *match;
do {
// try to find existing word
match = findWord(list,word);
// word already exists
if (match != NULL)
break;
// create new node
match = malloc(sizeof(*match));
match->word = strdup(word);
match->count = 0;
match->next = NULL;
// attach to head of list
if (list->head == NULL)
list->head = match;
// append to tail of list
else
list->tail->next = match;
// set new tail of list
list->tail = match;
// advance list count
list->count += 1;
} while (0);
// increment the word frequency count
match->count += 1;
return match;
}
//function to update the count of word
void
updateCount(List_t *list,const char *myword)
{
Word_t *match;
match = findWord(list,myword);
if (match != NULL)
match->count += 1;
}
//function to print word list
//takes head node as argument
void
printWordList(List_t *list)
{
Word_t *curr;
for (curr = list->head; curr != NULL; curr = curr->next) {
printf("%s", curr->word);
printf(" %ld\n", curr->count);
}
}
int
main(int argc, char **argv)
{
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
printf("FILE: %s\n", filename);
if (fp == NULL) {
printf("Error: unable to open the file ");
printf("%s", filename);
return 1;
}
// These are our word delimiters.
char *delim = " \t\n,:;'\".?!#$-><(){}[]|\/*&^%@!~+=_";
char line[1000];
char *token;
// create list/head pointer
List_t *list = newList();
// iterate through each line in file
while (fgets(line, sizeof(line), fp) != NULL) {
// seperate each word
// first word of line
char *bp = line;
while (1) {
token = strtok(bp, delim);
bp = NULL;
if (token == NULL)
break;
dbgprt("TOKEN1: %s\n", token);
addWord(list,token);
}
}
printWordList(list);
return 0;
}
@Craig Estey 为您提供了一个很好的答案,所以不要更改您的答案选择,而不是只在评论中给您留下 link,这里有几个重要的方法查看可能有帮助的列表操作,您必须使用 memory/error 检查程序来验证您对分配内存的使用,尤其是在处理列表操作时。
取一个包含字符串的节点,该字符串的引用计数是该字符串出现的额外次数,如您的情况,例如
typedef struct node_t { /* list node */
char *s;
size_t refcnt;
struct node_t *next;
} node_t;
迭代节点地址和节点指针消除特殊情况
在 Linus on Understanding Pointers 中讨论了同时使用节点地址和指向节点的指针。
例如,您需要检查 if (list->head == NULL)
以将第一个节点添加到列表中,如果同时使用节点地址和指向节点的指针进行迭代,您只需将分配的指针分配给新节点node 到当前节点的地址。无论它是第一个、中间还是最后一个节点,这都有效。它还消除了从列表中删除节点时不必担心前一个节点是什么的问题。在要删除的节点上,您只需将下一个节点的内容分配给当前地址并释放原来存在的节点。这会将您的添加节点功能减少到类似于:
/** add node in sort order to list.
* returns pointer to new node on success, NULL otherwise.
*/
node_t *add_node (node_t **head, const char *s)
{
node_t **ppn = head, /* address of current node */
*pn = *head; /* pointer to current node */
while (pn) { /* iterate to last node */
if (strcmp (pn->s, s) == 0) { /* compare node string with string */
pn->refcnt += 1; /* increment ref count */
return *ppn;
}
ppn = &pn->next; /* ppn to address of next */
pn = pn->next; /* advance pointer to next */
}
return *ppn = create_node (s); /* allocate and return node */
}
(注意: 通过延迟新节点和字符串的分配(上面的 create_node (s)
),在知道需要添加字符串之前避免分配 --简化内存处理)
正如上面评论中提到的,这将您对列表的 doesWordExist()
遍历和您对列表的 addWord()
遍历找到结束合并为一个遍历。如果你的列表中有几十万个节点,你不想多次遍历列表。
使用 strdup()
很好,但知道它 POSIX 不是标准 C
strdup()
用于复制字符串并将结果分配给新指针很方便,但 strdup()
由 POSIX 提供,因此并非所有实现都会提供它。此外,strdup()
分配内存,因此就像任何分配内存的函数一样,在使用 returned 的指针之前,您必须检查结果是否不是 NULL
。您可以通过编写一个简短的等价物来避免潜在的可移植性问题。例如在上面显示的 create_node()
中,它确实:
/** helper function to allocate node, and storage for string.
* copies string to node and initializes node->next pointer NULL
* and node->refcnt zero. returns pointer to allocated node on success,
* NULL otherwise.
*/
node_t *create_node (const char *s)
{
size_t len = strlen (s); /* get string length */
node_t *node = malloc (sizeof *node); /* allocate node */
if (!node) { /* validate EVERY allocation */
perror ("create_node: malloc node");
return NULL;
}
if (!(node->s = malloc (len + 1))) { /* allocate for string */
perror ("create_node: malloc node->s");
free (node); /* on failure, free node before returning NULL */
return NULL;
}
memcpy (node->s, s, len+1); /* copy string to node */
node->refcnt = 0; /* initialize ref count */
node->next = NULL; /* initialze next NULL */
return node; /* return allocated & initialized node */
}
释放分配的内存
任何时候您编写代码创建数据结构时,如果您要删除单个节点或使用列表完成操作,它们都需要能够自行清理。当您创建仅在 main()
下面的函数中声明和使用的列表等时,这变得势在必行,因为函数 return 上没有释放内存。使用 main()
,在 return 程序退出并将释放所有分配的内存。但是,如果仅在 main()
以下创建和使用列表,并且在 return 之前未释放列表内存,则每次调用该函数时都会导致内存泄漏。释放所有内存的函数简短易写,例如
/** delete all nodes in list */
void del_list (node_t *head)
{
node_t *pn = head; /* pointer to iterate */
while (pn) { /* iterate over each node */
node_t *victim = pn; /* set victim to current */
pn = pn->next; /* advance pointer to next */
free (victim->s); /* free current string */
free (victim); /* free current node */
}
}
(**无需担心 refcnt
,因为您正在删除列表)
一个包含所有这些要点的简短示例,以及一个从列表中删除单个节点的函数 del_node()
(或者如果 [=31] 不删除节点,则减少 refcnt
=] 非零)可以是:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
typedef struct node_t { /* list node */
char *s;
size_t refcnt;
struct node_t *next;
} node_t;
/** helper function to allocate node, and storage for string.
* copies string to node and initializes node->next pointer NULL
* and node->refcnt zero. returns pointer to allocated node on success,
* NULL otherwise.
*/
node_t *create_node (const char *s)
{
size_t len = strlen (s); /* get string length */
node_t *node = malloc (sizeof *node); /* allocate node */
if (!node) { /* validate EVERY allocation */
perror ("create_node: malloc node");
return NULL;
}
if (!(node->s = malloc (len + 1))) { /* allocate for string */
perror ("create_node: malloc node->s");
free (node); /* on failure, free node before returning NULL */
return NULL;
}
memcpy (node->s, s, len+1); /* copy string to node */
node->refcnt = 0; /* initialize ref count */
node->next = NULL; /* initialze next NULL */
return node; /* return allocated & initialized node */
}
/** add node in sort order to list.
* returns pointer to new node on success, NULL otherwise.
*/
node_t *add_node (node_t **head, const char *s)
{
node_t **ppn = head, /* address of current node */
*pn = *head; /* pointer to current node */
while (pn) { /* iterate to last node */
if (strcmp (pn->s, s) == 0) { /* compare node string with string */
pn->refcnt += 1; /* increment ref count */
return *ppn;
}
ppn = &pn->next; /* ppn to address of next */
pn = pn->next; /* advance pointer to next */
}
return *ppn = create_node (s); /* allocate and return node */
}
/** print all nodes in list */
void prn_list (node_t *head)
{
if (!head) { /* check if list is empty */
puts ("list-empty");
return;
}
for (node_t *pn = head; pn; pn = pn->next) /* iterate over each node */
printf ("%-24s %4zu\n", pn->s, pn->refcnt); /* print string an refcount */
}
/** delete node with string s from list (for loop) */
void del_node (node_t **head, const char *s)
{
node_t **ppn = head; /* address of node */
node_t *pn = *head; /* pointer to node */
for (; pn; ppn = &pn->next, pn = pn->next) {
if (strcmp (pn->s, s) == 0) { /* does string match */
if (pn->refcnt) { /* ref count > 0 */
pn->refcnt -= 1; /* decrement ref count */
return; /* done */
}
*ppn = pn->next; /* set content at address to next */
free (pn); /* free original pointer */
break;
}
}
}
/** delete all nodes in list */
void del_list (node_t *head)
{
node_t *pn = head; /* pointer to iterate */
while (pn) { /* iterate over each node */
node_t *victim = pn; /* set victim to current */
pn = pn->next; /* advance pointer to next */
free (victim->s); /* free current string */
free (victim); /* free current node */
}
}
int main (int argc, char **argv) {
char buf[MAXC]; /* read buffer */
const char *delim = " \t\n.,;?!()"; /* strtok delimiters */
node_t *list = NULL; /* pointer to list (must initialize NULL */
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
while (fgets (buf, MAXC, fp)) /* read all lines in file */
/* tokenize line based on delim */
for (char *p = strtok (buf, delim); p; p = strtok (NULL, delim)) {
if (ispunct(*p)) /* if word is punctionation, skip */
continue;
add_node (&list, p); /* add node or increment refcnt */
}
if (fp != stdin) /* close file if not stdin */
fclose (fp);
puts ("string refcnt\n" /* heading */
"-------------------------------");
prn_list (list); /* print contents of list */
del_list (list); /* free all list memory */
return 0;
}
查看 delim
中包含的字符以与 strtok()
一起使用,以及使用 ispunct()
跳过以标点符号结尾的标记(允许连字符单词,但跳过单独用作句子延续等的连字符....)
示例输入文件
$ cat dat/tr_dec_3_1907.txt
No man is above the law and no man is below it;
nor do we ask any man's permission when we require him to obey it.
Obedience to the law is demanded as a right; not asked as a favor.
(Theodore Roosevelt - December 3, 1907)
示例Use/Output
$ ./bin/lls_string_nosort_refcnt dat/tr_dec_3_1907.txt
string refcnt
-------------------------------
No 0
man 1
is 2
above 0
the 1
law 1
and 0
no 0
below 0
it 1
nor 0
do 0
we 1
ask 0
any 0
man's 0
permission 0
when 0
require 0
him 0
to 1
obey 0
Obedience 0
demanded 0
as 1
a 1
right 0
not 0
asked 0
favor 0
Theodore 0
Roosevelt 0
December 0
3 0
1907 0
内存Use/Error检查
在您编写的任何动态分配内存的代码中,您对分配的任何内存块负有 2 责任:(1) 始终保留指向内存块的起始地址 因此,(2) 当不再需要它时可以释放。
您必须使用内存错误检查程序来确保您不会尝试访问内存或写入 beyond/outside 您分配的块的边界,尝试读取或基于未初始化的条件跳转值,最后,确认您释放了所有已分配的内存。
对于Linux valgrind
是正常的选择。每个平台都有类似的内存检查器。它们都很简单易用,只需运行你的程序就可以了。
$ valgrind ./bin/lls_string_nosort_refcnt dat/tr_dec_3_1907.txt
==8528== Memcheck, a memory error detector
==8528== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==8528== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==8528== Command: ./bin/lls_string_nosort_refcnt dat/tr_dec_3_1907.txt
==8528==
string refcnt
-------------------------------
No 0
man 1
is 2
above 0
the 1
law 1
and 0
no 0
below 0
it 1
nor 0
do 0
we 1
ask 0
any 0
man's 0
permission 0
when 0
require 0
him 0
to 1
obey 0
Obedience 0
demanded 0
as 1
a 1
right 0
not 0
asked 0
favor 0
Theodore 0
Roosevelt 0
December 0
3 0
1907 0
==8528==
==8528== HEAP SUMMARY:
==8528== in use at exit: 0 bytes in 0 blocks
==8528== total heap usage: 73 allocs, 73 frees, 6,693 bytes allocated
==8528==
==8528== All heap blocks were freed -- no leaks are possible
==8528==
==8528== For counts of detected and suppressed errors, rerun with: -v
始终确认您已释放所有分配的内存并且没有内存错误。
您已经有了解决眼前问题的方法,但在您的项目中,请考虑上面的一些技巧,以消除列表的多次遍历,以及围绕 strdup()
的可移植性(和验证)问题。祝你编码顺利。