如何在 for 循环中使用 strtok()?
How to strtok() in for-loop?
对于给定的表示,
/* One data row of the input: a numeric age plus two name strings. */
typedef struct {
    int   age;        /* age column, stored as an int */
    char *firstName;  /* pointer to the first-name string */
    char *lastName;   /* pointer to the last-name string */
} Record;
并给定 file.txt
,
Age,LastName,FirstName
50,B,A
30,A,B
20,X,D
10,F,A
90,V,E
60,N,M
下面是main()
、
中的代码
pFile=fopen("file.txt", "r");
...
//Complete file is copied to 'readBuffer', approach inspired from
//
....
/* First strtok() call on readBuffer: the token returned is the header line. */
char *record = strtok(readBuffer,"\n"); //Ignore header record
record = strtok(NULL, "\n");// Read first data record(50,'B','A')
/* Outer loop: intended to run once per "\n"-separated line of readBuffer. */
for(;record != NULL; record = strtok(NULL,"\n")){
printf("###Print complete record\n");
puts(record);
Record *r = malloc(sizeof(Record)*1);
/*
 * NOTE: strtok() keeps one internal position shared by all callers.
 * Passing `record` here starts a new tokenization of this line and
 * discards the position saved for readBuffer, so the strtok(NULL,"\n")
 * in the loop header cannot advance to the next line afterwards.
 */
r->age = atoi(strtok(record,","));
char *firstName = strtok(NULL,",");
char *lastName = strtok(NULL, ",");
r->firstName = strdup(firstName);
r->lastName = strdup(lastName);
printf("Age: %d\n", r->age);
printf("First name: %s\n", r->firstName);
printf("Last name: %s\n", r->lastName);
/* NOTE(review): r and its strings are not freed or stored in the visible code. */
}
在 for 循环中,strtok(readBuffer,",") 与 strtok(record,",") 这两处调用似乎互相干扰了。
实际输出显示标记化只发生在一条记录上。
$ ./program.exe
Print complete file
Age,LastName,FirstName
50,B,A
30,A,B
20,X,D
10,F,A
90,V,E
60,N,M
###Print complete record
50,B,A
Age: 50
First name: B
Last name: A
如何解决?
如果在我们的例子中可能的话,使用 strtok_r()
似乎是最简单的方法。请注意,strtok_r() 不属于标准 C,而是由 POSIX 定义的。
来自man page,
The strtok_r()
function is a reentrant version strtok()
. The saveptr
argument is a pointer to a char *
variable that is used internally by strtok_r()
in order to maintain context between successive calls that parse the same string.
和
Different strings may be parsed concurrently using sequences of calls to strtok_r()
that specify different saveptr
arguments.
手册页中还有您正在查找的场景的示例。
问题更多与逻辑有关,而不是 strtok 的使用。
另外需要注意您所读取的记录的格式:姓氏字段(每行最后一个字段)后面没有逗号,而是以换行符结束。
下面的代码可以解决您的问题
{
    char str[80] = "Age,LastName,FirstName\n50,B,A\n30,A,B\n20,X,D\n";
    const char newline[2] = "\n";
    const char comma[2] = ",";

    /* Consume the header line; this also starts strtok() on str. */
    strtok(str, newline);

    /*
     * Walk the rest of str with a single strtok() sequence: the first two
     * fields of a record end at a comma, the last one ends at the newline.
     * Only the delimiter changes between calls, so strtok()'s internal
     * position is never reset.
     */
    for (;;)
    {
        char *age = strtok(NULL, comma);
        if (age == NULL)
            break; /* no more input */

        char *firstName = strtok(NULL, comma);
        char *lastName = strtok(NULL, newline);
        if (firstName == NULL || lastName == NULL)
            break; /* truncated record: nothing sensible to copy */

        /* Allocate only once a complete record is known to exist. */
        Record *r = malloc(sizeof *r);
        if (r == NULL)
            break;

        r->age = atoi(age);
        r->firstName = strdup(firstName);
        r->lastName = strdup(lastName);

        /* strdup() returns NULL on allocation failure; do not print NULL. */
        if (r->firstName != NULL && r->lastName != NULL)
        {
            printf("Age: %d\n", r->age);
            printf("First name: %s\n", r->firstName);
            printf("Last name: %s\n", r->lastName);
        }

        /* This demo only prints the record; real code would store r instead. */
        free(r->firstName);
        free(r->lastName);
        free(r);
    }
    return(0);
}
正如@David C. Rankin 所建议的那样,使用 fgets
配合 strtok
逐行读取是解决此问题的好方法。
如果您将来想使用 mergesort
,那么将数据存储在结构体数组中是实现该排序算法最简单的方式。此外,如果您不知道文件中有多少行,那么您可能需要在运行时动态分配该数组。
您可以有一个较低级别的 struct
存储文件中的每一行:
/* Lower-level struct: the fields of a single line of the file. */
typedef struct {
    int   age;        /* age column, stored as an int */
    char *firstname;  /* pointer to the first name string */
    char *lastname;   /* pointer to the last name string */
} record_t;
还有一个更高级别的 struct
存储文件的所有内容:
/* Higher-level struct: everything read from the file. */
typedef struct {
    record_t *records;   /* array of record_t, one element per data line */
    char     *headers;   /* string holding the header line */
    size_t    currsize;  /* number of elements currently allocated in records */
    size_t    lastidx;   /* number of elements in use, i.e. the next free index */
} allrecords_t;
关于fgets的注意事项:
- 会在缓冲区末尾的空终止符 \0 之前保留读到的 \n 字符。不过,这个附加的 \n 可以很容易地删除。
- 出错时返回 NULL。如果到达 EOF 且没有读取到任何字符,也会返回 NULL。
- 缓冲区大小必须静态声明。
- 需要从指定的流中读取,从
stdin
或 FILE *
。
程序中 fgets 的可选用法:
使用fgets()
时,调用一次即可消费头信息:
fgets(buffer, 256, pfile); /* error checking needed */
然后,您可以在 while()
循环中再次调用它,以使用文件中的其余数据:
while (fgets(buffer, 256, pfile) != NULL) {
....
}
在一个程序中实现所有这些想法:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Constants used */
/* Initial number of record_t elements allocated by initialize_records(). */
#define INITSIZE 20
/* Size of the line buffer declared in main() and passed to fgets(). */
#define BUFFSIZE 256
/* Labels passed to check_ptr() to say which kind of allocation failed. */
#define MALLOC_MSG "Allocation"
#define REALLOC_MSG "Reallocation"
/* array of structs setup */
/* Lower-level struct: the fields of a single line of the file. */
typedef struct {
    int   age;        /* age column, stored as an int */
    char *firstname;  /* heap copy of the second field of the line */
    char *lastname;   /* heap copy of the third field of the line */
} record_t;
/* Higher-level struct: everything read from the file. */
typedef struct {
    record_t *records;   /* heap array of record_t, one element per data line */
    char     *headers;   /* heap copy of the header line */
    size_t    currsize;  /* number of elements currently allocated in records */
    size_t    lastidx;   /* number of elements in use, i.e. the next free index */
} allrecords_t;
/* function prototypes */
/* Allocates the top-level container and its initial array of records. */
allrecords_t *initialize_records(void);
/* Reads one line from filestream and stores a copy of it in Record->headers. */
void read_header(FILE *filestream, allrecords_t *Record, char buffer[]);
/* Reads the remaining lines, appending one record_t per line to Record->records. */
void read_data(FILE *filestream, allrecords_t *Record, char buffer[]);
/* Prints the header and every record, then frees Record and all its members. */
void print_records(allrecords_t *Record);
/* Prints a message containing msg and exits when ptr is NULL. */
void check_ptr(void *ptr, const char *msg);
/* Strips the trailing '\n' of buffer; exits if a non-empty buffer lacks one. */
void remove_newline(char buffer[]);
/*
 * Program entry point: opens "fileex.txt", reads its header line and data
 * lines into an allrecords_t, then prints and frees everything.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if the file cannot be opened
 * (the helpers also exit on read or allocation errors).
 */
int main(void) {
    FILE *fp;
    allrecords_t *Record;

    /* fixed-size line buffer shared by every fgets() call */
    char buffer[BUFFSIZE];

    fp = fopen("fileex.txt", "r");
    if (!fp) {
        fprintf(stderr, "Cannot read file.\n");
        exit(EXIT_FAILURE);
    }

    Record = initialize_records();

    /* Reads the first line */
    read_header(fp, Record, buffer);

    /* Reads next lines */
    read_data(fp, Record, buffer);

    /* All input has been consumed; release the stream before printing. */
    fclose(fp);

    /* prints and frees structure elements */
    print_records(Record);

    return 0;
}
/*
 * Reads every remaining line of filestream and appends one record_t per
 * line to Record->records, doubling the array when it is full.
 *
 * filestream: open stream positioned after the header line.
 * Record:     container to fill; Record->lastidx is advanced per record.
 * buffer:     caller-supplied scratch buffer of BUFFSIZE bytes.
 *
 * Each line is split on ',' into three fields. A line that yields no field
 * at all is skipped; a line with fewer than three fields is reported on
 * stderr and terminates the program, because passing a NULL token to
 * atoi()/strlen() would be undefined behaviour.
 */
void read_data(FILE *filestream, allrecords_t *Record, char buffer[]) {
    const char *delim = ",";

    while (fgets(buffer, BUFFSIZE, filestream) != NULL) {
        char *age;
        char *first;
        char *last;
        record_t *slot;

        remove_newline(buffer);

        /* Tokenize the whole line before touching the array. */
        age = strtok(buffer, delim);
        if (age == NULL) {
            continue; /* line without any field */
        }
        first = strtok(NULL, delim);
        last = strtok(NULL, delim);
        if (first == NULL || last == NULL) {
            fprintf(stderr, "Malformed record: expected three comma-separated fields.\n");
            exit(EXIT_FAILURE);
        }

        /* resize array when necessary */
        if (Record->currsize == Record->lastidx) {
            Record->currsize *= 2;
            /* check_ptr() exits on failure, so the old pointer is never needed again */
            Record->records = realloc(Record->records, Record->currsize * sizeof(record_t));
            check_ptr(Record->records, REALLOC_MSG);
        }

        /* The tokens point into buffer, so each string needs its own copy. */
        slot = &Record->records[Record->lastidx];
        slot->age = atoi(age);

        slot->firstname = malloc(strlen(first)+1);
        check_ptr(slot->firstname, MALLOC_MSG);
        strcpy(slot->firstname, first);

        slot->lastname = malloc(strlen(last)+1);
        check_ptr(slot->lastname, MALLOC_MSG);
        strcpy(slot->lastname, last);

        Record->lastidx++;
    }
}
/*
 * Prints the header line followed by every stored record as
 * "age,firstname,lastname", releasing each string right after it is printed.
 * Finally frees the record array and the container itself, so Record must
 * not be used by the caller afterwards.
 */
void print_records(allrecords_t *Record) {
    size_t idx = 0;

    printf("\nComplete Record:\n");
    printf("%s\n", Record->headers);

    free(Record->headers);
    Record->headers = NULL;

    while (idx < Record->lastidx) {
        record_t *row = &Record->records[idx];

        printf("%d,%s,%s\n", row->age,
                             row->firstname,
                             row->lastname);

        free(row->firstname);
        row->firstname = NULL;

        free(row->lastname);
        row->lastname = NULL;

        idx++;
    }

    free(Record->records);
    Record->records = NULL;

    free(Record);
    Record = NULL;
}
/*
 * Reads a single line from filestream into buffer, strips its newline and
 * stores a heap copy of it in Record->headers.
 * Exits with EXIT_FAILURE if no line can be read.
 */
void read_header(FILE *filestream, allrecords_t *Record, char buffer[]) {
    size_t needed;

    if (!fgets(buffer, BUFFSIZE, filestream)) {
        fprintf(stderr, "Error reading header.\n");
        exit(EXIT_FAILURE);
    }

    remove_newline(buffer);

    /* room for the characters plus the terminating null */
    needed = strlen(buffer) + 1;
    Record->headers = malloc(needed);
    check_ptr(Record->headers, MALLOC_MSG);

    strcpy(Record->headers, buffer);
}
/*
 * Replaces the trailing '\n' of buffer with a null terminator.
 *
 * An empty string is left untouched. A non-empty string that does not end
 * in '\n' is treated as a line that did not fit in the buffer: a message is
 * printed and the program exits with EXIT_FAILURE. Note that a final line
 * with no newline before end-of-file takes that same path.
 */
void remove_newline(char buffer[]) {
    size_t slen = strlen(buffer);

    if (slen == 0) {
        return;
    }

    if (buffer[slen-1] != '\n') {
        printf("Buffer overflow detected.\n");
        exit(EXIT_FAILURE);
    }

    /* terminate the string where the newline was */
    buffer[slen-1] = '\0';
}
/*
 * Allocates an allrecords_t with room for INITSIZE records, no header and
 * no records in use, and returns it. Allocation failures are handled by
 * check_ptr().
 */
allrecords_t *initialize_records(void) {
    allrecords_t *all = malloc(sizeof *all);
    check_ptr(all, MALLOC_MSG);

    all->headers = NULL;
    all->lastidx = 0;
    all->currsize = INITSIZE;

    all->records = malloc(all->currsize * sizeof *all->records);
    check_ptr(all->records, MALLOC_MSG);

    return all;
}
/*
 * Central NULL check for allocation results: returns normally when ptr is
 * non-NULL, otherwise prints msg and exits with EXIT_FAILURE.
 */
void check_ptr(void *ptr, const char *msg) {
    if (ptr != NULL) {
        return;
    }

    printf("Null pointer returned: %s\n", msg);
    exit(EXIT_FAILURE);
}
注意:我使用malloc()
+ strcpy()
而不是strdup()
,因为它们来自标准C库,如<string.h>
和 <stdlib.h>
,而不是 POSIX C。
程序输出:
Complete Record:
Age,LastName,FirstName
50,B,A
30,A,B
20,X,D
10,F,A
90,V,E
60,N,M
对于给定的表示,
/* One data row of the input: a numeric age plus two name strings. */
typedef struct {
    int   age;        /* age column, stored as an int */
    char *firstName;  /* pointer to the first-name string */
    char *lastName;   /* pointer to the last-name string */
} Record;
并给定 file.txt
,
Age,LastName,FirstName
50,B,A
30,A,B
20,X,D
10,F,A
90,V,E
60,N,M
下面是main()
、
pFile=fopen("file.txt", "r");
...
//Complete file is copied to 'readBuffer', approach inspired from
//
....
/* First strtok() call on readBuffer: the token returned is the header line. */
char *record = strtok(readBuffer,"\n"); //Ignore header record
record = strtok(NULL, "\n");// Read first data record(50,'B','A')
/* Outer loop: intended to run once per "\n"-separated line of readBuffer. */
for(;record != NULL; record = strtok(NULL,"\n")){
printf("###Print complete record\n");
puts(record);
Record *r = malloc(sizeof(Record)*1);
/*
 * NOTE: strtok() keeps one internal position shared by all callers.
 * Passing `record` here starts a new tokenization of this line and
 * discards the position saved for readBuffer, so the strtok(NULL,"\n")
 * in the loop header cannot advance to the next line afterwards.
 */
r->age = atoi(strtok(record,","));
char *firstName = strtok(NULL,",");
char *lastName = strtok(NULL, ",");
r->firstName = strdup(firstName);
r->lastName = strdup(lastName);
printf("Age: %d\n", r->age);
printf("First name: %s\n", r->firstName);
printf("Last name: %s\n", r->lastName);
/* NOTE(review): r and its strings are not freed or stored in the visible code. */
}
在 for 循环中,strtok(readBuffer,",") 与 strtok(record,",") 这两处调用似乎互相干扰了。
实际输出显示标记化只发生在一条记录上。
$ ./program.exe
Print complete file
Age,LastName,FirstName
50,B,A
30,A,B
20,X,D
10,F,A
90,V,E
60,N,M
###Print complete record
50,B,A
Age: 50
First name: B
Last name: A
如何解决?
如果在我们的例子中可能的话,使用 strtok_r()
似乎是最简单的方法。请注意,strtok_r() 不属于标准 C,而是由 POSIX 定义的。
来自man page,
The
strtok_r()
function is a reentrant versionstrtok()
. Thesaveptr
argument is a pointer to achar *
variable that is used internally bystrtok_r()
in order to maintain context between successive calls that parse the same string.
和
Different strings may be parsed concurrently using sequences of calls to
strtok_r()
that specify differentsaveptr
arguments.
手册页中还有您正在查找的场景的示例。
问题更多与逻辑有关,而不是 strtok 的使用。
另外需要注意您所读取的记录的格式:姓氏字段(每行最后一个字段)后面没有逗号,而是以换行符结束。
下面的代码可以解决您的问题
{
    char str[80] = "Age,LastName,FirstName\n50,B,A\n30,A,B\n20,X,D\n";
    const char newline[2] = "\n";
    const char comma[2] = ",";

    /* Consume the header line; this also starts strtok() on str. */
    strtok(str, newline);

    /*
     * Walk the rest of str with a single strtok() sequence: the first two
     * fields of a record end at a comma, the last one ends at the newline.
     * Only the delimiter changes between calls, so strtok()'s internal
     * position is never reset.
     */
    for (;;)
    {
        char *age = strtok(NULL, comma);
        if (age == NULL)
            break; /* no more input */

        char *firstName = strtok(NULL, comma);
        char *lastName = strtok(NULL, newline);
        if (firstName == NULL || lastName == NULL)
            break; /* truncated record: nothing sensible to copy */

        /* Allocate only once a complete record is known to exist. */
        Record *r = malloc(sizeof *r);
        if (r == NULL)
            break;

        r->age = atoi(age);
        r->firstName = strdup(firstName);
        r->lastName = strdup(lastName);

        /* strdup() returns NULL on allocation failure; do not print NULL. */
        if (r->firstName != NULL && r->lastName != NULL)
        {
            printf("Age: %d\n", r->age);
            printf("First name: %s\n", r->firstName);
            printf("Last name: %s\n", r->lastName);
        }

        /* This demo only prints the record; real code would store r instead. */
        free(r->firstName);
        free(r->lastName);
        free(r);
    }
    return(0);
}
正如@David C. Rankin 所建议的那样,使用 fgets
配合 strtok
逐行读取是解决此问题的好方法。
如果您将来想使用 mergesort
,那么将数据存储在结构体数组中是实现该排序算法最简单的方式。此外,如果您不知道文件中有多少行,那么您可能需要在运行时动态分配该数组。
您可以有一个较低级别的 struct
存储文件中的每一行:
/* Lower-level struct: the fields of a single line of the file. */
typedef struct {
    int   age;        /* age column, stored as an int */
    char *firstname;  /* pointer to the first name string */
    char *lastname;   /* pointer to the last name string */
} record_t;
还有一个更高级别的 struct
存储文件的所有内容:
/* Higher-level struct: everything read from the file. */
typedef struct {
    record_t *records;   /* array of record_t, one element per data line */
    char     *headers;   /* string holding the header line */
    size_t    currsize;  /* number of elements currently allocated in records */
    size_t    lastidx;   /* number of elements in use, i.e. the next free index */
} allrecords_t;
关于fgets的注意事项:
- 会在缓冲区末尾的空终止符 \0 之前保留读到的 \n 字符。不过,这个附加的 \n 可以很容易地删除。
- 出错时返回 NULL。如果到达 EOF 且没有读取到任何字符,也会返回 NULL。
- 缓冲区大小必须静态声明。
- 需要从指定的流中读取,从
stdin
或FILE *
。
程序中 fgets 的可选用法:
使用fgets()
时,调用一次即可消费头信息:
fgets(buffer, 256, pfile); /* error checking needed */
然后,您可以在 while()
循环中再次调用它,以使用文件中的其余数据:
while (fgets(buffer, 256, pfile) != NULL) {
....
}
在一个程序中实现所有这些想法:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Constants used */
/* Initial number of record_t elements allocated by initialize_records(). */
#define INITSIZE 20
/* Size of the line buffer declared in main() and passed to fgets(). */
#define BUFFSIZE 256
/* Labels passed to check_ptr() to say which kind of allocation failed. */
#define MALLOC_MSG "Allocation"
#define REALLOC_MSG "Reallocation"
/* array of structs setup */
/* Lower-level struct: the fields of a single line of the file. */
typedef struct {
    int   age;        /* age column, stored as an int */
    char *firstname;  /* heap copy of the second field of the line */
    char *lastname;   /* heap copy of the third field of the line */
} record_t;
/* Higher-level struct: everything read from the file. */
typedef struct {
    record_t *records;   /* heap array of record_t, one element per data line */
    char     *headers;   /* heap copy of the header line */
    size_t    currsize;  /* number of elements currently allocated in records */
    size_t    lastidx;   /* number of elements in use, i.e. the next free index */
} allrecords_t;
/* function prototypes */
/* Allocates the top-level container and its initial array of records. */
allrecords_t *initialize_records(void);
/* Reads one line from filestream and stores a copy of it in Record->headers. */
void read_header(FILE *filestream, allrecords_t *Record, char buffer[]);
/* Reads the remaining lines, appending one record_t per line to Record->records. */
void read_data(FILE *filestream, allrecords_t *Record, char buffer[]);
/* Prints the header and every record, then frees Record and all its members. */
void print_records(allrecords_t *Record);
/* Prints a message containing msg and exits when ptr is NULL. */
void check_ptr(void *ptr, const char *msg);
/* Strips the trailing '\n' of buffer; exits if a non-empty buffer lacks one. */
void remove_newline(char buffer[]);
/*
 * Program entry point: opens "fileex.txt", reads its header line and data
 * lines into an allrecords_t, then prints and frees everything.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if the file cannot be opened
 * (the helpers also exit on read or allocation errors).
 */
int main(void) {
    FILE *fp;
    allrecords_t *Record;

    /* fixed-size line buffer shared by every fgets() call */
    char buffer[BUFFSIZE];

    fp = fopen("fileex.txt", "r");
    if (!fp) {
        fprintf(stderr, "Cannot read file.\n");
        exit(EXIT_FAILURE);
    }

    Record = initialize_records();

    /* Reads the first line */
    read_header(fp, Record, buffer);

    /* Reads next lines */
    read_data(fp, Record, buffer);

    /* All input has been consumed; release the stream before printing. */
    fclose(fp);

    /* prints and frees structure elements */
    print_records(Record);

    return 0;
}
/*
 * Reads every remaining line of filestream and appends one record_t per
 * line to Record->records, doubling the array when it is full.
 *
 * filestream: open stream positioned after the header line.
 * Record:     container to fill; Record->lastidx is advanced per record.
 * buffer:     caller-supplied scratch buffer of BUFFSIZE bytes.
 *
 * Each line is split on ',' into three fields. A line that yields no field
 * at all is skipped; a line with fewer than three fields is reported on
 * stderr and terminates the program, because passing a NULL token to
 * atoi()/strlen() would be undefined behaviour.
 */
void read_data(FILE *filestream, allrecords_t *Record, char buffer[]) {
    const char *delim = ",";

    while (fgets(buffer, BUFFSIZE, filestream) != NULL) {
        char *age;
        char *first;
        char *last;
        record_t *slot;

        remove_newline(buffer);

        /* Tokenize the whole line before touching the array. */
        age = strtok(buffer, delim);
        if (age == NULL) {
            continue; /* line without any field */
        }
        first = strtok(NULL, delim);
        last = strtok(NULL, delim);
        if (first == NULL || last == NULL) {
            fprintf(stderr, "Malformed record: expected three comma-separated fields.\n");
            exit(EXIT_FAILURE);
        }

        /* resize array when necessary */
        if (Record->currsize == Record->lastidx) {
            Record->currsize *= 2;
            /* check_ptr() exits on failure, so the old pointer is never needed again */
            Record->records = realloc(Record->records, Record->currsize * sizeof(record_t));
            check_ptr(Record->records, REALLOC_MSG);
        }

        /* The tokens point into buffer, so each string needs its own copy. */
        slot = &Record->records[Record->lastidx];
        slot->age = atoi(age);

        slot->firstname = malloc(strlen(first)+1);
        check_ptr(slot->firstname, MALLOC_MSG);
        strcpy(slot->firstname, first);

        slot->lastname = malloc(strlen(last)+1);
        check_ptr(slot->lastname, MALLOC_MSG);
        strcpy(slot->lastname, last);

        Record->lastidx++;
    }
}
/*
 * Prints the header line followed by every stored record as
 * "age,firstname,lastname", releasing each string right after it is printed.
 * Finally frees the record array and the container itself, so Record must
 * not be used by the caller afterwards.
 */
void print_records(allrecords_t *Record) {
    size_t idx = 0;

    printf("\nComplete Record:\n");
    printf("%s\n", Record->headers);

    free(Record->headers);
    Record->headers = NULL;

    while (idx < Record->lastidx) {
        record_t *row = &Record->records[idx];

        printf("%d,%s,%s\n", row->age,
                             row->firstname,
                             row->lastname);

        free(row->firstname);
        row->firstname = NULL;

        free(row->lastname);
        row->lastname = NULL;

        idx++;
    }

    free(Record->records);
    Record->records = NULL;

    free(Record);
    Record = NULL;
}
/*
 * Reads a single line from filestream into buffer, strips its newline and
 * stores a heap copy of it in Record->headers.
 * Exits with EXIT_FAILURE if no line can be read.
 */
void read_header(FILE *filestream, allrecords_t *Record, char buffer[]) {
    size_t needed;

    if (!fgets(buffer, BUFFSIZE, filestream)) {
        fprintf(stderr, "Error reading header.\n");
        exit(EXIT_FAILURE);
    }

    remove_newline(buffer);

    /* room for the characters plus the terminating null */
    needed = strlen(buffer) + 1;
    Record->headers = malloc(needed);
    check_ptr(Record->headers, MALLOC_MSG);

    strcpy(Record->headers, buffer);
}
/*
 * Replaces the trailing '\n' of buffer with a null terminator.
 *
 * An empty string is left untouched. A non-empty string that does not end
 * in '\n' is treated as a line that did not fit in the buffer: a message is
 * printed and the program exits with EXIT_FAILURE. Note that a final line
 * with no newline before end-of-file takes that same path.
 */
void remove_newline(char buffer[]) {
    size_t slen = strlen(buffer);

    if (slen == 0) {
        return;
    }

    if (buffer[slen-1] != '\n') {
        printf("Buffer overflow detected.\n");
        exit(EXIT_FAILURE);
    }

    /* terminate the string where the newline was */
    buffer[slen-1] = '\0';
}
/*
 * Allocates an allrecords_t with room for INITSIZE records, no header and
 * no records in use, and returns it. Allocation failures are handled by
 * check_ptr().
 */
allrecords_t *initialize_records(void) {
    allrecords_t *all = malloc(sizeof *all);
    check_ptr(all, MALLOC_MSG);

    all->headers = NULL;
    all->lastidx = 0;
    all->currsize = INITSIZE;

    all->records = malloc(all->currsize * sizeof *all->records);
    check_ptr(all->records, MALLOC_MSG);

    return all;
}
/*
 * Central NULL check for allocation results: returns normally when ptr is
 * non-NULL, otherwise prints msg and exits with EXIT_FAILURE.
 */
void check_ptr(void *ptr, const char *msg) {
    if (ptr != NULL) {
        return;
    }

    printf("Null pointer returned: %s\n", msg);
    exit(EXIT_FAILURE);
}
注意:我使用malloc()
+ strcpy()
而不是strdup()
,因为它们来自标准C库,如<string.h>
和 <stdlib.h>
,而不是 POSIX C。
程序输出:
Complete Record:
Age,LastName,FirstName
50,B,A
30,A,B
20,X,D
10,F,A
90,V,E
60,N,M