实施 scanf 的替代方案
Implementing an alternative to scanf
因为几乎所有旨在从 stdin
获取数据的 C 函数都是错误的/有缺陷的:
gets
这个越少越好
scanf
不检查缓冲区溢出,并且 '\n' 会一直留在 stdin 中,导致后续的 scanf 调用出错
scanf_s
几乎相同,但有缓冲区溢出检查
fgets
将 '\n'
附加到 string
gets_s
没有之前的问题,但对其他流没用
我决定编写自己的函数,该函数至少可用于从 stdin
中读取数字
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void *scano(char mode);
/*
 * Demo driver: read one float from stdin through scano() and print half of it.
 *
 * scano() returns heap storage that the caller owns, so the result is checked
 * before it is dereferenced and released once it is no longer needed.
 * An int would be read the same way: int *num = scano('d');
 */
int main()
{
    float *num = (float *)scano('f');
    if (num == NULL) {
        fprintf(stderr, "scano: could not read a number\n");
        return EXIT_FAILURE;
    }

    printf("Half: %f", *num / 2);

    free(num);  /* the value was malloc'ed inside scano() */
    return 0;
}
/*
 * Read one line from stdin and convert its leading text to a number.
 *
 * mode selects the result type:
 *   'd' -> int     (converted with strtol, base 10)
 *   'f' -> float   (converted with strtof)
 *   'D' -> double  (converted with strtod)
 *
 * Returns a pointer to a freshly malloc'ed object of the selected type; the
 * caller owns it and must free() it. Returns NULL when no line could be read
 * (end of file or read error), when the allocation fails, or when mode is not
 * one of the characters above.
 *
 * At most 19 characters of the line are examined. Whatever follows them on
 * the same line is discarded so that the next read starts on a fresh line.
 * Text that does not start with a number converts to 0, as atoi/atof did.
 */
void *scano(char mode){
    char sData[20];

    /* Without this check sData would be read uninitialised at end of file. */
    if (fgets(sData, sizeof sData, stdin) == NULL) {
        return NULL;
    }

    /* strcspn() gives the index of the '\n', or of the terminator when the
       line had none, so nothing is chopped from an unterminated last line. */
    size_t len = strcspn(sData, "\n");
    if (sData[len] == '\n') {
        sData[len] = '\0';
    } else {
        /* Line longer than the buffer, or last line without a newline:
           drop the unread remainder of the line. */
        int c;
        while ((c = getchar()) != EOF && c != '\n') {
            /* discard */
        }
    }

    switch(mode){
        case 'd':{
            int *dataI = malloc(sizeof *dataI);
            if (dataI != NULL) {
                /* strtol() saturates instead of invoking undefined behaviour
                   on overflow; narrowing an out-of-range long to int is
                   implementation-defined. */
                *dataI = (int)strtol(sData, NULL, 10);
            }
            return dataI;
        }
        case 'f':{
            float *dataF = malloc(sizeof *dataF);
            if (dataF != NULL) {
                *dataF = strtof(sData, NULL);
            }
            return dataF;
        }
        case 'D':{
            double *dataD = malloc(sizeof *dataD);
            if (dataD != NULL) {
                *dataD = strtod(sData, NULL);
            }
            return dataD;
        }
        default:
            /* Unknown mode: report it rather than falling off the end. */
            return NULL;
    }
}
其他数据类型的功能显然未完成,但我有一些问题:
- 函数的算法如何改进?
- 我不需要在每个
case
中 free()
吗?我知道分配的内存需要释放,但在处理链表时,free() 只用于删除节点;创建节点时,在 malloc() 之后并没有调用 free()。
- 完全安全吗?如果不是,如何保证安全?
以下是几个函数的简单示例,它们返回来自输入源的数值。这些示例要求一组数字字段之间用某种空白字符(空格、制表符、换行符、回车符)分隔。
这些只是为了演示一种方法,肯定有改进的余地。
建议你看看atoi vs atol vs strtol vs strtoul vs sscanf
中的问答讨论
fgetc()
函数用于从输入源一次拉取一个字符,测试输入源的读取是继续还是停止。通过使用 fgetc()
函数,我们可以允许一些其他函数在使用这些函数 scano_l()
和 scano_d()
时继续从输入源读取。
我们还消除了对 malloc()
和伴随的 free()
以及内存管理的需要,方法是使用局部缓冲区,并直接以 long 或 double 类型返回实际值。
例如,使用 C++ main 对这些进行简单测试(_tmain()
是因为我使用 Microsoft Visual Studio 2005 生成 Windows 控制台应用程序)如下。编译后可以尝试几种不同的输入方式来测试:先输入一个整数(例如 1234),后跟一个或多个空白字符(空格、制表符、换行符),然后是一个浮点数(例如 45.678),再后跟至少一个空白字符,最后是一些文本字符。
// scan_no.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <stdlib.h>
extern "C" {
long scano_l (FILE *pFile);
double scano_d (FILE *pFile);
};
// Console test driver: reads a white space delimited integer, then a white
// space delimited floating point value, then the remainder of the current
// line as plain text, and echoes all three.
int _tmain(int argc, _TCHAR* argv[])
{
    enum { kTextCapacity = 256 };

    const long wholeNumber = scano_l (stdin);
    const double realNumber = scano_d (stdin);

    // One byte of the zero-filled buffer is kept back, so at most 254
    // characters of text are read.
    char restOfLine[kTextCapacity] = {0};
    fgets (restOfLine, kTextCapacity - 1, stdin);

    printf (" value is %ld\n", wholeNumber);
    printf (" value is %lf\n", realNumber);
    printf (" value is %s\n", restOfLine);

    return 0;
}
在我的例子中,这些函数在另一个源文件 csource.c 中,它们是:
#include <stdio.h>
#include <stdlib.h>
// States of the small finite state machine used while pulling one token from
// the input. Transitions only ever move forward: Begin -> Accept -> End.
enum StateFSM {
    StateBegin = 1,     // only leading white space has been seen so far
    StateAccept = 2,    // at least one token character has been read
    StateEnd = 3        // the white space that follows the token was read
};

/*
 * Read one white space delimited token from pFile into buffer.
 *
 * Leading spaces, tabs, new lines and carriage returns are skipped. Reading
 * stops after the first white space character that follows the token (that
 * character is consumed), at end of file, or on a read error.
 *
 * At most nMaxLen characters are stored; further characters of an over-long
 * token are read and dropped. The token is zero terminated whenever fewer
 * than nMaxLen characters were stored. A token of exactly nMaxLen characters
 * relies on the caller providing a zero byte at buffer[nMaxLen], so the
 * buffer should hold nMaxLen + 1 bytes and be zero initialised.
 *
 * Returns buffer, which allows the call to be chained.
 */
static char *fetchValue (FILE *pFile, char *buffer, int nMaxLen)
{
    int iBuffIndex = 0;
    enum StateFSM iState = StateBegin;

    while (iState != StateEnd) {
        int iInput = fgetc (pFile);

        // Test the returned value rather than feof(): after a read error
        // fgetc() keeps returning EOF while feof() stays false, which would
        // loop forever and store EOF as if it were data.
        if (iInput == EOF) break;

        switch (iInput) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            // white space before the token is skipped, after it ends the token
            if (iState == StateAccept) iState = StateEnd;
            break;
        default:
            if (iBuffIndex < nMaxLen) {
                // keep the low seven bits only, as the original code did
                buffer[iBuffIndex++] = (char)(iInput & 0x7f);
            }
            iState = StateAccept;
            break;
        }
    }

    // Terminate when there is room inside the first nMaxLen bytes so the
    // result does not depend on the caller having cleared the buffer.
    if (iBuffIndex < nMaxLen) buffer[iBuffIndex] = '\0';

    return buffer;
}
/*
 * Read the next white space delimited token from pFile and convert it to a
 * long using base 10. Only the first 31 characters of the token are used;
 * the 32nd byte of the zero-filled buffer is left as the terminator.
 * The result is whatever strtol() yields for that text.
 */
long scano_l (FILE *pFile)
{
    char text[32] = {0};
    char *pEnd = NULL;

    fetchValue(pFile, text, 31);
    return strtol(text, &pEnd, 10);
}
/*
 * Read the next white space delimited token from pFile and convert it to a
 * double. Only the first 31 characters of the token are used; the 32nd byte
 * of the zero-filled buffer is left as the terminator.
 * The result is whatever strtod() yields for that text.
 */
double scano_d (FILE *pFile)
{
    char text[32] = {0};
    char *pEnd = NULL;

    fetchValue(pFile, text, 31);
    return strtod(text, &pEnd);
}
另外一个方便的函数是一个可以读入一串字符的函数。以下函数从输入中读取字符并将它们添加到字符缓冲区,直到读取结束字符或读取最大字符数。
除空格以外的空白字符(制表符、换行符、回车符)被视为文本的结束标志。空格字符在这里被视为有效的文本字符,会被加入到由输入构造的字符串中。任何前导的非空格空白字符都会被丢弃,字符串从第一个不属于这类空白字符的字符开始。
/*
 * Read a piece of text from pFile into buffer as a zero terminated string.
 *
 * Tab, new line and carriage return are delimiters: any that come before the
 * text are skipped, and the first one after the text ends it (that delimiter
 * is consumed). Space characters are ordinary text and are stored.
 *
 * nMaxLen is the size of buffer in bytes. At most nMaxLen - 1 characters are
 * stored so that the result is always terminated. Reading stops as soon as
 * the buffer is full; the rest of the text stays in the stream for the next
 * read. Reading also stops at end of file or on a read error.
 *
 * Returns buffer. When buffer is NULL or nMaxLen is not positive, nothing is
 * read or written.
 */
char * scano_asz(FILE *pFile, char *buffer, int nMaxLen)
{
    int iBuffIndex = 0;
    int bTextStarted = 0;

    if (buffer == NULL || nMaxLen <= 0) return buffer;

    // The loop condition keeps one byte free for the terminator and really
    // leaves the loop when the buffer is full.
    while (iBuffIndex < nMaxLen - 1) {
        int iInput = fgetc(pFile);

        // Test the returned value rather than feof(): after a read error
        // fgetc() keeps returning EOF while feof() stays false.
        if (iInput == EOF) break;

        if (iInput == '\t' || iInput == '\n' || iInput == '\r') {
            if (bTextStarted) break;    // delimiter after the text: done
            continue;                   // delimiter before the text: skip
        }

        // keep the low seven bits only, as the original code did
        buffer[iBuffIndex++] = (char)(iInput & 0x7f);
        bTextStarted = 1;
    }

    buffer[iBuffIndex] = '\0';
    return buffer;
}
因为几乎所有旨在从 stdin
获取数据的 C 函数都是错误的/有缺陷的:
gets
这个越少越好
scanf
不检查缓冲区溢出,并且 '\n' 会一直留在 stdin 中,导致后续的 scanf 调用出错
scanf_s
几乎相同,但有缓冲区溢出检查
fgets
将'\n'
附加到string
gets_s
没有之前的问题,但对其他流没用
我决定编写自己的函数,该函数至少可用于从 stdin 中读取数字
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void *scano(char mode);
/*
 * Demo driver: read one float from stdin through scano() and print half of it.
 *
 * scano() returns heap storage that the caller owns, so the result is checked
 * before it is dereferenced and released once it is no longer needed.
 * An int would be read the same way: int *num = scano('d');
 */
int main()
{
    float *num = (float *)scano('f');
    if (num == NULL) {
        fprintf(stderr, "scano: could not read a number\n");
        return EXIT_FAILURE;
    }

    printf("Half: %f", *num / 2);

    free(num);  /* the value was malloc'ed inside scano() */
    return 0;
}
/*
 * Read one line from stdin and convert its leading text to a number.
 *
 * mode selects the result type:
 *   'd' -> int     (converted with strtol, base 10)
 *   'f' -> float   (converted with strtof)
 *   'D' -> double  (converted with strtod)
 *
 * Returns a pointer to a freshly malloc'ed object of the selected type; the
 * caller owns it and must free() it. Returns NULL when no line could be read
 * (end of file or read error), when the allocation fails, or when mode is not
 * one of the characters above.
 *
 * At most 19 characters of the line are examined. Whatever follows them on
 * the same line is discarded so that the next read starts on a fresh line.
 * Text that does not start with a number converts to 0, as atoi/atof did.
 */
void *scano(char mode){
    char sData[20];

    /* Without this check sData would be read uninitialised at end of file. */
    if (fgets(sData, sizeof sData, stdin) == NULL) {
        return NULL;
    }

    /* strcspn() gives the index of the '\n', or of the terminator when the
       line had none, so nothing is chopped from an unterminated last line. */
    size_t len = strcspn(sData, "\n");
    if (sData[len] == '\n') {
        sData[len] = '\0';
    } else {
        /* Line longer than the buffer, or last line without a newline:
           drop the unread remainder of the line. */
        int c;
        while ((c = getchar()) != EOF && c != '\n') {
            /* discard */
        }
    }

    switch(mode){
        case 'd':{
            int *dataI = malloc(sizeof *dataI);
            if (dataI != NULL) {
                /* strtol() saturates instead of invoking undefined behaviour
                   on overflow; narrowing an out-of-range long to int is
                   implementation-defined. */
                *dataI = (int)strtol(sData, NULL, 10);
            }
            return dataI;
        }
        case 'f':{
            float *dataF = malloc(sizeof *dataF);
            if (dataF != NULL) {
                *dataF = strtof(sData, NULL);
            }
            return dataF;
        }
        case 'D':{
            double *dataD = malloc(sizeof *dataD);
            if (dataD != NULL) {
                *dataD = strtod(sData, NULL);
            }
            return dataD;
        }
        default:
            /* Unknown mode: report it rather than falling off the end. */
            return NULL;
    }
}
其他数据类型的功能显然未完成,但我有一些问题:
- 函数的算法如何改进?
- 我不需要在每个
case
中free()
吗?我知道分配的内存需要释放,但在处理链表时,free() 只用于删除节点;创建节点时,在 malloc() 之后并没有调用 free()。
- 完全安全吗?如果不是,如何保证安全?
以下是几个函数的简单示例,它们返回来自输入源的数值。这些示例要求一组数字字段之间用某种空白字符(空格、制表符、换行符、回车符)分隔。
这些只是为了演示一种方法,肯定有改进的余地。
建议你看看atoi vs atol vs strtol vs strtoul vs sscanf
中的问答讨论fgetc()
函数用于从输入源一次拉取一个字符,测试输入源的读取是继续还是停止。通过使用 fgetc()
函数,我们可以允许一些其他函数在使用这些函数 scano_l()
和 scano_d()
时继续从输入源读取。
我们还消除了对 malloc()
和伴随的 free()
以及内存管理的需要,方法是使用局部缓冲区,并直接以 long 或 double 类型返回实际值。
例如,使用 C++ main 对这些进行简单测试(_tmain()
是因为我使用 Microsoft Visual Studio 2005 生成 Windows 控制台应用程序)如下。编译后可以尝试几种不同的输入方式来测试:先输入一个整数(例如 1234),后跟一个或多个空白字符(空格、制表符、换行符),然后是一个浮点数(例如 45.678),再后跟至少一个空白字符,最后是一些文本字符。
// scan_no.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <stdlib.h>
extern "C" {
long scano_l (FILE *pFile);
double scano_d (FILE *pFile);
};
// Console test driver: reads a white space delimited integer, then a white
// space delimited floating point value, then the remainder of the current
// line as plain text, and echoes all three.
int _tmain(int argc, _TCHAR* argv[])
{
    enum { kTextCapacity = 256 };

    const long wholeNumber = scano_l (stdin);
    const double realNumber = scano_d (stdin);

    // One byte of the zero-filled buffer is kept back, so at most 254
    // characters of text are read.
    char restOfLine[kTextCapacity] = {0};
    fgets (restOfLine, kTextCapacity - 1, stdin);

    printf (" value is %ld\n", wholeNumber);
    printf (" value is %lf\n", realNumber);
    printf (" value is %s\n", restOfLine);

    return 0;
}
在我的例子中,这些函数在另一个源文件 csource.c 中,它们是:
#include <stdio.h>
#include <stdlib.h>
// States of the small finite state machine used while pulling one token from
// the input. Transitions only ever move forward: Begin -> Accept -> End.
enum StateFSM {
    StateBegin = 1,     // only leading white space has been seen so far
    StateAccept = 2,    // at least one token character has been read
    StateEnd = 3        // the white space that follows the token was read
};

/*
 * Read one white space delimited token from pFile into buffer.
 *
 * Leading spaces, tabs, new lines and carriage returns are skipped. Reading
 * stops after the first white space character that follows the token (that
 * character is consumed), at end of file, or on a read error.
 *
 * At most nMaxLen characters are stored; further characters of an over-long
 * token are read and dropped. The token is zero terminated whenever fewer
 * than nMaxLen characters were stored. A token of exactly nMaxLen characters
 * relies on the caller providing a zero byte at buffer[nMaxLen], so the
 * buffer should hold nMaxLen + 1 bytes and be zero initialised.
 *
 * Returns buffer, which allows the call to be chained.
 */
static char *fetchValue (FILE *pFile, char *buffer, int nMaxLen)
{
    int iBuffIndex = 0;
    enum StateFSM iState = StateBegin;

    while (iState != StateEnd) {
        int iInput = fgetc (pFile);

        // Test the returned value rather than feof(): after a read error
        // fgetc() keeps returning EOF while feof() stays false, which would
        // loop forever and store EOF as if it were data.
        if (iInput == EOF) break;

        switch (iInput) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            // white space before the token is skipped, after it ends the token
            if (iState == StateAccept) iState = StateEnd;
            break;
        default:
            if (iBuffIndex < nMaxLen) {
                // keep the low seven bits only, as the original code did
                buffer[iBuffIndex++] = (char)(iInput & 0x7f);
            }
            iState = StateAccept;
            break;
        }
    }

    // Terminate when there is room inside the first nMaxLen bytes so the
    // result does not depend on the caller having cleared the buffer.
    if (iBuffIndex < nMaxLen) buffer[iBuffIndex] = '\0';

    return buffer;
}
/*
 * Read the next white space delimited token from pFile and convert it to a
 * long using base 10. Only the first 31 characters of the token are used;
 * the 32nd byte of the zero-filled buffer is left as the terminator.
 * The result is whatever strtol() yields for that text.
 */
long scano_l (FILE *pFile)
{
    char text[32] = {0};
    char *pEnd = NULL;

    fetchValue(pFile, text, 31);
    return strtol(text, &pEnd, 10);
}
/*
 * Read the next white space delimited token from pFile and convert it to a
 * double. Only the first 31 characters of the token are used; the 32nd byte
 * of the zero-filled buffer is left as the terminator.
 * The result is whatever strtod() yields for that text.
 */
double scano_d (FILE *pFile)
{
    char text[32] = {0};
    char *pEnd = NULL;

    fetchValue(pFile, text, 31);
    return strtod(text, &pEnd);
}
另外一个方便的函数是一个可以读入一串字符的函数。以下函数从输入中读取字符并将它们添加到字符缓冲区,直到读取结束字符或读取最大字符数。
除空格以外的空白字符(制表符、换行符、回车符)被视为文本的结束标志。空格字符在这里被视为有效的文本字符,会被加入到由输入构造的字符串中。任何前导的非空格空白字符都会被丢弃,字符串从第一个不属于这类空白字符的字符开始。
/*
 * Read a piece of text from pFile into buffer as a zero terminated string.
 *
 * Tab, new line and carriage return are delimiters: any that come before the
 * text are skipped, and the first one after the text ends it (that delimiter
 * is consumed). Space characters are ordinary text and are stored.
 *
 * nMaxLen is the size of buffer in bytes. At most nMaxLen - 1 characters are
 * stored so that the result is always terminated. Reading stops as soon as
 * the buffer is full; the rest of the text stays in the stream for the next
 * read. Reading also stops at end of file or on a read error.
 *
 * Returns buffer. When buffer is NULL or nMaxLen is not positive, nothing is
 * read or written.
 */
char * scano_asz(FILE *pFile, char *buffer, int nMaxLen)
{
    int iBuffIndex = 0;
    int bTextStarted = 0;

    if (buffer == NULL || nMaxLen <= 0) return buffer;

    // The loop condition keeps one byte free for the terminator and really
    // leaves the loop when the buffer is full.
    while (iBuffIndex < nMaxLen - 1) {
        int iInput = fgetc(pFile);

        // Test the returned value rather than feof(): after a read error
        // fgetc() keeps returning EOF while feof() stays false.
        if (iInput == EOF) break;

        if (iInput == '\t' || iInput == '\n' || iInput == '\r') {
            if (bTextStarted) break;    // delimiter after the text: done
            continue;                   // delimiter before the text: skip
        }

        // keep the low seven bits only, as the original code did
        buffer[iBuffIndex++] = (char)(iInput & 0x7f);
        bTextStarted = 1;
    }

    buffer[iBuffIndex] = '\0';
    return buffer;
}