我如何在C中找到最接近的字符串
How can i find the closest string in C
我定义了 5 个字符串,并要求用户输入。如果输入不匹配我想问“你的意思是......?”用最接近的字符串。所以我的问题是如何找到最接近的字符串,谢谢 you.Here 是我的代码未改进的形式;
int main(int argc, char* argv[]){
char book[5][100] = {"donusum" , "korluk" , "satranc" };
char input[100];
printf("Enter a book name: ");
fgets(input, 100, stdin);
input[strlen(input)-1] = 0;
int i;
for(i=0; i< strlen(input); i++){
input[i] = tolower(input[i]);
}
for(i=0; i<3; i++){
if(strcmp(book[i], input)==0){
printf("Found input string at book list.\n");
break;
}
}
if(i>=3){
printf("The input string was not found.\n");
}
首先,定义一些可以帮助您完成基本任务的函数:
// Reads at maximum 'size' characters from file stream.
char *scanline(char *line, size_t size, FILE *stream)
{
if (!fgets(line, size, stream))
return NULL;
size_t npos = strcspn(line, "\n");
line[npos] = '[=10=]';
return line;
}
// Returns the minimum of three numbers.
size_t min3(size_t x, size_t y, size_t z)
{
size_t tmp = x < y ? x : y;
return tmp < z ? tmp : z;
}
// Swaps two pointers.
void swap(size_t **a, size_t **b)
{
size_t *tmp = *a;
*a = *b;
*b = tmp;
}
其次,定义一个函数来计算两个字符串之间的距离。例如,Levenshtein Distance:
size_t levenshtein_distance(const char *s1, const size_t len1, const char *s2, const size_t len2)
{
size_t vec0[len2 + 1];
size_t vec1[len2 + 1];
size_t *v0 = vec0;
size_t *v1 = vec1;
for (size_t i = 0; i <= len2; ++i)
v0[i] = i;
for (size_t i = 0; i < len1; ++i) {
v1[0] = i + 1;
for (size_t j = 0; j < len2; ++j) {
// Calculate deletion, insertion and substitution costs.
size_t delcost = v0[j + 1] + 1;
size_t inscost = v1[j] + 1;
size_t subcost = s1[i] == s2[j] ? v0[j] : v0[j] + 1;
v1[j + 1] = min3(delcost, inscost, subcost);
}
// Swap addresses. Avoid copying data.
swap(&v0, &v1);
}
return v0[len2];
}
您可以使用此函数计算给定字符串与一组字符串之间的最小距离。两个字符串之间的距离最小,它们彼此最近。
// Returns the index of the closest string.
int suggest(const char *strings[], size_t nstrings, const char *string)
{
size_t mindist = strlen(string) + 1;
size_t minindx = -1;
for (size_t i = 0; i < nstrings; ++i) {
const char *current = strings[i];
size_t dist = levenshtein_distance(current, strlen(current), string, strlen(string));
if (mindist >= dist) {
mindist = dist;
minindx = i;
}
}
return minindx;
}
你可以这样称呼它:
int main(void)
{
const char *suggestions[] = {"sitting", "filling", "windows", "kitten", "linux"};
char string[100];
printf("Enter something: ");
scanline(string, sizeof string, stdin);
size_t index = suggest(suggestions, 5, string);
printf("Do you mean %s?\n", suggestions[index]);
}
输出:
Enter something: winds
Do you mean windows?
我定义了 5 个字符串,并要求用户输入。如果输入不匹配我想问“你的意思是......?”用最接近的字符串。所以我的问题是如何找到最接近的字符串,谢谢 you.Here 是我的代码未改进的形式;
int main(int argc, char* argv[]){
char book[5][100] = {"donusum" , "korluk" , "satranc" };
char input[100];
printf("Enter a book name: ");
fgets(input, 100, stdin);
input[strlen(input)-1] = 0;
int i;
for(i=0; i< strlen(input); i++){
input[i] = tolower(input[i]);
}
for(i=0; i<3; i++){
if(strcmp(book[i], input)==0){
printf("Found input string at book list.\n");
break;
}
}
if(i>=3){
printf("The input string was not found.\n");
}
首先,定义一些可以帮助您完成基本任务的函数:
// Reads at maximum 'size' characters from file stream.
char *scanline(char *line, size_t size, FILE *stream)
{
if (!fgets(line, size, stream))
return NULL;
size_t npos = strcspn(line, "\n");
line[npos] = '[=10=]';
return line;
}
// Returns the minimum of three numbers.
size_t min3(size_t x, size_t y, size_t z)
{
size_t tmp = x < y ? x : y;
return tmp < z ? tmp : z;
}
// Swaps two pointers.
void swap(size_t **a, size_t **b)
{
size_t *tmp = *a;
*a = *b;
*b = tmp;
}
其次,定义一个函数来计算两个字符串之间的距离。例如,Levenshtein Distance:
size_t levenshtein_distance(const char *s1, const size_t len1, const char *s2, const size_t len2)
{
size_t vec0[len2 + 1];
size_t vec1[len2 + 1];
size_t *v0 = vec0;
size_t *v1 = vec1;
for (size_t i = 0; i <= len2; ++i)
v0[i] = i;
for (size_t i = 0; i < len1; ++i) {
v1[0] = i + 1;
for (size_t j = 0; j < len2; ++j) {
// Calculate deletion, insertion and substitution costs.
size_t delcost = v0[j + 1] + 1;
size_t inscost = v1[j] + 1;
size_t subcost = s1[i] == s2[j] ? v0[j] : v0[j] + 1;
v1[j + 1] = min3(delcost, inscost, subcost);
}
// Swap addresses. Avoid copying data.
swap(&v0, &v1);
}
return v0[len2];
}
您可以使用此函数计算给定字符串与一组字符串之间的最小距离。两个字符串之间的距离最小,它们彼此最近。
// Returns the index of the closest string.
int suggest(const char *strings[], size_t nstrings, const char *string)
{
size_t mindist = strlen(string) + 1;
size_t minindx = -1;
for (size_t i = 0; i < nstrings; ++i) {
const char *current = strings[i];
size_t dist = levenshtein_distance(current, strlen(current), string, strlen(string));
if (mindist >= dist) {
mindist = dist;
minindx = i;
}
}
return minindx;
}
你可以这样称呼它:
int main(void)
{
const char *suggestions[] = {"sitting", "filling", "windows", "kitten", "linux"};
char string[100];
printf("Enter something: ");
scanline(string, sizeof string, stdin);
size_t index = suggest(suggestions, 5, string);
printf("Do you mean %s?\n", suggestions[index]);
}
输出:
Enter something: winds
Do you mean windows?