我如何在C中找到最接近的字符串

How can i find the closest string in C

我定义了 5 个字符串,并要求用户输入。如果输入不匹配我想问“你的意思是......?”用最接近的字符串。所以我的问题是如何找到最接近的字符串,谢谢 you.Here 是我的代码未改进的形式;

int main(int argc, char* argv[]){

char book[5][100] = {"donusum" , "korluk" , "satranc" };
char input[100];

printf("Enter a book name: ");
fgets(input, 100, stdin);
input[strlen(input)-1] = 0;

int i;
for(i=0; i< strlen(input); i++){
    input[i] = tolower(input[i]);
}

for(i=0; i<3; i++){
    if(strcmp(book[i], input)==0){
        printf("Found input  string at book list.\n");
        break;
    }
}

if(i>=3){
    printf("The input string was not found.\n");
}

首先,定义一些可以帮助您完成基本任务的函数:

// Reads at maximum 'size' characters from file stream.
char *scanline(char *line, size_t size, FILE *stream)
{
    if (!fgets(line, size, stream))
        return NULL;
    
    size_t npos = strcspn(line, "\n");
    line[npos] = '[=10=]';
    return line;
}
// Returns the minimum of three numbers.
size_t min3(size_t x, size_t y, size_t z)
{
    size_t tmp = x < y ? x : y;
    return tmp < z ? tmp : z;
}
// Swaps two pointers.
void swap(size_t **a, size_t **b)
{
    size_t *tmp = *a;
    *a = *b;
    *b = tmp;
}

其次,定义一个函数来计算两个字符串之间的距离。例如,Levenshtein Distance:

size_t levenshtein_distance(const char *s1, const size_t len1, const char *s2, const size_t len2)
{
    size_t vec0[len2 + 1];
    size_t vec1[len2 + 1];
    
    size_t *v0 = vec0;
    size_t *v1 = vec1;
    
    for (size_t i = 0; i <= len2; ++i)
        v0[i] = i;
    
    for (size_t i = 0; i < len1; ++i) {
        v1[0] = i + 1;
        
        for (size_t j = 0; j < len2; ++j) {
            // Calculate deletion, insertion and substitution costs.
            size_t delcost = v0[j + 1] + 1;
            size_t inscost = v1[j] + 1;
            size_t subcost = s1[i] == s2[j] ? v0[j] : v0[j] + 1;

            v1[j + 1] = min3(delcost, inscost, subcost);
        }
        
        // Swap addresses. Avoid copying data.
        swap(&v0, &v1);
    }
    
    return v0[len2];
}

您可以使用此函数计算给定字符串与一组字符串之间的最小距离。两个字符串之间的距离最小,它们彼此最近。

// Returns the index of the closest string.
int suggest(const char *strings[], size_t nstrings, const char *string)
{
    size_t mindist = strlen(string) + 1;
    size_t minindx = -1;
    for (size_t i = 0; i < nstrings; ++i) {
        const char *current = strings[i];
        size_t dist = levenshtein_distance(current, strlen(current), string, strlen(string));
        
        if (mindist >= dist) {
            mindist = dist;
            minindx = i;
        }
    }
    
    return minindx;
}

你可以这样称呼它:

int main(void)
{
    const char *suggestions[] = {"sitting", "filling", "windows", "kitten", "linux"};
    
    char string[100];
    printf("Enter something: ");
    scanline(string, sizeof string, stdin);
    
    size_t index = suggest(suggestions, 5, string);
    printf("Do you mean %s?\n", suggestions[index]);
}

输出:

Enter something: winds
Do you mean windows?