如何使用带有俄文字母的数组？

Question

我正在尝试编写一段代码，用来返回输入的一行中有多少个俄语元音。但是，编译器总是报这个错误：“将‘53429’从 'int' 缩小到 { } 内的 'char'”。我愿意接受任何建议或想法。谢谢

我的想法是制作一个字符数组 (glasnii) 并使用循环来检查输入行 (stroka) 的每个字符。

// Reads one line into stroka and counts how many of its chars compare equal
// to an entry of the glasnii table, then prints the count.
int main (){
 char stroka[100];
 // NOTE(review): in a UTF-8 source file every Cyrillic literal below is a
 // multi-byte sequence, so it does not fit in a single char -- this is what
 // the narrowing diagnostic quoted in the question is about.
 // NOTE(review): the declaration has no terminating ';'.
 char glasnii[20] = {'А', 'Е' ,'И' ,'Ю','Ё','Ы','У','Э','О','Я','а','у', 'о', 'ы', 'и', 'э', 'я', 'ю', 'ё','е' }

 
 printf( "Введите строку : ");
 // NOTE(review): gets() has no way to limit the input to the 100 bytes of
 // stroka.
 gets( stroka);
 size_t len = strlen(stroka);
 
 int count=0;
 
 // Compare every byte of the input against every entry of the table.
 // NOTE(review): i is an int while len is a size_t.
 for(int i=0; i<len; i++){
     for(int j=0; j<20; j++){
         if(stroka[i] == glasnii[j])
             count++;
     }
 }
 printf("\n Количество гласных : %i \n", count);
    
 return 0;
}

Answer 1

西里尔字符以 UTF-8 编码并且是多字节序列。参见 https://en.wikipedia.org/wiki/UTF-8

因此，做（例如）：

char vowel = 'ы';

效果不太好，因为所讨论的常量是多个字节的序列。编译器会抱怨它无法放入单个 char。那是因为它实际上是（忽略字节顺序）：

char vowel = '\xD1\x8B';

我们想改用 char 序列（即字符串）。

我们必须检查给定 char 的高两位以确定我们是否处于多字节序列或代码点中。

[肯定]有处理 UTF-8 的库。

但是如果您想自己动手，我已经重构了您的程序以使用 UTF-8。我对一些简单的序列做了一些粗略的测试，它似乎有效[但不能保证]。代码中已加了注释：

#include <stdio.h>
#include <string.h>

// dbgprt -- printf-style trace output
//
// With DEBUG defined the arguments are handed to printf unchanged. Without it
// the call expands to an empty statement and the arguments are never
// evaluated, so they must not carry side effects the program relies on.
//
// Uses the standard C99 variadic form (... / __VA_ARGS__) rather than the
// GNU-only named form (_fmt...), so it also builds on non-GNU compilers.
#ifdef DEBUG
#define dbgprt(...) \
    printf(__VA_ARGS__)
#else
#define dbgprt(...) \
    do { } while (0)
#endif

// Value type returned by utf8get(): the raw bytes of one UTF-8 sequence packed
// into an integer (each new byte is shifted in from the low end). It is only
// compared for equality; it is not a decoded unicode code point.
typedef unsigned int code_t;            // utf8/unicode "code point"

// Table of the Russian vowels, stored as one NUL-terminated UTF-8 string.
//
// NOTE/BUG: the disabled variant below is the original table. Each Cyrillic
// character constant is a multibyte constant that can _not_ fit in a char, so
// the compiler flags every element.
#if 0
    char glasnii[20] = {
        'А', 'Е', 'И', 'Ю', 'Ё', 'Ы', 'У', 'Э', 'О', 'Я',
        'а', 'у', 'о', 'ы', 'и', 'э', 'я', 'ю', 'ё', 'е'
    }
#else
// NOTE/FIX: use string literals instead -- the compiler joins adjacent
// literals into a single string, so the array holds every byte of every vowel
// followed by one terminating 0
char glasnii[] =
    "АЕИЮЁЫУЭОЯ"                        // upper case
    "ауоыиэяюёе";                       // lower case
#endif

// utf8get -- fetch the next character from a UTF-8 string
// RETURNS: the bytes of the character packed into a code_t, earlier bytes in
// the higher-order positions (0 at end of string); *buf is moved past the
// bytes that were consumed
//
// NOTE: the result is the raw multibyte sequence held in one unsigned int --
// it may not be a true unicode code point
code_t
utf8get(char **buf)
{
    char *cursor = *buf;
    code_t packed = 0;

    dbgprt("utf8get: ENTER cp=%p\n",cursor);

    unsigned char first = *cursor;

    // at end of string nothing is consumed and 0 is returned
    if (first != 0) {
        // the first byte always belongs to this character
        packed = first;
        ++cursor;

        // a byte with the high bit set is followed by continuation bytes
        // (top two bits 10) that belong to the same character; ordinary
        // ascii, EOS, or the start of another sequence (top two bits 11)
        // ends it
        if ((first & 0x80) != 0) {
            for (;;) {
                unsigned char next = *cursor;

                if ((next & 0xC0) != 0x80)
                    break;

                packed = (packed << 8) | next;
                ++cursor;
            }
        }
    }

    dbgprt("utf8get: EXIT ret=%8.8X len=%zu cp=%p\n",
        packed,(size_t) (cursor - *buf),cursor);

    // hand the advanced position back to the caller
    *buf = cursor;

    return packed;
}

// countvowels -- count the Russian vowels in a UTF-8 string
// RETURNS: number of characters in str that match an entry of glasnii
//
// NOTE: the count is also printed to stdout before returning
int
countvowels(char *str)
{
    int count = 0;

    while (1) {
        dbgprt("countvowels: GETCUR\n");
        code_t curchr = utf8get(&str);

        // end of the input string
        if (curchr == 0)
            break;

        // walk the vowel table from its start for every input character
        char *vowelptr = glasnii;
        while (1) {
            dbgprt("countvowels: GETVOWEL\n");
            code_t vowelcur = utf8get(&vowelptr);

            // end of the table -- the character is not a vowel
            if (vowelcur == 0)
                break;

            // a character is counted once at most, so stop at the first
            // match instead of scanning the rest of the table
            if (curchr == vowelcur) {
                ++count;
                break;
            }
        }
    }

    printf("\n Количество гласных : %i \n", count);

    return count;
}

// main -- run countvowels on a fixed sample, then on one line read from stdin
// RETURNS: 0
int
main(void)
{
    char stroka[100];

    // show the hex values (output appears only when built with DEBUG)
    for (int i = 0;  glasnii[i];  ++i)
        dbgprt("glasnii[%d] = %2.2X\n",i,(unsigned char) glasnii[i]);

    // quick self-test on a known string
#if 1
    countvowels("QыBыыT");
#endif

    printf("Введите строку : ");
// NOTE/BUG: _never_ use gets -- see the man page
#if 0
    gets(stroka);
#else
    // NOTE/FIX: fgets returns NULL on EOF or a read error and may leave the
    // buffer untouched -- treat that as an empty line so the code below never
    // reads an uninitialized buffer
    if (fgets(stroka,sizeof(stroka),stdin) == NULL)
        stroka[0] = 0;

    // fgets keeps the newline -- strip it (no-op if there is none)
    stroka[strcspn(stroka,"\n")] = 0;
#endif
    printf("\n");

    countvowels(stroka);

    return 0;
}

如何使用带有俄文字母的数组？

How to use an array with russian letters?

c

arrays

string

c-strings

char