如何在 Arduino 或 C 中将 Unicode 字符转换为 "Unicode HEX Position"

Question

如何在 Arduino 或 C 中将 Unicode 字符转换为“Unicode HEX Position”

我会在这里分享一张照片：

例如在 JavaScript 中，您可以使用 charCodeAt() 来做到这一点！此函数会返回字符的确切代码，然后您可以将其转换为十六进制！

例如在 JavaScript 中，我可以这样做来准确得到表中的值：

    // Take the UTF-16 code unit of the first character and build a string of
    // its high byte followed by its low byte, two hex digits each.
    var inpString = 'س';
    var myChar = inpString.charCodeAt(0);
    var output = ToHex((myChar >> 8) & 0xff) + ToHex(myChar & 0xff);
    
    // Returns the low 8 bits of `i` as exactly two upper-case hex digits.
    function ToHex(i)
    {
        // Hex digits indexed by nibble value.
        var digits = "0123456789ABCDEF";
        var highNibble = (i >> 4) & 0xf;
        var lowNibble = i & 0xf;
        return digits.charAt(highNibble) + digits.charAt(lowNibble);
    }
    // Display the result; for 'س' (U+0633) this shows "0633".
    alert(output);

那么我如何在 Arduino 或 C 中做到这一点？

Answer 1

对于希望通过 GSM 调制解调器（如 SIM800 配合 Arduino 或 AVR）发送多段（multipart）短信的人来说，这是一个大问题。

当然还有其他发送 Unicode 短信的方法，但在尝试发送多段短信时就会出现问题，因此唯一的选择是使用 PDU 模式。这样一来，您就会卡在 7 位 ASCII 与 16 位 Unicode 两种编码之间，唯一的办法是设法把字符转换为“Unicode HEX Position”，就像我在问题中分享的图片那样。最后，“Edgar Bonet”在 Stack Exchange 上发布了答案，我把链接分享在这里，供遇到同样问题的人参考：

https://arduino.stackexchange.com/a/76886/67600

特别感谢“Edgar Bonet”

Answer 2

如果您需要在运行时即时进行转换（而不仅仅是处理源代码中的字符串字面量），我找到了答案。

解决方案在这里：

http://bjoern.hoehrmann.de/utf-8/decoder/dfa/

可运行的示例，查找表存储在 PROGMEM 中：

// Decoder state used by decode(): decoding starts in UTF8_ACCEPT, and decode()
// returns it again each time a complete code point is available in *codep.
#define UTF8_ACCEPT 0
// NOTE(review): presumably the state reached on malformed input, per the
// decoder this table comes from; it is not referenced by name in this sketch.
#define UTF8_REJECT 1

// Lookup table for decode(), declared PROGMEM and read back with memcpy_P.
// Entries 0..255 are indexed by the input byte and give its character class
// ("type" in decode()). The entries from index 256 onward are indexed by
// 256 + state * 16 + type and give the decoder's next state.
static const uint8_t utf8d_PROGMEM[] PROGMEM = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..3f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..5f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..7f
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 80..9f
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // a0..bf
  8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0..df
  0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, // e0..ef
  0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, // f0..ff
  0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, // s0..s0
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s1..s2
  1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s3..s4
  1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s5..s6
  1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // s7..s8
};

// Feeds one byte of input to the table-driven UTF-8 decoder.
//
// state: in/out decoder state; callers start with UTF8_ACCEPT.
// codep: in/out code point accumulator; holds the decoded code point when
//        the returned state is UTF8_ACCEPT.
// byte:  next input byte; used directly as an index into the table's first
//        256 entries.
// Returns the new value of *state.
uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
  // Look up the character class of this byte.
  uint8_t cell;
  memcpy_P(&cell, &utf8d_PROGMEM[byte], sizeof cell);
  const uint32_t type = cell;

  if (*state == UTF8_ACCEPT) {
    // Starting a new sequence: the byte's class decides how many high bits
    // are masked off before the rest seeds the code point.
    *codep = (0xff >> type) & (byte);
  } else {
    // Mid-sequence: shift in the low six bits of this byte.
    *codep = (byte & 0x3fu) | (*codep << 6);
  }

  // Look up the next state from the current state and the byte's class.
  memcpy_P(&cell, &utf8d_PROGMEM[256 + *state * 16 + type], sizeof cell);
  *state = cell;
  return *state;
}

// Sample text passed to printCodePoints() from setup().
// NOTE(review): the non-ASCII letters are only valid decoder input if this
// source file is saved as UTF-8 — confirm.
const char mystring[] = "Kulaťoučký míč";

// Prints `value` in upper-case hexadecimal, left-padded with zeros to at
// least four digits.
static void printHex4(uint32_t value) {
  if (value < 0x1000)
    Serial.print(F("0"));
  if (value < 0x0100)
    Serial.print(F("0"));
  if (value < 0x0010)
    Serial.print(F("0"));
  Serial.print(value, HEX);
}

// Decodes the NUL-terminated byte string `s` with decode() and prints every
// decoded code point to Serial as four hex digits, with no separators,
// followed by a newline.
//
// Code points up to U+FFFF are printed as-is. Code points above U+FFFF are
// printed as a UTF-16 surrogate pair (two four-digit groups) instead of being
// silently dropped.
//
// If the decoder is not back in UTF8_ACCEPT when the string ends, an extra
// "not well-formed" line is printed.
void printCodePoints(uint8_t* s) {
  uint32_t codepoint = 0;
  uint32_t state = UTF8_ACCEPT;
  for (; *s; ++s) {
    // Any state other than UTF8_ACCEPT means no complete code point yet.
    if (decode(&state, &codepoint, *s) != UTF8_ACCEPT)
      continue;
    if (codepoint <= 0xFFFF) {
      printHex4(codepoint);
    } else {
      // 0xD7C0 is 0xD800 - (0x10000 >> 10), so the 0x10000 offset is
      // removed while forming the high surrogate.
      printHex4(0xD7C0 + (codepoint >> 10));
      printHex4(0xDC00 + (codepoint & 0x3FF));
    }
  }
  Serial.println();
  if (state != UTF8_ACCEPT)
    Serial.println(F("The string is not well-formed\n"));
}

void setup() {
  // Open the serial port that printCodePoints() writes to.
  Serial.begin(115200);

  // Dump the code points of the sample string once at start-up.
  uint8_t* text = (uint8_t*)mystring;
  printCodePoints(text);
}

void loop() {
  // Intentionally empty: the sketch does all of its work once in setup().
}

如何在 Arduino 或 C 中将 Unicode 字符转换为 "Unicode HEX Position"

How to convert Unicode char to "Unicode HEX Position" in Arduino or C

c

unicode

arduino