如何在 Arduino 或 C 中将 Unicode 字符转换为 "Unicode HEX Position"
How to convert Unicode char to "Unicode HEX Position" in Arduino or C
如何在 Arduino 或 C 中将 Unicode 字符转换为“Unicode HEX Position”
我会在这里分享一张照片:
例如在 JavaScript 中,您可以使用 charCodeAt();
来做到这一点!
此函数会准确地返回字符代码,然后您可以将其转换为十六进制!
例如在 JavaScript 中,我可以这样做来得到与表中完全一致的值:
// Sample input: a single non-ASCII character.
var inpString = 'س';
// charCodeAt(0) gives the numeric code of the first character.
var myChar = inpString.charCodeAt(0);
// Render the high byte, then the low byte, as two hex digits each.
var output = ToHex((myChar >> 8) & 0xff) + ToHex(myChar & 0xff);
// Format the low 8 bits of i as two uppercase hexadecimal digits.
function ToHex(i)
{
    var digits = "0123456789ABCDEF";
    var lowNibble = digits.charAt(i & 0xf);
    var highNibble = digits.charAt((i >> 4) & 0xf);
    return highNibble + lowNibble;
}
// Show the resulting hex string (two ToHex() results concatenated) in a dialog.
alert(output);
那么我如何在 Arduino 或 C 中做到这一点?
对于希望通过 GSM 调制解调器(如 sim800 和 Arduino 或 AVR)发送多部分 SMS 的人来说,这个问题是个大问题
当然还有其他发送 Unicode 短信的方法,但是当您尝试发送多部分短信时就会出现问题,因此唯一的选择是使用 PDU 模式。这样一来,您就会卡在 7 位 ASCII 编码和 16 位 Unicode 编码之间,唯一的办法是找到一种方法将字符转换为“Unicode HEX Position”,就像我在第一个帖子中分享的图片那样。
不过最终“Edgar Bonet”在 Stack Exchange 上发布了答案,我在这里分享链接,供遇到同样问题的人参考:
https://arduino.stackexchange.com/a/76886/67600
特别感谢“Edgar Bonet”
如果您需要在运行时进行转换(而不仅仅是处理源代码中的字符串),我找到了答案。
解决方案在这里:
http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
可运行的示例,查找表存储在 PROGMEM 中:
// Decoder state 0: the initial state used by printCodePoints(); decode()
// returning it means a complete code point has been assembled.
#define UTF8_ACCEPT 0
// Decoder state 1: in the transition table below every entry of the row for
// state 1 is 1, so once entered this state is never left.
#define UTF8_REJECT 1
// Lookup table for decode(), placed in program memory via PROGMEM.
// Entries 0..255 map each input byte to a character class; the entries from
// index 256 onward are the state-transition table, which decode() addresses
// as 256 + state * 16 + class.
static const uint8_t utf8d_PROGMEM[] PROGMEM = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..3f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..5f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..7f
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 80..9f
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // a0..bf
8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0..df
0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, // e0..ef
0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, // f0..ff
0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, // s0..s0
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s1..s2
1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s3..s4
1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s5..s6
1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // s7..s8
};
// Feed one byte into the table-driven UTF-8 decoder.
//   state - in/out decoder state; UTF8_ACCEPT means no sequence is in progress.
//   codep - in/out code point accumulator.
//   byte  - the next input byte (used directly as an index into the table).
// Returns the new state, which is also stored in *state.
uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
  uint8_t entry;

  // First 256 table entries: character class of the incoming byte.
  memcpy_P(&entry, &utf8d_PROGMEM[byte], sizeof entry);
  const uint32_t charClass = entry;

  if (*state == UTF8_ACCEPT) {
    // Start of a sequence: keep only the payload bits selected by the class.
    *codep = (0xff >> charClass) & (byte);
  } else {
    // Inside a sequence: shift in the low six bits of this byte.
    *codep = (byte & 0x3fu) | (*codep << 6);
  }

  // Transition table starts at index 256, 16 entries per state.
  memcpy_P(&entry, &utf8d_PROGMEM[256 + *state * 16 + charClass], sizeof entry);
  *state = entry;
  return *state;
}
// Sample text containing non-ASCII characters; setup() passes it to
// printCodePoints(). NOTE(review): the bytes are UTF-8 only if this source
// file is saved as UTF-8 - confirm.
const char mystring[] = "Kulaťoučký míč";
// Decode the NUL-terminated byte string s with decode() and print every
// completed code point to Serial as zero-padded, 4-digit hexadecimal values,
// all on one line.
//   - Code points up to U+FFFF are printed as a single 4-digit value.
//   - Code points above U+FFFF are printed as a UTF-16 surrogate pair
//     (two 4-digit values) instead of being silently dropped.
// After the line is terminated, a diagnostic is printed if the decoder did
// not end in UTF8_ACCEPT (truncated or invalid input).
void printCodePoints(uint8_t* s) {
  uint32_t codepoint = 0;
  uint32_t state = UTF8_ACCEPT;

  for (; *s; ++s) {
    // Keep feeding bytes until a complete code point has been assembled.
    if (decode(&state, &codepoint, *s) != UTF8_ACCEPT) {
      continue;
    }

    // Split the code point into one or two 16-bit units.
    uint32_t units[2];
    uint8_t unitCount = 0;
    if (codepoint <= 0xFFFF) {
      units[unitCount++] = codepoint;
    } else {
      // 0xD7C0 == 0xD800 - (0x10000 >> 10): high surrogate without an
      // explicit subtraction of 0x10000.
      units[unitCount++] = 0xD7C0 + (codepoint >> 10);
      units[unitCount++] = 0xDC00 + (codepoint & 0x3FF);
    }

    for (uint8_t i = 0; i < unitCount; ++i) {
      // Serial.print(..., HEX) omits leading zeros, so pad to four digits.
      if (units[i] < 0x1000) {
        Serial.print(F("0"));
      }
      if (units[i] < 0x0100) {
        Serial.print(F("0"));
      }
      if (units[i] < 0x0010) {
        Serial.print(F("0"));
      }
      Serial.print(units[i], HEX);
    }
  }

  Serial.println();
  if (state != UTF8_ACCEPT) {
    Serial.println("The string is not well-formed\n");
  }
}
void setup() {
// put your setup code here, to run once:
Serial.begin(115200);
printCodePoints((uint8_t*)mystring);
}
// Empty: this sketch does nothing repeatedly; all output is produced in setup().
void loop() {
// put your main code here, to run repeatedly:
}
如何在 Arduino 或 C 中将 Unicode 字符转换为“Unicode HEX Position”
我会在这里分享一张照片:
例如在 JavaScript 中,您可以使用 charCodeAt();
来做到这一点!
此函数会准确地返回字符代码,然后您可以将其转换为十六进制!
例如在 JavaScript 中,我可以这样做来得到与表中完全一致的值:
// Sample input: a single non-ASCII character.
var inpString = 'س';
// charCodeAt(0) gives the numeric code of the first character.
var myChar = inpString.charCodeAt(0);
// Render the high byte, then the low byte, as two hex digits each.
var output = ToHex((myChar >> 8) & 0xff) + ToHex(myChar & 0xff);
// Format the low 8 bits of i as two uppercase hexadecimal digits.
function ToHex(i)
{
    var digits = "0123456789ABCDEF";
    var lowNibble = digits.charAt(i & 0xf);
    var highNibble = digits.charAt((i >> 4) & 0xf);
    return highNibble + lowNibble;
}
// Show the resulting hex string (two ToHex() results concatenated) in a dialog.
alert(output);
那么我如何在 Arduino 或 C 中做到这一点?
对于希望通过 GSM 调制解调器(如 sim800 和 Arduino 或 AVR)发送多部分 SMS 的人来说,这个问题是个大问题
当然还有其他发送 Unicode 短信的方法,但是当您尝试发送多部分短信时就会出现问题,因此唯一的选择是使用 PDU 模式。这样一来,您就会卡在 7 位 ASCII 编码和 16 位 Unicode 编码之间,唯一的办法是找到一种方法将字符转换为“Unicode HEX Position”,就像我在第一个帖子中分享的图片那样。不过最终“Edgar Bonet”在 Stack Exchange 上发布了答案,我在这里分享链接,供遇到同样问题的人参考:
https://arduino.stackexchange.com/a/76886/67600
特别感谢“Edgar Bonet”
如果您需要在运行时进行转换(而不仅仅是处理源代码中的字符串),我找到了答案。
解决方案在这里:
http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
可运行的示例,查找表存储在 PROGMEM 中:
// Decoder state 0: the initial state used by printCodePoints(); decode()
// returning it means a complete code point has been assembled.
#define UTF8_ACCEPT 0
// Decoder state 1: in the transition table below every entry of the row for
// state 1 is 1, so once entered this state is never left.
#define UTF8_REJECT 1
// Lookup table for decode(), placed in program memory via PROGMEM.
// Entries 0..255 map each input byte to a character class; the entries from
// index 256 onward are the state-transition table, which decode() addresses
// as 256 + state * 16 + class.
static const uint8_t utf8d_PROGMEM[] PROGMEM = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..3f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..5f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..7f
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 80..9f
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // a0..bf
8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0..df
0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, // e0..ef
0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, // f0..ff
0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, // s0..s0
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s1..s2
1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s3..s4
1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s5..s6
1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // s7..s8
};
// Feed one byte into the table-driven UTF-8 decoder.
//   state - in/out decoder state; UTF8_ACCEPT means no sequence is in progress.
//   codep - in/out code point accumulator.
//   byte  - the next input byte (used directly as an index into the table).
// Returns the new state, which is also stored in *state.
uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
  uint8_t entry;

  // First 256 table entries: character class of the incoming byte.
  memcpy_P(&entry, &utf8d_PROGMEM[byte], sizeof entry);
  const uint32_t charClass = entry;

  if (*state == UTF8_ACCEPT) {
    // Start of a sequence: keep only the payload bits selected by the class.
    *codep = (0xff >> charClass) & (byte);
  } else {
    // Inside a sequence: shift in the low six bits of this byte.
    *codep = (byte & 0x3fu) | (*codep << 6);
  }

  // Transition table starts at index 256, 16 entries per state.
  memcpy_P(&entry, &utf8d_PROGMEM[256 + *state * 16 + charClass], sizeof entry);
  *state = entry;
  return *state;
}
// Sample text containing non-ASCII characters; setup() passes it to
// printCodePoints(). NOTE(review): the bytes are UTF-8 only if this source
// file is saved as UTF-8 - confirm.
const char mystring[] = "Kulaťoučký míč";
// Decode the NUL-terminated byte string s with decode() and print every
// completed code point to Serial as zero-padded, 4-digit hexadecimal values,
// all on one line.
//   - Code points up to U+FFFF are printed as a single 4-digit value.
//   - Code points above U+FFFF are printed as a UTF-16 surrogate pair
//     (two 4-digit values) instead of being silently dropped.
// After the line is terminated, a diagnostic is printed if the decoder did
// not end in UTF8_ACCEPT (truncated or invalid input).
void printCodePoints(uint8_t* s) {
  uint32_t codepoint = 0;
  uint32_t state = UTF8_ACCEPT;

  for (; *s; ++s) {
    // Keep feeding bytes until a complete code point has been assembled.
    if (decode(&state, &codepoint, *s) != UTF8_ACCEPT) {
      continue;
    }

    // Split the code point into one or two 16-bit units.
    uint32_t units[2];
    uint8_t unitCount = 0;
    if (codepoint <= 0xFFFF) {
      units[unitCount++] = codepoint;
    } else {
      // 0xD7C0 == 0xD800 - (0x10000 >> 10): high surrogate without an
      // explicit subtraction of 0x10000.
      units[unitCount++] = 0xD7C0 + (codepoint >> 10);
      units[unitCount++] = 0xDC00 + (codepoint & 0x3FF);
    }

    for (uint8_t i = 0; i < unitCount; ++i) {
      // Serial.print(..., HEX) omits leading zeros, so pad to four digits.
      if (units[i] < 0x1000) {
        Serial.print(F("0"));
      }
      if (units[i] < 0x0100) {
        Serial.print(F("0"));
      }
      if (units[i] < 0x0010) {
        Serial.print(F("0"));
      }
      Serial.print(units[i], HEX);
    }
  }

  Serial.println();
  if (state != UTF8_ACCEPT) {
    Serial.println("The string is not well-formed\n");
  }
}
void setup() {
// put your setup code here, to run once:
Serial.begin(115200);
printCodePoints((uint8_t*)mystring);
}
// Empty: this sketch does nothing repeatedly; all output is produced in setup().
void loop() {
// put your main code here, to run repeatedly:
}