将具有特殊字符的字符串转换为十六进制 - C#
Convert string with special characters to hex - C#
您好,我正在尝试转换包含 û
和 …
等特殊字符的字符串。
在我的研究和测试中,我几乎成功地使用了以下函数:
public static string ToHex(this string input)
{
char[] values = input.ToCharArray();
string hex = "0x";
string add = "";
foreach (char c in values)
{
int value = Convert.ToInt32(c);
add = String.Format("{0:X}", value).Length == 1 ?
"0" + String.Format("{0:X}", value) + "00"
: String.Format("{0:X}", value) + "00";
hex += add;
}
return hex;
}
如果我尝试解码 ´o¸sçPQ^ûË\u000f±d
,它会正确解码并将其转换为 0xB4006F00B8007300E700500051005E00FB00CB000F00B1006400
、
相反,当我尝试解码 ´o¸sçPQ](ÂF\u0012…a
它失败并将其转换为 0xB4006F00B8007300E700500051005D002800C200460012002026006100
而不是这个
0xB4006F00B8007300E700500051005D002800C2004600120026206100
.
进行最少的调试我看到字符串是从
´o¸sçPQ](ÂF\u0012…a
到 ´o¸sçPQ](ÂF.a
,我不希望这是问题,但我不确定。
编辑
0xB4006F00B8007300E700500051005D002800C2004600120026206100 ´o¸sçPQ](ÂF…a CORRECT
0xB4006F00B8007300E700500051005D002800C200460012002026006100 ´o¸sçPQ](ÂF.a MY OUTPUT
0xB4006F00B8007300E700500051005D003D00CB0042000C00A50061006000AD004500BB00 ´o¸sçPQ]=ËB¥a`E» CORRECT
0xB4006F00B8007300E700500051005D003D00CB0042000C00A50061006000AD004500BB00 ´o¸sçPQ]=ËB¥a`E» MY OUTPUT
0xB4006F00B8007300E700500051005D002F00D30042001900B7006E006100 ´o¸sçPQ]/ÓB·na CORRECT
0xB4006F00B8007300E700500051005D002F00D30042001900B7006E006100 ´o¸sçPQ]/ÓB·na MY OUTPUT
0xB4006F00B8007300E700500051005F001A20BC006B0021003500DD00 ´o¸sçPQ_‚¼k!5Ý CORRECT
0xB4006F00B8007300E700500051005F00201A00BC006B0021003500DD00 ´o¸sçPQ_'¼k!5Ý MY OUTPUT
0xB4006F00B8007300E700500051005D002F00EE006B00290014204E004100 ´o¸sçPQ]/îk)—NA CORRECT
0xB4006F00B8007300E700500051005D002F00EE006B0029002014004E004100 ´o¸sçPQ]/îk)-NA MY OUTPUT
0xB4006F00B8007300E700500051005D003800E600690036001C204C004F00 ´o¸sçPQ]8æi6“LO CORRECT
0xB4006F00B8007300E700500051005D003800E60069003600201C004C004F00 ´o¸sçPQ]8æi6"LO MY OUTPUT
0xB4006F00B8007300E700500051005D002F00F3006200390014204E004700C602 ´o¸sçPQ]/ób9—NGˆ CORRECT
0xB4006F00B8007300E700500051005D002F00F300620039002014004E0047002C600 ´o¸sçPQ]/ób9-NG^ MY OUTPUT
0xB4006F00B8007300E700500051005D003B00EE007200330078014100 ´o¸sçPQ];îr3ŸA CORRECT
0xB4006F00B8007300E700500051005D003B00EE0072003300178004100 ´o¸sçPQ];îr3YA MY OUTPUT
0xB4006F00B8007300E700500051005D003000F20064003E009D004B00 ´o¸sçPQ]0òd>K CORRECT
0xB4006F00B8007300E700500051005D003000F20064003E009D004B00 ´o¸sçPQ]0òd>?K MY OUTPUT
0xB4006F00B8007300E700500051005D002F00E60075003E00 ´o¸sçPQ]/æu> CORRECT
0xB4006F00B8007300E700500051005D002F00E60075003E00 ´o¸sçPQ]/æu> MY OUTPUT
0xB4006F00B8007300E700500051005D002F00EE006A003000DC024500 ´o¸sçPQ]/îj0˜E CORRECT
0xB4006F00B8007300E700500051005D002F00EE006A0030002DC004500 ´o¸sçPQ]/îj0~E MY OUTPUT
提前感谢您的每一条回复或评论,
你好。
这是由于 endianness,以及不同的整数和字符串编码。
char cc = '…';
Console.WriteLine(cc);
// 2026 <-- note, hex value differs from byte representation shown below
Console.WriteLine(((int)cc).ToString("x"));
// 26200000
Console.WriteLine(BytesToHex(BitConverter.GetBytes((int)cc)));
// 2620
Console.WriteLine(BytesToHex(Encoding.GetEncoding("utf-16").GetBytes(new[] { cc })));
您不应将字符视为整数。有很多不同的方式来编码字符串,.net 内部使用 UTF-16。并且所有编码都适用于字节,而不适用于整数。将字符显式转换为整数可能会导致意想不到的结果,就像您的结果一样。为什么不通过 Encoding.GetBytes
获得所需的编码并使用字节?
void Main()
{
// output you expect 0xB4006F00B8007300E700500051005D002800C2004600120026206100
Console.WriteLine(BytesToHex(Encoding.GetEncoding("utf-16").GetBytes("´o¸sçPQ](ÂF\u0012…a")));
}
public static string BytesToHex(byte[] bytes)
{
// whatever way to convert bytes to hex
return "0x" + BitConverter.ToString(bytes).Replace("-", "");
}
您好,我正在尝试转换包含 û
和 …
等特殊字符的字符串。
在我的研究和测试中,我几乎成功地使用了以下函数:
public static string ToHex(this string input)
{
char[] values = input.ToCharArray();
string hex = "0x";
string add = "";
foreach (char c in values)
{
int value = Convert.ToInt32(c);
add = String.Format("{0:X}", value).Length == 1 ?
"0" + String.Format("{0:X}", value) + "00"
: String.Format("{0:X}", value) + "00";
hex += add;
}
return hex;
}
如果我尝试解码 ´o¸sçPQ^ûË\u000f±d
,它会正确解码并将其转换为 0xB4006F00B8007300E700500051005E00FB00CB000F00B1006400
、
相反,当我尝试解码 ´o¸sçPQ](ÂF\u0012…a
它失败并将其转换为 0xB4006F00B8007300E700500051005D002800C200460012002026006100
而不是这个
0xB4006F00B8007300E700500051005D002800C2004600120026206100
.
进行最少的调试我看到字符串是从
´o¸sçPQ](ÂF\u0012…a
到 ´o¸sçPQ](ÂF.a
,我不希望这是问题,但我不确定。
编辑
0xB4006F00B8007300E700500051005D002800C2004600120026206100 ´o¸sçPQ](ÂF…a CORRECT
0xB4006F00B8007300E700500051005D002800C200460012002026006100 ´o¸sçPQ](ÂF.a MY OUTPUT
0xB4006F00B8007300E700500051005D003D00CB0042000C00A50061006000AD004500BB00 ´o¸sçPQ]=ËB¥a`E» CORRECT
0xB4006F00B8007300E700500051005D003D00CB0042000C00A50061006000AD004500BB00 ´o¸sçPQ]=ËB¥a`E» MY OUTPUT
0xB4006F00B8007300E700500051005D002F00D30042001900B7006E006100 ´o¸sçPQ]/ÓB·na CORRECT
0xB4006F00B8007300E700500051005D002F00D30042001900B7006E006100 ´o¸sçPQ]/ÓB·na MY OUTPUT
0xB4006F00B8007300E700500051005F001A20BC006B0021003500DD00 ´o¸sçPQ_‚¼k!5Ý CORRECT
0xB4006F00B8007300E700500051005F00201A00BC006B0021003500DD00 ´o¸sçPQ_'¼k!5Ý MY OUTPUT
0xB4006F00B8007300E700500051005D002F00EE006B00290014204E004100 ´o¸sçPQ]/îk)—NA CORRECT
0xB4006F00B8007300E700500051005D002F00EE006B0029002014004E004100 ´o¸sçPQ]/îk)-NA MY OUTPUT
0xB4006F00B8007300E700500051005D003800E600690036001C204C004F00 ´o¸sçPQ]8æi6“LO CORRECT
0xB4006F00B8007300E700500051005D003800E60069003600201C004C004F00 ´o¸sçPQ]8æi6"LO MY OUTPUT
0xB4006F00B8007300E700500051005D002F00F3006200390014204E004700C602 ´o¸sçPQ]/ób9—NGˆ CORRECT
0xB4006F00B8007300E700500051005D002F00F300620039002014004E0047002C600 ´o¸sçPQ]/ób9-NG^ MY OUTPUT
0xB4006F00B8007300E700500051005D003B00EE007200330078014100 ´o¸sçPQ];îr3ŸA CORRECT
0xB4006F00B8007300E700500051005D003B00EE0072003300178004100 ´o¸sçPQ];îr3YA MY OUTPUT
0xB4006F00B8007300E700500051005D003000F20064003E009D004B00 ´o¸sçPQ]0òd>K CORRECT
0xB4006F00B8007300E700500051005D003000F20064003E009D004B00 ´o¸sçPQ]0òd>?K MY OUTPUT
0xB4006F00B8007300E700500051005D002F00E60075003E00 ´o¸sçPQ]/æu> CORRECT
0xB4006F00B8007300E700500051005D002F00E60075003E00 ´o¸sçPQ]/æu> MY OUTPUT
0xB4006F00B8007300E700500051005D002F00EE006A003000DC024500 ´o¸sçPQ]/îj0˜E CORRECT
0xB4006F00B8007300E700500051005D002F00EE006A0030002DC004500 ´o¸sçPQ]/îj0~E MY OUTPUT
提前感谢您的每一条回复或评论,
你好。
这是由于 endianness,以及不同的整数和字符串编码。
char cc = '…';
Console.WriteLine(cc);
// 2026 <-- note, hex value differs from byte representation shown below
Console.WriteLine(((int)cc).ToString("x"));
// 26200000
Console.WriteLine(BytesToHex(BitConverter.GetBytes((int)cc)));
// 2620
Console.WriteLine(BytesToHex(Encoding.GetEncoding("utf-16").GetBytes(new[] { cc })));
您不应将字符视为整数。有很多不同的方式来编码字符串,.net 内部使用 UTF-16。并且所有编码都适用于字节,而不适用于整数。将字符显式转换为整数可能会导致意想不到的结果,就像您的结果一样。为什么不通过 Encoding.GetBytes
获得所需的编码并使用字节?
void Main()
{
// output you expect 0xB4006F00B8007300E700500051005D002800C2004600120026206100
Console.WriteLine(BytesToHex(Encoding.GetEncoding("utf-16").GetBytes("´o¸sçPQ](ÂF\u0012…a")));
}
public static string BytesToHex(byte[] bytes)
{
// whatever way to convert bytes to hex
return "0x" + BitConverter.ToString(bytes).Replace("-", "");
}