从控制台解码文本
Decode text from console
我尝试执行这段代码:
private void Test(object sender, RoutedEventArgs e)
{
ProcessStartInfo start = new ProcessStartInfo("cmd",
"/c \"wbadmin start recovert -version:02/26/2014-17:38 -itemtype:File - items:C:\test\"");
int exitCode;
using (Process proc = Process.Start(start))
{
proc.ErrorDataReceived += cmd_Error;
proc.OutputDataReceived += cmd_DataReceived;
proc.WaitForExit();
exitCode = proc.ExitCode;
}
}
private void cmd_DataReceived(object sender, DataReceivedEventArgs e)
{
if (e.Data == null) return;
var source = Encoding.Unicode;
var target = Encoding.UTF8;
var sBytes = source.GetBytes(e.Data);
var tBytes = Encoding.Convert(source, target, sBytes);
var tString = Encoding.UTF8.GetString(tBytes);
Console.WriteLine(tString);
}
但是我得到了这个字符串:"wbadmin 1.0 - ®≠·‚‡„¨•≠‚ ™Æ¨†≠§≠Æ© ·‚‡Æ™® †‡Â®¢†Ê®®"
我怎样才能解码这个字符串?
解析 cmd 的输出可能有点棘手,因为您的 cmd 有自己的代码页,通常等于系统的默认语言环境(您可以手动更改它,例如使用 chcp命令)。
阅读 this 了解详情。
重定向输出时,对我有用的方法(经过测试,也使用 wbadmin
)如下:
获取系统的默认语言环境:
[DllImport("kernel32.dll")]
public static extern int GetSystemDefaultLCID();
private static int GetCmdCodePage()
{
int lcid = GetSystemDefaultLCID();
var ci = System.Globalization.CultureInfo.GetCultureInfo(lcid);
return ci.TextInfo.OEMCodePage;
}
获取对应编码:
Encoding enc = null;
try
{
enc = Encoding.GetEncoding(GetCmdCodePage());
}
catch (Exception)
{
enc = Encoding.GetEncoding(855); // the value for Cyrillic
}
设置进程的编码:
if (!File.Exists(Path.Combine(Environment.SystemDirectory, @"wbadmin.exe")))
{
Console.WriteLine("wbadmin.exe not found");
return;
}
Process pr = new Process();
ProcessStartInfo psi = new ProcessStartInfo(@"wbadmin.exe");
psi.WindowStyle = ProcessWindowStyle.Hidden;
psi.CreateNoWindow = true;
psi.UseShellExecute = false;
psi.Arguments = "/?"; // prints avaliable commands
psi.RedirectStandardOutput = true;
psi.RedirectStandardError = true;
psi.Verb = "runas";
psi.StandardOutputEncoding = enc;
psi.StandardErrorEncoding = enc;
pr.StartInfo = psi;
pr.Start();
pr.WaitForExit(1000);
string error = pr.StandardError.ReadToEnd();
if (!string.IsNullOrEmpty(error))
{
Console.WriteLine("error: " + error);
pr.Close();
pr.Dispose();
return;
}
string output = pr.StandardOutput.ReadToEnd();
pr.Close();
pr.Dispose();
您的代码似乎完全正确,但毫无意义。事实上,C# 字符串始终是 UTF-16,无论如何。您的 cmd_DataReceived 方法正在将 UTF-16 转换为字节数组,其中包含原始字符串的 UTF-8 表示形式,然后通过调用 Encoding.UTF8.GetString(tBytes).[= 将其转换回 UTF-16 11=]
看起来外部程序以未知编码(UTF-8?)向控制台写入了一些内容,但 cmd_DataReceived 收到它已经解码为 UTF-16。
我假设,如果您真的想将字符串从 UTF-8 转换为 UTF-16,您的代码应该如下所示
private void cmd_DataReceived(object sender, DataReceivedEventArgs e)
{
if (e.Data == null) return;
var source = Encoding.Unicode;
var target = Encoding.UTF8;
var sBytes = source.GetBytes(e.Data);
var tString = Encoding.UTF8.GetString(sBytes);
Console.WriteLine(tString);
}
我尝试执行这段代码:
private void Test(object sender, RoutedEventArgs e)
{
ProcessStartInfo start = new ProcessStartInfo("cmd",
"/c \"wbadmin start recovert -version:02/26/2014-17:38 -itemtype:File - items:C:\test\"");
int exitCode;
using (Process proc = Process.Start(start))
{
proc.ErrorDataReceived += cmd_Error;
proc.OutputDataReceived += cmd_DataReceived;
proc.WaitForExit();
exitCode = proc.ExitCode;
}
}
private void cmd_DataReceived(object sender, DataReceivedEventArgs e)
{
if (e.Data == null) return;
var source = Encoding.Unicode;
var target = Encoding.UTF8;
var sBytes = source.GetBytes(e.Data);
var tBytes = Encoding.Convert(source, target, sBytes);
var tString = Encoding.UTF8.GetString(tBytes);
Console.WriteLine(tString);
}
但是我得到了这个字符串:"wbadmin 1.0 - ®≠·‚‡„¨•≠‚ ™Æ¨†≠§≠Æ© ·‚‡Æ™® †‡Â®¢†Ê®®" 我怎样才能解码这个字符串?
解析 cmd 的输出可能有点棘手,因为您的 cmd 有自己的代码页,通常等于系统的默认语言环境(您可以手动更改它,例如使用 chcp命令)。
阅读 this 了解详情。
重定向输出时,对我有用的方法(经过测试,也使用 wbadmin
)如下:
获取系统的默认语言环境:
[DllImport("kernel32.dll")] public static extern int GetSystemDefaultLCID(); private static int GetCmdCodePage() { int lcid = GetSystemDefaultLCID(); var ci = System.Globalization.CultureInfo.GetCultureInfo(lcid); return ci.TextInfo.OEMCodePage; }
获取对应编码:
Encoding enc = null; try { enc = Encoding.GetEncoding(GetCmdCodePage()); } catch (Exception) { enc = Encoding.GetEncoding(855); // the value for Cyrillic }
设置进程的编码:
if (!File.Exists(Path.Combine(Environment.SystemDirectory, @"wbadmin.exe"))) { Console.WriteLine("wbadmin.exe not found"); return; } Process pr = new Process(); ProcessStartInfo psi = new ProcessStartInfo(@"wbadmin.exe"); psi.WindowStyle = ProcessWindowStyle.Hidden; psi.CreateNoWindow = true; psi.UseShellExecute = false; psi.Arguments = "/?"; // prints avaliable commands psi.RedirectStandardOutput = true; psi.RedirectStandardError = true; psi.Verb = "runas"; psi.StandardOutputEncoding = enc; psi.StandardErrorEncoding = enc; pr.StartInfo = psi; pr.Start(); pr.WaitForExit(1000); string error = pr.StandardError.ReadToEnd(); if (!string.IsNullOrEmpty(error)) { Console.WriteLine("error: " + error); pr.Close(); pr.Dispose(); return; } string output = pr.StandardOutput.ReadToEnd(); pr.Close(); pr.Dispose();
您的代码似乎完全正确,但毫无意义。事实上,C# 字符串始终是 UTF-16,无论如何。您的 cmd_DataReceived 方法正在将 UTF-16 转换为字节数组,其中包含原始字符串的 UTF-8 表示形式,然后通过调用 Encoding.UTF8.GetString(tBytes).[= 将其转换回 UTF-16 11=]
看起来外部程序以未知编码(UTF-8?)向控制台写入了一些内容,但 cmd_DataReceived 收到它已经解码为 UTF-16。
我假设,如果您真的想将字符串从 UTF-8 转换为 UTF-16,您的代码应该如下所示
private void cmd_DataReceived(object sender, DataReceivedEventArgs e)
{
if (e.Data == null) return;
var source = Encoding.Unicode;
var target = Encoding.UTF8;
var sBytes = source.GetBytes(e.Data);
var tString = Encoding.UTF8.GetString(sBytes);
Console.WriteLine(tString);
}