Node.js 请求 - 无法解码 HTML 页面
Node.js Request - Can't decode HTML page
我正在尝试使用 Node.js 和 Request 模块解码此 HTML 页面:http://www.receita.fazenda.gov.br/PessoaJuridica/CNPJ/cnpjreva/Cnpjreva_Erro.asp
浏览器的 JavaScript 控制台返回的字符集是 windows-1252:
document.characterSet = "windows-1252";
我尝试了 iconv-lite 中所有可用的编码,但返回的都是错误的文本。
var body = iconv.decode(new Buffer(body), "windows1252");
有人知道如何解码此页面吗?
示例代码:
request('http://www.receita.fazenda.gov.br/PessoaJuridica/CNPJ/cnpjreva/Cnpjreva_Erro.asp', function (err, res, body) {
var body = iconv.decode(new Buffer(body), "windows1252");
console.log(body);
});
返回结果:
...
<td valign="middle" align="left"><b><font face="Arial" size="2">
Acesso n�o permitido.
</td>
...
解码后的字符串应该是:
...
<td valign="middle" align="left"><b><font face="Arial" size="2">
Acesso não permitido.
</td>
...
谢谢。
这个代码
var request = require('request');
request('http://www.receita.fazenda.gov.br/PessoaJuridica/CNPJ/cnpjreva/Cnpjreva_Erro.asp', function (err, res, body) {
console.log(body);
});
输出页面
<script language="JavaScript">
function proxima(link)
{
location.replace(link);
return false;
}
function carrega(vobjeto) {
for (var va = 0 ; va < document.forms[0].elements.length; va++) {
if (document.forms[0].elements[va].name == vobjeto) {
document.forms[0].elements[va].focus();
}
}
return false;
}
function volta(vvolta) {
history.go(vvolta*-1);
return false;
}
function SaltaCampo (campo, prox, tammax, teclapres)
{
var tecla = teclapres.keyCode;
vr = campo.value;
tam = vr.length;
if (tecla != 0 && tecla != 10 && tecla != 24)
if (tam == tammax)
prox.focus();
}
</script>
<html>
<head>
<title>Tela de respostas</title>
</head>
<body background="area_texto_back.jpg">
<table border="0" width="100%">
<tr>
<td valign="middle" align="left">
<table border="0" cellspacing="0" cellpadding="0">
<!-- Inibido tendo em vista novo modelo site da SRF (Luis Carlos-22/11/2003)
<tr>
<td>
<img src="srf.gif" height="48" alt="srf.gif (2074 bytes)" width="184">
</td>
</tr> -->
<tr>
<td>
<font color="#000080" face="Arial">
<b>Acesso indevido</b></font>
</td>
</tr>
</table>
</td>
</tr>
<tr>
<td valign="middle" align="left"><hr size="1">
</td>
</tr>
</table>
<table border="0" width="100%">
<tr>
<td>
</td>
</tr>
<tr>
<td valign="middle" align="left"><b><font face="Arial" size="2">
Contribuinte,
</td>
<td valign="middle" align="right" >
</td>
</tr>
<tr>
</table>
<table border="0" width="100%">
<tr>
<td>
</td>
</tr>
<tr>
<td valign="middle" align="left"><b><font face="Arial" size="2">
Acesso n�o permitido.
</td>
</tr>
<tr>
<td>
</td>
</tr>
<tr>
<td valign="middle" align="left" colspan="2"><hr size="1">
</td>
</tr>
</table>
</body>
</html>
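页面通过 document.characterSet 返回的编码是错误的,正确的编码是 ISO-8859-1。
注意:调用 request 时需要在选项中设置 encoding: null,这样 body 才是原始的 Buffer;否则 body 已经被按 UTF-8 解码成字符串,原始字节已丢失,之后再用 iconv 解码也无法还原: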
body = iconv.decode(body, "ISO-8859-1");
我正在尝试使用 Node.js 和 Request 模块解码此 HTML 页面:http://www.receita.fazenda.gov.br/PessoaJuridica/CNPJ/cnpjreva/Cnpjreva_Erro.asp
浏览器的 JavaScript 控制台返回的字符集是 windows-1252:
document.characterSet = "windows-1252";
我尝试了 iconv-lite 中所有可用的编码,但返回的都是错误的文本。
var body = iconv.decode(new Buffer(body), "windows1252");
有人知道如何解码此页面吗?
示例代码:
request('http://www.receita.fazenda.gov.br/PessoaJuridica/CNPJ/cnpjreva/Cnpjreva_Erro.asp', function (err, res, body) {
var body = iconv.decode(new Buffer(body), "windows1252");
console.log(body);
});
返回结果:
...
<td valign="middle" align="left"><b><font face="Arial" size="2">
Acesso n�o permitido.
</td>
...
解码后的字符串应该是:
...
<td valign="middle" align="left"><b><font face="Arial" size="2">
Acesso não permitido.
</td>
...
谢谢。
这个代码
var request = require('request');
request('http://www.receita.fazenda.gov.br/PessoaJuridica/CNPJ/cnpjreva/Cnpjreva_Erro.asp', function (err, res, body) {
console.log(body);
});
输出页面
<script language="JavaScript">
function proxima(link)
{
location.replace(link);
return false;
}
function carrega(vobjeto) {
for (var va = 0 ; va < document.forms[0].elements.length; va++) {
if (document.forms[0].elements[va].name == vobjeto) {
document.forms[0].elements[va].focus();
}
}
return false;
}
function volta(vvolta) {
history.go(vvolta*-1);
return false;
}
function SaltaCampo (campo, prox, tammax, teclapres)
{
var tecla = teclapres.keyCode;
vr = campo.value;
tam = vr.length;
if (tecla != 0 && tecla != 10 && tecla != 24)
if (tam == tammax)
prox.focus();
}
</script>
<html>
<head>
<title>Tela de respostas</title>
</head>
<body background="area_texto_back.jpg">
<table border="0" width="100%">
<tr>
<td valign="middle" align="left">
<table border="0" cellspacing="0" cellpadding="0">
<!-- Inibido tendo em vista novo modelo site da SRF (Luis Carlos-22/11/2003)
<tr>
<td>
<img src="srf.gif" height="48" alt="srf.gif (2074 bytes)" width="184">
</td>
</tr> -->
<tr>
<td>
<font color="#000080" face="Arial">
<b>Acesso indevido</b></font>
</td>
</tr>
</table>
</td>
</tr>
<tr>
<td valign="middle" align="left"><hr size="1">
</td>
</tr>
</table>
<table border="0" width="100%">
<tr>
<td>
</td>
</tr>
<tr>
<td valign="middle" align="left"><b><font face="Arial" size="2">
Contribuinte,
</td>
<td valign="middle" align="right" >
</td>
</tr>
<tr>
</table>
<table border="0" width="100%">
<tr>
<td>
</td>
</tr>
<tr>
<td valign="middle" align="left"><b><font face="Arial" size="2">
Acesso n�o permitido.
</td>
</tr>
<tr>
<td>
</td>
</tr>
<tr>
<td valign="middle" align="left" colspan="2"><hr size="1">
</td>
</tr>
</table>
</body>
</html>
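页面通过 document.characterSet 返回的编码是错误的,正确的编码是 ISO-8859-1。
注意:调用 request 时需要在选项中设置 encoding: null,这样 body 才是原始的 Buffer;否则 body 已经被按 UTF-8 解码成字符串,原始字节已丢失,之后再用 iconv 解码也无法还原: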
body = iconv.decode(body, "ISO-8859-1");