连接到 Web 服务器时连接被拒绝
connection refused when connecting to web server
我正在尝试连接到网络服务器以便抓取它,但我的程序给出了错误:来自连接函数的“连接被拒绝”。这是代码:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <pthread.h>
#include <errno.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <regex.h>
#define MAXLINE 4096
/*
 * Question code, kept exactly as posted.
 *
 * Reads a domain name from stdin, resolves it with getaddrinfo(), prints the
 * resolved IPv4 address, then tries to connect to port 80, send a GET request
 * and print whatever the peer sends back.
 */
int main() {
char domain[255];
char netAddr[255];
char GETrqst[MAXLINE];
char recvLine[MAXLINE];
int rl;
printf("webscraper\n\n");
printf("enter the domain of the website you want to scrape (eg: google.com): ");
// "%s" carries no field width, so the input length is not limited to sizeof(domain)
scanf("%s", domain);
// hints (getResult) and result list (getResult2) for getaddrinfo:
// IPv4 only, stream sockets only; service is NULL so no port is requested here
struct addrinfo getResult, *getResult2;
memset(&getResult, 0, sizeof(getResult));
getResult.ai_family = AF_INET;
getResult.ai_socktype = SOCK_STREAM;
int addrErr = getaddrinfo(domain, NULL, &getResult, &getResult2);
if (addrErr != 0) {
// NOTE(review): the failure code is in addrErr; perror() prints errno instead -- confirm against the getaddrinfo docs
perror("getaddrinfo");
exit(1);
}
// NOTE(review): this first call passes the generic sa_data bytes as the address;
// its text result is overwritten by the second call below, so it only acts as a guard
if (inet_ntop(AF_INET, getResult2->ai_addr->sa_data, netAddr, 255)) {
// view the first result as a sockaddr_in and format its sin_addr field as text
void *addr_in = &((struct sockaddr_in *) getResult2->ai_addr)->sin_addr;
inet_ntop(AF_INET, addr_in, netAddr, 255);
printf("ip for domain is: %s\n", netAddr);
} else {
perror("inet_ntop");
exit(1);
}
// destination address for connect(); only sin_family and sin_port are assigned directly
struct sockaddr_in socket_address;
//memset(&socket_address, 0, sizeof(socket_address));
socket_address.sin_family = AF_INET;
socket_address.sin_port = htons(80);
// NOTE(review): inet_aton() parses a dotted-quad text string, but sa_data holds raw bytes;
// a nonzero result is also treated as failure here -- check the inet_aton return convention.
// The answers in this thread replace this call with a direct copy of sin_addr.
int atonErr = inet_aton(getResult2->ai_addr->sa_data, &socket_address.sin_addr);
if (atonErr != 0) {
perror("inet_aton");
exit(1);
} else {
printf("successfully converted string to struct\n");
}
freeaddrinfo(getResult2);
printf("address struct setup\n");
sleep(1);
printf("setting up socket\n");
// the socket() result is used without being checked for -1
int sock = socket(AF_INET, SOCK_STREAM, 0);
sleep(1);
printf("attempting connection to web server...\n");
int connectStatus = connect(sock, (struct sockaddr *) &socket_address, sizeof(socket_address));
if (connectStatus == -1) {
perror("connect error");
exit(1);
} else {
printf("connection success!\n");
}
sleep(1);
// build the GET request; the trailing \r\n\r\n (an empty line) ends the request headers
sprintf(GETrqst, "GET / HTTP/1.1\r\n\r\n");
// sizeof(GETrqst) is the whole MAXLINE-byte buffer, not the length of the formatted request
write(sock, GETrqst, sizeof(GETrqst));
// recvLine is zeroed once; each read leaves at least the last byte untouched,
// which is what lets printf("%s") find a terminator
memset(recvLine, 0, MAXLINE);
while ((rl = read(sock, recvLine, MAXLINE-1)) > 0 ) {
printf("%s", recvLine);
}
// exit(1) runs unconditionally, so the return below is never reached
exit(1);
return 0;
}
我试过使用 memset() 来清零 sockaddr_in 结构,因为这种做法以前有效,但现在不起作用。我还检查了 IP 是否被正确保存和使用,但这些办法都没有奏效。我很迷茫,已经花了大约 2 个小时尝试解决这个问题。如有任何帮助,我将不胜感激!
我做了一些更改以使其成功发出 HTTP 请求:
- 向 HTTP 1.1 请求添加了
Host:
字段。不提供该字段是错误的。
- 我让它为
GETrqst
发送了适当数量的字节。应该是strlen(GETrqst)
,不是sizeof(GETrqst)
.
- 您在
inet_aton(getResult2->ai_addr->sa_data, &socket_address.sin_addr);
中将错误的内容复制到 socket_address.sin_addr
中。 ((struct sockaddr_in *) getResult2->ai_addr)->sin_addr
中的内容正确,只需复制即可。
以下是我修改后的代码:
#define _DEFAULT_SOURCE
#include <errno.h>
#include <netdb.h>
#include <pthread.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
#define MAXLINE 4096
/*
 * Resolve a host name read from stdin, connect to it on TCP port 80, send a
 * minimal HTTP/1.1 GET request for "/" and copy the raw response to stdout.
 *
 * Every IPv4 address returned by getaddrinfo() is tried in turn until one
 * connection succeeds.
 *
 * Returns 0 after the response has been copied, 1 on bad input or on a
 * connection/send failure; exits with status 1 if name resolution fails.
 */
int main() {
    char domain[255];
    char GETrqst[MAXLINE];
    char recvLine[MAXLINE];

    printf("webscraper\n\n");
    printf("enter the domain of the website you want to scrape (eg: google.com): ");
    fflush(stdout); /* the prompt has no newline, so flush it before blocking on input */
    if (scanf("%254s", domain) != 1) return 1;

    /* hints for getaddrinfo: IPv4, TCP stream sockets only */
    struct addrinfo getResult = {0}, *getResult2;
    getResult.ai_family = AF_INET;
    getResult.ai_socktype = SOCK_STREAM;
    getResult.ai_protocol = IPPROTO_TCP;

    int addrErr = getaddrinfo(domain, NULL, &getResult, &getResult2);
    if (addrErr != 0) {
        /* getaddrinfo reports failures through its return value, not errno */
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(addrErr));
        exit(1);
    }

    printf("attempting connection to web server...\n");

    int sock = -1;
    struct addrinfo *rp;

    /* loop through the possible addresses */
    for (rp = getResult2; rp != NULL; rp = rp->ai_next) {
        printf("%d %d %d\n", rp->ai_family, rp->ai_socktype, rp->ai_protocol);

        sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
        if (sock == -1) {
            perror("socket");
            continue;
        }
        printf("socket created - connecting ...\n");

        // if you want to show the IP:
        //   char netAddr[255];
        //   void *addr_in = &((struct sockaddr_in *) rp->ai_addr)->sin_addr;
        //   inet_ntop(AF_INET, addr_in, netAddr, sizeof netAddr);
        //   printf("IP: %s\n", netAddr);

        /* zero the whole struct so no field is left uninitialized */
        struct sockaddr_in socket_address = {0};
        socket_address.sin_family = AF_INET;
        socket_address.sin_port = htons(80);
        /* the resolved address is already in binary form: just copy it */
        socket_address.sin_addr = ((struct sockaddr_in *) rp->ai_addr)->sin_addr;

        if (connect(sock, (const struct sockaddr *) &socket_address,
                    sizeof socket_address) != -1) {
            /* connection success */
            break;
        }

        perror("connect");
        close(sock);
        sock = -1;
    }

    /* record the outcome before the list is freed; rp must not be used afterwards */
    int connected = (rp != NULL);
    freeaddrinfo(getResult2);

    if (!connected) {
        printf("failed to connect\n");
        return 1;
    }

    sleep(1);
    printf("connection success!\n");
    sleep(1);

    /*
     * HTTP/1.1 requires a Host header.  "Connection: close" asks the server
     * to close the connection after the response, which is what ends the
     * read loop below; without it the loop waits for the keep-alive timeout.
     */
    int reqLen = snprintf(GETrqst, sizeof GETrqst,
                          "GET / HTTP/1.1\r\nHost: %s:80\r\nConnection: close\r\n\r\n",
                          domain);
    if (reqLen < 0 || (size_t) reqLen >= sizeof GETrqst) {
        fprintf(stderr, "request does not fit in buffer\n");
        close(sock);
        return 1;
    }

    /* write() may send fewer bytes than asked for: loop until all are sent */
    size_t sent = 0;
    while (sent < (size_t) reqLen) {
        ssize_t w = write(sock, GETrqst + sent, (size_t) reqLen - sent);
        if (w < 0) {
            if (errno == EINTR) continue;
            perror("write");
            close(sock);
            return 1;
        }
        sent += (size_t) w;
    }

    /* copy the response byte-for-byte; fwrite copes with embedded NUL bytes */
    ssize_t rl;
    while ((rl = read(sock, recvLine, sizeof recvLine)) > 0) {
        fwrite(recvLine, 1, (size_t) rl, stdout);
    }
    if (rl < 0) {
        perror("read");
    }

    close(sock);
    return 0;
}
注意:现在大多数网站都使用 HTTPS,因此,除非您知道您尝试抓取的网站可通过纯 HTTP 访问,否则您可能还需要添加加密。如果您向 google.com:80
发出 HTTP 请求,它会回答如下内容:
HTTP/1.1 301 Moved Permanently
Location: http://www.google.com/
Content-Type: text/html; charset=UTF-8
Date: Tue, 07 Sep 2021 18:57:22 GMT
Expires: Thu, 07 Oct 2021 18:57:22 GMT
Cache-Control: public, max-age=2592000
Server: gws
Content-Length: 219
X-XSS-Protection: 0
X-Frame-Options: SAMEORIGIN
<HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">
<TITLE>301 Moved</TITLE></HEAD><BODY>
<H1>301 Moved</H1>
The document has moved
<A HREF="http://www.google.com/">here</A>.
</BODY></HTML>
正如我在热门评论中提到的,您的应用确实 连接到该网站。但是,它不喜欢您的 HTTP 请求并返回了 HTTP 错误 400。
但是,这种说法并不正确。它确实连接到了某个网站,但不是您想要的那个网站。
有两个问题。
sin_addr
在您的 connect
调用中全为零(即 0.0.0.0
)
- 您的 HTTP 请求不完整。
您的代码:
inet_aton(getResult2->ai_addr->sa_data, &socket_address.sin_addr);
正在存储所有零。
我将其注释掉,并仿照您的 inet_ntop 调用,改用 memcpy 来复制地址:
// NOTE(review): this first call only acts as a guard; netAddr is overwritten by the call below
if (inet_ntop(AF_INET, getResult2->ai_addr->sa_data, netAddr, 255)) {
// take the address of the sin_addr field of the first getaddrinfo result
void *addr_in = &((struct sockaddr_in *) getResult2->ai_addr)->sin_addr;
// format that address as text into netAddr and print it
inet_ntop(AF_INET, addr_in, netAddr, 255);
printf("%sip for domain is: %s\n", T_GREEN, netAddr);
// copy the same sin_addr bytes into the sockaddr_in that is later passed to connect()
memcpy(&socket_address.sin_addr,addr_in,
sizeof(socket_address.sin_addr));
}
您的请求不完整。它需要一个 Host:
字段。
那么,您要发送:
GET / HTTP/1.1\r\n
Host: <url_of_server>\r\n
\r\n
这是我想出的最终代码:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <pthread.h>
#include <errno.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <regex.h>
//#include "tColor copy.h"
/* Size of the line buffers used for sending and receiving. */
#define MAXLINE 4096

/* Plain-text stand-ins for the terminal colour prefixes. */
#define T_RED "RED: "
#define T_GREEN "GREEN: "
#define T_BLUE "BLUE: "

/*
 * SENDLINE(fmt, ...): format one protocol line into `buf`, echo it to stdout,
 * append "\r\n" and write it to the descriptor named `sock` in the calling
 * scope.  `buflen` is left holding the length of the line without the "\r\n".
 *
 * The formatted text is truncated so that the line plus "\r\n" and the
 * terminating NUL always fit in `buf`; a formatting error sends an empty line.
 * A failed write is reported with perror().
 */
#define SENDLINE(...) \
	do { \
		int sendline_n = snprintf(buf, sizeof(buf) - 2, __VA_ARGS__); \
		if (sendline_n < 0) { \
			sendline_n = 0; \
			buf[0] = '\0'; \
		} \
		if ((size_t) sendline_n > sizeof(buf) - 3) \
			sendline_n = (int) (sizeof(buf) - 3); \
		buflen = (size_t) sendline_n; \
		printf("SENDLINE: %s\n",buf); \
		memcpy(&buf[buflen], "\r\n", 3); \
		if (write(sock, buf, buflen + 2) < 0) \
			perror("write"); \
	} while (0)

/* Scratch buffer and line length shared by every SENDLINE expansion. */
char buf[MAXLINE];
size_t buflen;
/*
 * Fetch "/" from a web server over plain HTTP and dump the raw response.
 *
 * The host name is taken from the first command-line argument when one is
 * given; otherwise it is read from stdin.  The name is resolved to an IPv4
 * address, a TCP connection is made to port 80, the request is sent line by
 * line with SENDLINE, and everything the server returns is copied to stdout.
 *
 * Returns 0 once the response has been copied; exits with status 1 on any
 * input, resolution, socket or connection failure.
 */
int
main(int argc,char **argv)
{
    char domain[255];
    char netAddr[INET_ADDRSTRLEN];
    char recvLine[MAXLINE];
    ssize_t rl;

    /* skip the program name so argv[0] is the first real argument */
    --argc;
    ++argv;

    printf("%swebscraper\n\n", T_GREEN);

    if (argc > 0) {
        /* bounded copy: reject names that do not fit instead of overflowing */
        int n = snprintf(domain, sizeof(domain), "%s", argv[0]);
        if (n < 0 || (size_t) n >= sizeof(domain)) {
            printf("%s", T_RED);
            fprintf(stderr, "domain name too long\n");
            exit(1);
        }
        printf("domain is '%s'\n",domain);
    }
    else {
        printf("%senter the domain of the website you want to scrape (eg: google.com): ", T_BLUE);
        fflush(stdout);
        /* field width keeps the input inside domain[] */
        if (scanf("%254s", domain) != 1) {
            printf("%s", T_RED);
            fprintf(stderr, "no domain given\n");
            exit(1);
        }
    }

    /* hints and result list for getaddrinfo: IPv4, stream sockets */
    struct addrinfo getResult,
    *getResult2;

    memset(&getResult, 0, sizeof(getResult));
    getResult.ai_family = AF_INET;
    getResult.ai_socktype = SOCK_STREAM;

    int addrErr = getaddrinfo(domain, NULL, &getResult, &getResult2);
    if (addrErr != 0) {
        printf("%s", T_RED);
        /* getaddrinfo reports failures through its return value, not errno */
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(addrErr));
        exit(1);
    }

    /* zero the whole struct, then fill in family, port and resolved address */
    struct sockaddr_in socket_address;
    memset(&socket_address, 0, sizeof(socket_address));
    socket_address.sin_family = AF_INET;
    socket_address.sin_port = htons(80);
    /* the first result already holds the address in binary form: copy it */
    socket_address.sin_addr =
        ((struct sockaddr_in *) getResult2->ai_addr)->sin_addr;
    freeaddrinfo(getResult2);

    if (inet_ntop(AF_INET, &socket_address.sin_addr, netAddr,
                  sizeof(netAddr)) == NULL) {
        printf("%s", T_RED);
        perror("inet_ntop");
        exit(1);
    }
    printf("%sip for domain is: %s\n", T_GREEN, netAddr);

    printf("%saddress struct setup\n", T_GREEN);
    sleep(1);

    printf("%ssetting up socket\n", T_BLUE);
    int sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock == -1) {
        printf("%s", T_RED);
        perror("socket");
        exit(1);
    }
    sleep(1);

    printf("%sattempting connection to web server...\n", T_BLUE);
    int connectStatus = connect(sock,
        (struct sockaddr *) &socket_address, sizeof(socket_address));
    if (connectStatus == -1) {
        printf("%s", T_RED);
        perror("connect error");
        close(sock);
        exit(1);
    }
    printf("%sconnection success!\n", T_GREEN);

    /*
     * Request: request line, headers, then an empty line to end the headers.
     * HTTP/1.1 requires Host.  "Connection: close" asks the server to close
     * the connection after the response, which is what ends the read loop.
     */
    SENDLINE("GET / HTTP/1.1");
    SENDLINE("Host: %s",domain);
    SENDLINE("Accept: */*");
    SENDLINE("Connection: close");
    SENDLINE("");

    /* copy the response byte-for-byte; fwrite copes with embedded NUL bytes */
    while ((rl = read(sock, recvLine, sizeof(recvLine))) > 0) {
        fwrite(recvLine,1,(size_t) rl,stdout);
    }
    if (rl < 0) {
        printf("%s", T_RED);
        perror("read");
    }

    close(sock);
    return 0;
}
我正在尝试连接到网络服务器以便抓取它,但我的程序给出了错误:来自连接函数的“连接被拒绝”。这是代码:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <pthread.h>
#include <errno.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <regex.h>
#define MAXLINE 4096
/*
 * Question code, kept exactly as posted.
 *
 * Reads a domain name from stdin, resolves it with getaddrinfo(), prints the
 * resolved IPv4 address, then tries to connect to port 80, send a GET request
 * and print whatever the peer sends back.
 */
int main() {
char domain[255];
char netAddr[255];
char GETrqst[MAXLINE];
char recvLine[MAXLINE];
int rl;
printf("webscraper\n\n");
printf("enter the domain of the website you want to scrape (eg: google.com): ");
// "%s" carries no field width, so the input length is not limited to sizeof(domain)
scanf("%s", domain);
// hints (getResult) and result list (getResult2) for getaddrinfo:
// IPv4 only, stream sockets only; service is NULL so no port is requested here
struct addrinfo getResult, *getResult2;
memset(&getResult, 0, sizeof(getResult));
getResult.ai_family = AF_INET;
getResult.ai_socktype = SOCK_STREAM;
int addrErr = getaddrinfo(domain, NULL, &getResult, &getResult2);
if (addrErr != 0) {
// NOTE(review): the failure code is in addrErr; perror() prints errno instead -- confirm against the getaddrinfo docs
perror("getaddrinfo");
exit(1);
}
// NOTE(review): this first call passes the generic sa_data bytes as the address;
// its text result is overwritten by the second call below, so it only acts as a guard
if (inet_ntop(AF_INET, getResult2->ai_addr->sa_data, netAddr, 255)) {
// view the first result as a sockaddr_in and format its sin_addr field as text
void *addr_in = &((struct sockaddr_in *) getResult2->ai_addr)->sin_addr;
inet_ntop(AF_INET, addr_in, netAddr, 255);
printf("ip for domain is: %s\n", netAddr);
} else {
perror("inet_ntop");
exit(1);
}
// destination address for connect(); only sin_family and sin_port are assigned directly
struct sockaddr_in socket_address;
//memset(&socket_address, 0, sizeof(socket_address));
socket_address.sin_family = AF_INET;
socket_address.sin_port = htons(80);
// NOTE(review): inet_aton() parses a dotted-quad text string, but sa_data holds raw bytes;
// a nonzero result is also treated as failure here -- check the inet_aton return convention.
// The answers in this thread replace this call with a direct copy of sin_addr.
int atonErr = inet_aton(getResult2->ai_addr->sa_data, &socket_address.sin_addr);
if (atonErr != 0) {
perror("inet_aton");
exit(1);
} else {
printf("successfully converted string to struct\n");
}
freeaddrinfo(getResult2);
printf("address struct setup\n");
sleep(1);
printf("setting up socket\n");
// the socket() result is used without being checked for -1
int sock = socket(AF_INET, SOCK_STREAM, 0);
sleep(1);
printf("attempting connection to web server...\n");
int connectStatus = connect(sock, (struct sockaddr *) &socket_address, sizeof(socket_address));
if (connectStatus == -1) {
perror("connect error");
exit(1);
} else {
printf("connection success!\n");
}
sleep(1);
// build the GET request; the trailing \r\n\r\n (an empty line) ends the request headers
sprintf(GETrqst, "GET / HTTP/1.1\r\n\r\n");
// sizeof(GETrqst) is the whole MAXLINE-byte buffer, not the length of the formatted request
write(sock, GETrqst, sizeof(GETrqst));
// recvLine is zeroed once; each read leaves at least the last byte untouched,
// which is what lets printf("%s") find a terminator
memset(recvLine, 0, MAXLINE);
while ((rl = read(sock, recvLine, MAXLINE-1)) > 0 ) {
printf("%s", recvLine);
}
// exit(1) runs unconditionally, so the return below is never reached
exit(1);
return 0;
}
我试过使用 memset() 来清零 sockaddr_in 结构,因为这种做法以前有效,但现在不起作用。我还检查了 IP 是否被正确保存和使用,但这些办法都没有奏效。我很迷茫,已经花了大约 2 个小时尝试解决这个问题。如有任何帮助,我将不胜感激!
我做了一些更改以使其成功发出 HTTP 请求:
- 向 HTTP 1.1 请求添加了
Host:
字段。不提供该字段是错误的。 - 我让它为
GETrqst
发送了适当数量的字节。应该是strlen(GETrqst)
,不是sizeof(GETrqst)
. - 您在
inet_aton(getResult2->ai_addr->sa_data, &socket_address.sin_addr);
中将错误的内容复制到socket_address.sin_addr
中。((struct sockaddr_in *) getResult2->ai_addr)->sin_addr
中的内容正确,只需复制即可。
以下是我修改后的代码:
#define _DEFAULT_SOURCE
#include <errno.h>
#include <netdb.h>
#include <pthread.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
#define MAXLINE 4096
/*
 * Resolve a host name read from stdin, connect to it on TCP port 80, send a
 * minimal HTTP/1.1 GET request for "/" and copy the raw response to stdout.
 *
 * Every IPv4 address returned by getaddrinfo() is tried in turn until one
 * connection succeeds.
 *
 * Returns 0 after the response has been copied, 1 on bad input or on a
 * connection/send failure; exits with status 1 if name resolution fails.
 */
int main() {
    char domain[255];
    char GETrqst[MAXLINE];
    char recvLine[MAXLINE];

    printf("webscraper\n\n");
    printf("enter the domain of the website you want to scrape (eg: google.com): ");
    fflush(stdout); /* the prompt has no newline, so flush it before blocking on input */
    if (scanf("%254s", domain) != 1) return 1;

    /* hints for getaddrinfo: IPv4, TCP stream sockets only */
    struct addrinfo getResult = {0}, *getResult2;
    getResult.ai_family = AF_INET;
    getResult.ai_socktype = SOCK_STREAM;
    getResult.ai_protocol = IPPROTO_TCP;

    int addrErr = getaddrinfo(domain, NULL, &getResult, &getResult2);
    if (addrErr != 0) {
        /* getaddrinfo reports failures through its return value, not errno */
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(addrErr));
        exit(1);
    }

    printf("attempting connection to web server...\n");

    int sock = -1;
    struct addrinfo *rp;

    /* loop through the possible addresses */
    for (rp = getResult2; rp != NULL; rp = rp->ai_next) {
        printf("%d %d %d\n", rp->ai_family, rp->ai_socktype, rp->ai_protocol);

        sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
        if (sock == -1) {
            perror("socket");
            continue;
        }
        printf("socket created - connecting ...\n");

        // if you want to show the IP:
        //   char netAddr[255];
        //   void *addr_in = &((struct sockaddr_in *) rp->ai_addr)->sin_addr;
        //   inet_ntop(AF_INET, addr_in, netAddr, sizeof netAddr);
        //   printf("IP: %s\n", netAddr);

        /* zero the whole struct so no field is left uninitialized */
        struct sockaddr_in socket_address = {0};
        socket_address.sin_family = AF_INET;
        socket_address.sin_port = htons(80);
        /* the resolved address is already in binary form: just copy it */
        socket_address.sin_addr = ((struct sockaddr_in *) rp->ai_addr)->sin_addr;

        if (connect(sock, (const struct sockaddr *) &socket_address,
                    sizeof socket_address) != -1) {
            /* connection success */
            break;
        }

        perror("connect");
        close(sock);
        sock = -1;
    }

    /* record the outcome before the list is freed; rp must not be used afterwards */
    int connected = (rp != NULL);
    freeaddrinfo(getResult2);

    if (!connected) {
        printf("failed to connect\n");
        return 1;
    }

    sleep(1);
    printf("connection success!\n");
    sleep(1);

    /*
     * HTTP/1.1 requires a Host header.  "Connection: close" asks the server
     * to close the connection after the response, which is what ends the
     * read loop below; without it the loop waits for the keep-alive timeout.
     */
    int reqLen = snprintf(GETrqst, sizeof GETrqst,
                          "GET / HTTP/1.1\r\nHost: %s:80\r\nConnection: close\r\n\r\n",
                          domain);
    if (reqLen < 0 || (size_t) reqLen >= sizeof GETrqst) {
        fprintf(stderr, "request does not fit in buffer\n");
        close(sock);
        return 1;
    }

    /* write() may send fewer bytes than asked for: loop until all are sent */
    size_t sent = 0;
    while (sent < (size_t) reqLen) {
        ssize_t w = write(sock, GETrqst + sent, (size_t) reqLen - sent);
        if (w < 0) {
            if (errno == EINTR) continue;
            perror("write");
            close(sock);
            return 1;
        }
        sent += (size_t) w;
    }

    /* copy the response byte-for-byte; fwrite copes with embedded NUL bytes */
    ssize_t rl;
    while ((rl = read(sock, recvLine, sizeof recvLine)) > 0) {
        fwrite(recvLine, 1, (size_t) rl, stdout);
    }
    if (rl < 0) {
        perror("read");
    }

    close(sock);
    return 0;
}
注意:现在大多数网站都使用 HTTPS,因此,除非您知道您尝试抓取的网站可通过纯 HTTP 访问,否则您可能还需要添加加密。如果您向 google.com:80
发出 HTTP 请求,它会回答如下内容:
HTTP/1.1 301 Moved Permanently
Location: http://www.google.com/
Content-Type: text/html; charset=UTF-8
Date: Tue, 07 Sep 2021 18:57:22 GMT
Expires: Thu, 07 Oct 2021 18:57:22 GMT
Cache-Control: public, max-age=2592000
Server: gws
Content-Length: 219
X-XSS-Protection: 0
X-Frame-Options: SAMEORIGIN
<HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">
<TITLE>301 Moved</TITLE></HEAD><BODY>
<H1>301 Moved</H1>
The document has moved
<A HREF="http://www.google.com/">here</A>.
</BODY></HTML>
正如我在热门评论中提到的,您的应用确实 连接到该网站。但是,它不喜欢您的 HTTP 请求并返回了 HTTP 错误 400。
但是,这种说法并不正确。它确实连接到了某个网站,但不是您想要的那个网站。
有两个问题。
sin_addr
在您的connect
调用中全为零(即0.0.0.0
)- 您的 HTTP 请求不完整。
您的代码:
inet_aton(getResult2->ai_addr->sa_data, &socket_address.sin_addr);
正在存储所有零。
我将其注释掉,并仿照您的 inet_ntop 调用,改用 memcpy 来复制地址:
// NOTE(review): this first call only acts as a guard; netAddr is overwritten by the call below
if (inet_ntop(AF_INET, getResult2->ai_addr->sa_data, netAddr, 255)) {
// take the address of the sin_addr field of the first getaddrinfo result
void *addr_in = &((struct sockaddr_in *) getResult2->ai_addr)->sin_addr;
// format that address as text into netAddr and print it
inet_ntop(AF_INET, addr_in, netAddr, 255);
printf("%sip for domain is: %s\n", T_GREEN, netAddr);
// copy the same sin_addr bytes into the sockaddr_in that is later passed to connect()
memcpy(&socket_address.sin_addr,addr_in,
sizeof(socket_address.sin_addr));
}
您的请求不完整。它需要一个 Host:
字段。
那么,您要发送:
GET / HTTP/1.1\r\n
Host: <url_of_server>\r\n
\r\n
这是我想出的最终代码:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <pthread.h>
#include <errno.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <regex.h>
//#include "tColor copy.h"
/* Size of the line buffers used for sending and receiving. */
#define MAXLINE 4096

/* Plain-text stand-ins for the terminal colour prefixes. */
#define T_RED "RED: "
#define T_GREEN "GREEN: "
#define T_BLUE "BLUE: "

/*
 * SENDLINE(fmt, ...): format one protocol line into `buf`, echo it to stdout,
 * append "\r\n" and write it to the descriptor named `sock` in the calling
 * scope.  `buflen` is left holding the length of the line without the "\r\n".
 *
 * The formatted text is truncated so that the line plus "\r\n" and the
 * terminating NUL always fit in `buf`; a formatting error sends an empty line.
 * A failed write is reported with perror().
 */
#define SENDLINE(...) \
	do { \
		int sendline_n = snprintf(buf, sizeof(buf) - 2, __VA_ARGS__); \
		if (sendline_n < 0) { \
			sendline_n = 0; \
			buf[0] = '\0'; \
		} \
		if ((size_t) sendline_n > sizeof(buf) - 3) \
			sendline_n = (int) (sizeof(buf) - 3); \
		buflen = (size_t) sendline_n; \
		printf("SENDLINE: %s\n",buf); \
		memcpy(&buf[buflen], "\r\n", 3); \
		if (write(sock, buf, buflen + 2) < 0) \
			perror("write"); \
	} while (0)

/* Scratch buffer and line length shared by every SENDLINE expansion. */
char buf[MAXLINE];
size_t buflen;
/*
 * Fetch "/" from a web server over plain HTTP and dump the raw response.
 *
 * The host name is taken from the first command-line argument when one is
 * given; otherwise it is read from stdin.  The name is resolved to an IPv4
 * address, a TCP connection is made to port 80, the request is sent line by
 * line with SENDLINE, and everything the server returns is copied to stdout.
 *
 * Returns 0 once the response has been copied; exits with status 1 on any
 * input, resolution, socket or connection failure.
 */
int
main(int argc,char **argv)
{
    char domain[255];
    char netAddr[INET_ADDRSTRLEN];
    char recvLine[MAXLINE];
    ssize_t rl;

    /* skip the program name so argv[0] is the first real argument */
    --argc;
    ++argv;

    printf("%swebscraper\n\n", T_GREEN);

    if (argc > 0) {
        /* bounded copy: reject names that do not fit instead of overflowing */
        int n = snprintf(domain, sizeof(domain), "%s", argv[0]);
        if (n < 0 || (size_t) n >= sizeof(domain)) {
            printf("%s", T_RED);
            fprintf(stderr, "domain name too long\n");
            exit(1);
        }
        printf("domain is '%s'\n",domain);
    }
    else {
        printf("%senter the domain of the website you want to scrape (eg: google.com): ", T_BLUE);
        fflush(stdout);
        /* field width keeps the input inside domain[] */
        if (scanf("%254s", domain) != 1) {
            printf("%s", T_RED);
            fprintf(stderr, "no domain given\n");
            exit(1);
        }
    }

    /* hints and result list for getaddrinfo: IPv4, stream sockets */
    struct addrinfo getResult,
    *getResult2;

    memset(&getResult, 0, sizeof(getResult));
    getResult.ai_family = AF_INET;
    getResult.ai_socktype = SOCK_STREAM;

    int addrErr = getaddrinfo(domain, NULL, &getResult, &getResult2);
    if (addrErr != 0) {
        printf("%s", T_RED);
        /* getaddrinfo reports failures through its return value, not errno */
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(addrErr));
        exit(1);
    }

    /* zero the whole struct, then fill in family, port and resolved address */
    struct sockaddr_in socket_address;
    memset(&socket_address, 0, sizeof(socket_address));
    socket_address.sin_family = AF_INET;
    socket_address.sin_port = htons(80);
    /* the first result already holds the address in binary form: copy it */
    socket_address.sin_addr =
        ((struct sockaddr_in *) getResult2->ai_addr)->sin_addr;
    freeaddrinfo(getResult2);

    if (inet_ntop(AF_INET, &socket_address.sin_addr, netAddr,
                  sizeof(netAddr)) == NULL) {
        printf("%s", T_RED);
        perror("inet_ntop");
        exit(1);
    }
    printf("%sip for domain is: %s\n", T_GREEN, netAddr);

    printf("%saddress struct setup\n", T_GREEN);
    sleep(1);

    printf("%ssetting up socket\n", T_BLUE);
    int sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock == -1) {
        printf("%s", T_RED);
        perror("socket");
        exit(1);
    }
    sleep(1);

    printf("%sattempting connection to web server...\n", T_BLUE);
    int connectStatus = connect(sock,
        (struct sockaddr *) &socket_address, sizeof(socket_address));
    if (connectStatus == -1) {
        printf("%s", T_RED);
        perror("connect error");
        close(sock);
        exit(1);
    }
    printf("%sconnection success!\n", T_GREEN);

    /*
     * Request: request line, headers, then an empty line to end the headers.
     * HTTP/1.1 requires Host.  "Connection: close" asks the server to close
     * the connection after the response, which is what ends the read loop.
     */
    SENDLINE("GET / HTTP/1.1");
    SENDLINE("Host: %s",domain);
    SENDLINE("Accept: */*");
    SENDLINE("Connection: close");
    SENDLINE("");

    /* copy the response byte-for-byte; fwrite copes with embedded NUL bytes */
    while ((rl = read(sock, recvLine, sizeof(recvLine))) > 0) {
        fwrite(recvLine,1,(size_t) rl,stdout);
    }
    if (rl < 0) {
        printf("%s", T_RED);
        perror("read");
    }

    close(sock);
    return 0;
}