在 C# 应用程序中使用多个代理服务器抓取 HTML 页面
Using a few proxy servers to explore HTML pages in a C# application
我正在制作一个简单的应用程序来收集有关汽车广告的信息。
如何添加多个代理服务器,让应用程序轮流使用不同的地址,从而避免我抓取数据的网站屏蔽我的访问和下载?
我的代码,也许你会需要它:
// Scrapes result pages 1..9 of the otomoto.pl Audi A3 listing and prints the
// title, year, mileage, engine capacity and fuel type of every offer found.
//
// A single HttpClient is shared by all requests: creating a new instance per
// request leaves sockets lingering and can exhaust them under load.
using (var httpClient = new HttpClient())
{
    for (int i = 1; i < 10; i++)
    {
        var url = "https://www.otomoto.pl/osobowe/audi/a3/?page=" + i;
        var html = await httpClient.GetStringAsync(url);
        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        // Container <div class="offers list"> that holds all offers on the page.
        var Cars = htmlDocument.DocumentNode.Descendants("div")
            .Where(node => node.GetAttributeValue("class", "")
            .Equals("offers list")).ToList();

        // Each offer is an <article> whose data-test attribute marks it as a search result.
        var CarsListItems = Cars[0].Descendants("article")
            .Where(node => node.GetAttributeValue("data-test", "")
            .Contains("search-result-item")).ToList();

        foreach (var CarsParamsList in CarsListItems)
        {
            var marka = CarsParamsList.Descendants("a")
                .Where(node => node.GetAttributeValue("class", "")
                .Equals("offer-title__link")).First().InnerText.Trim('\n', ' ');
            Console.WriteLine(marka);

            var year = CarsParamsList.Descendants("li")
                .Where(node => node.GetAttributeValue("data-code", "")
                .Equals("year")).First().InnerText.Trim('\n', ' ');
            Console.WriteLine(year);

            var mileage = CarsParamsList.Descendants("li")
                .Where(node => node.GetAttributeValue("data-code", "")
                .Equals("mileage")).First().InnerText.Trim('\n', ' ');
            Console.WriteLine(mileage);

            // Engine capacity is optional on a listing. Look it up with
            // FirstOrDefault and fall back to the placeholder text instead of
            // letting First() throw and swallowing every exception in a bare catch.
            var engineNode = CarsParamsList.Descendants("li")
                .FirstOrDefault(node => node.GetAttributeValue("data-code", "")
                .Equals("engine_capacity"));
            if (engineNode != null)
            {
                Console.WriteLine(engineNode.InnerText.Trim('\n', ' '));
            }
            else
            {
                Console.WriteLine("Brak informacji");
            }

            var fuel = CarsParamsList.Descendants("li")
                .Where(node => node.GetAttributeValue("data-code", "")
                .Contains("fuel_type")).First().InnerText.Trim('\n', ' ');
            Console.WriteLine(fuel);
            Console.WriteLine("---------------------------------------------------");
        }
    }
}
解决方案如下:
// Downloads the listing pages through an HTTP proxy chosen from a fixed list,
// then loads each page into an HtmlDocument for parsing.
// NOTE(review): because this is "async void", callers cannot await it or
// observe its exceptions; consider returning Task if all call sites allow it.
private static async void GetHtmlAsync()
{
    // Proxy endpoints in "host:port" form.
    string[] SPlist = new string[10] {
        "94.75.76.10:8080",
        "142.44.243.144:8080",
        "178.32.80.234:1080",
        "183.88.244.87:8080",
        "5.128.35.36:3128",
        "139.59.53.106:3128",
        "54.37.17.150:8080",
        "109.193.195.11:8080",
        "35.199.105.78:8080",
        "95.208.208.237:8080"
    };
    // Number of consecutive requests sent through one proxy before switching
    // to the next one in the list.
    // NOTE(review): with 50 requests per proxy and only 20 pages requested
    // below, every request uses the first proxy; lower this value to rotate sooner.
    const int requestsPerProxy = 50;
    int actualServerProxy;
    for (int i = 0; i < 20; i++)
    {
        // The parentheses make (i + 1) an arithmetic sum. Without them,
        // "..." + i + 1 appends the digits of i and then "1", producing
        // page=01, page=11, page=21, ... instead of pages 1..20.
        var url = "https://www.otomoto.pl/osobowe/audi/a3/?page=" + (i + 1);
        // Zero-based batch index, wrapped around the proxy list; dividing i
        // (not i + 1) gives every proxy exactly requestsPerProxy requests.
        actualServerProxy = (i / requestsPerProxy) % SPlist.Length;
        HttpClientHandler handler = new HttpClientHandler()
        {
            Proxy = new WebProxy("http://" + SPlist[actualServerProxy]),
            UseProxy = true,
        };
        var httpClient = new HttpClient(handler);
        var html = await httpClient.GetStringAsync(url);
        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);
我正在制作一个简单的应用程序来收集有关汽车广告的信息。
如何添加多个代理服务器,让应用程序轮流使用不同的地址,从而避免我抓取数据的网站屏蔽我的访问和下载?
我的代码,也许你会需要它:
// Scrapes result pages 1..9 of the otomoto.pl Audi A3 listing and prints the
// title, year, mileage, engine capacity and fuel type of every offer found.
//
// A single HttpClient is shared by all requests: creating a new instance per
// request leaves sockets lingering and can exhaust them under load.
using (var httpClient = new HttpClient())
{
    for (int i = 1; i < 10; i++)
    {
        var url = "https://www.otomoto.pl/osobowe/audi/a3/?page=" + i;
        var html = await httpClient.GetStringAsync(url);
        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        // Container <div class="offers list"> that holds all offers on the page.
        var Cars = htmlDocument.DocumentNode.Descendants("div")
            .Where(node => node.GetAttributeValue("class", "")
            .Equals("offers list")).ToList();

        // Each offer is an <article> whose data-test attribute marks it as a search result.
        var CarsListItems = Cars[0].Descendants("article")
            .Where(node => node.GetAttributeValue("data-test", "")
            .Contains("search-result-item")).ToList();

        foreach (var CarsParamsList in CarsListItems)
        {
            var marka = CarsParamsList.Descendants("a")
                .Where(node => node.GetAttributeValue("class", "")
                .Equals("offer-title__link")).First().InnerText.Trim('\n', ' ');
            Console.WriteLine(marka);

            var year = CarsParamsList.Descendants("li")
                .Where(node => node.GetAttributeValue("data-code", "")
                .Equals("year")).First().InnerText.Trim('\n', ' ');
            Console.WriteLine(year);

            var mileage = CarsParamsList.Descendants("li")
                .Where(node => node.GetAttributeValue("data-code", "")
                .Equals("mileage")).First().InnerText.Trim('\n', ' ');
            Console.WriteLine(mileage);

            // Engine capacity is optional on a listing. Look it up with
            // FirstOrDefault and fall back to the placeholder text instead of
            // letting First() throw and swallowing every exception in a bare catch.
            var engineNode = CarsParamsList.Descendants("li")
                .FirstOrDefault(node => node.GetAttributeValue("data-code", "")
                .Equals("engine_capacity"));
            if (engineNode != null)
            {
                Console.WriteLine(engineNode.InnerText.Trim('\n', ' '));
            }
            else
            {
                Console.WriteLine("Brak informacji");
            }

            var fuel = CarsParamsList.Descendants("li")
                .Where(node => node.GetAttributeValue("data-code", "")
                .Contains("fuel_type")).First().InnerText.Trim('\n', ' ');
            Console.WriteLine(fuel);
            Console.WriteLine("---------------------------------------------------");
        }
    }
}
解决方案如下:
// Downloads the listing pages through an HTTP proxy chosen from a fixed list,
// then loads each page into an HtmlDocument for parsing.
// NOTE(review): because this is "async void", callers cannot await it or
// observe its exceptions; consider returning Task if all call sites allow it.
private static async void GetHtmlAsync()
{
    // Proxy endpoints in "host:port" form.
    string[] SPlist = new string[10] {
        "94.75.76.10:8080",
        "142.44.243.144:8080",
        "178.32.80.234:1080",
        "183.88.244.87:8080",
        "5.128.35.36:3128",
        "139.59.53.106:3128",
        "54.37.17.150:8080",
        "109.193.195.11:8080",
        "35.199.105.78:8080",
        "95.208.208.237:8080"
    };
    // Number of consecutive requests sent through one proxy before switching
    // to the next one in the list.
    // NOTE(review): with 50 requests per proxy and only 20 pages requested
    // below, every request uses the first proxy; lower this value to rotate sooner.
    const int requestsPerProxy = 50;
    int actualServerProxy;
    for (int i = 0; i < 20; i++)
    {
        // The parentheses make (i + 1) an arithmetic sum. Without them,
        // "..." + i + 1 appends the digits of i and then "1", producing
        // page=01, page=11, page=21, ... instead of pages 1..20.
        var url = "https://www.otomoto.pl/osobowe/audi/a3/?page=" + (i + 1);
        // Zero-based batch index, wrapped around the proxy list; dividing i
        // (not i + 1) gives every proxy exactly requestsPerProxy requests.
        actualServerProxy = (i / requestsPerProxy) % SPlist.Length;
        HttpClientHandler handler = new HttpClientHandler()
        {
            Proxy = new WebProxy("http://" + SPlist[actualServerProxy]),
            UseProxy = true,
        };
        var httpClient = new HttpClient(handler);
        var html = await httpClient.GetStringAsync(url);
        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);