在 2 次 POST 查询后从网站检索数据
Retrieve data from web site after 2 POST queries
我正在尝试抓取此网站以获取优惠列表。
问题是我们需要填写 2 个表单(即发送 2 次 POST 请求)才能得到最终结果。
这是我目前所做的:
import requests as rs
from form_data import form_data1, form_data2
base_url = "https://compare.energy.vic.gov.au/api"

# A single Session is reused for every request in the flow.
with rs.Session() as s:
    # Initial GET: the JSON reply carries the serverCacheId used later on.
    url_ = f"{base_url}/get-psb-details?serverCacheId=null"
    r = s.get(url_)
    serverCacheId = r.json()["serverCacheId"]

    # Submit the two forms; here they are sent as form-encoded bodies (data=).
    r = s.post(f"{base_url}/save-form-data", data=form_data1)
    r = s.post(f"{base_url}/save-form-data", data=form_data2)
然后我尝试在第二个 POST 查询之后检索报价:
# Ask the get-offers endpoint for the results tied to this serverCacheId.
url_ = "https://compare.energy.vic.gov.au/api/get-offers"
body = {
    "serverCacheId": str(serverCacheId),
    "loopBack": "false",
    "selectedEnergy": "/offer",
}
# The dict is sent as URL query parameters, not as a request body.
r = s.get(url_, params=body)
print(r.json())
但不幸的是,我收到一条指示重定向的消息:
{'status': 'redirect', 'message': 'no data'}
这 2 次 POST 请求使用以下数据:
# Payload for the first save-form-data POST (pageDataType "energyConfigData").
form_data1 = {
    "showSolarSelection": "true",
    "energyType": "Electricity",
    "userType": "Residential",
    "bill": "no bill",
    "postcode": "3000",
    "usageProfile": "0",
    "averageDailyConsumption": "0",
    "skipNMI": "true",
    "smartMeter": "1",
    "disclaimer": "true",
    "hasSolar": "0",
    "hasConcession": "0",
    # Nested distributor record: the only non-string value in this payload.
    "distributor": {
        "id": "4",
        "name": "Citipower",
        "display": "Citipower",
        "phone": "1300 301 101 / 13 12 80",
        "distribution_zone_id": "11",
        "distribution_zone_name": "All",
    },
    "distributorDerived": "0",
    "distributorSubmit": "true",
    "pageDataType": "energyConfigData",
    "loopBack": "true",
}
和
# Payload for the second save-form-data POST (pageDataType "energyProfileData").
# All values are strings; key order is kept exactly as captured.
form_data2 = {
    # General household fields.
    "pvCapacity": "0", "pvCapacityCap": "null",
    "hhSize": "1", "totalRooms": "1",
    "fridgeCount": "0", "gasConnection": "4",
    # poolHeating* fields.
    "poolHeating": "0",
    "poolHeatingSolar": "false", "poolHeatingGas": "false",
    "poolHeatingElectric": "false", "poolHeatingNone": "false",
    # spaceHeating* fields.
    "spaceHeatingElectricDucted": "false",
    "spaceHeatingSplitSystem": "false",
    "spaceHeatingElectricUnderfloor": "false",
    "spaceHeatingElectricIndividual": "false",
    "spaceHeatingGasDucted": "false",
    "spaceHeatingGasUnderfloor": "false",
    "spaceHeatingGasIndividual": "false",
    "spaceHeatingOther": "false",
    "spaceHeatingNone": "true",
    # spaceCooling* fields.
    "spaceCoolingRoomAC": "false",
    "spaceCoolingSplitSystem": "false",
    "spaceCoolingDuctedReverse": "false",
    "spaceCoolingDuctedEvaporative": "false",
    "spaceCoolingPortableRef": "false",
    "spaceCoolingPortableEvap": "false",
    "spaceCoolingOther": "false",
    "spaceCoolingNone": "true",
    # seaDistance, clothesDryer* and dishwasher* fields.
    "seaDistance": "",
    "clothesDryer": "0", "clothesDryerWeekday": "", "clothesDryerWeekend": "",
    "dishwasherWeekday": "", "dishwasherWeekend": "",
    # waterHeating* fields.
    "waterHeatingElectric": "false",
    "waterHeatingElectricSolar": "false",
    "waterHeatingGasStorage": "false",
    "waterHeatingGasInstant": "false",
    "waterHeatingGasSolar": "false",
    "waterHeatingOther": "true",
    # Remaining usage fields, mostly submitted empty.
    "controlledLoad": "", "tvTotal": "", "turnOffAtPowerShort": "",
    "ovensElectric": "", "ovensGas": "",
    "washingMachineUsage": "",
    "washingMachineWeekday": "", "washingMachineWeekend": "",
    "televisionUsageWeekday": "", "televisionUsageWeekend": "",
    "heatingUsageMethod": "", "gasUsageWinter": "0", "hhSize51": "",
    # energyType / hasSolar / page metadata.
    "energyType": "Electricity", "hasSolar": "0",
    "pageDataType": "energyProfileData", "loopBack": "false",
}
预期结果
预期结果是一个包含优惠的 JSON 对象。这是它的结构:
{
"selectedEnergyType": "Electricity",
"energyTypeCount": 1,
"offers": {
"Electricity": {
"offersList": [{...}]
}
}
}
本站对表单数据有一些要求和限制。
form_data1
:
- 添加必填字段
"solarCapacity"
和 "feedInTariff"
。
"hasSolar": "0",
"solarCapacity": "", # Add this
"hasConcession": "0",
"feedInTariff": "", # Add this
- 将
"loopBack": "true"
更改为 "loopBack": false
。
# "loopBack": "true"
"loopBack": False
- 设置
"serverCacheId"
并将data=
更改为json=
。
# r = s.post(f"{base_url}/save-form-data", data=form_data1)
r = s.post(f"{base_url}/save-form-data", json=dict(form_data1, serverCacheId=str(serverCacheId)))
form_data2
:
- 设置
"serverCacheId"
并将data=
更改为json=
。
# r = s.post(f"{base_url}/save-form-data", data=form_data2)
r = s.post(f"{base_url}/save-form-data", json=dict(form_data2, serverCacheId=str(serverCacheId)))
- (可选,为了保持一致性)将
"loopBack": "false"
更改为 "loopBack": false
。
# "loopBack": "false"
"loopBack": False
合并代码:
import requests as rs
# Payload for the first save-form-data POST (pageDataType "energyConfigData").
form_data1 = {
    "showSolarSelection": "true",
    "energyType": "Electricity",
    "userType": "Residential",
    "bill": "no bill",
    "postcode": "3000",
    "usageProfile": "0",
    "averageDailyConsumption": "0",
    "skipNMI": "true",
    "smartMeter": "1",
    "disclaimer": "true",
    "hasSolar": "0",
    "solarCapacity": "",
    "hasConcession": "0",
    "feedInTariff": "",
    # Nested distributor record.
    "distributor": {
        "id": "4",
        "name": "Citipower",
        "display": "Citipower",
        "phone": "1300 301 101 / 13 12 80",
        "distribution_zone_id": "11",
        "distribution_zone_name": "All",
    },
    "distributorDerived": "0",
    "distributorSubmit": "true",
    "pageDataType": "energyConfigData",
    # A real boolean here, unlike the string flags above.
    "loopBack": False,
}
# Payload for the second save-form-data POST (pageDataType "energyProfileData").
# Key order is kept exactly as captured; only "loopBack" is a real boolean.
form_data2 = {
    # General household fields.
    "pvCapacity": "0", "pvCapacityCap": "null",
    "hhSize": "1", "totalRooms": "1",
    "fridgeCount": "0", "gasConnection": "4",
    # poolHeating* fields.
    "poolHeating": "0",
    "poolHeatingSolar": "false", "poolHeatingGas": "false",
    "poolHeatingElectric": "false", "poolHeatingNone": "false",
    # spaceHeating* fields.
    "spaceHeatingElectricDucted": "false",
    "spaceHeatingSplitSystem": "false",
    "spaceHeatingElectricUnderfloor": "false",
    "spaceHeatingElectricIndividual": "false",
    "spaceHeatingGasDucted": "false",
    "spaceHeatingGasUnderfloor": "false",
    "spaceHeatingGasIndividual": "false",
    "spaceHeatingOther": "false",
    "spaceHeatingNone": "true",
    # spaceCooling* fields.
    "spaceCoolingRoomAC": "false",
    "spaceCoolingSplitSystem": "false",
    "spaceCoolingDuctedReverse": "false",
    "spaceCoolingDuctedEvaporative": "false",
    "spaceCoolingPortableRef": "false",
    "spaceCoolingPortableEvap": "false",
    "spaceCoolingOther": "false",
    "spaceCoolingNone": "true",
    # seaDistance, clothesDryer* and dishwasher* fields.
    "seaDistance": "",
    "clothesDryer": "0", "clothesDryerWeekday": "", "clothesDryerWeekend": "",
    "dishwasherWeekday": "", "dishwasherWeekend": "",
    # waterHeating* fields.
    "waterHeatingElectric": "false",
    "waterHeatingElectricSolar": "false",
    "waterHeatingGasStorage": "false",
    "waterHeatingGasInstant": "false",
    "waterHeatingGasSolar": "false",
    "waterHeatingOther": "true",
    # Remaining usage fields, mostly submitted empty.
    "controlledLoad": "", "tvTotal": "", "turnOffAtPowerShort": "",
    "ovensElectric": "", "ovensGas": "",
    "washingMachineUsage": "",
    "washingMachineWeekday": "", "washingMachineWeekend": "",
    "televisionUsageWeekday": "", "televisionUsageWeekend": "",
    "heatingUsageMethod": "", "gasUsageWinter": "0", "hhSize51": "",
    # energyType / hasSolar / page metadata.
    "energyType": "Electricity", "hasSolar": "0",
    "pageDataType": "energyProfileData", "loopBack": False,
}
base_url = "https://compare.energy.vic.gov.au/api"
# Upper bound, in seconds, for each HTTP call so a stalled server cannot hang the script.
REQUEST_TIMEOUT = 30

# A single Session is reused for every request in the flow.
with rs.Session() as s:
    # Step 1: the JSON reply to this GET carries the serverCacheId.
    url_ = f"{base_url}/get-psb-details?serverCacheId=null"
    r = s.get(url_, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()  # fail on an HTTP error instead of parsing an error page
    serverCacheId = r.json()["serverCacheId"]

    # Step 2: submit both forms as JSON bodies, each tagged with the cache id.
    # dict(form, serverCacheId=...) builds a copy, so the module-level
    # payload dicts are not modified.
    r = s.post(
        f"{base_url}/save-form-data",
        json=dict(form_data1, serverCacheId=str(serverCacheId)),
        timeout=REQUEST_TIMEOUT,
    )
    r.raise_for_status()
    r = s.post(
        f"{base_url}/save-form-data",
        json=dict(form_data2, serverCacheId=str(serverCacheId)),
        timeout=REQUEST_TIMEOUT,
    )
    r.raise_for_status()

    # Step 3: request the offers for the same cache id (sent as query parameters).
    url_ = f"{base_url}/get-offers"
    body = {
        "serverCacheId": str(serverCacheId),
        "loopBack": "false",
        "selectedEnergy": "/offer",
    }
    r = s.get(url_, params=body, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    print(r.json())
我正在尝试抓取此网站以获取优惠列表。
问题是我们需要填写 2 个表单(即发送 2 次 POST 请求)才能得到最终结果。
这是我目前所做的:
import requests as rs
from form_data import form_data1, form_data2
base_url = "https://compare.energy.vic.gov.au/api"

# A single Session is reused for every request in the flow.
with rs.Session() as s:
    # Initial GET: the JSON reply carries the serverCacheId used later on.
    url_ = f"{base_url}/get-psb-details?serverCacheId=null"
    r = s.get(url_)
    serverCacheId = r.json()["serverCacheId"]

    # Submit the two forms; here they are sent as form-encoded bodies (data=).
    r = s.post(f"{base_url}/save-form-data", data=form_data1)
    r = s.post(f"{base_url}/save-form-data", data=form_data2)
然后我尝试在第二个 POST 查询之后检索报价:
# Ask the get-offers endpoint for the results tied to this serverCacheId.
url_ = "https://compare.energy.vic.gov.au/api/get-offers"
body = {
    "serverCacheId": str(serverCacheId),
    "loopBack": "false",
    "selectedEnergy": "/offer",
}
# The dict is sent as URL query parameters, not as a request body.
r = s.get(url_, params=body)
print(r.json())
但不幸的是,我收到一条指示重定向的消息:
{'status': 'redirect', 'message': 'no data'}
这 2 次 POST 请求使用以下数据:
# Payload for the first save-form-data POST (pageDataType "energyConfigData").
form_data1 = {
    "showSolarSelection": "true",
    "energyType": "Electricity",
    "userType": "Residential",
    "bill": "no bill",
    "postcode": "3000",
    "usageProfile": "0",
    "averageDailyConsumption": "0",
    "skipNMI": "true",
    "smartMeter": "1",
    "disclaimer": "true",
    "hasSolar": "0",
    "hasConcession": "0",
    # Nested distributor record: the only non-string value in this payload.
    "distributor": {
        "id": "4",
        "name": "Citipower",
        "display": "Citipower",
        "phone": "1300 301 101 / 13 12 80",
        "distribution_zone_id": "11",
        "distribution_zone_name": "All",
    },
    "distributorDerived": "0",
    "distributorSubmit": "true",
    "pageDataType": "energyConfigData",
    "loopBack": "true",
}
和
# Payload for the second save-form-data POST (pageDataType "energyProfileData").
# All values are strings; key order is kept exactly as captured.
form_data2 = {
    # General household fields.
    "pvCapacity": "0", "pvCapacityCap": "null",
    "hhSize": "1", "totalRooms": "1",
    "fridgeCount": "0", "gasConnection": "4",
    # poolHeating* fields.
    "poolHeating": "0",
    "poolHeatingSolar": "false", "poolHeatingGas": "false",
    "poolHeatingElectric": "false", "poolHeatingNone": "false",
    # spaceHeating* fields.
    "spaceHeatingElectricDucted": "false",
    "spaceHeatingSplitSystem": "false",
    "spaceHeatingElectricUnderfloor": "false",
    "spaceHeatingElectricIndividual": "false",
    "spaceHeatingGasDucted": "false",
    "spaceHeatingGasUnderfloor": "false",
    "spaceHeatingGasIndividual": "false",
    "spaceHeatingOther": "false",
    "spaceHeatingNone": "true",
    # spaceCooling* fields.
    "spaceCoolingRoomAC": "false",
    "spaceCoolingSplitSystem": "false",
    "spaceCoolingDuctedReverse": "false",
    "spaceCoolingDuctedEvaporative": "false",
    "spaceCoolingPortableRef": "false",
    "spaceCoolingPortableEvap": "false",
    "spaceCoolingOther": "false",
    "spaceCoolingNone": "true",
    # seaDistance, clothesDryer* and dishwasher* fields.
    "seaDistance": "",
    "clothesDryer": "0", "clothesDryerWeekday": "", "clothesDryerWeekend": "",
    "dishwasherWeekday": "", "dishwasherWeekend": "",
    # waterHeating* fields.
    "waterHeatingElectric": "false",
    "waterHeatingElectricSolar": "false",
    "waterHeatingGasStorage": "false",
    "waterHeatingGasInstant": "false",
    "waterHeatingGasSolar": "false",
    "waterHeatingOther": "true",
    # Remaining usage fields, mostly submitted empty.
    "controlledLoad": "", "tvTotal": "", "turnOffAtPowerShort": "",
    "ovensElectric": "", "ovensGas": "",
    "washingMachineUsage": "",
    "washingMachineWeekday": "", "washingMachineWeekend": "",
    "televisionUsageWeekday": "", "televisionUsageWeekend": "",
    "heatingUsageMethod": "", "gasUsageWinter": "0", "hhSize51": "",
    # energyType / hasSolar / page metadata.
    "energyType": "Electricity", "hasSolar": "0",
    "pageDataType": "energyProfileData", "loopBack": "false",
}
预期结果
预期结果是一个包含优惠的 JSON 对象。这是它的结构:
{
"selectedEnergyType": "Electricity",
"energyTypeCount": 1,
"offers": {
"Electricity": {
"offersList": [{...}]
}
}
}
本站对表单数据有一些要求和限制。
form_data1
:
- 添加必填字段
"solarCapacity"
和 "feedInTariff"
。
"hasSolar": "0",
"solarCapacity": "", # Add this
"hasConcession": "0",
"feedInTariff": "", # Add this
- 将
"loopBack": "true"
更改为 "loopBack": false
。
# "loopBack": "true"
"loopBack": False
- 设置
"serverCacheId"
并将data=
更改为json=
。
# r = s.post(f"{base_url}/save-form-data", data=form_data1)
r = s.post(f"{base_url}/save-form-data", json=dict(form_data1, serverCacheId=str(serverCacheId)))
form_data2
:
- 设置
"serverCacheId"
并将data=
更改为json=
。
# r = s.post(f"{base_url}/save-form-data", data=form_data2)
r = s.post(f"{base_url}/save-form-data", json=dict(form_data2, serverCacheId=str(serverCacheId)))
- (可选,为了保持一致性)将
"loopBack": "false"
更改为 "loopBack": false
。
# "loopBack": "false"
"loopBack": False
合并代码:
import requests as rs
# Payload for the first save-form-data POST (pageDataType "energyConfigData").
form_data1 = {
    "showSolarSelection": "true",
    "energyType": "Electricity",
    "userType": "Residential",
    "bill": "no bill",
    "postcode": "3000",
    "usageProfile": "0",
    "averageDailyConsumption": "0",
    "skipNMI": "true",
    "smartMeter": "1",
    "disclaimer": "true",
    "hasSolar": "0",
    "solarCapacity": "",
    "hasConcession": "0",
    "feedInTariff": "",
    # Nested distributor record.
    "distributor": {
        "id": "4",
        "name": "Citipower",
        "display": "Citipower",
        "phone": "1300 301 101 / 13 12 80",
        "distribution_zone_id": "11",
        "distribution_zone_name": "All",
    },
    "distributorDerived": "0",
    "distributorSubmit": "true",
    "pageDataType": "energyConfigData",
    # A real boolean here, unlike the string flags above.
    "loopBack": False,
}
# Payload for the second save-form-data POST (pageDataType "energyProfileData").
# Key order is kept exactly as captured; only "loopBack" is a real boolean.
form_data2 = {
    # General household fields.
    "pvCapacity": "0", "pvCapacityCap": "null",
    "hhSize": "1", "totalRooms": "1",
    "fridgeCount": "0", "gasConnection": "4",
    # poolHeating* fields.
    "poolHeating": "0",
    "poolHeatingSolar": "false", "poolHeatingGas": "false",
    "poolHeatingElectric": "false", "poolHeatingNone": "false",
    # spaceHeating* fields.
    "spaceHeatingElectricDucted": "false",
    "spaceHeatingSplitSystem": "false",
    "spaceHeatingElectricUnderfloor": "false",
    "spaceHeatingElectricIndividual": "false",
    "spaceHeatingGasDucted": "false",
    "spaceHeatingGasUnderfloor": "false",
    "spaceHeatingGasIndividual": "false",
    "spaceHeatingOther": "false",
    "spaceHeatingNone": "true",
    # spaceCooling* fields.
    "spaceCoolingRoomAC": "false",
    "spaceCoolingSplitSystem": "false",
    "spaceCoolingDuctedReverse": "false",
    "spaceCoolingDuctedEvaporative": "false",
    "spaceCoolingPortableRef": "false",
    "spaceCoolingPortableEvap": "false",
    "spaceCoolingOther": "false",
    "spaceCoolingNone": "true",
    # seaDistance, clothesDryer* and dishwasher* fields.
    "seaDistance": "",
    "clothesDryer": "0", "clothesDryerWeekday": "", "clothesDryerWeekend": "",
    "dishwasherWeekday": "", "dishwasherWeekend": "",
    # waterHeating* fields.
    "waterHeatingElectric": "false",
    "waterHeatingElectricSolar": "false",
    "waterHeatingGasStorage": "false",
    "waterHeatingGasInstant": "false",
    "waterHeatingGasSolar": "false",
    "waterHeatingOther": "true",
    # Remaining usage fields, mostly submitted empty.
    "controlledLoad": "", "tvTotal": "", "turnOffAtPowerShort": "",
    "ovensElectric": "", "ovensGas": "",
    "washingMachineUsage": "",
    "washingMachineWeekday": "", "washingMachineWeekend": "",
    "televisionUsageWeekday": "", "televisionUsageWeekend": "",
    "heatingUsageMethod": "", "gasUsageWinter": "0", "hhSize51": "",
    # energyType / hasSolar / page metadata.
    "energyType": "Electricity", "hasSolar": "0",
    "pageDataType": "energyProfileData", "loopBack": False,
}
base_url = "https://compare.energy.vic.gov.au/api"
# Upper bound, in seconds, for each HTTP call so a stalled server cannot hang the script.
REQUEST_TIMEOUT = 30

# A single Session is reused for every request in the flow.
with rs.Session() as s:
    # Step 1: the JSON reply to this GET carries the serverCacheId.
    url_ = f"{base_url}/get-psb-details?serverCacheId=null"
    r = s.get(url_, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()  # fail on an HTTP error instead of parsing an error page
    serverCacheId = r.json()["serverCacheId"]

    # Step 2: submit both forms as JSON bodies, each tagged with the cache id.
    # dict(form, serverCacheId=...) builds a copy, so the module-level
    # payload dicts are not modified.
    r = s.post(
        f"{base_url}/save-form-data",
        json=dict(form_data1, serverCacheId=str(serverCacheId)),
        timeout=REQUEST_TIMEOUT,
    )
    r.raise_for_status()
    r = s.post(
        f"{base_url}/save-form-data",
        json=dict(form_data2, serverCacheId=str(serverCacheId)),
        timeout=REQUEST_TIMEOUT,
    )
    r.raise_for_status()

    # Step 3: request the offers for the same cache id (sent as query parameters).
    url_ = f"{base_url}/get-offers"
    body = {
        "serverCacheId": str(serverCacheId),
        "loopBack": "false",
        "selectedEnergy": "/offer",
    }
    r = s.get(url_, params=body, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    print(r.json())