如何将此信息放入列中?
How can I put this info in columns?
我的代码可以正常运行,但我需要把这些信息分别放到各列中。谁能帮我解决这个问题?先谢谢了。
from bs4 import BeautifulSoup
import csv
import requests  # required by requests.get below; missing from the original snippet

# Request webpage content
result = requests.get('https://www.solar.com/learn/solar-panel-cost/')
# Save the raw response body
src = result.content
# Parse the HTML with the lxml parser
soup = BeautifulSoup(src, 'lxml')
# Open the CSV output file
file = open('priceperwatt', 'w')
writer = csv.writer(file)
# Each <tr> is written as ONE cell holding the row's whole text, which is
# why the data does not end up split into columns.
for tr in soup.findAll('tr'):
    rowtext = tr.get_text()
    writer.writerow([rowtext])
file.close()
我对你的代码做了一些改进。主要问题在于,你抓取的数据无法直接放进一个规整的表格,因为前几行的元素个数与后面的行不一样。不过从 ['State', 'Market Price Per Watt', 'Solar.com Price Per Watt'] 这一行开始,就可以把这一行用作列标题(headers),其后的各行作为数据。我的改动包括:打开文件时为 csv 的 writer 和 reader 都传入 newline='',这样每一行文本中用来分隔各个单元格的换行符不会被改动,读回之后再按换行符拆分成各列。
from bs4 import BeautifulSoup
import requests
import csv

# Request webpage content
result = requests.get('https://www.solar.com/learn/solar-panel-cost/')
# Save the raw response body
src = result.content
# Parse the HTML with the lxml parser
soup = BeautifulSoup(src, 'lxml')

# Write the text of every <tr> as a single CSV cell. newline='' leaves line
# ending handling to the csv module, so the newlines embedded in each cell
# are written unchanged.
with open('priceperwatt', 'w', newline='') as file:
    writer = csv.writer(file)
    for tr in soup.find_all('tr'):
        rowtext = tr.get_text()
        writer.writerow([rowtext])

# Read the cells back and split each one on its embedded newlines to get
# one list entry per table cell.
with open('priceperwatt', 'r', newline='') as file:
    reader = csv.reader(file)
    for row in reader:
        # Drop the leading/trailing newlines first so no empty entries appear
        row = ''.join(row).strip('\n').split('\n')
        print(row)
输出:
['Solar Price Per Watt', 'Solar Price Per Kilowatt Hour']
['GROSS system cost / Total system wattage', 'NET system cost / Total lifetime system production']
['Useful for comparing solar quotes against one another', 'Useful for comparing solar versus utility bill']
['Pertains to the POWER of a system', 'Pertains to the PRODUCTION of a system']
['Typically .00-4.00/watt', 'Typically $0.06-0.08/kWh']
['State', 'Market Price Per Watt', 'Solar.com Price Per Watt']
['Arizona', '.61/W', '.39/W']
['California', '.31/W', '.76/W']
['Connecticut', '.65/W', '.68/W']
['Florida', '.45/W', '.82/W']
['Massachusetts', '.18/W', '.92/W']
['Maryland', '.93/W', '.64/W']
['Minnesota', '.61/W', '.66/W']
['New Hampshire', '.72/W', '.37/W']
['New Mexico', '.82/W', '.56/W']
['Oregon', '.79/W', '.68/W']
['Texas', '.83/W', '.17/W']
['Wisconsin', '.29/W', '.83/W']
最后:
import csv
import pandas as pd

# Collect every row of the CSV as a list of cell strings.
lst = []
with open('priceperwatt', 'r', newline='') as file:
    reader = csv.reader(file)
    for row in reader:
        # Each CSV row holds one cell whose parts are separated by newlines
        row = ''.join(row).strip('\n').split('\n')
        lst.append(row)

# lst[5] is the ['State', ...] header row; the rows after it are the data.
df = pd.DataFrame(lst[6:], columns=lst[5])
print(df)
我的代码可以正常运行,但我需要把这些信息分别放到各列中。谁能帮我解决这个问题?先谢谢了。
from bs4 import BeautifulSoup
import csv
import requests  # required by requests.get below; missing from the original snippet

# Request webpage content
result = requests.get('https://www.solar.com/learn/solar-panel-cost/')
# Save the raw response body
src = result.content
# Parse the HTML with the lxml parser
soup = BeautifulSoup(src, 'lxml')
# Open the CSV output file
file = open('priceperwatt', 'w')
writer = csv.writer(file)
# Each <tr> is written as ONE cell holding the row's whole text, which is
# why the data does not end up split into columns.
for tr in soup.findAll('tr'):
    rowtext = tr.get_text()
    writer.writerow([rowtext])
file.close()
我对你的代码做了一些改进。主要问题在于,你抓取的数据无法直接放进一个规整的表格,因为前几行的元素个数与后面的行不一样。不过从 ['State', 'Market Price Per Watt', 'Solar.com Price Per Watt'] 这一行开始,就可以把这一行用作列标题(headers),其后的各行作为数据。我的改动包括:打开文件时为 csv 的 writer 和 reader 都传入 newline='',这样每一行文本中用来分隔各个单元格的换行符不会被改动,读回之后再按换行符拆分成各列。
from bs4 import BeautifulSoup
import requests
import csv

# Request webpage content
result = requests.get('https://www.solar.com/learn/solar-panel-cost/')
# Save the raw response body
src = result.content
# Parse the HTML with the lxml parser
soup = BeautifulSoup(src, 'lxml')

# Write the text of every <tr> as a single CSV cell. newline='' leaves line
# ending handling to the csv module, so the newlines embedded in each cell
# are written unchanged.
with open('priceperwatt', 'w', newline='') as file:
    writer = csv.writer(file)
    for tr in soup.find_all('tr'):
        rowtext = tr.get_text()
        writer.writerow([rowtext])

# Read the cells back and split each one on its embedded newlines to get
# one list entry per table cell.
with open('priceperwatt', 'r', newline='') as file:
    reader = csv.reader(file)
    for row in reader:
        # Drop the leading/trailing newlines first so no empty entries appear
        row = ''.join(row).strip('\n').split('\n')
        print(row)
输出:
['Solar Price Per Watt', 'Solar Price Per Kilowatt Hour']
['GROSS system cost / Total system wattage', 'NET system cost / Total lifetime system production']
['Useful for comparing solar quotes against one another', 'Useful for comparing solar versus utility bill']
['Pertains to the POWER of a system', 'Pertains to the PRODUCTION of a system']
['Typically .00-4.00/watt', 'Typically $0.06-0.08/kWh']
['State', 'Market Price Per Watt', 'Solar.com Price Per Watt']
['Arizona', '.61/W', '.39/W']
['California', '.31/W', '.76/W']
['Connecticut', '.65/W', '.68/W']
['Florida', '.45/W', '.82/W']
['Massachusetts', '.18/W', '.92/W']
['Maryland', '.93/W', '.64/W']
['Minnesota', '.61/W', '.66/W']
['New Hampshire', '.72/W', '.37/W']
['New Mexico', '.82/W', '.56/W']
['Oregon', '.79/W', '.68/W']
['Texas', '.83/W', '.17/W']
['Wisconsin', '.29/W', '.83/W']
最后:
import csv
import pandas as pd

# Collect every row of the CSV as a list of cell strings.
lst = []
with open('priceperwatt', 'r', newline='') as file:
    reader = csv.reader(file)
    for row in reader:
        # Each CSV row holds one cell whose parts are separated by newlines
        row = ''.join(row).strip('\n').split('\n')
        lst.append(row)

# lst[5] is the ['State', ...] header row; the rows after it are the data.
df = pd.DataFrame(lst[6:], columns=lst[5])
print(df)