Python 网页抓取到 csv
Python web-scraping into csv
我做了一次网页抓取,得到了一个表格(table),想把它写入 CSV 文件。
当我尝试时,我收到这条消息:
"Traceback (most recent call last):
File "C:/Python27/megoldas3.py", line 27, in <module>
file.write(bytes(header,encoding="ascii",errors="ignore")) TypeError:
str() takes at most 1 argument (3 given)"
这段代码有什么问题?我使用 Python 2.7.13.
import urllib2
from bs4 import BeautifulSoup
import csv
import os
# Question's script as posted (Python 2). Block indentation was lost in this
# listing, so the loop nesting mentioned in the comments below is inferred
# from the logic -- TODO confirm against the original post.
# Opens proba.csv for reading and wraps it in a csv reader; `out` is not used
# again below, and `data` is rebound by the inner loop further down.
out=open("proba.csv","rb")
data=csv.reader(out)
# make_soup(url): fetch `url` with urllib2 and return the page parsed by
# BeautifulSoup using the "html.parser" backend.
def make_soup(url):
thepage = urllib2.urlopen(url)
soupdata = BeautifulSoup(thepage, "html.parser")
return soupdata
# Accumulates the scraped table as text; each <tr> contributes "\n" plus its
# comma-joined <td> texts.
maindatatable=""
soup = make_soup("https://www.mnb.hu/arfolyamok")
for record in soup.findAll('tr'):
# Comma-joined text of the <td> cells of the current row.
datatable=""
for data in record.findAll('td'):
datatable=datatable+","+data.text
# datatable[1:] drops the leading comma that precedes the first cell.
maindatatable = maindatatable + "\n" + datatable[1:]
header = "Penznem,Devizanev,Egyseg,Penznemforintban"
print maindatatable
# Opens the same path again, now for binary writing; this handle is never
# closed in the listing.
file = open(os.path.expanduser("proba.csv"),"wb")
# The first call below is the statement named in the quoted traceback: per
# that message, bytes(...) resolves to str() here, which takes at most one
# argument, so the encoding=/errors= keywords are rejected with TypeError.
file.write(bytes(header,encoding="ascii",errors="ignore"))
file.write(bytes(maindatatable,encoding="ascii",errors="ignore"))
你把括号放错了位置:encoding 和 errors 是 file.write() 的参数,而不是 bytes() 的参数。
file.write(bytes(header),encoding="ascii",errors="ignore")
在把字符串写入文件之前,先对它进行编码怎么样?
utf8_str = maindatatable.encode('utf8')
file.write(utf8_str)
也别忘了file.close()
我认为这对你有用:只需从 bytes() 的调用中删除 encoding="ascii",errors="ignore" 即可。
# import re
# data = [['revenue', 'margins'], ['revenue', 'liquidity'], ['revenue', 'ratio'], ['revenue', 'pricing'], ['revenue', 'assets'], ['revenue', 'recent trends']]
# with open('a.txt') as f:
# txt = f.read()
# for d in data:
# c1 = re.findall(d[0],txt)
# c2 = re.findall(d[1],txt)
# if c1 and c2:
# print {c1[0]:len(c1),c2[0]:len(c2)}
import urllib2
from bs4 import BeautifulSoup
import csv
import os
def make_soup(url):
    """Download *url* and return it parsed as a BeautifulSoup document.

    The page is parsed with the built-in "html.parser" backend. The HTTP
    response is closed once parsing has finished, even if parsing fails.
    """
    thepage = urllib2.urlopen(url)
    try:
        return BeautifulSoup(thepage, "html.parser")
    finally:
        thepage.close()


# Column titles written as the first CSV row.
header = "Penznem,Devizanev,Egyseg,Penznemforintban"

soup = make_soup("https://www.mnb.hu/arfolyamok")

# One list of cell texts per <tr>. Rows without any <td> (e.g. heading rows
# built from <th>) are skipped so they do not end up as blank lines.
rows = []
for record in soup.findAll('tr'):
    cells = [cell.text.strip() for cell in record.findAll('td')]
    if cells:
        rows.append(cells)

# Plain-text preview of the scraped table, one comma-joined row per line.
maindatatable = "\n".join(",".join(cells) for cells in rows)
print(maindatatable)

# Let csv.writer produce the file instead of concatenating strings: it puts
# the header on its own line and quotes any cell that itself contains a comma
# or a newline. Python 2's csv module works on byte strings, hence the "wb"
# mode and the explicit UTF-8 encoding of every cell. The `with` block
# guarantees the file is flushed and closed.
with open(os.path.expanduser("proba.csv"), "wb") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(header.split(","))
    for cells in rows:
        writer.writerow([cell.encode('utf-8') for cell in cells])
这应该有效
file.write(bytes(header.encode('ascii','ignore')))
file.write(bytes(maindatatable.encode('ascii','ignore')))
我做了一次网页抓取,得到了一个表格(table),想把它写入 CSV 文件。
当我尝试时,我收到这条消息:
"Traceback (most recent call last):
File "C:/Python27/megoldas3.py", line 27, in <module>
file.write(bytes(header,encoding="ascii",errors="ignore")) TypeError:
str() takes at most 1 argument (3 given)"
这段代码有什么问题?我使用 Python 2.7.13.
import urllib2
from bs4 import BeautifulSoup
import csv
import os
# Question's script as posted (Python 2). Block indentation was lost in this
# listing, so the loop nesting mentioned in the comments below is inferred
# from the logic -- TODO confirm against the original post.
# Opens proba.csv for reading and wraps it in a csv reader; `out` is not used
# again below, and `data` is rebound by the inner loop further down.
out=open("proba.csv","rb")
data=csv.reader(out)
# make_soup(url): fetch `url` with urllib2 and return the page parsed by
# BeautifulSoup using the "html.parser" backend.
def make_soup(url):
thepage = urllib2.urlopen(url)
soupdata = BeautifulSoup(thepage, "html.parser")
return soupdata
# Accumulates the scraped table as text; each <tr> contributes "\n" plus its
# comma-joined <td> texts.
maindatatable=""
soup = make_soup("https://www.mnb.hu/arfolyamok")
for record in soup.findAll('tr'):
# Comma-joined text of the <td> cells of the current row.
datatable=""
for data in record.findAll('td'):
datatable=datatable+","+data.text
# datatable[1:] drops the leading comma that precedes the first cell.
maindatatable = maindatatable + "\n" + datatable[1:]
header = "Penznem,Devizanev,Egyseg,Penznemforintban"
print maindatatable
# Opens the same path again, now for binary writing; this handle is never
# closed in the listing.
file = open(os.path.expanduser("proba.csv"),"wb")
# The first call below is the statement named in the quoted traceback: per
# that message, bytes(...) resolves to str() here, which takes at most one
# argument, so the encoding=/errors= keywords are rejected with TypeError.
file.write(bytes(header,encoding="ascii",errors="ignore"))
file.write(bytes(maindatatable,encoding="ascii",errors="ignore"))
你把括号放错了位置:encoding 和 errors 是 file.write() 的参数,而不是 bytes() 的参数。
file.write(bytes(header),encoding="ascii",errors="ignore")
在把字符串写入文件之前,先对它进行编码怎么样?
utf8_str = maindatatable.encode('utf8')
file.write(utf8_str)
也别忘了file.close()
我认为这对你有用:只需从 bytes() 的调用中删除 encoding="ascii",errors="ignore" 即可。
# import re
# data = [['revenue', 'margins'], ['revenue', 'liquidity'], ['revenue', 'ratio'], ['revenue', 'pricing'], ['revenue', 'assets'], ['revenue', 'recent trends']]
# with open('a.txt') as f:
# txt = f.read()
# for d in data:
# c1 = re.findall(d[0],txt)
# c2 = re.findall(d[1],txt)
# if c1 and c2:
# print {c1[0]:len(c1),c2[0]:len(c2)}
import urllib2
from bs4 import BeautifulSoup
import csv
import os
def make_soup(url):
    """Download *url* and return it parsed as a BeautifulSoup document.

    The page is parsed with the built-in "html.parser" backend. The HTTP
    response is closed once parsing has finished, even if parsing fails.
    """
    thepage = urllib2.urlopen(url)
    try:
        return BeautifulSoup(thepage, "html.parser")
    finally:
        thepage.close()


# Column titles written as the first CSV row.
header = "Penznem,Devizanev,Egyseg,Penznemforintban"

soup = make_soup("https://www.mnb.hu/arfolyamok")

# One list of cell texts per <tr>. Rows without any <td> (e.g. heading rows
# built from <th>) are skipped so they do not end up as blank lines.
rows = []
for record in soup.findAll('tr'):
    cells = [cell.text.strip() for cell in record.findAll('td')]
    if cells:
        rows.append(cells)

# Plain-text preview of the scraped table, one comma-joined row per line.
maindatatable = "\n".join(",".join(cells) for cells in rows)
print(maindatatable)

# Let csv.writer produce the file instead of concatenating strings: it puts
# the header on its own line and quotes any cell that itself contains a comma
# or a newline. Python 2's csv module works on byte strings, hence the "wb"
# mode and the explicit UTF-8 encoding of every cell. The `with` block
# guarantees the file is flushed and closed.
with open(os.path.expanduser("proba.csv"), "wb") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(header.split(","))
    for cells in rows:
        writer.writerow([cell.encode('utf-8') for cell in cells])
这应该有效
file.write(bytes(header.encode('ascii','ignore')))
file.write(bytes(maindatatable.encode('ascii','ignore')))