AttributeError: 'list' object has no attribute 'extract'?
AttributeError: 'list' object has no attribute 'extract'?
我只想通过 xpath 从 url(http://www.tuniu.com/g3300/whole-nj-0/list-l1602-h0-i-j0_0/) 中提取信息。
当我运行下面的代码时,出现了 AttributeError: 'list' object has no attribute 'extract'。
是我导入的模块有误或不匹配吗?
# -*- coding: utf-8 -*-
import urllib2
import sys
import lxml.html as HTML
reload(sys)
sys.setdefaultencoding("utf-8")
# Minimal scraper (Python 2): downloads a page with urllib2 and is meant to
# pull one href out of it with lxml.
class spider(object):
# Prints a start-up banner; nothing is stored on the instance.
def __init__(self):
print u'开始爬取内容'
# Requests `url` and returns the raw response body (60-second timeout).
def getSource(self, url):
html = urllib2.Request(url)
pageContent = urllib2.urlopen(html,timeout=60).read()
return pageContent
# Intended to parse `pageContent` as HTML and return the first href matched
# by the XPath expression.
def getUrl(self, pageContent):
htmlSource = HTML.fromstring(pageContent)
# NOTE(review): this line raises
# "AttributeError: 'list' object has no attribute 'extract'" -- xpath()
# hands back a plain list here, and a list has no .extract() method.
urlInfo = htmlSource.xpath('//dd[@class="tqs"]/span/a/@href').extract()[0]
return urlInfo
# Script entry point: build the spider and ask it for the link on the listing page.
if __name__ == "__main__":
url = "http://www.tuniu.com/g3300/whole-nj-0/list-l1602-h0-i-j0_0/"
tuniu = spider()
# NOTE(review): getUrl() parses its argument as HTML, but a URL string is
# passed here; getSource() is never called, so the page is not downloaded.
tuniu.getUrl(url)
以下是错误信息:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "D:\anzhuang\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 682, in runfile
execfile(filename, namespace)
File "D:\anzhuang\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 71, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "D:/python/tuniu2/tuniu.py", line 34, in <module>
tuniu.getUrl(url)
File "D:/python/tuniu2/tuniu.py", line 27, in getUrl
urlInfo = htmlSource.xpath('//dd[@class="tqs"]/span/a/@href').extract()[0]
AttributeError: 'list' object has no attribute 'extract'
xpath 会返回页面中匹配到的标签列表,因此您是在对整个列表调用 extract,而不是对列表中的某个标签调用。如果只想提取第一个标签,应把 [0] 放在 extract 调用之前,如下所示:
# NOTE(review): with lxml, each item selected by an @href XPath is presumably a
# plain string, so calling .extract() on it would still fail -- TODO confirm.
urlInfo = htmlSource.xpath('//dd[@class="tqs"]/span/a/@href')[0].extract()
不清楚您想要的是哪一项信息,但如果它不在第一个标签中,那么您可能需要用 for tag in urlInfo 遍历 urlInfo,然后对每个标签调用 tag.extract()。
首先,getUrl 是用 url 调用的,但它并没有获取该 url 的内容;应修改它,使其先获取页面内容。
另外,extract 是不需要的:要获取 href,只需从返回的列表中取出一项即可。
def getUrl(self, url):
    """Download ``url`` and return the first link matched on the page.

    The page is fetched through ``self.getSource`` and parsed with lxml;
    the XPath selects the ``href`` attribute of anchors under
    ``<dd class="tqs"><span>``.

    Raises:
        IndexError: if the XPath matches nothing on the page.
    """
    # The caller passes a URL, not HTML, so the page must be fetched first.
    pageContent = self.getSource(url)
    htmlSource = HTML.fromstring(pageContent)
    # xpath() returns a list, so index it directly; no .extract() call is needed.
    urlInfo = htmlSource.xpath('//dd[@class="tqs"]/span/a/@href')[0]
    return urlInfo
我只想通过 xpath 从 url(http://www.tuniu.com/g3300/whole-nj-0/list-l1602-h0-i-j0_0/) 中提取信息。当我运行下面的代码时,出现了 AttributeError: 'list' object has no attribute 'extract'。是我导入的模块有误或不匹配吗?
# -*- coding: utf-8 -*-
import urllib2
import sys
import lxml.html as HTML
reload(sys)
sys.setdefaultencoding("utf-8")
# Minimal scraper (Python 2): downloads a page with urllib2 and is meant to
# pull one href out of it with lxml.
class spider(object):
# Prints a start-up banner; nothing is stored on the instance.
def __init__(self):
print u'开始爬取内容'
# Requests `url` and returns the raw response body (60-second timeout).
def getSource(self, url):
html = urllib2.Request(url)
pageContent = urllib2.urlopen(html,timeout=60).read()
return pageContent
# Intended to parse `pageContent` as HTML and return the first href matched
# by the XPath expression.
def getUrl(self, pageContent):
htmlSource = HTML.fromstring(pageContent)
# NOTE(review): this line raises
# "AttributeError: 'list' object has no attribute 'extract'" -- xpath()
# hands back a plain list here, and a list has no .extract() method.
urlInfo = htmlSource.xpath('//dd[@class="tqs"]/span/a/@href').extract()[0]
return urlInfo
# Script entry point: build the spider and ask it for the link on the listing page.
if __name__ == "__main__":
url = "http://www.tuniu.com/g3300/whole-nj-0/list-l1602-h0-i-j0_0/"
tuniu = spider()
# NOTE(review): getUrl() parses its argument as HTML, but a URL string is
# passed here; getSource() is never called, so the page is not downloaded.
tuniu.getUrl(url)
以下是错误信息:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "D:\anzhuang\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 682, in runfile
execfile(filename, namespace)
File "D:\anzhuang\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 71, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "D:/python/tuniu2/tuniu.py", line 34, in <module>
tuniu.getUrl(url)
File "D:/python/tuniu2/tuniu.py", line 27, in getUrl
urlInfo = htmlSource.xpath('//dd[@class="tqs"]/span/a/@href').extract()[0]
AttributeError: 'list' object has no attribute 'extract'
xpath 会返回页面中匹配到的标签列表,因此您是在对整个列表调用 extract,而不是对列表中的某个标签调用。如果只想提取第一个标签,应把 [0] 放在 extract 调用之前,如下所示:
# NOTE(review): with lxml, each item selected by an @href XPath is presumably a
# plain string, so calling .extract() on it would still fail -- TODO confirm.
urlInfo = htmlSource.xpath('//dd[@class="tqs"]/span/a/@href')[0].extract()
不清楚您想要的是哪一项信息,但如果它不在第一个标签中,那么您可能需要用 for tag in urlInfo 遍历 urlInfo,然后对每个标签调用 tag.extract()。
首先,getUrl 是用 url 调用的,但它并没有获取该 url 的内容;应修改它,使其先获取页面内容。
另外,extract 是不需要的:要获取 href,只需从返回的列表中取出一项即可。
def getUrl(self, url):
    """Download ``url`` and return the first link matched on the page.

    The page is fetched through ``self.getSource`` and parsed with lxml;
    the XPath selects the ``href`` attribute of anchors under
    ``<dd class="tqs"><span>``.

    Raises:
        IndexError: if the XPath matches nothing on the page.
    """
    # The caller passes a URL, not HTML, so the page must be fetched first.
    pageContent = self.getSource(url)
    htmlSource = HTML.fromstring(pageContent)
    # xpath() returns a list, so index it directly; no .extract() call is needed.
    urlInfo = htmlSource.xpath('//dd[@class="tqs"]/span/a/@href')[0]
    return urlInfo