写入 csv python 每次水平追加
Write to csv python Horizontally append Each time
我写了这段代码,它通过页面 URL 抓取 Amazon 页面上的某些元素。现在我想添加一个 CSV 功能,把以下变量作为新列水平追加到 CSV 文件中:(Date_time, price, Merchant, Sellers_count)。每次运行代码时,都应该在右侧追加这些列,而不删除任何现有列。下面是我的代码,以及我想要追加到的表格格式:
# -*- coding: cp1252 -*-
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
import requests, csv, time, urllib2, gspread, os, ast, datetime
from scrapy import Selector as s
from lxml import html
from random import randint
from oauth2client.client import SignedJwtAssertionCredentials
import re

from selenium.common.exceptions import WebDriverException

# Leading integer (optionally with thousands separators) in the offer-count text.
_COUNT_RE = re.compile(r'\d[\d,]*')


def _extract(selector, query):
    """Return the list of unicode strings matched by the XPath *query*."""
    return selector.xpath(query).extract()


def _joined_text(selector, query):
    """Return every text node matched by *query*, concatenated and stripped.

    An empty string is returned when nothing matches, so callers never see
    list-repr artifacts such as "[]" or "[u'...']".
    """
    return u''.join(_extract(selector, query)).strip()


links = ['http://www.amazon.com/dp/B00064NZCK',
         'http://www.amazon.com/dp/B000CIU7F8',
         'http://www.amazon.com/dp/B000H5839I',
         'http://www.amazon.com/dp/B000LTLBHG',
         'http://www.amazon.com/dp/B000SDLXKU',
         'http://www.amazon.com/dp/B000SDLXNC',
         'http://www.amazon.com/dp/B000SPHPWI',
         'http://www.amazon.com/dp/B000UUMHRE']

driver = webdriver.Firefox()
#driver.set_page_load_timeout(30)
try:
    for Url in links:
        try:
            driver.get(Url)
        except WebDriverException as exc:
            # Without this the loop would parse whatever page is still loaded
            # and report the previous product's data under this URL.
            print('Skipping %s: %r' % (Url, exc))
            continue

        # Random pause between page loads.
        time.sleep(randint(1, 3))

        # Parse the rendered page; named page_source to avoid shadowing lxml's html.
        page_source = driver.page_source
        source = s(text=page_source, type="html")

        Page_link = _extract(source, '//link[@rel="canonical"]//@href')
        Product_Name = _joined_text(source, '//span[@id="productTitle"]/text()')
        price = _joined_text(source, '//span[@id="priceblock_ourprice"]//text()')

        # No merchant link on the page is reported as 'Amazon'.
        merchants = _extract(source, '//div[@id="merchant-info"]//a//text()')
        Merchant = merchants[0] if merchants else 'Amazon'

        # The offer link text starts with a number (e.g. "12 new"); the count
        # reported is that number minus one, as in the original script.
        # NOTE(review): presumably the -1 excludes the buy-box seller - confirm.
        offers = _extract(source, '//span[@class="olp-padding-right"]//a/text()')
        if offers:
            offer_text = offers[0].strip()
            count_match = _COUNT_RE.search(offer_text)
            if count_match:
                Sellers_count = int(count_match.group().replace(',', '')) - 1
            else:
                # Keep the raw text when it contains no number.
                Sellers_count = offer_text
        else:
            Sellers_count = u''

        Date_time = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        print(u' '.join(
            u'%s' % field
            for field in (Date_time, Product_Name, Url, price, Merchant, Sellers_count)
        ))
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()
我想要追加到的现有表格格式如下:
ASIN ID PRODUCT URL
B00064NZCK MG-5690 BigMouth Inc Over The Hill Parking Privelege Permit http://www.amazon.com/dp/B00064NZCK
B000CIU7F8 BM1102 BigMouth Inc Pocket Disgusting Sounds Machine http://www.amazon.com/dp/B000CIU7F8
B000H5839I MG-4774 BigMouth Inc All Occasion Over The Hill Cane http://www.amazon.com/dp/B000H5839I
B000LTLBHG BM1234 BigMouth Inc Beer Belt / 6 Pack Holster(Black) http://www.amazon.com/dp/B000LTLBHG
B000SDLXKU BM1103 BigMouth Inc Covert Clicker http://www.amazon.com/dp/B000SDLXKU
B000SDLXNC BM1254 BigMouth Inc Inflatable John http://www.amazon.com/dp/B000SDLXNC
B000SPHPWI SO:AP Design Sense Generic Weener Kleener Soap http://www.amazon.com/dp/B000SPHPWI
B000UUMHRE MG-5305 BigMouth Inc Over the Hill Rectal Thermometer http://www.amazon.com/dp/B000UUMHRE
您必须读取已有的 CSV 文件并编写一个包含您添加的列的新文件,这里有一个示例:
import csv

# Read the existing file ('your.csv') and write the extended copy to a NEW
# file ('new.csv'). Opening the existing file with 'w' would truncate it
# before any of its rows could be read.
# Python 2's csv module expects its files to be opened in binary mode.
with open('your.csv', 'rb') as in_file:
    with open('new.csv', 'wb') as out_file:
        reader = csv.reader(in_file)
        writer = csv.writer(out_file)
        for row in reader:
            # Date_time and Product_Name come from the scraping code.
            # csv.writer adds the delimiters and quotes values containing
            # commas. Every row, including the header row, gets the same two
            # cells; the header is not treated specially here.
            writer.writerow(row + [Date_time, Product_Name])
显然,您必须管理 header(我想是第一行)
希望对你有所帮助
以下应该满足您的需要。它会读取您现有的 CSV 文件并添加四个新的列标题。然后,对于每个 URL,您的代码都会获取新数据。然后将其添加到现有行的末尾(顺序无关紧要)。之后,将创建一个更新的 CSV 文件:
import csv

links = ['http://www.amazon.com/dp/B00064NZCK',
         'http://www.amazon.com/dp/B000CIU7F8',
         'http://www.amazon.com/dp/B000H5839I',
         'http://www.amazon.com/dp/B000LTLBHG',
         'http://www.amazon.com/dp/B000SDLXKU',
         'http://www.amazon.com/dp/B000SDLXNC',
         'http://www.amazon.com/dp/B000SPHPWI',
         'http://www.amazon.com/dp/B000UUMHRE']

# Index of the column that holds the product URL in the existing CSV.
URL_COLUMN = 3
NEW_HEADERS = ["Date_time", "price", "Merchant", "Sellers_count"]

# Read in the existing CSV file.
# Python 2's csv module expects its files to be opened in binary mode.
with open('existing.csv', 'rb') as f_input:
    csv_input = csv.reader(f_input)
    # An empty file yields no header row instead of raising StopIteration.
    existing_headers = next(csv_input, [])
    # Blank lines come back as empty lists; drop them.
    rows = [row for row in csv_input if row]

headers = existing_headers + NEW_HEADERS

# Pad short rows to the existing header width so that the new cells always
# line up under the new headers.
for row in rows:
    row.extend([''] * (len(existing_headers) - len(row)))

# Create an index just in case the order changes or there are other entries.
url_indexes = {
    row[URL_COLUMN]: index
    for index, row in enumerate(rows)
    if len(row) > URL_COLUMN
}

for url in links:
    # Insert your existing code here to get the actual data
    Date_time = "2015-08-27_12-34-56"
    price = "123.45"
    Merchant = "Def"
    Sellers_count = "42"

    row_index = url_indexes.get(url)
    if row_index is None:
        # A URL that is missing from the CSV is reported and skipped rather
        # than aborting the whole update with a KeyError.
        print('No row for %s in existing.csv; skipping' % url)
        continue
    rows[row_index].extend([Date_time, price, Merchant, Sellers_count])

# Rows that received no new data get empty cells so the table stays rectangular.
for row in rows:
    row.extend([''] * (len(headers) - len(row)))

# Write the updated CSV to a new file
with open('updated.csv', 'wb') as f_output:
    csv_output = csv.writer(f_output)
    csv_output.writerow(headers)
    csv_output.writerows(rows)
我写了这段代码,它通过页面 URL 抓取 Amazon 页面上的某些元素。现在我想添加一个 CSV 功能,把以下变量作为新列水平追加到 CSV 文件中:(Date_time, price, Merchant, Sellers_count)。每次运行代码时,都应该在右侧追加这些列,而不删除任何现有列。下面是我的代码,以及我想要追加到的表格格式:
# -*- coding: cp1252 -*-
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
import requests, csv, time, urllib2, gspread, os, ast, datetime
from scrapy import Selector as s
from lxml import html
from random import randint
from oauth2client.client import SignedJwtAssertionCredentials
import re

from selenium.common.exceptions import WebDriverException

# Leading integer (optionally with thousands separators) in the offer-count text.
_COUNT_RE = re.compile(r'\d[\d,]*')


def _extract(selector, query):
    """Return the list of unicode strings matched by the XPath *query*."""
    return selector.xpath(query).extract()


def _joined_text(selector, query):
    """Return every text node matched by *query*, concatenated and stripped.

    An empty string is returned when nothing matches, so callers never see
    list-repr artifacts such as "[]" or "[u'...']".
    """
    return u''.join(_extract(selector, query)).strip()


links = ['http://www.amazon.com/dp/B00064NZCK',
         'http://www.amazon.com/dp/B000CIU7F8',
         'http://www.amazon.com/dp/B000H5839I',
         'http://www.amazon.com/dp/B000LTLBHG',
         'http://www.amazon.com/dp/B000SDLXKU',
         'http://www.amazon.com/dp/B000SDLXNC',
         'http://www.amazon.com/dp/B000SPHPWI',
         'http://www.amazon.com/dp/B000UUMHRE']

driver = webdriver.Firefox()
#driver.set_page_load_timeout(30)
try:
    for Url in links:
        try:
            driver.get(Url)
        except WebDriverException as exc:
            # Without this the loop would parse whatever page is still loaded
            # and report the previous product's data under this URL.
            print('Skipping %s: %r' % (Url, exc))
            continue

        # Random pause between page loads.
        time.sleep(randint(1, 3))

        # Parse the rendered page; named page_source to avoid shadowing lxml's html.
        page_source = driver.page_source
        source = s(text=page_source, type="html")

        Page_link = _extract(source, '//link[@rel="canonical"]//@href')
        Product_Name = _joined_text(source, '//span[@id="productTitle"]/text()')
        price = _joined_text(source, '//span[@id="priceblock_ourprice"]//text()')

        # No merchant link on the page is reported as 'Amazon'.
        merchants = _extract(source, '//div[@id="merchant-info"]//a//text()')
        Merchant = merchants[0] if merchants else 'Amazon'

        # The offer link text starts with a number (e.g. "12 new"); the count
        # reported is that number minus one, as in the original script.
        # NOTE(review): presumably the -1 excludes the buy-box seller - confirm.
        offers = _extract(source, '//span[@class="olp-padding-right"]//a/text()')
        if offers:
            offer_text = offers[0].strip()
            count_match = _COUNT_RE.search(offer_text)
            if count_match:
                Sellers_count = int(count_match.group().replace(',', '')) - 1
            else:
                # Keep the raw text when it contains no number.
                Sellers_count = offer_text
        else:
            Sellers_count = u''

        Date_time = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        print(u' '.join(
            u'%s' % field
            for field in (Date_time, Product_Name, Url, price, Merchant, Sellers_count)
        ))
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()
我想要追加到的现有表格格式如下:
ASIN ID PRODUCT URL
B00064NZCK MG-5690 BigMouth Inc Over The Hill Parking Privelege Permit http://www.amazon.com/dp/B00064NZCK
B000CIU7F8 BM1102 BigMouth Inc Pocket Disgusting Sounds Machine http://www.amazon.com/dp/B000CIU7F8
B000H5839I MG-4774 BigMouth Inc All Occasion Over The Hill Cane http://www.amazon.com/dp/B000H5839I
B000LTLBHG BM1234 BigMouth Inc Beer Belt / 6 Pack Holster(Black) http://www.amazon.com/dp/B000LTLBHG
B000SDLXKU BM1103 BigMouth Inc Covert Clicker http://www.amazon.com/dp/B000SDLXKU
B000SDLXNC BM1254 BigMouth Inc Inflatable John http://www.amazon.com/dp/B000SDLXNC
B000SPHPWI SO:AP Design Sense Generic Weener Kleener Soap http://www.amazon.com/dp/B000SPHPWI
B000UUMHRE MG-5305 BigMouth Inc Over the Hill Rectal Thermometer http://www.amazon.com/dp/B000UUMHRE
您必须读取已有的 CSV 文件并编写一个包含您添加的列的新文件,这里有一个示例:
import csv

# Read the existing file ('your.csv') and write the extended copy to a NEW
# file ('new.csv'). Opening the existing file with 'w' would truncate it
# before any of its rows could be read.
# Python 2's csv module expects its files to be opened in binary mode.
with open('your.csv', 'rb') as in_file:
    with open('new.csv', 'wb') as out_file:
        reader = csv.reader(in_file)
        writer = csv.writer(out_file)
        for row in reader:
            # Date_time and Product_Name come from the scraping code.
            # csv.writer adds the delimiters and quotes values containing
            # commas. Every row, including the header row, gets the same two
            # cells; the header is not treated specially here.
            writer.writerow(row + [Date_time, Product_Name])
显然,您必须管理 header(我想是第一行)
希望对你有所帮助
以下应该满足您的需要。它会读取您现有的 CSV 文件并添加四个新的列标题。然后,对于每个 URL,您的代码都会获取新数据。然后将其添加到现有行的末尾(顺序无关紧要)。之后,将创建一个更新的 CSV 文件:
import csv

links = ['http://www.amazon.com/dp/B00064NZCK',
         'http://www.amazon.com/dp/B000CIU7F8',
         'http://www.amazon.com/dp/B000H5839I',
         'http://www.amazon.com/dp/B000LTLBHG',
         'http://www.amazon.com/dp/B000SDLXKU',
         'http://www.amazon.com/dp/B000SDLXNC',
         'http://www.amazon.com/dp/B000SPHPWI',
         'http://www.amazon.com/dp/B000UUMHRE']

# Index of the column that holds the product URL in the existing CSV.
URL_COLUMN = 3
NEW_HEADERS = ["Date_time", "price", "Merchant", "Sellers_count"]

# Read in the existing CSV file.
# Python 2's csv module expects its files to be opened in binary mode.
with open('existing.csv', 'rb') as f_input:
    csv_input = csv.reader(f_input)
    # An empty file yields no header row instead of raising StopIteration.
    existing_headers = next(csv_input, [])
    # Blank lines come back as empty lists; drop them.
    rows = [row for row in csv_input if row]

headers = existing_headers + NEW_HEADERS

# Pad short rows to the existing header width so that the new cells always
# line up under the new headers.
for row in rows:
    row.extend([''] * (len(existing_headers) - len(row)))

# Create an index just in case the order changes or there are other entries.
url_indexes = {
    row[URL_COLUMN]: index
    for index, row in enumerate(rows)
    if len(row) > URL_COLUMN
}

for url in links:
    # Insert your existing code here to get the actual data
    Date_time = "2015-08-27_12-34-56"
    price = "123.45"
    Merchant = "Def"
    Sellers_count = "42"

    row_index = url_indexes.get(url)
    if row_index is None:
        # A URL that is missing from the CSV is reported and skipped rather
        # than aborting the whole update with a KeyError.
        print('No row for %s in existing.csv; skipping' % url)
        continue
    rows[row_index].extend([Date_time, price, Merchant, Sellers_count])

# Rows that received no new data get empty cells so the table stays rectangular.
for row in rows:
    row.extend([''] * (len(headers) - len(row)))

# Write the updated CSV to a new file
with open('updated.csv', 'wb') as f_output:
    csv_output = csv.writer(f_output)
    csv_output.writerow(headers)
    csv_output.writerows(rows)