如何将数据从 selenium 插入到 python 中的 mysql
how to insert data from selenium to mysql in python
我正在尝试使用 selenium 从旅游网站获取数据。我可以提取 CSV 格式的数据,但无法将数据插入我的 mysql 数据库。
import requests
from bs4 import BeautifulSoup
import csv
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import unittest
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
import unittest
import re
import sys
import urllib
import json
import sys, mysql.connector
import csv
import mysql
user_agent = {'User-agent': 'Chrome/43.0.2357.124'}
output_file = open("Excel.csv", "w", newline='')
class Crawling(unittest.TestCase):
def setUp(self):
self.driver = webdriver.Firefox()
self.driver.set_window_size(1024, 768)
self.base_url = "https://www.ctrip.com/"
self.accept_next_alert = True
def test_sel(self):
driver = self.driver
delay = 3
driver.get(self.base_url + "Search/new york")
for i in range(1,2):
driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
time.sleep(2)
html_source = driver.page_source
data = html_source.encode("utf-8")
elements = driver.find_elements_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li[1]/div/div[1]")
innerElements = 15
outerElements = len(elements)/innerElements
#print(innerElements, "\t", outerElements, "\t", len(elements))
for j in range(1, 20):
price = driver.find_element_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li["+str(j)+"]/div/div[1]/div[2]/span[1]").text
headline = driver.find_element_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li["+str(j)+"]/div/div[1]/div[2]/strong").text
deeplink = driver.find_element_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li["+str(j)+"]/div/div[1]/div[3]/div/ul/li[1]/a").get_attribute("href")
if not all([headline, price]):
print("Header not available " " | " + "Price not available " + " | " + "Deeplink: " + str(deeplink))
headline = "Not available as well as price"
else:
print("Header: " + headline + " | " + "Price: " + price[4:] + " | " + "Deeplink: " + str(deeplink))
writer = csv.writer(output_file)
csv_fields = ['Header', 'Price', 'Deeplink', 'PartnerID', 'LocationID']
if elements:
writer.writerow([headline, price[4:], deeplink, partner_ID, location_ID])
if __name__ == "__main__":
unittest.main()
这是我应该能够将其提取到数据库的附加代码:
if not all([headline, price]):
print("Header not available " " | " + "Price not available " + " | " + "Deeplink: " + str(deeplink))
headline = "Not available as well as price"
else:
print("Header: " + headline + " | " + "Price: " + price[4:] + " | " + "Deeplink: " + str(deeplink))
writer = csv.writer(output_file)
csv_fields = ['Header', 'Price', 'Deeplink', 'PartnerID', 'LocationID']
if elements:
writer.writerow([headline, price[4:], deeplink, partner_ID, location_ID])
try:
connection = mysql.connector.connect\
(host = "localhost", user = "root", passwd ="", db = "crawling")
except:
print("No connection")
sys.exit(0)
cursor = connection.cursor()
cursor.execute("TRUNCATE meta;")
connection.commit()
cursor.execute("ALTER TABLE meta AUTO_INCREMENT =1;")
connection.commit()
cursor.execute('''INSERT INTO meta (price_id, Header, Price, Deeplink) \
VALUES("%s", "%s", "%s", "%s")''')
connection.commit()
cursor.close()
connection.close()
if __name__ == "__main__":
unittest.main()
但问题是它没有提取到数据库。你们能帮我 out/give 给我一个提示吗?感谢任何反馈
您没有将任何数据传递到 execute()
:
cursor.execute("""
INSERT INTO
meta
(price_id, Header, Price, Deeplink)
VALUES
(%s, %s, %s, %s)
""", [price[4:], deeplink, partner_ID, location_ID])
我正在尝试使用 selenium 从旅游网站获取数据。我可以提取 CSV 格式的数据,但无法将数据插入我的 mysql 数据库。
import requests
from bs4 import BeautifulSoup
import csv
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import unittest
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
import unittest
import re
import sys
import urllib
import json
import sys, mysql.connector
import csv
import mysql
user_agent = {'User-agent': 'Chrome/43.0.2357.124'}
output_file = open("Excel.csv", "w", newline='')
class Crawling(unittest.TestCase):
def setUp(self):
self.driver = webdriver.Firefox()
self.driver.set_window_size(1024, 768)
self.base_url = "https://www.ctrip.com/"
self.accept_next_alert = True
def test_sel(self):
driver = self.driver
delay = 3
driver.get(self.base_url + "Search/new york")
for i in range(1,2):
driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
time.sleep(2)
html_source = driver.page_source
data = html_source.encode("utf-8")
elements = driver.find_elements_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li[1]/div/div[1]")
innerElements = 15
outerElements = len(elements)/innerElements
#print(innerElements, "\t", outerElements, "\t", len(elements))
for j in range(1, 20):
price = driver.find_element_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li["+str(j)+"]/div/div[1]/div[2]/span[1]").text
headline = driver.find_element_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li["+str(j)+"]/div/div[1]/div[2]/strong").text
deeplink = driver.find_element_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li["+str(j)+"]/div/div[1]/div[3]/div/ul/li[1]/a").get_attribute("href")
if not all([headline, price]):
print("Header not available " " | " + "Price not available " + " | " + "Deeplink: " + str(deeplink))
headline = "Not available as well as price"
else:
print("Header: " + headline + " | " + "Price: " + price[4:] + " | " + "Deeplink: " + str(deeplink))
writer = csv.writer(output_file)
csv_fields = ['Header', 'Price', 'Deeplink', 'PartnerID', 'LocationID']
if elements:
writer.writerow([headline, price[4:], deeplink, partner_ID, location_ID])
if __name__ == "__main__":
unittest.main()
这是我应该能够将其提取到数据库的附加代码:
if not all([headline, price]):
print("Header not available " " | " + "Price not available " + " | " + "Deeplink: " + str(deeplink))
headline = "Not available as well as price"
else:
print("Header: " + headline + " | " + "Price: " + price[4:] + " | " + "Deeplink: " + str(deeplink))
writer = csv.writer(output_file)
csv_fields = ['Header', 'Price', 'Deeplink', 'PartnerID', 'LocationID']
if elements:
writer.writerow([headline, price[4:], deeplink, partner_ID, location_ID])
try:
connection = mysql.connector.connect\
(host = "localhost", user = "root", passwd ="", db = "crawling")
except:
print("No connection")
sys.exit(0)
cursor = connection.cursor()
cursor.execute("TRUNCATE meta;")
connection.commit()
cursor.execute("ALTER TABLE meta AUTO_INCREMENT =1;")
connection.commit()
cursor.execute('''INSERT INTO meta (price_id, Header, Price, Deeplink) \
VALUES("%s", "%s", "%s", "%s")''')
connection.commit()
cursor.close()
connection.close()
if __name__ == "__main__":
unittest.main()
但问题是它没有提取到数据库。你们能帮我 out/give 给我一个提示吗?感谢任何反馈
您没有将任何数据传递到 execute()
:
cursor.execute("""
INSERT INTO
meta
(price_id, Header, Price, Deeplink)
VALUES
(%s, %s, %s, %s)
""", [price[4:], deeplink, partner_ID, location_ID])