如何将数据从 selenium 插入到 python 中的 mysql

how to insert data from selenium to mysql in python

我正在尝试使用 selenium 从旅游网站获取数据。我可以提取 CSV 格式的数据,但无法将数据插入我的 mysql 数据库。

import requests
from bs4 import BeautifulSoup
import csv
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import unittest
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
import unittest
import re
import sys
import urllib
import json
import sys, mysql.connector
import csv
import mysql


user_agent = {'User-agent': 'Chrome/43.0.2357.124'}

output_file = open("Excel.csv", "w", newline='')  
class Crawling(unittest.TestCase):
def setUp(self):
    self.driver = webdriver.Firefox()
    self.driver.set_window_size(1024, 768)
    self.base_url = "https://www.ctrip.com/"
    self.accept_next_alert = True

def test_sel(self):
    driver = self.driver
    delay = 3
    driver.get(self.base_url + "Search/new york")
    for i in range(1,2):

driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
        time.sleep(2)

    html_source = driver.page_source
    data = html_source.encode("utf-8")

    elements =    driver.find_elements_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li[1]/div/div[1]")
    innerElements = 15
    outerElements = len(elements)/innerElements
    #print(innerElements,  "\t", outerElements, "\t", len(elements))


    for j in range(1, 20):

            price = driver.find_element_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li["+str(j)+"]/div/div[1]/div[2]/span[1]").text

            headline = driver.find_element_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li["+str(j)+"]/div/div[1]/div[2]/strong").text

            deeplink = driver.find_element_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li["+str(j)+"]/div/div[1]/div[3]/div/ul/li[1]/a").get_attribute("href")





            if not all([headline, price]):                                                                          
                print("Header not available " " | " + "Price not available " + " | " + "Deeplink: " + str(deeplink))
                headline = "Not available as well as price"

            else:
                print("Header: " + headline + " | " + "Price: " + price[4:] + " | " + "Deeplink: " + str(deeplink))


                writer = csv.writer(output_file)
                csv_fields = ['Header', 'Price', 'Deeplink', 'PartnerID', 'LocationID']
                if elements:
                    writer.writerow([headline, price[4:], deeplink, partner_ID, location_ID])



if __name__ == "__main__":
unittest.main()

这是我应该能够将其提取到数据库的附加代码:

if not all([headline, price]):                                                                          
                print("Header not available " " | " + "Price not available " + " | " + "Deeplink: " + str(deeplink))
                headline = "Not available as well as price"

            else:
                print("Header: " + headline + " | " + "Price: " + price[4:] + " | " + "Deeplink: " + str(deeplink))


                writer = csv.writer(output_file)
                csv_fields = ['Header', 'Price', 'Deeplink', 'PartnerID', 'LocationID']
                if elements:
                    writer.writerow([headline, price[4:], deeplink, partner_ID, location_ID])

try:
    connection = mysql.connector.connect\
    (host = "localhost", user = "root", passwd ="", db = "crawling")
except:
       print("No connection")
       sys.exit(0)


cursor = connection.cursor()

cursor.execute("TRUNCATE meta;")
connection.commit()

cursor.execute("ALTER TABLE meta AUTO_INCREMENT =1;")
connection.commit()


cursor.execute('''INSERT INTO meta (price_id, Header, Price, Deeplink) \
    VALUES("%s", "%s", "%s", "%s")''')


connection.commit()

cursor.close()

connection.close()



if __name__ == "__main__":
     unittest.main() 

但问题是它没有提取到数据库。你们能帮我 out/give 给我一个提示吗?感谢任何反馈

您没有将任何数据传递到 execute():

cursor.execute("""
    INSERT INTO 
        meta 
        (price_id, Header, Price, Deeplink) 
    VALUES 
        (%s, %s, %s, %s)
""", [price[4:], deeplink, partner_ID, location_ID])