执行数据库插入时意外的文本连接
Unexpected text concatenation when performing database inserts
import bs4
import sqlite3
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup
# Crawl the paginated apartment listings, extract a few fields from each
# listing card, and store them in a local SQLite database.
# NOTE(review): this paste has lost its indentation, so which statements sit
# inside each `for` loop cannot be read off the text -- restore it before running.
# Result pages 2 through 31 are requested (range's upper bound is exclusive).
for page in range(2, 32):
my_url = 'https://www.bproperty.com/en/dhaka/apartments-for-sale/page-{}/'.format(page)
# Download the page body, then release the connection.
zclient = ureq(my_url)
data_html = zclient.read()
zclient.close()
data_soup = soup(data_html, "html.parser")
# Every <li> carrying this class is treated as one listing container.
contents = data_soup.findAll("li", {"class": "ef447dde"})
# Extract only the needed fields from each listing container.
for container in contents:
# Title attribute of the first <a> inside the container.
Ad_Title = container.a["title"]
amount = container.findAll("div", {"class": "cd6d5974 d8b3c34d"})
price = amount[0].text
area = container.findAll("div", {"class": "_7afabd84"})
property_location = area[0].text
category = container.findAll("div", {"class": "_9a4e3964"})
listing_category = category[0].text
# NOTE(review): this is the lookup the question is about -- the reported
# symptom is that sq_ft comes out as beds + baths + area run together
# (e.g. "331300" instead of "1300").
size = container.findAll("div", {"class": "_22b2f6ed"})
sq_ft = size[0].text
# The first two spans with this class are read as bedroom and bathroom counts.
rooms = container.findAll("span", {"class": "b6a29bc0"})
bed_rooms = rooms[0].text
bath_rooms = rooms[1].text
# src attribute of the first <img> inside the container.
image = container.img["src"]
# Not referenced again anywhere in this snippet.
important_stuff = str(container.script)
# Connect to the SQLite database file.
database = "properties1.db"
conn = sqlite3.connect(database)
# Create the table on first use. Every column is declared TEXT; the DEFAULT
# clauses only matter for inserts that omit a column, and the INSERT below
# always supplies all eight.
conn.execute("""
CREATE TABLE IF NOT EXISTS adverts (
"Ad_Title" TEXT DEFAULT 'Buy Now!',
"price" TEXT,
"property_location" TEXT,
"listing_category" TEXT,
"sq_ft" TEXT,
"bed_rooms" TEXT DEFAULT 2,
"bath_rooms" TEXT DEFAULT 1,
"image" TEXT
);""")
cursor = conn.cursor()
# Parameterized insert: each "?" is bound to the matching element of data_tuple.
sqlite_insert_query = """INSERT INTO adverts
(Ad_Title, price, property_location, listing_category, sq_ft, bed_rooms, bath_rooms, image)
VALUES (?, ?, ?, ?, ?, ?, ? ,?);"""
data_tuple = (Ad_Title, price, property_location,
listing_category, sq_ft, bed_rooms, bath_rooms, image)
# Insert the row, commit it, and close the connection.
cursor.execute(sqlite_insert_query, data_tuple)
conn.commit()
conn.close()
For example: if the apartment size is 1300 sq ft and it has 3 bedrooms and 3 bathrooms, the sq_ft entry in the database shows 331300 sq ft. My code looks OK to me, so what am I missing?
您使用的 div(class 为 "_22b2f6ed")同时包含全部 3 项数据:卧室数、浴室数和面积,所以取它的 .text 时这 3 个值会被拼接在一起(3、3、1300 → 331300)。
所以您需要改用 span(class 为 "b6a29bc0")列表中的第 3 个元素(索引 2),它对应的是面积,这样就能得到正确的面积值。
# Replaced lookup: the "_22b2f6ed" div wraps bedrooms, bathrooms and area
# together, so its .text returned all three values run together (e.g. "331300").
# size = container.findAll("div", {"class": "_22b2f6ed"})
# Read the individual "b6a29bc0" spans instead; index 2 (the third span) is the area.
size = container.findAll("span", {"class": "b6a29bc0"})
sq_ft = size[2].text
import bs4
import sqlite3
from urllib.request import urlopen as ureq
from bs4 import BeautifulSoup as soup
# Crawl the paginated apartment listings, extract a few fields from each
# listing card, and store them in a local SQLite database.
# NOTE(review): this paste has lost its indentation, so which statements sit
# inside each `for` loop cannot be read off the text -- restore it before running.
# Result pages 2 through 31 are requested (range's upper bound is exclusive).
for page in range(2, 32):
my_url = 'https://www.bproperty.com/en/dhaka/apartments-for-sale/page-{}/'.format(page)
# Download the page body, then release the connection.
zclient = ureq(my_url)
data_html = zclient.read()
zclient.close()
data_soup = soup(data_html, "html.parser")
# Every <li> carrying this class is treated as one listing container.
contents = data_soup.findAll("li", {"class": "ef447dde"})
# Extract only the needed fields from each listing container.
for container in contents:
# Title attribute of the first <a> inside the container.
Ad_Title = container.a["title"]
amount = container.findAll("div", {"class": "cd6d5974 d8b3c34d"})
price = amount[0].text
area = container.findAll("div", {"class": "_7afabd84"})
property_location = area[0].text
category = container.findAll("div", {"class": "_9a4e3964"})
listing_category = category[0].text
# NOTE(review): this is the lookup the question is about -- the reported
# symptom is that sq_ft comes out as beds + baths + area run together
# (e.g. "331300" instead of "1300").
size = container.findAll("div", {"class": "_22b2f6ed"})
sq_ft = size[0].text
# The first two spans with this class are read as bedroom and bathroom counts.
rooms = container.findAll("span", {"class": "b6a29bc0"})
bed_rooms = rooms[0].text
bath_rooms = rooms[1].text
# src attribute of the first <img> inside the container.
image = container.img["src"]
# Not referenced again anywhere in this snippet.
important_stuff = str(container.script)
# Connect to the SQLite database file.
database = "properties1.db"
conn = sqlite3.connect(database)
# Create the table on first use. Every column is declared TEXT; the DEFAULT
# clauses only matter for inserts that omit a column, and the INSERT below
# always supplies all eight.
conn.execute("""
CREATE TABLE IF NOT EXISTS adverts (
"Ad_Title" TEXT DEFAULT 'Buy Now!',
"price" TEXT,
"property_location" TEXT,
"listing_category" TEXT,
"sq_ft" TEXT,
"bed_rooms" TEXT DEFAULT 2,
"bath_rooms" TEXT DEFAULT 1,
"image" TEXT
);""")
cursor = conn.cursor()
# Parameterized insert: each "?" is bound to the matching element of data_tuple.
sqlite_insert_query = """INSERT INTO adverts
(Ad_Title, price, property_location, listing_category, sq_ft, bed_rooms, bath_rooms, image)
VALUES (?, ?, ?, ?, ?, ?, ? ,?);"""
data_tuple = (Ad_Title, price, property_location,
listing_category, sq_ft, bed_rooms, bath_rooms, image)
# Insert the row, commit it, and close the connection.
cursor.execute(sqlite_insert_query, data_tuple)
conn.commit()
conn.close()
For example: if the apartment size is 1300 sq ft and it has 3 bedrooms and 3 bathrooms, the sq_ft entry in the database shows 331300 sq ft. My code looks OK to me, so what am I missing?
您使用的 div(class 为 "_22b2f6ed")同时包含全部 3 项数据:卧室数、浴室数和面积,所以取它的 .text 时这 3 个值会被拼接在一起(3、3、1300 → 331300)。
所以您需要改用 span(class 为 "b6a29bc0")列表中的第 3 个元素(索引 2),它对应的是面积,这样就能得到正确的面积值。
# Replaced lookup: the "_22b2f6ed" div wraps bedrooms, bathrooms and area
# together, so its .text returned all three values run together (e.g. "331300").
# size = container.findAll("div", {"class": "_22b2f6ed"})
# Read the individual "b6a29bc0" spans instead; index 2 (the third span) is the area.
size = container.findAll("span", {"class": "b6a29bc0"})
sq_ft = size[2].text