写入和读取 peewee

writing to and reading peewee

我正在编写一个程序来抓取多个人的推文,如果推文的正文是唯一的,它就会被存入对应那个人的 sqlite 数据库。我有两个文件,一个用于写入数据库,一个用于读取数据库并使用搜索词搜索推文。在写入数据库之前,我在终端上打印了这些推文,它们都被正确地从推特上抓取了下来。但当我尝试搜索某个词时,即使不带任何搜索词,所有数据库的推文数量都显示为零。因此,要么是数据库的写入有问题,要么是读取有问题。请帮忙,谢谢!我是 python 的新手。

写入文件:

import requests
import datetime
from bs4 import BeautifulSoup
from peewee import *
from time import sleep

# SQLite file for each account. databases[n] is filled from urls[n], so the
# two lists must stay the same length and in the same order.
databases = ["femfreq.db", "boris_johnson.db", "barack_obama.db",
         "daily_mail.db", "guardian.db", "times.db", "zac_goldsmith.db",
         "bernie_sanders.db", "george_osborne.db", "john_mcdonnell.db",
         "donald_trump.db", "hillary_clinton.db", "nigel_farage.db"]

# Twitter page scraped for each account, in the same order as `databases`.
# Every entry needs its own trailing comma: two adjacent string literals are
# silently joined into one, which leaves this list one entry short.
urls = ["https://twitter.com/femfreq", "https://twitter.com/BorisJohnson",
    "https://twitter.com/BarackObama",
    "https://twitter.com/MailOnline?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor",
    "https://twitter.com/guardian?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor",
    "https://twitter.com/thetimes",
    "https://twitter.com/ZacGoldsmith?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor",
    "https://twitter.com/berniesanders?lang=en-gb",
    "https://twitter.com/George_Osborne?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor",
    "https://twitter.com/johnmcdonnellMP?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor",
    "https://twitter.com/realDonaldTrump?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor",
    "https://twitter.com/HillaryClinton?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor",
    "https://twitter.com/Nigel_Farage?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor"]

# Scrape each account's page and store every tweet that is not already saved
# in that account's own SQLite database. databases[n] is paired with urls[n].
for selection, database_chosen in enumerate(databases):

    r = requests.get(urls[selection])
    soup = BeautifulSoup(r.content, "html.parser")
    content = soup.find_all("div", {"class": "content"})

    db = SqliteDatabase(database_chosen)

    # The model is declared inside the loop so that it is bound to the
    # database file of the account currently being processed.
    class data_input(Model):
        time_position = DateTimeField(default=datetime.datetime.now)
        header = CharField()
        time_posted = CharField()
        tweet_body = CharField(unique=True)

        class Meta:
            database = db

    db.connect()
    try:
        db.create_tables([data_input], safe=True)

        for i in content:
            # Children 1, 3 and 5 of each "content" div are read as the
            # header, the posting time and the tweet text respectively.
            header = i.contents[1].text
            time_posted = i.contents[3].text
            tweet_body = i.contents[5].text

            print("=============")
            print(header)
            print(time_posted)
            print(tweet_body)

            try:
                data_input.create(header=header,
                                  time_posted=time_posted,
                                  tweet_body=tweet_body)
            except IntegrityError:
                # tweet_body is unique, so a tweet that is already stored is
                # rejected by the database; skipping it is intentional.
                pass
    finally:
        # Release this account's connection before moving on to the next
        # database file, even if scraping or inserting failed part-way.
        db.close()

    print("database: {} updated".format(database_chosen))

读取文件:

from peewee import *
import datetime

# SQLite files that can be searched; the menu below numbers them from 0.
databases = ["femfreq.db", "boris_johnson.db", "barack_obama.db",
         "daily_mail.db", "guardian.db", "times.db", "zac_goldsmith.db",
         "bernie_sanders.db", "george_osborne.db", "john_mcdonnell.db",
         "donald_trump.db", "hillary_clinton.db", "nigel_farage.db"]

search_results = []

# Show the menu as "<file name>:<number>".
print("")
print("Please enter the number for the database you want to search: ")
for search_index, i in enumerate(databases):
    print("{}:{}".format(i, search_index))

# Keep asking until the answer is a number that appears in the menu.
# Without the range check a negative number would silently pick a database
# counted from the end of the list, and a non-numeric answer would crash.
while True:
    try:
        select = int(input("please select: "))
    except ValueError:
        print("Please enter a whole number.")
        continue
    if 0 <= select < len(databases):
        break
    print("Please enter a number between 0 and {}.".format(len(databases) - 1))

database_chosen = databases[select]

db = SqliteDatabase(database_chosen)


class data_input(Model):
    """One stored tweet; mirrors the model declared by the writer script."""
    time_position = DateTimeField(default=datetime.datetime.now)
    header = CharField()
    time_posted = CharField()
    tweet_body = CharField(unique=True)

    class Meta:
        database = db


db.connect()

# All stored tweets, newest first.
enteries = data_input.select().order_by(data_input.time_position.desc())

# Debug output: this prints the query itself, not the rows it returns.
print(enteries)

# No .where() clause here: passing a bare column (data_input.tweet_body) makes
# SQLite evaluate each tweet's text as a boolean, which is false for ordinary
# text, so every row was filtered out and the total was always zero. A filter
# belongs here only once there is a real condition, e.g.
# data_input.tweet_body.contains(search_term).
print("")
print("The total number of tweets in {} is: {}".format(database_chosen,
                      enteries.count()))

在读取文件中,我还没有加入搜索功能;等我先解决了这个问题,再去处理它。非常感谢。

您在读取条目的查询中加入“.where(data_input.tweet_body)”,是想实现什么?请尝试删除这一整行:

# The line to delete: it passes a bare column instead of a comparison.
# NOTE: the question's script spells this variable `enteries`.
entries = entries.where(data_input.tweet_body)

等到您要添加搜索功能时,才需要加上 where 子句,像这样:

# Keep only the tweets whose body contains search_term as a substring.
# NOTE: the question's script spells this variable `enteries`.
entries = entries.where(data_input.tweet_body.contains(search_term))