循环多个 Twitter 搜索查询 REST API

Loop over multiple Twitter search queries REST API

我有一个可以正常工作的 REST 搜索 API 脚本,它参照 https://www.karambelkar.info/2015/01/how-to-use-twitters-search-rest-api-most-effectively./ 中的方法提取推文。

问题: 此代码可以运行,但它是把 searchQuery1 和 searchQuery2 合在一起提取推文(例如同时包含 Prostate Cancer 和 Colon Cancer 的推文)。我不想要这样。相反,我想先获取来自 searchQuery1 的所有推文(仅包含 Prostate Cancer 的推文),然后再获取来自 searchQuery2 的所有推文(仅包含 Colon Cancer 的推文)。这些查询应该分开运行。

目标: 按顺序依次循环 X 个搜索查询(例如 searchQuery1、searchQuery2 等)

谢谢!

# Script from the question: pages through Twitter search results and appends
# each tweet as one JSON line to a timestamped file.
# It relies on names that are not defined in this snippet: `api`, `tweepy`,
# `jsonpickle` and `time`.
searchQuery1 = 'Prostate Cancer'  
searchQuery2 = 'Colon Cancer' 


maxTweets = 10000  # the download loop stops once this many tweets were written
tweetsPerQry = 100  # passed as `count` on every api.search call
fprefix = 'REST'  # prefix of the output file name
sinceId = None  # when truthy, forwarded to api.search as since_id
max_id = -1L  # <= 0 means no page has been fetched yet; updated after each page


tweetCount = 0
with open('/Users/eer/Desktop/' + fprefix + '.' + time.strftime('%Y-%m-%d_%H-%M-%S') + '.json', 'a+') as f: #open file
    while tweetCount < maxTweets: 
        try:

            # First request (no max_id known yet).
            if (max_id <= 0):
                if (not sinceId):
                    # zip() pairs up the characters of the two query strings;
                    # x and y are never used, so each iteration repeats the
                    # same call and only the last response is kept.
                    for x,y in zip(searchQuery1,searchQuery2):
                        # q receives a list holding BOTH phrases in a single
                        # call; the queries are not issued separately.
                        new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry)
                else:
                    print "sinceID 1"
                    new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry,
                                            since_id=sinceId)

            # Follow-up requests: ask only for tweets with an id below the
            # last one already written (max_id - 1).
            else:
                if (not sinceId):
                    print "not sinceID 2"
                    new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry,
                                            max_id=str(max_id - 1))
                else:
                    print "sinceID 1"
                    new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry,
                                            max_id=str(max_id - 1),
                                            since_id=sinceId)
            # An empty page ends the download.
            if not new_tweets:
                print("No more tweets found")
                break                 

            # One JSON document per line.
            for tweet in new_tweets: 
                f.write(jsonpickle.encode(tweet._json, unpicklable=False) +
                        '\n')


            tweetCount += len(new_tweets) 
            # Remember the id of the last tweet of this page for the next request.
            max_id = new_tweets[-1].id

        except tweepy.TweepError as e:
            # Any tweepy error is printed and aborts the loop.
            print("some error : " + str(e))
            break

# Reports the file prefix only, not the full output path.
print ("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fprefix))

我会将您的查询更改为 '"Prostate Cancer" OR "Colon Cancer"' 并存储结果,然后按照您想要的方式对它们排序。听起来您想要的是类似下面的伪代码:

tweets_with_Prostate_Cancer = []
tweets_with_Colon_Cancer = []

for each tweet in the result set:
    if tweet contains "Prostate Cancer" and does not contain "Colon Cancer":
        tweets_with_Prostate_Cancer.Add(tweet)
    if tweet contains "Colon Cancer" and does not contain "Prostate Cancer":
        tweets_with_Colon_Cancer.Add(tweet)

final_results = Concatenate(tweets_with_Prostate_Cancer, tweets_with_Colon_Cancer)
# Download tweets for several search queries, one query after another.
#
# Each entry of searchQuery is paged through on its own (newest to oldest via
# max_id). When a query returns an empty page the script moves on to the next
# entry. Every tweet is appended as one JSON line to a timestamped file.
#
# Relies on names that must already be in scope: `api` (the tweepy API object
# used for searching), `tweepy`, `jsonpickle` and `time`.
searchQuery = ['Prostate Cancer', 'Colon Cancer']
i = 0  # index into searchQuery of the query currently being downloaded

maxTweets = 1000  # overall cap, counted across all queries together
tweetsPerQry = 100  # page size requested from the API
fprefix = 'REST'  # prefix of the output file name
language = ['en']

sinceId = None  # when set, only tweets newer than this id are requested
max_id = -1  # <= 0 means "no page fetched yet for the current query"

tweetCount = 0
print("Downloading max {0} tweets".format(maxTweets))
with open('/Users/eer/Desktop/' + fprefix + '.' + time.strftime('%Y-%m-%d_%H-%M-%S') + '.json', 'a+') as f:
    # Stop when the cap is reached or every query has been exhausted.
    while tweetCount < maxTweets and i < len(searchQuery):
        # Build the request for the current query only: one call per page.
        # NOTE(review): Twitter's search endpoint documents the language
        # filter as `lang`; `languages` is kept as originally written, so
        # confirm whether it has any effect.
        params = {'q': searchQuery[i], 'count': tweetsPerQry, 'languages': language}
        if max_id > 0:
            # Continue below the oldest tweet already written for this query.
            params['max_id'] = str(max_id - 1)
            if not sinceId:
                print("not sinceID 2")
        if sinceId:
            params['since_id'] = sinceId

        try:
            new_tweets = api.search(**params)
        except tweepy.TweepError as e:
            print("some error : " + str(e))
            break

        if not new_tweets:
            # Current query is exhausted: switch to the next one and restart
            # paging from the newest tweets. The loop condition ends the run
            # once there is no next query.
            print("No more tweets found; checking next query")
            i = i + 1
            max_id = -1
            continue

        for tweet in new_tweets:
            f.write(jsonpickle.encode(tweet._json, unpicklable=False) +
                    '\n')

        tweetCount += len(new_tweets)
        print("Downloaded {0} tweets".format(tweetCount))
        # Remember the id of the last tweet of this page for the next request.
        max_id = new_tweets[-1].id

print ("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fprefix))