Loop over multiple Twitter search queries REST API
I have a working REST Search API script that pulls tweets, based on https://www.karambelkar.info/2015/01/how-to-use-twitters-search-rest-api-most-effectively./
Problem: The code works, but it pulls tweets using searchQuery1 and searchQuery2 together (e.g. tweets containing Prostate Cancer + Colon Cancer). I don't want that. Instead, I want all tweets from searchQuery1 first (tweets containing only Prostate Cancer), followed by all tweets from searchQuery2 (tweets containing only Colon Cancer). The queries should run separately.
Goal: Loop over X search queries in order (e.g. searchQuery1, searchQuery2, etc.)
Thanks!
import time
import jsonpickle
import tweepy

# `api` is an authenticated tweepy.API instance, set up earlier in the working script.
searchQuery1 = 'Prostate Cancer'
searchQuery2 = 'Colon Cancer'
maxTweets = 10000
tweetsPerQry = 100
fprefix = 'REST'
sinceId = None
max_id = -1L
tweetCount = 0
with open('/Users/eer/Desktop/' + fprefix + '.' + time.strftime('%Y-%m-%d_%H-%M-%S') + '.json', 'a+') as f:  # open file
    while tweetCount < maxTweets:
        try:
            if (max_id <= 0):
                if (not sinceId):
                    for x, y in zip(searchQuery1, searchQuery2):
                        new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry)
                else:
                    print "sinceID 1"
                    new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry,
                                            since_id=sinceId)
            else:
                if (not sinceId):
                    print "not sinceID 2"
                    new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry,
                                            max_id=str(max_id - 1))
                else:
                    print "sinceID 1"
                    new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry,
                                            max_id=str(max_id - 1),
                                            since_id=sinceId)
            if not new_tweets:
                print("No more tweets found")
                break
            for tweet in new_tweets:
                f.write(jsonpickle.encode(tweet._json, unpicklable=False) + '\n')
            tweetCount += len(new_tweets)
            max_id = new_tweets[-1].id
        except tweepy.TweepError as e:
            print("some error : " + str(e))
            break
print ("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fprefix))
I would change your query to '"Prostate Cancer" OR "Colon Cancer"' and store the results, then order them however you like. It sounds like you want the following, in pseudocode (a Python sketch follows it):
tweets_with_Prostate_Cancer = []
tweets_with_Colon_Cancer = []
for each tweet in the result set:
    if tweet contains "Prostate Cancer" and does not contain "Colon Cancer":
        tweets_with_Prostate_Cancer.Add(tweet)
    if tweet contains "Colon Cancer" and does not contain "Prostate Cancer":
        tweets_with_Colon_Cancer.Add(tweet)
final_results = Concatenate(tweets_with_Prostate_Cancer, tweets_with_Colon_Cancer)
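A minimal Python sketch of that post-filtering step, assuming the combined '"Prostate Cancer" OR "Colon Cancer"' query has already been paginated into a list of status objects (the all_tweets variable and the helper name are illustrative, not part of the original script):

def split_by_term(tweets, term_a, term_b):
    # Bucket tweets that mention exactly one of the two terms, then concatenate.
    only_a, only_b = [], []
    for tweet in tweets:
        text = tweet.text.lower()
        has_a = term_a.lower() in text
        has_b = term_b.lower() in text
        if has_a and not has_b:
            only_a.append(tweet)
        elif has_b and not has_a:
            only_b.append(tweet)
    return only_a + only_b  # term_a tweets first, then term_b

final_results = split_by_term(all_tweets, 'Prostate Cancer', 'Colon Cancer')

With a single OR query the separation happens locally after download, so only one pagination loop against the Search API is needed.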
searchQuery = ['Prostate Cancer', 'Colon Cancer']
i = 0
maxTweets = 1000
tweetsPerQry = 100
fprefix = 'REST'
language = ['en']
sinceId = None
max_id = -1L
tweetCount = 0
print("Downloading max {0} tweets".format(maxTweets))
with open('/Users/eer/Desktop/' + fprefix + '.' + time.strftime('%Y-%m-%d_%H-%M-%S') + '.json', 'a+') as f:
    while tweetCount < maxTweets:
        try:
            if (max_id <= 0):
                if (not sinceId):
                    for search in searchQuery:
                        new_tweets = api.search(q=searchQuery[i], count=tweetsPerQry, languages=language)
                else:
                    for search in searchQuery:
                        new_tweets = api.search(q=searchQuery[i], count=tweetsPerQry,
                                                since_id=sinceId, languages=language)
            else:
                if (not sinceId):
                    print "not sinceID 2"
                    for search in searchQuery:
                        new_tweets = api.search(q=searchQuery[i], count=tweetsPerQry,
                                                max_id=str(max_id - 1), languages=language)
                else:
                    for search in searchQuery:
                        new_tweets = api.search(q=searchQuery[i], count=tweetsPerQry,
                                                max_id=str(max_id - 1),
                                                since_id=sinceId, languages=language)
            if not new_tweets:
                print("No more tweets found; checking next query")
                i = i + 1
                try:
                    for search in searchQuery:
                        new_tweets = api.search(q=searchQuery[i], count=tweetsPerQry, languages=language)
                except IndexError:
                    break
            for tweet in new_tweets:
                f.write(jsonpickle.encode(tweet._json, unpicklable=False) + '\n')
            tweetCount += len(new_tweets)
            print("Downloaded {0} tweets".format(tweetCount))
            max_id = new_tweets[-1].id
        except tweepy.TweepError as e:
            print("some error : " + str(e))
            break
print ("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fprefix))