在 csv 文件中写入从推文中提取的 url
Writing URLs extracted from tweets to a CSV file
我正在尝试使用此代码从推文中提取 URL。它运行良好,并能输出完整的 URL。我想将所有这些 URL 写入一个 CSV 文件,无论是与推文相同的文件(这样更好)还是另一个文件都可以。我尝试过“.to_csv”和 writerow 函数等不同方法,但都没有成功,也许是因为我把它们放错了位置。感谢您的帮助!
def get_tweets(handle, number_of_tweets=200, include_urls=False):
    """Fetch a user's timeline and print every URL found in the tweets.

    Authenticates with tweepy using the module-level credentials, requests
    the timeline for ``handle`` and, for each URL matched in a tweet's text,
    opens it and prints the address reported by ``response.geturl()``. When
    a URL cannot be opened, the URL as matched in the text is printed
    instead.

    Args:
        handle: Screen name passed to ``api.user_timeline``.
        number_of_tweets: Value passed as ``count`` to ``api.user_timeline``.
        include_urls: When true, add a ``Urls`` column holding the URLs
            printed for each tweet, joined with single spaces (empty string
            when a tweet has none). Defaults to false so the returned frame
            keeps its single ``ScreenName`` column.

    Returns:
        A DataFrame with one ``ScreenName`` row per fetched tweet. If
        authentication or the timeline request fails, the error is printed
        and an empty frame with the same columns is returned.
    """
    columns = ['ScreenName', 'Urls'] if include_urls else ['ScreenName']
    try:
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        api = tweepy.API(auth)
        tweets = api.user_timeline(screen_name=handle, count=number_of_tweets)
    except Exception as exc:
        # Report the failure and hand back an empty frame so the caller
        # always receives a DataFrame instead of an undefined result.
        print(handle, "Could not fetch tweets: {}".format(exc))
        return pd.DataFrame(columns=columns)

    print(handle, "Number of tweets extracted: {}\n".format(len(tweets)))
    df = pd.DataFrame(
        data=[tweet.user.screen_name for tweet in tweets],
        columns=['ScreenName'],
    )

    # Raw string: the pattern contains "\(" and "\)", which are invalid
    # escape sequences in an ordinary string literal.
    url_pattern = re.compile(
        r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    )
    opener = urllib.request.build_opener()

    urls_per_tweet = []
    for tweet in tweets:
        resolved = []
        for url in url_pattern.findall(tweet.text):
            try:
                # The context manager closes the HTTP response once the
                # final address has been read.
                with opener.open(urllib.request.Request(url)) as response:
                    actual_url = response.geturl()
            except Exception:
                # Best effort: fall back to the URL as it appears in the
                # tweet when it cannot be opened.
                actual_url = url
            print(actual_url)
            resolved.append(actual_url)
        urls_per_tweet.append(" ".join(resolved))

    if include_urls:
        df['Urls'] = urls_per_tweet
    return df
# Screen names to process; the single entry is a placeholder.
handles = ["name of the user"]

# One get_tweets call per screen name. df_new is rebound on every pass, so
# after the loop it refers to the frame returned for the last handle.
for handle in handles:
    df_new = get_tweets(handle)
你可以试试这个:
# Screen names to export; the single entry is a placeholder.
handles = ["name of the user"]

for handle in handles:
    df_new = get_tweets(handle)
    # Write this handle's frame to "<handle>_tweets.csv" before the next
    # pass rebinds df_new; index=False omits the row index from the file.
    df_new.to_csv(f"{handle}_tweets.csv", index=False)
我正在尝试使用此代码从推文中提取 URL。它运行良好,并能输出完整的 URL。我想将所有这些 URL 写入一个 CSV 文件,无论是与推文相同的文件(这样更好)还是另一个文件都可以。我尝试过“.to_csv”和 writerow 函数等不同方法,但都没有成功,也许是因为我把它们放错了位置。感谢您的帮助!
def get_tweets(handle, number_of_tweets=200, include_urls=False):
    """Fetch a user's timeline and print every URL found in the tweets.

    Authenticates with tweepy using the module-level credentials, requests
    the timeline for ``handle`` and, for each URL matched in a tweet's text,
    opens it and prints the address reported by ``response.geturl()``. When
    a URL cannot be opened, the URL as matched in the text is printed
    instead.

    Args:
        handle: Screen name passed to ``api.user_timeline``.
        number_of_tweets: Value passed as ``count`` to ``api.user_timeline``.
        include_urls: When true, add a ``Urls`` column holding the URLs
            printed for each tweet, joined with single spaces (empty string
            when a tweet has none). Defaults to false so the returned frame
            keeps its single ``ScreenName`` column.

    Returns:
        A DataFrame with one ``ScreenName`` row per fetched tweet. If
        authentication or the timeline request fails, the error is printed
        and an empty frame with the same columns is returned.
    """
    columns = ['ScreenName', 'Urls'] if include_urls else ['ScreenName']
    try:
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        api = tweepy.API(auth)
        tweets = api.user_timeline(screen_name=handle, count=number_of_tweets)
    except Exception as exc:
        # Report the failure and hand back an empty frame so the caller
        # always receives a DataFrame instead of an undefined result.
        print(handle, "Could not fetch tweets: {}".format(exc))
        return pd.DataFrame(columns=columns)

    print(handle, "Number of tweets extracted: {}\n".format(len(tweets)))
    df = pd.DataFrame(
        data=[tweet.user.screen_name for tweet in tweets],
        columns=['ScreenName'],
    )

    # Raw string: the pattern contains "\(" and "\)", which are invalid
    # escape sequences in an ordinary string literal.
    url_pattern = re.compile(
        r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    )
    opener = urllib.request.build_opener()

    urls_per_tweet = []
    for tweet in tweets:
        resolved = []
        for url in url_pattern.findall(tweet.text):
            try:
                # The context manager closes the HTTP response once the
                # final address has been read.
                with opener.open(urllib.request.Request(url)) as response:
                    actual_url = response.geturl()
            except Exception:
                # Best effort: fall back to the URL as it appears in the
                # tweet when it cannot be opened.
                actual_url = url
            print(actual_url)
            resolved.append(actual_url)
        urls_per_tweet.append(" ".join(resolved))

    if include_urls:
        df['Urls'] = urls_per_tweet
    return df
# Screen names to process; the single entry is a placeholder.
handles = ["name of the user"]

# One get_tweets call per screen name. df_new is rebound on every pass, so
# after the loop it refers to the frame returned for the last handle.
for handle in handles:
    df_new = get_tweets(handle)
你可以试试这个:
# Screen names to export; the single entry is a placeholder.
handles = ["name of the user"]

for handle in handles:
    df_new = get_tweets(handle)
    # Write this handle's frame to "<handle>_tweets.csv" before the next
    # pass rebinds df_new; index=False omits the row index from the file.
    df_new.to_csv(f"{handle}_tweets.csv", index=False)