如何获取 Instagram 用户最新帖子的 URL? (Python)
How to get the URLs of the most recent posts of an Instagram user? (with Python)
我想获取 Instagram 用户(不是我,而且我没有 IG 帐户,因此无法使用 API)的最新帖子的 URL。网址应采用 https://www.instagram.com/p/BpnlsmWgqon/
的样式。
我尝试使用 response = requests.get(profile_url)
发出请求,然后使用 soup = BeautifulSoup(html, 'html.parser')
解析 HTML。
在这些和其他一些函数之后,我得到一个很大的 JSON 文件,其中包含最新照片的数据(但不是它们的 URL)。
我如何获取 URL 并提取它?
编辑: 这就是我现在编写的代码。真是一团糟,我尝试了很多方法,但没有一个奏效。
#from subprocess import call
#from instagram.client import InstagramAPI
import requests
import json
from bs4 import BeautifulSoup
#from InstagramAPI.InstagramAPI import InstagramAPI
from instagram.client import InstagramAPI
from config import login, password
# User id passed to user_recent_media() in api_thing().
userid = "6194091573"
#url = "https://www.instagram.com/mercadona.novedades/?__a=1"
#pic_url =
#call('instalooter user mercadona.novedades ./pics -n 2')
#r = requests.get("https://www.instagram.com/mercadona.novedades")
#print(r.text)
def request_pic_url(profile_url, timeout=10):
    """Download a page and return its body as text.

    Args:
        profile_url: URL handed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The response body decoded as text (``response.text``).

    Raises:
        requests.exceptions.RequestException: if the request fails or
            times out.
    """
    response = requests.get(profile_url, timeout=timeout)
    return response.text
def extract_json(html):
    """Parse the ``window._sharedData`` JSON object embedded in a page.

    Args:
        html: HTML source of the page as a string.

    Returns:
        The decoded JSON value assigned to ``window._sharedData``.

    Raises:
        ValueError: if no ``<script>`` whose text starts with
            ``window._sharedData =`` is found, or if its payload is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    marker = 'window._sharedData ='
    soup = BeautifulSoup(html, 'html.parser')
    # Look for the script by its content instead of assuming it is the first
    # <script> tag of the body.
    for script_tag in soup.find_all('script'):
        raw_string = script_tag.text.strip()
        if not raw_string.startswith(marker):
            continue
        payload = raw_string[len(marker):].strip()
        # Drop only the statement terminator; removing every ';' would corrupt
        # string values inside the JSON that legitimately contain one.
        if payload.endswith(';'):
            payload = payload[:-1]
        return json.loads(payload)
    raise ValueError("no 'window._sharedData' script found in the HTML")
def get_recent_pics(profile_url):
    """Return the timeline media nodes found on a profile page.

    The page is fetched with request_pic_url(), its embedded JSON is decoded
    with extract_json(), and the ``edges`` list under
    ``edge_owner_to_timeline_media`` is walked.

    Args:
        profile_url: URL of the profile page to download.

    Returns:
        A list with the value stored under each edge's ``'node'`` key, keeping
        only values that are non-empty dicts.
    """
    page_html = request_pic_url(profile_url)
    shared_data = extract_json(page_html)
    user = shared_data['entry_data']['ProfilePage'][0]['graphql']['user']
    edges = user['edge_owner_to_timeline_media']["edges"]
    candidates = (edge.get('node') for edge in edges)
    return [item for item in candidates if item and isinstance(item, dict)]
def api_thing():
    """Print the caption text of the recent media of ``userid``.

    Builds an ``InstagramAPI`` client from the configured ``login`` and
    ``password`` and calls ``user_recent_media(userid, 2)``.
    NOTE(review): the ``2`` is presumably the number of media requested;
    confirm against the client library.
    """
    client = InstagramAPI(login, password)
    # The call returns a pair; only the first element (the media) is used.
    latest, _next_page = client.user_recent_media(userid, 2)
    for item in latest:
        print(item.caption.text)
def main():
    """Script entry point: run api_thing()."""
    # api_thing() reads the module-level ``userid``; a local copy here would
    # never be used, so none is assigned.
    api_thing()


if __name__ == "__main__":
    main()
def get_large_pic(url):
    """Return the large-size media URL for a post URL.

    Args:
        url: Post URL, with or without a trailing slash
            (e.g. ``https://www.instagram.com/p/BpnlsmWgqon/``).

    Returns:
        ``url`` with ``/media/?size=l`` appended.
    """
    # Post URLs normally end with '/', so strip it first to avoid producing
    # '.../p/<code>//media/?size=l'.
    return url.rstrip("/") + "/media/?size=l"
def get_media_id(url, timeout=10):
    """Look up the ``media_id`` of a post through the oEmbed endpoint.

    Args:
        url: URL of the post to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``'media_id'`` in the JSON response.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            times out.
        KeyError: if the response JSON has no ``'media_id'`` entry.
    """
    # Pass the post URL through ``params`` so it is percent-encoded; pasting
    # it raw into the query string breaks when it contains '?' or '&'.
    req = requests.get(
        'https://api.instagram.com/oembed/',
        params={'url': url},
        timeout=timeout,
    )
    media_id = req.json()['media_id']
    return media_id
我建议您使用以下库:https://github.com/LevPasha/Instagram-API-python
# Build the module-level client that get_lastposts() calls into, then log in.
# "username" / "password" are literal strings here.
# NOTE(review): presumably placeholders for real credentials, and InstagramAPI
# presumably comes from the library recommended above -- confirm the import.
api = InstagramAPI("username", "password")
api.login()
def get_lastposts(us_id):
    """Collect the ``media_id`` of each captioned item in a user's feed.

    Calls ``api.getUserFeed(us_id)`` and reads the result from
    ``api.LastJson``.

    Args:
        us_id: User identifier forwarded to ``api.getUserFeed``.

    Returns:
        A list of ``media['caption']['media_id']`` values for the feed items
        whose caption is not ``None``. The list is empty when the response
        has no ``'items'`` entry.
    """
    api.getUserFeed(us_id)
    # Bind the result before looking at the response so there is always a
    # list to return, even when 'items' is missing.
    posts = []
    if 'items' in api.LastJson:
        for media in api.LastJson['items']:
            caption = media['caption']
            # Items without a caption carry no media_id in this field.
            if caption is not None:
                posts.append(caption['media_id'])
    return posts
# Example call; the returned list is not stored.
# NOTE(review): 'user_id' is presumably a placeholder for a real user id.
get_lastposts('user_id')
我想获取 Instagram 用户(不是我,而且我没有 IG 帐户,因此无法使用 API)的最新帖子的 URL。网址应采用 https://www.instagram.com/p/BpnlsmWgqon/
的样式。我尝试使用 response = requests.get(profile_url)
发出请求,然后使用 soup = BeautifulSoup(html, 'html.parser')
解析 HTML。
在这些和其他一些函数之后,我得到一个很大的 JSON 文件,其中包含最新照片的数据(但不是它们的 URL)。
我如何获取 URL 并提取它?
编辑: 这就是我现在编写的代码。真是一团糟,我尝试了很多方法,但没有一个奏效。
#from subprocess import call
#from instagram.client import InstagramAPI
import requests
import json
from bs4 import BeautifulSoup
#from InstagramAPI.InstagramAPI import InstagramAPI
from instagram.client import InstagramAPI
from config import login, password
# User id passed to user_recent_media() in api_thing().
userid = "6194091573"
#url = "https://www.instagram.com/mercadona.novedades/?__a=1"
#pic_url =
#call('instalooter user mercadona.novedades ./pics -n 2')
#r = requests.get("https://www.instagram.com/mercadona.novedades")
#print(r.text)
def request_pic_url(profile_url, timeout=10):
    """Download a page and return its body as text.

    Args:
        profile_url: URL handed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The response body decoded as text (``response.text``).

    Raises:
        requests.exceptions.RequestException: if the request fails or
            times out.
    """
    response = requests.get(profile_url, timeout=timeout)
    return response.text
def extract_json(html):
    """Parse the ``window._sharedData`` JSON object embedded in a page.

    Args:
        html: HTML source of the page as a string.

    Returns:
        The decoded JSON value assigned to ``window._sharedData``.

    Raises:
        ValueError: if no ``<script>`` whose text starts with
            ``window._sharedData =`` is found, or if its payload is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    marker = 'window._sharedData ='
    soup = BeautifulSoup(html, 'html.parser')
    # Look for the script by its content instead of assuming it is the first
    # <script> tag of the body.
    for script_tag in soup.find_all('script'):
        raw_string = script_tag.text.strip()
        if not raw_string.startswith(marker):
            continue
        payload = raw_string[len(marker):].strip()
        # Drop only the statement terminator; removing every ';' would corrupt
        # string values inside the JSON that legitimately contain one.
        if payload.endswith(';'):
            payload = payload[:-1]
        return json.loads(payload)
    raise ValueError("no 'window._sharedData' script found in the HTML")
def get_recent_pics(profile_url):
    """Return the timeline media nodes found on a profile page.

    The page is fetched with request_pic_url(), its embedded JSON is decoded
    with extract_json(), and the ``edges`` list under
    ``edge_owner_to_timeline_media`` is walked.

    Args:
        profile_url: URL of the profile page to download.

    Returns:
        A list with the value stored under each edge's ``'node'`` key, keeping
        only values that are non-empty dicts.
    """
    page_html = request_pic_url(profile_url)
    shared_data = extract_json(page_html)
    user = shared_data['entry_data']['ProfilePage'][0]['graphql']['user']
    edges = user['edge_owner_to_timeline_media']["edges"]
    candidates = (edge.get('node') for edge in edges)
    return [item for item in candidates if item and isinstance(item, dict)]
def api_thing():
    """Print the caption text of the recent media of ``userid``.

    Builds an ``InstagramAPI`` client from the configured ``login`` and
    ``password`` and calls ``user_recent_media(userid, 2)``.
    NOTE(review): the ``2`` is presumably the number of media requested;
    confirm against the client library.
    """
    client = InstagramAPI(login, password)
    # The call returns a pair; only the first element (the media) is used.
    latest, _next_page = client.user_recent_media(userid, 2)
    for item in latest:
        print(item.caption.text)
def main():
    """Script entry point: run api_thing()."""
    # api_thing() reads the module-level ``userid``; a local copy here would
    # never be used, so none is assigned.
    api_thing()


if __name__ == "__main__":
    main()
def get_large_pic(url):
    """Return the large-size media URL for a post URL.

    Args:
        url: Post URL, with or without a trailing slash
            (e.g. ``https://www.instagram.com/p/BpnlsmWgqon/``).

    Returns:
        ``url`` with ``/media/?size=l`` appended.
    """
    # Post URLs normally end with '/', so strip it first to avoid producing
    # '.../p/<code>//media/?size=l'.
    return url.rstrip("/") + "/media/?size=l"
def get_media_id(url, timeout=10):
    """Look up the ``media_id`` of a post through the oEmbed endpoint.

    Args:
        url: URL of the post to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``'media_id'`` in the JSON response.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            times out.
        KeyError: if the response JSON has no ``'media_id'`` entry.
    """
    # Pass the post URL through ``params`` so it is percent-encoded; pasting
    # it raw into the query string breaks when it contains '?' or '&'.
    req = requests.get(
        'https://api.instagram.com/oembed/',
        params={'url': url},
        timeout=timeout,
    )
    media_id = req.json()['media_id']
    return media_id
我建议您使用以下库:https://github.com/LevPasha/Instagram-API-python
# Build the module-level client that get_lastposts() calls into, then log in.
# "username" / "password" are literal strings here.
# NOTE(review): presumably placeholders for real credentials, and InstagramAPI
# presumably comes from the library recommended above -- confirm the import.
api = InstagramAPI("username", "password")
api.login()
def get_lastposts(us_id):
    """Collect the ``media_id`` of each captioned item in a user's feed.

    Calls ``api.getUserFeed(us_id)`` and reads the result from
    ``api.LastJson``.

    Args:
        us_id: User identifier forwarded to ``api.getUserFeed``.

    Returns:
        A list of ``media['caption']['media_id']`` values for the feed items
        whose caption is not ``None``. The list is empty when the response
        has no ``'items'`` entry.
    """
    api.getUserFeed(us_id)
    # Bind the result before looking at the response so there is always a
    # list to return, even when 'items' is missing.
    posts = []
    if 'items' in api.LastJson:
        for media in api.LastJson['items']:
            caption = media['caption']
            # Items without a caption carry no media_id in this field.
            if caption is not None:
                posts.append(caption['media_id'])
    return posts
# Example call; the returned list is not stored.
# NOTE(review): 'user_id' is presumably a placeholder for a real user id.
get_lastposts('user_id')