获取 Instagram 帖子的点赞者列表 - Python & Selenium
Getting list of likers for an instagram post - Python & Selenium
我正在练习网络抓取。为此,我给自己定了一个挑战:获取所有给某个 Instagram 帖子点赞的用户名单。
我的问题是,我始终只能获取到前 11 个点赞者的用户名。我找不到在获取点赞者的同时自动滚动列表的正确方法。
这是我在 Jupyter Notebook 中的过程(它还不能作为脚本运行):
from selenium import webdriver
import pandas as pd
# Open the post in a fresh Chrome session.
driver = webdriver.Chrome()
driver.get('https://www.instagram.com/p/BuE82VfHRa6/')

# Click the first anchor matched under the post's second <section>
# (presumably the "likes" link that opens the likers dialog).
# click() returns None, so userid_element ends up as None.
likes_links = driver.find_elements_by_xpath('//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/a')
userid_element = likes_links[0].click()

# Read the title attribute of every anchor the XPath matches right now.
# Nothing scrolls the dialog here, so only rows already in the DOM are seen.
elems = driver.find_elements_by_xpath("//*[@id]/div/a")
users = [elem.get_attribute('title') for elem in elems]
print(users)
你们有什么想法吗?
非常感谢
请尝试以下代码,如果可行请告诉我。
from selenium import webdriver
# Open the post in a fresh Chrome session.
driver = webdriver.Chrome()
driver.get('https://www.instagram.com/p/BuE82VfHRa6/')

# Select anchors by their exact class attribute string.
elems = driver.find_elements_by_xpath("//a[@class='FPmhX notranslate TlrDj']")

# Record and echo the title attribute of each matched anchor, in page order.
users = []
for elem in elems:
    title = elem.get_attribute('title')
    users.append(title)
    print('Title : ' + title)
print(users)
输出:-
Title : kyliejenner
Title : saturdayshade28
Title : worldmeetzboy
Title : mrokon
Title : addieisaac
Title : addieisaac
Title : amber_doerksen
Title : amber_doerksen
Title : addieisaac
Title : zayn6117
Title : amber_doerksen
Title : amber_doerksen
Title : worldmeetzboy
Title : worldmeetzboy
Title : razvanpopic1301
Title : johanna.trmn
Title : johanna.trmn
Title : johanna.trmn
Title : americ.av
Title : gabriellcostta1.0
Title : gabriellcostta1.0
Title : gabriellcostta1.0
Title : worldmeetzboy
Title : enactusepi
Title : enactusepi
[u'kyliejenner', u'saturdayshade28', u'worldmeetzboy', u'mrokon', u'addieisaac', u'addieisaac', u'amber_doerksen', u'amber_doerksen', u'addieisaac', u'zayn6117', u'amber_doerksen', u'amber_doerksen', u'worldmeetzboy', u'worldmeetzboy', u'razvanpopic1301', u'johanna.trmn', u'johanna.trmn', u'johanna.trmn', u'americ.av', u'gabriellcostta1.0', u'gabriellcostta1.0', u'gabriellcostta1.0', u'worldmeetzboy', u'enactusepi', u'enactusepi']
我猜 Instagram 页面上同时最多只保留 17 个点赞用户元素。
所以,需要循环执行以下步骤:
- 从页面获取元素列表
- 保存到自己的列表中
- 向下滚动以加载新的元素
- 检查是否已经滚动到最后一个元素
import time  # the waits below need it; the snippet never imported it

driver.get('https://www.instagram.com/p/BuE82VfHRa6/')
# Click the first anchor matched under the post's second <section>
# (presumably the "likes" link). click() returns None.
userid_element = driver.find_elements_by_xpath('//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/a')[0].click()
time.sleep(2)

# here, you can see user list you want.
# you have to scroll down to download more data from instagram server.
# loop until the padding-top of the user list container stops changing.
users = []
container_xpath = "/html/body/div[3]/div/div[2]/div/div"
height = driver.find_element_by_xpath(container_xpath).value_of_css_property("padding-top")
while True:
    lastHeight = height
    # step 1: collect the anchors currently present in the page
    elements = driver.find_elements_by_xpath("//*[@id]/div/a")
    if not elements:
        # Nothing matched, so there is no last element to scroll to.
        break
    # step 2: remember every title not seen before, in first-seen order
    for element in elements:
        title = element.get_attribute('title')
        if title not in users:
            users.append(title)
    # step 3: scroll the last anchor into view so more rows get loaded
    driver.execute_script("return arguments[0].scrollIntoView();", elements[-1])
    time.sleep(1)
    # step 4: an unchanged padding-top is taken to mean the list has ended
    height = driver.find_element_by_xpath(container_xpath).value_of_css_property("padding-top")
    if lastHeight == height:
        break
print(users)
print(len(users))
driver.quit()
我用一个有将近 100 个赞的帖子测试过,代码可以正常工作。
我无法让前面回答中贴出的代码原样运行。因此,我做了下面的改编,现在每个帖子可以获取到约 500 个点赞者:
def get_post_likers(shortcode, max_users=1500):
    """Collect the usernames of accounts that liked an Instagram post.

    Opens the post, clicks its "liked_by" link, then repeatedly reads the
    anchors in the likers dialog and scrolls the last one into view. The loop
    ends when the dialog's bottom padding stops changing, when no anchors are
    found, or when ``max_users`` names have been collected.

    Args:
        shortcode: Post identifier as used in ``/p/<shortcode>/`` URLs.
        max_users: Stop once at least this many names are collected.
            Defaults to 1500, the cap that used to be hard-coded.

    Returns:
        Unique ``title`` attribute values in first-seen order. The list may
        exceed ``max_users`` because the cap is checked once per pass.
    """
    import time  # fixed waits below; not imported anywhere in the snippet

    # NOTE(review): ch.initialize() is a helper defined elsewhere; presumably
    # it returns a WebDriver. The browser is not closed here - confirm who
    # owns its lifetime.
    chrome = ch.initialize()
    chrome.get('https://www.instagram.com/p/' + shortcode + '/')
    chrome.execute_script("window.scrollTo(0, 1080)")
    url = "/p/" + shortcode + "/liked_by/"
    time.sleep(2)
    like_link = chrome.find_element_by_xpath('//a[@href="' + url + '"]')
    like_link.click()
    time.sleep(2)

    users = []
    padding_xpath = "//div[@role = 'dialog']/div[2]/div[1]/div[1]"
    pb = chrome.find_element_by_xpath(padding_xpath).value_of_css_property("padding-bottom")
    while True:
        lastHeight = pb
        # step 1: anchors currently present in the page
        elements = chrome.find_elements_by_xpath("//*[@id]/div/a")
        if not elements:
            # Nothing matched, so there is no last element to scroll to.
            break
        # step 2: keep every title not seen before
        for element in elements:
            title = element.get_attribute('title')
            if title not in users:
                users.append(title)
        # step 3: scroll the last anchor into view so more rows get loaded
        chrome.execute_script("return arguments[0].scrollIntoView();", elements[-1])
        time.sleep(1)
        # step 4: stop when the padding is unchanged or the cap is reached
        pb = chrome.find_element_by_xpath(padding_xpath).value_of_css_property("padding-bottom")
        if lastHeight == pb or len(users) >= max_users:
            break
    return users
这对我有用:
driver.get('https://www.instagram.com/p/BuE82VfHRa6/')
time.sleep(2)
# Click the button that opens the likers dialog. click() returns None, so
# userid_element ends up as None.
userid_element = driver.find_element_by_xpath('//*[@id="react-root"]/section/main/div/div[1]/article/div[3]/section[2]/div/div[2]/button').click()
time.sleep(2)
elems = driver.find_elements_by_xpath("//a[@class='FPmhX notranslate TlrDj']")
users = []
# Dialog rows are addressed by 1-based position, rows 1 through 10.
for i in range(1, 11):
    if i % 10 == 9:
        # On row 9, click the list container and press SPACE (presumably to
        # scroll it). actionChain and Keys are not defined in this snippet;
        # they must already exist in the session.
        driver.find_element_by_xpath('/html/body/div[4]/div/div/div[2]/div').click()
        actionChain.key_down(Keys.SPACE).key_up(Keys.SPACE).perform()
    row_xpath = '/html/body/div[4]/div/div/div[2]/div/div/div[' + str(i) + ']/div[2]/div[1]/div/a'
    print(row_xpath)
    Title = driver.find_element_by_xpath(row_xpath).get_attribute('title')
    users.append(Title)
    print('Title : ' + Title)
print(users)
我尝试了以上所有解决方案,但没有一个有效。我认为它们已经过时了。
于是我自己写了一个。它在 2020 年可以完美运行。
此代码会打开指定“用户名”的个人主页,取得其最新的帖子,并获取给该帖子点赞的用户。
def getPosts():
    """Return the post links found on the page currently loaded in ``driver``.

    Returns:
        The ``href`` values, in document order, of every ``<a>`` element
        whose href contains ``'.com/p/'``.
    """
    hrefs_in_view = []
    for anchor in driver.find_elements_by_tag_name('a'):
        # get_attribute returns None for anchors without an href; skip those
        # instead of raising TypeError on the substring test.
        href = anchor.get_attribute('href')
        if href and '.com/p/' in href:
            hrefs_in_view.append(href)
    return hrefs_in_view
def getLikers(username, limit, post=1, max_idle_passes=10):
    """Collect profile URLs of users who liked one of ``username``'s posts.

    Loads the profile page, opens the post at index ``post`` of the links
    returned by ``getPosts()``, opens the likers dialog and reads its rows,
    pressing SPACE between passes to scroll.

    Args:
        username: Instagram account whose profile page is loaded.
        limit: Maximum number of liker URLs to return.
        post: Index into the list returned by ``getPosts()``.
        max_idle_passes: Give up after this many consecutive passes that
            yield no new user, so a post with fewer than ``limit`` likers
            cannot loop forever.

    Returns:
        Up to ``limit`` unique ``https://www.instagram.com/<name>`` strings in
        first-seen order.
    """
    import time
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
    )
    from selenium.webdriver.common.keys import Keys

    driver.get('https://www.instagram.com/' + username)
    time.sleep(1)

    # Open the selected post.
    driver.get(getPosts()[post])
    time.sleep(2)

    # Open the likers dialog.
    driver.find_element_by_xpath('//button[@class="sqdOP yWX7d _8A5w5 "]').click()
    time.sleep(1)

    # Click inside the dialog's list so the SPACE key presses go to it.
    rows_xpath = '//div[@role="dialog"]/div[1]/div[2]/div[1]/div[1]'
    driver.find_element_by_xpath(rows_xpath).click()

    users = []
    seen = set()  # every pass restarts at row 1, so rows repeat between passes
    idle_passes = 0
    while len(users) < limit and idle_passes < max_idle_passes:
        found_new = False
        for i in range(1, 1000):
            try:
                name = driver.find_element_by_xpath(
                    rows_xpath + '/div[' + str(i) + ']/div[2]/div[1]/div[1]').text
            except (NoSuchElementException, StaleElementReferenceException):
                # Past the last row, or the row was re-rendered mid-read.
                break
            if name and name not in seen:
                seen.add(name)
                users.append("https://www.instagram.com/" + name)
                found_new = True
                if len(users) >= limit:
                    break
        idle_passes = 0 if found_new else idle_passes + 1
        # Use a new chain each pass: a reused ActionChains object keeps the
        # actions queued on it earlier unless it is reset.
        webdriver.ActionChains(driver).key_down(Keys.SPACE).key_up(Keys.SPACE).perform()
        time.sleep(0.5)
    return users
运行likers = getLikers("deirvlon",100,1)
我正在练习网络抓取。为此,我给自己定了一个挑战:获取所有给某个 Instagram 帖子点赞的用户名单。我的问题是,我始终只能获取到前 11 个点赞者的用户名。我找不到在获取点赞者的同时自动滚动列表的正确方法。
这是我在 Jupyter Notebook 中的过程(它还不能作为脚本运行):
from selenium import webdriver
import pandas as pd
# Open the post in a fresh Chrome session.
driver = webdriver.Chrome()
driver.get('https://www.instagram.com/p/BuE82VfHRa6/')

# Click the first anchor matched under the post's second <section>
# (presumably the "likes" link that opens the likers dialog).
# click() returns None, so userid_element ends up as None.
likes_links = driver.find_elements_by_xpath('//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/a')
userid_element = likes_links[0].click()

# Read the title attribute of every anchor the XPath matches right now.
# Nothing scrolls the dialog here, so only rows already in the DOM are seen.
elems = driver.find_elements_by_xpath("//*[@id]/div/a")
users = [elem.get_attribute('title') for elem in elems]
print(users)
你们有什么想法吗?
非常感谢
请尝试以下代码,如果可行请告诉我。
from selenium import webdriver
# Open the post in a fresh Chrome session.
driver = webdriver.Chrome()
driver.get('https://www.instagram.com/p/BuE82VfHRa6/')

# Select anchors by their exact class attribute string.
elems = driver.find_elements_by_xpath("//a[@class='FPmhX notranslate TlrDj']")

# Record and echo the title attribute of each matched anchor, in page order.
users = []
for elem in elems:
    title = elem.get_attribute('title')
    users.append(title)
    print('Title : ' + title)
print(users)
输出:-
Title : kyliejenner
Title : saturdayshade28
Title : worldmeetzboy
Title : mrokon
Title : addieisaac
Title : addieisaac
Title : amber_doerksen
Title : amber_doerksen
Title : addieisaac
Title : zayn6117
Title : amber_doerksen
Title : amber_doerksen
Title : worldmeetzboy
Title : worldmeetzboy
Title : razvanpopic1301
Title : johanna.trmn
Title : johanna.trmn
Title : johanna.trmn
Title : americ.av
Title : gabriellcostta1.0
Title : gabriellcostta1.0
Title : gabriellcostta1.0
Title : worldmeetzboy
Title : enactusepi
Title : enactusepi
[u'kyliejenner', u'saturdayshade28', u'worldmeetzboy', u'mrokon', u'addieisaac', u'addieisaac', u'amber_doerksen', u'amber_doerksen', u'addieisaac', u'zayn6117', u'amber_doerksen', u'amber_doerksen', u'worldmeetzboy', u'worldmeetzboy', u'razvanpopic1301', u'johanna.trmn', u'johanna.trmn', u'johanna.trmn', u'americ.av', u'gabriellcostta1.0', u'gabriellcostta1.0', u'gabriellcostta1.0', u'worldmeetzboy', u'enactusepi', u'enactusepi']
我猜 Instagram 页面上同时最多只保留 17 个点赞用户元素。
所以,需要循环执行以下步骤:
- 从页面获取元素列表
- 保存到自己的列表中
- 向下滚动以加载新的元素
- 检查是否已经滚动到最后一个元素
import time  # the waits below need it; the snippet never imported it

driver.get('https://www.instagram.com/p/BuE82VfHRa6/')
# Click the first anchor matched under the post's second <section>
# (presumably the "likes" link). click() returns None.
userid_element = driver.find_elements_by_xpath('//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/a')[0].click()
time.sleep(2)

# here, you can see user list you want.
# you have to scroll down to download more data from instagram server.
# loop until the padding-top of the user list container stops changing.
users = []
container_xpath = "/html/body/div[3]/div/div[2]/div/div"
height = driver.find_element_by_xpath(container_xpath).value_of_css_property("padding-top")
while True:
    lastHeight = height
    # step 1: collect the anchors currently present in the page
    elements = driver.find_elements_by_xpath("//*[@id]/div/a")
    if not elements:
        # Nothing matched, so there is no last element to scroll to.
        break
    # step 2: remember every title not seen before, in first-seen order
    for element in elements:
        title = element.get_attribute('title')
        if title not in users:
            users.append(title)
    # step 3: scroll the last anchor into view so more rows get loaded
    driver.execute_script("return arguments[0].scrollIntoView();", elements[-1])
    time.sleep(1)
    # step 4: an unchanged padding-top is taken to mean the list has ended
    height = driver.find_element_by_xpath(container_xpath).value_of_css_property("padding-top")
    if lastHeight == height:
        break
print(users)
print(len(users))
driver.quit()
我用一个有将近 100 个赞的帖子测试过,代码可以正常工作。
我无法让前面回答中贴出的代码原样运行。因此,我做了下面的改编,现在每个帖子
获得了 ~500 个赞def get_post_likers(shortcode):
chrome = ch.initialize()
chrome.get('https://www.instagram.com/p/' + shortcode + '/')
chrome.execute_script("window.scrollTo(0, 1080)")
url = "/p/" + shortcode + "/liked_by/"
time.sleep(2)
like_link = chrome.find_element_by_xpath('//a[@href="'+url+'"]')
like_link.click()
time.sleep(2)
users = []
pb = chrome.find_element_by_xpath("//div[@role = 'dialog']/div[2]/div[1]/div[1]").value_of_css_property("padding-bottom")
match = False
while match==False:
lastHeight = pb
# step 1
elements = chrome.find_elements_by_xpath("//*[@id]/div/a")
# step 2
for element in elements:
if element.get_attribute('title') not in users:
users.append(element.get_attribute('title'))
# step 3
chrome.execute_script("return arguments[0].scrollIntoView();", elements[-1])
time.sleep(1)
# step 4
pb = chrome.find_element_by_xpath("//div[@role = 'dialog']/div[2]/div[1]/div[1]").value_of_css_property("padding-bottom")
if lastHeight==pb or len(users) >= 1500:
match = True
return users
这对我有用:
driver.get('https://www.instagram.com/p/BuE82VfHRa6/')
time.sleep(2)
# Click the button that opens the likers dialog. click() returns None, so
# userid_element ends up as None.
userid_element = driver.find_element_by_xpath('//*[@id="react-root"]/section/main/div/div[1]/article/div[3]/section[2]/div/div[2]/button').click()
time.sleep(2)
elems = driver.find_elements_by_xpath("//a[@class='FPmhX notranslate TlrDj']")
users = []
# Dialog rows are addressed by 1-based position, rows 1 through 10.
for i in range(1, 11):
    if i % 10 == 9:
        # On row 9, click the list container and press SPACE (presumably to
        # scroll it). actionChain and Keys are not defined in this snippet;
        # they must already exist in the session.
        driver.find_element_by_xpath('/html/body/div[4]/div/div/div[2]/div').click()
        actionChain.key_down(Keys.SPACE).key_up(Keys.SPACE).perform()
    row_xpath = '/html/body/div[4]/div/div/div[2]/div/div/div[' + str(i) + ']/div[2]/div[1]/div/a'
    print(row_xpath)
    Title = driver.find_element_by_xpath(row_xpath).get_attribute('title')
    users.append(Title)
    print('Title : ' + Title)
print(users)
我尝试了以上所有解决方案,但没有一个有效。我认为它们已经过时了。
于是我自己写了一个。它在 2020 年可以完美运行。
此代码会打开指定“用户名”的个人主页,取得其最新的帖子,并获取给该帖子点赞的用户。
def getPosts():
    """Return the post links found on the page currently loaded in ``driver``.

    Returns:
        The ``href`` values, in document order, of every ``<a>`` element
        whose href contains ``'.com/p/'``.
    """
    hrefs_in_view = []
    for anchor in driver.find_elements_by_tag_name('a'):
        # get_attribute returns None for anchors without an href; skip those
        # instead of raising TypeError on the substring test.
        href = anchor.get_attribute('href')
        if href and '.com/p/' in href:
            hrefs_in_view.append(href)
    return hrefs_in_view
def getLikers(username, limit, post=1, max_idle_passes=10):
    """Collect profile URLs of users who liked one of ``username``'s posts.

    Loads the profile page, opens the post at index ``post`` of the links
    returned by ``getPosts()``, opens the likers dialog and reads its rows,
    pressing SPACE between passes to scroll.

    Args:
        username: Instagram account whose profile page is loaded.
        limit: Maximum number of liker URLs to return.
        post: Index into the list returned by ``getPosts()``.
        max_idle_passes: Give up after this many consecutive passes that
            yield no new user, so a post with fewer than ``limit`` likers
            cannot loop forever.

    Returns:
        Up to ``limit`` unique ``https://www.instagram.com/<name>`` strings in
        first-seen order.
    """
    import time
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
    )
    from selenium.webdriver.common.keys import Keys

    driver.get('https://www.instagram.com/' + username)
    time.sleep(1)

    # Open the selected post.
    driver.get(getPosts()[post])
    time.sleep(2)

    # Open the likers dialog.
    driver.find_element_by_xpath('//button[@class="sqdOP yWX7d _8A5w5 "]').click()
    time.sleep(1)

    # Click inside the dialog's list so the SPACE key presses go to it.
    rows_xpath = '//div[@role="dialog"]/div[1]/div[2]/div[1]/div[1]'
    driver.find_element_by_xpath(rows_xpath).click()

    users = []
    seen = set()  # every pass restarts at row 1, so rows repeat between passes
    idle_passes = 0
    while len(users) < limit and idle_passes < max_idle_passes:
        found_new = False
        for i in range(1, 1000):
            try:
                name = driver.find_element_by_xpath(
                    rows_xpath + '/div[' + str(i) + ']/div[2]/div[1]/div[1]').text
            except (NoSuchElementException, StaleElementReferenceException):
                # Past the last row, or the row was re-rendered mid-read.
                break
            if name and name not in seen:
                seen.add(name)
                users.append("https://www.instagram.com/" + name)
                found_new = True
                if len(users) >= limit:
                    break
        idle_passes = 0 if found_new else idle_passes + 1
        # Use a new chain each pass: a reused ActionChains object keeps the
        # actions queued on it earlier unless it is reset.
        webdriver.ActionChains(driver).key_down(Keys.SPACE).key_up(Keys.SPACE).perform()
        time.sleep(0.5)
    return users
运行likers = getLikers("deirvlon",100,1)