从 Chartink 筛选器中抓取股票名称
Scraping stock names from Chartink screener
我正在尝试在任何给定时间从 chartink 筛选器中抓取可用的股票列表。
示例筛选器:https://chartink.com/screener/15-minute-stock-breakouts
使用浏览器的“检查元素”功能时,我可以在 HTML 标签之间('td' 和 'tr' 之间)看到股票名称。
但是,当我在 Python 中打印获取到的页面内容时,股票名称却不见了('td' 和 'tr' 之间没有任何信息)。
这让我怀疑 Chartink 网站是否采取了反爬虫措施,当然也可能只是我的知识有限。
能否请您试一试,并提出建议?如果不用 Python,我能否通过其他工具(例如 VBA)获取股票列表?
我在 Windows 11 上使用 Microsoft Edge。
下面是代码。如您所见,我尝试了不同的方法,但都失败了。
import pandas as pd
# from selenium import webdriver
# from selenium.webdriver.common.by import By
import numpy as np
import schedule
from datetime import datetime
import requests
from bs4 import BeautifulSoup
# Download the screener page and dump the parsed markup to stdout.
# Only the HTML returned by this single GET is parsed; nothing here
# waits for or triggers any follow-up requests the page may make.
SCREENER_URL = "https://chartink.com/screener/15-minute-stock-breakouts"
response = requests.get(SCREENER_URL)
raw_html = response.content
document = BeautifulSoup(raw_html, 'lxml')
print(document)

# Earlier Selenium/Edge attempt, intentionally left disabled:
# url = 'https://chartink.com/screener/15-minute-stock-breakouts'
# driver = webdriver.Edge(executable_path=r'C:\Users\kashk\Downloads\edgedriver_win64\msedgedriver.exe')
# driver.get(url)
# pd.read_html(driver.find_element(by=By.XPATH, value='//*[@id="DataTables_Table_0"]').get_attribute('outerHTML'))
数据是动态加载的,是通过 XHR 请求获取的,因此如果您使用 Selenium,可能必须先等待数据加载完成。
以下方法使用 XMLHTTP,在我这里可以正常工作:
Option Explicit
Sub Chartink()
    ' Fetches the "15-minute-stock-breakouts" screener results and prints
    ' name, close and volume (tab-separated) for each returned row via
    ' Debug.Print.
    '
    ' Relies on JsonConverter.ParseJson (VBA-JSON) and on the
    ' Scripting.Dictionary type (Microsoft Scripting Runtime reference).
    Dim http As Object
    Dim pageDoc As Object
    Dim tokenMeta As Object
    Dim parsed As Scripting.Dictionary
    Dim dataRows As Object
    Dim entry As Object
    Dim rowIdx As Long

    Set http = CreateObject("MSXML2.XMLHTTP")

    ' Step 1: synchronous GET of the screener page itself.
    http.Open "GET", "https://chartink.com/screener/15-minute-stock-breakouts", False
    http.Send

    ' Load the response into an HTML document so it can be queried.
    Set pageDoc = CreateObject("HTMLFile")
    pageDoc.body.innerHTML = http.responseText

    ' Step 2: pick up the CSRF token; it is sent as a header on the POST below.
    Set tokenMeta = pageDoc.getElementsByName("csrf-token")(0)

    ' Step 3: synchronous POST of the scan clause to get the JSON payload.
    http.Open "POST", "https://chartink.com/screener/process", False
    http.setRequestHeader "x-csrf-token", tokenMeta.Content
    http.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
    http.Send "scan_clause=(+%7B57960%7D+(+%5B0%5D+15+minute+close+%3E+%5B-1%5D+15+minute+max(+20+%2C+%5B0%5D+15+minute+close+)+and+%5B0%5D+15+minute+volume+%3E+%5B0%5D+15+minute+sma(+volume%2C20+)+)+)+"

    ' Step 4: parse the JSON and print one line per entry under "data".
    Set parsed = JsonConverter.ParseJson(http.responseText)
    Set dataRows = parsed("data")

    For rowIdx = 1 To dataRows.Count
        Set entry = dataRows(rowIdx)
        Debug.Print entry("name") & vbTab & entry("close") & vbTab & entry("volume")
    Next rowIdx
End Sub
您需要导入 VBA-JSON 模块,并添加对 Microsoft Scripting Runtime 的引用,
才能使用 JsonConverter.ParseJson
方法。
我正在尝试在任何给定时间从 chartink 筛选器中抓取可用的股票列表。
示例筛选器:https://chartink.com/screener/15-minute-stock-breakouts
使用浏览器的“检查元素”功能时,我可以在 HTML 标签之间('td' 和 'tr' 之间)看到股票名称。但是,当我在 Python 中打印获取到的页面内容时,股票名称却不见了('td' 和 'tr' 之间没有任何信息)。这让我怀疑 Chartink 网站是否采取了反爬虫措施,当然也可能只是我的知识有限。
能否请您试一试,并提出建议?如果不用 Python,我能否通过其他工具(例如 VBA)获取股票列表?我在 Windows 11 上使用 Microsoft Edge。
下面是代码。如您所见,我尝试了不同的方法,但都失败了。
import pandas as pd
# from selenium import webdriver
# from selenium.webdriver.common.by import By
import numpy as np
import schedule
from datetime import datetime
import requests
from bs4 import BeautifulSoup
# Download the screener page and dump the parsed markup to stdout.
# Only the HTML returned by this single GET is parsed; nothing here
# waits for or triggers any follow-up requests the page may make.
SCREENER_URL = "https://chartink.com/screener/15-minute-stock-breakouts"
response = requests.get(SCREENER_URL)
raw_html = response.content
document = BeautifulSoup(raw_html, 'lxml')
print(document)

# Earlier Selenium/Edge attempt, intentionally left disabled:
# url = 'https://chartink.com/screener/15-minute-stock-breakouts'
# driver = webdriver.Edge(executable_path=r'C:\Users\kashk\Downloads\edgedriver_win64\msedgedriver.exe')
# driver.get(url)
# pd.read_html(driver.find_element(by=By.XPATH, value='//*[@id="DataTables_Table_0"]').get_attribute('outerHTML'))
数据是动态加载的,是通过 XHR 请求获取的,因此如果您使用 Selenium,可能必须先等待数据加载完成。
以下方法使用 XMLHTTP,在我这里可以正常工作:
Option Explicit
Sub Chartink()
    ' Fetches the "15-minute-stock-breakouts" screener results and prints
    ' name, close and volume (tab-separated) for each returned row via
    ' Debug.Print.
    '
    ' Relies on JsonConverter.ParseJson (VBA-JSON) and on the
    ' Scripting.Dictionary type (Microsoft Scripting Runtime reference).
    Dim http As Object
    Dim pageDoc As Object
    Dim tokenMeta As Object
    Dim parsed As Scripting.Dictionary
    Dim dataRows As Object
    Dim entry As Object
    Dim rowIdx As Long

    Set http = CreateObject("MSXML2.XMLHTTP")

    ' Step 1: synchronous GET of the screener page itself.
    http.Open "GET", "https://chartink.com/screener/15-minute-stock-breakouts", False
    http.Send

    ' Load the response into an HTML document so it can be queried.
    Set pageDoc = CreateObject("HTMLFile")
    pageDoc.body.innerHTML = http.responseText

    ' Step 2: pick up the CSRF token; it is sent as a header on the POST below.
    Set tokenMeta = pageDoc.getElementsByName("csrf-token")(0)

    ' Step 3: synchronous POST of the scan clause to get the JSON payload.
    http.Open "POST", "https://chartink.com/screener/process", False
    http.setRequestHeader "x-csrf-token", tokenMeta.Content
    http.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
    http.Send "scan_clause=(+%7B57960%7D+(+%5B0%5D+15+minute+close+%3E+%5B-1%5D+15+minute+max(+20+%2C+%5B0%5D+15+minute+close+)+and+%5B0%5D+15+minute+volume+%3E+%5B0%5D+15+minute+sma(+volume%2C20+)+)+)+"

    ' Step 4: parse the JSON and print one line per entry under "data".
    Set parsed = JsonConverter.ParseJson(http.responseText)
    Set dataRows = parsed("data")

    For rowIdx = 1 To dataRows.Count
        Set entry = dataRows(rowIdx)
        Debug.Print entry("name") & vbTab & entry("close") & vbTab & entry("volume")
    Next rowIdx
End Sub
您需要导入 VBA-JSON 模块,并添加对 Microsoft Scripting Runtime 的引用,
才能使用 JsonConverter.ParseJson
方法。