Python Selenium 表格主体(tbody)数据提取
Python Selenium Table Body Data Extraction
我正在尝试从表格中获取 td 元素里的数据,但我的代码始终只能提取到 thead 中的行。如果我加上 find_element_by_tag_name("tbody"),就会收到那条经典的错误消息:no such element: Unable to locate element...(没有这样的元素:无法定位元素...)。有什么建议吗?
源代码:来自https://shinyapps.asee.org/apps/Profiles/
<table class="cell-border stripe compact dataTable no-footer" id="DataTables_Table_4" role="grid" aria-describedby="DataTables_Table_4_info">
<thead>
<tr>
<th>...</th>
.
.
.
</tr>
</thead>
<tbody>
<tr>
<td>...</td>
.
.
.
</tr>
.
.
.
</tbody>
</table>
Selenium Python 代码:
# Question code: iterate over every college in the dropdown, select it, and
# try to scrape the resulting table. Relies on `driver`, `element` (the
# dropdown), `colleges` and `year` being defined earlier in the script.
for opt in element.find_elements_by_css_selector("div.option"):
    #Record College Names
    colleges.append(opt.get_attribute("data-value"))
    time.sleep(2)
    #Select College
    opt.click() #does pull data into graph
    #Scrape Data
    # find_element_* returns only the FIRST matching <table>.
    table = driver.find_element_by_tag_name("table")
    # This is the lookup the question reports as failing with
    # "no such element" -- presumably because the first <table> holds only
    # the header and has no <tbody>.
    alldata = table.find_element_by_tag_name("tbody")
    # Rows are taken from `table`, not `alldata`, so <thead> rows are included.
    rows = table.find_elements_by_tag_name("tr")
    #print(table.tag_name)
    for row in rows:
        print(row.tag_name)
        data = []
        data.append(year)
        data.append("Degrees Awarded")
        # Header rows contain <th> cells only, so this list is empty for them.
        data_elements = row.find_elements_by_tag_name("td")
        #add to pandas table
        for fact in data_elements:
            try:
                data.append(fact.text)
            except:
                print("nothing")
        print(data)
        #DF.loc[len(DF.index)]=data
    #reclick on dropdown box to get next school's data
    element.click()
页面中有两个 table 元素:一个用于表头(没有 id 属性),另一个用于数据(带有 id 属性)。
请尝试下面的代码并确认。
# Answer code: read the header cells and the data rows from their two
# separate <table> elements.
# NOTE: assumes `driver` is an existing WebDriver and `By` has been imported
# from selenium.webdriver.common.by.
driver.get("https://shinyapps.asee.org/apps/Profiles/")

# Code to select "Degrees Awarded" and other option in the drop down.

# Header cells live in the table that has NO id attribute.
table_header = driver.find_elements(By.XPATH,"//table[not(@id)]//th")
header_row = [header.text for header in table_header]
print(header_row)

# Data rows live in the table that HAS an id attribute.
table_data = driver.find_elements(By.XPATH,"//table[@id]/tbody/tr")
for row in table_data:
    # Use dot in the xpath to find elements within the element.
    columns = row.find_elements(By.XPATH,"./td")
    table_row = [column.text for column in columns]
    print(table_row)
['INSTITUTIONS', 'DEGREE NAME', 'DISCIPLINE NAME', 'NON RES ALIEN M', 'NON RES ALIEN F', 'UNKNOWN M', 'UNKNOWN F', 'HISPANIC M', 'HISPANIC F', 'AMERICAN INDIAN M', 'AMERICAN INDIAN F', 'ASIAN AMERICAN M', 'ASIAN AMERICAN F', 'AFRICAN AMERICAN M', 'AFRICAN AMERICAN F', '', '', '', '', '', '', '']
['Air Force Institute of Technology', 'Aeronautical Engineering (M.S)', 'Aerospace Engineering', '0', '0', '0', '0', '1', '0', '0', '0', '1', '0', '0', '0', '0', '0', '17', '4', '0', '0', '23']
['Air Force Institute of Technology', 'Applied Mathematics (M.S)', 'Other Engineering Disciplines', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0', '1']
...
要获取 table 元素的 id 属性,可以使用以下代码。
# Locate the first <table> that carries an id attribute and read that id.
# NOTE: assumes `driver` is an existing WebDriver and `By` is imported.
table_id = driver.find_element(By.XPATH,"//table[@id]").get_attribute("id")
print(table_id)
DataTables_Table_3
我正在尝试从表格中获取 td 元素里的数据,但我的代码始终只能提取到 thead 中的行。如果我加上 find_element_by_tag_name("tbody"),就会收到那条经典的错误消息:no such element: Unable to locate element...(没有这样的元素:无法定位元素...)。有什么建议吗?
源代码:来自https://shinyapps.asee.org/apps/Profiles/
<table class="cell-border stripe compact dataTable no-footer" id="DataTables_Table_4" role="grid" aria-describedby="DataTables_Table_4_info">
<thead>
<tr>
<th>...</th>
.
.
.
</tr>
</thead>
<tbody>
<tr>
<td>...</td>
.
.
.
</tr>
.
.
.
</tbody>
</table>
Selenium Python 代码:
# Question code: iterate over every college in the dropdown, select it, and
# try to scrape the resulting table. Relies on `driver`, `element` (the
# dropdown), `colleges` and `year` being defined earlier in the script.
for opt in element.find_elements_by_css_selector("div.option"):
    #Record College Names
    colleges.append(opt.get_attribute("data-value"))
    time.sleep(2)
    #Select College
    opt.click() #does pull data into graph
    #Scrape Data
    # find_element_* returns only the FIRST matching <table>.
    table = driver.find_element_by_tag_name("table")
    # This is the lookup the question reports as failing with
    # "no such element" -- presumably because the first <table> holds only
    # the header and has no <tbody>.
    alldata = table.find_element_by_tag_name("tbody")
    # Rows are taken from `table`, not `alldata`, so <thead> rows are included.
    rows = table.find_elements_by_tag_name("tr")
    #print(table.tag_name)
    for row in rows:
        print(row.tag_name)
        data = []
        data.append(year)
        data.append("Degrees Awarded")
        # Header rows contain <th> cells only, so this list is empty for them.
        data_elements = row.find_elements_by_tag_name("td")
        #add to pandas table
        for fact in data_elements:
            try:
                data.append(fact.text)
            except:
                print("nothing")
        print(data)
        #DF.loc[len(DF.index)]=data
    #reclick on dropdown box to get next school's data
    element.click()
页面中有两个 table 元素:一个用于表头(没有 id 属性),另一个用于数据(带有 id 属性)。
请尝试下面的代码并确认。
# Answer code: read the header cells and the data rows from their two
# separate <table> elements.
# NOTE: assumes `driver` is an existing WebDriver and `By` has been imported
# from selenium.webdriver.common.by.
driver.get("https://shinyapps.asee.org/apps/Profiles/")

# Code to select "Degrees Awarded" and other option in the drop down.

# Header cells live in the table that has NO id attribute.
table_header = driver.find_elements(By.XPATH,"//table[not(@id)]//th")
header_row = [header.text for header in table_header]
print(header_row)

# Data rows live in the table that HAS an id attribute.
table_data = driver.find_elements(By.XPATH,"//table[@id]/tbody/tr")
for row in table_data:
    # Use dot in the xpath to find elements within the element.
    columns = row.find_elements(By.XPATH,"./td")
    table_row = [column.text for column in columns]
    print(table_row)
['INSTITUTIONS', 'DEGREE NAME', 'DISCIPLINE NAME', 'NON RES ALIEN M', 'NON RES ALIEN F', 'UNKNOWN M', 'UNKNOWN F', 'HISPANIC M', 'HISPANIC F', 'AMERICAN INDIAN M', 'AMERICAN INDIAN F', 'ASIAN AMERICAN M', 'ASIAN AMERICAN F', 'AFRICAN AMERICAN M', 'AFRICAN AMERICAN F', '', '', '', '', '', '', '']
['Air Force Institute of Technology', 'Aeronautical Engineering (M.S)', 'Aerospace Engineering', '0', '0', '0', '0', '1', '0', '0', '0', '1', '0', '0', '0', '0', '0', '17', '4', '0', '0', '23']
['Air Force Institute of Technology', 'Applied Mathematics (M.S)', 'Other Engineering Disciplines', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0', '1']
...
要获取 table 元素的 id 属性,可以使用以下代码。
# Locate the first <table> that carries an id attribute and read that id.
# NOTE: assumes `driver` is an existing WebDriver and `By` is imported.
table_id = driver.find_element(By.XPATH,"//table[@id]").get_attribute("id")
print(table_id)
DataTables_Table_3