如何将单个值从数据帧输出到 CSV?
How do I output a single value from a dataframe to a CSV?
我正在尝试将数据框(DataFrame)中的单个值写入 CSV 文件。我想获取 MLB 的统计数据,并在完成一些网页抓取之后,使用下面附带的函数从 MLB.com 中提取数据。但是,我只想把第一行(最好只是球队名称)提取到单独的 CSV 文件中。从下面的输出中,我只想得到 "Toronto Blue Jays"。
import pandas as pd
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import random
import time
# Launch Chrome and open the MLB home page.
# NOTE(review): `executable_path` is the older way of pointing Selenium at the
# driver binary -- confirm it is still accepted by the installed Selenium.
driver = webdriver.Chrome(executable_path='filepath/chromedriver')
driver.get('http://www.mlb.com')

# Open the stats entry of the header menu, then click its third list item.
stats_header_bar = driver.find_element(
    By.CLASS_NAME, 'megamenu-navbar-overflow__menu-item--stats')
stats_header_bar.click()
stats_line_items = stats_header_bar.find_elements(By.TAG_NAME, 'li')
stats_line_items[2].click()

# Pick the 2015 option in the season drop-down.
hitting_season_element = driver.find_element(By.ID, 'st_hitting_season')
season_select = Select(hitting_season_element)
season_select.select_by_value('2015')

# Block for up to 10 seconds until the data grid is visible.
wait = WebDriverWait(driver, 10)
team_hr_stats = wait.until(EC.visibility_of_element_located((By.ID, 'datagrid')))
print('The HR dropdown in the header was loaded successfully. The mouse will move over the element after a short delay')

# Randomised pause before moving the mouse. normalvariate() can (rarely)
# return a negative number, which time.sleep() rejects, so clamp at zero.
normal_delay = max(0.0, random.normalvariate(2, 0.5))
print('Sleeping for {} seconds'.format(normal_delay))
time.sleep(normal_delay)
print('Now moving mouse...')
ActionChains(driver).move_to_element(team_hr_stats).perform()

# Click the eleventh header cell of the grid.
# NOTE(review): presumably this is the HR column and the click sorts the
# table by it -- confirm against the live page.
team_hr_total = team_hr_stats.find_elements(By.TAG_NAME, 'th')
team_hr_total[10].click()

# Grab the grid element and its inner HTML for parsing.
data_div_1 = driver.find_element(By.ID, 'datagrid')
data_html_1 = data_div_1.get_attribute('innerHTML')
import bs4
import requests
soup_1 = bs4.BeautifulSoup(data_html_1, 'html5lib')
def extract_stats_data(data_element):
    """Build a DataFrame from the HTML table inside *data_element*.

    *data_element* must offer ``get_attribute('innerHTML')``; the markup it
    returns is parsed with the ``html5lib`` parser.

    Column names come from the ``<th>`` cells of the first row of the table
    head, with the sort-arrow characters replaced by spaces and surrounding
    whitespace stripped. Every ``<tr>`` of the table body contributes one
    row made of the text of its ``<td>`` cells. The ``HR`` column is
    converted with ``pd.to_numeric`` before the frame is returned.
    """
    markup = data_element.get_attribute('innerHTML')
    table = bs4.BeautifulSoup(markup, 'html5lib')

    headers = []
    for header_cell in table.thead.tr.find_all('th'):
        label = header_cell.text.replace('▼', ' ').replace('▲', ' ')
        headers.append(label.strip())

    body_rows = [
        [cell.text for cell in table_row.find_all('td')]
        for table_row in table.tbody.find_all('tr')
    ]

    frame = pd.DataFrame(body_rows, columns=headers)
    for column in ('HR',):
        frame[column] = pd.to_numeric(frame[column])
    return frame
# Parse the scraped grid element into a DataFrame.
df = extract_stats_data(data_div_1)
# Dump the whole frame (every row and column) to Filename.csv.
df.to_csv('Filename.csv')
您提供的屏幕截图显示数据按 HR 从高到低排序。因此,您首先需要以相同的方式对 df 进行排序:
df = extract_stats_data(data_div_1)
# Order the rows by HR, largest first, and read the 'Team' value of the top row.
team_name = df.sort_values('HR', ascending=False).iloc[0]['Team']  # With highest HR

# Write team name string to file. The encoding is given explicitly so the
# output does not depend on the platform's default.
with open('Filename.csv', 'w', encoding='utf-8') as f_output:
    f_output.write(team_name)
我正在尝试将数据框(DataFrame)中的单个值写入 CSV 文件。我想获取 MLB 的统计数据,并在完成一些网页抓取之后,使用下面附带的函数从 MLB.com 中提取数据。但是,我只想把第一行(最好只是球队名称)提取到单独的 CSV 文件中。从下面的输出中,我只想得到 "Toronto Blue Jays"。
import pandas as pd
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import random
import time
# Launch Chrome and open the MLB home page.
# NOTE(review): `executable_path` is the older way of pointing Selenium at the
# driver binary -- confirm it is still accepted by the installed Selenium.
driver = webdriver.Chrome(executable_path='filepath/chromedriver')
driver.get('http://www.mlb.com')

# Open the stats entry of the header menu, then click its third list item.
stats_header_bar = driver.find_element(
    By.CLASS_NAME, 'megamenu-navbar-overflow__menu-item--stats')
stats_header_bar.click()
stats_line_items = stats_header_bar.find_elements(By.TAG_NAME, 'li')
stats_line_items[2].click()

# Pick the 2015 option in the season drop-down.
hitting_season_element = driver.find_element(By.ID, 'st_hitting_season')
season_select = Select(hitting_season_element)
season_select.select_by_value('2015')

# Block for up to 10 seconds until the data grid is visible.
wait = WebDriverWait(driver, 10)
team_hr_stats = wait.until(EC.visibility_of_element_located((By.ID, 'datagrid')))
print('The HR dropdown in the header was loaded successfully. The mouse will move over the element after a short delay')

# Randomised pause before moving the mouse. normalvariate() can (rarely)
# return a negative number, which time.sleep() rejects, so clamp at zero.
normal_delay = max(0.0, random.normalvariate(2, 0.5))
print('Sleeping for {} seconds'.format(normal_delay))
time.sleep(normal_delay)
print('Now moving mouse...')
ActionChains(driver).move_to_element(team_hr_stats).perform()

# Click the eleventh header cell of the grid.
# NOTE(review): presumably this is the HR column and the click sorts the
# table by it -- confirm against the live page.
team_hr_total = team_hr_stats.find_elements(By.TAG_NAME, 'th')
team_hr_total[10].click()

# Grab the grid element and its inner HTML for parsing.
data_div_1 = driver.find_element(By.ID, 'datagrid')
data_html_1 = data_div_1.get_attribute('innerHTML')
import bs4
import requests
soup_1 = bs4.BeautifulSoup(data_html_1, 'html5lib')
def extract_stats_data(data_element):
    """Build a DataFrame from the HTML table inside *data_element*.

    *data_element* must offer ``get_attribute('innerHTML')``; the markup it
    returns is parsed with the ``html5lib`` parser.

    Column names come from the ``<th>`` cells of the first row of the table
    head, with the sort-arrow characters replaced by spaces and surrounding
    whitespace stripped. Every ``<tr>`` of the table body contributes one
    row made of the text of its ``<td>`` cells. The ``HR`` column is
    converted with ``pd.to_numeric`` before the frame is returned.
    """
    markup = data_element.get_attribute('innerHTML')
    table = bs4.BeautifulSoup(markup, 'html5lib')

    headers = []
    for header_cell in table.thead.tr.find_all('th'):
        label = header_cell.text.replace('▼', ' ').replace('▲', ' ')
        headers.append(label.strip())

    body_rows = [
        [cell.text for cell in table_row.find_all('td')]
        for table_row in table.tbody.find_all('tr')
    ]

    frame = pd.DataFrame(body_rows, columns=headers)
    for column in ('HR',):
        frame[column] = pd.to_numeric(frame[column])
    return frame
# Parse the scraped grid element into a DataFrame.
df = extract_stats_data(data_div_1)
# Dump the whole frame (every row and column) to Filename.csv.
df.to_csv('Filename.csv')
您提供的屏幕截图显示数据按 HR 从高到低排序。因此,您首先需要以相同的方式对 df 进行排序:
df = extract_stats_data(data_div_1)
# Order the rows by HR, largest first, and read the 'Team' value of the top row.
team_name = df.sort_values('HR', ascending=False).iloc[0]['Team']  # With highest HR

# Write team name string to file. The encoding is given explicitly so the
# output does not depend on the platform's default.
with open('Filename.csv', 'w', encoding='utf-8') as f_output:
    f_output.write(team_name)