使用列表向 DataFrame 添加新列
Adding new column to DataFrame using a list
我正在从雅虎财经网站上抓取股票的名称和价格。当传入的 index 参数为 1 时,我会创建一个包含“Name”、“Code”和“Price”三列的 DataFrame;之后我想进入另一个循环,向原始 DataFrame 添加一列更新后的价格。但是当我添加该列时,原始数据都变成了 NaN 值。我需要怎么做才能让索引正确对齐,而不破坏原始 DataFrame 中的数据?
class Stocks():
    """Scrape Yahoo Finance quote pages into a pandas DataFrame.

    All methods are static and are meant to be called on the class itself,
    e.g. ``Stocks.Yahoo_Finance(1)``.  The table built by the first pass is
    kept on the class so that later passes add their price column to it
    instead of starting again from an empty frame.
    """

    # Quote pages to scrape; one DataFrame row per URL, in this order.
    _URL_LIST = (
        'https://finance.yahoo.com/quote/GOOG',
        'https://finance.yahoo.com/quote/DOGE-USD',
        'https://finance.yahoo.com/quote/AAPL',
        'https://finance.yahoo.com/quote/HMC',
        'https://finance.yahoo.com/quote/TM',
        'https://finance.yahoo.com/quote/DKS',
        'https://finance.yahoo.com/quote/SHIB-USD',
        'https://finance.yahoo.com/quote/BTC-USD',
        'https://finance.yahoo.com/quote/WMT',
        'https://finance.yahoo.com/quote/AMZN',
        'https://finance.yahoo.com/quote/NKE',
        'https://finance.yahoo.com/quote/KO',
        'https://finance.yahoo.com/quote/PEP',
        'https://finance.yahoo.com/quote/DAL',
        'https://finance.yahoo.com/quote/SAVE',
        'https://finance.yahoo.com/quote/BLL',
        'https://finance.yahoo.com/quote/KMB',
        'https://finance.yahoo.com/quote/GIS',
    )

    # A slash followed by an upper-case ticker, optionally hyphenated.
    _CODE_PATTERN = re.compile(r'/([A-Z]*-[A-Z]*|[A-Z]*)')

    # CSS class strings used to locate the name and price elements.
    _NAME_CLASS = 'D(ib) Fz(18px)'
    _PRICE_CLASS = 'Fw(b) Fz(36px) Mb(-4px) D(ib)'

    # Table produced by the most recent pass; None until a pass has run.
    _structure = None

    def __init__(self):
        """Create an instance; the class keeps no per-instance state."""
        # __init__ must return None -- returning ``self`` raises TypeError.

    @staticmethod
    def Stock_ABV(str):
        """Return the ticker code found at the end of a quote URL.

        Args:
            str: URL such as ``https://finance.yahoo.com/quote/DOGE-USD``.
                The parameter name shadows the builtin; it is kept so that
                existing keyword callers continue to work.

        Returns:
            The upper-case (optionally hyphenated) text captured right after
            the last ``/`` in the URL, e.g. ``'DOGE-USD'``.

        Raises:
            ValueError: If the string contains no ``/`` at all.
        """
        # Every "/" produces a match (possibly with an empty capture), so the
        # last capture belongs to the final path segment -- the ticker.
        codes = Stocks._CODE_PATTERN.findall(str)
        if not codes:
            raise ValueError(f'no ticker code found in {str!r}')
        return codes[-1]

    @staticmethod
    def _quote_soup(url):
        """Download *url* and return its parsed HTML."""
        return BeautifulSoup(requests.get(url).text, 'html.parser')

    @staticmethod
    def _price(soup):
        """Return the text of the price element of a parsed quote page."""
        return soup.find('fin-streamer', {'class': Stocks._PRICE_CLASS}).text

    @staticmethod
    def Yahoo_Finance(index):
        """Scrape current prices and record them in the shared table.

        Args:
            index: Pass number.  ``1`` (re)builds the table with the columns
                ``Name``, ``Code`` and ``Price``.  Any other value adds a
                ``Price{index}`` column to the table from the earlier pass.

        Returns:
            The shared DataFrame, one row per URL in ``_URL_LIST``.
        """
        if index != 1 and Stocks._structure is None:
            # No base table yet: build it first.  Attaching a price column to
            # an empty frame is what filled Name/Code/Price with NaN.
            Stocks.Yahoo_Finance(1)

        if index == 1:
            rows = []
            for url in Stocks._URL_LIST:
                soup = Stocks._quote_soup(url)
                name = soup.find('h1', {'class': Stocks._NAME_CLASS}).text
                rows.append([name, Stocks.Stock_ABV(url), Stocks._price(soup)])
            # Build the frame once, after the loop, and remember it so later
            # passes extend this table.
            Stocks._structure = pd.DataFrame(
                rows, columns=['Name', 'Code', 'Price'])
        else:
            prices = [Stocks._price(Stocks._quote_soup(url))
                      for url in Stocks._URL_LIST]
            # Same URL order as the base pass, so the list lines up with the
            # existing rows position by position.
            Stocks._structure[f'Price{index}'] = prices

        pd.set_option('display.max_rows', None)
        return Stocks._structure

    @staticmethod
    def AFK_Runner():
        """Run the base pass, then one update pass, and return the table."""
        Stocks.Yahoo_Finance(1)
        return Stocks.Yahoo_Finance(2)
# Kick off the scrape: AFK_Runner calls Yahoo_Finance(1), then returns the
# result of Yahoo_Finance(2); the return value is not stored here.
Stocks.AFK_Runner()
你知道 yfinance 包吗?
# pip install yfinance
import yfinance as yf
# Request data for the four space-separated tickers in a single call; the
# output shown below is a date-indexed table whose columns are grouped by
# field (Adj Close, Close, ..., Open, Volume) and then by ticker.
data = yf.download('GOOG DOGE-USD AAPL HMC')
输出:
Adj Close Close ... Open Volume
AAPL DOGE-USD GOOG HMC AAPL DOGE-USD GOOG HMC ... AAPL DOGE-USD GOOG HMC AAPL DOGE-USD GOOG HMC
Date ...
1980-03-17 NaN NaN NaN 0.718973 NaN NaN NaN 0.893750 ... NaN NaN NaN 0.893750 NaN NaN NaN 26000.0
1980-03-18 NaN NaN NaN 0.731542 NaN NaN NaN 0.909375 ... NaN NaN NaN 0.909375 NaN NaN NaN 2000.0
1980-03-19 NaN NaN NaN 0.724001 NaN NaN NaN 0.900000 ... NaN NaN NaN 0.900000 NaN NaN NaN 2000.0
1980-03-20 NaN NaN NaN 0.724001 NaN NaN NaN 0.900000 ... NaN NaN NaN 0.900000 NaN NaN NaN 0.0
1980-03-21 NaN NaN NaN 0.724001 NaN NaN NaN 0.900000 ... NaN NaN NaN 0.900000 NaN NaN NaN 2000.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2022-02-09 176.279999 0.159123 2829.060059 30.500000 176.279999 0.159123 2829.060059 30.500000 ... 176.050003 0.158357 2816.995117 30.120001 71285000.0 7.786708e+08 1431400.0 1554600.0
2022-02-10 172.119995 0.151889 2772.050049 30.760000 172.119995 0.151889 2772.050049 30.760000 ... 174.139999 0.159145 2790.000000 31.000000 90865900.0 1.053631e+09 1650900.0 1398400.0
2022-02-11 168.639999 0.144847 2682.600098 30.459999 168.639999 0.144847 2682.600098 30.459999 ... 172.330002 0.151895 2775.000000 30.760000 98566000.0 7.767306e+08 1937700.0 1004200.0
2022-02-12 NaN 0.144405 NaN NaN NaN 0.144405 NaN NaN ... NaN 0.144856 NaN NaN NaN 6.026994e+08 NaN NaN
2022-02-13 NaN 0.153793 NaN NaN NaN 0.153793 NaN NaN ... NaN 0.144308 NaN NaN NaN 1.346092e+09 NaN NaN
[11055 rows x 24 columns]
我正在从雅虎财经网站上抓取股票的名称和价格。当传入的 index 参数为 1 时,我会创建一个包含“Name”、“Code”和“Price”三列的 DataFrame;之后我想进入另一个循环,向原始 DataFrame 添加一列更新后的价格。但是当我添加该列时,原始数据都变成了 NaN 值。我需要怎么做才能让索引正确对齐,而不破坏原始 DataFrame 中的数据?
class Stocks():
    """Scrape Yahoo Finance quote pages into a pandas DataFrame.

    All methods are static and are meant to be called on the class itself,
    e.g. ``Stocks.Yahoo_Finance(1)``.  The table built by the first pass is
    kept on the class so that later passes add their price column to it
    instead of starting again from an empty frame.
    """

    # Quote pages to scrape; one DataFrame row per URL, in this order.
    _URL_LIST = (
        'https://finance.yahoo.com/quote/GOOG',
        'https://finance.yahoo.com/quote/DOGE-USD',
        'https://finance.yahoo.com/quote/AAPL',
        'https://finance.yahoo.com/quote/HMC',
        'https://finance.yahoo.com/quote/TM',
        'https://finance.yahoo.com/quote/DKS',
        'https://finance.yahoo.com/quote/SHIB-USD',
        'https://finance.yahoo.com/quote/BTC-USD',
        'https://finance.yahoo.com/quote/WMT',
        'https://finance.yahoo.com/quote/AMZN',
        'https://finance.yahoo.com/quote/NKE',
        'https://finance.yahoo.com/quote/KO',
        'https://finance.yahoo.com/quote/PEP',
        'https://finance.yahoo.com/quote/DAL',
        'https://finance.yahoo.com/quote/SAVE',
        'https://finance.yahoo.com/quote/BLL',
        'https://finance.yahoo.com/quote/KMB',
        'https://finance.yahoo.com/quote/GIS',
    )

    # A slash followed by an upper-case ticker, optionally hyphenated.
    _CODE_PATTERN = re.compile(r'/([A-Z]*-[A-Z]*|[A-Z]*)')

    # CSS class strings used to locate the name and price elements.
    _NAME_CLASS = 'D(ib) Fz(18px)'
    _PRICE_CLASS = 'Fw(b) Fz(36px) Mb(-4px) D(ib)'

    # Table produced by the most recent pass; None until a pass has run.
    _structure = None

    def __init__(self):
        """Create an instance; the class keeps no per-instance state."""
        # __init__ must return None -- returning ``self`` raises TypeError.

    @staticmethod
    def Stock_ABV(str):
        """Return the ticker code found at the end of a quote URL.

        Args:
            str: URL such as ``https://finance.yahoo.com/quote/DOGE-USD``.
                The parameter name shadows the builtin; it is kept so that
                existing keyword callers continue to work.

        Returns:
            The upper-case (optionally hyphenated) text captured right after
            the last ``/`` in the URL, e.g. ``'DOGE-USD'``.

        Raises:
            ValueError: If the string contains no ``/`` at all.
        """
        # Every "/" produces a match (possibly with an empty capture), so the
        # last capture belongs to the final path segment -- the ticker.
        codes = Stocks._CODE_PATTERN.findall(str)
        if not codes:
            raise ValueError(f'no ticker code found in {str!r}')
        return codes[-1]

    @staticmethod
    def _quote_soup(url):
        """Download *url* and return its parsed HTML."""
        return BeautifulSoup(requests.get(url).text, 'html.parser')

    @staticmethod
    def _price(soup):
        """Return the text of the price element of a parsed quote page."""
        return soup.find('fin-streamer', {'class': Stocks._PRICE_CLASS}).text

    @staticmethod
    def Yahoo_Finance(index):
        """Scrape current prices and record them in the shared table.

        Args:
            index: Pass number.  ``1`` (re)builds the table with the columns
                ``Name``, ``Code`` and ``Price``.  Any other value adds a
                ``Price{index}`` column to the table from the earlier pass.

        Returns:
            The shared DataFrame, one row per URL in ``_URL_LIST``.
        """
        if index != 1 and Stocks._structure is None:
            # No base table yet: build it first.  Attaching a price column to
            # an empty frame is what filled Name/Code/Price with NaN.
            Stocks.Yahoo_Finance(1)

        if index == 1:
            rows = []
            for url in Stocks._URL_LIST:
                soup = Stocks._quote_soup(url)
                name = soup.find('h1', {'class': Stocks._NAME_CLASS}).text
                rows.append([name, Stocks.Stock_ABV(url), Stocks._price(soup)])
            # Build the frame once, after the loop, and remember it so later
            # passes extend this table.
            Stocks._structure = pd.DataFrame(
                rows, columns=['Name', 'Code', 'Price'])
        else:
            prices = [Stocks._price(Stocks._quote_soup(url))
                      for url in Stocks._URL_LIST]
            # Same URL order as the base pass, so the list lines up with the
            # existing rows position by position.
            Stocks._structure[f'Price{index}'] = prices

        pd.set_option('display.max_rows', None)
        return Stocks._structure

    @staticmethod
    def AFK_Runner():
        """Run the base pass, then one update pass, and return the table."""
        Stocks.Yahoo_Finance(1)
        return Stocks.Yahoo_Finance(2)
# Kick off the scrape: AFK_Runner calls Yahoo_Finance(1), then returns the
# result of Yahoo_Finance(2); the return value is not stored here.
Stocks.AFK_Runner()
你知道 yfinance 包吗?
# pip install yfinance
import yfinance as yf
# Request data for the four space-separated tickers in a single call; the
# output shown below is a date-indexed table whose columns are grouped by
# field (Adj Close, Close, ..., Open, Volume) and then by ticker.
data = yf.download('GOOG DOGE-USD AAPL HMC')
输出:
Adj Close Close ... Open Volume
AAPL DOGE-USD GOOG HMC AAPL DOGE-USD GOOG HMC ... AAPL DOGE-USD GOOG HMC AAPL DOGE-USD GOOG HMC
Date ...
1980-03-17 NaN NaN NaN 0.718973 NaN NaN NaN 0.893750 ... NaN NaN NaN 0.893750 NaN NaN NaN 26000.0
1980-03-18 NaN NaN NaN 0.731542 NaN NaN NaN 0.909375 ... NaN NaN NaN 0.909375 NaN NaN NaN 2000.0
1980-03-19 NaN NaN NaN 0.724001 NaN NaN NaN 0.900000 ... NaN NaN NaN 0.900000 NaN NaN NaN 2000.0
1980-03-20 NaN NaN NaN 0.724001 NaN NaN NaN 0.900000 ... NaN NaN NaN 0.900000 NaN NaN NaN 0.0
1980-03-21 NaN NaN NaN 0.724001 NaN NaN NaN 0.900000 ... NaN NaN NaN 0.900000 NaN NaN NaN 2000.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2022-02-09 176.279999 0.159123 2829.060059 30.500000 176.279999 0.159123 2829.060059 30.500000 ... 176.050003 0.158357 2816.995117 30.120001 71285000.0 7.786708e+08 1431400.0 1554600.0
2022-02-10 172.119995 0.151889 2772.050049 30.760000 172.119995 0.151889 2772.050049 30.760000 ... 174.139999 0.159145 2790.000000 31.000000 90865900.0 1.053631e+09 1650900.0 1398400.0
2022-02-11 168.639999 0.144847 2682.600098 30.459999 168.639999 0.144847 2682.600098 30.459999 ... 172.330002 0.151895 2775.000000 30.760000 98566000.0 7.767306e+08 1937700.0 1004200.0
2022-02-12 NaN 0.144405 NaN NaN NaN 0.144405 NaN NaN ... NaN 0.144856 NaN NaN NaN 6.026994e+08 NaN NaN
2022-02-13 NaN 0.153793 NaN NaN NaN 0.153793 NaN NaN ... NaN 0.144308 NaN NaN NaN 1.346092e+09 NaN NaN
[11055 rows x 24 columns]