从 XML url 到 Pandas 数据帧
From XML url to Pandas dataframe
我是 Python 的新手,我在从网络导入一个简单的 XML 文件并将其转换为 pandas DF 时遇到了一些问题:
https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/cny.xml
我尝试了几种方法,包括使用 BS4,但我没能使它们起作用。
from bs4 import BeautifulSoup
import requests
# Download the XML document from the ECB URL.
socket = requests.get('https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/cny.xml')
# NOTE(review): only the name `BeautifulSoup` is imported above, so the
# module name `bs4` is unbound here and this line raises NameError.
soup = bs4.BeautifulSoup(socket.content, ['lxml', 'xml'])
# Collect every <Obs> element found in the parsed document.
all_obs = soup.find_all('Obs')
l = []
# NOTE(review): `pd` is used but this snippet never runs `import pandas as pd`.
df = pd.DataFrame(columns=['TIME_PERIOD','OBS_VALUE'])
pos= 0
# NOTE(review): the loop body (through `pos+=1`) was presumably indented in the
# original post; the indentation is missing in this listing.
for obs in all_obs:
# NOTE(review): find() searches for child tags. If TIME_PERIOD / OBS_VALUE are
# attributes of <Obs> rather than child elements, find() returns None and
# `.text` raises AttributeError -- confirm against the XML.
l.append(obs.find('TIME_PERIOD').text)
l.append(obs.find('OBS_VALUE').text)
# Write the two collected values as row `pos`, then reset the buffer and
# advance the row index for the next observation.
df.loc[pos] = l
l = []
pos+=1
print(df)
有人可以帮助我吗?
谢谢
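嗨!在这个 XML 中,TIME_PERIOD 和 OBS_VALUE 是 <Obs> 元素的属性,而不是子标签,所以应该用 node.get(...) 读取,而不是 find(...).text。另外,你的代码没有导入 pandas(pd),并且只导入了 BeautifulSoup 却写成了 bs4.BeautifulSoup。可以这样写: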
from bs4 import BeautifulSoup
import requests
import pandas as pd
# Download the ECB EUR/CNY reference-rate XML document.
response = requests.get(
    'https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/cny.xml',
    timeout=30,  # do not hang forever if the server stops responding
)
# Fail loudly on an HTTP error instead of parsing an error page into an
# empty frame.
response.raise_for_status()

# Hand the raw bytes to the XML parser so it can honour the encoding declared
# in the document itself.
bs = BeautifulSoup(response.content, 'xml')
obs = bs.find_all("Obs")

# Each observation is an empty element whose data lives in its attributes, e.g.
# <Obs OBS_CONF="F" OBS_STATUS="A" OBS_VALUE="10.7255" TIME_PERIOD="2005-04-01"/>
# so the values are read with node.get(...) rather than by looking up child tags.
#
# Build all rows first and create the frame once: DataFrame.append was removed
# in pandas 2.0, and calling it in a loop re-copied the whole frame every time.
rows = [
    {'TIME_PERIOD': node.get("TIME_PERIOD"), 'OBS_VALUE': node.get("OBS_VALUE")}
    for node in obs
]
# Passing `columns` keeps both columns present even when no <Obs> was found.
df = pd.DataFrame(rows, columns=['TIME_PERIOD', 'OBS_VALUE'])

# Shows the first rows in a notebook/REPL; wrap in print() when run as a script.
df.head()
我是 Python 的新手,我在从网络导入一个简单的 XML 文件并将其转换为 pandas DF 时遇到了一些问题: https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/cny.xml
我尝试了几种方法,包括使用 BS4,但我没能使它们起作用。
from bs4 import BeautifulSoup
import requests
# Download the XML document from the ECB URL.
socket = requests.get('https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/cny.xml')
# NOTE(review): only the name `BeautifulSoup` is imported above, so the
# module name `bs4` is unbound here and this line raises NameError.
soup = bs4.BeautifulSoup(socket.content, ['lxml', 'xml'])
# Collect every <Obs> element found in the parsed document.
all_obs = soup.find_all('Obs')
l = []
# NOTE(review): `pd` is used but this snippet never runs `import pandas as pd`.
df = pd.DataFrame(columns=['TIME_PERIOD','OBS_VALUE'])
pos= 0
# NOTE(review): the loop body (through `pos+=1`) was presumably indented in the
# original post; the indentation is missing in this listing.
for obs in all_obs:
# NOTE(review): find() searches for child tags. If TIME_PERIOD / OBS_VALUE are
# attributes of <Obs> rather than child elements, find() returns None and
# `.text` raises AttributeError -- confirm against the XML.
l.append(obs.find('TIME_PERIOD').text)
l.append(obs.find('OBS_VALUE').text)
# Write the two collected values as row `pos`, then reset the buffer and
# advance the row index for the next observation.
df.loc[pos] = l
l = []
pos+=1
print(df)
有人可以帮助我吗? 谢谢
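嗨!在这个 XML 中,TIME_PERIOD 和 OBS_VALUE 是 <Obs> 元素的属性,而不是子标签,所以应该用 node.get(...) 读取,而不是 find(...).text。另外,你的代码没有导入 pandas(pd),并且只导入了 BeautifulSoup 却写成了 bs4.BeautifulSoup。可以这样写: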
from bs4 import BeautifulSoup
import requests
import pandas as pd
# Download the ECB EUR/CNY reference-rate XML document.
response = requests.get(
    'https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/cny.xml',
    timeout=30,  # do not hang forever if the server stops responding
)
# Fail loudly on an HTTP error instead of parsing an error page into an
# empty frame.
response.raise_for_status()

# Hand the raw bytes to the XML parser so it can honour the encoding declared
# in the document itself.
bs = BeautifulSoup(response.content, 'xml')
obs = bs.find_all("Obs")

# Each observation is an empty element whose data lives in its attributes, e.g.
# <Obs OBS_CONF="F" OBS_STATUS="A" OBS_VALUE="10.7255" TIME_PERIOD="2005-04-01"/>
# so the values are read with node.get(...) rather than by looking up child tags.
#
# Build all rows first and create the frame once: DataFrame.append was removed
# in pandas 2.0, and calling it in a loop re-copied the whole frame every time.
rows = [
    {'TIME_PERIOD': node.get("TIME_PERIOD"), 'OBS_VALUE': node.get("OBS_VALUE")}
    for node in obs
]
# Passing `columns` keeps both columns present even when no <Obs> was found.
df = pd.DataFrame(rows, columns=['TIME_PERIOD', 'OBS_VALUE'])

# Shows the first rows in a notebook/REPL; wrap in print() when run as a script.
df.head()