一个接一个地连接数据帧
concatenate dataframes one by one
下面的代码读取文件,将它们保存在数据框(DataFrame)中,然后连接所有文件,连接后按每秒对数据重新采样。但这样做太占内存。我想要的是分块逐步处理:例如,先读取两个文件,将它们连接起来并重新采样;然后读取下一个文件,将其与前两个文件的结果连接起来并再次重新采样;如此逐个文件处理,直到 10 个文件全部完成。我该如何修改代码?有人可以帮我吗?以下是我的代码:
import pandas as pd
import os
#import matplotlib.pyplot as plt
#df1 = pd.read_hdf("E:\examples\hdf files\conew1.h5", 'df')
#df2 = pd.read_hdf("E:\examples\hdf files\conew2.h5", 'df')
#df3 = pd.read_hdf("E:\examples\hdf files\conew3.h5", 'df')
# Raw string: the backslashes are literal Windows path separators, not escapes.
hdfdirectory = r"E:\examples\hdf files"
# Kept so existing references to this name still resolve; the per-file loop
# below no longer needs it because each file is folded in exactly once.
number_of_dfs = 1


def per_second_counts(frame):
    """Count the non-null ``url`` values of ``frame`` in one-second bins.

    ``frame`` must expose ``timestamp`` and ``url`` columns; ``timestamp``
    becomes the index that is resampled, so it has to be datetime-like.
    Returns a DataFrame indexed by second with a single ``url`` count column.
    """
    pairs = pd.DataFrame({'timestamp': frame.timestamp, 'url': frame.url})
    return pairs.set_index('timestamp').resample('s').count()


def fold_counts(frames):
    """Reduce an iterable of raw frames to one per-second count table.

    Only the running total and the current file's counts are held at once,
    which is what keeps memory use low compared with concatenating every raw
    frame first.  Returns ``None`` when ``frames`` yields nothing.
    """
    running = None
    for frame in frames:
        counts = per_second_counts(frame)
        if running is None:
            running = counts
            continue
        # Concatenate with the RUNNING total (not the first file's counts),
        # then re-resample with sum() so a second that appears in both
        # inputs collapses into one row and gaps between files become 0.
        running = pd.concat([running, counts]).resample('s').sum()
    return running


def iter_hdf_frames(directory):
    """Yield the ``'df'`` table stored in each file of ``directory``."""
    # sorted() makes the processing order deterministic across platforms.
    for fi in sorted(os.listdir(directory)):
        hdfpath = os.path.join(directory, fi)
        print(hdfpath)
        yield pd.read_hdf(hdfpath, 'df')


df = None
if __name__ == "__main__":
    df = fold_counts(iter_hdf_frames(hdfdirectory))
我尝试写了一个例子来说明我的思路。您可能需要稍作调整,但应该能了解大致的想法:
# Raw string: the backslashes are literal Windows path separators, not escapes.
hdfdirectory = r"E:\examples\hdf files"


def count_urls_per_second(frame):
    """Return the number of non-null ``url`` values in each one-second bin.

    ``frame`` must expose ``timestamp`` and ``url`` columns; ``timestamp``
    becomes the index that is resampled, so it has to be datetime-like.
    The result is indexed by second and has a single ``url`` count column.
    """
    pairs = pd.DataFrame({'timestamp': frame.timestamp, 'url': frame.url})
    return pairs.set_index('timestamp').resample('s').count()


def accumulate_counts(frames):
    """Fold an iterable of raw frames into one per-second count table.

    Each frame is reduced to per-second counts before it is merged, so only
    the running total and one file's counts are in memory at a time.
    Returns ``None`` when ``frames`` yields nothing.
    """
    total = None
    for frame in frames:
        counts = count_urls_per_second(frame)
        if total is None:
            total = counts
        else:
            # A plain concat leaves two rows for any second present in both
            # inputs; summing per second merges them and zero-fills gaps.
            total = pd.concat([total, counts]).resample('s').sum()
    return total


def read_frames(directory):
    """Yield the ``'df'`` table stored in each file of ``directory``."""
    # sorted() makes the processing order deterministic across platforms.
    for fi in sorted(os.listdir(directory)):
        hdfpath = os.path.join(directory, fi)
        print(hdfpath)
        yield pd.read_hdf(hdfpath, 'df')


df = None
if __name__ == "__main__":
    df = accumulate_counts(read_frames(hdfdirectory))
下面的代码读取文件,将它们保存在数据框(DataFrame)中,然后连接所有文件,连接后按每秒对数据重新采样。但这样做太占内存。我想要的是分块逐步处理:例如,先读取两个文件,将它们连接起来并重新采样;然后读取下一个文件,将其与前两个文件的结果连接起来并再次重新采样;如此逐个文件处理,直到 10 个文件全部完成。我该如何修改代码?有人可以帮我吗?以下是我的代码:
import pandas as pd
import os
#import matplotlib.pyplot as plt
#df1 = pd.read_hdf("E:\examples\hdf files\conew1.h5", 'df')
#df2 = pd.read_hdf("E:\examples\hdf files\conew2.h5", 'df')
#df3 = pd.read_hdf("E:\examples\hdf files\conew3.h5", 'df')
# Raw string: the backslashes are literal Windows path separators, not escapes.
hdfdirectory = r"E:\examples\hdf files"
# Kept so existing references to this name still resolve; the per-file loop
# below no longer needs it because each file is folded in exactly once.
number_of_dfs = 1


def per_second_counts(frame):
    """Count the non-null ``url`` values of ``frame`` in one-second bins.

    ``frame`` must expose ``timestamp`` and ``url`` columns; ``timestamp``
    becomes the index that is resampled, so it has to be datetime-like.
    Returns a DataFrame indexed by second with a single ``url`` count column.
    """
    pairs = pd.DataFrame({'timestamp': frame.timestamp, 'url': frame.url})
    return pairs.set_index('timestamp').resample('s').count()


def fold_counts(frames):
    """Reduce an iterable of raw frames to one per-second count table.

    Only the running total and the current file's counts are held at once,
    which is what keeps memory use low compared with concatenating every raw
    frame first.  Returns ``None`` when ``frames`` yields nothing.
    """
    running = None
    for frame in frames:
        counts = per_second_counts(frame)
        if running is None:
            running = counts
            continue
        # Concatenate with the RUNNING total (not the first file's counts),
        # then re-resample with sum() so a second that appears in both
        # inputs collapses into one row and gaps between files become 0.
        running = pd.concat([running, counts]).resample('s').sum()
    return running


def iter_hdf_frames(directory):
    """Yield the ``'df'`` table stored in each file of ``directory``."""
    # sorted() makes the processing order deterministic across platforms.
    for fi in sorted(os.listdir(directory)):
        hdfpath = os.path.join(directory, fi)
        print(hdfpath)
        yield pd.read_hdf(hdfpath, 'df')


df = None
if __name__ == "__main__":
    df = fold_counts(iter_hdf_frames(hdfdirectory))
我尝试写了一个例子来说明我的思路。您可能需要稍作调整,但应该能了解大致的想法:
# Raw string: the backslashes are literal Windows path separators, not escapes.
hdfdirectory = r"E:\examples\hdf files"


def count_urls_per_second(frame):
    """Return the number of non-null ``url`` values in each one-second bin.

    ``frame`` must expose ``timestamp`` and ``url`` columns; ``timestamp``
    becomes the index that is resampled, so it has to be datetime-like.
    The result is indexed by second and has a single ``url`` count column.
    """
    pairs = pd.DataFrame({'timestamp': frame.timestamp, 'url': frame.url})
    return pairs.set_index('timestamp').resample('s').count()


def accumulate_counts(frames):
    """Fold an iterable of raw frames into one per-second count table.

    Each frame is reduced to per-second counts before it is merged, so only
    the running total and one file's counts are in memory at a time.
    Returns ``None`` when ``frames`` yields nothing.
    """
    total = None
    for frame in frames:
        counts = count_urls_per_second(frame)
        if total is None:
            total = counts
        else:
            # A plain concat leaves two rows for any second present in both
            # inputs; summing per second merges them and zero-fills gaps.
            total = pd.concat([total, counts]).resample('s').sum()
    return total


def read_frames(directory):
    """Yield the ``'df'`` table stored in each file of ``directory``."""
    # sorted() makes the processing order deterministic across platforms.
    for fi in sorted(os.listdir(directory)):
        hdfpath = os.path.join(directory, fi)
        print(hdfpath)
        yield pd.read_hdf(hdfpath, 'df')


df = None
if __name__ == "__main__":
    df = accumulate_counts(read_frames(hdfdirectory))