基于 pandas DataFrame 中的值返回 DateTime
Return DateTime based on values in a pandas DF
我正在使用一份数据来创建日期时间列:该数据以“annual_quarters”(各年度季度)作为列名,每个数据点在每个季度对应一个值。
我正在考虑创建一个自定义函数,并通过 .apply 返回我可以使用的数据。但是我似乎无法写出一个使用 iterrows() 和 iteritems() 来迭代每个行/列数据的函数。
这是我尝试的最后一件事。
from datetime import datetime
# Quarterly sales columns paired with the first day of that quarter, ordered
# from newest to oldest so the first positive match is the most recent one.
_QUARTER_START_DATES = (
    ('q4_sales_2021', datetime(2021, 10, 1)),
    ('q3_sales_2021', datetime(2021, 7, 1)),
    ('q2_sales_2021', datetime(2021, 4, 1)),
    ('q1_sales_2021', datetime(2021, 1, 1)),
    ('q4_sales_2020', datetime(2020, 10, 1)),
    ('q3_sales_2020', datetime(2020, 7, 1)),
    ('q2_sales_2020', datetime(2020, 4, 1)),
    ('q1_sales_2020', datetime(2020, 1, 1)),
)


def get_recent_orders(merged_data):
    """Return the start date of the most recent quarter with positive sales.

    The function works on ONE row at a time, so it is meant to be used with
    ``DataFrame.apply(get_recent_orders, axis=1)``. The previous version
    looped over ``merged_data.iteritems()`` and returned on the first
    iteration, which never inspected a row's quarterly values.

    Args:
        merged_data: A single row that supports lookup by column name
            (for example a pandas Series or a dict) and contains the
            ``q<quarter>_sales_<year>`` columns listed in
            ``_QUARTER_START_DATES``.

    Returns:
        The ``datetime`` of the first day of the newest quarter whose sales
        value is greater than 0, or ``None`` if no quarter qualifies.

    Raises:
        KeyError: If a quarter column that has to be checked is missing.
    """
    for column, quarter_start in _QUARTER_START_DATES:
        # NaN compares as not greater than 0, so missing sales are skipped.
        if merged_data[column] > 0:
            return quarter_start
    return None
# axis=1 hands each row (indexed by column name) to get_recent_orders;
# axis=0 would hand it whole columns, so its per-quarter column lookups fail.
merged_data['last_order'] = merged_data.apply(get_recent_orders, axis=1)
使用 numpy.select:
import numpy as np
# Each quarterly sales column mapped to the first day of its quarter.
# Order matters: np.select picks the first condition that is True per row,
# so the newest quarter comes first.
_quarter_starts = {
    'q4_sales_2021': datetime(2021, 10, 1),
    'q3_sales_2021': datetime(2021, 7, 1),
    'q2_sales_2021': datetime(2021, 4, 1),
    'q1_sales_2021': datetime(2021, 1, 1),
    'q4_sales_2020': datetime(2020, 10, 1),
    'q3_sales_2020': datetime(2020, 7, 1),
    'q2_sales_2020': datetime(2020, 4, 1),
    'q1_sales_2020': datetime(2020, 1, 1),
}
# One boolean Series per quarter: True where that quarter's sales exceed 0.
conditions = [merged_data[column].gt(0) for column in _quarter_starts]
choices = list(_quarter_starts.values())
# Rows where no quarter has positive sales get None.
merged_data["last_order"] = np.select(conditions, choices, None)
使用 np.select:
# Conditions are listed newest quarter first because np.select uses the first
# one that is True for each row. They read from merged_data, the same frame
# the result is written to (the snippet previously mixed `df` and
# `merged_data`).
condlist = [merged_data['q4_sales_2021'] > 0,
            merged_data['q3_sales_2021'] > 0,
            merged_data['q2_sales_2021'] > 0,
            merged_data['q1_sales_2021'] > 0,
            merged_data['q4_sales_2020'] > 0,
            merged_data['q3_sales_2020'] > 0,
            merged_data['q2_sales_2020'] > 0,
            merged_data['q1_sales_2020'] > 0]
# First day of the quarter matching each condition above, in the same order.
choicelist = [datetime(2021, 10, 1),
              datetime(2021, 7, 1),
              datetime(2021, 4, 1),
              datetime(2021, 1, 1),
              datetime(2020, 10, 1),
              datetime(2020, 7, 1),
              datetime(2020, 4, 1),
              datetime(2020, 1, 1)]
# The pandas missing-datetime marker is spelled pd.NaT; pd.NAT raises
# AttributeError. It is used for rows with no positive quarterly sales.
default = pd.NaT
merged_data['last_order'] = np.select(condlist, choicelist, default)
您可以这样做:
# Quarter labels, newest first, used as the values of "annual_quarters".
_quarter_labels = [
    "q4_sales_2021",
    "q3_sales_2021",
    "q2_sales_2021",
    "q1_sales_2021",
    "q4_sales_2020",
    "q3_sales_2020",
    "q2_sales_2020",
    "q1_sales_2020",
]
df = pd.DataFrame({"annual_quarters": _quarter_labels})
import datetime as dt
def get_recent_orders(quarter):
    """Return the first day of the quarter named by a quarter label.

    Args:
        quarter: A label whose second character is the quarter number (1-4)
            and whose last four characters are the year, such as
            ``"q4_sales_2021"``. It is converted with ``str()`` first.

    Returns:
        A ``datetime.date`` for day 1 of the quarter's first month
        (January, April, July or October) in the label's year.

    Raises:
        ValueError: If the quarter character or the year is not numeric, or
            the quarter number is outside 1-4.
        IndexError: If the label is shorter than two characters.
    """
    label = str(quarter)
    quarter_number = int(label[1])
    if not 1 <= quarter_number <= 4:
        # Without this check a "q0" label would index month_list[-1] and
        # silently return October instead of failing.
        raise ValueError(
            "invalid quarter in {!r}: expected q1-q4".format(label)
        )
    # First month of quarters 1 through 4.
    month_list = [1, 4, 7, 10]
    month = month_list[quarter_number - 1]
    year = int(label[-4:])
    return dt.date(year, month, 1)
# Call get_recent_orders on every quarter label and store the results.
df["last_order"] = df["annual_quarters"].apply(get_recent_orders)
由于月份取决于季度(Q1、Q2、Q3、Q4 分别对应 1 月、4 月、7 月、10 月),您可以返回一个 datetime.date:
根据字符串的第二个字符查出月份,取最后 4 个字符作为年份,并令 day = 1。这样就完全不需要遍历行。
我正在使用一份数据来创建日期时间列:该数据以“annual_quarters”(各年度季度)作为列名,每个数据点在每个季度对应一个值。
我正在考虑创建一个自定义函数,并通过 .apply 返回我可以使用的数据。但是我似乎无法写出一个使用 iterrows() 和 iteritems() 来迭代每个行/列数据的函数。
这是我尝试的最后一件事。
from datetime import datetime
# Quarterly sales columns paired with the first day of that quarter, ordered
# from newest to oldest so the first positive match is the most recent one.
_QUARTER_START_DATES = (
    ('q4_sales_2021', datetime(2021, 10, 1)),
    ('q3_sales_2021', datetime(2021, 7, 1)),
    ('q2_sales_2021', datetime(2021, 4, 1)),
    ('q1_sales_2021', datetime(2021, 1, 1)),
    ('q4_sales_2020', datetime(2020, 10, 1)),
    ('q3_sales_2020', datetime(2020, 7, 1)),
    ('q2_sales_2020', datetime(2020, 4, 1)),
    ('q1_sales_2020', datetime(2020, 1, 1)),
)


def get_recent_orders(merged_data):
    """Return the start date of the most recent quarter with positive sales.

    The function works on ONE row at a time, so it is meant to be used with
    ``DataFrame.apply(get_recent_orders, axis=1)``. The previous version
    looped over ``merged_data.iteritems()`` and returned on the first
    iteration, which never inspected a row's quarterly values.

    Args:
        merged_data: A single row that supports lookup by column name
            (for example a pandas Series or a dict) and contains the
            ``q<quarter>_sales_<year>`` columns listed in
            ``_QUARTER_START_DATES``.

    Returns:
        The ``datetime`` of the first day of the newest quarter whose sales
        value is greater than 0, or ``None`` if no quarter qualifies.

    Raises:
        KeyError: If a quarter column that has to be checked is missing.
    """
    for column, quarter_start in _QUARTER_START_DATES:
        # NaN compares as not greater than 0, so missing sales are skipped.
        if merged_data[column] > 0:
            return quarter_start
    return None
# axis=1 hands each row (indexed by column name) to get_recent_orders;
# axis=0 would hand it whole columns, so its per-quarter column lookups fail.
merged_data['last_order'] = merged_data.apply(get_recent_orders, axis=1)
使用 numpy.select:
import numpy as np
# Each quarterly sales column mapped to the first day of its quarter.
# Order matters: np.select picks the first condition that is True per row,
# so the newest quarter comes first.
_quarter_starts = {
    'q4_sales_2021': datetime(2021, 10, 1),
    'q3_sales_2021': datetime(2021, 7, 1),
    'q2_sales_2021': datetime(2021, 4, 1),
    'q1_sales_2021': datetime(2021, 1, 1),
    'q4_sales_2020': datetime(2020, 10, 1),
    'q3_sales_2020': datetime(2020, 7, 1),
    'q2_sales_2020': datetime(2020, 4, 1),
    'q1_sales_2020': datetime(2020, 1, 1),
}
# One boolean Series per quarter: True where that quarter's sales exceed 0.
conditions = [merged_data[column].gt(0) for column in _quarter_starts]
choices = list(_quarter_starts.values())
# Rows where no quarter has positive sales get None.
merged_data["last_order"] = np.select(conditions, choices, None)
使用 np.select:
# Conditions are listed newest quarter first because np.select uses the first
# one that is True for each row. They read from merged_data, the same frame
# the result is written to (the snippet previously mixed `df` and
# `merged_data`).
condlist = [merged_data['q4_sales_2021'] > 0,
            merged_data['q3_sales_2021'] > 0,
            merged_data['q2_sales_2021'] > 0,
            merged_data['q1_sales_2021'] > 0,
            merged_data['q4_sales_2020'] > 0,
            merged_data['q3_sales_2020'] > 0,
            merged_data['q2_sales_2020'] > 0,
            merged_data['q1_sales_2020'] > 0]
# First day of the quarter matching each condition above, in the same order.
choicelist = [datetime(2021, 10, 1),
              datetime(2021, 7, 1),
              datetime(2021, 4, 1),
              datetime(2021, 1, 1),
              datetime(2020, 10, 1),
              datetime(2020, 7, 1),
              datetime(2020, 4, 1),
              datetime(2020, 1, 1)]
# The pandas missing-datetime marker is spelled pd.NaT; pd.NAT raises
# AttributeError. It is used for rows with no positive quarterly sales.
default = pd.NaT
merged_data['last_order'] = np.select(condlist, choicelist, default)
您可以这样做:
# Quarter labels, newest first, used as the values of "annual_quarters".
_quarter_labels = [
    "q4_sales_2021",
    "q3_sales_2021",
    "q2_sales_2021",
    "q1_sales_2021",
    "q4_sales_2020",
    "q3_sales_2020",
    "q2_sales_2020",
    "q1_sales_2020",
]
df = pd.DataFrame({"annual_quarters": _quarter_labels})
import datetime as dt
def get_recent_orders(quarter):
    """Return the first day of the quarter named by a quarter label.

    Args:
        quarter: A label whose second character is the quarter number (1-4)
            and whose last four characters are the year, such as
            ``"q4_sales_2021"``. It is converted with ``str()`` first.

    Returns:
        A ``datetime.date`` for day 1 of the quarter's first month
        (January, April, July or October) in the label's year.

    Raises:
        ValueError: If the quarter character or the year is not numeric, or
            the quarter number is outside 1-4.
        IndexError: If the label is shorter than two characters.
    """
    label = str(quarter)
    quarter_number = int(label[1])
    if not 1 <= quarter_number <= 4:
        # Without this check a "q0" label would index month_list[-1] and
        # silently return October instead of failing.
        raise ValueError(
            "invalid quarter in {!r}: expected q1-q4".format(label)
        )
    # First month of quarters 1 through 4.
    month_list = [1, 4, 7, 10]
    month = month_list[quarter_number - 1]
    year = int(label[-4:])
    return dt.date(year, month, 1)
# Call get_recent_orders on every quarter label and store the results.
df["last_order"] = df["annual_quarters"].apply(get_recent_orders)
由于月份取决于季度(Q1、Q2、Q3、Q4 分别对应 1 月、4 月、7 月、10 月),您可以返回一个 datetime.date:
根据字符串的第二个字符查出月份,取最后 4 个字符作为年份,并令 day = 1。这样就完全不需要遍历行。