根据列名用 0 填充 DataFrame 的列行
Fill rows of columns of a DataFrame with 0s based on the column name
我有一个 DataFrame
看起来像这样:
dput(head(f))
structure(list(`2021-04-01` = c(18.75, 18.8047425327496, 19.044178996207,
19.1751973213021, 19.1689792828825, 19.5836158822698), `2021-05-01` = c(18.68,
18.6054569055353, 18.8313151379181, 19.0204171560939, 19.0789341312536,
19.5202197400974), `2021-06-01` = c(18.7, 18.6167441350011, 18.8507363020792,
19.0240964484369, 19.0811699993501, 19.5312231113571), `2021-07-01` = c(18.74,
18.6232367708829, 18.8951394692142, 19.0857033425782, 19.1976392153446,
19.6658807214903), `2021-08-01` = c(18.77, 18.644593579267, 18.9246150091022,
19.1032894987568, 19.2156342675263, 19.6849727955065), `2021-09-01` = c(18.84,
18.7266061665193, 19.0136981423636, 19.2080462372435, 19.3252661741782,
19.7976573506291), `2021-10-01` = c(19.16, 19.0429325189963,
19.2868666432121, 19.5302122400335, 19.6246896164923, 20.0574681959085
), `2021-11-01` = c(19.61, 19.4885636931401, 19.7281581945936,
19.9833632850547, 20.1125365664892, 20.5253519746824), `2021-12-01` = c(20.49,
20.3817436029844, 20.615343551369, 20.8552877862877, 20.9919455460469,
21.3954384388117), `2022-01-01` = c(20.48, 20.3740323095769,
20.6359913586072, 20.8916278650574, 21.0735209988655, 21.4729020165058
), `2022-02-01` = c(20.61, 20.5005105056777, 20.7692639443274,
21.0278974843904, 21.2088372694747, 21.6274189250732), `2022-03-01` = c(20.01,
19.8989327908198, 20.160435596222, 20.4165374436547, 20.5841189948464,
20.9775903556065), `2022-04-01` = c(17.33, 17.2371485625473,
17.4547245416883, 17.5005672257715, 17.6708376845902, 18.1363806857805
), `2022-05-01` = c(16.98, 16.8871485729785, 17.1047245517176,
17.1505672343433, 17.320837688241, 17.7863806850577), `2022-06-01` = c(16.93,
16.8371485710296, 17.0547245439537, 17.100567227402, 17.270837681622,
17.7363806838834), `2022-07-01` = c(16.55, 16.4602929774512,
16.679186983992, 16.72087791148, 16.902375852915, 17.3788582533972
), `2022-08-01` = c(16.55, 16.4602929762459, 16.6791869839035,
16.7208779112979, 16.902375853404, 17.3788582563241), `2022-09-01` = c(16.63,
16.5371485669301, 16.7547245454607, 16.8005672316863, 16.9708376873097,
17.4363806848043), `2022-10-01` = c(17.55, 17.4565144059899,
17.6932969707138, 17.7610505877454, 17.9658483820753, 18.3789383274342
), `2022-11-01` = c(17.95, 17.8565144105445, 18.0932969766868,
18.1610505955585, 18.3658483847519, 18.7789383271028), `2022-12-01` = c(18.83,
18.746506350021, 18.9775786073436, 19.0456532330963, 19.247708755873,
19.6618961429746), `2023-01-01` = c(18.85, 18.7565144062112,
18.9932969711003, 19.0610505896545, 19.2658483811053, 19.6789383251607
), `2023-02-01` = c(18.9, 18.8065144004764, 19.0432969669948,
19.1110505845328, 19.3158483743015, 19.7289383179684), `2023-03-01` = c(18.48,
18.3965063458801, 18.6275786028996, 18.695653229726, 18.8977087489342,
19.3118961401579), `2023-04-01` = c(16.45, 16.3946104511309,
16.6014635966272, 16.5836894226683, 16.7780479423448, 17.1192896129417
), `2023-05-01` = c(16.1, 16.0446104494817, 16.2514635937641,
16.2336894172376, 16.4280479394591, 16.7692896149847), `2023-06-01` = c(16.05,
15.9946104513846, 16.2014635962793, 16.1836894234373, 16.3780479477376,
16.7192896151668), `2023-07-01` = c(15.68, 15.6196427933565,
15.8264798722289, 15.8067013586247, 15.9929523803558, 16.3360049618321
), `2023-08-01` = c(15.68, 15.6196427956788, 15.8264798738901,
15.8067013650241, 15.9929523892458, 16.3360049695611), `2023-09-01` = c(15.75,
15.6946104485846, 15.9014635974283, 15.8836894236607, 16.0780479442838,
16.4192896139878), `2023-10-01` = c(16.55, 16.4859287021049,
16.6972792014036, 16.6652734109894, 16.9082634547469, 17.2600362319714
), `2023-11-01` = c(16.95, 16.88772137434, 17.1001895977296,
17.0660375905701, 17.3119906079332, 17.6598028036498), `2023-12-01` = c(17.83,
17.7629779733904, 17.9769203281466, 17.9457670813362, 18.1919876279738,
18.5322466212441), `2024-01-01` = c(17.85, 17.7877213736662,
18.0001895966288, 17.9660375873591, 18.21199060611, 18.5598028001363
), `2024-02-01` = c(17.9, 17.8377213696917, 18.0501895967882,
18.0160375825238, 18.2619906037143, 18.609802799337), `2024-03-01` = c(17.48,
17.412977978269, 17.6269203306822, 17.5957670842671, 17.8419876327419,
18.182246625778), `2024-04-01` = c(15.58, 15.6320820495314, 15.9040941973079,
16.0011191104772, 16.1344022617966, 16.2335337117727), `2024-05-01` = c(15.23,
15.282082046754, 15.5540941922692, 15.6511191067781, 15.7844022615365,
15.8835337091116), `2024-06-01` = c(15.18, 15.2320820475369,
15.5040941967701, 15.6011191055654, 15.7344022621257, 15.8335337059841
), `2024-07-01` = c(14.8, 14.8510913081696, 15.1150995970647,
15.2182582392206, 15.34485846568, 15.4485814541912), `2024-08-01` = c(14.8,
14.8510913086602, 15.1150995968972, 15.2182582375734, 15.3448584628037,
15.4485814528923), `2024-09-01` = c(14.88, 14.9320820482739,
15.2040941957414, 15.3011191040817, 15.434402262001, 15.5335337090296
), `2024-10-01` = c(15.48, 15.4841139854442, 15.5870431340295,
15.6417314142646, 15.8049227959112, 16.098072929477), `2024-11-01` = c(15.88,
15.8841139866634, 15.9870431368691, 16.0417314145131, 16.2049227962264,
16.4980729322541), `2024-12-01` = c(16.75, 16.7489599312631,
16.8369242831981, 16.8966205174003, 17.071500700515, 17.3712327127006
), `2025-01-01` = c(16.78, 16.6562480763831, 16.6057902890351,
16.4748384688119, 16.6420566625675, 16.8092903223808), `2025-02-01` = c(16.83,
16.7062480758168, 16.6557902869362, 16.5248384644428, 16.69205666146,
16.8592903220325), `2025-03-01` = c(16.4, 16.2779097817421, 16.2307811943544,
16.1021665690717, 16.2687265649343, 16.4311314730022), `2025-04-01` = c(15.73,
15.7087537504016, 15.6075842903385, 15.5299411837982, 15.7125519597493,
15.8306868005935), `2025-05-01` = c(15.38, 15.3587537511295,
15.257584289102, 15.1799411833166, 15.3625519604448, 15.480686801248
), `2025-06-01` = c(15.33, 15.3087537532081, 15.2075842896311,
15.129941184956, 15.312551964791, 15.4306868025988), `2025-07-01` = c(14.95,
14.9219729698033, 14.8130638959243, 14.7435847446705, 14.9181912547184,
15.0389778390791), `2025-08-01` = c(14.95, 14.9219729696512,
14.8130638953738, 14.7435847412341, 14.9181912530557, 15.0389778362589
), `2025-09-01` = c(15.03, 15.0087537477099, 14.9075842830852,
14.8299411740066, 15.0125519532892, 15.1306867930961), `2025-10-01` = c(15.75,
15.7563564789682, 15.6174493875532, 15.5606783126084, 15.7533723756061,
15.8531515873088), `2025-11-01` = c(16.15, 16.1563564821382,
16.0174493913382, 15.9606783194065, 16.1533723777501, 16.2531515879125
), `2025-12-01` = c(17.03, 17.0272458979667, 16.8737455455617,
16.8203287006477, 17.0225669443998, 17.1255973103374), Date = c("2021-04-01",
"2021-04-02", "2021-04-03", "2021-04-04", "2021-04-05", "2021-04-06"
)), row.names = c(NA, 6L), class = "data.frame")
如您所见,我有一个名为日期的列,日期从 2021-04-01
到 2025-12-31
。我也有专栏,这些日期之间的每个月都有一个专栏。因此,2021 年 4 月、2021 年 5 月、...、2025 年 12 月的专栏。
我想要的是优化我正在使用的nested for loop
(可能与dplyr
)如下:
for(i in 1:nrow(f)):
for(j in 1:ncol(f)):
if(as.character(f$Date[i]) != colnames(f)[j]):
f[i, j] <- 0
我尝试使用评论的建议来做到这一点并且它有效,但不知何故速度较慢:
require(tidyr)
long <- f %>%
gather(Month_Product, Price, -c(Date)) %>%
filter(Month_Product == as.character(floor_date(ymd(Date), unit = "month")))%>%
spread(Month_Product, Price) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
mutate(SPOT = rowSums(across(where(is.numeric)))) %>%
dplyr::select(Date, SPOT)
因此,如果我只取结果的头部 DataFrame
它看起来会有 2021-04-01
的列,因为前 6 个日期值在 [=14] 月份=] 和其他 columns
值都将等于 0。
我希望这是清楚的,如果需要我可以提供进一步的说明。
谢谢。
Base解决方案,借助lubridate包获取floor date
library(lubridate) # floor_date
# get index where floor date of "Date" doesn't match column name
ix <- matrix(
rep(head(colnames(f), -1), each = nrow(f)),
nrow = nrow(f)) != as.character(floor_date(ymd(f$Date), unit = "month"))
# assign 0, based on index, excluding the last "Date" column
f[, -ncol(f)][ ix ] <- 0
# then row sums
data.frame(f$Date,
SPOT = rowSums(f[, -ncol(f)]))
# Date SPOT
# 1 2021-04-01 18.75000
# 2 2021-04-02 18.80474
# 3 2021-04-03 19.04418
# 4 2021-04-04 19.17520
# 5 2021-04-05 19.16898
# 6 2021-04-06 19.58362
再澄清一点:获取不包括最后一个“日期”列的列名,然后重复每个列的行数,然后转换为矩阵。然后将此矩阵的每一列与“日期”列的底部进行比较。这将为我们提供逻辑 (TRUE/FALSE) 矩阵,然后我们将其用作索引以分配零。最后,按行求和,不包括最后一个“日期”列。
我有一个 DataFrame
看起来像这样:
dput(head(f))
structure(list(`2021-04-01` = c(18.75, 18.8047425327496, 19.044178996207,
19.1751973213021, 19.1689792828825, 19.5836158822698), `2021-05-01` = c(18.68,
18.6054569055353, 18.8313151379181, 19.0204171560939, 19.0789341312536,
19.5202197400974), `2021-06-01` = c(18.7, 18.6167441350011, 18.8507363020792,
19.0240964484369, 19.0811699993501, 19.5312231113571), `2021-07-01` = c(18.74,
18.6232367708829, 18.8951394692142, 19.0857033425782, 19.1976392153446,
19.6658807214903), `2021-08-01` = c(18.77, 18.644593579267, 18.9246150091022,
19.1032894987568, 19.2156342675263, 19.6849727955065), `2021-09-01` = c(18.84,
18.7266061665193, 19.0136981423636, 19.2080462372435, 19.3252661741782,
19.7976573506291), `2021-10-01` = c(19.16, 19.0429325189963,
19.2868666432121, 19.5302122400335, 19.6246896164923, 20.0574681959085
), `2021-11-01` = c(19.61, 19.4885636931401, 19.7281581945936,
19.9833632850547, 20.1125365664892, 20.5253519746824), `2021-12-01` = c(20.49,
20.3817436029844, 20.615343551369, 20.8552877862877, 20.9919455460469,
21.3954384388117), `2022-01-01` = c(20.48, 20.3740323095769,
20.6359913586072, 20.8916278650574, 21.0735209988655, 21.4729020165058
), `2022-02-01` = c(20.61, 20.5005105056777, 20.7692639443274,
21.0278974843904, 21.2088372694747, 21.6274189250732), `2022-03-01` = c(20.01,
19.8989327908198, 20.160435596222, 20.4165374436547, 20.5841189948464,
20.9775903556065), `2022-04-01` = c(17.33, 17.2371485625473,
17.4547245416883, 17.5005672257715, 17.6708376845902, 18.1363806857805
), `2022-05-01` = c(16.98, 16.8871485729785, 17.1047245517176,
17.1505672343433, 17.320837688241, 17.7863806850577), `2022-06-01` = c(16.93,
16.8371485710296, 17.0547245439537, 17.100567227402, 17.270837681622,
17.7363806838834), `2022-07-01` = c(16.55, 16.4602929774512,
16.679186983992, 16.72087791148, 16.902375852915, 17.3788582533972
), `2022-08-01` = c(16.55, 16.4602929762459, 16.6791869839035,
16.7208779112979, 16.902375853404, 17.3788582563241), `2022-09-01` = c(16.63,
16.5371485669301, 16.7547245454607, 16.8005672316863, 16.9708376873097,
17.4363806848043), `2022-10-01` = c(17.55, 17.4565144059899,
17.6932969707138, 17.7610505877454, 17.9658483820753, 18.3789383274342
), `2022-11-01` = c(17.95, 17.8565144105445, 18.0932969766868,
18.1610505955585, 18.3658483847519, 18.7789383271028), `2022-12-01` = c(18.83,
18.746506350021, 18.9775786073436, 19.0456532330963, 19.247708755873,
19.6618961429746), `2023-01-01` = c(18.85, 18.7565144062112,
18.9932969711003, 19.0610505896545, 19.2658483811053, 19.6789383251607
), `2023-02-01` = c(18.9, 18.8065144004764, 19.0432969669948,
19.1110505845328, 19.3158483743015, 19.7289383179684), `2023-03-01` = c(18.48,
18.3965063458801, 18.6275786028996, 18.695653229726, 18.8977087489342,
19.3118961401579), `2023-04-01` = c(16.45, 16.3946104511309,
16.6014635966272, 16.5836894226683, 16.7780479423448, 17.1192896129417
), `2023-05-01` = c(16.1, 16.0446104494817, 16.2514635937641,
16.2336894172376, 16.4280479394591, 16.7692896149847), `2023-06-01` = c(16.05,
15.9946104513846, 16.2014635962793, 16.1836894234373, 16.3780479477376,
16.7192896151668), `2023-07-01` = c(15.68, 15.6196427933565,
15.8264798722289, 15.8067013586247, 15.9929523803558, 16.3360049618321
), `2023-08-01` = c(15.68, 15.6196427956788, 15.8264798738901,
15.8067013650241, 15.9929523892458, 16.3360049695611), `2023-09-01` = c(15.75,
15.6946104485846, 15.9014635974283, 15.8836894236607, 16.0780479442838,
16.4192896139878), `2023-10-01` = c(16.55, 16.4859287021049,
16.6972792014036, 16.6652734109894, 16.9082634547469, 17.2600362319714
), `2023-11-01` = c(16.95, 16.88772137434, 17.1001895977296,
17.0660375905701, 17.3119906079332, 17.6598028036498), `2023-12-01` = c(17.83,
17.7629779733904, 17.9769203281466, 17.9457670813362, 18.1919876279738,
18.5322466212441), `2024-01-01` = c(17.85, 17.7877213736662,
18.0001895966288, 17.9660375873591, 18.21199060611, 18.5598028001363
), `2024-02-01` = c(17.9, 17.8377213696917, 18.0501895967882,
18.0160375825238, 18.2619906037143, 18.609802799337), `2024-03-01` = c(17.48,
17.412977978269, 17.6269203306822, 17.5957670842671, 17.8419876327419,
18.182246625778), `2024-04-01` = c(15.58, 15.6320820495314, 15.9040941973079,
16.0011191104772, 16.1344022617966, 16.2335337117727), `2024-05-01` = c(15.23,
15.282082046754, 15.5540941922692, 15.6511191067781, 15.7844022615365,
15.8835337091116), `2024-06-01` = c(15.18, 15.2320820475369,
15.5040941967701, 15.6011191055654, 15.7344022621257, 15.8335337059841
), `2024-07-01` = c(14.8, 14.8510913081696, 15.1150995970647,
15.2182582392206, 15.34485846568, 15.4485814541912), `2024-08-01` = c(14.8,
14.8510913086602, 15.1150995968972, 15.2182582375734, 15.3448584628037,
15.4485814528923), `2024-09-01` = c(14.88, 14.9320820482739,
15.2040941957414, 15.3011191040817, 15.434402262001, 15.5335337090296
), `2024-10-01` = c(15.48, 15.4841139854442, 15.5870431340295,
15.6417314142646, 15.8049227959112, 16.098072929477), `2024-11-01` = c(15.88,
15.8841139866634, 15.9870431368691, 16.0417314145131, 16.2049227962264,
16.4980729322541), `2024-12-01` = c(16.75, 16.7489599312631,
16.8369242831981, 16.8966205174003, 17.071500700515, 17.3712327127006
), `2025-01-01` = c(16.78, 16.6562480763831, 16.6057902890351,
16.4748384688119, 16.6420566625675, 16.8092903223808), `2025-02-01` = c(16.83,
16.7062480758168, 16.6557902869362, 16.5248384644428, 16.69205666146,
16.8592903220325), `2025-03-01` = c(16.4, 16.2779097817421, 16.2307811943544,
16.1021665690717, 16.2687265649343, 16.4311314730022), `2025-04-01` = c(15.73,
15.7087537504016, 15.6075842903385, 15.5299411837982, 15.7125519597493,
15.8306868005935), `2025-05-01` = c(15.38, 15.3587537511295,
15.257584289102, 15.1799411833166, 15.3625519604448, 15.480686801248
), `2025-06-01` = c(15.33, 15.3087537532081, 15.2075842896311,
15.129941184956, 15.312551964791, 15.4306868025988), `2025-07-01` = c(14.95,
14.9219729698033, 14.8130638959243, 14.7435847446705, 14.9181912547184,
15.0389778390791), `2025-08-01` = c(14.95, 14.9219729696512,
14.8130638953738, 14.7435847412341, 14.9181912530557, 15.0389778362589
), `2025-09-01` = c(15.03, 15.0087537477099, 14.9075842830852,
14.8299411740066, 15.0125519532892, 15.1306867930961), `2025-10-01` = c(15.75,
15.7563564789682, 15.6174493875532, 15.5606783126084, 15.7533723756061,
15.8531515873088), `2025-11-01` = c(16.15, 16.1563564821382,
16.0174493913382, 15.9606783194065, 16.1533723777501, 16.2531515879125
), `2025-12-01` = c(17.03, 17.0272458979667, 16.8737455455617,
16.8203287006477, 17.0225669443998, 17.1255973103374), Date = c("2021-04-01",
"2021-04-02", "2021-04-03", "2021-04-04", "2021-04-05", "2021-04-06"
)), row.names = c(NA, 6L), class = "data.frame")
如您所见,我有一个名为日期的列,日期从 2021-04-01
到 2025-12-31
。我也有专栏,这些日期之间的每个月都有一个专栏。因此,2021 年 4 月、2021 年 5 月、...、2025 年 12 月的专栏。
我想要的是优化我正在使用的nested for loop
(可能与dplyr
)如下:
for(i in 1:nrow(f)):
for(j in 1:ncol(f)):
if(as.character(f$Date[i]) != colnames(f)[j]):
f[i, j] <- 0
我尝试使用评论的建议来做到这一点并且它有效,但不知何故速度较慢:
require(tidyr)
long <- f %>%
gather(Month_Product, Price, -c(Date)) %>%
filter(Month_Product == as.character(floor_date(ymd(Date), unit = "month")))%>%
spread(Month_Product, Price) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
mutate(SPOT = rowSums(across(where(is.numeric)))) %>%
dplyr::select(Date, SPOT)
因此,如果我只取结果的头部 DataFrame
它看起来会有 2021-04-01
的列,因为前 6 个日期值在 [=14] 月份=] 和其他 columns
值都将等于 0。
我希望这是清楚的,如果需要我可以提供进一步的说明。
谢谢。
Base解决方案,借助lubridate包获取floor date
library(lubridate) # floor_date
# get index where floor date of "Date" doesn't match column name
ix <- matrix(
rep(head(colnames(f), -1), each = nrow(f)),
nrow = nrow(f)) != as.character(floor_date(ymd(f$Date), unit = "month"))
# assign 0, based on index, excluding the last "Date" column
f[, -ncol(f)][ ix ] <- 0
# then row sums
data.frame(f$Date,
SPOT = rowSums(f[, -ncol(f)]))
# Date SPOT
# 1 2021-04-01 18.75000
# 2 2021-04-02 18.80474
# 3 2021-04-03 19.04418
# 4 2021-04-04 19.17520
# 5 2021-04-05 19.16898
# 6 2021-04-06 19.58362
再澄清一点:获取不包括最后一个“日期”列的列名,然后重复每个列的行数,然后转换为矩阵。然后将此矩阵的每一列与“日期”列的底部进行比较。这将为我们提供逻辑 (TRUE/FALSE) 矩阵,然后我们将其用作索引以分配零。最后,按行求和,不包括最后一个“日期”列。