R 中按组执行的一系列数学运算
sequence of mathematical actions divided by groups in R
我有数据。这里的例子
mydat=structure(list(ItemRelation = c(11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L
), SaleCount = c(0L, 0L, 6L, 0L, 38L, -14L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 33L, 0L, -10L, -2L, 0L, 22L, -4L, 0L, 0L, -5L, 3L, 0L,
28L, -14L, 0L, 0L, 0L, 0L, 0L, 21L, -5L, 0L, 0L, 0L, 0L, 0L,
32L, -8L, 6L, 0L, 0L, 0L, 0L, 33L, -7L, 0L, 0L, 0L, 3L, -3L,
47L, -22L, 0L, 0L, 0L, 0L, 0L, 26L, -3L, 0L, 0L, 0L, 6L, 0L,
0L, 6L, 0L, 38L, -14L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 33L, 0L, -10L,
-2L, 0L, 22L, -4L, 0L, 0L, -5L, 3L, 0L, 28L, -14L, 0L, 0L, 0L,
0L, 0L, 21L, -5L, 0L, 0L, 0L, 0L, 0L, 32L, -8L, 6L, 0L, 0L, 0L,
0L, 33L, -7L, 0L, 0L, 0L, 3L, -3L, 47L, -22L, 0L, 0L, 0L, 0L,
0L, 26L, -3L, 0L, 0L, 0L, 6L), DocumentNum = c(3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L), IsPromo = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L)), .Names = c("ItemRelation",
"SaleCount", "DocumentNum", "IsPromo"), class = "data.frame", row.names = c(NA,
-132L))
数据包含两组按 ItemRelation+DocumentNum 列。
11628 3270
11627 3271
有一个 IsPromo 列。它只能取两个值
0 或 1。
所以,对于 IsPromo 为 0 的类别,我需要对 SaleCount 中既非负数也非零的值求和,也就是只求正值之和。
在这种情况下
6
38
33
22
3
28
21
6
sum=157.
然后我需要只对负值求和:
-14
-10
-2
-4
-5
-14
-5
sum=-54
那我一定要把这两个值相加!
157+-54=103
之后,我需要103除以正值的总数。
这里只有8个正值。 103/8=12,875。对于 ispromo 列的零类别。
对于 IsPromo 为 1 的类别,我需要对 SaleCount 的所有值(包括正值和负值)求和。
32
-8
6
33
-7
3
-3
47
-22
26
-3
sum=104
然后这个结果我需要除以总计数正值。
这是6
32
6
33
3
47
26
104/6=17,33333333
最后是最终结果。我需要从这个值(17,33333333)中减去 IsPromo 为 0 的类别的结果,即 103 除以正值总数所得的值
*103/8=12,875*
并将其乘以第一类 ispromo 的正值计数
在我们的例子中是 6
17,33333333-(12,875*6)= -59,91666667
必须对每个组进行此数学运算
11628 3270
11627 3271
怎么做?
正如预期的输出
ItemRelation DocumentNum Ispromo_by_SaleCount_sum_of_not_negative_or_zero_value for_negative_value
1 11628 3270 157 -54
2 11627 3271 157 -54
substract_positive_and_negative Ispromo_by_salescount_i_need_get_sum_all_values_and_positive_and_negative
1 103 104
2 103 104
divide_on_total_count_positive_value._It_is_5 end_result
1 12.875 -59.9
2 12.875 -59.9
或输入预期结果
# Expected result for the first sample data set: one row per
# (ItemRelation, DocumentNum) group. Values are copied from the expected-output
# table in the question; end_result is the rounded value shown there.
expect <- structure(list(ItemRelation = c(11628L, 11627L), DocumentNum = 3270:3271,
    Ispromo_by_SaleCount_sum_of_not_negative_or_zero_value = c(157L, 157L),
    for_negative_value = c(-54L, -54L),
    substract_positive_and_negative = c(103L, 103L),
    Ispromo_by_salescount_i_need_get_sum_all_values_and_positive_and_negative = c(104L, 104L),
    divide_on_total_count_positive_value._It_is_5 = c(12.875, 12.875),
    end_result = c(-59.9, -59.9)), .Names = c("ItemRelation",
"DocumentNum", "Ispromo_by_SaleCount_sum_of_not_negative_or_zero_value",
"for_negative_value", "substract_positive_and_negative", "Ispromo_by_salescount_i_need_get_sum_all_values_and_positive_and_negative",
"divide_on_total_count_positive_value._It_is_5", "end_result"
), class = "data.frame", row.names = c(NA, -2L))
使用特定数据进行编辑
如果对于 SaleCount 的零类别 ispromo 我只有零值或负值,那么默认情况下 x4 必须 =0。
还有另一种变体:
如果对于 SaleCount 的一类 ispromo 只有零值或负值
然后X6计算为X6=0-x4。
这里的数据
而且当然可以同时是两个变体,就像我的例子一样。
mydat=structure(list(ItemRelation = c(11709L, 11709L, 11709L, 11709L,
11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L,
11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L,
11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L,
11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L,
11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L,
11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L
), SaleCount = c(0L, 0L, -1L, 0L, 0L, 0L, -2L, 0L, 0L, -1L, 0L,
0L, 0L, -1L, -1L, 0L, 0L, -1L, 0L, 0L, 0L, 0L, -1L, 0L, 0L, 0L,
0L, 0L, 0L, -2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, -1L, 0L, 0L,
0L, -1L, 0L, 0L, 0L, 1L, -2L, 0L, 0L, 0L, 0L), DocumentNum = c(1002L,
1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L,
1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L,
1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L,
1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L,
1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L,
1002L, 1002L, 1002L, 1002L, 1002L, 1002L), IsPromo = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L)), .Names = c("ItemRelation", "SaleCount", "DocumentNum",
"IsPromo"), class = "data.frame", row.names = c(NA, -52L))
此处输出
ItemRelation DocumentNum CalendarYear X1 X2 X3 X4 X5 X6
1 11709 1002 2018 any value any value any value 0 any value 0-x4=0
library(dplyr)
# Summarise each (ItemRelation, DocumentNum) group:
#   X1 - sum of the positive SaleCount values where IsPromo == 0
#   X2 - sum of the negative SaleCount values where IsPromo == 0
#   X3 - X1 + X2
#   X4 - X3 divided by the number of positive SaleCount rows with IsPromo == 0
#   X5 - sum of all SaleCount values where IsPromo == 1
#   X6 - X5 divided by the number of positive promo rows, minus X4 multiplied
#        by that same count
mydat %>%
  group_by(ItemRelation, DocumentNum) %>%
  summarise(
    X1 = sum(SaleCount[IsPromo == 0 & SaleCount > 0]),
    X2 = sum(SaleCount[IsPromo == 0 & SaleCount < 0]),
    X3 = X1 + X2,
    # Summing a logical vector counts its TRUE entries.
    X4 = X3 / sum(IsPromo == 0 & SaleCount > 0),
    X5 = sum(SaleCount[IsPromo == 1]),
    # X4 already holds X3 / (count of positive non-promo rows), so reuse it.
    X6 = X5 / sum(IsPromo == 1 & SaleCount > 0) -
      X4 * sum(IsPromo == 1 & SaleCount > 0)
  ) %>%
  ungroup()
# # A tibble: 2 x 8
# ItemRelation DocumentNum X1 X2 X3 X4 X5 X6
# <int> <int> <int> <int> <int> <dbl> <int> <dbl>
# 1 11627 3271 157 -54 103 12.9 104 -59.9
# 2 11628 3270 157 -54 103 12.9 104 -59.9
如您所见,此过程的关键是能够使用适当的值子集对 SaleCount 列求和(sum)。例如:sum(SaleCount[SaleCount > 0 & IsPromo == 0]) 只会对 SaleCount 大于 0 且 IsPromo 等于 0 的行计算 SaleCount 的 sum。
以类似的方式,我们可以使用 sum(SaleCount > 0 & IsPromo == 0) 来统计 SaleCount 大于 0 且 IsPromo 等于 0 的观察值的个数,因为对由 TRUE 和 FALSE 组成的(逻辑)向量求 sum,得到的就是 TRUE 的个数。
对于您的编辑,试试这个:
# Same per-(ItemRelation, DocumentNum) summary as the unguarded version, with
# the zero-count cases handled:
#   X4 is forced to 0 when the group has no positive SaleCount with IsPromo == 0.
#   X6 is -X4 when the group has no positive SaleCount with IsPromo == 1.
mydat %>%
  group_by(ItemRelation, DocumentNum) %>%
  summarise(X1 = sum(SaleCount[SaleCount > 0 & IsPromo == 0]),
            X2 = sum(SaleCount[SaleCount < 0 & IsPromo == 0]),
            X3 = X1 + X2,
            X4 = ifelse(sum(SaleCount > 0 & IsPromo == 0) == 0, 0,
                        X3 / sum(SaleCount > 0 & IsPromo == 0)),
            X5 = sum(SaleCount[IsPromo == 1]),
            # The else-branch still divides X3 by the non-promo positive count
            # without a guard; a zero count yields +/-Inf (X3 != 0) or NaN
            # (X3 == 0), which the final step replaces with 0.
            X6 = ifelse(sum(SaleCount > 0 & IsPromo == 1) == 0, -X4,
                        X5 / sum(SaleCount > 0 & IsPromo == 1) -
                          X3 / sum(SaleCount > 0 & IsPromo == 0) *
                          sum(SaleCount > 0 & IsPromo == 1))) %>%
  ungroup() %>%
  # Scrub every non-finite division result. Matching only -Inf/Inf would let
  # the 0/0 = NaN case through; genuine NA values are left untouched.
  mutate_all(~ifelse(is.infinite(.) | is.nan(.), 0, .))
我有数据。这里的例子
mydat=structure(list(ItemRelation = c(11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11628L,
11628L, 11628L, 11628L, 11628L, 11628L, 11628L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L,
11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L, 11627L
), SaleCount = c(0L, 0L, 6L, 0L, 38L, -14L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 33L, 0L, -10L, -2L, 0L, 22L, -4L, 0L, 0L, -5L, 3L, 0L,
28L, -14L, 0L, 0L, 0L, 0L, 0L, 21L, -5L, 0L, 0L, 0L, 0L, 0L,
32L, -8L, 6L, 0L, 0L, 0L, 0L, 33L, -7L, 0L, 0L, 0L, 3L, -3L,
47L, -22L, 0L, 0L, 0L, 0L, 0L, 26L, -3L, 0L, 0L, 0L, 6L, 0L,
0L, 6L, 0L, 38L, -14L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 33L, 0L, -10L,
-2L, 0L, 22L, -4L, 0L, 0L, -5L, 3L, 0L, 28L, -14L, 0L, 0L, 0L,
0L, 0L, 21L, -5L, 0L, 0L, 0L, 0L, 0L, 32L, -8L, 6L, 0L, 0L, 0L,
0L, 33L, -7L, 0L, 0L, 0L, 3L, -3L, 47L, -22L, 0L, 0L, 0L, 0L,
0L, 26L, -3L, 0L, 0L, 0L, 6L), DocumentNum = c(3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L, 3270L,
3270L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L, 3271L,
3271L, 3271L, 3271L, 3271L), IsPromo = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L)), .Names = c("ItemRelation",
"SaleCount", "DocumentNum", "IsPromo"), class = "data.frame", row.names = c(NA,
-132L))
数据包含两组按 ItemRelation+DocumentNum 列。
11628 3270
11627 3271
有一个 IsPromo 列。它只能取两个值
0 或 1。
所以,对于 IsPromo 为 0 的类别,我需要对 SaleCount 中既非负数也非零的值求和,也就是只求正值之和。
在这种情况下
6 38 33 22 3 28 21 6
sum=157.
然后我需要只对负值求和:
-14
-10
-2
-4
-5
-14
-5
sum=-54
那我一定要把这两个值相加!
157+-54=103
之后,我需要103除以正值的总数。
这里只有8个正值。 103/8=12,875。对于 ispromo 列的零类别。
对于 IsPromo 为 1 的类别,我需要对 SaleCount 的所有值(包括正值和负值)求和。
32
-8
6
33
-7
3
-3
47
-22
26
-3
sum=104
然后这个结果我需要除以总计数正值。
这是6
32
6
33
3
47
26
104/6=17,33333333
最后是最终结果。我需要从这个值(17,33333333)中减去 IsPromo 为 0 的类别的结果,即 103 除以正值总数所得的值
*103/8=12,875*
并将其乘以第一类 ispromo 的正值计数
在我们的例子中是 6
17,33333333-(12,875*6)= -59,91666667
必须对每个组进行此数学运算
11628 3270
11627 3271
怎么做? 正如预期的输出
ItemRelation DocumentNum Ispromo_by_SaleCount_sum_of_not_negative_or_zero_value for_negative_value
1 11628 3270 157 -54
2 11627 3271 157 -54
substract_positive_and_negative Ispromo_by_salescount_i_need_get_sum_all_values_and_positive_and_negative
1 103 104
2 103 104
divide_on_total_count_positive_value._It_is_5 end_result
1 12.875 -59.9
2 12.875 -59.9
或输入预期结果
# Expected result for the first sample data set: one row per
# (ItemRelation, DocumentNum) group. Values are copied from the expected-output
# table in the question; end_result is the rounded value shown there.
expect <- structure(list(ItemRelation = c(11628L, 11627L), DocumentNum = 3270:3271,
    Ispromo_by_SaleCount_sum_of_not_negative_or_zero_value = c(157L, 157L),
    for_negative_value = c(-54L, -54L),
    substract_positive_and_negative = c(103L, 103L),
    Ispromo_by_salescount_i_need_get_sum_all_values_and_positive_and_negative = c(104L, 104L),
    divide_on_total_count_positive_value._It_is_5 = c(12.875, 12.875),
    end_result = c(-59.9, -59.9)), .Names = c("ItemRelation",
"DocumentNum", "Ispromo_by_SaleCount_sum_of_not_negative_or_zero_value",
"for_negative_value", "substract_positive_and_negative", "Ispromo_by_salescount_i_need_get_sum_all_values_and_positive_and_negative",
"divide_on_total_count_positive_value._It_is_5", "end_result"
), class = "data.frame", row.names = c(NA, -2L))
使用特定数据进行编辑
如果对于 SaleCount 的零类别 ispromo 我只有零值或负值,那么默认情况下 x4 必须 =0。 还有另一种变体: 如果对于 SaleCount 的一类 ispromo 只有零值或负值 然后X6计算为X6=0-x4。 这里的数据 而且当然可以同时是两个变体,就像我的例子一样。
mydat=structure(list(ItemRelation = c(11709L, 11709L, 11709L, 11709L,
11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L,
11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L,
11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L,
11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L,
11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L,
11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L, 11709L
), SaleCount = c(0L, 0L, -1L, 0L, 0L, 0L, -2L, 0L, 0L, -1L, 0L,
0L, 0L, -1L, -1L, 0L, 0L, -1L, 0L, 0L, 0L, 0L, -1L, 0L, 0L, 0L,
0L, 0L, 0L, -2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, -1L, 0L, 0L,
0L, -1L, 0L, 0L, 0L, 1L, -2L, 0L, 0L, 0L, 0L), DocumentNum = c(1002L,
1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L,
1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L,
1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L,
1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L,
1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L, 1002L,
1002L, 1002L, 1002L, 1002L, 1002L, 1002L), IsPromo = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L)), .Names = c("ItemRelation", "SaleCount", "DocumentNum",
"IsPromo"), class = "data.frame", row.names = c(NA, -52L))
此处输出
ItemRelation DocumentNum CalendarYear X1 X2 X3 X4 X5 X6
1 11709 1002 2018 any value any value any value 0 any value 0-x4=0
library(dplyr)
# Summarise each (ItemRelation, DocumentNum) group:
#   X1 - sum of the positive SaleCount values where IsPromo == 0
#   X2 - sum of the negative SaleCount values where IsPromo == 0
#   X3 - X1 + X2
#   X4 - X3 divided by the number of positive SaleCount rows with IsPromo == 0
#   X5 - sum of all SaleCount values where IsPromo == 1
#   X6 - X5 divided by the number of positive promo rows, minus X4 multiplied
#        by that same count
mydat %>%
  group_by(ItemRelation, DocumentNum) %>%
  summarise(
    X1 = sum(SaleCount[IsPromo == 0 & SaleCount > 0]),
    X2 = sum(SaleCount[IsPromo == 0 & SaleCount < 0]),
    X3 = X1 + X2,
    # Summing a logical vector counts its TRUE entries.
    X4 = X3 / sum(IsPromo == 0 & SaleCount > 0),
    X5 = sum(SaleCount[IsPromo == 1]),
    # X4 already holds X3 / (count of positive non-promo rows), so reuse it.
    X6 = X5 / sum(IsPromo == 1 & SaleCount > 0) -
      X4 * sum(IsPromo == 1 & SaleCount > 0)
  ) %>%
  ungroup()
# # A tibble: 2 x 8
# ItemRelation DocumentNum X1 X2 X3 X4 X5 X6
# <int> <int> <int> <int> <int> <dbl> <int> <dbl>
# 1 11627 3271 157 -54 103 12.9 104 -59.9
# 2 11628 3270 157 -54 103 12.9 104 -59.9
如您所见,此过程的关键是能够使用适当的值子集对 SaleCount 列求和(sum)。例如:sum(SaleCount[SaleCount > 0 & IsPromo == 0]) 只会对 SaleCount 大于 0 且 IsPromo 等于 0 的行计算 SaleCount 的 sum。
以类似的方式,我们可以使用 sum(SaleCount > 0 & IsPromo == 0) 来统计 SaleCount 大于 0 且 IsPromo 等于 0 的观察值的个数,因为对由 TRUE 和 FALSE 组成的(逻辑)向量求 sum,得到的就是 TRUE 的个数。
对于您的编辑,试试这个:
# Same per-(ItemRelation, DocumentNum) summary as the unguarded version, with
# the zero-count cases handled:
#   X4 is forced to 0 when the group has no positive SaleCount with IsPromo == 0.
#   X6 is -X4 when the group has no positive SaleCount with IsPromo == 1.
mydat %>%
  group_by(ItemRelation, DocumentNum) %>%
  summarise(X1 = sum(SaleCount[SaleCount > 0 & IsPromo == 0]),
            X2 = sum(SaleCount[SaleCount < 0 & IsPromo == 0]),
            X3 = X1 + X2,
            X4 = ifelse(sum(SaleCount > 0 & IsPromo == 0) == 0, 0,
                        X3 / sum(SaleCount > 0 & IsPromo == 0)),
            X5 = sum(SaleCount[IsPromo == 1]),
            # The else-branch still divides X3 by the non-promo positive count
            # without a guard; a zero count yields +/-Inf (X3 != 0) or NaN
            # (X3 == 0), which the final step replaces with 0.
            X6 = ifelse(sum(SaleCount > 0 & IsPromo == 1) == 0, -X4,
                        X5 / sum(SaleCount > 0 & IsPromo == 1) -
                          X3 / sum(SaleCount > 0 & IsPromo == 0) *
                          sum(SaleCount > 0 & IsPromo == 1))) %>%
  ungroup() %>%
  # Scrub every non-finite division result. Matching only -Inf/Inf would let
  # the 0/0 = NaN case through; genuine NA values are left untouched.
  mutate_all(~ifelse(is.infinite(.) | is.nan(.), 0, .))