R:使用另一个数据框中的列名、条件和值在数据框中创建一个新列
R: Create a new column in a dataframe, using column name, condition and value from another dataframe
将基本数据框视为:
data <- data.frame(amount_bin = c("10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+", "10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+", "10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+"),
risk_score = c("0-700", "700-750", "750-800", "800-850", "850-900", "0-700", "700-750", "750-800", "800-850", "850-900", "0-700", "700-750", "750-800", "800-850", "850-900"))
并将另一个数据框中的信息分组为:
group_info <- data.frame(variable = c("amount_bin_group", "amount_bin_group", "amount_bin_group", "amount_bin_group", "amount_bin_group",
"risk_score_group", "risk_score_group", "risk_score_group", "risk_score_group", "risk_score_group"),
bin = c("10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+",
"0-700", "700-750", "750-800", "800-850", "850-900"),
group = c("1", "1", "2", "2", "3",
"a", "a", "a", "b", "b"))
我想在名为 "amount_bin_group" 和 "risk_score_group" 的基础数据框(数据)中创建 2 列,当 bin 列来自 group_info和数据是一样的。为简单起见,我们假设基列始终是 group_info$ 变量名称减去 "group" 字符串。这意味着,当我们要创建列 amount_bin_group 时,基本列在基本数据框中将始终为 amount_bin。
预期结果数据框为:
final_data <- data.frame(amount_bin = c("10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+", "10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+", "10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+"),
risk_score = c("0-700", "700-750", "750-800", "800-850", "850-900", "0-700", "700-750", "750-800", "800-850", "850-900", "0-700", "700-750", "750-800", "800-850", "850-900"),
amount_bin_group = c("1", "1", "2", "2", "3", "1", "1", "2", "2", "3", "1", "1", "2", "2", "3"),
risk_score_group = c("a", "a", "a", "b", "b", "a", "a", "a", "b", "b", "a", "a", "a", "b", "b"))
我刚刚想到的一个解决方案是迭代合并数据框,即:
final_data <- merge(data, group_info[, c("bin", "group")], by.x = "amount_bin", by.y = "bin")
final_data$amount_bin_group <- final_data$group
final_data$group <- NULL
但是,我确信可以有更有效的解决方案。请注意,有多个这样的列,而不仅仅是两个。所以,也许循环会有所帮助。
您的 group_info 太过整洁了。我不敢相信我真的会这么说。通过将其分成两个数据框,或将每一半分成它自己的列,您可以自己进行简单的左连接来获得答案。
final_data_calc <- data %>%
left_join(
group_info %>%
filter(variable == 'amount_bin_group') %>%
rename(amount_bin_group = group,amount_bin = bin) %>%
select(-variable)
) %>%
left_join(
group_info %>%
filter(variable == 'risk_score_group') %>%
rename(risk_score_group = group,risk_score = bin) %>%
select(-variable)
)
# amount_bin risk_score amount_bin_group risk_score_group
#1 10K-25K 0-700 1 a
#2 25K-35K 700-750 1 a
#3 35K-45K 750-800 2 a
#4 45K-50K 800-850 2 b
#5 50K+ 850-900 3 b
#6 10K-25K 0-700 1 a
#7 25K-35K 700-750 1 a
#8 35K-45K 750-800 2 a
#9 45K-50K 800-850 2 b
#10 50K+ 850-900 3 b
#11 10K-25K 0-700 1 a
#12 25K-35K 700-750 1 a
#13 35K-45K 750-800 2 a
#14 45K-50K 800-850 2 b
#15 50K+ 850-900 3 b
您可以只使用 for
循环来继续合并不同的集合:
for (i in unique(group_info$variable)) {
data <- merge(
data, group_info[group_info$variable==i,c("bin","group")],
by.x=sub("_group","",i), by.y="bin"
)
names(data)[names(data)=="group"] <- i
}
将基本数据框视为:
data <- data.frame(amount_bin = c("10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+", "10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+", "10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+"),
risk_score = c("0-700", "700-750", "750-800", "800-850", "850-900", "0-700", "700-750", "750-800", "800-850", "850-900", "0-700", "700-750", "750-800", "800-850", "850-900"))
并将另一个数据框中的信息分组为:
group_info <- data.frame(variable = c("amount_bin_group", "amount_bin_group", "amount_bin_group", "amount_bin_group", "amount_bin_group",
"risk_score_group", "risk_score_group", "risk_score_group", "risk_score_group", "risk_score_group"),
bin = c("10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+",
"0-700", "700-750", "750-800", "800-850", "850-900"),
group = c("1", "1", "2", "2", "3",
"a", "a", "a", "b", "b"))
我想在名为 "amount_bin_group" 和 "risk_score_group" 的基础数据框(数据)中创建 2 列,当 bin 列来自 group_info和数据是一样的。为简单起见,我们假设基列始终是 group_info$ 变量名称减去 "group" 字符串。这意味着,当我们要创建列 amount_bin_group 时,基本列在基本数据框中将始终为 amount_bin。
预期结果数据框为:
final_data <- data.frame(amount_bin = c("10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+", "10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+", "10K-25K", "25K-35K", "35K-45K", "45K-50K", "50K+"),
risk_score = c("0-700", "700-750", "750-800", "800-850", "850-900", "0-700", "700-750", "750-800", "800-850", "850-900", "0-700", "700-750", "750-800", "800-850", "850-900"),
amount_bin_group = c("1", "1", "2", "2", "3", "1", "1", "2", "2", "3", "1", "1", "2", "2", "3"),
risk_score_group = c("a", "a", "a", "b", "b", "a", "a", "a", "b", "b", "a", "a", "a", "b", "b"))
我刚刚想到的一个解决方案是迭代合并数据框,即:
final_data <- merge(data, group_info[, c("bin", "group")], by.x = "amount_bin", by.y = "bin")
final_data$amount_bin_group <- final_data$group
final_data$group <- NULL
但是,我确信可以有更有效的解决方案。请注意,有多个这样的列,而不仅仅是两个。所以,也许循环会有所帮助。
您的 group_info 太过整洁了。我不敢相信我真的会这么说。通过将其分成两个数据框,或将每一半分成它自己的列,您可以自己进行简单的左连接来获得答案。
final_data_calc <- data %>%
left_join(
group_info %>%
filter(variable == 'amount_bin_group') %>%
rename(amount_bin_group = group,amount_bin = bin) %>%
select(-variable)
) %>%
left_join(
group_info %>%
filter(variable == 'risk_score_group') %>%
rename(risk_score_group = group,risk_score = bin) %>%
select(-variable)
)
# amount_bin risk_score amount_bin_group risk_score_group
#1 10K-25K 0-700 1 a
#2 25K-35K 700-750 1 a
#3 35K-45K 750-800 2 a
#4 45K-50K 800-850 2 b
#5 50K+ 850-900 3 b
#6 10K-25K 0-700 1 a
#7 25K-35K 700-750 1 a
#8 35K-45K 750-800 2 a
#9 45K-50K 800-850 2 b
#10 50K+ 850-900 3 b
#11 10K-25K 0-700 1 a
#12 25K-35K 700-750 1 a
#13 35K-45K 750-800 2 a
#14 45K-50K 800-850 2 b
#15 50K+ 850-900 3 b
您可以只使用 for
循环来继续合并不同的集合:
for (i in unique(group_info$variable)) {
data <- merge(
data, group_info[group_info$variable==i,c("bin","group")],
by.x=sub("_group","",i), by.y="bin"
)
names(data)[names(data)=="group"] <- i
}