按顺序对数据框中的一系列行应用匹配和替换功能
Apply a match and replace function over series of rows in a dataframe in order
起始数据框
# Example input: `marker` flags the rows that should trigger a replacement,
# `id_out` is the value to look for and `id_new` is the value to write instead.
data_start <- data.frame(
  marker = c("yes", "yes", "no", "yes", "no"),
  id_out = c(5, 3, 1, 1, 7),
  id_new = c(6, 8, 9, 4, 2)
)
> data_start
marker id_out id_new
1 yes 5 6
2 yes 3 8
3 no 1 9
4 yes 1 4
5 no 7 2
添加三个新列 var1、var2、var3,列内容先留空(NA);然后在第一行填入 var1:var3 的起始值。
# Add the three tracking columns, initially empty (NA), then seed the first row
# with the starting values. Columns are addressed by name rather than by the
# positions 4:6 so the snippet keeps working if `data_start` gains or loses
# columns ahead of them.
var_names <- c("var1", "var2", "var3")
data_start[, var_names] <- NA
vars <- c(5, 3, 1)
data_start[1, var_names] <- vars
> data_start
marker id_out id_new var1 var2 var3
1 yes 5 6 5 3 1
2 yes 3 8 NA NA NA
3 no 1 9 NA NA NA
4 yes 1 4 NA NA NA
5 no 7 2 NA NA NA
我想对每一行应用一个函数来更新 var1:var3 列:如果 marker = yes,并且 id_out 与 var1:var3 中的任意一列匹配,就把匹配到的那一列替换为 id_new。我找到了下面这个解决方案,但它只适用于单独一行,而且之后每一行的 var1:var3 仍然需要逐行更新。
# Row 1 only: among columns 4:6 (var1:var3 in this example), overwrite every
# cell equal to that row's id_out with that row's id_new.
data_start[1, 4:6][data_start[1, 4:6] == data_start[1,"id_out"]] <- data_start[1,"id_new"]
此外,每一行在应用该函数之前,都要先沿用上一行的 var1:var3 值。
最终输出应如下所示:当 marker = no 时,该行保持与上一行相同;其余各行依次更新。
> data_final
marker id_out id_new var1 var2 var3
1 yes 5 6 6 3 1
2 yes 3 8 6 8 1
3 no 1 9 6 8 1
4 yes 1 4 6 8 4
5 no 7 2 6 8 4
我赶时间,所以这段代码写得比较粗糙,但应该可以用。
# Rebuild the example data: `marker` flags the rows to act on, `id_out` is the
# value to look for, `id_new` is the value written in its place.
data_start <- data.frame(
  marker = c("yes", "yes", "no", "yes", "no"),
  id_out = c(5, 3, 1, 1, 7),
  id_new = c(6, 8, 9, 4, 2)
)

# Add empty var1:var3 columns and seed the first row with the starting values.
onVars <- c("var1", "var2", "var3")
data_start[, onVars] <- NA
vars <- c(5, 3, 1)
data_start[1, onVars] <- vars

# Walk the rows top to bottom. Every row after the first starts from the
# previous row's values; then, when marker is "yes", any var cell equal to
# id_out is replaced by id_new. Row 1 is evaluated too (against its own seeded
# values), otherwise its replacement would never be applied.
for (i in seq_len(nrow(data_start))) {
  for (var in onVars) {
    if (i > 1) {
      data_start[i, var] <- data_start[i - 1, var]
    }
    # isTRUE() keeps an NA cell from aborting the `if` with a missing-value error.
    is_match <- isTRUE(data_start[i, var] == data_start$id_out[i])
    if (data_start$marker[i] == "yes" && is_match) {
      data_start[i, var] <- data_start$id_new[i]
    }
  }
}
data_start 是你的输出。
糟糕,看来我可能遗漏了对第一行的评估,但希望您现在可以自己处理。
这是一个片段,即使您有超过三列,也可以让您进行此计算:
library(data.table)
# Example data as a data.table.
dt <- data.table(
  marker = c("yes", "yes", "no", "yes", "no"),
  id_out = c(5, 3, 1, 1, 7),
  id_new = c(6, 8, 9, 4, 2)
)

# change[i] = number of "yes" rows seen up to and including row i.
dt[, change := cumsum(marker == "yes")]

# Replacement (id_new) and original (id_out) values, taken from the "yes" rows only.
ref.new <- dt[marker == "yes", id_new]
ref.out <- dt[marker == "yes", id_out]

# One var<x> column per "yes" row: it holds ref.out[x] until the x-th "yes" row
# has been reached and ref.new[x] from that row onward.
# NOTE(review): this ties the x-th var column to the x-th "yes" row, so it only
# reproduces the expected table when the starting var values are the id_out
# values of the "yes" rows, in order -- confirm before using on other inputs.
# seq_along() makes the loop a no-op when there are no "yes" rows (1:0 would
# otherwise run twice and create bogus columns).
for (x in seq_along(ref.new)) {
  dt[, (paste0("var", x)) := ifelse(change >= x, ref.new[x], ref.out[x])]
}
head(dt)
# marker id_out id_new change var1 var2 var3
#1: yes 5 6 1 6 3 1
#2: yes 3 8 2 6 8 1
#3: no 1 9 2 6 8 1
#4: yes 1 4 3 6 8 4
#5: no 7 2 3 6 8 4
不用 for 循环和 if 似乎很难找到解决方案,所以这里给出一个使用循环的写法。我尝试把初始值改成 c(1,3,1)
等其他取值,代码都能正常工作。如果需要,还可以添加更多的 var 列。
# Re-create the data
dt <- data.table(
  marker = c("yes", "yes", "no", "yes", "no"),
  id_out = c(5, 3, 1, 1, 7),
  id_new = c(6, 8, 9, 4, 2)
)
var.col <- paste0("var", 1:3)
# Seed row 1; this also creates the var columns (NA in the other rows).
dt[1, (var.col) := .(5, 3, 1)]

# Processing: rows are handled top to bottom because each row starts from the
# previous row's var values.
for (i in seq_len(nrow(dt))) {
  if (i > 1) {
    # Carry the previous row's var values forward into row i.
    dt[i, (var.col) := as.list(dt[i - 1, var.col, with = FALSE])]
  }
  # Logical mask over var.col: which var cells of row i equal this row's id_out.
  var.i <- dt[i, var.col, with = FALSE] %in% dt[i, id_out]
  if (dt[i]$marker == "yes" && any(var.i)) {
    # Overwrite only the matching var columns with this row's id_new.
    dt[i, (var.col[var.i]) := dt[i, id_new]]
  }
}
这可以与任意数量的列一起使用,并适用于基本 R:
# Columns to update; extend this vector to handle more var columns.
cols <- c("var1", "var2", "var3")

# Process one column at a time, walking its rows top to bottom. Expects
# `data_start` to already contain these columns with row 1 seeded.
for (j in seq_along(cols)) {
  var <- cols[j]
  for (i in seq_len(nrow(data_start))) {
    if (i > 1) {
      # Start from the value the previous row ended up with.
      data_start[i, var] <- data_start[i - 1, var]
    }
    # On a "yes" row, swap the cell for id_new when it equals id_out.
    if (data_start[i, "marker"] == "yes" &&
        data_start[i, var] == data_start[i, "id_out"]) {
      data_start[i, var] <- data_start[i, "id_new"]
    }
  }
}
起始数据框
# Example input: `marker` flags the rows that should trigger a replacement,
# `id_out` is the value to look for and `id_new` is the value to write instead.
data_start <- data.frame(
  marker = c("yes", "yes", "no", "yes", "no"),
  id_out = c(5, 3, 1, 1, 7),
  id_new = c(6, 8, 9, 4, 2)
)
> data_start
marker id_out id_new
1 yes 5 6
2 yes 3 8
3 no 1 9
4 yes 1 4
5 no 7 2
添加三个新列 var1、var2、var3,列内容先留空(NA);然后在第一行填入 var1:var3 的起始值。
# Add the three tracking columns, initially empty (NA), then seed the first row
# with the starting values. Columns are addressed by name rather than by the
# positions 4:6 so the snippet keeps working if `data_start` gains or loses
# columns ahead of them.
var_names <- c("var1", "var2", "var3")
data_start[, var_names] <- NA
vars <- c(5, 3, 1)
data_start[1, var_names] <- vars
> data_start
marker id_out id_new var1 var2 var3
1 yes 5 6 5 3 1
2 yes 3 8 NA NA NA
3 no 1 9 NA NA NA
4 yes 1 4 NA NA NA
5 no 7 2 NA NA NA
我想对每一行应用一个函数来更新 var1:var3 列:如果 marker = yes,并且 id_out 与 var1:var3 中的任意一列匹配,就把匹配到的那一列替换为 id_new。我找到了下面这个解决方案,但它只适用于单独一行,而且之后每一行的 var1:var3 仍然需要逐行更新。
# Row 1 only: among columns 4:6 (var1:var3 in this example), overwrite every
# cell equal to that row's id_out with that row's id_new.
data_start[1, 4:6][data_start[1, 4:6] == data_start[1,"id_out"]] <- data_start[1,"id_new"]
此外,每一行在应用该函数之前,都要先沿用上一行的 var1:var3 值。
最终输出应如下所示:当 marker = no 时,该行保持与上一行相同;其余各行依次更新。
> data_final
marker id_out id_new var1 var2 var3
1 yes 5 6 6 3 1
2 yes 3 8 6 8 1
3 no 1 9 6 8 1
4 yes 1 4 6 8 4
5 no 7 2 6 8 4
我赶时间,所以这段代码写得比较粗糙,但应该可以用。
# Rebuild the example data: `marker` flags the rows to act on, `id_out` is the
# value to look for, `id_new` is the value written in its place.
data_start <- data.frame(
  marker = c("yes", "yes", "no", "yes", "no"),
  id_out = c(5, 3, 1, 1, 7),
  id_new = c(6, 8, 9, 4, 2)
)

# Add empty var1:var3 columns and seed the first row with the starting values.
onVars <- c("var1", "var2", "var3")
data_start[, onVars] <- NA
vars <- c(5, 3, 1)
data_start[1, onVars] <- vars

# Walk the rows top to bottom. Every row after the first starts from the
# previous row's values; then, when marker is "yes", any var cell equal to
# id_out is replaced by id_new. Row 1 is evaluated too (against its own seeded
# values), otherwise its replacement would never be applied.
for (i in seq_len(nrow(data_start))) {
  for (var in onVars) {
    if (i > 1) {
      data_start[i, var] <- data_start[i - 1, var]
    }
    # isTRUE() keeps an NA cell from aborting the `if` with a missing-value error.
    is_match <- isTRUE(data_start[i, var] == data_start$id_out[i])
    if (data_start$marker[i] == "yes" && is_match) {
      data_start[i, var] <- data_start$id_new[i]
    }
  }
}
data_start 是你的输出。
糟糕,看来我可能遗漏了对第一行的评估,但希望您现在可以自己处理。
这是一个片段,即使您有超过三列,也可以让您进行此计算:
library(data.table)
# Example data as a data.table.
dt <- data.table(
  marker = c("yes", "yes", "no", "yes", "no"),
  id_out = c(5, 3, 1, 1, 7),
  id_new = c(6, 8, 9, 4, 2)
)

# change[i] = number of "yes" rows seen up to and including row i.
dt[, change := cumsum(marker == "yes")]

# Replacement (id_new) and original (id_out) values, taken from the "yes" rows only.
ref.new <- dt[marker == "yes", id_new]
ref.out <- dt[marker == "yes", id_out]

# One var<x> column per "yes" row: it holds ref.out[x] until the x-th "yes" row
# has been reached and ref.new[x] from that row onward.
# NOTE(review): this ties the x-th var column to the x-th "yes" row, so it only
# reproduces the expected table when the starting var values are the id_out
# values of the "yes" rows, in order -- confirm before using on other inputs.
# seq_along() makes the loop a no-op when there are no "yes" rows (1:0 would
# otherwise run twice and create bogus columns).
for (x in seq_along(ref.new)) {
  dt[, (paste0("var", x)) := ifelse(change >= x, ref.new[x], ref.out[x])]
}
head(dt)
# marker id_out id_new change var1 var2 var3
#1: yes 5 6 1 6 3 1
#2: yes 3 8 2 6 8 1
#3: no 1 9 2 6 8 1
#4: yes 1 4 3 6 8 4
#5: no 7 2 3 6 8 4
不用 for 循环和 if 似乎很难找到解决方案,所以这里给出一个使用循环的写法。我尝试把初始值改成 c(1,3,1)
等其他取值,代码都能正常工作。如果需要,还可以添加更多的 var 列。
# Re-create the data
dt <- data.table(
  marker = c("yes", "yes", "no", "yes", "no"),
  id_out = c(5, 3, 1, 1, 7),
  id_new = c(6, 8, 9, 4, 2)
)
var.col <- paste0("var", 1:3)
# Seed row 1; this also creates the var columns (NA in the other rows).
dt[1, (var.col) := .(5, 3, 1)]

# Processing: rows are handled top to bottom because each row starts from the
# previous row's var values.
for (i in seq_len(nrow(dt))) {
  if (i > 1) {
    # Carry the previous row's var values forward into row i.
    dt[i, (var.col) := as.list(dt[i - 1, var.col, with = FALSE])]
  }
  # Logical mask over var.col: which var cells of row i equal this row's id_out.
  var.i <- dt[i, var.col, with = FALSE] %in% dt[i, id_out]
  if (dt[i]$marker == "yes" && any(var.i)) {
    # Overwrite only the matching var columns with this row's id_new.
    dt[i, (var.col[var.i]) := dt[i, id_new]]
  }
}
这可以与任意数量的列一起使用,并适用于基本 R:
# Columns to update; extend this vector to handle more var columns.
cols <- c("var1", "var2", "var3")

# Process one column at a time, walking its rows top to bottom. Expects
# `data_start` to already contain these columns with row 1 seeded.
for (j in seq_along(cols)) {
  var <- cols[j]
  for (i in seq_len(nrow(data_start))) {
    if (i > 1) {
      # Start from the value the previous row ended up with.
      data_start[i, var] <- data_start[i - 1, var]
    }
    # On a "yes" row, swap the cell for id_new when it equals id_out.
    if (data_start[i, "marker"] == "yes" &&
        data_start[i, var] == data_start[i, "id_out"]) {
      data_start[i, var] <- data_start[i, "id_new"]
    }
  }
}