R:递归评估存储为列表的二叉树
R: recursively evaluating a binary tree stored as a list
我有一棵名为 mytree
的树,看起来像这样:
我将其存储为列表:
mytree <- list(list(structure(list(y = c(-10, 7, 8, -7), x = c(10, 20,
25, 35), grad = c(-10.5, 6.5, 7.5, -7.5), sim_score = c(4, 4,
4, 4), value = c(-1, -1, -1, -1)), row.names = c(NA, -4L), class = "data.frame")),
list(structure(list(y = -10, x = 10, grad = -10.5, sim_score = 110.25,
value = -10.5, gain = 120.333333333333, criterion = "x < 15"), row.names = 1L, class = "data.frame"),
structure(list(y = c(7, 8, -7), x = c(20, 25, 35), grad = c(6.5,
7.5, -7.5), sim_score = c(14.0833333333333, 14.0833333333333,
14.0833333333333), value = c(2.16666666666667, 2.16666666666667,
2.16666666666667), gain = c(120.333333333333, 120.333333333333,
120.333333333333), criterion = c("x >= 15", "x >= 15",
"x >= 15")), row.names = 2:4, class = "data.frame")),
list(NULL, NULL, structure(list(y = c(7, 8), x = c(20, 25
), grad = c(6.5, 7.5), sim_score = c(98, 98), value = c(7,
7), gain = c(140.166666666667, 140.166666666667), criterion = c("x < 30",
"x < 30")), row.names = 2:3, class = "data.frame"), structure(list(
y = -7, x = 35, grad = -7.5, sim_score = 56.25, value = -7.5,
gain = 140.166666666667, criterion = "x >= 30"), row.names = 4L, class = "data.frame")),
list(NULL, NULL, NULL, NULL, structure(list(y = 7, x = 20,
grad = 6.5, sim_score = 42.25, value = 6.5, gain = 0.5,
criterion = "x < 22.5"), row.names = 2L, class = "data.frame"),
structure(list(y = 8, x = 25, grad = 7.5, sim_score = 56.25,
value = 7.5, gain = 0.5, criterion = "x >= 22.5"), row.names = 3L, class = "data.frame"),
NULL, NULL))
看起来像这样:
[[1]]
[[1]][[1]]
y x grad sim_score value
1 -10 10 -10.5 4 -1
2 7 20 6.5 4 -1
3 8 25 7.5 4 -1
4 -7 35 -7.5 4 -1
[[2]]
[[2]][[1]]
y x grad sim_score value gain criterion
1 -10 10 -10.5 110.25 -10.5 120.3333 x < 15
[[2]][[2]]
y x grad sim_score value gain criterion
2 7 20 6.5 14.08333 2.166667 120.3333 x >= 15
3 8 25 7.5 14.08333 2.166667 120.3333 x >= 15
4 -7 35 -7.5 14.08333 2.166667 120.3333 x >= 15
[[3]]
[[3]][[1]]
NULL
[[3]][[2]]
NULL
[[3]][[3]]
y x grad sim_score value gain criterion
2 7 20 6.5 98 7 140.1667 x < 30
3 8 25 7.5 98 7 140.1667 x < 30
[[3]][[4]]
y x grad sim_score value gain criterion
4 -7 35 -7.5 56.25 -7.5 140.1667 x >= 30
[[4]]
[[4]][[1]]
NULL
[[4]][[2]]
NULL
[[4]][[3]]
NULL
[[4]][[4]]
NULL
[[4]][[5]]
y x grad sim_score value gain criterion
2 7 20 6.5 42.25 6.5 0.5 x < 22.5
[[4]][[6]]
y x grad sim_score value gain criterion
3 8 25 7.5 56.25 7.5 0.5 x >= 22.5
[[4]][[7]]
NULL
[[4]][[8]]
NULL
列表的第一个索引,即 1、2、3、4,对应于树的级别或高度。第二个索引对应于给定级别中节点的索引。例如,mytree[[1]][[1]]
包含根,它在 mytree[[2]][[1]]
和 mytree[[2]][[2]]
中有子节点。
给定父节点存储在 mytree[[i]][[j]]
中,其子节点存储在 mytree[[i + 1]][[2 * j]]
和 mytree[[i + 1]][[2 * j -1]]
中。
我想编写一个名为 eval_tree
的函数,当给定一个新实例 x
时,它将通过检查 criterion
来检查 x
属于哪个叶节点的分裂然后输出叶子的值,它存储在value
下。这是我希望 eval_tree
工作方式的示例:
newdata <- data.frame(x = c(10, 20, 25, 35))
> eval_tree(tree = mytree, newdata = newdata)
[1] -10.5
[2] 6.5
[3] 7.5
[4] -7.5
这是我目前所拥有的。不幸的是,它不起作用……而且我想我可能需要递归地实现该函数,以便它更有效率。谁能指出我正确的方向?
eval_tree <- function(tree, newdata){
if(length(tree) == 1){
# If tree only has a root, return value of root
return(tree[[1]][[1]]$value[1])
}else if(length(tree) > 1){
for (level in 2:length(tree)){
for(ind in 1:length(tree[[level]]))
if(eval(parse(text = tree[[level]][[ind]][["criterion"]]))){
# Criterion is true, then go to child node
# Check if there is child node
if(is.null(tree[[level + 1]][[ind * 2]]) && is.null(tree[[level + 1]][[ind * 2 - 1]])){
return(tree[[level]][[ind]]$value[1])
}else if(eval(parse(text = tree[[level + 1]][[ind * 2]][["criterion"]]))){
# Criterion is true, then go to childi node
# I think this is where recursion would be more appropriate than all these nested loops
}
}
}
}
}
你可以试试这样:
index <- function(x,tree,e, i = 1, j = 1)
{
if(nrow((tree[[i]][[j]])) == 1)
{
if(eval(parse(text=tree[[i]][[j]]$crite), list(x = x))) {
if(is.null(e$a)){
e$a <- i
e$b <- tree[[i]][[j]]$val
}
else if(e$a > i)e$b <- tree[[i]][[j]]$val
TRUE
}
else FALSE
}
else index(x, tree, e,i + 1,2*j-1) | index(x, tree,e,i+1, 2*j)
}
pred_tree <- function( tree,newdata){
cbind(newdata,pred = sapply(newdata$x,function(x){
e <- new.env()
index(x,tree,e)
e$b
}))
}
pred_tree(mytree,data.frame(x = c(10,20,25,30,25)))
x pred
1 10 -10.5
2 20 6.5
3 25 7.5
4 30 -7.5
5 25 7.5
我有一棵名为 mytree
的树,看起来像这样:
我将其存储为列表:
mytree <- list(list(structure(list(y = c(-10, 7, 8, -7), x = c(10, 20,
25, 35), grad = c(-10.5, 6.5, 7.5, -7.5), sim_score = c(4, 4,
4, 4), value = c(-1, -1, -1, -1)), row.names = c(NA, -4L), class = "data.frame")),
list(structure(list(y = -10, x = 10, grad = -10.5, sim_score = 110.25,
value = -10.5, gain = 120.333333333333, criterion = "x < 15"), row.names = 1L, class = "data.frame"),
structure(list(y = c(7, 8, -7), x = c(20, 25, 35), grad = c(6.5,
7.5, -7.5), sim_score = c(14.0833333333333, 14.0833333333333,
14.0833333333333), value = c(2.16666666666667, 2.16666666666667,
2.16666666666667), gain = c(120.333333333333, 120.333333333333,
120.333333333333), criterion = c("x >= 15", "x >= 15",
"x >= 15")), row.names = 2:4, class = "data.frame")),
list(NULL, NULL, structure(list(y = c(7, 8), x = c(20, 25
), grad = c(6.5, 7.5), sim_score = c(98, 98), value = c(7,
7), gain = c(140.166666666667, 140.166666666667), criterion = c("x < 30",
"x < 30")), row.names = 2:3, class = "data.frame"), structure(list(
y = -7, x = 35, grad = -7.5, sim_score = 56.25, value = -7.5,
gain = 140.166666666667, criterion = "x >= 30"), row.names = 4L, class = "data.frame")),
list(NULL, NULL, NULL, NULL, structure(list(y = 7, x = 20,
grad = 6.5, sim_score = 42.25, value = 6.5, gain = 0.5,
criterion = "x < 22.5"), row.names = 2L, class = "data.frame"),
structure(list(y = 8, x = 25, grad = 7.5, sim_score = 56.25,
value = 7.5, gain = 0.5, criterion = "x >= 22.5"), row.names = 3L, class = "data.frame"),
NULL, NULL))
看起来像这样:
[[1]]
[[1]][[1]]
y x grad sim_score value
1 -10 10 -10.5 4 -1
2 7 20 6.5 4 -1
3 8 25 7.5 4 -1
4 -7 35 -7.5 4 -1
[[2]]
[[2]][[1]]
y x grad sim_score value gain criterion
1 -10 10 -10.5 110.25 -10.5 120.3333 x < 15
[[2]][[2]]
y x grad sim_score value gain criterion
2 7 20 6.5 14.08333 2.166667 120.3333 x >= 15
3 8 25 7.5 14.08333 2.166667 120.3333 x >= 15
4 -7 35 -7.5 14.08333 2.166667 120.3333 x >= 15
[[3]]
[[3]][[1]]
NULL
[[3]][[2]]
NULL
[[3]][[3]]
y x grad sim_score value gain criterion
2 7 20 6.5 98 7 140.1667 x < 30
3 8 25 7.5 98 7 140.1667 x < 30
[[3]][[4]]
y x grad sim_score value gain criterion
4 -7 35 -7.5 56.25 -7.5 140.1667 x >= 30
[[4]]
[[4]][[1]]
NULL
[[4]][[2]]
NULL
[[4]][[3]]
NULL
[[4]][[4]]
NULL
[[4]][[5]]
y x grad sim_score value gain criterion
2 7 20 6.5 42.25 6.5 0.5 x < 22.5
[[4]][[6]]
y x grad sim_score value gain criterion
3 8 25 7.5 56.25 7.5 0.5 x >= 22.5
[[4]][[7]]
NULL
[[4]][[8]]
NULL
列表的第一个索引,即 1、2、3、4,对应于树的级别或高度。第二个索引对应于给定级别中节点的索引。例如,mytree[[1]][[1]]
包含根,它在 mytree[[2]][[1]]
和 mytree[[2]][[2]]
中有子节点。
给定父节点存储在 mytree[[i]][[j]]
中,其子节点存储在 mytree[[i + 1]][[2 * j]]
和 mytree[[i + 1]][[2 * j -1]]
中。
我想编写一个名为 eval_tree
的函数,当给定一个新实例 x
时,它将通过检查 criterion
来检查 x
属于哪个叶节点的分裂然后输出叶子的值,它存储在value
下。这是我希望 eval_tree
工作方式的示例:
newdata <- data.frame(x = c(10, 20, 25, 35))
> eval_tree(tree = mytree, newdata = newdata)
[1] -10.5
[2] 6.5
[3] 7.5
[4] -7.5
这是我目前所拥有的。不幸的是,它不起作用……而且我想我可能需要递归地实现该函数,以便它更有效率。谁能指出我正确的方向?
eval_tree <- function(tree, newdata){
if(length(tree) == 1){
# If tree only has a root, return value of root
return(tree[[1]][[1]]$value[1])
}else if(length(tree) > 1){
for (level in 2:length(tree)){
for(ind in 1:length(tree[[level]]))
if(eval(parse(text = tree[[level]][[ind]][["criterion"]]))){
# Criterion is true, then go to child node
# Check if there is child node
if(is.null(tree[[level + 1]][[ind * 2]]) && is.null(tree[[level + 1]][[ind * 2 - 1]])){
return(tree[[level]][[ind]]$value[1])
}else if(eval(parse(text = tree[[level + 1]][[ind * 2]][["criterion"]]))){
# Criterion is true, then go to childi node
# I think this is where recursion would be more appropriate than all these nested loops
}
}
}
}
}
你可以试试这样:
index <- function(x,tree,e, i = 1, j = 1)
{
if(nrow((tree[[i]][[j]])) == 1)
{
if(eval(parse(text=tree[[i]][[j]]$crite), list(x = x))) {
if(is.null(e$a)){
e$a <- i
e$b <- tree[[i]][[j]]$val
}
else if(e$a > i)e$b <- tree[[i]][[j]]$val
TRUE
}
else FALSE
}
else index(x, tree, e,i + 1,2*j-1) | index(x, tree,e,i+1, 2*j)
}
pred_tree <- function( tree,newdata){
cbind(newdata,pred = sapply(newdata$x,function(x){
e <- new.env()
index(x,tree,e)
e$b
}))
}
pred_tree(mytree,data.frame(x = c(10,20,25,30,25)))
x pred
1 10 -10.5
2 20 6.5
3 25 7.5
4 30 -7.5
5 25 7.5