使用限制来定义唯一标识
using limits to define unique identities
# Sample data: one row per person with an age, a height and a weight.
Person <- c(1, 2, 3)
Age    <- c(10, 22, 30)
Height <- c(140, 185, 160)
Weight <- c(65, 80, 75)
People <- data.frame(
  Person = Person,
  Age    = Age,
  Height = Height,
  Weight = Weight
)
Age_cats_type1 [5-15], [20-30], [35-45]
Age_cats_type2 [8-13], [14-16], [18-40]
Height_cat_Type1 [100-120], [121-140], [141-186]
Height_cat_type2 [110-125], [126-145], [146-190]
Weight_cat_Type1 [50-60], [61-78], [79-85]
Weight_cat_Type2 [55-75], [76-90], [91-100]
对于 People[1,2](Age=10),它落在 Age_cats_type1==1 和 Age_cats_type2==1 中。
对于 People[1,3] (height=140),这适合 Height_cat_Type1==2
和 Height_cat_Type2==2
现在我想为 (Age_cats_type1==1)|(Age_cats_type1==2)、(Height_cats_type1==1)|(Height_cats_type1==2)、(Weight_cats_type1==1)|(Weight_cats_type1==2) 这些区间的每一种唯一可能结果创建一个 table。
所需的输出应类似于下面的黄色图像。
上面的 table 是对每个 interval 的可能性的总结。
这与之前的一个相关问题密切相关,但是当您按照 BrodieG 在那里给出的代码进行操作时,第三次迭代会出现错误。
在本例中,我们使用 data.table 中的 foverlaps。
我用过下面的代码
library(intervals)

# create our limits
# Each Intervals object below holds three ranges, one per matrix row, written
# as (lower, upper) pairs. closed = c(TRUE, TRUE) closes both endpoints and
# type = "Z" declares the intervals to be over the integers.

# Age, scheme 1: [5-15], [20-30], [35-45]
# (the last upper bound is 45, matching the stated category limits)
AGE_cats_type1 <- Intervals(
  matrix(c(5, 15, 20, 30, 35, 45), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Age, scheme 2: [8-13], [14-16], [18-40]
AGE_cats_type2 <- Intervals(
  matrix(c(8, 13, 14, 16, 18, 40), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Height, scheme 1: [100-120], [121-140], [141-186]
Height_cats_type1 <- Intervals(
  matrix(c(100, 120, 121, 140, 141, 186), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Height, scheme 2: [110-125], [126-145], [146-190]
Height_cats_type2 <- Intervals(
  matrix(c(110, 125, 126, 145, 146, 190), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Weight, scheme 1: [50-60], [61-78], [79-85]
Weight_cats_type1 <- Intervals(
  matrix(c(50, 60, 61, 78, 79, 85), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Weight, scheme 2: [55-75], [76-90], [91-100]
Weight_cats_type2 <- Intervals(
  matrix(c(55, 75, 76, 90, 91, 100), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)
# now format data
library(data.table)

# Every lookup table gets its own names for the interval bounds instead of
# the default X1/X2. With X1/X2 reused at every step, the chained joins pile
# up clashing column names, and the third foverlaps() call failed inside
# setcolorder() with a column-count mismatch.

# first for age
PEOPLE1 <- data.table(People)
PEOPLE1[, A1 := Age]  # [Age, A1] is the zero-width interval used for the join
I_age_1 <- data.table(cbind(data.frame(AGE_cats_type1), idX = 1:3, idY = 0))
I_age_2 <- data.table(cbind(data.frame(AGE_cats_type2), idX = 0, idY = 1:3))
setnames(I_age_1, c("X1", "X2"), c("age_lo", "age_hi"))
setnames(I_age_2, c("X1", "X2"), c("age_lo", "age_hi"))
setkey(I_age_1, age_lo, age_hi)
setkey(I_age_2, age_lo, age_hi)
PEOPLE2 <- data.frame(rbind(
  foverlaps(PEOPLE1, I_age_1, by.x = c("Age", "A1"), nomatch = 0),
  foverlaps(PEOPLE1, I_age_2, by.x = c("Age", "A1"), nomatch = 0)))
####################################################
# second iteration for height
PEOPLE3 <- data.table(PEOPLE2)
PEOPLE3[, B1 := Height]
I_height_1 <- data.table(cbind(data.frame(Height_cats_type1), idXa = 1:3, idYa = 0))
I_height_2 <- data.table(cbind(data.frame(Height_cats_type2), idXa = 0, idYa = 1:3))
setnames(I_height_1, c("X1", "X2"), c("height_lo", "height_hi"))
setnames(I_height_2, c("X1", "X2"), c("height_lo", "height_hi"))
setkey(I_height_1, height_lo, height_hi)
setkey(I_height_2, height_lo, height_hi)
# The second join must use the type-2 height limits (I_height_2), not
# I_height_1 a second time.
PEOPLE4 <- data.frame(rbind(
  foverlaps(PEOPLE3, I_height_1, by.x = c("Height", "B1"), nomatch = 0),
  foverlaps(PEOPLE3, I_height_2, by.x = c("Height", "B1"), nomatch = 0)))
################################################
# third iteration for weight
PEOPLE5 <- data.table(PEOPLE4)
PEOPLE5[, C1 := Weight]
I_weight_1 <- data.table(cbind(data.frame(Weight_cats_type1), idXb = 1:3, idYb = 0))
I_weight_2 <- data.table(cbind(data.frame(Weight_cats_type2), idXb = 0, idYb = 1:3))
setnames(I_weight_1, c("X1", "X2"), c("weight_lo", "weight_hi"))
setnames(I_weight_2, c("X1", "X2"), c("weight_lo", "weight_hi"))
setkey(I_weight_1, weight_lo, weight_hi)
setkey(I_weight_2, weight_lo, weight_hi)
# Join on the weight interval [Weight, C1]; the height columns belong to the
# previous iteration.
PEOPLE6 <- data.frame(rbind(
  foverlaps(PEOPLE5, I_weight_1, by.x = c("Weight", "C1"), nomatch = 0),
  foverlaps(PEOPLE5, I_weight_2, by.x = c("Weight", "C1"), nomatch = 0)))
但是在 PEOPLE6 中出现错误。
Error in setcolorder(ans, c(xcols1, ycols, xcols2)) :
neworder is length 16 but x has 18 columns.
当我查看 PEOPLE4 时,可以看到 idX、idY、idXa 和 idYa 分别是 Age_cats_type1、Age_cats_type2、Height_cat_Type1 和 Height_cat_Type2 的值。
你的问题的表述有一些问题。让我们尝试重构您的意思。
# Start by building the example data.
Person <- c(1, 2, 3)
Age    <- c(10, 22, 30)
Height <- c(140, 185, 160)
Weight <- c(65, 80, 75)
People <- data.frame(
  Person = Person,
  Age    = Age,
  Height = Height,
  Weight = Weight
)
# The data frame looks like this:
#   Person Age Height Weight
# 1      1  10    140     65
# 2      2  22    185     80
# 3      3  30    160     75

# Ranges can be represented as lists of c(lower, upper) pairs, one list per
# category scheme.
Age_cats_type1   <- list(c(5, 15), c(20, 30), c(35, 45))
Age_cats_type2   <- list(c(8, 13), c(14, 16), c(18, 40))
Height_cat_Type1 <- list(c(100, 120), c(121, 140), c(141, 186))
Height_cat_Type2 <- list(c(110, 125), c(126, 145), c(146, 190))
Weight_cat_Type1 <- list(c(50, 60), c(61, 78), c(79, 85))
Weight_cat_Type2 <- list(c(55, 75), c(76, 90), c(91, 100))

# The question mentioned something like People[1, 1] meaning age == 10.
# That looks like a mistake: typing People[1, 1] in the console returns
# Person == 1. The People data frame was therefore presumably meant to be
# constructed without the Person vector, so it is rebuilt that way here.
People <- data.frame(Age = Age, Height = Height, Weight = Weight)
# Now People[1, 1] returns age == 10.
# Then you went on to say that you wanted some function that returned Age_cats_type == 1
# Well, it seems that you want the first element of the list of ranges that contains the specified value.
# Then let's build it
# Test whether `value` lies within the closed interval given by `range`.
#
# range: vector whose first element is the lower bound and whose second
#        element is the upper bound; both bounds are inclusive.
# value: the single value to test.
# Returns TRUE when range[1] <= value <= range[2], FALSE otherwise.
contains_value <- function(range, value) {
  (value >= range[1]) && (range[2] >= value)
}
# Return the 1-based position of the first range in `ranges_list` that
# contains `value`, or NA when no range contains it.
#
# ranges_list: list of c(lower, upper) ranges, each tested with
#              contains_value().
# value: the single value to look up.
range_index <- function(ranges_list, value) {
  # vapply() always yields a logical vector, even for an empty list; sapply()
  # would return list() in that case and make which() fail.
  hits <- vapply(ranges_list, contains_value, logical(1), value = value)
  which(hits)[1]
}
# Example lookups for the first row of People; the trailing comment on each
# line is the expected index of the matching range.
range_index(Age_cats_type1, People[1, 1]) # 1
range_index(Age_cats_type2, People[1, 1]) # 1
range_index(Height_cat_Type1, People[1, 2]) # 2
range_index(Height_cat_Type2, People[1, 2]) # 2
# Now I didn't understand what the table you were trying to construct was, but maybe these functions will help you build it.
# Sample data: one row per person with an age, a height and a weight.
Person <- c(1, 2, 3)
Age    <- c(10, 22, 30)
Height <- c(140, 185, 160)
Weight <- c(65, 80, 75)
People <- data.frame(
  Person = Person,
  Age    = Age,
  Height = Height,
  Weight = Weight
)
Age_cats_type1 [5-15], [20-30], [35-45]
Age_cats_type2 [8-13], [14-16], [18-40]
Height_cat_Type1 [100-120], [121-140], [141-186]
Height_cat_type2 [110-125], [126-145], [146-190]
Weight_cat_Type1 [50-60], [61-78], [79-85]
Weight_cat_Type2 [55-75], [76-90], [91-100]
对于 People[1,2](Age=10),它落在 Age_cats_type1==1 和 Age_cats_type2==1 中。
对于 People[1,3] (height=140),这适合 Height_cat_Type1==2
和 Height_cat_Type2==2
现在我想为 (Age_cats_type1==1)|(Age_cats_type1==2)、(Height_cats_type1==1)|(Height_cats_type1==2)、(Weight_cats_type1==1)|(Weight_cats_type1==2) 这些区间的每一种唯一可能结果创建一个 table。
所需的输出应类似于下面的黄色图像。上面的 table 是对每个 interval 的可能性的总结。
在本例中,我们使用 data.table 中的 foverlaps。
我用过下面的代码
library(intervals)

# create our limits
# Each Intervals object below holds three ranges, one per matrix row, written
# as (lower, upper) pairs. closed = c(TRUE, TRUE) closes both endpoints and
# type = "Z" declares the intervals to be over the integers.

# Age, scheme 1: [5-15], [20-30], [35-45]
# (the last upper bound is 45, matching the stated category limits)
AGE_cats_type1 <- Intervals(
  matrix(c(5, 15, 20, 30, 35, 45), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Age, scheme 2: [8-13], [14-16], [18-40]
AGE_cats_type2 <- Intervals(
  matrix(c(8, 13, 14, 16, 18, 40), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Height, scheme 1: [100-120], [121-140], [141-186]
Height_cats_type1 <- Intervals(
  matrix(c(100, 120, 121, 140, 141, 186), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Height, scheme 2: [110-125], [126-145], [146-190]
Height_cats_type2 <- Intervals(
  matrix(c(110, 125, 126, 145, 146, 190), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Weight, scheme 1: [50-60], [61-78], [79-85]
Weight_cats_type1 <- Intervals(
  matrix(c(50, 60, 61, 78, 79, 85), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Weight, scheme 2: [55-75], [76-90], [91-100]
Weight_cats_type2 <- Intervals(
  matrix(c(55, 75, 76, 90, 91, 100), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)
# now format data
library(data.table)

# Every lookup table gets its own names for the interval bounds instead of
# the default X1/X2. With X1/X2 reused at every step, the chained joins pile
# up clashing column names, and the third foverlaps() call failed inside
# setcolorder() with a column-count mismatch.

# first for age
PEOPLE1 <- data.table(People)
PEOPLE1[, A1 := Age]  # [Age, A1] is the zero-width interval used for the join
I_age_1 <- data.table(cbind(data.frame(AGE_cats_type1), idX = 1:3, idY = 0))
I_age_2 <- data.table(cbind(data.frame(AGE_cats_type2), idX = 0, idY = 1:3))
setnames(I_age_1, c("X1", "X2"), c("age_lo", "age_hi"))
setnames(I_age_2, c("X1", "X2"), c("age_lo", "age_hi"))
setkey(I_age_1, age_lo, age_hi)
setkey(I_age_2, age_lo, age_hi)
PEOPLE2 <- data.frame(rbind(
  foverlaps(PEOPLE1, I_age_1, by.x = c("Age", "A1"), nomatch = 0),
  foverlaps(PEOPLE1, I_age_2, by.x = c("Age", "A1"), nomatch = 0)))
####################################################
# second iteration for height
PEOPLE3 <- data.table(PEOPLE2)
PEOPLE3[, B1 := Height]
I_height_1 <- data.table(cbind(data.frame(Height_cats_type1), idXa = 1:3, idYa = 0))
I_height_2 <- data.table(cbind(data.frame(Height_cats_type2), idXa = 0, idYa = 1:3))
setnames(I_height_1, c("X1", "X2"), c("height_lo", "height_hi"))
setnames(I_height_2, c("X1", "X2"), c("height_lo", "height_hi"))
setkey(I_height_1, height_lo, height_hi)
setkey(I_height_2, height_lo, height_hi)
# The second join must use the type-2 height limits (I_height_2), not
# I_height_1 a second time.
PEOPLE4 <- data.frame(rbind(
  foverlaps(PEOPLE3, I_height_1, by.x = c("Height", "B1"), nomatch = 0),
  foverlaps(PEOPLE3, I_height_2, by.x = c("Height", "B1"), nomatch = 0)))
################################################
# third iteration for weight
PEOPLE5 <- data.table(PEOPLE4)
PEOPLE5[, C1 := Weight]
I_weight_1 <- data.table(cbind(data.frame(Weight_cats_type1), idXb = 1:3, idYb = 0))
I_weight_2 <- data.table(cbind(data.frame(Weight_cats_type2), idXb = 0, idYb = 1:3))
setnames(I_weight_1, c("X1", "X2"), c("weight_lo", "weight_hi"))
setnames(I_weight_2, c("X1", "X2"), c("weight_lo", "weight_hi"))
setkey(I_weight_1, weight_lo, weight_hi)
setkey(I_weight_2, weight_lo, weight_hi)
# Join on the weight interval [Weight, C1]; the height columns belong to the
# previous iteration.
PEOPLE6 <- data.frame(rbind(
  foverlaps(PEOPLE5, I_weight_1, by.x = c("Weight", "C1"), nomatch = 0),
  foverlaps(PEOPLE5, I_weight_2, by.x = c("Weight", "C1"), nomatch = 0)))
但是在 PEOPLE6 中出现错误。
Error in setcolorder(ans, c(xcols1, ycols, xcols2)) :
neworder is length 16 but x has 18 columns.
当我查看 PEOPLE4 时,可以看到 idX、idY、idXa 和 idYa 分别是 Age_cats_type1、Age_cats_type2、Height_cat_Type1 和 Height_cat_Type2 的值。
你的问题的表述有一些问题。让我们尝试重构您的意思。
# Start by building the example data.
Person <- c(1, 2, 3)
Age    <- c(10, 22, 30)
Height <- c(140, 185, 160)
Weight <- c(65, 80, 75)
People <- data.frame(
  Person = Person,
  Age    = Age,
  Height = Height,
  Weight = Weight
)
# The data frame looks like this:
#   Person Age Height Weight
# 1      1  10    140     65
# 2      2  22    185     80
# 3      3  30    160     75

# Ranges can be represented as lists of c(lower, upper) pairs, one list per
# category scheme.
Age_cats_type1   <- list(c(5, 15), c(20, 30), c(35, 45))
Age_cats_type2   <- list(c(8, 13), c(14, 16), c(18, 40))
Height_cat_Type1 <- list(c(100, 120), c(121, 140), c(141, 186))
Height_cat_Type2 <- list(c(110, 125), c(126, 145), c(146, 190))
Weight_cat_Type1 <- list(c(50, 60), c(61, 78), c(79, 85))
Weight_cat_Type2 <- list(c(55, 75), c(76, 90), c(91, 100))

# The question mentioned something like People[1, 1] meaning age == 10.
# That looks like a mistake: typing People[1, 1] in the console returns
# Person == 1. The People data frame was therefore presumably meant to be
# constructed without the Person vector, so it is rebuilt that way here.
People <- data.frame(Age = Age, Height = Height, Weight = Weight)
# Now People[1, 1] returns age == 10.
# Then you went on to say that you wanted some function that returned Age_cats_type == 1
# Well, it seems that you want the first element of the list of ranges that contains the specified value.
# Then let's build it
# Test whether `value` lies within the closed interval given by `range`.
#
# range: vector whose first element is the lower bound and whose second
#        element is the upper bound; both bounds are inclusive.
# value: the single value to test.
# Returns TRUE when range[1] <= value <= range[2], FALSE otherwise.
contains_value <- function(range, value) {
  (value >= range[1]) && (range[2] >= value)
}
# Return the 1-based position of the first range in `ranges_list` that
# contains `value`, or NA when no range contains it.
#
# ranges_list: list of c(lower, upper) ranges, each tested with
#              contains_value().
# value: the single value to look up.
range_index <- function(ranges_list, value) {
  # vapply() always yields a logical vector, even for an empty list; sapply()
  # would return list() in that case and make which() fail.
  hits <- vapply(ranges_list, contains_value, logical(1), value = value)
  which(hits)[1]
}
# Example lookups for the first row of People; the trailing comment on each
# line is the expected index of the matching range.
range_index(Age_cats_type1, People[1, 1]) # 1
range_index(Age_cats_type2, People[1, 1]) # 1
range_index(Height_cat_Type1, People[1, 2]) # 2
range_index(Height_cat_Type2, People[1, 2]) # 2
# Now I didn't understand what the table you were trying to construct was, but maybe these functions will help you build it.