根据区域数量重塑我的数据
Reshape my data according number of zones
这是我的数据
# Example data: 15 rows for two names (ABL: 8 rows, ARP: 7 rows).
# Zone holds the observed zone per row (zone 3 never appears); X1..X5 are
# integer columns that are mostly zero.
information <- data.frame(
  X = 1:15,
  Name = factor(
    rep(c("ABL", "ARP"), times = c(8L, 7L)),
    levels = c("ABL", "ARP")
  ),
  Zone = c(
    1L, 2L, 7L, 8L, 9L, 10L, 11L, 12L,
    1L, 2L, 4L, 5L, 6L, 7L, 9L
  ),
  X3 = c(1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 3L, 0L, 1L, 2L, 0L, 1L, 0L),
  # The remaining columns are all zero except at the listed row positions.
  X2 = replace(integer(15), c(4L, 15L), 1L),
  X5 = replace(integer(15), c(10L, 13L), 1L),
  X4 = replace(integer(15), 11L, 1L),
  X1 = replace(integer(15), 6L, 1L)
)
我希望所有 Name 都有 12 个区域;当没有关联的 Zone 时,Zones 将取缺失值,X1 到 X5 的值将取 0。注意,例如 Name ARP 和 ABL 都没有区域 3。
我试过这个代码
library(tidyr)
library(dplyr)
# Work on a copy so the original data stays untouched.
plot_data <- information
# Recode Zone as a factor whose levels are all twelve zones.
plot_data$Zone <- factor(plot_data$Zone, levels = 1:12)
# Left-join the data onto a one-column table of the twelve zone levels.
# The only shared column is Zone, so the zones are not crossed with Name.
plot_data <- left_join(data_frame(Zone = factor(1:12)), plot_data)
它给了我:
Zone X Name X3 X2 X5 X4 X1
(fctr) (int) (fctr) (int) (int) (int) (int) (int)
1 1 1 ABL 1 0 0 0 0
2 1 9 ARP 3 0 0 0 0
3 2 2 ABL 1 0 0 0 0
4 2 10 ARP 0 0 1 0 0
5 3 NA NA NA NA NA NA NA
6 4 11 ARP 1 0 0 1 0
7 5 12 ARP 2 0 0 0 0
8 6 13 ARP 0 0 1 0 0
9 7 3 ABL 1 0 0 0 0
10 7 14 ARP 1 0 0 0 0
11 8 4 ABL 1 1 0 0 0
12 9 5 ABL 2 0 0 0 0
13 9 15 ARP 0 1 0 0 0
14 10 6 ABL 2 0 0 0 1
15 11 7 ABL 1 0 0 0 0
16 12 8 ABL 1 0 0 0 0
我的代码有什么问题?
非常感谢!
首先,我们用 complete() 补全数据,以确保每个 Name 都拥有所有 Zone。然后,我们将 X1 到 X5 列中的 NA 替换为 0:
library(tidyr)
library(dplyr)
# Expand to every combination of the observed Name and Zone values, then
# replace the NAs that this introduces in X1..X5 with 0.
information %>%
  complete(Name, Zone) %>%
  replace_na(list(X1 = 0, X2 = 0, X3 = 0, X4 = 0, X5 = 0))
给出:
# Name Zone X X3 X2 X5 X4 X1
#1 ABL 1 1 1 0 0 0 0
#2 ABL 2 2 1 0 0 0 0
#3 ABL 4 NA 0 0 0 0 0
#4 ABL 5 NA 0 0 0 0 0
#5 ABL 6 NA 0 0 0 0 0
#6 ABL 7 3 1 0 0 0 0
#7 ABL 8 4 1 1 0 0 0
#8 ABL 9 5 2 0 0 0 0
#9 ABL 10 6 2 0 0 0 1
#10 ABL 11 7 1 0 0 0 0
#11 ABL 12 8 1 0 0 0 0
#12 ARP 1 9 3 0 0 0 0
#13 ARP 2 10 0 0 1 0 0
#14 ARP 4 11 1 0 0 1 0
#15 ARP 5 12 2 0 0 0 0
#16 ARP 6 13 0 0 1 0 0
#17 ARP 7 14 1 0 0 0 0
#18 ARP 8 NA 0 0 0 0 0
#19 ARP 9 15 0 1 0 0 0
#20 ARP 10 NA 0 0 0 0 0
#21 ARP 11 NA 0 0 0 0 0
#22 ARP 12 NA 0 0 0 0 0
注意:正如@aosmith 所提到的,您可以使用 complete()
的 fill
参数代替(它在内部调用 replace_na
) :
# Same expansion in a single call: `fill` supplies the value used instead of
# NA for each named column in the newly created rows.
complete(
  information,
  Name,
  Zone,
  fill = list(X1 = 0, X2 = 0, X3 = 0, X4 = 0, X5 = 0)
)
请注意,此方法只会用 complete() 补全已观察到的 Zone。由于您的初始数据中没有 Zone = 3,因此不会将含有该 Zone 的 Name 和 Zone 组合添加到结果中。如果您想补全应该出现但未被观察到的缺失值,可以使用 full_seq():
# Expand Zone to the full sequence from its smallest to its largest observed
# value in steps of 1, so zones missing from the data are added for each Name.
information %>%
  complete(Zone = full_seq(Zone, period = 1), Name) %>%
  # Just another way to replace NA with 0 in columns X3 to X1.
  # across() (dplyr >= 1.0.0) replaces the deprecated mutate_each()/funs().
  mutate(across(X3:X1, function(col) replace(col, is.na(col), 0)))
使用查找数据并合并:
# Lookup table in which every name is paired with every zone from 1 to 12.
lookup <- expand.grid(
  Name = c("ABL", "ARP"),
  Zone = 1:12,
  KEEP.OUT.ATTRS = FALSE
)
# Merge, keeping every lookup row; combinations absent from the data get NA.
res <- merge(x = lookup, y = information, by = c("Name", "Zone"), all.x = TRUE)
# Convert the NAs in X1..X5 to 0, column by column.
res[paste0("X", 1:5)] <- lapply(
  res[paste0("X", 1:5)],
  function(column) replace(column, is.na(column), 0)
)
这个呢?
# Key table: every distinct Name crossed with zones 1 to 12. The two inputs
# share no columns, so merge() returns their Cartesian product.
aux_key <- merge(x = unique(information$Name), y = 1:12)
names(aux_key) <- c("Name", "Zone")
# Keep every key row; Name/Zone pairs absent from the data get NA elsewhere.
new_information <- merge(
  x = aux_key,
  y = information,
  by = c("Name", "Zone"),
  all.x = TRUE
)
# Zero-fill only X1..X5. X is left as NA for the added rows, so there is no
# need to overwrite it with 0 and restore NA afterwards (which would also
# wipe out a genuine X value of 0).
count_cols <- paste0("X", 1:5)
new_information[count_cols][is.na(new_information[count_cols])] <- 0
head(new_information)
这是我的数据
# Example data: 15 rows for two names (ABL: 8 rows, ARP: 7 rows).
# Zone holds the observed zone per row (zone 3 never appears); X1..X5 are
# integer columns that are mostly zero.
information <- data.frame(
  X = 1:15,
  Name = factor(
    rep(c("ABL", "ARP"), times = c(8L, 7L)),
    levels = c("ABL", "ARP")
  ),
  Zone = c(
    1L, 2L, 7L, 8L, 9L, 10L, 11L, 12L,
    1L, 2L, 4L, 5L, 6L, 7L, 9L
  ),
  X3 = c(1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 3L, 0L, 1L, 2L, 0L, 1L, 0L),
  # The remaining columns are all zero except at the listed row positions.
  X2 = replace(integer(15), c(4L, 15L), 1L),
  X5 = replace(integer(15), c(10L, 13L), 1L),
  X4 = replace(integer(15), 11L, 1L),
  X1 = replace(integer(15), 6L, 1L)
)
我希望所有 Name 都有 12 个区域;当没有关联的 Zone 时,Zones 将取缺失值,X1 到 X5 的值将取 0。注意,例如 Name ARP 和 ABL 都没有区域 3。
我试过这个代码
library(tidyr)
library(dplyr)
# Work on a copy so the original data stays untouched.
plot_data <- information
# Recode Zone as a factor whose levels are all twelve zones.
plot_data$Zone <- factor(plot_data$Zone, levels = 1:12)
# Left-join the data onto a one-column table of the twelve zone levels.
# The only shared column is Zone, so the zones are not crossed with Name.
plot_data <- left_join(data_frame(Zone = factor(1:12)), plot_data)
它给了我:
Zone X Name X3 X2 X5 X4 X1
(fctr) (int) (fctr) (int) (int) (int) (int) (int)
1 1 1 ABL 1 0 0 0 0
2 1 9 ARP 3 0 0 0 0
3 2 2 ABL 1 0 0 0 0
4 2 10 ARP 0 0 1 0 0
5 3 NA NA NA NA NA NA NA
6 4 11 ARP 1 0 0 1 0
7 5 12 ARP 2 0 0 0 0
8 6 13 ARP 0 0 1 0 0
9 7 3 ABL 1 0 0 0 0
10 7 14 ARP 1 0 0 0 0
11 8 4 ABL 1 1 0 0 0
12 9 5 ABL 2 0 0 0 0
13 9 15 ARP 0 1 0 0 0
14 10 6 ABL 2 0 0 0 1
15 11 7 ABL 1 0 0 0 0
16 12 8 ABL 1 0 0 0 0
我的代码有什么问题?
非常感谢!
首先,我们用 complete() 补全数据,以确保每个 Name 都拥有所有 Zone。然后,我们将 X1 到 X5 列中的 NA 替换为 0:
library(tidyr)
library(dplyr)
# Expand to every combination of the observed Name and Zone values, then
# replace the NAs that this introduces in X1..X5 with 0.
information %>%
  complete(Name, Zone) %>%
  replace_na(list(X1 = 0, X2 = 0, X3 = 0, X4 = 0, X5 = 0))
给出:
# Name Zone X X3 X2 X5 X4 X1
#1 ABL 1 1 1 0 0 0 0
#2 ABL 2 2 1 0 0 0 0
#3 ABL 4 NA 0 0 0 0 0
#4 ABL 5 NA 0 0 0 0 0
#5 ABL 6 NA 0 0 0 0 0
#6 ABL 7 3 1 0 0 0 0
#7 ABL 8 4 1 1 0 0 0
#8 ABL 9 5 2 0 0 0 0
#9 ABL 10 6 2 0 0 0 1
#10 ABL 11 7 1 0 0 0 0
#11 ABL 12 8 1 0 0 0 0
#12 ARP 1 9 3 0 0 0 0
#13 ARP 2 10 0 0 1 0 0
#14 ARP 4 11 1 0 0 1 0
#15 ARP 5 12 2 0 0 0 0
#16 ARP 6 13 0 0 1 0 0
#17 ARP 7 14 1 0 0 0 0
#18 ARP 8 NA 0 0 0 0 0
#19 ARP 9 15 0 1 0 0 0
#20 ARP 10 NA 0 0 0 0 0
#21 ARP 11 NA 0 0 0 0 0
#22 ARP 12 NA 0 0 0 0 0
注意:正如@aosmith 所提到的,您可以使用 complete()
的 fill
参数代替(它在内部调用 replace_na
) :
# Same expansion in a single call: `fill` supplies the value used instead of
# NA for each named column in the newly created rows.
complete(
  information,
  Name,
  Zone,
  fill = list(X1 = 0, X2 = 0, X3 = 0, X4 = 0, X5 = 0)
)
请注意,此方法只会用 complete() 补全已观察到的 Zone。由于您的初始数据中没有 Zone = 3,因此不会将含有该 Zone 的 Name 和 Zone 组合添加到结果中。如果您想补全应该出现但未被观察到的缺失值,可以使用 full_seq():
# Expand Zone to the full sequence from its smallest to its largest observed
# value in steps of 1, so zones missing from the data are added for each Name.
information %>%
  complete(Zone = full_seq(Zone, period = 1), Name) %>%
  # Just another way to replace NA with 0 in columns X3 to X1.
  # across() (dplyr >= 1.0.0) replaces the deprecated mutate_each()/funs().
  mutate(across(X3:X1, function(col) replace(col, is.na(col), 0)))
使用查找数据并合并:
# Lookup table in which every name is paired with every zone from 1 to 12.
lookup <- expand.grid(
  Name = c("ABL", "ARP"),
  Zone = 1:12,
  KEEP.OUT.ATTRS = FALSE
)
# Merge, keeping every lookup row; combinations absent from the data get NA.
res <- merge(x = lookup, y = information, by = c("Name", "Zone"), all.x = TRUE)
# Convert the NAs in X1..X5 to 0, column by column.
res[paste0("X", 1:5)] <- lapply(
  res[paste0("X", 1:5)],
  function(column) replace(column, is.na(column), 0)
)
这个呢?
# Key table: every distinct Name crossed with zones 1 to 12. The two inputs
# share no columns, so merge() returns their Cartesian product.
aux_key <- merge(x = unique(information$Name), y = 1:12)
names(aux_key) <- c("Name", "Zone")
# Keep every key row; Name/Zone pairs absent from the data get NA elsewhere.
new_information <- merge(
  x = aux_key,
  y = information,
  by = c("Name", "Zone"),
  all.x = TRUE
)
# Zero-fill only X1..X5. X is left as NA for the added rows, so there is no
# need to overwrite it with 0 and restore NA afterwards (which would also
# wipe out a genuine X value of 0).
count_cols <- paste0("X", 1:5)
new_information[count_cols][is.na(new_information[count_cols])] <- 0
head(new_information)