在同一个数据集内进行合并:如果客户更改了名称,如何把数据框中对应的行合并压缩?
Merge argument but within one dataset. How to condense dataframe if client changed names?
如何对已加载到同一个数据框(data frame)中的行应用合并(merge)操作?如下所示,客户 EE 从 2013 年起更名为 TE。我想把这两组记录合并为一组:取出 EE 的 2011 和 2012 年数据,填入 TE 对应的行,使其与 TE 的 2013 和 2014 年数据并列(并在此过程中删除 EE 的行)。数据集中还有其他客户的数据(例如 FE),我不想在此过程中改动它们,但希望它们保留在最终的数据框中。
数据:
http://i.stack.imgur.com/smmlY.jpg
# Sample input as dput() output: a 17-row data.frame with 11 columns.
# Client EE (ClientID 1) has values only in X2011/X2012, client TE (ClientID 2)
# only in X2013/X2014, and the single FE row (ClientID 3) has all four years.
# Note: the Item value " metal" carries a leading space in this data.
structure(list(ClientID = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L), Client = c("EE", "EE", "EE",
"EE", "TE", "TE", "TE", "TE", "EE", "EE", "EE", "EE", "TE", "TE",
"TE", "TE", "FE"), ItemCode = c(15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 15L), Item = c("plastic",
"plastic", "plastic", "plastic", "plastic", "plastic", "plastic",
"plastic", " metal", " metal", " metal", " metal", " metal",
" metal", " metal", " metal", "plastic"), DomainCode = c(112L,
113L, 114L, 115L, 112L, 113L, 114L, 115L, 112L, 113L, 114L, 115L,
112L, 113L, 114L, 115L, 112L), Domain = c("Sector1", "Sector2",
"Sector3", "Sector4", "Sector1", "Sector2", "Sector3", "Sector4",
"Sector1", "Sector2", "Sector3", "Sector4", "Sector1", "Sector2",
"Sector3", "Sector4", "Sector1"), Unit = c("kg", "kg", "kg",
"kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg",
"kg", "kg", "kg"), X2011 = c(3L, 11L, 3L, 5L, NA, NA, NA, NA,
9L, 6L, 5L, 4L, NA, NA, NA, NA, 5L), X2012 = c(4L, 5L, 9L, 7L,
NA, NA, NA, NA, 4L, 7L, 10L, 12L, NA, NA, NA, NA, 7L), X2013 = c(NA,
NA, NA, NA, 8L, 4L, 9L, 7L, NA, NA, NA, NA, 3L, 3L, 12L, 6L,
6L), X2014 = c(NA, NA, NA, NA, 2L, 5L, 6L, 3L, NA, NA, NA, NA,
12L, 10L, 10L, 11L, 8L)), .Names = c("ClientID", "Client", "ItemCode",
"Item", "DomainCode", "Domain", "Unit", "X2011", "X2012", "X2013",
"X2014"), class = "data.frame", row.names = c(NA, -17L))
期望的最终结果
# Desired output as dput() output: a 9-row data.frame with the same 11 columns.
# The EE rows are gone, the eight TE rows have all four year columns filled,
# and the FE row is kept as-is.
# NOTE(review): the TE year values here do not match the sample input (e.g.
# X2011 for plastic is 9, 6, 5, 11 here vs 3, 11, 3, 5 for EE in the input);
# they look illustrative only -- confirm before using as a test oracle.
structure(list(ClientID = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L
), Client = c("TE", "TE", "TE", "TE", "TE", "TE", "TE", "TE",
"FE"), ItemCode = c(15L, 15L, 15L, 15L, 20L, 20L, 20L, 20L, 15L
), Item = c("plastic", "plastic", "plastic", "plastic", " metal",
" metal", " metal", " metal", "plastic"), DomainCode = c(112L,
113L, 114L, 115L, 112L, 113L, 114L, 115L, 112L), Domain = c("Sector1",
"Sector2", "Sector3", "Sector4", "Sector1", "Sector2", "Sector3",
"Sector4", "Sector1"), Unit = c("kg", "kg", "kg", "kg", "kg",
"kg", "kg", "kg", "kg"), X2011 = c(9L, 6L, 5L, 11L, 11L, 2L,
2L, 9L, 5L), X2012 = c(7L, 12L, 5L, 2L, 12L, 9L, 8L, 12L, 7L),
X2013 = c(9L, 11L, 5L, 9L, 5L, 5L, 8L, 9L, 6L), X2014 = c(3L,
8L, 2L, 5L, 9L, 8L, 8L, 7L, 8L)), .Names = c("ClientID",
"Client", "ItemCode", "Item", "DomainCode", "Domain", "Unit",
"X2011", "X2012", "X2013", "X2014"), class = "data.frame", row.names = c(NA,
-9L))
尝试
# Fold the renamed client's history into its new name: copy EE's 2011/2012
# figures onto the matching TE rows and drop the EE rows. Every other client
# (e.g. FE) passes through unchanged. Expects `dat` to be the input data.frame.
indx <- dat$Client == "TE"   # rows of `dat` recorded under the new name
indx1 <- dat$Client == "EE"  # rows of `dat` recorded under the old name
dat1 <- dat[dat$Client %in% c("TE", "FE"), ]

# Both masks were built on `dat`, so they must only ever index `dat`; applying
# them to the shorter `dat1` picks the wrong rows and pads with NA rows.
new_rows <- dat[indx, ]
old_rows <- dat[indx1, ]

# Pair every TE row with the EE row for the same item and domain instead of
# assuming both groups happen to be stored in the same order.
pos <- match(
  paste(new_rows$ItemCode, new_rows$DomainCode),
  paste(old_rows$ItemCode, old_rows$DomainCode)
)

# Row subsetting keeps order, so the TE rows of `dat1` line up with `new_rows`.
# Only missing cells are filled; a TE row with no EE counterpart stays NA.
dat1[dat1$Client == "TE", c("X2011", "X2012")] <- Map(
  function(x, y) replace(x, is.na(x), y[is.na(x)]),
  new_rows[c("X2011", "X2012")],
  old_rows[pos, c("X2011", "X2012")]
)
如何对已加载到同一个数据框(data frame)中的行应用合并(merge)操作?如下所示,客户 EE 从 2013 年起更名为 TE。我想把这两组记录合并为一组:取出 EE 的 2011 和 2012 年数据,填入 TE 对应的行,使其与 TE 的 2013 和 2014 年数据并列(并在此过程中删除 EE 的行)。数据集中还有其他客户的数据(例如 FE),我不想在此过程中改动它们,但希望它们保留在最终的数据框中。
数据: http://i.stack.imgur.com/smmlY.jpg
# Sample input as dput() output: a 17-row data.frame with 11 columns.
# Client EE (ClientID 1) has values only in X2011/X2012, client TE (ClientID 2)
# only in X2013/X2014, and the single FE row (ClientID 3) has all four years.
# Note: the Item value " metal" carries a leading space in this data.
structure(list(ClientID = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L), Client = c("EE", "EE", "EE",
"EE", "TE", "TE", "TE", "TE", "EE", "EE", "EE", "EE", "TE", "TE",
"TE", "TE", "FE"), ItemCode = c(15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 15L), Item = c("plastic",
"plastic", "plastic", "plastic", "plastic", "plastic", "plastic",
"plastic", " metal", " metal", " metal", " metal", " metal",
" metal", " metal", " metal", "plastic"), DomainCode = c(112L,
113L, 114L, 115L, 112L, 113L, 114L, 115L, 112L, 113L, 114L, 115L,
112L, 113L, 114L, 115L, 112L), Domain = c("Sector1", "Sector2",
"Sector3", "Sector4", "Sector1", "Sector2", "Sector3", "Sector4",
"Sector1", "Sector2", "Sector3", "Sector4", "Sector1", "Sector2",
"Sector3", "Sector4", "Sector1"), Unit = c("kg", "kg", "kg",
"kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg",
"kg", "kg", "kg"), X2011 = c(3L, 11L, 3L, 5L, NA, NA, NA, NA,
9L, 6L, 5L, 4L, NA, NA, NA, NA, 5L), X2012 = c(4L, 5L, 9L, 7L,
NA, NA, NA, NA, 4L, 7L, 10L, 12L, NA, NA, NA, NA, 7L), X2013 = c(NA,
NA, NA, NA, 8L, 4L, 9L, 7L, NA, NA, NA, NA, 3L, 3L, 12L, 6L,
6L), X2014 = c(NA, NA, NA, NA, 2L, 5L, 6L, 3L, NA, NA, NA, NA,
12L, 10L, 10L, 11L, 8L)), .Names = c("ClientID", "Client", "ItemCode",
"Item", "DomainCode", "Domain", "Unit", "X2011", "X2012", "X2013",
"X2014"), class = "data.frame", row.names = c(NA, -17L))
期望的最终结果
# Desired output as dput() output: a 9-row data.frame with the same 11 columns.
# The EE rows are gone, the eight TE rows have all four year columns filled,
# and the FE row is kept as-is.
# NOTE(review): the TE year values here do not match the sample input (e.g.
# X2011 for plastic is 9, 6, 5, 11 here vs 3, 11, 3, 5 for EE in the input);
# they look illustrative only -- confirm before using as a test oracle.
structure(list(ClientID = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L
), Client = c("TE", "TE", "TE", "TE", "TE", "TE", "TE", "TE",
"FE"), ItemCode = c(15L, 15L, 15L, 15L, 20L, 20L, 20L, 20L, 15L
), Item = c("plastic", "plastic", "plastic", "plastic", " metal",
" metal", " metal", " metal", "plastic"), DomainCode = c(112L,
113L, 114L, 115L, 112L, 113L, 114L, 115L, 112L), Domain = c("Sector1",
"Sector2", "Sector3", "Sector4", "Sector1", "Sector2", "Sector3",
"Sector4", "Sector1"), Unit = c("kg", "kg", "kg", "kg", "kg",
"kg", "kg", "kg", "kg"), X2011 = c(9L, 6L, 5L, 11L, 11L, 2L,
2L, 9L, 5L), X2012 = c(7L, 12L, 5L, 2L, 12L, 9L, 8L, 12L, 7L),
X2013 = c(9L, 11L, 5L, 9L, 5L, 5L, 8L, 9L, 6L), X2014 = c(3L,
8L, 2L, 5L, 9L, 8L, 8L, 7L, 8L)), .Names = c("ClientID",
"Client", "ItemCode", "Item", "DomainCode", "Domain", "Unit",
"X2011", "X2012", "X2013", "X2014"), class = "data.frame", row.names = c(NA,
-9L))
尝试
# Fold the renamed client's history into its new name: copy EE's 2011/2012
# figures onto the matching TE rows and drop the EE rows. Every other client
# (e.g. FE) passes through unchanged. Expects `dat` to be the input data.frame.
indx <- dat$Client == "TE"   # rows of `dat` recorded under the new name
indx1 <- dat$Client == "EE"  # rows of `dat` recorded under the old name
dat1 <- dat[dat$Client %in% c("TE", "FE"), ]

# Both masks were built on `dat`, so they must only ever index `dat`; applying
# them to the shorter `dat1` picks the wrong rows and pads with NA rows.
new_rows <- dat[indx, ]
old_rows <- dat[indx1, ]

# Pair every TE row with the EE row for the same item and domain instead of
# assuming both groups happen to be stored in the same order.
pos <- match(
  paste(new_rows$ItemCode, new_rows$DomainCode),
  paste(old_rows$ItemCode, old_rows$DomainCode)
)

# Row subsetting keeps order, so the TE rows of `dat1` line up with `new_rows`.
# Only missing cells are filled; a TE row with no EE counterpart stays NA.
dat1[dat1$Client == "TE", c("X2011", "X2012")] <- Map(
  function(x, y) replace(x, is.na(x), y[is.na(x)]),
  new_rows[c("X2011", "X2012")],
  old_rows[pos, c("X2011", "X2012")]
)