如何计算个人超过某个值的年数并显示所述年份
How to calculate number of years individuals exceeded a value and display said years
我有以下数据集:
## ID cumulative sector meters DOB
## 1 20100 41.75948 H38 6400 1979-08-24
## 2 20101 35.16874 B01 1600 1980-05-05
## 3 20102 20.60899 H04 1600 1979-03-17
## 4 20103 38.72266 c("B09", "B37") c(3200, 8000) c("1981-11-30", "1981-11-30")
## 5 20104 38.72266 c("B09", "B37") c(3200, 8000) c("1978-09-01", "1978-09-01")
## 6 20105 116.88668 c("B09", "B09") c(3200, 3200) c("1980-12-03", "1980-12-03")
## Oct Res_FROM Res_TO
## 1 W 1979-08-15 1991-05-15
## 2 NW 1980-05-15 1991-04-15
## 3 SW 1972-06-15 1979-08-15
## 4 c("NE", "N") c("1982-01-15", "1984-01-15") c("1984-01-15", "1986-04-15")
## 5 c("NE", "N") c("1982-01-15", "1984-01-15") c("1984-01-15", "1986-04-15")
## 6 c("NE", "NE") c("1980-12-15", "1983-08-15") c("1983-08-15", "1991-03-15")
## Exp_FROM Exp_TO
## 1 1979-08-24 1988-12-31
## 2 1980-05-15 1988-12-31
## 3 1979-03-17 1979-08-15
## 4 c("1982-01-15", "1984-01-15") c("1984-01-15", "1986-04-15")
## 5 c("1982-01-15", "1984-01-15") c("1984-01-15", "1986-04-15")
## 6 c("1980-12-15", "1983-08-15") c("1983-08-15", "1988-12-31")
## Exps_Grp Yr1952 Yr1953 Yr1954 Yr1955 Yr1956 Yr1957 Yr1958
## 1 fr51>88 NA NA NA NA NA NA NA
## 2 fr51>88 NA NA NA NA NA NA NA
## 3 between NA NA NA NA NA NA NA
## 4 c("between", "between") NA NA NA NA NA NA NA
## 5 c("between", "between") NA NA NA NA NA NA NA
## 6 c("between", "fr51>88") NA NA NA NA NA NA NA
## Yr1959 Yr1960 Yr1961 Yr1962 Yr1963 Yr1964 Yr1965 Yr1966 Yr1967 Yr1968 Yr1969
## 1 NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA NA NA
## Yr1970 Yr1971 Yr1972 Yr1973 Yr1974 Yr1975 Yr1976 Yr1977 Yr1978 Yr1979
## 1 NA NA NA NA NA NA NA NA NA 5.950991
## 2 NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA 20.608986
## 4 NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA NA
## Yr1980 Yr1981 Yr1982 Yr1983 Yr1984 Yr1985 Yr1986
## 1 4.340588 4.340588 4.340588 4.340588 4.340588 4.340588 4.3405881
## 2 2.927725 4.447229 4.447229 4.447229 4.447229 4.447229 4.4472289
## 3 NA NA NA NA NA NA NA
## 4 NA NA 15.365412 16.018407 3.594414 3.052618 0.6918076
## 5 NA NA 15.365412 16.018407 3.594414 3.052618 0.6918076
## 6 0.758267 16.018407 16.018407 16.018407 16.018407 16.018407 16.0184067
## Yr1987 Yr1988 Yrs_Exp arth_mean median cumulative.1 Age
## 1 4.340588 1.083782 9.3616438 4.175948 4.340588 41.75948 9
## 2 4.447229 1.110409 8.6356164 3.907637 4.447229 35.16874 8
## 3 NA NA 0.4136986 20.608986 20.608986 20.60899 9
## 4 NA NA 4.2493151 7.744532 3.594414 38.72266 7
## 5 NA NA 4.2493151 7.744532 3.594414 38.72266 10
## 6 16.018407 3.999565 8.0493151 12.987409 16.018407 116.88668 8
根据这些信息,我想完成几件事:
1. 我想计算每个人(每个 ID 代表一个人)的平均值(即 arth_mean 列)大于 4 的年数。最好能把这个结果作为新的一列添加到我的数据集中。
2. 承接上一点,我还希望把每个人超过 4 的各个年份逐一列出,同样作为数据集中的新列。例如,可能有一列为每个 ID 列出 1954、1966、1967 等年份。
我对 R 比较陌生,因此非常感谢任何帮助或建议。我不知道如何获得我想要的输出。我在下面添加了可重现的数据。
## structure(list(ID = 20100:20109, cumulative = c(41.75947792,
## 35.16873597, 20.60898553, 38.72265958, 38.72265958, 116.8866785,
## 271.4880918, 134.9473309, 35.91709062, 26.06165412), sector = c("H38",
## "B01", "H04", "c(\"B09\", \"B37\")", "c(\"B09\", \"B37\")", "c(\"B09\", \"B09\")",
## "c(\"B09\", \"B09\")", "H02", "B13", "H38"), meters = c("6400",
## "1600", "1600", "c(3200, 8000)", "c(3200, 8000)", "c(3200, 3200)",
## "c(3200, 3200)", "1600", "4800", "6400"), DOB = c("1979-08-24",
## "1980-05-05", "1979-03-17", "c(\"1981-11-30\", \"1981-11-30\")",
## "c(\"1978-09-01\", \"1978-09-01\")", "c(\"1980-12-03\", \"1980-12-03\")",
## "c(\"1978-04-25\", \"1978-04-25\")", "1979-06-22", "1978-10-09",
## "1982-04-26"), Oct = c("W", "NW", "SW", "c(\"NE\", \"N\")", "c(\"NE\", \"N\")",
## "c(\"NE\", \"NE\")", "c(\"NE\", \"NE\")", "SE", "N", "W"), Res_FROM = c("1979-08-15",
## "1980-05-15", "1972-06-15", "c(\"1982-01-15\", \"1984-01-15\")",
## "c(\"1982-01-15\", \"1984-01-15\")", "c(\"1980-12-15\", \"1983-08-15\")",
## "c(\"1978-04-15\", \"1983-08-15\")", "1979-06-15", "1978-10-15",
## "1982-04-15"), Res_TO = c("1991-05-15", "1991-04-15", "1979-08-15",
## "c(\"1984-01-15\", \"1986-04-15\")", "c(\"1984-01-15\", \"1986-04-15\")",
## "c(\"1983-08-15\", \"1991-03-15\")", "c(\"1983-08-15\", \"2000-01-15\")",
## "1991-04-15", "1983-03-15", "1991-04-15"), Exp_FROM = c("1979-08-24",
## "1980-05-15", "1979-03-17", "c(\"1982-01-15\", \"1984-01-15\")",
## "c(\"1982-01-15\", \"1984-01-15\")", "c(\"1980-12-15\", \"1983-08-15\")",
## "c(\"1978-04-25\", \"1983-08-15\")", "1979-06-22", "1978-10-15",
## "1982-04-26"), Exp_TO = c("1988-12-31", "1988-12-31", "1979-08-15",
## "c(\"1984-01-15\", \"1986-04-15\")", "c(\"1984-01-15\", \"1986-04-15\")",
## "c(\"1983-08-15\", \"1988-12-31\")", "c(\"1983-08-15\", \"1988-12-31\")",
## "1988-12-31", "1983-03-15", "1988-12-31"), Exps_Grp = c("fr51>88",
## "fr51>88", "between", "c(\"between\", \"between\")", "c(\"between\", \"between\")",
## "c(\"between\", \"fr51>88\")", "c(\"between\", \"fr51>88\")",
## "fr51>88", "between", "fr51>88"), Yr1952 = c(NA, NA, NA, NA,
## NA, NA, NA, NA, NA, NA), Yr1953 = c(NA, NA, NA, NA, NA, NA, NA,
## NA, NA, NA), Yr1954 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## ), Yr1955 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1956 = c(NA,
## NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1957 = c(NA, NA, NA, NA,
## NA, NA, NA, NA, NA, NA), Yr1958 = c(NA, NA, NA, NA, NA, NA, NA,
## NA, NA, NA), Yr1959 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## ), Yr1960 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1961 = c(NA,
## NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1962 = c(NA, NA, NA, NA,
## NA, NA, NA, NA, NA, NA), Yr1963 = c(NA, NA, NA, NA, NA, NA, NA,
## NA, NA, NA), Yr1964 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## ), Yr1965 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1966 = c(NA,
## NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1967 = c(NA, NA, NA, NA,
## NA, NA, NA, NA, NA, NA), Yr1968 = c(NA, NA, NA, NA, NA, NA, NA,
## NA, NA, NA), Yr1969 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## ), Yr1970 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1971 = c(NA,
## NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1972 = c(NA, NA, NA, NA,
## NA, NA, NA, NA, NA, NA), Yr1973 = c(NA, NA, NA, NA, NA, NA, NA,
## NA, NA, NA), Yr1974 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## ), Yr1975 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1976 = c(NA,
## NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1977 = c(NA, NA, NA, NA,
## NA, NA, NA, NA, NA, NA), Yr1978 = c(NA, NA, NA, NA, NA, NA, 79.39642441,
## NA, 5.655852488, NA), Yr1979 = c(5.950991161, NA, 20.60898553,
## NA, NA, NA, 59.94484924, 28.50091596, 16.41746701, NA), Yr1980 = c(4.340588078,
## 2.927724588, NA, NA, NA, 0.758267013, 16.01840668, 12.90308755,
## 4.387060214, NA), Yr1981 = c(4.340588078, 4.447228937, NA, NA,
## NA, 16.01840668, 16.01840668, 12.90308755, 4.387060214, NA),
## Yr1982 = c(4.340588078, 4.447228937, NA, 15.36541238, 15.36541238,
## 16.01840668, 16.01840668, 12.90308755, 4.387060214, 3.274931595
## ), Yr1983 = c(4.340588078, 4.447228937, NA, 16.01840668,
## 16.01840668, 16.01840668, 16.01840668, 12.90308755, 0.682590481,
## 4.340588078), Yr1984 = c(4.340588078, 4.447228937, NA, 3.594414445,
## 3.594414445, 16.01840668, 16.01840668, 12.90308755, NA, 4.340588078
## ), Yr1985 = c(4.340588078, 4.447228937, NA, 3.052618478,
## 3.052618478, 16.01840668, 16.01840668, 12.90308755, NA, 4.340588078
## ), Yr1986 = c(4.340588078, 4.447228937, NA, 0.691807598,
## 0.691807598, 16.01840668, 16.01840668, 12.90308755, NA, 4.340588078
## ), Yr1987 = c(4.340588078, 4.447228937, NA, NA, NA, 16.01840668,
## 16.01840668, 12.90308755, NA, 4.340588078), Yr1988 = c(1.083782142,
## 1.110408824, NA, NA, NA, 3.999564755, 3.999564755, 3.221714571,
## NA, 1.083782142), Yrs_Exp = c(9.361643836, 8.635616438, 0.41369863,
## 4.249315068, 4.249315068, 8.049315068, 10.69315068, 9.534246575,
## 4.416438356, 6.687671233), arth_mean = c(4.175947792, 3.907637331,
## 20.60898553, 7.744531916, 7.744531916, 12.98740872, 24.68073562,
## 13.49473309, 5.98618177, 3.723093446), median = c(4.340588078,
## 4.447228937, 20.60898553, 3.594414445, 3.594414445, 16.01840668,
## 16.01840668, 12.90308755, 4.387060214, 4.340588078), cumulative.1 = c(41.75947792,
## 35.16873597, 20.60898553, 38.72265958, 38.72265958, 116.8866785,
## 271.4880918, 134.9473309, 35.91709062, 26.06165412), Age = c(9L,
## 8L, 9L, 7L, 10L, 8L, 10L, 9L, 10L, 6L)), class = "data.frame", row.names = c(NA,
## -10L))
如果上面的 10 行数据集名为 dat,那么你可以先把数据集融化(melt)为长格式(针对各年份列),只保留年份列中值超过 4 的行,把行数统计为 num_yr_above,并把对应的年份值收集为 years_above,然后再与 dat 连接(join):
library(data.table)

# Convert `dat` to a data.table by reference so the data.table syntax below
# can be used on it.
setDT(dat)

# Inner expression: reshape the year columns to long format (one row per
# ID/year), keep only the rows whose yearly value exceeds 4 (NA values do not
# satisfy the filter and are dropped), then summarise per ID:
#   num_yr_above - number of years with a value above 4
#   years_above  - list column holding those years, "Yr" prefix removed
# Outer expression: join that per-ID summary back onto `dat` by ID.
# NOTE: the result has one row per ID present in the summary, so an ID with
# no year above 4 would not appear in the output.
dat[
  melt(
    dat,
    id.vars = "ID",
    # The backslash must be doubled: "\d" is not a valid escape in an R
    # string literal and fails to parse; "Yr\\d" yields the regex Yr\d.
    measure.vars = patterns("Yr\\d")
  )[
    value > 4,
    .(
      num_yr_above = .N,
      years_above = list(gsub("Yr", "", variable))
    ),
    by = ID
  ],
  on = .(ID)
]
输出:(包括所有列,但这里我只显示添加到数据集中的新附加列)
ID num_yr_above years_above
1: 20106 10 1978,1979,1980,1981,1982,1983,...
2: 20108 5 1978,1979,1980,1981,1982
3: 20100 9 1979,1980,1981,1982,1983,1984,...
4: 20102 1 1979
5: 20107 9 1979,1980,1981,1982,1983,1984,...
6: 20101 7 1981,1982,1983,1984,1985,1986,...
7: 20105 7 1981,1982,1983,1984,1985,1986,...
8: 20103 2 1982,1983
9: 20104 2 1982,1983
10: 20109 5 1983,1984,1985,1986,1987
我有以下数据集:
## ID cumulative sector meters DOB
## 1 20100 41.75948 H38 6400 1979-08-24
## 2 20101 35.16874 B01 1600 1980-05-05
## 3 20102 20.60899 H04 1600 1979-03-17
## 4 20103 38.72266 c("B09", "B37") c(3200, 8000) c("1981-11-30", "1981-11-30")
## 5 20104 38.72266 c("B09", "B37") c(3200, 8000) c("1978-09-01", "1978-09-01")
## 6 20105 116.88668 c("B09", "B09") c(3200, 3200) c("1980-12-03", "1980-12-03")
## Oct Res_FROM Res_TO
## 1 W 1979-08-15 1991-05-15
## 2 NW 1980-05-15 1991-04-15
## 3 SW 1972-06-15 1979-08-15
## 4 c("NE", "N") c("1982-01-15", "1984-01-15") c("1984-01-15", "1986-04-15")
## 5 c("NE", "N") c("1982-01-15", "1984-01-15") c("1984-01-15", "1986-04-15")
## 6 c("NE", "NE") c("1980-12-15", "1983-08-15") c("1983-08-15", "1991-03-15")
## Exp_FROM Exp_TO
## 1 1979-08-24 1988-12-31
## 2 1980-05-15 1988-12-31
## 3 1979-03-17 1979-08-15
## 4 c("1982-01-15", "1984-01-15") c("1984-01-15", "1986-04-15")
## 5 c("1982-01-15", "1984-01-15") c("1984-01-15", "1986-04-15")
## 6 c("1980-12-15", "1983-08-15") c("1983-08-15", "1988-12-31")
## Exps_Grp Yr1952 Yr1953 Yr1954 Yr1955 Yr1956 Yr1957 Yr1958
## 1 fr51>88 NA NA NA NA NA NA NA
## 2 fr51>88 NA NA NA NA NA NA NA
## 3 between NA NA NA NA NA NA NA
## 4 c("between", "between") NA NA NA NA NA NA NA
## 5 c("between", "between") NA NA NA NA NA NA NA
## 6 c("between", "fr51>88") NA NA NA NA NA NA NA
## Yr1959 Yr1960 Yr1961 Yr1962 Yr1963 Yr1964 Yr1965 Yr1966 Yr1967 Yr1968 Yr1969
## 1 NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA NA NA
## Yr1970 Yr1971 Yr1972 Yr1973 Yr1974 Yr1975 Yr1976 Yr1977 Yr1978 Yr1979
## 1 NA NA NA NA NA NA NA NA NA 5.950991
## 2 NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA 20.608986
## 4 NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA NA
## Yr1980 Yr1981 Yr1982 Yr1983 Yr1984 Yr1985 Yr1986
## 1 4.340588 4.340588 4.340588 4.340588 4.340588 4.340588 4.3405881
## 2 2.927725 4.447229 4.447229 4.447229 4.447229 4.447229 4.4472289
## 3 NA NA NA NA NA NA NA
## 4 NA NA 15.365412 16.018407 3.594414 3.052618 0.6918076
## 5 NA NA 15.365412 16.018407 3.594414 3.052618 0.6918076
## 6 0.758267 16.018407 16.018407 16.018407 16.018407 16.018407 16.0184067
## Yr1987 Yr1988 Yrs_Exp arth_mean median cumulative.1 Age
## 1 4.340588 1.083782 9.3616438 4.175948 4.340588 41.75948 9
## 2 4.447229 1.110409 8.6356164 3.907637 4.447229 35.16874 8
## 3 NA NA 0.4136986 20.608986 20.608986 20.60899 9
## 4 NA NA 4.2493151 7.744532 3.594414 38.72266 7
## 5 NA NA 4.2493151 7.744532 3.594414 38.72266 10
## 6 16.018407 3.999565 8.0493151 12.987409 16.018407 116.88668 8
根据这些信息,我想完成几件事:
1. 我想计算每个人(每个 ID 代表一个人)的平均值(即 arth_mean 列)大于 4 的年数。最好能把这个结果作为新的一列添加到我的数据集中。
2. 承接上一点,我还希望把每个人超过 4 的各个年份逐一列出,同样作为数据集中的新列。例如,可能有一列为每个 ID 列出 1954、1966、1967 等年份。
我对 R 比较陌生,因此非常感谢任何帮助或建议。我不知道如何获得我想要的输出。我在下面添加了可重现的数据。
## structure(list(ID = 20100:20109, cumulative = c(41.75947792,
## 35.16873597, 20.60898553, 38.72265958, 38.72265958, 116.8866785,
## 271.4880918, 134.9473309, 35.91709062, 26.06165412), sector = c("H38",
## "B01", "H04", "c(\"B09\", \"B37\")", "c(\"B09\", \"B37\")", "c(\"B09\", \"B09\")",
## "c(\"B09\", \"B09\")", "H02", "B13", "H38"), meters = c("6400",
## "1600", "1600", "c(3200, 8000)", "c(3200, 8000)", "c(3200, 3200)",
## "c(3200, 3200)", "1600", "4800", "6400"), DOB = c("1979-08-24",
## "1980-05-05", "1979-03-17", "c(\"1981-11-30\", \"1981-11-30\")",
## "c(\"1978-09-01\", \"1978-09-01\")", "c(\"1980-12-03\", \"1980-12-03\")",
## "c(\"1978-04-25\", \"1978-04-25\")", "1979-06-22", "1978-10-09",
## "1982-04-26"), Oct = c("W", "NW", "SW", "c(\"NE\", \"N\")", "c(\"NE\", \"N\")",
## "c(\"NE\", \"NE\")", "c(\"NE\", \"NE\")", "SE", "N", "W"), Res_FROM = c("1979-08-15",
## "1980-05-15", "1972-06-15", "c(\"1982-01-15\", \"1984-01-15\")",
## "c(\"1982-01-15\", \"1984-01-15\")", "c(\"1980-12-15\", \"1983-08-15\")",
## "c(\"1978-04-15\", \"1983-08-15\")", "1979-06-15", "1978-10-15",
## "1982-04-15"), Res_TO = c("1991-05-15", "1991-04-15", "1979-08-15",
## "c(\"1984-01-15\", \"1986-04-15\")", "c(\"1984-01-15\", \"1986-04-15\")",
## "c(\"1983-08-15\", \"1991-03-15\")", "c(\"1983-08-15\", \"2000-01-15\")",
## "1991-04-15", "1983-03-15", "1991-04-15"), Exp_FROM = c("1979-08-24",
## "1980-05-15", "1979-03-17", "c(\"1982-01-15\", \"1984-01-15\")",
## "c(\"1982-01-15\", \"1984-01-15\")", "c(\"1980-12-15\", \"1983-08-15\")",
## "c(\"1978-04-25\", \"1983-08-15\")", "1979-06-22", "1978-10-15",
## "1982-04-26"), Exp_TO = c("1988-12-31", "1988-12-31", "1979-08-15",
## "c(\"1984-01-15\", \"1986-04-15\")", "c(\"1984-01-15\", \"1986-04-15\")",
## "c(\"1983-08-15\", \"1988-12-31\")", "c(\"1983-08-15\", \"1988-12-31\")",
## "1988-12-31", "1983-03-15", "1988-12-31"), Exps_Grp = c("fr51>88",
## "fr51>88", "between", "c(\"between\", \"between\")", "c(\"between\", \"between\")",
## "c(\"between\", \"fr51>88\")", "c(\"between\", \"fr51>88\")",
## "fr51>88", "between", "fr51>88"), Yr1952 = c(NA, NA, NA, NA,
## NA, NA, NA, NA, NA, NA), Yr1953 = c(NA, NA, NA, NA, NA, NA, NA,
## NA, NA, NA), Yr1954 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## ), Yr1955 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1956 = c(NA,
## NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1957 = c(NA, NA, NA, NA,
## NA, NA, NA, NA, NA, NA), Yr1958 = c(NA, NA, NA, NA, NA, NA, NA,
## NA, NA, NA), Yr1959 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## ), Yr1960 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1961 = c(NA,
## NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1962 = c(NA, NA, NA, NA,
## NA, NA, NA, NA, NA, NA), Yr1963 = c(NA, NA, NA, NA, NA, NA, NA,
## NA, NA, NA), Yr1964 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## ), Yr1965 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1966 = c(NA,
## NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1967 = c(NA, NA, NA, NA,
## NA, NA, NA, NA, NA, NA), Yr1968 = c(NA, NA, NA, NA, NA, NA, NA,
## NA, NA, NA), Yr1969 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## ), Yr1970 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1971 = c(NA,
## NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1972 = c(NA, NA, NA, NA,
## NA, NA, NA, NA, NA, NA), Yr1973 = c(NA, NA, NA, NA, NA, NA, NA,
## NA, NA, NA), Yr1974 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
## ), Yr1975 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1976 = c(NA,
## NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1977 = c(NA, NA, NA, NA,
## NA, NA, NA, NA, NA, NA), Yr1978 = c(NA, NA, NA, NA, NA, NA, 79.39642441,
## NA, 5.655852488, NA), Yr1979 = c(5.950991161, NA, 20.60898553,
## NA, NA, NA, 59.94484924, 28.50091596, 16.41746701, NA), Yr1980 = c(4.340588078,
## 2.927724588, NA, NA, NA, 0.758267013, 16.01840668, 12.90308755,
## 4.387060214, NA), Yr1981 = c(4.340588078, 4.447228937, NA, NA,
## NA, 16.01840668, 16.01840668, 12.90308755, 4.387060214, NA),
## Yr1982 = c(4.340588078, 4.447228937, NA, 15.36541238, 15.36541238,
## 16.01840668, 16.01840668, 12.90308755, 4.387060214, 3.274931595
## ), Yr1983 = c(4.340588078, 4.447228937, NA, 16.01840668,
## 16.01840668, 16.01840668, 16.01840668, 12.90308755, 0.682590481,
## 4.340588078), Yr1984 = c(4.340588078, 4.447228937, NA, 3.594414445,
## 3.594414445, 16.01840668, 16.01840668, 12.90308755, NA, 4.340588078
## ), Yr1985 = c(4.340588078, 4.447228937, NA, 3.052618478,
## 3.052618478, 16.01840668, 16.01840668, 12.90308755, NA, 4.340588078
## ), Yr1986 = c(4.340588078, 4.447228937, NA, 0.691807598,
## 0.691807598, 16.01840668, 16.01840668, 12.90308755, NA, 4.340588078
## ), Yr1987 = c(4.340588078, 4.447228937, NA, NA, NA, 16.01840668,
## 16.01840668, 12.90308755, NA, 4.340588078), Yr1988 = c(1.083782142,
## 1.110408824, NA, NA, NA, 3.999564755, 3.999564755, 3.221714571,
## NA, 1.083782142), Yrs_Exp = c(9.361643836, 8.635616438, 0.41369863,
## 4.249315068, 4.249315068, 8.049315068, 10.69315068, 9.534246575,
## 4.416438356, 6.687671233), arth_mean = c(4.175947792, 3.907637331,
## 20.60898553, 7.744531916, 7.744531916, 12.98740872, 24.68073562,
## 13.49473309, 5.98618177, 3.723093446), median = c(4.340588078,
## 4.447228937, 20.60898553, 3.594414445, 3.594414445, 16.01840668,
## 16.01840668, 12.90308755, 4.387060214, 4.340588078), cumulative.1 = c(41.75947792,
## 35.16873597, 20.60898553, 38.72265958, 38.72265958, 116.8866785,
## 271.4880918, 134.9473309, 35.91709062, 26.06165412), Age = c(9L,
## 8L, 9L, 7L, 10L, 8L, 10L, 9L, 10L, 6L)), class = "data.frame", row.names = c(NA,
## -10L))
如果上面的 10 行数据集名为 dat,那么你可以先把数据集融化(melt)为长格式(针对各年份列),只保留年份列中值超过 4 的行,把行数统计为 num_yr_above,并把对应的年份值收集为 years_above,然后再与 dat 连接(join):
library(data.table)

# Convert `dat` to a data.table by reference so the data.table syntax below
# can be used on it.
setDT(dat)

# Inner expression: reshape the year columns to long format (one row per
# ID/year), keep only the rows whose yearly value exceeds 4 (NA values do not
# satisfy the filter and are dropped), then summarise per ID:
#   num_yr_above - number of years with a value above 4
#   years_above  - list column holding those years, "Yr" prefix removed
# Outer expression: join that per-ID summary back onto `dat` by ID.
# NOTE: the result has one row per ID present in the summary, so an ID with
# no year above 4 would not appear in the output.
dat[
  melt(
    dat,
    id.vars = "ID",
    # The backslash must be doubled: "\d" is not a valid escape in an R
    # string literal and fails to parse; "Yr\\d" yields the regex Yr\d.
    measure.vars = patterns("Yr\\d")
  )[
    value > 4,
    .(
      num_yr_above = .N,
      years_above = list(gsub("Yr", "", variable))
    ),
    by = ID
  ],
  on = .(ID)
]
输出:(包括所有列,但这里我只显示添加到数据集中的新附加列)
ID num_yr_above years_above
1: 20106 10 1978,1979,1980,1981,1982,1983,...
2: 20108 5 1978,1979,1980,1981,1982
3: 20100 9 1979,1980,1981,1982,1983,1984,...
4: 20102 1 1979
5: 20107 9 1979,1980,1981,1982,1983,1984,...
6: 20101 7 1981,1982,1983,1984,1985,1986,...
7: 20105 7 1981,1982,1983,1984,1985,1986,...
8: 20103 2 1982,1983
9: 20104 2 1982,1983
10: 20109 5 1983,1984,1985,1986,1987