在 R 中出现错误 - [`:=`(dob, date_of_birth) 中的错误:找不到函数“:=”]
Getting an error in R - [Error in `:=`(dob, date_of_birth) : could not find function ":="]
我似乎没有掌握正确的语法:我想在 data.table 的原始数据集中,把现有列复制为一个使用新名称的新列。
参考以下示例:
If you want to do the replacement by reference, using := then you can
do
dt[, x:=mpg][, setdiff(colnames(dt), c('x', 'mpg')) := NULL]
If we need it in a single step, instead of doing the := to modify the
original dataset, specify it with = inside list or .(
dt[,.(x = mpg, mpg)]
Or if it is necessary to create the column in the original dataset, it can be
piped
dt[, x := mpg][, .(x, mpg)]
将 data.table 与此示例数据一起使用:
> head(sample_dt)
date_of_birth date_employed Total AVC numtrans firstfundingdate minAccountingdte
1 10/01/1988 16/08/2013 490909.6 0 61 25/11/2014 31/10/2014
2 26/12/1971 08/01/2001 4400292.1 0 175 19/08/2006 28/02/2006
3 15/10/1979 14/01/2005 92240.0 0 44 25/10/2006 31/01/2005
4 04/04/1973 30/04/2002 1594627.9 0 158 18/09/2012 30/04/2007
5 22/02/1972 22/02/1996 627662.7 0 126 27/02/2007 31/10/2006
6 07/06/1976 01/03/2010 3735319.2 0 129 13/05/2010 31/03/2010
gender client_status Balance
1 F C 626567.9
2 M C 9955518.3
3 F C 385284.5
4 M C 3097565.4
5 M C 1815569.6
6 M C 7132986.0
我想要添加了新列的这种类型的输出:
> head(sample_dt)
date_of_birth date_employed Total AVC numtrans firstfundingdate minAccountingdte
1 10/01/1988 16/08/2013 490909.6 0 61 25/11/2014 31/10/2014
2 26/12/1971 08/01/2001 4400292.1 0 175 19/08/2006 28/02/2006
3 15/10/1979 14/01/2005 92240.0 0 44 25/10/2006 31/01/2005
4 04/04/1973 30/04/2002 1594627.9 0 158 18/09/2012 30/04/2007
5 22/02/1972 22/02/1996 627662.7 0 126 27/02/2007 31/10/2006
6 07/06/1976 01/03/2010 3735319.2 0 129 13/05/2010 31/03/2010
gender client_status Balance dob employment_dte firstfunding_dte minAccounting_dte
1 F C 626567.9 1988-01-10 2013-08-16 2014-11-25 2014-10-31
2 M C 9955518.3 1971-12-26 2001-01-08 2006-08-19 2006-02-28
3 F C 385284.5 1979-10-15 2005-01-14 2006-10-25 2005-01-31
4 M C 3097565.4 1973-04-04 2002-04-30 2012-09-18 2007-04-30
5 M C 1815569.6 1972-02-22 1996-02-22 2007-02-27 2006-10-31
6 M C 7132986.0 1976-06-07 2010-03-01 2010-05-13 2010-03-31
附加数据结构信息:
> dput(head(sample_dt))
structure(list(date_of_birth = c("10/01/1988", "26/12/1971",
"15/10/1979", "04/04/1973", "22/02/1972", "07/06/1976"), date_employed = c("16/08/2013",
"08/01/2001", "14/01/2005", "30/04/2002", "22/02/1996", "01/03/2010"
), Total = c(490909.59, 4400292.09, 92240, 1594627.95, 627662.74,
3735319.25), AVC = c(0, 0, 0, 0, 0, 0), numtrans = c(61L, 175L,
44L, 158L, 126L, 129L), firstfundingdate = c("25/11/2014", "19/08/2006",
"25/10/2006", "18/09/2012", "27/02/2007", "13/05/2010"), minAccountingdte = c("31/10/2014",
"28/02/2006", "31/01/2005", "30/04/2007", "31/10/2006", "31/03/2010"
), gender = c("F", "M", "F", "M", "M", "M"), client_status = c("C",
"C", "C", "C", "C", "C"), Balance = c(626567.94, 9955518.35,
385284.46, 3097565.35, 1815569.61, 7132985.99)), row.names = c(NA,
6L), class = c("data.table", "data.frame"))
但是,当我运行以下命令时,出现错误:
> sample_dt[, dob:=date_of_birth][, setdiff(colnames(sample_dt), c('dob', 'date_of_birth')) := NULL]
Error in `:=`(dob, date_of_birth) : could not find function ":="
> sample_dt[, dob:=date_of_birth][, setdiff(colnames(sample_dt), c("dob", "date_of_birth")) := NULL]
Error in `:=`(dob, date_of_birth) : could not find function ":="
和
> sample_dt[, .(dob = date_of_birth, date_of_birth)]
Error in .(dob = date_of_birth, date_of_birth) :
could not find function "."
还有
> sample_dt[, dob := date_of_birth][, .(dob, date_of_birth)]
Error in `:=`(dob, date_of_birth) : could not find function ":="
如果可能的话,我希望能够一次性完成所有这些列的复制:
# Each statement adds a renamed copy of an existing column to sample_dt with
# `:=`, then the chained `[` returns only the new column next to its source.
sample_dt[, employment_dte := date_employed][, .(employment_dte, date_employed)]
sample_dt[, firstfunding_dte := firstfundingdate][, .(firstfunding_dte, firstfundingdate)]
sample_dt[, minAccounting_dte := minAccountingdte][, .(minAccounting_dte, minAccountingdte)]
...在我继续将它们转换为日期列之前:
## convert date columns
date_cols <- c("dob", "employment_dte", "firstfunding_dte", "minAccounting_dte")
# convert date columns in DD/MM/YYYY format
date_cols <- na.omit(names(sample_dt)[
sample_dt[1, sapply(.SD, stringr::str_detect, pattern = "\d{2}/\d{2}/\d{4}"),]])
# use lubridate package
sample_dt[, (date_cols) := lapply(.SD, lubridate::dmy), .SDcols = date_cols]
我做错了什么?
如有任何帮助,我将不胜感激!
我无法重现您示例中的错误:
library("data.table")
# Rebuild the six-row sample from the question as a data.table.
sample_dt <- data.table(
  date_of_birth = c(
    "10/01/1988", "26/12/1971", "15/10/1979",
    "04/04/1973", "22/02/1972", "07/06/1976"
  ),
  date_employed = c(
    "16/08/2013", "08/01/2001", "14/01/2005",
    "30/04/2002", "22/02/1996", "01/03/2010"
  ),
  Total = c(490909.59, 4400292.09, 92240, 1594627.95, 627662.74, 3735319.25),
  AVC = rep(0, 6),
  numtrans = c(61L, 175L, 44L, 158L, 126L, 129L),
  firstfundingdate = c(
    "25/11/2014", "19/08/2006", "25/10/2006",
    "18/09/2012", "27/02/2007", "13/05/2010"
  ),
  minAccountingdte = c(
    "31/10/2014", "28/02/2006", "31/01/2005",
    "30/04/2007", "31/10/2006", "31/03/2010"
  ),
  gender = c("F", "M", "F", "M", "M", "M"),
  client_status = rep("C", 6),
  Balance = c(626567.94, 9955518.35, 385284.46, 3097565.35, 1815569.61, 7132985.99)
)

# Re-run the three commands from the question, unchanged in meaning.
# 1) add `dob`, then remove every column except `dob` and `date_of_birth`
sample_dt[, dob := date_of_birth][
  , setdiff(colnames(sample_dt), c("dob", "date_of_birth")) := NULL
]
# 2) return a new two-column table without modifying sample_dt
sample_dt[, .(dob = date_of_birth, date_of_birth)]
# 3) assign `dob` again and select both columns -- this line emits a warning
sample_dt[, dob := date_of_birth][, .(dob, date_of_birth)]
只有最后一行会给出警告。
也许您最终想要的是这样的写法:
library("data.table")
# Bind the built-in iris data to a new name and convert it with setDT().
Iris <- iris
setDT(Iris)
head(Iris)
# The functional form of `:=` adds several columns in a single call;
# the trailing [] makes the updated table print.
Iris[, `:=`(S = Species, SL = Sepal.Length)][]
我似乎没有掌握正确的语法:我想在 data.table 的原始数据集中,把现有列复制为一个使用新名称的新列。
参考这个例子:
If you want to do the replacement by reference, using := then you can do
dt[, x:=mpg][, setdiff(colnames(dt), c('x', 'mpg')) := NULL]
If we need it in a single step, instead of doing the := to modify the original dataset, specify it with = inside list or .(
dt[,.(x = mpg, mpg)]
Or if it is necessary to create the column in the original dataset, it can be piped
dt[, x := mpg][, .(x, mpg)]
将 data.table 与此示例数据一起使用:
> head(sample_dt)
date_of_birth date_employed Total AVC numtrans firstfundingdate minAccountingdte
1 10/01/1988 16/08/2013 490909.6 0 61 25/11/2014 31/10/2014
2 26/12/1971 08/01/2001 4400292.1 0 175 19/08/2006 28/02/2006
3 15/10/1979 14/01/2005 92240.0 0 44 25/10/2006 31/01/2005
4 04/04/1973 30/04/2002 1594627.9 0 158 18/09/2012 30/04/2007
5 22/02/1972 22/02/1996 627662.7 0 126 27/02/2007 31/10/2006
6 07/06/1976 01/03/2010 3735319.2 0 129 13/05/2010 31/03/2010
gender client_status Balance
1 F C 626567.9
2 M C 9955518.3
3 F C 385284.5
4 M C 3097565.4
5 M C 1815569.6
6 M C 7132986.0
我想要添加了新列的这种类型的输出:
> head(sample_dt)
date_of_birth date_employed Total AVC numtrans firstfundingdate minAccountingdte
1 10/01/1988 16/08/2013 490909.6 0 61 25/11/2014 31/10/2014
2 26/12/1971 08/01/2001 4400292.1 0 175 19/08/2006 28/02/2006
3 15/10/1979 14/01/2005 92240.0 0 44 25/10/2006 31/01/2005
4 04/04/1973 30/04/2002 1594627.9 0 158 18/09/2012 30/04/2007
5 22/02/1972 22/02/1996 627662.7 0 126 27/02/2007 31/10/2006
6 07/06/1976 01/03/2010 3735319.2 0 129 13/05/2010 31/03/2010
gender client_status Balance dob employment_dte firstfunding_dte minAccounting_dte
1 F C 626567.9 1988-01-10 2013-08-16 2014-11-25 2014-10-31
2 M C 9955518.3 1971-12-26 2001-01-08 2006-08-19 2006-02-28
3 F C 385284.5 1979-10-15 2005-01-14 2006-10-25 2005-01-31
4 M C 3097565.4 1973-04-04 2002-04-30 2012-09-18 2007-04-30
5 M C 1815569.6 1972-02-22 1996-02-22 2007-02-27 2006-10-31
6 M C 7132986.0 1976-06-07 2010-03-01 2010-05-13 2010-03-31
附加数据结构信息:
> dput(head(sample_dt))
structure(list(date_of_birth = c("10/01/1988", "26/12/1971",
"15/10/1979", "04/04/1973", "22/02/1972", "07/06/1976"), date_employed = c("16/08/2013",
"08/01/2001", "14/01/2005", "30/04/2002", "22/02/1996", "01/03/2010"
), Total = c(490909.59, 4400292.09, 92240, 1594627.95, 627662.74,
3735319.25), AVC = c(0, 0, 0, 0, 0, 0), numtrans = c(61L, 175L,
44L, 158L, 126L, 129L), firstfundingdate = c("25/11/2014", "19/08/2006",
"25/10/2006", "18/09/2012", "27/02/2007", "13/05/2010"), minAccountingdte = c("31/10/2014",
"28/02/2006", "31/01/2005", "30/04/2007", "31/10/2006", "31/03/2010"
), gender = c("F", "M", "F", "M", "M", "M"), client_status = c("C",
"C", "C", "C", "C", "C"), Balance = c(626567.94, 9955518.35,
385284.46, 3097565.35, 1815569.61, 7132985.99)), row.names = c(NA,
6L), class = c("data.table", "data.frame"))
但是,当我运行以下命令时,出现错误:
> sample_dt[, dob:=date_of_birth][, setdiff(colnames(sample_dt), c('dob', 'date_of_birth')) := NULL]
Error in `:=`(dob, date_of_birth) : could not find function ":="
> sample_dt[, dob:=date_of_birth][, setdiff(colnames(sample_dt), c("dob", "date_of_birth")) := NULL]
Error in `:=`(dob, date_of_birth) : could not find function ":="
和
> sample_dt[, .(dob = date_of_birth, date_of_birth)]
Error in .(dob = date_of_birth, date_of_birth) :
could not find function "."
还有
> sample_dt[, dob := date_of_birth][, .(dob, date_of_birth)]
Error in `:=`(dob, date_of_birth) : could not find function ":="
如果可能的话,我希望能够一次性完成所有这些列的复制:
# Each statement adds a renamed copy of an existing column to sample_dt with
# `:=`, then the chained `[` returns only the new column next to its source.
sample_dt[, employment_dte := date_employed][, .(employment_dte, date_employed)]
sample_dt[, firstfunding_dte := firstfundingdate][, .(firstfunding_dte, firstfundingdate)]
sample_dt[, minAccounting_dte := minAccountingdte][, .(minAccounting_dte, minAccountingdte)]
...在我继续将它们转换为日期列之前:
## convert date columns
date_cols <- c("dob", "employment_dte", "firstfunding_dte", "minAccounting_dte")
# convert date columns in DD/MM/YYYY format
date_cols <- na.omit(names(sample_dt)[
sample_dt[1, sapply(.SD, stringr::str_detect, pattern = "\d{2}/\d{2}/\d{4}"),]])
# use lubridate package
sample_dt[, (date_cols) := lapply(.SD, lubridate::dmy), .SDcols = date_cols]
我做错了什么?
如有任何帮助,我将不胜感激!
我无法重现您示例中的错误:
library("data.table")
# Rebuild the six-row sample from the question as a data.table.
sample_dt <- data.table(
  date_of_birth = c(
    "10/01/1988", "26/12/1971", "15/10/1979",
    "04/04/1973", "22/02/1972", "07/06/1976"
  ),
  date_employed = c(
    "16/08/2013", "08/01/2001", "14/01/2005",
    "30/04/2002", "22/02/1996", "01/03/2010"
  ),
  Total = c(490909.59, 4400292.09, 92240, 1594627.95, 627662.74, 3735319.25),
  AVC = rep(0, 6),
  numtrans = c(61L, 175L, 44L, 158L, 126L, 129L),
  firstfundingdate = c(
    "25/11/2014", "19/08/2006", "25/10/2006",
    "18/09/2012", "27/02/2007", "13/05/2010"
  ),
  minAccountingdte = c(
    "31/10/2014", "28/02/2006", "31/01/2005",
    "30/04/2007", "31/10/2006", "31/03/2010"
  ),
  gender = c("F", "M", "F", "M", "M", "M"),
  client_status = rep("C", 6),
  Balance = c(626567.94, 9955518.35, 385284.46, 3097565.35, 1815569.61, 7132985.99)
)

# Re-run the three commands from the question, unchanged in meaning.
# 1) add `dob`, then remove every column except `dob` and `date_of_birth`
sample_dt[, dob := date_of_birth][
  , setdiff(colnames(sample_dt), c("dob", "date_of_birth")) := NULL
]
# 2) return a new two-column table without modifying sample_dt
sample_dt[, .(dob = date_of_birth, date_of_birth)]
# 3) assign `dob` again and select both columns -- this line emits a warning
sample_dt[, dob := date_of_birth][, .(dob, date_of_birth)]
只有最后一行会给出警告。
也许您最终想要的是这样的写法:
library("data.table")
# Bind the built-in iris data to a new name and convert it with setDT().
Iris <- iris
setDT(Iris)
head(Iris)
# The functional form of `:=` adds several columns in a single call;
# the trailing [] makes the updated table print.
Iris[, `:=`(S = Species, SL = Sepal.Length)][]