在 R 中出现错误 - [`:=`(dob, date_of_birth) 中的错误:找不到函数“:=”]

Getting an error in R - [Error in `:=`(dob, date_of_birth) : could not find function ":="]

我似乎没有找到正确的语法,无法在 data.table 的原始数据集中把现有列复制为一个使用新名称的新列。

参考以下示例:

If you want to do the replacement by reference, using := then you can do

dt[, x:=mpg][, setdiff(colnames(dt), c('x', 'mpg')) := NULL]

If we need it in a single step, instead of doing the := to modify the original dataset, specify it with = inside list or .(

dt[,.(x = mpg, mpg)]

Or if it is necessary to create the column in the original dataset, the calls can be chained

dt[, x := mpg][, .(x, mpg)]

将 data.table 与此示例数据一起使用:

> head(sample_dt)
  date_of_birth date_employed     Total AVC numtrans firstfundingdate minAccountingdte
1    10/01/1988    16/08/2013  490909.6   0       61       25/11/2014       31/10/2014
2    26/12/1971    08/01/2001 4400292.1   0      175       19/08/2006       28/02/2006
3    15/10/1979    14/01/2005   92240.0   0       44       25/10/2006       31/01/2005
4    04/04/1973    30/04/2002 1594627.9   0      158       18/09/2012       30/04/2007
5    22/02/1972    22/02/1996  627662.7   0      126       27/02/2007       31/10/2006
6    07/06/1976    01/03/2010 3735319.2   0      129       13/05/2010       31/03/2010
  gender client_status   Balance
1      F             C  626567.9
2      M             C 9955518.3
3      F             C  385284.5
4      M             C 3097565.4
5      M             C 1815569.6
6      M             C 7132986.0

我希望得到如下添加了新列的输出:

> head(sample_dt)
      date_of_birth date_employed     Total AVC numtrans firstfundingdate minAccountingdte
    1    10/01/1988    16/08/2013  490909.6   0       61       25/11/2014       31/10/2014
    2    26/12/1971    08/01/2001 4400292.1   0      175       19/08/2006       28/02/2006
    3    15/10/1979    14/01/2005   92240.0   0       44       25/10/2006       31/01/2005
    4    04/04/1973    30/04/2002 1594627.9   0      158       18/09/2012       30/04/2007
    5    22/02/1972    22/02/1996  627662.7   0      126       27/02/2007       31/10/2006
    6    07/06/1976    01/03/2010 3735319.2   0      129       13/05/2010       31/03/2010
      gender client_status   Balance        dob  employment_dte  firstfunding_dte  minAccounting_dte
    1      F             C  626567.9 1988-01-10      2013-08-16        2014-11-25         2014-10-31
    2      M             C 9955518.3 1971-12-26      2001-01-08        2006-08-19         2006-02-28
    3      F             C  385284.5 1979-10-15      2005-01-14        2006-10-25         2005-01-31
    4      M             C 3097565.4 1973-04-04      2002-04-30        2012-09-18         2007-04-30
    5      M             C 1815569.6 1972-02-22      1996-02-22        2007-02-27         2006-10-31
    6      M             C 7132986.0 1976-06-07      2010-03-01        2010-05-13         2010-03-31

附加数据结构信息:

> dput(head(sample_dt))
structure(list(date_of_birth = c("10/01/1988", "26/12/1971", 
"15/10/1979", "04/04/1973", "22/02/1972", "07/06/1976"), date_employed = c("16/08/2013", 
"08/01/2001", "14/01/2005", "30/04/2002", "22/02/1996", "01/03/2010"
), Total = c(490909.59, 4400292.09, 92240, 1594627.95, 627662.74, 
3735319.25), AVC = c(0, 0, 0, 0, 0, 0), numtrans = c(61L, 175L, 
44L, 158L, 126L, 129L), firstfundingdate = c("25/11/2014", "19/08/2006", 
"25/10/2006", "18/09/2012", "27/02/2007", "13/05/2010"), minAccountingdte = c("31/10/2014", 
"28/02/2006", "31/01/2005", "30/04/2007", "31/10/2006", "31/03/2010"
), gender = c("F", "M", "F", "M", "M", "M"), client_status = c("C", 
"C", "C", "C", "C", "C"), Balance = c(626567.94, 9955518.35, 
385284.46, 3097565.35, 1815569.61, 7132985.99)), row.names = c(NA, 
6L), class = c("data.table", "data.frame"))

但是,当我运行以下命令时,出现错误:

> sample_dt[, dob:=date_of_birth][, setdiff(colnames(sample_dt), c('dob', 'date_of_birth')) := NULL]
Error in `:=`(dob, date_of_birth) : could not find function ":="
> sample_dt[, dob:=date_of_birth][, setdiff(colnames(sample_dt), c("dob", "date_of_birth")) := NULL]
Error in `:=`(dob, date_of_birth) : could not find function ":="

> sample_dt[, .(dob = date_of_birth, date_of_birth)]
Error in .(dob = date_of_birth, date_of_birth) : 
  could not find function "."

还有

> sample_dt[, dob := date_of_birth][, .(dob, date_of_birth)]
Error in `:=`(dob, date_of_birth) : could not find function ":="

如果可能……我希望能够一次性完成以下所有列的复制:

sample_dt[, employment_dte := date_employed][, .(employment_dte, date_employed)]

sample_dt[, firstfunding_dte := firstfundingdate][, .(firstfunding_dte, firstfundingdate)]

sample_dt[, minAccounting_dte := minAccountingdte][, .(minAccounting_dte, minAccountingdte)]

...在我继续将它们转换为日期列之前:

## convert date columns
date_cols <- c("dob", "employment_dte", "firstfunding_dte", "minAccounting_dte")

# convert date columns in DD/MM/YYYY format
date_cols <- na.omit(names(sample_dt)[
  sample_dt[1, sapply(.SD, stringr::str_detect, pattern = "\\d{2}/\\d{2}/\\d{4}"),]])

# use lubridate package
sample_dt[, (date_cols) := lapply(.SD, lubridate::dmy), .SDcols = date_cols]

我做错了什么?

如有任何帮助,我们将不胜感激!

我无法重现您示例中的错误。“找不到函数 ":="”(以及“找不到函数 "."”)通常意味着 data.table 包尚未加载,请先确认已运行 library("data.table"):

library("data.table") 
sample_dt <- 
data.table(
  date_of_birth = c("10/01/1988", "26/12/1971", "15/10/1979", "04/04/1973", "22/02/1972", "07/06/1976"), 
  date_employed = c("16/08/2013", "08/01/2001", "14/01/2005", "30/04/2002", "22/02/1996", "01/03/2010"), 
  Total = c(490909.59, 4400292.09, 92240, 1594627.95, 627662.74, 3735319.25), 
  AVC = c(0, 0, 0, 0, 0, 0), numtrans = c(61L, 175L, 44L, 158L, 126L, 129L), 
  firstfundingdate = c("25/11/2014", "19/08/2006", "25/10/2006", "18/09/2012", "27/02/2007", "13/05/2010"), 
  minAccountingdte = c("31/10/2014", "28/02/2006", "31/01/2005", "30/04/2007", "31/10/2006", "31/03/2010"), 
  gender = c("F", "M", "F", "M", "M", "M"), client_status = c("C", "C", "C", "C", "C", "C"), 
  Balance = c(626567.94, 9955518.35, 385284.46, 3097565.35, 1815569.61, 7132985.99))
###
sample_dt[, dob:=date_of_birth][, setdiff(colnames(sample_dt), c('dob', 'date_of_birth')) := NULL]
sample_dt[, .(dob = date_of_birth, date_of_birth)]
sample_dt[, dob := date_of_birth][, .(dob, date_of_birth)] ## warning

只有最后一行会给出警告。

您要找的可能是类似下面这样的写法(一次性添加多个新列):

library("data.table") 
Iris <- iris; setDT(Iris); head(Iris)
Iris[, ':='(S=Species, SL=Sepal.Length)][]