允许引用具有索引和名称的列的函数

Question

我想用下面的函数提供一种更简便的引用列的方式：既可以使用列索引，也可以使用列名。

所以这个有效：

# Example data: a header line naming ten columns a..j followed by one row
# holding the values 1..10, parsed from the inline text.
df <- data.table::fread("a b c d e f g h i j
                         1 2 3 4 5 6 7 8 9 10",
                                               header = TRUE)
# Mixing numbers and strings in c() produces a character vector, so the
# positions are stored as "1", "2", ... next to the names "i" and "j".
columns <- c(1:8, "i", 9, "j")


# Convert a mixed vector of column positions and column names into
# column positions.
#   df:      object whose names() are searched for the column names.
#   columns: vector mixing positions and names.
col2num <- function(df, columns){
              # Entries that are not numbers become NA here (R warns about the
              # coercion); those NA slots mark the entries given by name.
              nums <- as.numeric(columns)
              # NOTE(review): `==` compares names(df) with the leftover names
              # element by element, recycling the shorter vector. It is not a
              # per-name lookup, so the result is only right when the names
              # happen to line up with the column order.
              nums[is.na(nums)] <- which(names(df)==columns[is.na(nums)])
              return(nums)
            }

col2num(df, columns)
#> Warning in col2num(df, columns): NAs introduced by coercion
#>  [1]  1  2  3  4  5  6  7  8  9  9 10

这个也有效：

# Convert a mixed vector of column positions and column names into
# column names.
#   df:      object whose names() are searched and indexed.
#   columns: vector mixing positions and names.
col2name <- function(df, columns){
              # Entries that are not numbers become NA here (R warns about the
              # coercion); those NA slots mark the entries given by name.
              nums <- as.numeric(columns)
              # NOTE(review): `==` compares names(df) with the leftover names
              # element by element, recycling the shorter vector. It is not a
              # per-name lookup, so the result is only right when the names
              # happen to line up with the column order.
              nums[is.na(nums)] <- which(names(df)==columns[is.na(nums)])
              # Turn the positions back into names.
              return(names(df)[nums])
            }

col2name(df, columns)
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "i" "j"
Warning message:
In col2name(df, columns) : NAs introduced by coercion

但是当我执行以下操作时，它不再有效：

# Same request as before, but with the names given in the order "j", "i".
columns <- c(1:7, "j", 8, "i")
# Convert a mixed vector of column positions and column names into
# column names.
col2name <- function(df, columns){
              # Entries that are not numbers become NA (with a coercion warning).
              nums <- as.numeric(columns)
              # NOTE(review): the leftover names c("j", "i") are recycled along
              # names(df) as j, i, j, i, ...; with columns a..j no position
              # holds an equal pair, so which() returns an empty vector and
              # the assignment has nothing to insert.
              nums[is.na(nums)] <- which(names(df)==columns[is.na(nums)])
              return(names(df)[nums])
            }

col2name(df, columns)
Error in nums[is.na(nums)] <- which(names(df) == columns[is.na(nums)]) : 
replacement has length zero

还有，下面这种情况虽然不报错，但返回的结果不正确：

# Three names and a single position this time.
columns <- c("a", "j", 8, "i")
# Convert a mixed vector of column positions and column names into
# column names.
col2name <- function(df, columns){
              # Entries that are not numbers become NA (with a coercion warning).
              nums <- as.numeric(columns)
              # NOTE(review): the leftover names c("a", "j", "i") are recycled
              # along names(df) as a, j, i, a, j, i, ...; with columns a..j
              # only positions 1 and 9 hold an equal pair, so which() returns
              # two positions. Those two values are then recycled over the
              # three NA slots (1, 9, 1), which is why "j" is never returned.
              nums[is.na(nums)] <- which(names(df)==columns[is.na(nums)])
              return(names(df)[nums])
            }

col2name(df, columns)
[1] "a" "i" "h" "a"

我该如何解决这个问题？

Answer 1

一种替代方法，缺点是数据必须是 data.frame 对象：

# Select columns of a data.frame using a mix of positions and names.
#
#   df:   a data.frame; columns are taken with `df[idx]`, so objects that
#         index differently with a single subscript are not supported.
#   cols: vector mixing column positions and column names, for example
#         c(1, "Sepal.Width"). Mixing the two makes R store every entry
#         as character, so the whole vector is handled as character.
#
# Returns a data.frame holding the requested columns in the order they
# appear in `cols`; a column requested more than once is returned more
# than once.
# Stops with an error naming the offending entries when a name is not a
# column of `df` or a position is outside 1..ncol(df).
indexr <- function(df, cols) {
  cols <- as.character(cols)

  # Entries made only of digits are positions; anything else is a name.
  is_position <- grepl("^[0-9]+$", cols)

  # Fill positions and looked-up names into their original slots so the
  # caller's ordering (and any repetition) is preserved.
  idx <- integer(length(cols))
  idx[is_position] <- as.integer(cols[is_position])
  idx[!is_position] <- match(cols[!is_position], names(df))

  invalid <- is.na(idx) | idx < 1L | idx > length(df)
  if (any(invalid)) {
    stop(
      "unknown column(s): ", paste(cols[invalid], collapse = ", "),
      call. = FALSE
    )
  }

  df[idx]
}



 indexr(iris,c(1,"Sepal.Width"))
    Sepal.Length Sepal.Width
1            5.1         3.5
2            4.9         3.0
3            4.7         3.2

用你的数据来演示（缺点是必须先把它转换回 data.frame）。也许可以为此专门定义一个方法。

data.table::setDF(df)
indexr(df,columns)
  a b c d e f g h i i.1  j
1 1 2 3 4 5 6 7 8 9   9 10

编辑：改为返回列名：

# Return the names of the columns selected from a data.frame by a mix of
# positions and names.
#
#   df:   a data.frame; columns are taken with `df[idx]`, so objects that
#         index differently with a single subscript are not supported.
#   cols: vector mixing column positions and column names, for example
#         c("mpg", 5). Mixing the two makes R store every entry as
#         character, so the whole vector is handled as character.
#
# Returns a character vector with one name per entry of `cols`, in the
# same order as `cols`. The names are read from the selected data.frame,
# so a column requested twice shows up with the suffix the data.frame
# subsetting adds (for example "i" and "i.1").
# Stops with an error naming the offending entries when a name is not a
# column of `df` or a position is outside 1..ncol(df).
indexr <- function(df, cols) {
  cols <- as.character(cols)

  # Entries made only of digits are positions; anything else is a name.
  is_position <- grepl("^[0-9]+$", cols)

  # Fill positions and looked-up names into their original slots so the
  # caller's ordering (and any repetition) is preserved.
  idx <- integer(length(cols))
  idx[is_position] <- as.integer(cols[is_position])
  idx[!is_position] <- match(cols[!is_position], names(df))

  invalid <- is.na(idx) | idx < 1L | idx > length(df)
  if (any(invalid)) {
    stop(
      "unknown column(s): ", paste(cols[invalid], collapse = ", "),
      call. = FALSE
    )
  }

  names(df[idx])
}


indexr(mtcars, c("mpg", 5))
#> [1] "mpg"  "drat"


  indexr(df,columns)
 [1] "a"   "b"   "c"   "d"   "e"   "f"   "g"   "h"   "i"   "i.1"
[11] "j"

Answer 2

我们只需要遍历 columns:

# Translate a mixed vector of column positions and column names into
# column positions.
#
#   df:      object whose names() are searched for the column names.
#   columns: vector mixing positions and names, for example
#            c(1:7, "j", 8, "i").
#
# Returns a numeric vector with one position per entry of `columns`, in
# the same order. When `columns` contains names, R emits its usual
# "NAs introduced by coercion" warning once; wrap the call in
# suppressWarnings() if that is unwanted.
# Stops with an error listing the names that are not found in names(df).
col2num <- function(df, columns) {
  # Entries that are not numbers become NA; those are the ones given by name.
  nums <- as.numeric(columns)
  by_name <- is.na(nums)

  if (any(by_name)) {
    # match() looks every name up individually and always yields exactly
    # one position per name, whatever order the names are given in.
    found <- match(columns[by_name], names(df))
    if (anyNA(found)) {
      stop(
        "unknown column name(s): ",
        paste(columns[by_name][is.na(found)], collapse = ", "),
        call. = FALSE
      )
    }
    nums[by_name] <- found
  }

  nums
}

# Translate a mixed vector of column positions and column names into
# column names.
#
#   df:      object whose names() are searched and indexed.
#   columns: vector mixing positions and names, for example
#            c(1:7, "j", 8, "i").
#
# Returns a character vector with one column name per entry of `columns`,
# in the same order. When `columns` contains names, R emits its usual
# "NAs introduced by coercion" warning once; wrap the call in
# suppressWarnings() if that is unwanted.
# Stops with an error listing the names that are not found in names(df).
col2name <- function(df, columns) {
  # Entries that are not numbers become NA; those are the ones given by name.
  nums <- as.numeric(columns)
  by_name <- is.na(nums)

  if (any(by_name)) {
    # match() looks every name up individually and always yields exactly
    # one position per name, whatever order the names are given in.
    found <- match(columns[by_name], names(df))
    if (anyNA(found)) {
      stop(
        "unknown column name(s): ",
        paste(columns[by_name][is.na(found)], collapse = ", "),
        call. = FALSE
      )
    }
    nums[by_name] <- found
  }

  names(df)[nums]
}

columns1 <- c(1:8, "i", 9, "j")
columns2 <- c(1:7, "j", 8, "i")

suppressWarnings(col2name(df, columns1))
#>  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "i" "j"

suppressWarnings(col2num(df, columns1))
#>  [1]  1  2  3  4  5  6  7  8  9  9 10


suppressWarnings(col2num(df, columns2))
#>  [1]  1  2  3  4  5  6  7 10  8  9

suppressWarnings(col2name(df, columns2))
#>  [1] "a" "b" "c" "d" "e" "f" "g" "j" "h" "i"

我正在使用 suppressWarnings 以避免每次我运行函数时收到以下警告：

Warning messages:
1: In col2name(df, columns) : NAs introduced by coercion
2: In lapply(X = X, FUN = FUN, ...) : NAs introduced by coercion

允许引用具有索引和名称的列的函数

A function that allows referral to columns with both index and name

r

function

columnsorting