如何将创建列的函数应用到 R 中的数据框

How to apply a function that creates columns to dataframe in R

我尝试在 R 中对数据框进行一些数据整理,但我找不出我的解决方案不起作用的原因。

# Sample audit log used throughout the question: nine events, one per row,
# with the event-specific payload spread over the generic columns R1..R4.
auditlog <- data.frame(
  X_ID = c(1, 2, 4, 5, 6, 7, 8, 9, 10),
  EVENT = c(
    "create", "delete", "update", "update", "delete",
    "delete", "create", "create", "update"
  ),
  UNIT = rep("30xx", 9),
  CREATED = c(
    "2015-12-01", "2015-12-01", "2015-12-02",
    "2015-12-04", "2015-12-05", "2015-12-06",
    "2015-12-10", "2015-12-10", "2015-12-10"
  ),
  R1 = c(
    "xxxxxxxx11", "title", "xxxxxxxx25",
    "xxxxxxxx11", "new_title", "_title",
    "xxxxxxxx12", "xxxxxxxx87", "xxxxxxxx87"
  ),
  R2 = c(
    "my_title", "xxxxxxxx47", "titleA",
    "my_title", "xxxxxxxx11", "xxxxxxxx64",
    "my_title_24", "my_title_2", "my_title_2"
  ),
  R3 = c(
    "red", "", "title_42",
    "new_title", "", "",
    "green", "blue", "_title_"
  ),
  R4 = c(
    "note", "", "",
    "", "", "",
    "my_important_note", "yet another note", ""
  )
)

这是我的审计日志的摘录:

X_ID    EVENT   UNIT    CREATED     R1          R2          R3          R4
1       create  30xx    2015-12-01  xxxxxxxx11  my_title    red         'note'          
2       delete  30xx    2015-12-01  title       xxxxxxxx47
4       update  30xx    2015-12-02  xxxxxxxx25  titleA      title_42
5       update  30xx    2015-12-04  xxxxxxxx11  my_title    new_title           
6       delete  30xx    2015-12-05  new_title   xxxxxxxx11      
7       delete  30xx    2015-12-06  _title      xxxxxxxx64          
8       create  30xx    2015-12-10  xxxxxxxx12  my_title_24 green       'my_important_note'         
9       create  30xx    2015-12-10  xxxxxxxx87  my_title_2  blue        'yet another note'
10      update  30xx    2015-12-10  xxxxxxxx87  my_title_2  _title_         

我想把 R1 到 R4 列重新分配到含义更清晰的列中,从而把这份日志整理成如下结构:

X_ID    EVENT   UNIT    CREATED     ELEMENT_ID  TITLE       NEW_TITLE   COLOR   COMMENT
1       create  30xx    2015-12-01  xxxxxxxx11  my_title                red     'note'          
2       delete  30xx    2015-12-01  xxxxxxxx47  title
4       update  30xx    2015-12-02  xxxxxxxx25  titleA      title_42
5       update  30xx    2015-12-04  xxxxxxxx11  my_title    new_title           
6       delete  30xx    2015-12-05  xxxxxxxx11  new_title       
7       delete  30xx    2015-12-06  xxxxxxxx64  _title          
8       create  30xx    2015-12-10  xxxxxxxx12  my_title_24             green   'my_important_note'         
9       create  30xx    2015-12-10  xxxxxxxx87  my_title_2              blue    'yet another note'
10      update  30xx    2015-12-10  xxxxxxxx87  my_title_2  _title_

这是我在 R 中的方法,我尝试对每一行应用一个带条件的函数:

# Row-wise attempt from the question: apply() passes each row of auditlog to
# the anonymous function as a named vector and collects the function's values.
# NOTE(review): the function modifies its local copy of `row` but never
# returns it; its value is that of the last assignment in the branch taken.
# apply() itself returns a vector, array or list, never a data frame.
# NOTE(review): the create/delete branches write 'ELEMNT_ID' while the update
# branch writes 'ELEMENT_ID' -- looks like a typo; confirm the intended name.
# NOTE(review): the brace ending the last branch line already closes the
# function body, so the `}` on the final line appears to be unmatched.
struct_log <- apply(auditlog, 1, function(row) {
   if (row['EVENT'] == 'create') { row['ELEMNT_ID'] <- row['R1']; row['TITLE'] <- row['R2']; row['COLOR'] <- row['R3']; row['COMMENT'] <- row['R4'] }
   else if (row['EVENT'] == 'delete') { row['TITLE'] <- row['R1']; row['ELEMNT_ID'] <- row['R2'] }
   else if (row['EVENT'] == 'update') { row['ELEMENT_ID'] <- row['R1']; row['TITLE'] <- row['R2']; row['NEW_TITLE'] <- row['R3'] } }
})

现在我不知道为什么我没有取回数据框。

使用索引更容易做到这一点。方法如下,但首先您还应该在 auditlog 定义中包含一个 stringsAsFactors=FALSE 参数:

k <- nrow(auditlog)

# Pre-allocate the target data frame: one row per audit entry, every text
# column starting out as an empty string.
struct_log <- data.frame(
  X_ID = numeric(k),
  EVENT = character(k),
  UNIT = character(k),
  CREATED = character(k),
  ELEMENT_ID = character(k),
  TITLE = character(k),
  NEW_TITLE = character(k),
  COLOR = character(k),
  COMMENT = character(k),
  stringsAsFactors = FALSE
)

# The four leading columns are carried over unchanged.
struct_log[, 1:4] <- auditlog[, 1:4]

# For every event type: target column name -> generic source column.
column_map <- list(
  create = c(ELEMENT_ID = "R1", TITLE = "R2", COLOR = "R3", COMMENT = "R4"),
  delete = c(TITLE = "R1", ELEMENT_ID = "R2"),
  update = c(ELEMENT_ID = "R1", TITLE = "R2", NEW_TITLE = "R3")
)

# Copy the mapped columns for the rows of each event type in turn.
for (event in names(column_map)) {
  ind <- which(auditlog["EVENT"] == event)
  for (target in names(column_map[[event]])) {
    struct_log[ind, target] <- auditlog[ind, column_map[[event]][[target]]]
  }
}

你想要的结果:

  X_ID  EVENT UNIT    CREATED ELEMENT_ID       TITLE NEW_TITLE COLOR           COMMENT
1    1 create 30xx 2015-12-01 xxxxxxxx11    my_title             red              note
2    2 delete 30xx 2015-12-01 xxxxxxxx47       title                                  
3    4 update 30xx 2015-12-02 xxxxxxxx25      titleA  title_42                        
4    5 update 30xx 2015-12-04 xxxxxxxx11    my_title new_title                        
5    6 delete 30xx 2015-12-05 xxxxxxxx11   new_title                                  
6    7 delete 30xx 2015-12-06 xxxxxxxx64      _title                                  
7    8 create 30xx 2015-12-10 xxxxxxxx12 my_title_24           green my_important_note
8    9 create 30xx 2015-12-10 xxxxxxxx87  my_title_2            blue  yet another note
9   10 update 30xx 2015-12-10 xxxxxxxx87  my_title_2   _title_                        

不要忘记在 auditlog 的定义中加入 stringsAsFactors=FALSE(类似于我在上面对 struct_log 所做的),否则这段代码无法正常工作:在 R 4.0.0 之前的版本中,data.frame() 默认会把字符串列转换为因子。

(好的,评论后的第二个答案:)

我修改了你最初的方法。首先,您需要在 if 语句的每个分支中设置所有列(即包括空白列),这样每一行返回的向量长度才一致,apply 才能把结果拼成矩阵;其次,您需要用 return 返回函数结果。这是完整的代码(包括 auditlog 定义中的修改):

# Same sample audit log as in the question, but with string columns kept as
# plain character vectors (stringsAsFactors = FALSE).
auditlog <- data.frame(
  X_ID = c(1, 2, 4, 5, 6, 7, 8, 9, 10),
  EVENT = c(
    "create", "delete", "update",
    "update", "delete", "delete",
    "create", "create", "update"
  ),
  UNIT = rep("30xx", times = 9),
  CREATED = paste0(
    "2015-12-",
    c("01", "01", "02", "04", "05", "06", "10", "10", "10")
  ),
  R1 = c(
    "xxxxxxxx11", "title", "xxxxxxxx25", "xxxxxxxx11", "new_title",
    "_title", "xxxxxxxx12", "xxxxxxxx87", "xxxxxxxx87"
  ),
  R2 = c(
    "my_title", "xxxxxxxx47", "titleA", "my_title", "xxxxxxxx11",
    "xxxxxxxx64", "my_title_24", "my_title_2", "my_title_2"
  ),
  R3 = c(
    "red", "", "title_42", "new_title", "",
    "", "green", "blue", "_title_"
  ),
  R4 = c(
    "note", "", "", "", "",
    "", "my_important_note", "yet another note", ""
  ),
  stringsAsFactors = FALSE
)


# Add the structured columns to one audit-log row.
#
# `row` is a named character vector holding at least EVENT and R1..R4 (this is
# what apply() passes for each row of auditlog). The result is `row` with the
# elements ELEMENT_ID, TITLE, NEW_TITLE, COLOR and COMMENT set, always in that
# order, so that results from different rows line up position by position when
# apply() binds them into a matrix. Columns that do not apply to the event
# are set to "". A row with an unrecognised EVENT gets all five set to "".
ff <- function(row) {
  # Start from blanks, in the one fixed output order.
  new_cols <- c(
    ELEMENT_ID = "", TITLE = "", NEW_TITLE = "", COLOR = "", COMMENT = ""
  )

  # Target column -> generic source column for this event type.
  sources <- switch(row[["EVENT"]],
    create = c(ELEMENT_ID = "R1", TITLE = "R2", COLOR = "R3", COMMENT = "R4"),
    delete = c(ELEMENT_ID = "R2", TITLE = "R1"),
    update = c(ELEMENT_ID = "R1", TITLE = "R2", NEW_TITLE = "R3"),
    character(0)
  )
  new_cols[names(sources)] <- row[sources]

  # Assigning by name appends missing elements in the order of new_cols.
  row[names(new_cols)] <- new_cols
  row
}

# Apply ff to every row. apply() first coerces auditlog to a character matrix
# and returns one column per input row, hence the transpose.
struct_log <- t(apply(auditlog, 1, ff))
# Keep the text columns as character vectors regardless of the R version's
# default for stringsAsFactors.
struct_log <- as.data.frame(struct_log, stringsAsFactors = FALSE)
new_names <- c("ELEMENT_ID", "TITLE", "NEW_TITLE", "COLOR", "COMMENT")
names(struct_log) <- c(names(auditlog), new_names)
struct_log[c("R1", "R2", "R3", "R4")] <- list(NULL)  # drop 'R' columns
# The matrix coercion turned X_ID into space-padded strings (" 1", ..., "10");
# restore the original numeric ids.
struct_log$X_ID <- auditlog$X_ID

之后你又得到了:

    X_ID  EVENT UNIT    CREATED ELEMENT_ID       TITLE NEW_TITLE COLOR           COMMENT
1    1 create 30xx 2015-12-01 xxxxxxxx11    my_title             red              note
2    2 delete 30xx 2015-12-01 xxxxxxxx47       title                                  
3    4 update 30xx 2015-12-02 xxxxxxxx25      titleA  title_42                        
4    5 update 30xx 2015-12-04 xxxxxxxx11    my_title new_title                        
5    6 delete 30xx 2015-12-05 xxxxxxxx11   new_title                                  
6    7 delete 30xx 2015-12-06 xxxxxxxx64      _title                                  
7    8 create 30xx 2015-12-10 xxxxxxxx12 my_title_24           green my_important_note
8    9 create 30xx 2015-12-10 xxxxxxxx87  my_title_2            blue  yet another note
9   10 update 30xx 2015-12-10 xxxxxxxx87  my_title_2   _title_