使用 data.table 时如何进行循环

Question

我有以下（代表性）数据集（实际数据集的一小部分）

structure(list(Time = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1), AgentID = 1:40, State = c(59L, 28L, 84L, 11L, 
5L, 8L, 14L, 71L, 47L, 7L, 84L, 95L, 91L, 92L, 99L, 34L, 70L, 
37L, 55L, 96L, 46L, 38L, 71L, 2L, 61L, 13L, 73L, 26L, 44L, 59L, 
52L, 53L, 42L, 66L, 23L, 11L, 42L, 77L, 38L, 48L), Action = c(-1L, 
-1L, 1L, -1L, 1L, 1L, 1L, -1L, -1L, -1L, -1L, 1L, 1L, 1L, 1L, 
-1L, 1L, 1L, -1L, -1L, 1L, 1L, -1L, 1L, 1L, -1L, -1L, 1L, -1L, 
-1L, 1L, -1L, -1L, 1L, 1L, -1L, -1L, 1L, 1L, 1L), N = c(40L, 
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L
), SimulationID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), discountFactor = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), i_phase = c(1L, 
-1L, -1L, 1L, 1L, -1L, 1L, 1L, -1L, 1L, 1L, 1L, 1L, 1L, 1L, -1L, 
-1L, 1L, 1L, 1L, 1L, -1L, -1L, 1L, -1L, -1L, -1L, -1L, -1L, -1L, 
1L, -1L, -1L, -1L, -1L, 1L, -1L, 1L, 1L, -1L), i_antiPhase = c(-1, 
1, 1, -1, -1, 1, -1, -1, 1, -1, -1, -1, -1, -1, -1, 1, 1, -1, 
-1, -1, -1, 1, 1, -1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, -1, 1, 
-1, -1, 1), totalCount = c(40L, 40L, 40L, 40L, 40L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 40L, 40L), phaseCount = c(20L, 20L, 
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L), 
    phaseCountVar = c(1.24104938271605, 1.15579357351509, 1.15579357351509, 
    1.24104938271605, 1.24104938271605, 1.15579357351509, 1.24104938271605, 
    1.24104938271605, 1.15579357351509, 1.24104938271605, 1.24104938271605, 
    1.24104938271605, 1.24104938271605, 1.24104938271605, 1.24104938271605, 
    1.15579357351509, 1.15579357351509, 1.24104938271605, 1.24104938271605, 
    1.24104938271605, 1.24104938271605, 1.15579357351509, 1.15579357351509, 
    1.24104938271605, 1.15579357351509, 1.15579357351509, 1.15579357351509, 
    1.15579357351509, 1.15579357351509, 1.15579357351509, 1.24104938271605, 
    1.15579357351509, 1.15579357351509, 1.15579357351509, 1.15579357351509, 
    1.24104938271605, 1.15579357351509, 1.24104938271605, 1.24104938271605, 
    1.15579357351509)), row.names = c(NA, -40L), class = c("data.table", 
"data.frame"), .internal.selfref = <pointer: 0x5619bf50c210>)

其中 i_phase 和 i_antiPhase 的取值均为 +1 或 -1。

我想做的是：

a[a, antiPhaseAgents:=list(i.AgentID[i_phase==x.i_antiPhase]), on=.(Time,SimulationID,N,SimulationID), by=.(x.AgentID,Time,SimulationID,N,discountFactor)]

换句话说，对于给定的 (Time, SimulationID, N, discountFactor) 组合，针对 x 中每一个不同的 AgentID，找出 i 中所有满足“其 i_phase 等于该 AgentID（来自 x）的 i_antiPhase”的 AgentID。

当然，上面代码中的语法是行不通的，所以我正在寻找实现上面操作的方法。

注意：最好是纯 data.table 的解决方案。

Answer 1

如果我正确理解你的问题：

a[a,.(x.AgentID,i.AgentID,Time,SimulationID,N,discountFactor,x.i_antiPhase), on=.(Time,SimulationID,N, i_phase=i_antiPhase),allow.cartesian=T][
   ,.(antiPhaseAgents=list(i.AgentID)),by=.(x.AgentID,Time,SimulationID,N,discountFactor)]

    x.AgentID Time SimulationID  N discountFactor       antiPhaseAgents
 1:         2    1            1 40              0  1, 4, 5, 7, 8,10,...
 2:         3    1            1 40              0  1, 4, 5, 7, 8,10,...
 3:         6    1            1 40              0  1, 4, 5, 7, 8,10,...
 4:         9    1            1 40              0  1, 4, 5, 7, 8,10,...
 5:        16    1            1 40              0  1, 4, 5, 7, 8,10,...
 6:        17    1            1 40              0  1, 4, 5, 7, 8,10,...

使用 data.table 时如何进行循环

Looping when using data.tables

r

data.table

tidyverse