在 R 中合并两个 data.table,其中一个 data.table 多出一行

Merging 2 data.tables in R where 1 data table has an extra row

我一直在尝试合并下面这两个 data.table:

structure(list(orderDate = structure(c(18414, 18444, 18475, 18506, 
18536, 18567, 18597, 18628, 18659, 18687, 18718, 18748, 18779
), class = "Date"), productName = c("A. De La Sota Lady", "A. De La Sota Lady", 
"A. De La Sota Lady", "A. De La Sota Lady", "A. De La Sota Lady", 
"A. De La Sota Lady", "A. De La Sota Lady", "A. De La Sota Lady", 
"A. De La Sota Lady", "A. De La Sota Lady", "A. De La Sota Lady", 
"A. De La Sota Lady", "A. De La Sota Lady"), totalOrders = c(2L, 
15L, 52L, 225L, 27L, 10L, 5L, 19L, 36L, 41L, 58L, 16L, 2L)), row.names = c(NA, 
-13L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x0000024e1b7d1ef0>, sorted = "orderDate")

structure(list(returnDate = structure(c(18444, 18475, 18506, 
18536, 18567, 18597, 18628, 18659, 18687, 18718, 18748, 18779
), class = "Date"), productName = c("A. De La Sota Lady", "A. De La Sota Lady", 
"A. De La Sota Lady", "A. De La Sota Lady", "A. De La Sota Lady", 
"A. De La Sota Lady", "A. De La Sota Lady", "A. De La Sota Lady", 
"A. De La Sota Lady", "A. De La Sota Lady", "A. De La Sota Lady", 
"A. De La Sota Lady"), totalReturns = c(5L, 10L, 129L, 73L, 18L, 
3L, 8L, 15L, 43L, 44L, 30L, 6L), orderDate = structure(c(18444, 
18475, 18506, 18536, 18567, 18597, 18628, 18659, 18687, 18718, 
18748, 18779), class = "Date")), row.names = c(NA, -12L), class = c("data.table", 
"data.frame"), .internal.selfref = <pointer: 0x0000024e1b7d1ef0>, sorted = "orderDate")

合并后得到的 data.table 如下:

structure(list(orderDate = structure(c(18444, 18475, 18506, 18536, 
18567, 18597, 18628, 18659, 18687, 18718, 18748, 18779), class = "Date"), 
    productName = c("A. De La Sota Lady", "A. De La Sota Lady", 
    "A. De La Sota Lady", "A. De La Sota Lady", "A. De La Sota Lady", 
    "A. De La Sota Lady", "A. De La Sota Lady", "A. De La Sota Lady", 
    "A. De La Sota Lady", "A. De La Sota Lady", "A. De La Sota Lady", 
    "A. De La Sota Lady"), totalOrders = c(15L, 52L, 225L, 27L, 
    10L, 5L, 19L, 36L, 41L, 58L, 16L, 2L), totalReturns = c(5L, 
    10L, 129L, 73L, 18L, 3L, 8L, 15L, 43L, 44L, 30L, 6L)), sorted = "orderDate", class = c("data.table", 
"data.frame"), row.names = c(NA, -12L), .internal.selfref = <pointer: 0x0000024e1b7d1ef0>)

但是合并后的 returnTest 表中少了一行:第一个表里最早的那个 orderDate 没有出现在结果中。

我尝试过用 productName 列作为键列进行合并,但不知为何总是报错;下面这种写法是我唯一能不报错地合并两个表的方法。最终,我想得到一个 data.table 来查看某个产品的退货率,但用这种方法总会漏掉某个月份:某个月可能有订单但没有退货,反之亦然。有人可以帮忙吗?我已经尝试解决这个问题大约一个星期了。

# Orders for a single product, keyed by order date.
test1 <- ordersByProductNameAndSize[productName == "A. De La Sota Lady"]
setkeyv(test1, "orderDate")

# Returns for the same product. The return date is copied into an
# `orderDate` column so both tables share a join column.
test2 <- returnsByProductNameAndSize[productName == "A. De La Sota Lady"]
test2[, orderDate := returnDate]
setkeyv(test2, "orderDate")

# Full outer join: `all = TRUE` must be an argument of merge(), not of the
# `[` subset of test2; otherwise merge() performs an inner join and silently
# drops every date that appears in only one of the two tables.
returnTest <- merge(
  test1,
  test2[, c("orderDate", "totalReturns"), with = FALSE],
  by = "orderDate",
  all = TRUE
)

# Return rate in percent. Dates present in only one table have NA in
# totalReturns or totalOrders, so their returnRate is NA as well.
returnTest[, returnRate := (totalReturns / totalOrders) * 100]

感谢您发布数据!如果我的理解正确,您所说的“缺失”行就是 2020 年 6 月 1 日那一行:当天有订单,但没有任何退货,对吗?

# Reproducible example: order counts per date for one product.
# The first date (18414, i.e. 2020-06-01) has no counterpart in `t2`.
t1 <- structure(
  list(
    orderDate = structure(
      c(18414, 18444, 18475, 18506, 18536, 18567, 18597,
        18628, 18659, 18687, 18718, 18748, 18779),
      class = "Date"
    ),
    productName = rep("A. De La Sota Lady", 13L),
    totalOrders = c(2L, 15L, 52L, 225L, 27L, 10L, 5L,
                    19L, 36L, 41L, 58L, 16L, 2L)
  ),
  row.names = c(NA, -13L),
  class = c("data.table", "data.frame")
)

# Return counts per date for the same product; `orderDate` is a copy of
# `returnDate` so that it can serve as the join column.
t2 <- structure(
  list(
    returnDate = structure(
      c(18444, 18475, 18506, 18536, 18567, 18597,
        18628, 18659, 18687, 18718, 18748, 18779),
      class = "Date"
    ),
    productName = rep("A. De La Sota Lady", 12L),
    totalReturns = c(5L, 10L, 129L, 73L, 18L, 3L,
                     8L, 15L, 43L, 44L, 30L, 6L),
    orderDate = structure(
      c(18444, 18475, 18506, 18536, 18567, 18597,
        18628, 18659, 18687, 18718, 18748, 18779),
      class = "Date"
    )
  ),
  row.names = c(NA, -12L),
  class = c("data.table", "data.frame")
)

# Full outer join (`all = TRUE`) keeps dates found in only one table.
# Joining on productName as well avoids duplicated productName.x /
# productName.y columns and keeps different products from being matched
# to each other when they share a date.
rt <- merge(t1, t2, by = c("orderDate", "productName"), all = TRUE)

# Return rate in percent; it is NA for rows that lack either count.
rt$returnRate <- (rt$totalReturns / rt$totalOrders) * 100