Tibble 反转坐标

Tibble reversing coordinates

我正在尝试做一个之前看起来很简单的操作,但我没有在网上找到明确的解决方案。

我得到了这种 table:

tibble(
   block = c(1,1,1,2,2,2),
   tag = letters[1:6],
   start = c(15,54,78,27,45,80),
   end = c(50,80,90,40,76,100),
   direction = c(-1,-1,1,1,1,1),
   anchor = c(FALSE,TRUE,FALSE,FALSE,TRUE,FALSE)
) -> df1
# A tibble: 6 x 6
  block tag   start   end direction anchor
  <dbl> <chr> <dbl> <dbl>     <dbl> <lgl> 
1     1 a        15    50        -1 FALSE 
2     1 b        54    80        -1 TRUE  
3     1 c        78    90         1 FALSE 
4     2 d        27    40         1 FALSE 
5     2 e        45    76         1 TRUE  
6     2 f        80   100         1 FALSE 

我在 block 列中得到了分组,但只有 1 anchor 个分组。 给定 anchor == TRUE,如果锚点方向为 -1 (direction[anchor] == -1),我需要 反转 (direction * -1) 块内的坐标, 还需要 保持锚点坐标 (start & end) 并调整 anchor == FALSE 的另一个和坐标以保持新月形但具有相同的比例(到上游和下游标签的长度和距离)。

为了简化,如果组的锚点是-1,我需要重新调整坐标。 这意味着,如果 anchor == -1 那么:

  1. ancho * -1
  2. 必须还原标签订单
  3. 坐标将发生变化,保持标签的长度和标签之间的距离不变

那么,输出只需要是这样的:

tibble(
  block = c(1,1,1,2,2,2),
  tag = c("c", "b", "a", "d", "e", "f"),
  start = c(44,54,84,27,45,80),
  end = c(56,80,119,40,76,100),
  direction = c(-1,1,1,1,1,1),
  anchor = c(FALSE,TRUE,FALSE,FALSE,TRUE,FALSE)
) -> df2
# A tibble: 6 x 6
  block tag   start   end direction anchor
  <dbl> <chr> <dbl> <dbl>     <dbl> <lgl> 
1     1 c        44    56        -1 FALSE 
2     1 b        54    80         1 TRUE  
3     1 a        84   119         1 FALSE 
4     2 d        27    40         1 FALSE 
5     2 e        45    76         1 TRUE  
6     2 f        80   100         1 FALSE

正如您在下面看到的,长度和线对距离保持不变:

df1 %>% 
  group_by(block) %>% 
  mutate(
    TagDistance = lead(start) - end,
    len = end - start
  )
# A tibble: 6 x 8
# Groups:   block [2]
  block tag   start   end direction anchor TagDistance   len
  <dbl> <chr> <dbl> <dbl>     <dbl> <lgl>        <dbl> <dbl>
1     1 a        15    50        -1 FALSE            4    35
2     1 b        54    80        -1 TRUE            -2    26
3     1 c        78    90         1 FALSE           NA    12
4     2 d        27    40         1 FALSE            5    13
5     2 e        45    76         1 TRUE             4    31
6     2 f        80   100         1 FALSE           NA    20



df2 %>% 
  group_by(block) %>% 
  mutate(
    TagDistance = lead(start) - end,
    len = end - start
  )
# A tibble: 6 x 8
# Groups:   block [2]
  block tag   start   end direction anchor TagDistance   len
  <dbl> <chr> <dbl> <dbl>     <dbl> <lgl>        <dbl> <dbl>
1     1 c        44    56        -1 FALSE           -2    12
2     1 b        54    80         1 TRUE             4    26
3     1 a        84   119         1 FALSE           NA    35
4     2 d        27    40         1 FALSE            5    13
5     2 e        45    76         1 TRUE             4    31
6     2 f        80   100         1 FALSE           NA    20

图示是这样的:

library(ggplot2)   
library(gggenes)   
df1 %>% 
  ggplot(aes(xmin = start, xmax = end, y = as.factor(block), forward = direction, fill = anchor)) +
  geom_gene_arrow() +
  geom_gene_label(aes(label = tag)) +
  theme_genes() 
#
df2 %>% 
  ggplot(aes(xmin = start, xmax = end, y = as.factor(block), forward = direction, fill = anchor)) +
  geom_gene_arrow() +
  geom_gene_label(aes(label = tag)) +
  theme_genes() 

提前致谢

我解决了,很笨,请问有更好的办法吗?

tibble(
  block = c(1,1,1,2,2,2),
  tag = letters[1:6],
  start = c(15,54,78,27,45,80),
  end = c(50,80,90,40,76,100),
  direction = c(-1,-1,1,1,1,1),
  anchor = c(FALSE,TRUE,FALSE,FALSE,TRUE,FALSE)
) -> a
a
# A tibble: 6 x 6
  block tag   start   end direction anchor
  <dbl> <chr> <dbl> <dbl>     <dbl> <lgl> 
1     1 a        15    50        -1 FALSE 
2     1 b        54    80        -1 TRUE  
3     1 c        78    90         1 FALSE 
4     2 d        27    40         1 FALSE 
5     2 e        45    76         1 TRUE  
6     2 f        80   100         1 FALSE 

然后我按 block 分组并执行大量算术运算,例如:

a %>% 
  group_by(block) %>% 
  mutate(
    anchor_direction = direction[anchor],
    position_relative_to_anchor = case_when(
      anchor ~ NA_character_,
      (start < start[anchor]) | (start == start[anchor] && end < end[anchor]) ~ "upstream",
      start > start[anchor] ~ "downstream"
    ),
    TagDistance = if_else(
      position_relative_to_anchor == "upstream",
      start[anchor] - end,
      start - end[anchor]
    ),
    length = end - start,
    newstart = case_when(
      anchor ~ start,
      anchor_direction == 1 ~ start,
      position_relative_to_anchor == "upstream" ~ end[anchor] + TagDistance,
      position_relative_to_anchor == "downstream" ~ start[anchor] - TagDistance
    ),
    newend = case_when(
      anchor ~ end,
      anchor_direction == 1 ~ end,
      position_relative_to_anchor == "upstream" ~ newstart + length,
      position_relative_to_anchor == "downstream" ~ newstart - length
    ),
    start = case_when(
      anchor ~ start,
      anchor_direction == 1 ~ start,
      position_relative_to_anchor == "upstream" ~ newstart,
      position_relative_to_anchor == "downstream" ~ newend
    ),
    end = case_when(
      anchor ~ end,
      anchor_direction == 1 ~ end,
      position_relative_to_anchor == "upstream"    ~ newend,
      position_relative_to_anchor == "downstream"  ~ newstart
    )
  ) %>% 
  arrange(block,start,end) %>% 
  mutate(
    direction = direction * anchor_direction
  ) %>% 
  select(
    -c(
      anchor_direction,
      position_relative_to_anchor, 
      TagDistance,
      length,
      newstart,
      newend
    )
  ) -> a
a
# A tibble: 6 x 6
# Groups:   block [2]
  block tag   start   end direction anchor
  <dbl> <chr> <dbl> <dbl>     <dbl> <lgl> 
1     1 c        44    56        -1 FALSE 
2     1 b        54    80         1 TRUE  
3     1 a        84   119         1 FALSE 
4     2 d        27    40         1 FALSE 
5     2 e        45    76         1 TRUE  
6     2 f        80   100         1 FALSE

最后,我和预期的结果进行了比较:

tibble(
  block = c(1,1,1,2,2,2),
  tag = c("c", "b", "a", "d", "e", "f"),
  start = c(44,54,84,27,45,80),
  end = c(56,80,119,40,76,100),
  direction = c(-1,1,1,1,1,1),
  anchor = c(FALSE,TRUE,FALSE,FALSE,TRUE,FALSE)
) -> b
setdiff(a, b)
# A tibble: 0 x 6
# Groups:   block [0]
# … with 6 variables: block <dbl>, tag <chr>, start <dbl>, end <dbl>, direction <dbl>, anchor <lgl>

欢迎任何更好的解决方案。