使用 ggplot2 和 directlabels 标记点

Labelling points with ggplot2 and directlabels

这是我 的后续。虽然 krlmlr 的回答以某种方式帮助解决了我的问题,但仍然存在一个问题 - 标签远离这些点,以至于无法看到哪个标签对应于哪个点。有谁知道如何解决这个问题?

测试数据

test <- structure(list(ID = c(183, 184, 185, 186, 187, 188, 189, 190, 
    191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 202, 203, 204
    ), group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 10L, 1L, 1L, 11L, 1L, 10L, 10L, 1L, 1L, 1L), .Label = c("a", 
    "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", 
    "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"), class = "factor"), 
        x = c(27.4921834914348, 25.9627681619548, 30.4138361188149, 
        29.7795838507818, 32.33351964819, 31.9669266268744, 35.4433172141507, 
        37.8161067968601, 43.1590654001956, 44.2747819266045, 46.3829222044342, 
        42.2074195880057, 45.4532493462957, 48.393281430824, 51.7145681408198, 
        51.4911582677409, 51.9582538485293, 51.3328966791848, 36.4638478746633, 
        36.6113033420253, 39.3476493044906), y = c(-40.6667236544384, 
        -39.2640436250506, -40.6403794753022, -40.907139119954, -39.9691483441288, 
        -39.7485517513382, -38.595662907188, -38.2106224386729, -37.6418188651769, 
        -32.7096448721895, -34.1161006958616, -32.2821584775403, 
        -30.8436917254975, -30.3865899667262, -30.3910690518699, 
        -26.1013343566452, -23.8437232732877, -21.5548787351057, 
        5.50922747751602, 5.64434551903915, 5.01263995541617)), row.names = c(NA, 
    -21L), .Names = c("ID", "group", "x", "y"), class = "data.frame")

library(ggplot2)
library(directlabels)

绘图使用 geom_text - 问题:标签重叠

ggplot(test, aes(x=x, y=y)) + 
geom_point(aes(colour=group)) + 
geom_text(aes(label=ID), show_guide=F)

按照 的建议绘图 - 问题:标签离点很远

ggplot(test, aes(x=x, y=y)) + 
geom_point(aes(colour=group)) + 
geom_dl(aes(label = ID), method = defaultpf.ggplot("point",,,))

也许 hjustvjust 就是您要找的东西?

ggplot(test, aes(x=x, y=y)) + 
  geom_point(aes(colour=group)) + 
  geom_text(aes(label=ID), show_guide=F, hjust = 1.2, vjust = 0.5)

我想你可以通过调整抖动参数来做到这一点 -- ... position = position_jitter...。由于您只向我们提供了 10% 的数据,您可能需要稍微尝试一下:

ggplot(test, aes(x=x, y=y)) + 
  geom_point(aes(colour=group), position= position_jitter(width= 1.5, height= 1)) + 
  geom_text(aes(label=ID), show_guide=F, hjust = 1.2, vjust = -.5,
            position= position_jitter(width= 1.5, height= 1))

避免重叠(至少在某种程度上)的一种方法是将每个标签偏移一个由离它最近的点确定的量。因此,例如,如果一个点的最近邻点直接位于其右侧,则其标签将放置在左侧等。

# centre and normalise variables
test$yy  <- (test$y - min(test$y)) / (max(test$y) - min(test$y))
test$xx  <- (test$x - min(test$x)) / (max(test$x) - min(test$x))
test$angle <- NA
for (i in 1:nrow(test)) {
    dx <- test[-i, ]$xx - test[i, ]$xx
    dy <- test[-i, ]$yy - test[i, ]$yy
    j <- which.min(dx ^ 2 + dy ^ 2)
    theta <- atan2((test[-i, ]$yy[j] - test[i, ]$yy), (test[-i, ]$xx[j] - test[i, ]$xx))
    test[i, ]$angle <- theta + pi
}
sc <- 0.5
test$nudge.x <- cos(test$angle) * sc
test$nudge.y <- sin(test$angle) * sc

ggplot(test, aes(x=x, y=y)) + 
    geom_point(aes(colour=group)) + 
    geom_text(aes(x = x + nudge.x, y = y + nudge.y, label = ID), size = 3, show.legend = FALSE)

您可以尝试调整缩放参数 sc(它越大,标签离点越远)以避免标签重叠。 (我想可能会发生不同的 sc 可以应用于所有点以避免重叠 - 在这种情况下,您需要更改每个点的缩放参数,可能通过使用 [=14] 定义 sc =] 和 dy)。

可能 ggrepel 更适合标记散点图中的点。

library(ggplot2)  # ggrepel requires ggpot2 v2.0.0
library(ggrepel)

ggplot(test, aes(x=x, y=y)) + 


geom_text_repel(aes(label = ID, color = group),  show.legend = FALSE,
                     box.padding = unit(0.45, "lines")) +

geom_point(aes(colour=group))