使用 ggplot2 自定义森林图。不能有多个组,CI越过下限

Custom forest plot with ggplot2. Can't have multiple groups, CIs cross the lower limit

我写了一个函数来根据回归结果绘制 CIs 的森林图。

我向函数传入一个 data.frame,其中包含预测变量标签($label)、估计值($coef)、CI 的下限和上限($ci.low、$ci.high)以及样式($style):

# Example input (dput output): one row per axis label, top to bottom.
# Rows 1 and 6 have NA in coef/ci.low/ci.high and style "bold"; they serve as
# group headings. The other rows carry an estimate with its CI bounds and
# style "plain". The labels of the first group carry a trailing "2" so that
# every label is unique.
structure(list(label = structure(c(9L, 4L, 8L, 2L, 6L, 10L, 3L, 
7L, 1L, 5L), .Label = c("    - frattura esposta", "    - frattura esposta 2", 
"    - lembo di perone vs lembo corticoperiostale", "    - lembo di perone vs lembo corticoperiostale 2", 
"    - sesso maschile vs femminile", "    - sesso maschile vs femminile 2", 
"    - trauma bassa energia", "    - trauma bassa energia 2", 
"Tempo di guarigione 2:", "Tempo di guarigione:"), class = "factor"), 
    coef = c(NA, 0.812, 0.695, 1.4, 0.682, NA, 0.812, 0.695, 
    1.4, 0.682), ci.low = c(NA, 0.405, 0.31, 1.26, 0.0855, NA, 
    0.405, 0.31, 1.26, 0.0855), ci.high = c(NA, 1.82, 0.912, 
    2.94, 1.01, NA, 1.82, 0.912, 2.94, 1.01), style = structure(c(1L, 
    2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L), .Label = c("bold", "plain"
    ), class = "factor")), .Names = c("label", "coef", "ci.low", 
"ci.high", "style"), class = "data.frame", row.names = c(NA, 
-10L))

我想在估计值周围显示 CIs,并在可能的情况下对预测变量进行分组。对于第一个目标,我翻转了轴并使用了误差线;对于后者,我在数据框中创建了具有标签但没有值的行。结果是:

第一个问题: 如您所见,分组标签是粗体的,并且没有任何关联的数据。样式(正常或粗体)在 style 列中定义(我打算将其自动化)。问题是这仅在所有标签互不相同时才有效(请注意,我在第一张图中的每个标签上都添加了“2”以使其不同);带有重复标签的行只会显示为空白:

我从 "trauma bassa energia" 标签中删除了 2,它消失了。 (风格也很乱)。

我想找到一个分组的解决方案,甚至与我的实现大不相同,但没有重名问题。

第二个问题: 正如您在两张图片中看到的那样,CI 条的下端越过了零;对于优势比(并且就我所用数据框中的数值而言),这是不可能的。

这是我的代码:

#' Draw a forest plot of estimates with their confidence intervals
#'
#' @param d A data.frame with one row per axis entry, listed top to bottom.
#'   Uses the columns `label`, `coef`, `ci.low`, `ci.high` and, optionally,
#'   `style` (font face of each label, e.g. "plain" or "bold"). Rows whose
#'   estimate is `NA` draw nothing and can serve as group headings. Labels
#'   may repeat.
#' @param xlab Title of the value axis. With `exp = TRUE` the default
#'   "Coefficients" is replaced by "Odds Ratios".
#' @param ylab Title of the label axis.
#' @param exp If `TRUE`, the value axis is log10 and the reference line sits
#'   at 1; otherwise the axis is linear and the reference line sits at 0.
#' @param bars If `TRUE`, draw a bar per estimate plus its error bar;
#'   otherwise draw only error bars coloured by side of the reference line.
#' @param lims Optional numeric vector of length 2 giving the visible range
#'   of the value axis. `NULL` (default) lets ggplot2 choose the range.
#' @return A ggplot object.
forest.plot <- function(d, xlab = "Coefficients", ylab = "", exp = TRUE, bars = TRUE, lims = NULL){
    # Fail loudly when ggplot2 is missing; require() would only return FALSE.
    if (!requireNamespace("ggplot2", quietly = TRUE)) {
        stop("forest.plot() requires the 'ggplot2' package", call. = FALSE)
    }

    boundary <- if (exp) 1 else 0
    text.pos <- if (bars) -2 else -1.5
    if (exp && xlab == "Coefficients") xlab <- "Odds Ratios"

    n <- nrow(d)
    # Position rows by a unique id instead of by label, so repeated labels
    # no longer collapse onto a single axis position.
    d$.row <- factor(seq_len(n))
    # Precompute the side of the reference line so aes() only refers to columns.
    d$.above <- d$coef > boundary
    # element_text(face = ) expects font-face names, not a factor.
    style <- if ("style" %in% names(d)) as.character(d[["style"]]) else rep("plain", n)

    # coord_flip(ylim = ) zooms the value axis without dropping data;
    # ylim = NULL keeps the automatic range.
    p <- ggplot2::ggplot(d, ggplot2::aes(x = .row, y = coef)) +
        ggplot2::coord_flip(ylim = lims)

    if (exp) {
        # Default break labels: rounding them (labels = round) turned ticks
        # below 0.5 into "0", which made the CIs look as if they crossed zero.
        p <- p + ggplot2::scale_y_log10()
    }

    p <- p + ggplot2::geom_hline(yintercept = boundary, linetype = 2,
                                 colour = "darkgray", lwd = 1)

    # Constants (line width, cap width, point size, vjust) are set as fixed
    # parameters, not mapped inside aes(). na.rm = TRUE silences the
    # warnings caused by the intentionally empty heading rows.
    if (bars) {
        p <- p +
            ggplot2::geom_bar(ggplot2::aes(fill = .above), stat = "identity",
                              width = .3, na.rm = TRUE) +
            ggplot2::geom_errorbar(ggplot2::aes(ymin = ci.low, ymax = ci.high),
                                   colour = "dodgerblue4", width = 0.05,
                                   lwd = .5, na.rm = TRUE)
    } else {
        p <- p +
            ggplot2::geom_errorbar(ggplot2::aes(colour = .above, ymin = ci.low,
                                                ymax = ci.high),
                                   width = .05, lwd = .5, na.rm = TRUE)
    }

    # limits are reversed so the first row of d ends up at the top after the
    # flip; labels and faces are reversed the same way to stay aligned.
    p <- p +
        ggplot2::geom_point(colour = "dodgerblue4", size = 2, na.rm = TRUE) +
        ggplot2::scale_x_discrete(limits = rev(levels(d$.row)),
                                  labels = rev(as.character(d$label))) +
        ggplot2::geom_text(ggplot2::aes(label = coef), vjust = text.pos,
                           na.rm = TRUE) +
        ggplot2::theme_bw() +
        ggplot2::theme(axis.text.x = ggplot2::element_text(color = "gray30", size = 16),
                       axis.text.y = ggplot2::element_text(face = rev(style), color = "gray30",
                                                           size = 14, hjust = 0, angle = 0),
                       axis.title.x = ggplot2::element_text(size = 20, color = "gray30", vjust = 0),
                       axis.ticks = ggplot2::element_blank(),
                       legend.position = "none",
                       panel.border = ggplot2::element_blank()) +
        ggplot2::geom_vline(xintercept = 0, lwd = 2) +
        # Axis titles are swapped on purpose: coord_flip() shows y horizontally.
        ggplot2::ylab(xlab) +
        ggplot2::xlab(ylab)

    p
}

您可以通过创建两个 ggplot 对象并通过 gridExtra::grid.draw 将它们放在一起来获得您想要的结果。

设置

library(ggplot2)
library(gridExtra)
library(grid)

# Example regression output, one row per line of the final figure. The two
# rows with NA estimates are the group headings (style "bold"); all other
# rows are predictors (style "plain").
regression_results <- structure(
  list(
    label = structure(
      c(9L, 4L, 8L, 2L, 6L, 10L, 3L, 7L, 1L, 5L),
      levels = c(
        "    - frattura esposta",
        "    - frattura esposta 2",
        "    - lembo di perone vs lembo corticoperiostale",
        "    - lembo di perone vs lembo corticoperiostale 2",
        "    - sesso maschile vs femminile",
        "    - sesso maschile vs femminile 2",
        "    - trauma bassa energia",
        "    - trauma bassa energia 2",
        "Tempo di guarigione 2:",
        "Tempo di guarigione:"
      ),
      class = "factor"
    ),
    coef = c(
      NA, 0.812, 0.695, 1.4, 0.682,
      NA, 0.812, 0.695, 1.4, 0.682
    ),
    ci.low = c(
      NA, 0.405, 0.31, 1.26, 0.0855,
      NA, 0.405, 0.31, 1.26, 0.0855
    ),
    ci.high = c(
      NA, 1.82, 0.912, 2.94, 1.01,
      NA, 1.82, 0.912, 2.94, 1.01
    ),
    style = structure(
      c(1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L),
      levels = c("bold", "plain"),
      class = "factor"
    )
  ),
  names = c("label", "coef", "ci.low", "ci.high", "style"),
  class = "data.frame",
  row.names = c(NA, -10L)
)

# Vertical position of each row: the first row gets the highest value so it
# is drawn at the top of both panels.
regression_results[["yval"]] <- seq(
  from = nrow(regression_results),
  to = 1,
  by = -1
)

构建森林图

# Forest plot panel: one point estimate with a horizontal CI bar per row,
# drawn at the row's yval. The y axis itself is hidden because the labels
# are drawn in a separate panel.
forest_plot <- 
  ggplot(regression_results) + 
    theme_bw() + 
    aes(x = coef, xmin = ci.low, xmax = ci.high, y = yval) + 
    # na.rm = TRUE: heading rows have NA estimates on purpose, so dropping
    # them should not raise a warning.
    geom_point(na.rm = TRUE) + 
    # NOTE(review): newer ggplot2 releases appear to favour
    # geom_errorbar(orientation = "y") over geom_errorbarh() - confirm
    # against the installed version before switching.
    geom_errorbarh(height = 0.2, color = 'red', na.rm = TRUE) + 
    # Reference line at an odds ratio of 1 (no effect).
    geom_vline(xintercept = 1) + 
    theme(
          axis.text.y = element_blank(),
          axis.title.y = element_blank(),
          axis.ticks.y = element_blank(),
          panel.grid.major.y = element_blank(), 
          panel.grid.minor.y = element_blank(), 
          panel.border = element_blank() 
          )  +
    # Derive the y range from the data instead of hard-coding 10 rows.
    ylim(0, nrow(regression_results)) + 
    xlab("Odds Ratio")

构建标签图

# Label panel: draws each row's label as text at the row's yval. It could be
# extended to show more columns (e.g. the numeric estimates).
table_plot <-
  ggplot(regression_results) + 
    theme_bw() + 
    aes(y = yval) + 
    # "\\s2" (escaped backslash) strips the " 2" suffix that only exists to
    # keep the example labels unique; a bare "\s" is not a valid R escape and
    # stops the script from parsing. fontface takes the per-row style so
    # group headings are drawn in bold.
    geom_text(aes(label = gsub("\\s2", "", label), x = 0,
                  fontface = as.character(style)),
              hjust = 0) + 
    theme(
          axis.text = element_blank(),
          axis.title = element_blank(),
          axis.ticks = element_blank(),
          panel.grid = element_blank(),
          panel.border = element_blank() 
          ) + 
    xlim(0, 6) +
    # Derive the y range from the data instead of hard-coding 10 rows.
    ylim(0, nrow(regression_results))

生成图形

# Build the combined figure: label panel on the left, forest plot on the
# right. cbind() dispatches to gtable's method for gtable objects, which
# replaces the unexported gridExtra:::cbind_gtable. The panels are combined
# before the device is opened so a failure here cannot leave a device open.
combined_plot <- cbind(ggplotGrob(table_plot), ggplotGrob(forest_plot), size = "last")

png(filename = "so-example.png", width = 8, height = 6, units = "in", res = 300)

# Always close the PNG device, even if drawing fails.
tryCatch(grid.draw(combined_plot), finally = dev.off())