在 R 中使用列名向量创建分面 xy 散点图

Create faceted xy scatters using vectors of column names in R

我有两个等长的字符向量:vector.x 的第一个元素与 vector.y 的第一个元素配对,第二个与第二个配对,依此类推。向量中的元素是一个(宽格式)数据框的列名。我想以某种方式遍历这两个向量,为每一对列生成一个 xy 散点图,最好放在同一张分面图中。下面是一个(希望)可重现的例子。明确地说,在这个例子中,我最终应该得到 10 个散点图。

# Column names to plot on the x axis; element i is paired with vector.y[i].
vector.x <- c("Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Bathycoccus", "Brockmanniella", "Brockmanniella",  "Caecitellus_paraparvulus", "Caecitellus_paraparvulus")

# Column names to plot on the y axis, in the same order as vector.x.
vector.y <- c("Aliiroseovarius", "Neptuniibacter", "Pseudofulvibacter", "Thalassobius", "unclassified_Porticoccus", "Tenacibaculum", "Pseudomonas", "unclassified_GpIIa", "Marinobacter", "Thalassobius")
# Wide-format data frame literal (looks like dput() output): 13 numeric
# columns, one per name used in vector.x / vector.y, and 5 rows named
# B11, B13, B22, DI5, FF6. NOTE: the expression is not assigned to a
# variable here; the answers below assign the same data to `df1`.
structure(list(Aliiroseovarius = c(0, 0, 0, 0.00487132352941176, 
0.0108639420589757), Marinobacter = c(0, 0.00219023779724656, 
0, 0.00137867647058824, 0.00310398344542162), Neptuniibacter = c(0.00945829750644884, 
0.00959532749269921, 0.0171310629514964, 0.2796875, 0.345835488877393
), Pseudofulvibacter = c(0, 0, 0, 0.00284926470588235, 0.00362131401965856
), Pseudomonas = c(0.00466773123694878, 0.00782227784730914, 
0.0282765737874097, 0.00707720588235294, 0.00400931195033627), 
    Tenacibaculum = c(0, 0, 0, 0.00505514705882353, 0.00362131401965856
    ), Thalassobius = c(0, 0.00166875260742595, 0, 0.0633272058823529, 
    0.147697878944646), unclassified_GpIIa = c(0, 0.000730079265748853, 
    0, 0.003125, 0.00103466114847387), unclassified_Porticoccus = c(0, 
    0, 0, 0.00119485294117647, 0.00569063631660631), Aplanochytrium = c(0, 
    0, 0, 0.000700770847932726, 0.0315839846865529), Bathycoccus = c(0.000388802488335925, 
    0, 0, 0.0227750525578136, 0.00526399744775881), Brockmanniella = c(0, 
    0.00383141762452107, 0, 0.000875963559915907, 0), Caecitellus_paraparvulus = c(0, 
    0, 0, 0.000875963559915907, 0.00797575370872547)), row.names = c("B11", 
"B13", "B22", "DI5", "FF6"), class = "data.frame")

这个方法不太优雅,但应该足以让你上手:

# Paired column names: element i of vector.x is plotted against element i
# of vector.y.
vector.x <- c(
  "Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Aplanochytrium",
  "Aplanochytrium", "Bathycoccus", "Brockmanniella", "Brockmanniella",
  "Caecitellus_paraparvulus", "Caecitellus_paraparvulus"
)
vector.y <- c(
  "Aliiroseovarius", "Neptuniibacter", "Pseudofulvibacter", "Thalassobius",
  "unclassified_Porticoccus", "Tenacibaculum", "Pseudomonas",
  "unclassified_GpIIa", "Marinobacter", "Thalassobius"
)

# Wide-format example data: one numeric column for every name used above,
# five rows named B11, B13, B22, DI5 and FF6.
df1 <- data.frame(
  Aliiroseovarius = c(0, 0, 0, 0.00487132352941176, 0.0108639420589757),
  Marinobacter = c(
    0, 0.00219023779724656, 0, 0.00137867647058824, 0.00310398344542162
  ),
  Neptuniibacter = c(
    0.00945829750644884, 0.00959532749269921, 0.0171310629514964,
    0.2796875, 0.345835488877393
  ),
  Pseudofulvibacter = c(0, 0, 0, 0.00284926470588235, 0.00362131401965856),
  Pseudomonas = c(
    0.00466773123694878, 0.00782227784730914, 0.0282765737874097,
    0.00707720588235294, 0.00400931195033627
  ),
  Tenacibaculum = c(0, 0, 0, 0.00505514705882353, 0.00362131401965856),
  Thalassobius = c(
    0, 0.00166875260742595, 0, 0.0633272058823529, 0.147697878944646
  ),
  unclassified_GpIIa = c(
    0, 0.000730079265748853, 0, 0.003125, 0.00103466114847387
  ),
  unclassified_Porticoccus = c(
    0, 0, 0, 0.00119485294117647, 0.00569063631660631
  ),
  Aplanochytrium = c(0, 0, 0, 0.000700770847932726, 0.0315839846865529),
  Bathycoccus = c(
    0.000388802488335925, 0, 0, 0.0227750525578136, 0.00526399744775881
  ),
  Brockmanniella = c(0, 0.00383141762452107, 0, 0.000875963559915907, 0),
  Caecitellus_paraparvulus = c(
    0, 0, 0, 0.000875963559915907, 0.00797575370872547
  ),
  row.names = c("B11", "B13", "B22", "DI5", "FF6")
)

# Stack the x/y columns of every (vector.x[i], vector.y[i]) pair into one
# long data frame with three columns: `plot` (the facet label "x:y"), `x`
# and `y`.
# seq_along() covers however many pairs the vectors hold instead of a
# hard-coded 1:10, and the per-pair pieces are bound once at the end
# rather than growing df2 with rbind() on every iteration.
stopifnot(length(vector.x) == length(vector.y))

pair_frames <- lapply(seq_along(vector.x), function(i) {
  data.frame(
    plot = paste0(vector.x[i], ":", vector.y[i]),
    x = df1[[vector.x[i]]],
    y = df1[[vector.y[i]]]
  )
})

# do.call(rbind, list()) is NULL, so df2 stays NULL when there are no pairs
df2 <- do.call(rbind, pair_frames)

# ggplot(), aes(), geom_point(), facet_grid() and vars() all come from
# ggplot2, which this snippet never attached.
library(ggplot2)

# One scatter panel per "x:y" pair in df2$plot, laid out as facet columns.
ggplot(data = df2, aes(x, y)) +
  geom_point() +
  facet_grid(cols = vars(plot))

这有点长且令人费解,但它确实有效。

library(tidyverse)
library(gridExtra)

# Build one two-column data frame per (x, y) pair: column 1 is the x
# variable and column 2 is the y variable.
# Selecting the two columns by name, in that order, removes the need to
# work out afterwards which column is "x". The previous lookup against the
# whole of vector.x returned two positions whenever a name occurred in both
# vector.x and vector.y, which made the `if (i == 2)` condition invalid.
df_list <- lapply(seq_along(vector.x), function(k) {
  df1[c(vector.x[k], vector.y[k])]
})


# Turn each two-column data frame into a scatter plot of column 1 (x axis)
# against column 2 (y axis), labelling the axes with the column names.
# The `.data` pronoun looks names up in the plot data only, whereas get()
# can fall back to a same-named object in an enclosing environment.
gg_list <- lapply(df_list, function(DF) {
  x_name <- names(DF)[1]
  y_name <- names(DF)[2]
  ggplot(DF, aes(x = .data[[x_name]], y = .data[[y_name]])) +
    geom_point() +
    xlab(label = x_name) +
    ylab(label = y_name)
})


# Draw every plot in gg_list on one page; the value returned by
# grid.arrange() is kept in `g` and then printed.
g <- grid.arrange(grobs = gg_list)
g

正如 Rui Barradas 所展示的那样,用 ggplot2 和 gridExtra 可以得到非常漂亮的图。如果你想坚持使用 base R,可以这样做(假设你的数据集名为 df1):

# Choose a near-square panel grid that always has room for every pair.
# floor(sqrt(n)) x ceiling(sqrt(n)) is too small for some n (n = 3 gives a
# 1 x 2 grid, n = 7 a 2 x 3 grid), so the row count is derived from the
# column count instead. max(1, ...) keeps the layout valid when n is 0.
n_plots <- length(vector.x)
n_cols <- max(1, ceiling(sqrt(n_plots)))
n_rows <- max(1, ceiling(n_plots / n_cols))

# par() returns the previous settings, so they can be restored afterwards
old_par <- par(mfcol = c(n_rows, n_cols))

# one base-graphics scatter plot per (x, y) pair
for (i in seq_along(vector.x)) {
  plot(df1[[vector.x[i]]], df1[[vector.y[i]]],
       xlab = vector.x[i], ylab = vector.y[i])
}

# restore the layout that was active before this snippet ran
par(old_par)