在传递给 `geom_spoke` 之前汇总 2D bin 中的 X、Y、theta 数据
Summarise X,Y,theta data in 2D bins before passing to `geom_spoke`
我有包含 x、y 坐标和航向角的数据,我想将它们分到 2D 箱中,以便计算每个箱的平均航向,并使用 ggplot 的 geom_spoke 绘图。
这是我想要执行的操作示例,其中的分箱是手动创建的:
# Example data: 100 points with x and y drawn by runif() between 0 and 100
# and an angle drawn between 0 and 2*pi.
set.seed(1)
dat <- data.frame(
  x = runif(100, 0, 100),
  y = runif(100, 0, 100),
  angle = runif(100, 0, 2 * pi)
)

# Manual binning: one summary row per 50 x 50 quadrant, placed at the
# quadrant centre, holding the mean angle and the number of points.
# NOTE(review): mean(angle) is an arithmetic mean; if the angles are headings
# that wrap around at 2*pi a circular mean may be intended -- confirm.
bins <- rbind(
  # bottom left
  dat %>%
    filter(x < 50, y < 50) %>%
    summarise(x = 25, y = 25, angle = mean(angle), n = n()),
  # bottom right
  dat %>%
    filter(x > 50, y < 50) %>%
    summarise(x = 75, y = 25, angle = mean(angle), n = n()),
  # top left
  dat %>%
    filter(x < 50, y > 50) %>%
    summarise(x = 25, y = 75, angle = mean(angle), n = n()),
  # top right
  dat %>%
    filter(x > 50, y > 50) %>%
    summarise(x = 75, y = 75, angle = mean(angle), n = n())
)

# Plot one point per quadrant with an arrow whose direction is the mean
# angle and whose length is half the number of points in the quadrant.
ggplot(bins, aes(x = x, y = y)) +
  geom_point() +
  geom_spoke(
    aes(angle = angle, radius = n / 2),
    arrow = arrow(length = unit(0.2, "cm"))
  ) +
  scale_x_continuous(limits = c(0, 100)) +
  scale_y_continuous(limits = c(0, 100)) +
  coord_equal()
我知道如何创建 2D 分箱并得到每个箱的计数数据,例如:
# Heatmap of point counts in 50 x 50 bins.
p <- ggplot(data = dat, mapping = aes(x = x, y = y)) +
  coord_equal() +
  geom_bin2d(binwidth = c(50, 50))
# ggplot_build(p)$data[[1]]  # uncomment to access the binned data
但我似乎找不到一种方法,在传递给 geom_spoke 之前汇总每个 bin 的其他变量(例如航向)。如果不先分箱,我的图看起来像这样:
这是一种方法。您只需一次性确定每个维度(x 和 y)中 bin 的数量/范围,其余部分都由代码自动处理:
# Bin edges for each dimension; change `n` here to adjust the range and
# the number of bins.
x.range <- pretty(dat[["x"]], n = 3)
y.range <- pretty(dat[["y"]], n = 3)
> x.range
[1] 0 50 100
> y.range
[1] 0 50 100
根据每一行落入的 x 和 y 间隔自动将每一行分配给一个 bin:
# Assign each row to a bin from the interval its x and y values fall into.
# findInterval() with left.open = TRUE counts the breaks strictly below each
# value, so bin i covers (range[i], range[i + 1]] -- the same rule as
# max(which(x > x.range)), but vectorised, so rowwise() is not needed.
# rightmost.closed = TRUE additionally puts a value equal to the lowest
# break into bin 1, where max(which(...)) would give -Inf with a warning.
# as_tibble() keeps the result a tibble, as rowwise() %>% ungroup() did.
dat <- dat %>%
  as_tibble() %>%
  mutate(
    x.bin = findInterval(
      x, x.range,
      left.open = TRUE, rightmost.closed = TRUE
    ),
    y.bin = findInterval(
      y, y.range,
      left.open = TRUE, rightmost.closed = TRUE
    ),
    bin = paste(x.bin, y.bin, sep = "_")
  )
> head(dat)
# A tibble: 6 x 6
x y angle x.bin y.bin bin
<dbl> <dbl> <dbl> <int> <int> <chr>
1 26.55087 65.47239 1.680804 1 2 1_2
2 37.21239 35.31973 1.373789 1 1 1_1
3 57.28534 27.02601 3.247130 2 1 2_1
4 90.82078 99.26841 1.689866 2 2 2_2
5 20.16819 63.34933 1.138314 1 2 1_2
6 89.83897 21.32081 3.258310 2 1 2_1
计算每个 bin 的平均值:
# Add per-bin summaries to every row: mean x, mean y, mean angle and the
# number of points in the row's bin. Grouping is dropped again afterwards.
dat <- ungroup(
  mutate(
    group_by(dat, bin),
    x.mean = mean(x),
    y.mean = mean(y),
    angle.mean = mean(angle),
    n = n()
  )
)
> head(dat)
# A tibble: 6 x 10
x y angle x.bin y.bin bin x.mean y.mean angle.mean n
<dbl> <dbl> <dbl> <int> <int> <chr> <dbl> <dbl> <dbl> <int>
1 26.55087 65.47239 1.680804 1 2 1_2 26.66662 68.56461 2.672454 29
2 37.21239 35.31973 1.373789 1 1 1_1 33.05887 28.86027 2.173177 23
3 57.28534 27.02601 3.247130 2 1 2_1 74.71214 24.99131 3.071629 23
4 90.82078 99.26841 1.689866 2 2 2_2 77.05622 77.91031 3.007859 25
5 20.16819 63.34933 1.138314 1 2 1_2 26.66662 68.56461 2.672454 29
6 89.83897 21.32081 3.258310 2 1 2_1 74.71214 24.99131 3.071629 23
绘图时无需硬编码任何 bin 数量或 bin 宽度:
# Filled 2D bins with one point and one arrow per row, drawn at the bin's
# mean x/y position; bin widths come from the spacing of the bin edges.
ggplot(data = dat, mapping = aes(x = x, y = y, fill = bin)) +
  geom_bin2d(
    binwidth = c(x.range[2] - x.range[1], y.range[2] - y.range[1])
  ) +
  geom_point(aes(x = x.mean, y = y.mean)) +
  geom_spoke(
    aes(x = x.mean, y = y.mean, angle = angle.mean, radius = n / 2),
    arrow = arrow(length = unit(0.2, "cm"))
  ) +
  coord_equal()
填充调色板的选择、图例标签、绘图标题等其他细节可以随后进行调整。
只是为了扩展 @Z.Lin 的回答,这里有一个修改版本,它把绘图点放在每个 bin 的中心,而不是 x、y 坐标的平均位置。如果有比使用 left_join 更优雅的解决方案,我很乐意听到。
# data
set.seed(1)
dat <- data.frame(
  x = runif(100, 0, 100),
  y = runif(100, 0, 100),
  angle = runif(100, 0, 2 * pi)
)

# set parameters
# x.max and y.max must be assigned before use; 100 matches the upper limit
# passed to runif() above.
n <- 2        # number of bins per dimension
x.max <- 100  # maximum x value
y.max <- 100  # maximum y value
x.range <- seq(0, x.max, length.out = n + 1)
y.range <- seq(0, y.max, length.out = n + 1)

# bin data
# findInterval() with left.open = TRUE counts the breaks strictly below each
# value, so bin i covers (range[i], range[i + 1]] -- the same rule as
# max(which(x > x.range)), but vectorised. rightmost.closed = TRUE puts a
# value equal to the lowest break into bin 1 instead of producing -Inf.
dat <- dat %>%
  as_tibble() %>%
  mutate(
    x.bin = findInterval(
      x, x.range,
      left.open = TRUE, rightmost.closed = TRUE
    ),
    y.bin = findInterval(
      y, y.range,
      left.open = TRUE, rightmost.closed = TRUE
    ),
    bin = paste(x.bin, y.bin, sep = "_")
  )

# summarise values for each bin
# `n = n()` creates a column named n (points per bin); the global `n` above
# is still the number of bins.
dat <- dat %>%
  select(bin, x.bin, y.bin, x, y, angle) %>%
  group_by(bin) %>%
  mutate(
    angle.mean = mean(angle),
    n = n()
  ) %>%
  ungroup()

# add x,y-coords for centre points of each bin
# Each centre is the lower edge of the bin plus half the bin width.
x.bin.coords <- data.frame(
  x.bin = seq_len(n),
  x.bin.coord = x.range[seq_len(n)] + diff(x.range) / 2
)
y.bin.coords <- data.frame(
  y.bin = seq_len(n),
  y.bin.coord = y.range[seq_len(n)] + diff(y.range) / 2
)
dat <- left_join(dat, x.bin.coords, by = "x.bin")
dat <- left_join(dat, y.bin.coords, by = "y.bin")

# plot
# Inside aes(), `n` refers to the per-bin count column of `dat`.
ggplot(data = dat, aes(x, y)) +
  geom_bin2d(binwidth = c(diff(x.range)[1], diff(y.range)[1])) +
  geom_point(aes(x = x.bin.coord, y = y.bin.coord)) +
  geom_spoke(
    aes(x = x.bin.coord, y = y.bin.coord, angle = angle.mean, radius = n / 2),
    arrow = arrow(length = unit(0.2, "cm"))
  ) +
  coord_equal()
我有包含 x、y 坐标和航向角的数据,我想将它们分到 2D 箱中,以便计算每个箱的平均航向,并使用 ggplot 的 geom_spoke 绘图。
这是我想要执行的操作示例,其中的分箱是手动创建的:
# Example data: 100 points with x and y drawn by runif() between 0 and 100
# and an angle drawn between 0 and 2*pi.
set.seed(1)
dat <- data.frame(
  x = runif(100, 0, 100),
  y = runif(100, 0, 100),
  angle = runif(100, 0, 2 * pi)
)

# Manual binning: one summary row per 50 x 50 quadrant, placed at the
# quadrant centre, holding the mean angle and the number of points.
# NOTE(review): mean(angle) is an arithmetic mean; if the angles are headings
# that wrap around at 2*pi a circular mean may be intended -- confirm.
bins <- rbind(
  # bottom left
  dat %>%
    filter(x < 50, y < 50) %>%
    summarise(x = 25, y = 25, angle = mean(angle), n = n()),
  # bottom right
  dat %>%
    filter(x > 50, y < 50) %>%
    summarise(x = 75, y = 25, angle = mean(angle), n = n()),
  # top left
  dat %>%
    filter(x < 50, y > 50) %>%
    summarise(x = 25, y = 75, angle = mean(angle), n = n()),
  # top right
  dat %>%
    filter(x > 50, y > 50) %>%
    summarise(x = 75, y = 75, angle = mean(angle), n = n())
)

# Plot one point per quadrant with an arrow whose direction is the mean
# angle and whose length is half the number of points in the quadrant.
ggplot(bins, aes(x = x, y = y)) +
  geom_point() +
  geom_spoke(
    aes(angle = angle, radius = n / 2),
    arrow = arrow(length = unit(0.2, "cm"))
  ) +
  scale_x_continuous(limits = c(0, 100)) +
  scale_y_continuous(limits = c(0, 100)) +
  coord_equal()
# Heatmap of point counts in 50 x 50 bins.
p <- ggplot(data = dat, mapping = aes(x = x, y = y)) +
  coord_equal() +
  geom_bin2d(binwidth = c(50, 50))
# ggplot_build(p)$data[[1]]  # uncomment to access the binned data
但我似乎找不到一种方法,在传递给 geom_spoke 之前汇总每个 bin 的其他变量(例如航向)。如果不先分箱,我的图看起来像这样:
这是一种方法。您只需一次性确定每个维度(x 和 y)中 bin 的数量/范围,其余部分都由代码自动处理:
# Bin edges for each dimension; change `n` here to adjust the range and
# the number of bins.
x.range <- pretty(dat[["x"]], n = 3)
y.range <- pretty(dat[["y"]], n = 3)
> x.range
[1] 0 50 100
> y.range
[1] 0 50 100
根据每一行落入的 x 和 y 间隔自动将每一行分配给一个 bin:
# Assign each row to a bin from the interval its x and y values fall into.
# findInterval() with left.open = TRUE counts the breaks strictly below each
# value, so bin i covers (range[i], range[i + 1]] -- the same rule as
# max(which(x > x.range)), but vectorised, so rowwise() is not needed.
# rightmost.closed = TRUE additionally puts a value equal to the lowest
# break into bin 1, where max(which(...)) would give -Inf with a warning.
# as_tibble() keeps the result a tibble, as rowwise() %>% ungroup() did.
dat <- dat %>%
  as_tibble() %>%
  mutate(
    x.bin = findInterval(
      x, x.range,
      left.open = TRUE, rightmost.closed = TRUE
    ),
    y.bin = findInterval(
      y, y.range,
      left.open = TRUE, rightmost.closed = TRUE
    ),
    bin = paste(x.bin, y.bin, sep = "_")
  )
> head(dat)
# A tibble: 6 x 6
x y angle x.bin y.bin bin
<dbl> <dbl> <dbl> <int> <int> <chr>
1 26.55087 65.47239 1.680804 1 2 1_2
2 37.21239 35.31973 1.373789 1 1 1_1
3 57.28534 27.02601 3.247130 2 1 2_1
4 90.82078 99.26841 1.689866 2 2 2_2
5 20.16819 63.34933 1.138314 1 2 1_2
6 89.83897 21.32081 3.258310 2 1 2_1
计算每个 bin 的平均值:
# Add per-bin summaries to every row: mean x, mean y, mean angle and the
# number of points in the row's bin. Grouping is dropped again afterwards.
dat <- ungroup(
  mutate(
    group_by(dat, bin),
    x.mean = mean(x),
    y.mean = mean(y),
    angle.mean = mean(angle),
    n = n()
  )
)
> head(dat)
# A tibble: 6 x 10
x y angle x.bin y.bin bin x.mean y.mean angle.mean n
<dbl> <dbl> <dbl> <int> <int> <chr> <dbl> <dbl> <dbl> <int>
1 26.55087 65.47239 1.680804 1 2 1_2 26.66662 68.56461 2.672454 29
2 37.21239 35.31973 1.373789 1 1 1_1 33.05887 28.86027 2.173177 23
3 57.28534 27.02601 3.247130 2 1 2_1 74.71214 24.99131 3.071629 23
4 90.82078 99.26841 1.689866 2 2 2_2 77.05622 77.91031 3.007859 25
5 20.16819 63.34933 1.138314 1 2 1_2 26.66662 68.56461 2.672454 29
6 89.83897 21.32081 3.258310 2 1 2_1 74.71214 24.99131 3.071629 23
绘图时无需硬编码任何 bin 数量或 bin 宽度:
# Filled 2D bins with one point and one arrow per row, drawn at the bin's
# mean x/y position; bin widths come from the spacing of the bin edges.
ggplot(data = dat, mapping = aes(x = x, y = y, fill = bin)) +
  geom_bin2d(
    binwidth = c(x.range[2] - x.range[1], y.range[2] - y.range[1])
  ) +
  geom_point(aes(x = x.mean, y = y.mean)) +
  geom_spoke(
    aes(x = x.mean, y = y.mean, angle = angle.mean, radius = n / 2),
    arrow = arrow(length = unit(0.2, "cm"))
  ) +
  coord_equal()
填充调色板的选择、图例标签、绘图标题等其他细节可以随后进行调整。
只是为了扩展 @Z.Lin 的回答,这里有一个修改版本,它把绘图点放在每个 bin 的中心,而不是 x、y 坐标的平均位置。如果有比使用 left_join 更优雅的解决方案,我很乐意听到。
# data
set.seed(1)
dat <- data.frame(
  x = runif(100, 0, 100),
  y = runif(100, 0, 100),
  angle = runif(100, 0, 2 * pi)
)

# set parameters
# x.max and y.max must be assigned before use; 100 matches the upper limit
# passed to runif() above.
n <- 2        # number of bins per dimension
x.max <- 100  # maximum x value
y.max <- 100  # maximum y value
x.range <- seq(0, x.max, length.out = n + 1)
y.range <- seq(0, y.max, length.out = n + 1)

# bin data
# findInterval() with left.open = TRUE counts the breaks strictly below each
# value, so bin i covers (range[i], range[i + 1]] -- the same rule as
# max(which(x > x.range)), but vectorised. rightmost.closed = TRUE puts a
# value equal to the lowest break into bin 1 instead of producing -Inf.
dat <- dat %>%
  as_tibble() %>%
  mutate(
    x.bin = findInterval(
      x, x.range,
      left.open = TRUE, rightmost.closed = TRUE
    ),
    y.bin = findInterval(
      y, y.range,
      left.open = TRUE, rightmost.closed = TRUE
    ),
    bin = paste(x.bin, y.bin, sep = "_")
  )

# summarise values for each bin
# `n = n()` creates a column named n (points per bin); the global `n` above
# is still the number of bins.
dat <- dat %>%
  select(bin, x.bin, y.bin, x, y, angle) %>%
  group_by(bin) %>%
  mutate(
    angle.mean = mean(angle),
    n = n()
  ) %>%
  ungroup()

# add x,y-coords for centre points of each bin
# Each centre is the lower edge of the bin plus half the bin width.
x.bin.coords <- data.frame(
  x.bin = seq_len(n),
  x.bin.coord = x.range[seq_len(n)] + diff(x.range) / 2
)
y.bin.coords <- data.frame(
  y.bin = seq_len(n),
  y.bin.coord = y.range[seq_len(n)] + diff(y.range) / 2
)
dat <- left_join(dat, x.bin.coords, by = "x.bin")
dat <- left_join(dat, y.bin.coords, by = "y.bin")

# plot
# Inside aes(), `n` refers to the per-bin count column of `dat`.
ggplot(data = dat, aes(x, y)) +
  geom_bin2d(binwidth = c(diff(x.range)[1], diff(y.range)[1])) +
  geom_point(aes(x = x.bin.coord, y = y.bin.coord)) +
  geom_spoke(
    aes(x = x.bin.coord, y = y.bin.coord, angle = angle.mean, radius = n / 2),
    arrow = arrow(length = unit(0.2, "cm"))
  ) +
  coord_equal()