如何在 R 中为以下数据创建直方图
How to create histogram for below data in R
我的 Dataframe 有 4 个 obs 和 6 个变量
MonthYr : 202101 202102 202103 202104
Count1 : 123456 123425 123452 123455
Count2 : 123456 123429 123453 123454
Count3 : 123455 123428 123455 123455
Count4 : 123455 123428 123455 123455
Count5 : 123455 123428 123455 123455
输出:
enter image description here
我已附上一张图片,展示我希望得到的图表。此外,数据还会不断增长:下个月再运行代码时,应自动加入 5 月及以后的新计数。
谁能帮我用 R 编写直方图代码?
提前致谢
您无法真正用如此少的数据点制作直方图,但您可以使用箱线图可视化每个“计数”变量的分布,例如
# Build the example data in "wide" form: one row per month (YYYYMM code)
# and one column per count series.
df <- data.frame(MonthYr = c(202101, 202102, 202103, 202104),
                 Count1 = c(123456, 123425, 123452, 123455),
                 Count2 = c(123456, 123429, 123453, 123454),
                 Count3 = c(123455, 123428, 123455, 123455),
                 Count4 = c(123455, 123428, 123455, 123455),
                 Count5 = c(123455, 123428, 123455, 123455))

# Turn MonthYr into a factor labelled "Mon - YYYY". Levels and labels are
# derived from the codes present in the data (month = code %% 100,
# year = code %/% 100), so a newly added month gets its own level instead
# of turning into NA as it would with a hard-coded level list.
month_codes <- sort(unique(df$MonthYr))
df$MonthYr <- factor(x = df$MonthYr,
                     levels = month_codes,
                     labels = paste(month.abb[month_codes %% 100], "-",
                                    month_codes %/% 100))

# Reshape to "long" format: every column after MonthYr is stacked into a
# single Value column (reshape() also adds `time` and `id` columns).
df2 <- reshape(df, varying = names(df)[-1], v.names = "Value",
               direction = "long")

# x is a factor, so plot() draws one box plot of Value per MonthYr level.
plot(x = df2$MonthYr, y = df2$Value, xlab = "Month - Year",
     ylab = "Values", main = "Distribution of Counts for each Timepoint")
对于直方图:
# Pull out the values that belong to each month. The names contain a
# space, so they are non-syntactic and have to be written in backticks.
`Jan 2021` <- df2$Value[df2$MonthYr == "Jan - 2021"]
`Feb 2021` <- df2$Value[df2$MonthYr == "Feb - 2021"]
`Mar 2021` <- df2$Value[df2$MonthYr == "Mar - 2021"]
`Apr 2021` <- df2$Value[df2$MonthYr == "Apr - 2021"]

# Close the current graphics device to start from a clean state, but only
# when one is actually open: dev.off() is an error if just the null
# device (number 1) is active.
if (dev.cur() > 1L) {
  dev.off()
}

# Draw the four histograms in a 2 x 2 grid, then restore the previous
# layout so that later plots are not squeezed into a quarter of the page.
old_par <- par(mfrow = c(2, 2))
hist(`Jan 2021`, las = 2, xlab = "")
hist(`Feb 2021`, las = 2, xlab = "")
hist(`Mar 2021`, las = 2, xlab = "")
hist(`Apr 2021`, las = 2, xlab = "")
par(old_par)
编辑
您可以在同一张图上绘制全部 4 个直方图,例如:
# Common bin edges for all months: half a unit wide, spanning the data.
breaks <- seq(min(df2$Value), max(df2$Value), 0.5)

# Dummy y values, used only to set up an empty plot whose y axis runs from
# 1 to 4. The length follows the data so that x and y always match
# (a fixed length of 20 breaks as soon as rows are added).
yaxis <- seq(1, 4, length.out = length(df2$Value))
plot(x = df2$Value, y = yaxis, type = "n", ylab = "Count", xlab = "Value")

# Overlay one histogram per month on the empty plot. The Mar and Apr bin
# edges are shifted slightly so their bars do not sit exactly on top of
# the others.
hist(`Jan 2021`, add = TRUE, breaks = breaks, col = 1, border = 1)
hist(`Feb 2021`, add = TRUE, breaks = breaks, col = 2, border = 2)
hist(`Mar 2021`, add = TRUE, breaks = breaks + 0.33, col = 3, border = 3)
hist(`Apr 2021`, add = TRUE, breaks = breaks + 0.66, col = 4, border = 4)
legend("topleft", c("Jan", "Feb", "Mar", "Apr"), fill = 1:4)
但是,您可以看到它们会重叠(我给 breaks(分组边界)加了一个很小的偏移量,使它们不会完全重叠)。我认为更好的做法是使用 ggplot2 图形库:
# ggplot2 has to be attached before ggplot() can be called.
library(ggplot2)

# Stacked bar chart: one bar per distinct Value, with the row count on the
# y axis and each bar's segments coloured by month.
ggplot(df2, aes(x = Value, fill = MonthYr)) +
  geom_bar() +
  scale_y_continuous(breaks = 1:10)  # integer tick marks on the count axis
或者,并排显示:
# Same counts as a bar chart, but with the months placed side by side
# instead of stacked; preserve = "single" is passed to position_dodge().
ggplot(data = df2, mapping = aes(x = Value, fill = MonthYr)) +
  geom_bar(position = position_dodge(preserve = "single"))
或者使用分面(facet):
# One panel per month, each showing the count of rows per Value.
ggplot(data = df2, mapping = aes(x = Value, fill = MonthYr)) +
  geom_bar() +
  facet_wrap(facets = vars(MonthYr))
编辑 2:
根据您下面的评论和您现在提供的图片,您不需要直方图:您需要 barchart/barplot。这是使用 ggplot 库的条形图示例
# Build the example data in "wide" form: one row per month (YYYYMM code)
# and one column per count series.
df <- data.frame(MonthYr = c(202101, 202102, 202103, 202104),
                 Count1 = c(123456, 123425, 123452, 123455),
                 Count2 = c(123456, 123429, 123453, 123454),
                 Count3 = c(123455, 123428, 123455, 123455),
                 Count4 = c(123455, 123428, 123455, 123455),
                 Count5 = c(123455, 123428, 123455, 123455))

# Turn MonthYr into a factor labelled "Mon - YYYY". Levels and labels are
# derived from the codes present in the data (month = code %% 100,
# year = code %/% 100), so a newly added month gets its own level instead
# of turning into NA as it would with a hard-coded level list.
month_codes <- sort(unique(df$MonthYr))
df$MonthYr <- factor(x = df$MonthYr,
                     levels = month_codes,
                     labels = paste(month.abb[month_codes %% 100], "-",
                                    month_codes %/% 100))

# Reshape to "long" format: every column after MonthYr is stacked into a
# single Value column (reshape() also adds `time` and `id` columns).
df2 <- reshape(df, varying = names(df)[-1], v.names = "Value",
               direction = "long")

# `time` numbers the stacked count columns 1, 2, ... in their original
# order; turn it into a labelled factor for the legend. The number of
# series follows the number of count columns rather than a fixed 5.
series_ids <- seq_len(ncol(df) - 1)
df2$Count <- factor(df2$time,
                    levels = series_ids,
                    labels = paste("Count", series_ids))

# Print the long data for inspection.
df2
library(ggplot2)
library(gridExtra)

# Visible y-axis window, derived from the data instead of hard-coded so
# that future values are not cut off. The padding (20 below, 4 above)
# reproduces the original 123405-123460 window for the example data.
value_window <- range(df2$Value) + c(-20, 4)

# Grouped bar chart: one cluster of bars per month, one bar per count
# series. geom_col() plots the Value column as the bar height, and
# coord_cartesian() zooms the view rather than filtering the data.
plot1 <- ggplot(df2, aes(x = MonthYr, y = Value, fill = Count)) +
  geom_col(width = 0.5, position = position_dodge(0.7)) +
  coord_cartesian(ylim = value_window) +
  theme_dark(base_size = 16) +
  theme(axis.title = element_blank(),
        legend.position = "bottom")

# Render the wide data frame as a table and place it under the chart.
table_theme <- ttheme_default(base_size = 14, padding = unit(c(8, 8), "mm"))
table1 <- tableGrob(df, rows = NULL, theme = table_theme)
grid.arrange(plot1, table1, nrow = 2, heights = c(1, 0.75))
我的 Dataframe 有 4 个 obs 和 6 个变量
MonthYr : 202101 202102 202103 202104
Count1 : 123456 123425 123452 123455
Count2 : 123456 123429 123453 123454
Count3 : 123455 123428 123455 123455
Count4 : 123455 123428 123455 123455
Count5 : 123455 123428 123455 123455
输出: enter image description here
我已附上一张图片,展示我希望得到的图表。此外,数据还会不断增长:下个月再运行代码时,应自动加入 5 月及以后的新计数。
谁能帮我用 R 编写直方图代码?
提前致谢
您无法真正用如此少的数据点制作直方图,但您可以使用箱线图可视化每个“计数”变量的分布,例如
# Build the example data in "wide" form: one row per month (YYYYMM code)
# and one column per count series.
df <- data.frame(MonthYr = c(202101, 202102, 202103, 202104),
                 Count1 = c(123456, 123425, 123452, 123455),
                 Count2 = c(123456, 123429, 123453, 123454),
                 Count3 = c(123455, 123428, 123455, 123455),
                 Count4 = c(123455, 123428, 123455, 123455),
                 Count5 = c(123455, 123428, 123455, 123455))

# Turn MonthYr into a factor labelled "Mon - YYYY". Levels and labels are
# derived from the codes present in the data (month = code %% 100,
# year = code %/% 100), so a newly added month gets its own level instead
# of turning into NA as it would with a hard-coded level list.
month_codes <- sort(unique(df$MonthYr))
df$MonthYr <- factor(x = df$MonthYr,
                     levels = month_codes,
                     labels = paste(month.abb[month_codes %% 100], "-",
                                    month_codes %/% 100))

# Reshape to "long" format: every column after MonthYr is stacked into a
# single Value column (reshape() also adds `time` and `id` columns).
df2 <- reshape(df, varying = names(df)[-1], v.names = "Value",
               direction = "long")

# x is a factor, so plot() draws one box plot of Value per MonthYr level.
plot(x = df2$MonthYr, y = df2$Value, xlab = "Month - Year",
     ylab = "Values", main = "Distribution of Counts for each Timepoint")
对于直方图:
# Pull out the values that belong to each month. The names contain a
# space, so they are non-syntactic and have to be written in backticks.
`Jan 2021` <- df2$Value[df2$MonthYr == "Jan - 2021"]
`Feb 2021` <- df2$Value[df2$MonthYr == "Feb - 2021"]
`Mar 2021` <- df2$Value[df2$MonthYr == "Mar - 2021"]
`Apr 2021` <- df2$Value[df2$MonthYr == "Apr - 2021"]

# Close the current graphics device to start from a clean state, but only
# when one is actually open: dev.off() is an error if just the null
# device (number 1) is active.
if (dev.cur() > 1L) {
  dev.off()
}

# Draw the four histograms in a 2 x 2 grid, then restore the previous
# layout so that later plots are not squeezed into a quarter of the page.
old_par <- par(mfrow = c(2, 2))
hist(`Jan 2021`, las = 2, xlab = "")
hist(`Feb 2021`, las = 2, xlab = "")
hist(`Mar 2021`, las = 2, xlab = "")
hist(`Apr 2021`, las = 2, xlab = "")
par(old_par)
编辑
您可以使用例如
# Plot all 4 histograms on the same figure.
# Common bin edges for all months: half a unit wide, spanning the data.
breaks <- seq(min(df2$Value), max(df2$Value), 0.5)

# Dummy y values, used only to set up an empty plot whose y axis runs from
# 1 to 4. The length follows the data so that x and y always match
# (a fixed length of 20 breaks as soon as rows are added).
yaxis <- seq(1, 4, length.out = length(df2$Value))
plot(x = df2$Value, y = yaxis, type = "n", ylab = "Count", xlab = "Value")

# Overlay one histogram per month on the empty plot. The Mar and Apr bin
# edges are shifted slightly so their bars do not sit exactly on top of
# the others.
hist(`Jan 2021`, add = TRUE, breaks = breaks, col = 1, border = 1)
hist(`Feb 2021`, add = TRUE, breaks = breaks, col = 2, border = 2)
hist(`Mar 2021`, add = TRUE, breaks = breaks + 0.33, col = 3, border = 3)
hist(`Apr 2021`, add = TRUE, breaks = breaks + 0.66, col = 4, border = 4)
legend("topleft", c("Jan", "Feb", "Mar", "Apr"), fill = 1:4)
但是,您可以看到它们会重叠(我给 breaks(分组边界)加了一个很小的偏移量,使它们不会完全重叠)。我认为更好的做法是使用 ggplot2 图形库:
# ggplot2 has to be attached before ggplot() can be called.
library(ggplot2)

# Stacked bar chart: one bar per distinct Value, with the row count on the
# y axis and each bar's segments coloured by month.
ggplot(df2, aes(x = Value, fill = MonthYr)) +
  geom_bar() +
  scale_y_continuous(breaks = 1:10)  # integer tick marks on the count axis
或者,并排显示:
# Same counts as a bar chart, but with the months placed side by side
# instead of stacked; preserve = "single" is passed to position_dodge().
ggplot(data = df2, mapping = aes(x = Value, fill = MonthYr)) +
  geom_bar(position = position_dodge(preserve = "single"))
或者使用分面(facet):
# One panel per month, each showing the count of rows per Value.
ggplot(data = df2, mapping = aes(x = Value, fill = MonthYr)) +
  geom_bar() +
  facet_wrap(facets = vars(MonthYr))
编辑 2:
根据您下面的评论和您现在提供的图片,您不需要直方图:您需要 barchart/barplot。这是使用 ggplot 库的条形图示例
# Build the example data in "wide" form: one row per month (YYYYMM code)
# and one column per count series.
df <- data.frame(MonthYr = c(202101, 202102, 202103, 202104),
                 Count1 = c(123456, 123425, 123452, 123455),
                 Count2 = c(123456, 123429, 123453, 123454),
                 Count3 = c(123455, 123428, 123455, 123455),
                 Count4 = c(123455, 123428, 123455, 123455),
                 Count5 = c(123455, 123428, 123455, 123455))

# Turn MonthYr into a factor labelled "Mon - YYYY". Levels and labels are
# derived from the codes present in the data (month = code %% 100,
# year = code %/% 100), so a newly added month gets its own level instead
# of turning into NA as it would with a hard-coded level list.
month_codes <- sort(unique(df$MonthYr))
df$MonthYr <- factor(x = df$MonthYr,
                     levels = month_codes,
                     labels = paste(month.abb[month_codes %% 100], "-",
                                    month_codes %/% 100))

# Reshape to "long" format: every column after MonthYr is stacked into a
# single Value column (reshape() also adds `time` and `id` columns).
df2 <- reshape(df, varying = names(df)[-1], v.names = "Value",
               direction = "long")

# `time` numbers the stacked count columns 1, 2, ... in their original
# order; turn it into a labelled factor for the legend. The number of
# series follows the number of count columns rather than a fixed 5.
series_ids <- seq_len(ncol(df) - 1)
df2$Count <- factor(df2$time,
                    levels = series_ids,
                    labels = paste("Count", series_ids))

# Print the long data for inspection.
df2
library(ggplot2)
library(gridExtra)

# Visible y-axis window, derived from the data instead of hard-coded so
# that future values are not cut off. The padding (20 below, 4 above)
# reproduces the original 123405-123460 window for the example data.
value_window <- range(df2$Value) + c(-20, 4)

# Grouped bar chart: one cluster of bars per month, one bar per count
# series. geom_col() plots the Value column as the bar height, and
# coord_cartesian() zooms the view rather than filtering the data.
plot1 <- ggplot(df2, aes(x = MonthYr, y = Value, fill = Count)) +
  geom_col(width = 0.5, position = position_dodge(0.7)) +
  coord_cartesian(ylim = value_window) +
  theme_dark(base_size = 16) +
  theme(axis.title = element_blank(),
        legend.position = "bottom")

# Render the wide data frame as a table and place it under the chart.
table_theme <- ttheme_default(base_size = 14, padding = unit(c(8, 8), "mm"))
table1 <- tableGrob(df, rows = NULL, theme = table_theme)
grid.arrange(plot1, table1, nrow = 2, heights = c(1, 0.75))