GGPlot2: Error: Aesthetics must be either length 1 or the same as the data (16): x, y, group

Question

下面是我原以为很简单的折线图代码：

# Attempt to draw a single line (group = 1) with points from the summary data frame.
# NOTE(review): `years` and `calculations` do not appear to be columns of the
# data frame passed as `data`; in the script shown in this post they are
# standalone vectors built from the data frame's dimnames, so their lengths
# need not match the number of rows -- confirm against the reported error.
ggplot(data=top15andAllDatasummary.df, aes(x=years, y=calculations, group=1)) +
    geom_line() +
    geom_point()

我得到了这个错误：

Error: Aesthetics must be either length 1 or the same as the data (16): x, y, group

我的数据保存在 R 的一个数据框中。X 轴是年份，Y 轴是我针对每一年计算得到的一些统计量（共 16 个）。

编辑添加

# dput()-style dump of a data frame: 15 columns named `2001`..`2015` (one per
# year) and 6 named rows, each row holding one summary statistic.
structure(list(`2001` = c(349.315750645518, 217.47436370343, 
5.17963850977499, 126.661748432313, 57, 39), `2002` = c(703.26693877551, 
429.92, 9.32897959183673, 264.017959183673, 161, 108), `2003` = c(314.897774687065, 
193.792420027816, 4.08936022253129, 117.015994436718, 54, 37), 
    `2004` = c(305.988451086957, 190.680027173913, 3.87839673913043, 
    111.430027173913, 55, 38), `2005` = c(118.528015659408, 74.3175923660387, 
    1.50942011255199, 42.7010031808172, 10, 8), `2006` = c(120.531992244304, 
    73.8279205041202, 1.54362578768783, 45.1604459524964, 10, 
    8), `2007` = c(113.973899988451, 69.7619817530893, 1.44693382607691, 
    42.7649844092851, 10, 8), `2008` = c(110.676242590059, 67.3693570451436, 
    1.36285909712722, 41.9440264477884, 9, 7), `2009` = c(101.965558714192, 
    63.1446534003936, 1.22982724688388, 37.5910780669145, 9, 
    7), `2010` = c(93.9744360902256, 59.8894736842105, 1.14199785177229, 
    32.9429645542427, 9, 7), `2011` = c(91.8911316298046, 58.5660296328108, 
    1.15675327464033, 32.1683487223534, 9, 7), `2012` = c(91.2302181013592, 
    58.598356337583, 1.16773785691708, 31.4641239068591, 8, 6
    ), `2013` = c(87.1390443392165, 55.0509040034438, 1.10277658200603, 
    30.9853637537667, 8, 6), `2014` = c(85.7812132234942, 56.0456831068792, 
    1.09725045469134, 28.6382796619236, 8, 6), `2015` = c(88.331452900479, 
    58.526237360298, 1.22362959020756, 28.5815859499734, 8, 6
    )), .Names = c("2001", "2002", "2003", "2004", "2005", "2006", 
"2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", 
"2015"), row.names = c("AllDataMeanByYear", "AllDataMeanAggAssault", 
"AllDataMeanMurderManSlaughter", "AllDataMeanRobbery", "AllDataMedianByYear", 
"AllDataMedianAggAssault"), class = "data.frame")


All Code:

 ## Total
# Per-row total: murder/manslaughter + robbery + aggravated assault.
lwdata$total <- lwdata$murdermanslaughter +
  lwdata$Robbery +
  lwdata$Aggravated_assault

## Data Calculations Top 15
# The cutoff is the 15th `total` once rows are ordered by Year and then by
# total (both descending); every row whose total reaches the cutoff is kept.
# local() keeps the intermediates out of the global environment.
top15 <- local({
  ranked_rows <- order(lwdata$Year, lwdata$total, decreasing = TRUE)
  cutoff <- lwdata$total[ranked_rows][15]
  lwdata[lwdata$total >= cutoff, ]
})

## Top 15 Means (one value per Year)
Top15MeanByYear <- tapply(X = top15$total, INDEX = top15$Year, FUN = mean)
Top15MeanAggAssault <- tapply(
  X = top15$Aggravated_assault, INDEX = top15$Year, FUN = mean
)
Top15MeanMurderManSlaughter <- tapply(
  X = top15$murdermanslaughter, INDEX = top15$Year, FUN = mean
)
Top15MeanRob <- tapply(X = top15$Robbery, INDEX = top15$Year, FUN = mean)

## All Data Means (one value per Year)
AllDataMeanByYear <- tapply(X = lwdata$total, INDEX = lwdata$Year, FUN = mean)
AllDataMeanAggAssault <- tapply(
  X = lwdata$Aggravated_assault, INDEX = lwdata$Year, FUN = mean
)
AllDataMeanMurderManSlaughter <- tapply(
  X = lwdata$murdermanslaughter, INDEX = lwdata$Year, FUN = mean
)
AllDataMeanRobbery <- tapply(X = lwdata$Robbery, INDEX = lwdata$Year, FUN = mean)

## Top 15 Medians (one value per Year)
Top15MedianByYear <- tapply(X = top15$total, INDEX = top15$Year, FUN = median)
Top15MedianAggAssault <- tapply(
  X = top15$Aggravated_assault, INDEX = top15$Year, FUN = median
)
Top15MedianMurderManSlaughter <- tapply(
  X = top15$murdermanslaughter, INDEX = top15$Year, FUN = median
)
Top15MedianRob <- tapply(X = top15$Robbery, INDEX = top15$Year, FUN = median)

## All Data Medians (one value per Year)
AllDataMedianByYear <- tapply(X = lwdata$total, INDEX = lwdata$Year, FUN = median)
AllDataMedianAggAssault <- tapply(
  X = lwdata$Aggravated_assault, INDEX = lwdata$Year, FUN = median
)
AllDataMedianMurderManSlaughter <- tapply(
  X = lwdata$murdermanslaughter, INDEX = lwdata$Year, FUN = median
)
AllDataMedianRobbery <- tapply(X = lwdata$Robbery, INDEX = lwdata$Year, FUN = median)
## Rounding Data To Two Decimal Points
# Every per-year summary vector is rounded in place to 2 decimal places.
# Top 15 means
Top15MeanByYear <- round(Top15MeanByYear, digits = 2)
Top15MeanAggAssault <- round(Top15MeanAggAssault, digits = 2)
Top15MeanMurderManSlaughter <- round(Top15MeanMurderManSlaughter, digits = 2)
Top15MeanRob <- round(Top15MeanRob, digits = 2)
# All-data means
AllDataMeanByYear <- round(AllDataMeanByYear, digits = 2)
AllDataMeanAggAssault <- round(AllDataMeanAggAssault, digits = 2)
# Fix: this line used to round AllDataMeanAggAssault a second time, which left
# AllDataMeanMurderManSlaughter unrounded.
AllDataMeanMurderManSlaughter <- round(AllDataMeanMurderManSlaughter, digits = 2)
AllDataMeanRobbery <- round(AllDataMeanRobbery, digits = 2)
# Top 15 medians
Top15MedianByYear <- round(Top15MedianByYear, digits = 2)
Top15MedianAggAssault <- round(Top15MedianAggAssault, digits = 2)
Top15MedianMurderManSlaughter <- round(Top15MedianMurderManSlaughter, digits = 2)
Top15MedianRob <- round(Top15MedianRob, digits = 2)
# All-data medians
AllDataMedianByYear <- round(AllDataMedianByYear, digits = 2)
AllDataMedianAggAssault <- round(AllDataMedianAggAssault, digits = 2)
AllDataMedianMurderManSlaughter <- round(AllDataMedianMurderManSlaughter, digits = 2)
AllDataMedianRobbery <- round(AllDataMedianRobbery, digits = 2)
## Summaries
# Stack the per-year vectors as rows: 8 all-data rows and 8 top-15 rows, each
# with one column per year; the combined matrix therefore has 16 rows.
AllDataSummary <- rbind(
  AllDataMeanByYear, AllDataMeanAggAssault,
  AllDataMeanMurderManSlaughter, AllDataMeanRobbery,
  AllDataMedianByYear, AllDataMedianAggAssault,
  AllDataMedianMurderManSlaughter, AllDataMedianRobbery
)
Top15Summary <- rbind(
  Top15MeanByYear, Top15MeanAggAssault,
  Top15MeanMurderManSlaughter, Top15MeanRob,
  Top15MedianByYear, Top15MedianAggAssault,
  Top15MedianMurderManSlaughter, Top15MedianRob
)
Top15andAllDatasummary <- rbind(AllDataSummary, Top15Summary)

## Class of New Items
class(AllDataSummary)
class(Top15Summary)
# Fix: R names are case-sensitive; the object created above is
# `Top15andAllDatasummary` (capital T), so the lowercase spelling used here
# before failed with "object not found".
class(Top15andAllDatasummary)

## Converting Matrices to Data Frames
AllDataSummary.df <- as.data.frame(AllDataSummary)
Top15Summary.df <- as.data.frame(Top15Summary)
Top15andAllDatasummary.df <- as.data.frame(Top15andAllDatasummary)

## Checking of New Classes
class(AllDataSummary.df)
class(Top15Summary.df)
class(Top15andAllDatasummary.df)

## Verifications for Names of New Components
colnames(Top15andAllDatasummary.df)
rownames(Top15andAllDatasummary.df)
## New Components
# Year labels are the column names of the combined summary data frame.
years <- colnames(Top15andAllDatasummary.df)
# Calculation labels are the row names (one per summary statistic).
# Fix: this previously read colnames() again, making `calculations` an exact
# duplicate of `years`.
calculations <- rownames(Top15andAllDatasummary.df)
## Chicago
# Rows of `top15` whose City equals "Chicago"; which() drops NA comparisons.
Chicago <- top15[which(top15$City == "Chicago"), ]

## Basic Plots
# Base-graphics line chart of Chicago's `total` against `Year`.
plot(
  x = Chicago$Year,
  y = Chicago$total,
  type = "l",
  xlab = "Year",
  ylab = "Total       Violent Crime (minus rape)",
  main = "Chicago-Specific Data",
  col = "blue"
)

## Data Types for Chicago
str(Chicago)

完整数据集（超过 100K）的链接在这里。

Answer 1

根据你的数据，我会这样做：

library(tidyr)

# Copy the statistic names (currently row names) into a regular column so
# they are kept as an identifier through the reshape.
top15andAllDatasummary.df$variable <- rownames(top15andAllDatasummary.df)

# Wide -> long: every column except `variable` is folded into a
# (years, calculations) key/value pair.
df.long <- gather(
  data = top15andAllDatasummary.df,
  key = years,
  value = calculations,
  -variable
)

此 gather 调用的目的是将您的数据重组为以下形式：

# Preview the first rows of the reshaped data; the commented lines below are
# the printed result (one row per variable/year combination).
head(df.long)
#                        variable years calculations
# 1             AllDataMeanByYear  2001   349.315751
# 2         AllDataMeanAggAssault  2001   217.474364
# 3 AllDataMeanMurderManSlaughter  2001     5.179639
# 4            AllDataMeanRobbery  2001   126.661748
# 5           AllDataMedianByYear  2001    57.000000
# 6       AllDataMedianAggAssault  2001    39.000000

完成后，我们可以继续绘图：

# One line (plus points) per summary statistic: `variable` drives both the
# grouping and the colour.
ggplot(
  data = df.long,
  mapping = aes(
    x = years,
    y = calculations,
    group = variable,
    colour = variable
  )
) +
  geom_line() +
  geom_point()

这是您想要的结果吗？

Answer 2

您的数据框（下文称为 df）中每个年份占一列，每个计算变量对应一个行名。这属于“宽”（wide）格式数据：每一行包含多个数据值。ggplot 是为“长”（long）格式数据设计的：每一行只有一列存放数据值，其余各列用来说明该数据点的含义（即它代表哪个变量、来自哪一年）。

Hadley Wickham（他也是 ggplot 的作者）开发的 tidyverse 系列软件包，可以很方便地将数据在宽格式与长格式之间来回转换。

library(tidyverse)

# Move the row names into a `variable` column, then fold every other column
# into (year, value) pairs to get one row per variable/year combination.
df.new <- df %>%
  mutate(variable = rownames(df)) %>%
  gather(key = year, value = value, -variable)

                        variable year      value
1              AllDataMeanByYear 2001 349.315751
2          AllDataMeanAggAssault 2001 217.474364
3  AllDataMeanMurderManSlaughter 2001   5.179639
4             AllDataMeanRobbery 2001 126.661748
5            AllDataMedianByYear 2001  57.000000
6        AllDataMedianAggAssault 2001  39.000000
7              AllDataMeanByYear 2002 703.266939
8          AllDataMeanAggAssault 2002 429.920000
9  AllDataMeanMurderManSlaughter 2002   9.328980
10            AllDataMeanRobbery 2002 264.017959
11           AllDataMedianByYear 2002 161.000000
12       AllDataMedianAggAssault 2002 108.000000
13             AllDataMeanByYear 2003 314.897775
14         AllDataMeanAggAssault 2003 193.792420
15 AllDataMeanMurderManSlaughter 2003   4.089360
16            AllDataMeanRobbery 2003 117.015994
17           AllDataMedianByYear 2003  54.000000
18       AllDataMedianAggAssault 2003  37.000000
19             AllDataMeanByYear 2004 305.988451
20         AllDataMeanAggAssault 2004 190.680027
... and 70 more rows

然后就可以把这份长格式数据传给 ggplot。请注意，您最初的尝试使用了一个名为 "years" 的变量，而数据框中并不存在这一列。R（和 ggplot）无从知道您的列名（2001 至 2015）代表的是年份。

# Build the plot object: one coloured line per `variable` across `year`.
plot.years <- ggplot(
  data = df.new,
  mapping = aes(
    x = year,
    y = value,
    group = variable,
    colour = variable
  )
) +
  geom_line()

# Render the stored plot explicitly.
print(plot.years)

GGPlot2: Error: Aesthetics must be either length 1 or the same as the data (16): x, y, group

GGPlot2: Error: Aesthetics must be either length 1 or the same as the data (16): x, y, group

r

linegraph

ggplot2