从各种不同的计数在 R 中构建一个 tibble
Build a tibble in R from various different counts
我有一个相当简单的问题:
假设你有一个原始数据集,并通过对它进行过滤来计算出若干个值,每个值回答一个问题:那么应该如何把这些答案组装成一个数据框/tibble?
#load the packages
library(easypackages)
packages("tidyverse","readxl","sf","tmaptools","tmap","lubridate",
"lwgeom","Cairo","nngeo","purrr","scales", "ggthemes","janitor")
# Download the polling-station CSV and convert it to an sf point layer, using
# the Easting/Northing columns as coordinates in CRS 27700.
polls <- read.csv(
  url("https://www.caerphilly.gov.uk/CaerphillyDocs/FOI/Datasets_polling_stations_csv.aspx")
) %>%
  st_as_sf(coords = c("Easting", "Northing"), crs = 27700) %>%
  # Attach a random, non-repeating date from 2020-01-01..2020-05-31 to each row.
  # The sample size follows the actual row count (n()) instead of a hard-coded
  # 147, so the step keeps working if the published CSV gains or loses rows.
  # Sampling is without replacement, so the row count must not exceed the
  # number of days in the range.
  mutate(
    date = sample(
      seq(as.Date("2020/01/01"), as.Date("2020/05/31"), by = "day"),
      size = n()
    )
  )
# Spatially self-join the stations: each station is matched with every station
# whose 1000-unit buffer contains it (join = st_within). Self-matches are
# removed, the remaining matches per station are counted as
# number_of_neighbours, and the date difference between the two matched
# stations is stored in interval_date.
test_stack <- polls %>%
  st_join(st_buffer(polls, dist = 1000), join = st_within) %>%
  # A station always falls inside its own buffer; discard that pairing.
  filter(Ballot.Box.Polling.Station.x != Ballot.Box.Polling.Station.y) %>%
  add_count(Ballot.Box.Polling.Station.x) %>%
  rename(number_of_neighbours = n) %>%
  mutate(interval_date = date.x - date.y) %>%
  # Drops columns by position; the indices depend on the CSV's column order.
  subset(select = -c(6:8, 10, 11, 13:18))

# Optional summary step. It is kept commented out because, as a separate
# statement, distinct() would be called without a data argument and fail.
# To return only the number of neighbours per station, append ` %>%` to the
# subset() line above and uncomment the two lines below:
#   distinct(Ballot.Box.Polling.Station.x, number_of_neighbours, date.x) %>%
#   filter(number_of_neighbours >= 2)
# Give every station a numeric row id and an alternating label:
# even ids become "stuff", odd ids become "type".
polls <- mutate(
  polls,
  id = as.numeric(row_number()),
  thing = case_when(
    id %% 2 == 0 ~ "stuff",
    id %% 2 != 0 ~ "type"
  )
)

# Number of "stuff" stations in polling district AC.
polls %>%
  filter(thing == "stuff", Polling.District.Code == "AC") %>%
  count()

# Number of "type" stations in polling district IA.
polls %>%
  filter(thing == "type", Polling.District.Code == "IA") %>%
  count()
如何构建行名称有意义且列是计算值的数据框?
有点像
rowname  value
stuff AC 1
type IA  1
听起来你想按 `thing` 和 `Polling.District.Code` 这两列进行 `group_by`,然后对每个组执行 `summarize`,计算该组的 `length`。如果你想让汇总后的数据框去掉几何列,需要使用 `st_set_geometry(NULL)`:
# Count the rows in every (thing, Polling.District.Code) combination, keep the
# grouping on the result, then strip the geometry column so a plain tibble
# is returned.
polls %>%
  group_by(thing, Polling.District.Code) %>%
  summarise(count = n(), .groups = "keep") %>%
  st_drop_geometry()
#> # A tibble: 147 x 3
#> # Groups: thing, Polling.District.Code [147]
#> thing Polling.District.Code count
#> * <chr> <chr> <int>
#> 1 stuff AC 1
#> 2 stuff AE 1
#> 3 stuff BB1 1
#> 4 stuff CA1 1
#> 5 stuff CB1 1
#> 6 stuff CC 1
#> 7 stuff CE 1
#> 8 stuff DA2 1
#> 9 stuff DB1 1
#> 10 stuff DB3 1
#> # ... with 137 more rows
或者如果你想保留几何图形,使用:
# Count the rows in every (thing, Polling.District.Code) combination and keep
# the grouping; the geometry column is left in place on the result.
polls %>%
  group_by(thing, Polling.District.Code) %>%
  summarise(count = n(), .groups = "keep")
#> Simple feature collection with 147 features and 3 fields
#> geometry type: POINT
#> dimension: XY
#> bbox: xmin: 310399 ymin: 186331 xmax: 325960 ymax: 207788
#> projected CRS: OSGB 1936 / British National Grid
#> # A tibble: 147 x 4
#> # Groups: thing, Polling.District.Code [147]
#> thing Polling.District.Code count geometry
#> <chr> <chr> <int> <POINT [m]>
#> 1 stuff AC 1 (311777 206968)
#> 2 stuff AE 1 (311734 206047)
#> 3 stuff BB1 1 (310577 205577)
#> 4 stuff CA1 1 (314777 202748)
#> 5 stuff CB1 1 (314777 202748)
#> 6 stuff CC 1 (314622 203396)
#> 7 stuff CE 1 (315255 201843)
#> 8 stuff DA2 1 (315780 200318)
#> 9 stuff DB1 1 (314693 199774)
#> 10 stuff DB3 1 (315034 199159)
#> # ... with 137 more rows
我认为答案是bind_rows
# Compute each answer as its own one-row count, then stack the rows into a
# single table.
a <- count(filter(polls, thing == "stuff" & Polling.District.Code == "AC"))
b <- count(filter(polls, thing == "type" & Polling.District.Code == "IA"))
c <- bind_rows(a, b)
我有一个相当简单的问题:假设你有一个原始数据集,并通过对它进行过滤来计算出若干个值,每个值回答一个问题:那么应该如何把这些答案组装成一个数据框/tibble?
#load the packages
library(easypackages)
packages("tidyverse","readxl","sf","tmaptools","tmap","lubridate",
"lwgeom","Cairo","nngeo","purrr","scales", "ggthemes","janitor")
# Download the polling-station CSV and convert it to an sf point layer, using
# the Easting/Northing columns as coordinates in CRS 27700.
polls <- read.csv(
  url("https://www.caerphilly.gov.uk/CaerphillyDocs/FOI/Datasets_polling_stations_csv.aspx")
) %>%
  st_as_sf(coords = c("Easting", "Northing"), crs = 27700) %>%
  # Attach a random, non-repeating date from 2020-01-01..2020-05-31 to each row.
  # The sample size follows the actual row count (n()) instead of a hard-coded
  # 147, so the step keeps working if the published CSV gains or loses rows.
  # Sampling is without replacement, so the row count must not exceed the
  # number of days in the range.
  mutate(
    date = sample(
      seq(as.Date("2020/01/01"), as.Date("2020/05/31"), by = "day"),
      size = n()
    )
  )
# Spatially self-join the stations: each station is matched with every station
# whose 1000-unit buffer contains it (join = st_within). Self-matches are
# removed, the remaining matches per station are counted as
# number_of_neighbours, and the date difference between the two matched
# stations is stored in interval_date.
test_stack <- polls %>%
  st_join(st_buffer(polls, dist = 1000), join = st_within) %>%
  # A station always falls inside its own buffer; discard that pairing.
  filter(Ballot.Box.Polling.Station.x != Ballot.Box.Polling.Station.y) %>%
  add_count(Ballot.Box.Polling.Station.x) %>%
  rename(number_of_neighbours = n) %>%
  mutate(interval_date = date.x - date.y) %>%
  # Drops columns by position; the indices depend on the CSV's column order.
  subset(select = -c(6:8, 10, 11, 13:18))

# Optional summary step. It is kept commented out because, as a separate
# statement, distinct() would be called without a data argument and fail.
# To return only the number of neighbours per station, append ` %>%` to the
# subset() line above and uncomment the two lines below:
#   distinct(Ballot.Box.Polling.Station.x, number_of_neighbours, date.x) %>%
#   filter(number_of_neighbours >= 2)
# Give every station a numeric row id and an alternating label:
# even ids become "stuff", odd ids become "type".
polls <- mutate(
  polls,
  id = as.numeric(row_number()),
  thing = case_when(
    id %% 2 == 0 ~ "stuff",
    id %% 2 != 0 ~ "type"
  )
)

# Number of "stuff" stations in polling district AC.
polls %>%
  filter(thing == "stuff", Polling.District.Code == "AC") %>%
  count()

# Number of "type" stations in polling district IA.
polls %>%
  filter(thing == "type", Polling.District.Code == "IA") %>%
  count()
如何构建行名称有意义且列是计算值的数据框?
有点像
rowname  value
stuff AC 1
type IA  1
听起来你想按 `thing` 和 `Polling.District.Code` 这两列进行 `group_by`,然后对每个组执行 `summarize`,计算该组的 `length`。如果你想让汇总后的数据框去掉几何列,需要使用 `st_set_geometry(NULL)`:
# Count the rows in every (thing, Polling.District.Code) combination, keep the
# grouping on the result, then strip the geometry column so a plain tibble
# is returned.
polls %>%
  group_by(thing, Polling.District.Code) %>%
  summarise(count = n(), .groups = "keep") %>%
  st_drop_geometry()
#> # A tibble: 147 x 3
#> # Groups: thing, Polling.District.Code [147]
#> thing Polling.District.Code count
#> * <chr> <chr> <int>
#> 1 stuff AC 1
#> 2 stuff AE 1
#> 3 stuff BB1 1
#> 4 stuff CA1 1
#> 5 stuff CB1 1
#> 6 stuff CC 1
#> 7 stuff CE 1
#> 8 stuff DA2 1
#> 9 stuff DB1 1
#> 10 stuff DB3 1
#> # ... with 137 more rows
或者如果你想保留几何图形,使用:
# Count the rows in every (thing, Polling.District.Code) combination and keep
# the grouping; the geometry column is left in place on the result.
polls %>%
  group_by(thing, Polling.District.Code) %>%
  summarise(count = n(), .groups = "keep")
#> Simple feature collection with 147 features and 3 fields
#> geometry type: POINT
#> dimension: XY
#> bbox: xmin: 310399 ymin: 186331 xmax: 325960 ymax: 207788
#> projected CRS: OSGB 1936 / British National Grid
#> # A tibble: 147 x 4
#> # Groups: thing, Polling.District.Code [147]
#> thing Polling.District.Code count geometry
#> <chr> <chr> <int> <POINT [m]>
#> 1 stuff AC 1 (311777 206968)
#> 2 stuff AE 1 (311734 206047)
#> 3 stuff BB1 1 (310577 205577)
#> 4 stuff CA1 1 (314777 202748)
#> 5 stuff CB1 1 (314777 202748)
#> 6 stuff CC 1 (314622 203396)
#> 7 stuff CE 1 (315255 201843)
#> 8 stuff DA2 1 (315780 200318)
#> 9 stuff DB1 1 (314693 199774)
#> 10 stuff DB3 1 (315034 199159)
#> # ... with 137 more rows
我认为答案是bind_rows
# Compute each answer as its own one-row count, then stack the rows into a
# single table.
a <- count(filter(polls, thing == "stuff" & Polling.District.Code == "AC"))
b <- count(filter(polls, thing == "type" & Polling.District.Code == "IA"))
c <- bind_rows(a, b)