读取嵌套 json 并将其转换为整洁的数据(tidy data)
read nested json and convert it to tidy data
我正在尝试将 json 转换为遵循 Tidy Data 原则的 tibble。
网站 http://pv.servelelecciones.cl/ 有一些不错的区域数据。由于切换地区时 url 不会改变,我无法直接给出 "Aysen region" 的 link,下面是他们网站上显示的数据:
|listapacto |partido |votos|porcentaje|electo|
|------------------------------------|---------|-----|----------|------|
|H. SUMEMOS | |365 |4,52% | |
|TODOS | |113 |1,40% | |
|50. EDUARDO ROMO LAFOY |IND-TODOS|69 |0,86% | |
|51. SARA MARTINEZ MONDELO |IND-TODOS|44 |0,55% | |
|CIUDADANOS | |252 |3,12% | |
|52. VICTOR MANUEL BORQUEZ FINCKE |CIUD. |53 |0,66% | |
|53. MARISOL LUSDEMIA PINILLA VEJAR |CIUD. |199 |2,47% | |
|K. COALICIÓN REGIONALISTA VERDE | |200 |2,48% | |
|DEMOCRACIA REGIONAL PATAGONICA | |200 |2,48% | |
|54. ELSON BORQUEZ YAÑEZ |DRP |59 |0,73% | |
|55. PEDRO ANTONIO VERGARA ROJAS |DRP |42 |0,52% | |
|56. JESSICA ANDREA TORRES BORQUEZ |IND-DRP |59 |0,73% | |
|57. TAMARA ANDREA ESPINOZA GUTIERREZ|DRP |40 |0,50% | |
|N. LA FUERZA DE LA MAYORIA | |2.958|36,66% | |
|PARTIDO RADICAL SOCIALDEMOCRATA | |346 |4,29% | |
|58. JORGE CALDERON NUÑEZ |PRSD |346 |4,29% | |
|PARTIDO SOCIALISTA DE CHILE | |1.651|20,46% | |
|59. MARISOL MARTINEZ SANCHEZ |PSCH |1.651|20,46% | |
|PARTIDO COMUNISTA DE CHILE | |322 |3,99% | |
|60. ROXANA PEY TUMANOFF |IND-PCCH |322 |3,99% | |
|PARTIDO POR LA DEMOCRACIA | |639 |7,92% | |
|61. RENE OSVALDO ALINCO BUSTOS |IND-PPD |639 |7,92% |* |
|O. CONVERGENCIA DEMOCRATICA | |2.297|28,47% | |
|PARTIDO DEMOCRATA CRISTIANO | |2.297|28,47% | |
|62. MIGUEL ANGEL CALISTO AGUILA |PDC |1.882|23,32% |* |
|63. CARMEN GLORIA MARTINEZ CARDENAS |PDC |224 |2,78% | |
|64. RENE ANSELMO LEGUE CARDENAS |PDC |191 |2,37% | |
|P. CHILE VAMOS | |1.963|24,33% | |
|UNION DEMOCRATA INDEPENDIENTE | |605 |7,50% | |
|65. NESTOR MERA MUÑOZ |UDI |605 |7,50% | |
|PARTIDO REGIONALISTA INDEPENDIENTE | |132 |1,64% | |
|66. PATRICIO HENRIQUEZ BARRIENTOS |PRI |132 |1,64% | |
|EVOLUCION POLITICA | |365 |4,52% | |
|67. GEOCONDA NAVARRETE ARRATIA |EVOP. |365 |4,52% | |
|RENOVACION NACIONAL | |861 |10,67% | |
|68. ARACELY LEUQUEN URIBE |RN |861 |10,67% |* |
|CANDIDATURA INDEPENDIENTE | |286 |3,54% | |
|69. CECILIO AGUILAR GALINDO |IND |286 |3,54% | |
我想读取为他们的网站提供数据的 json,得到一个类似上面 table 的 tibble,再由此转换为整洁的结构。
如果我只是尝试直接读取数据:
# Attach dependencies with library() so that a missing package stops the
# script right here; require() would only return FALSE with a warning and
# the failure would surface later as "could not find function".
library(data.table)  # not used in this snippet; kept from the original
library(jsonlite)
library(dplyr)

# Download and parse the results JSON (requires network access).
x <- fromJSON("http://www.servelelecciones.cl/data/elecciones_diputados/computo/comunas/114501.json")

# Convert the `data` element to a tibble; as the printout that follows this
# snippet shows, its last column `sd` is a list-column of data frames.
y <- as_tibble(x$data)
然后最后一列(sd)中的信息被嵌套在 data.frame 列表里,无法直接使用:
> y
# A tibble: 6 x 7
a b c d e f sd
* <chr> <lgl> <chr> <chr> <chr> <chr> <list>
1 H. SUMEMOS NA 365 4,52% 4 "" <data.frame [2 × 7]>
2 K. COALICIÓN REGIONALISTA VERDE NA 200 2,48% 4 "" <data.frame [1 × 7]>
3 N. LA FUERZA DE LA MAYORIA NA 2.958 36,66% 4 "" <data.frame [4 × 7]>
4 O. CONVERGENCIA DEMOCRATICA NA 2.297 28,47% 3 "" <data.frame [1 × 7]>
5 P. CHILE VAMOS NA 1.963 24,33% 4 "" <data.frame [4 × 7]>
6 CANDIDATURA INDEPENDIENTE NA 286 3,54% NA NA <data.frame [1 × 7]>
当然,我已经试过下面这个问题中的方法:
- Read Json file into a data.frame without nested lists
非常欢迎任何帮助。
一种做法是不必自己编写函数,而是使用 tidyr 并根据需要取消嵌套(unnest)。
就我的情况而言:
# Attach dependencies with library() so that a missing package stops the
# script right here; require() would only return FALSE with a warning and
# the failure would surface later as "could not find function".
library(data.table)  # not used in this snippet; kept from the original
library(jsonlite)
library(dplyr)
library(tidyr)

# Download and parse the results JSON (requires network access).
x <- fromJSON("http://www.servelelecciones.cl/data/elecciones_diputados/computo/comunas/114501.json")

# Convert the `data` element to a tibble; `sd` is a list-column of nested
# data frames.
y <- as_tibble(x$data)

# Keep the "CANDIDATURA INDEPENDIENTE" row aside, still nested.
y1 <- y %>%
  filter(a == "CANDIDATURA INDEPENDIENTE")

# Expand the nested `sd` data frames of every other row by one level.
# NOTE(review): if the nested frames reuse the outer column names, recent
# tidyr versions may need `names_sep`/`names_repair` here - confirm.
y2 <- y %>%
  filter(a != "CANDIDATURA INDEPENDIENTE") %>%
  unnest(sd)
我正在尝试将 json 转换为遵循 Tidy Data 原则的 tibble。
网站 http://pv.servelelecciones.cl/ 有一些不错的区域数据。由于切换地区时 url 不会改变,我无法直接给出 "Aysen region" 的 link,下面是他们网站上显示的数据:
|listapacto |partido |votos|porcentaje|electo|
|------------------------------------|---------|-----|----------|------|
|H. SUMEMOS | |365 |4,52% | |
|TODOS | |113 |1,40% | |
|50. EDUARDO ROMO LAFOY |IND-TODOS|69 |0,86% | |
|51. SARA MARTINEZ MONDELO |IND-TODOS|44 |0,55% | |
|CIUDADANOS | |252 |3,12% | |
|52. VICTOR MANUEL BORQUEZ FINCKE |CIUD. |53 |0,66% | |
|53. MARISOL LUSDEMIA PINILLA VEJAR |CIUD. |199 |2,47% | |
|K. COALICIÓN REGIONALISTA VERDE | |200 |2,48% | |
|DEMOCRACIA REGIONAL PATAGONICA | |200 |2,48% | |
|54. ELSON BORQUEZ YAÑEZ |DRP |59 |0,73% | |
|55. PEDRO ANTONIO VERGARA ROJAS |DRP |42 |0,52% | |
|56. JESSICA ANDREA TORRES BORQUEZ |IND-DRP |59 |0,73% | |
|57. TAMARA ANDREA ESPINOZA GUTIERREZ|DRP |40 |0,50% | |
|N. LA FUERZA DE LA MAYORIA | |2.958|36,66% | |
|PARTIDO RADICAL SOCIALDEMOCRATA | |346 |4,29% | |
|58. JORGE CALDERON NUÑEZ |PRSD |346 |4,29% | |
|PARTIDO SOCIALISTA DE CHILE | |1.651|20,46% | |
|59. MARISOL MARTINEZ SANCHEZ |PSCH |1.651|20,46% | |
|PARTIDO COMUNISTA DE CHILE | |322 |3,99% | |
|60. ROXANA PEY TUMANOFF |IND-PCCH |322 |3,99% | |
|PARTIDO POR LA DEMOCRACIA | |639 |7,92% | |
|61. RENE OSVALDO ALINCO BUSTOS |IND-PPD |639 |7,92% |* |
|O. CONVERGENCIA DEMOCRATICA | |2.297|28,47% | |
|PARTIDO DEMOCRATA CRISTIANO | |2.297|28,47% | |
|62. MIGUEL ANGEL CALISTO AGUILA |PDC |1.882|23,32% |* |
|63. CARMEN GLORIA MARTINEZ CARDENAS |PDC |224 |2,78% | |
|64. RENE ANSELMO LEGUE CARDENAS |PDC |191 |2,37% | |
|P. CHILE VAMOS | |1.963|24,33% | |
|UNION DEMOCRATA INDEPENDIENTE | |605 |7,50% | |
|65. NESTOR MERA MUÑOZ |UDI |605 |7,50% | |
|PARTIDO REGIONALISTA INDEPENDIENTE | |132 |1,64% | |
|66. PATRICIO HENRIQUEZ BARRIENTOS |PRI |132 |1,64% | |
|EVOLUCION POLITICA | |365 |4,52% | |
|67. GEOCONDA NAVARRETE ARRATIA |EVOP. |365 |4,52% | |
|RENOVACION NACIONAL | |861 |10,67% | |
|68. ARACELY LEUQUEN URIBE |RN |861 |10,67% |* |
|CANDIDATURA INDEPENDIENTE | |286 |3,54% | |
|69. CECILIO AGUILAR GALINDO |IND |286 |3,54% | |
我想读取为他们的网站提供数据的 json,得到一个类似上面 table 的 tibble,再由此转换为整洁的结构。
如果我只是尝试直接读取数据:
# Attach dependencies with library() so that a missing package stops the
# script right here; require() would only return FALSE with a warning and
# the failure would surface later as "could not find function".
library(data.table)  # not used in this snippet; kept from the original
library(jsonlite)
library(dplyr)

# Download and parse the results JSON (requires network access).
x <- fromJSON("http://www.servelelecciones.cl/data/elecciones_diputados/computo/comunas/114501.json")

# Convert the `data` element to a tibble; as the printout that follows this
# snippet shows, its last column `sd` is a list-column of data frames.
y <- as_tibble(x$data)
然后最后一列(sd)中的信息被嵌套在 data.frame 列表里,无法直接使用:
> y
# A tibble: 6 x 7
a b c d e f sd
* <chr> <lgl> <chr> <chr> <chr> <chr> <list>
1 H. SUMEMOS NA 365 4,52% 4 "" <data.frame [2 × 7]>
2 K. COALICIÓN REGIONALISTA VERDE NA 200 2,48% 4 "" <data.frame [1 × 7]>
3 N. LA FUERZA DE LA MAYORIA NA 2.958 36,66% 4 "" <data.frame [4 × 7]>
4 O. CONVERGENCIA DEMOCRATICA NA 2.297 28,47% 3 "" <data.frame [1 × 7]>
5 P. CHILE VAMOS NA 1.963 24,33% 4 "" <data.frame [4 × 7]>
6 CANDIDATURA INDEPENDIENTE NA 286 3,54% NA NA <data.frame [1 × 7]>
当然,我已经试过下面这个问题中的方法:
- Read Json file into a data.frame without nested lists
非常欢迎任何帮助。
一种做法是不必自己编写函数,而是使用 tidyr 并根据需要取消嵌套(unnest)。
就我的情况而言:
# Attach dependencies with library() so that a missing package stops the
# script right here; require() would only return FALSE with a warning and
# the failure would surface later as "could not find function".
library(data.table)  # not used in this snippet; kept from the original
library(jsonlite)
library(dplyr)
library(tidyr)

# Download and parse the results JSON (requires network access).
x <- fromJSON("http://www.servelelecciones.cl/data/elecciones_diputados/computo/comunas/114501.json")

# Convert the `data` element to a tibble; `sd` is a list-column of nested
# data frames.
y <- as_tibble(x$data)

# Keep the "CANDIDATURA INDEPENDIENTE" row aside, still nested.
y1 <- y %>%
  filter(a == "CANDIDATURA INDEPENDIENTE")

# Expand the nested `sd` data frames of every other row by one level.
# NOTE(review): if the nested frames reuse the outer column names, recent
# tidyr versions may need `names_sep`/`names_repair` here - confirm.
y2 <- y %>%
  filter(a != "CANDIDATURA INDEPENDIENTE") %>%
  unnest(sd)