如何将 SharePoint 列表加载到 R 的 tibble 中?
How can I load a SharePoint list into a tibble in R?
我想将 SharePoint 列表加载到 R 的 tibble 中。
我目前做法的问题是,数据中的每个值都被包在一个列表里。我该如何解包每个值,或者更改数据转换方式,使各列直接包含字符串,而不是列表的列表?
# A tibble: 10 x 6
`__metadata` A B C D E
<list> <list> <list> <list> <list> <list>
1 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
2 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
3 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
4 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
5 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
6 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
7 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
...
我试过没有成功:
# Attempt 1 (reported as unsuccessful): apply unlist() to every element of
# every column.
my_data %>% mutate_all(~ map(.x, unlist))
# Attempt 2 (reported as unsuccessful): flatten the whole object by one level.
my_data %>% unlist(recursive = FALSE)
……以及 map()、mutate_all()、unnest() 和 unlist() 的许多其他组合。
我认为问题出在我处理数据的方式上。原来的JSON格式如下:
{
"d": {
"results": [
{
"__metadata": {
"id": "<GUID>",
"uri": "<redacted>",
"etag": "\"42\"",
"type": "SP.Data.DownloadcenterItem"
},
"A": {
"results": [
{
"__metadata": {
"id": "<GUID>",
"type": "SP.Data.UserInfoItem"
},
"Title": "<redacted>"
}
]
},
"C": {
"__metadata": {
"id": "<GUID>",
"type": "SP.Data.UserInfoItem"
},
"EMail": "<redacted>"
},
"B": "<redacted>",
"D": "<redacted>",
"E": "<redacted>"
},
...
],
"__next": "<redacted>"
}
}
以下代码用于加载 JSON 并将其转换为 tibble:
# Request the SharePoint endpoint and parse the response body; the list
# items are then read from current_page$d$results.
current_page <- httr::GET('<URL>') %>% httr::content()
# For each item: enframe() turns the named list into a (name, value) tibble
# whose `value` is a list-column, and spread() widens it to one row per item.
# Because `value` is a list-column, every resulting column is a list as well,
# which matches the all-<list> tibble printed in the question.
my_data <- current_page$d$results %>%
map(enframe) %>%
map(~ spread(.x, name, value))
dput(current_page$d$results) 的输出:
list(list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"), dmsAuthor = list(
results = list(list(`__metadata` = list(id = "<redacted>",
type = "<redacted>"), Title = "<redacted>"))),
dmsDocumentOwner = list(`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"))
以下代码适用于您的数据,这里不需要使用 purrr 包。
library(dplyr)
library(tibble)
library(tidyr)
# Flatten the nested API result into one named character vector and then into
# a two-column (name, value) tibble. unlist() joins nested names with ".",
# e.g. "dmsAuthor.results.Title".
enframe(unlist(current_page$d$results)) %>%
  # Drop every "__metadata" entry (id / uri / etag / type).
  filter(!grepl("metadata", name, ignore.case = TRUE)) %>%
  # Number the occurrences of each field; the k-th occurrence of a field is
  # treated as belonging to the k-th list item.
  # NOTE(review): assumes each field occurs exactly once per item; an item
  # with several dmsAuthor results would shift later rows - confirm.
  group_by(name) %>%
  mutate(rid = row_number()) %>%
  ungroup() %>%
  # Key the pivot on rid so that every cell holds exactly one value. The
  # earlier form, pivot_wider(-rid, ...) followed by a bare unnest, excluded
  # rid from the row key, produced list-columns and raised the
  # "not uniquely identified" and "`cols` is now required" warnings.
  pivot_wider(id_cols = rid, names_from = name, values_from = value) %>%
  # rid was only a helper key; remove it from the final table.
  select(-rid)
#> # A tibble: 10 x 5
#> dmsAuthor.resul~ dmsDocumentOwne~ dmsDocumentID dmsDocVersion dmsSPTitle
#> <chr> <chr> <chr> <chr> <chr>
#> 1 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 2 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 3 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 4 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 5 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 6 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 7 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 8 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 9 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 10 <redacted> <redacted> <redacted> <redacted> <redacted>
警告:
#> Warning: Values in `value` are not uniquely identified; output will contain list-cols.
##> * Use `values_fn = list(value = list)` to suppress this warning.
##> * Use `values_fn = list(value = length)` to identify where the duplicates arise
##> * Use `values_fn = list(value = summary_fun)` to summarise duplicates
##> Warning: `cols` is now required.
##> Please use `cols = c(dmsAuthor.results.Title, dmsDocumentOwner.EMail, dmsDocumentID,
##> dmsDocVersion, dmsSPTitle)`
我想将 SharePoint 列表加载到 R 的 tibble 中。
我目前做法的问题是,数据中的每个值都被包在一个列表里。我该如何解包每个值,或者更改数据转换方式,使各列直接包含字符串,而不是列表的列表?
# A tibble: 10 x 6
`__metadata` A B C D E
<list> <list> <list> <list> <list> <list>
1 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
2 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
3 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
4 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
5 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
6 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
7 <list [4]> <list [1]> <chr [1]> <list [2]> <chr [1]> <chr [1]>
...
我试过没有成功:
# Attempt 1 (reported as unsuccessful): apply unlist() to every element of
# every column.
my_data %>% mutate_all(~ map(.x, unlist))
# Attempt 2 (reported as unsuccessful): flatten the whole object by one level.
my_data %>% unlist(recursive = FALSE)
……以及 map()、mutate_all()、unnest() 和 unlist() 的许多其他组合。
我认为问题出在我处理数据的方式上。原来的JSON格式如下:
{
"d": {
"results": [
{
"__metadata": {
"id": "<GUID>",
"uri": "<redacted>",
"etag": "\"42\"",
"type": "SP.Data.DownloadcenterItem"
},
"A": {
"results": [
{
"__metadata": {
"id": "<GUID>",
"type": "SP.Data.UserInfoItem"
},
"Title": "<redacted>"
}
]
},
"C": {
"__metadata": {
"id": "<GUID>",
"type": "SP.Data.UserInfoItem"
},
"EMail": "<redacted>"
},
"B": "<redacted>",
"D": "<redacted>",
"E": "<redacted>"
},
...
],
"__next": "<redacted>"
}
}
以下代码用于加载 JSON 并将其转换为 tibble:
# Request the SharePoint endpoint and parse the response body; the list
# items are then read from current_page$d$results.
current_page <- httr::GET('<URL>') %>% httr::content()
# For each item: enframe() turns the named list into a (name, value) tibble
# whose `value` is a list-column, and spread() widens it to one row per item.
# Because `value` is a list-column, every resulting column is a list as well,
# which matches the all-<list> tibble printed in the question.
my_data <- current_page$d$results %>%
map(enframe) %>%
map(~ spread(.x, name, value))
dput(current_page$d$results) 的输出:
list(list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"), dmsAuthor = list(
results = list(list(`__metadata` = list(id = "<redacted>",
type = "<redacted>"), Title = "<redacted>"))),
dmsDocumentOwner = list(`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"),
list(`__metadata` = list(id = "<redacted>",
uri = "<redacted>",
etag = "<redacted>", type = "<redacted>"),
dmsAuthor = list(results = list(list(`__metadata` = list(
id = "<redacted>", type = "<redacted>"),
Title = "<redacted>"))), dmsDocumentOwner = list(
`__metadata` = list(id = "<redacted>",
type = "<redacted>"), EMail = "<redacted>"),
dmsDocumentID = "<redacted>", dmsDocVersion = "<redacted>", dmsSPTitle = "<redacted>"))
以下代码适用于您的数据,这里不需要使用 purrr 包。
library(dplyr)
library(tibble)
library(tidyr)
# Flatten the nested API result into one named character vector and then into
# a two-column (name, value) tibble. unlist() joins nested names with ".",
# e.g. "dmsAuthor.results.Title".
enframe(unlist(current_page$d$results)) %>%
  # Drop every "__metadata" entry (id / uri / etag / type).
  filter(!grepl("metadata", name, ignore.case = TRUE)) %>%
  # Number the occurrences of each field; the k-th occurrence of a field is
  # treated as belonging to the k-th list item.
  # NOTE(review): assumes each field occurs exactly once per item; an item
  # with several dmsAuthor results would shift later rows - confirm.
  group_by(name) %>%
  mutate(rid = row_number()) %>%
  ungroup() %>%
  # Key the pivot on rid so that every cell holds exactly one value. The
  # earlier form, pivot_wider(-rid, ...) followed by a bare unnest, excluded
  # rid from the row key, produced list-columns and raised the
  # "not uniquely identified" and "`cols` is now required" warnings.
  pivot_wider(id_cols = rid, names_from = name, values_from = value) %>%
  # rid was only a helper key; remove it from the final table.
  select(-rid)
#> # A tibble: 10 x 5
#> dmsAuthor.resul~ dmsDocumentOwne~ dmsDocumentID dmsDocVersion dmsSPTitle
#> <chr> <chr> <chr> <chr> <chr>
#> 1 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 2 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 3 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 4 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 5 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 6 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 7 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 8 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 9 <redacted> <redacted> <redacted> <redacted> <redacted>
#> 10 <redacted> <redacted> <redacted> <redacted> <redacted>
警告:
#> Warning: Values in `value` are not uniquely identified; output will contain list-cols.
##> * Use `values_fn = list(value = list)` to suppress this warning.
##> * Use `values_fn = list(value = length)` to identify where the duplicates arise
##> * Use `values_fn = list(value = summary_fun)` to summarise duplicates
##> Warning: `cols` is now required.
##> Please use `cols = c(dmsAuthor.results.Title, dmsDocumentOwner.EMail, dmsDocumentID,
##> dmsDocVersion, dmsSPTitle)`