我有一个超过 2000 行的数据框。从 df,我需要在 r 中创建一个模式
i have a dataframe with more than 2000 rows. From the df, i need to create a schema in r
格式如下:enter image description here
我需要为 df 创建以下架构。
我可能对一件商品有 2 条或更多条说明。
{
部分:第1节,
标题:电力电缆
List_of_Instruction: [
{
说明:提供的信息
},
]
List_of_items:[
{
产品:项目 1,
Unit:10,
评分:100
},
{
产品:项目 2,
Unit:2,
价格:2000
}
]
List_of_instruction:[
{
Instruction:information 1.0
},
]
List_of_items:[
{
Product:item.1.0,
Unit:5,
Rate:10
}
],
标题:电缆遏制,
List_of_instruction:[
{
说明:资料
}
],
List_of_items:[
{
Product:item,
Unit:100,
Rate:4
}
]
} ,
{
部分: …。
标题: …。
}
这里是一个例子,我假设你想在章节、标题和说明下分组,这些列在每个段的顶层,它从你的数据中得出结论。
d <- read.csv(text=
"Description,Unit,Rate,isItem,IsSection,IsInstruction,IsTitle,sheetname
Section1,NA,NA,FALSE,TRUE,FALSE,FALSE,Sheet1
Power Cables,NA,NA,FALSE,FALSE,FALSE,TRUE,Sheet1
Information Provided,NA,NA,FALSE,FALSE,TRUE,FALSE,Sheet1
Item1,10,100,TRUE,FALSE,FALSE,FALSE,Sheet1
Item2,2,2000,TRUE,FALSE,FALSE,FALSE,Sheet1
Information 1.0,NA,NA,FALSE,FALSE,TRUE,FALSE,Sheet1
Item1.0,5,10,TRUE,FALSE,FALSE,FALSE,Sheet1
Cable containment,NA,NA,FALSE,FALSE,FALSE,TRUE,Sheet1
Information,NA,NA,FALSE,FALSE,TRUE,FALSE,Sheet1
item,100,4,TRUE,FALSE,FALSE,FALSE,Sheet1
Section2,NA,NA,NA,TRUE,FALSE,FALSE,Sheet1
Pipe,NA,NA,FALSE,FALSE,FALSE,TRUE,Sheet1
details,NA,NA,FALSE,FALSE,TRUE,FALSE,Sheet1
Items4,7,500,TRUE,FALSE,FALSE,FALSE,Sheet1
",header=TRUE)
library(jsonlite)
library(zoo)
d2 <- d %>% mutate( Section = na.locf( ifelse( IsSection, Description, NA ) ) ) %>%
group_by( Section ) %>%
mutate( Title = na.locf( ifelse( IsTitle, Description, NA ), na.rm=FALSE ) ) %>%
group_by( Section, Title ) %>%
mutate( Instruction = na.locf( ifelse( IsInstruction, Description, NA ), na.rm=FALSE ) )
## A helper function to do the heavy finesse work:
make.segment <- function(d) {
with(
d,
list(
Section = Section[1],
Title = Title[1],
Instruction = Instruction[1],
List_of_items =
d %>% filter( isItem ) %>%
rename( Product=Description ) %>%
select( Product, Unit, Rate ) %>%
apply( 1, as.list )
)
)
}
## Standard dplyr work to do the heavy lifting:
l <- d2 %>% filter( !is.na(Title) & !is.na(Instruction) ) %>%
group_by( Section, Title, Instruction ) %>%
do( segment = make.segment(.) ) %>%
pull( segment ) %>%
as.list
toJSON( l, pretty=TRUE, auto_unbox=TRUE )
你的例子确实显示了 json,所以我假设这就是你想要的,结果是:
[
{
"Section": "Section1",
"Title": "Cable containment",
"Instruction": "Information",
"List_of_items": [
{
"Product": "item",
"Unit": "100",
"Rate": "4"
}
]
},
{
"Section": "Section1",
"Title": "Power Cables",
"Instruction": "Information 1.0",
"List_of_items": [
{
"Product": "Item1.0",
"Unit": "5",
"Rate": "10"
}
]
},
{
"Section": "Section1",
"Title": "Power Cables",
"Instruction": "Information Provided",
"List_of_items": [
{
"Product": "Item1",
"Unit": "10",
"Rate": " 100"
},
{
"Product": "Item2",
"Unit": " 2",
"Rate": "2000"
}
]
},
{
"Section": "Section2",
"Title": "Pipe",
"Instruction": "details",
"List_of_items": [
{
"Product": "Items4",
"Unit": "7",
"Rate": "500"
}
]
}
]
格式如下:enter image description here
我需要为 df 创建以下架构。 我可能对一件商品有 2 条或更多条说明。
{ 部分:第1节, 标题:电力电缆 List_of_Instruction: [ { 说明:提供的信息 }, ] List_of_items:[ { 产品:项目 1, Unit:10, 评分:100 }, { 产品:项目 2, Unit:2, 价格:2000 } ] List_of_instruction:[ { Instruction:information 1.0 }, ] List_of_items:[ { Product:item.1.0, Unit:5, Rate:10 } ], 标题:电缆遏制, List_of_instruction:[ { 说明:资料 } ], List_of_items:[ { Product:item, Unit:100, Rate:4 } ]
} ,
{ 部分: …。 标题: …。 }
这里是一个例子,我假设你想在章节、标题和说明下分组,这些列在每个段的顶层,它从你的数据中得出结论。
d <- read.csv(text=
"Description,Unit,Rate,isItem,IsSection,IsInstruction,IsTitle,sheetname
Section1,NA,NA,FALSE,TRUE,FALSE,FALSE,Sheet1
Power Cables,NA,NA,FALSE,FALSE,FALSE,TRUE,Sheet1
Information Provided,NA,NA,FALSE,FALSE,TRUE,FALSE,Sheet1
Item1,10,100,TRUE,FALSE,FALSE,FALSE,Sheet1
Item2,2,2000,TRUE,FALSE,FALSE,FALSE,Sheet1
Information 1.0,NA,NA,FALSE,FALSE,TRUE,FALSE,Sheet1
Item1.0,5,10,TRUE,FALSE,FALSE,FALSE,Sheet1
Cable containment,NA,NA,FALSE,FALSE,FALSE,TRUE,Sheet1
Information,NA,NA,FALSE,FALSE,TRUE,FALSE,Sheet1
item,100,4,TRUE,FALSE,FALSE,FALSE,Sheet1
Section2,NA,NA,NA,TRUE,FALSE,FALSE,Sheet1
Pipe,NA,NA,FALSE,FALSE,FALSE,TRUE,Sheet1
details,NA,NA,FALSE,FALSE,TRUE,FALSE,Sheet1
Items4,7,500,TRUE,FALSE,FALSE,FALSE,Sheet1
",header=TRUE)
library(jsonlite)
library(zoo)
d2 <- d %>% mutate( Section = na.locf( ifelse( IsSection, Description, NA ) ) ) %>%
group_by( Section ) %>%
mutate( Title = na.locf( ifelse( IsTitle, Description, NA ), na.rm=FALSE ) ) %>%
group_by( Section, Title ) %>%
mutate( Instruction = na.locf( ifelse( IsInstruction, Description, NA ), na.rm=FALSE ) )
## A helper function to do the heavy finesse work:
make.segment <- function(d) {
with(
d,
list(
Section = Section[1],
Title = Title[1],
Instruction = Instruction[1],
List_of_items =
d %>% filter( isItem ) %>%
rename( Product=Description ) %>%
select( Product, Unit, Rate ) %>%
apply( 1, as.list )
)
)
}
## Standard dplyr work to do the heavy lifting:
l <- d2 %>% filter( !is.na(Title) & !is.na(Instruction) ) %>%
group_by( Section, Title, Instruction ) %>%
do( segment = make.segment(.) ) %>%
pull( segment ) %>%
as.list
toJSON( l, pretty=TRUE, auto_unbox=TRUE )
你的例子确实显示了 json,所以我假设这就是你想要的,结果是:
[
{
"Section": "Section1",
"Title": "Cable containment",
"Instruction": "Information",
"List_of_items": [
{
"Product": "item",
"Unit": "100",
"Rate": "4"
}
]
},
{
"Section": "Section1",
"Title": "Power Cables",
"Instruction": "Information 1.0",
"List_of_items": [
{
"Product": "Item1.0",
"Unit": "5",
"Rate": "10"
}
]
},
{
"Section": "Section1",
"Title": "Power Cables",
"Instruction": "Information Provided",
"List_of_items": [
{
"Product": "Item1",
"Unit": "10",
"Rate": " 100"
},
{
"Product": "Item2",
"Unit": " 2",
"Rate": "2000"
}
]
},
{
"Section": "Section2",
"Title": "Pipe",
"Instruction": "details",
"List_of_items": [
{
"Product": "Items4",
"Unit": "7",
"Rate": "500"
}
]
}
]