从 R 数据框列构建 JSON 列

Construct JSON column from R dataframe columns

这是我的数据框:

# Sample inventory: one row per item. Length/Width/Height hold the numeric
# measurements, the *_units columns hold their units, and option_1..option_4
# list which attributes apply to each item (NA where there is none).
df <- data.frame(
  item = c("Box 1", "Tape", "Roll 1"),
  Length = c(2, 10, 6),
  Width = c(4, 3, 3),
  Height = c(6, NA, NA),
  Length_units = c("in", "ft", "yd"),
  option_1 = rep("item_length", 3),
  option_2 = rep("item_width", 3),
  option_3 = c("item_height", "color", NA),
  option_4 = c("thickness", NA, NA),
  width_units = rep("in", 3),
  height_units = c("in", "in", NA),
  color = c(NA, "clear", NA),
  thickness = c("200#", NA, NA)
)
    item Length Width Height Length_units    option_1   option_2    option_3  option_4 width_units height_units color thickness
1  Box 1      2     4      6           in item_length item_width item_height thickness          in           in  <NA>      200# 
2   Tape     10     3     NA           ft item_length item_width       color      <NA>          in           in clear      <NA>
3 Roll 1      6     3     NA           yd item_length item_width        <NA>      <NA>          in         <NA>  <NA>      <NA>

我想将下面的 JSON 列附加到 df 的末尾。请注意,JSON 中的键来自 df 的 option 列,因此每一行的 JSON 内容会随该行 option 列的取值而动态变化。

{"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}
{"dimensions":"10 ft x 3 in","color":"clear"}
{"dimensions":"6 yd x 3 in"}

如能提供 dplyr 解决方案,将不胜感激。

已编辑:我希望我的数据框看起来像这样

    item Length Width Height Length_units    option_1   option_2    option_3  option_4 width_units height_units color thickness options_json
1  Box 1      2     4      6           in item_length item_width item_height thickness          in           in  <NA>      200# {"dimensions":"2 in x 4 in x 6 in","thickness":"200#"} 
2   Tape     10     3     NA           ft item_length item_width       color      <NA>          in           in clear      <NA> {"dimensions":"10 ft x 3 in","color":"clear"}
3 Roll 1      6     3     NA           yd item_length item_width        <NA>      <NA>          in         <NA>  <NA>      <NA> {"dimensions":"6 yd x 3 in"}

我们可以先用 paste/unite 把各列拼接起来,再用 toJSON 转换为 JSON。由于列名的大小写不一致,最好先用 tolower 把列名统一转换为小写,这样用 get 从对应的 _units 列取值会更方便。用 across 遍历各列(length:height),当列中的值不是 NA 时,用 str_c 把它与对应的 '_units' 列的值拼接起来(使用 case_when);然后用 unite 把这些列合并为一列,用 select 选出需要的列,最后用 toJSON 转换为 JSON。

library(dplyr)
library(stringr)
library(jsonlite)
library(tidyr)
# Serialise the whole table as one pretty-printed JSON array (one object per
# item); no column is added to `df` here.
out <- df %>%
  # Lower-case every column name so that each measurement column (length,
  # width, height) has a matching "<name>_units" column.
  rename_with(tolower) %>%
  mutate(
    across(
      length:height,
      # Label each non-missing measurement with the unit from its partner
      # column; rows that match no condition are left as NA by case_when().
      ~ case_when(
        !is.na(.x) ~ str_c(.x, get(str_c(cur_column(), "_units")), sep = " ")
      )
    )
  ) %>%
  # Join the labelled measurements as "a x b x c", skipping NA parts.
  unite(dimensions, length, width, height, sep = " x ", na.rm = TRUE) %>%
  select(dimensions, color, thickness) %>%
  toJSON(pretty = TRUE)

-输出

out
[
  {
    "dimensions": "2 in x 4 in x 6 in",
    "thickness": "200#"
  },
  {
    "dimensions": "10 ft x 3 in",
    "color": "clear"
  },
  {
    "dimensions": "6 yd x 3 in"
  }
] 

如果要新建一列,可以先使用 rowwise,再用 mutate/transmute 逐行生成 JSON

# Append an `options_json` column with one JSON string per row. Each string
# is still wrapped in "[...]" because toJSON() receives a one-row data frame.
df <- df %>%
  # Lower-case the names so every measurement column has a "<name>_units"
  # partner that can be looked up by name.
  rename_with(tolower) %>%
  mutate(
    across(
      length:height,
      # Attach the unit to each non-missing measurement; others become NA.
      ~ case_when(
        !is.na(.x) ~ str_c(.x, get(str_c(cur_column(), "_units")), sep = " ")
      )
    )
  ) %>%
  unite(dimensions, length, width, height, sep = " x ", na.rm = TRUE) %>%
  select(dimensions, color, thickness) %>%
  # Process one row at a time so cur_data() is a one-row data frame.
  rowwise() %>%
  transmute(options_json = toJSON(cur_data())) %>%
  ungroup() %>%
  # Bind onto the original `df`, which keeps its columns and names unchanged.
  bind_cols(df, .)

-输出

df
  item Length Width Height Length_units    option_1   option_2    option_3  option_4 width_units height_units color thickness
1  Box 1      2     4      6           in item_length item_width item_height thickness          in           in  <NA>      200#
2   Tape     10     3     NA           ft item_length item_width       color      <NA>          in           in clear      <NA>
3 Roll 1      6     3     NA           yd item_length item_width        <NA>      <NA>          in         <NA>  <NA>      <NA>
                                              options_json
1 [{"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}]
2          [{"dimensions":"10 ft x 3 in","color":"clear"}]
3                           [{"dimensions":"6 yd x 3 in"}]

生成的 JSON 字符串带有首尾的方括号,我们可以用 str_remove_all 将其删除:

# Append an `options_json` column holding one bare JSON object per row
# (without the surrounding "[...]" that toJSON() adds for a data frame).
df <- df %>%
  # Lower-case the names so every measurement column has a "<name>_units"
  # partner that can be looked up by name.
  rename_with(tolower, everything()) %>%
  mutate(
    across(
      length:height,
      # Attach the unit to each non-missing measurement; others become NA.
      ~ case_when(
        !is.na(.x) ~ str_c(.x, get(str_c(cur_column(), "_units")), sep = " ")
      )
    )
  ) %>%
  unite(dimensions, length, width, height, sep = " x ", na.rm = TRUE) %>%
  select(dimensions, color, thickness) %>%
  rowwise() %>%
  # Backslashes must be doubled inside an R string literal ("\[" is a parse
  # error). The pattern is anchored so only the outer bracket pair is
  # stripped and any brackets inside the values are left alone.
  transmute(
    options_json = str_remove_all(toJSON(cur_data()), "^\\[|\\]$")
  ) %>%
  ungroup() %>%
  # Bind onto the original `df`, which keeps its columns and names unchanged.
  bind_cols(df, .)

-输出

  item Length Width Height Length_units    option_1   option_2    option_3  option_4 width_units height_units color thickness
1  Box 1      2     4      6           in item_length item_width item_height thickness          in           in  <NA>      200#
2   Tape     10     3     NA           ft item_length item_width       color      <NA>          in           in clear      <NA>
3 Roll 1      6     3     NA           yd item_length item_width        <NA>      <NA>          in         <NA>  <NA>      <NA>
                                            options_json
1 {"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}
2          {"dimensions":"10 ft x 3 in","color":"clear"}
3                           {"dimensions":"6 yd x 3 in"}

或者,在 rowwise 之后对当前行使用 unclass,并只保留非 NA 的元素:

# Append an `options_json` column holding one bare JSON object per row, built
# from a plain list of the row's non-missing values rather than a data frame.
df <- df %>%
  # Lower-case the names so every measurement column has a "<name>_units"
  # partner that can be looked up by name.
  rename_with(tolower, everything()) %>%
  mutate(
    across(
      length:height,
      # Attach the unit to each non-missing measurement; others become NA.
      ~ case_when(
        !is.na(.x) ~ str_c(.x, get(str_c(cur_column(), "_units")), sep = " ")
      )
    )
  ) %>%
  unite(dimensions, length, width, height, sep = " x ", na.rm = TRUE) %>%
  select(dimensions, color, thickness) %>%
  rowwise() %>%
  transmute(
    options_json = toJSON(
      # unclass() turns the one-row data frame into a named list, so toJSON()
      # emits an object instead of a one-element array. Base Filter() drops
      # the NA entries; purrr::keep() is not available because purrr is not
      # among the loaded packages.
      Filter(complete.cases, unclass(cur_data())),
      # Write length-one values as scalars rather than one-element arrays.
      auto_unbox = TRUE
    )
  ) %>%
  ungroup() %>%
  # Bind onto the original `df`, which keeps its columns and names unchanged.
  bind_cols(df, .)

-输出

> df
  item Length Width Height Length_units    option_1   option_2    option_3  option_4 width_units height_units color thickness
1  Box 1      2     4      6           in item_length item_width item_height thickness          in           in  <NA>      200#
2   Tape     10     3     NA           ft item_length item_width       color      <NA>          in           in clear      <NA>
3 Roll 1      6     3     NA           yd item_length item_width        <NA>      <NA>          in         <NA>  <NA>      <NA>
                                            options_json
1 {"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}
2          {"dimensions":"10 ft x 3 in","color":"clear"}
3                           {"dimensions":"6 yd x 3 in"}

> str(df)
'data.frame':   3 obs. of  14 variables:
 $ item        : chr  "Box 1" "Tape" "Roll 1"
 $ Length      : num  2 10 6
 $ Width       : num  4 3 3
 $ Height      : num  6 NA NA
 $ Length_units: chr  "in" "ft" "yd"
 $ option_1    : chr  "item_length" "item_length" "item_length"
 $ option_2    : chr  "item_width" "item_width" "item_width"
 $ option_3    : chr  "item_height" "color" NA
 $ option_4    : chr  "thickness" NA NA
 $ width_units : chr  "in" "in" "in"
 $ height_units: chr  "in" "in" NA
 $ color       : chr  NA "clear" NA
 $ thickness   : chr  "200#" NA NA
 $ options_json: 'json' chr  "{\"dimensions\":\"2 in x 4 in x 6 in\",\"thickness\":\"200#\"}" "{\"dimensions\":\"10 ft x 3 in\",\"color\":\"clear\"}" "{\"dimensions\":\"6 yd x 3 in\"}"