如何从 Clickhouse 中的 json 中提取 json?

How to extract json from json in clickhouse?

我的基地有一个json:

{"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}]}

我需要获取每个键的所有唯一值,但我在提取键 'd' 和键 'e'.

的值时遇到了一些问题

使用:

SELECT
   DISTINCT JSONExtractRaw(column, 'c')
FROM t1

我得到:

[{"d":3,"e":"str_1"}, 
{"d":4,"e":"str_2"}]

但是如果我再次对键 'd' 和键 'e' 使用 JsonExtract variety,它 returns 什么都没有。如何解决这个问题?

WITH
    (
        SELECT '{"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}]}'
    ) AS j
SELECT JSONExtract(j, 'c', 'Array(Tuple(Int64,String))')

┌─JSONExtract(j, 'c', 'Array(Tuple(Int64,String))')─┐
│ [(3,'str_1'),(4,'str_2')]                         │
└───────────────────────────────────────────────────┘


WITH
    (
        SELECT '{"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}, {"d":4,"e":"str_2"}]}'
    ) AS j
SELECT
    JSONExtract(j, 'c', 'Array(Tuple(Int64,String))') AS t,
    arrayReduce('groupUniqArray', arrayMap(x -> (x.1), t)) AS d,
    arrayReduce('groupUniqArray', arrayMap(x -> (x.2), t)) AS e

┌─t─────────────────────────────────────┬─d─────┬─e─────────────────┐
│ [(3,'str_1'),(4,'str_2'),(4,'str_2')] │ [4,3] │ ['str_2','str_1'] │
└───────────────────────────────────────┴───────┴───────────────────┘

如果需要,我会使用这样的 'safe' 查询来正确处理无序成员和遗漏成员。这种方式不是很快但可靠。

SELECT
  json,
  a_and_b,
  d_uniq_values,
  e_uniq_values
FROM (  
  SELECT
      json,
      JSONExtract(json, 'Tuple(a Nullable(Int32), b Nullable(Int32))') a_and_b,
      JSONExtractRaw(json, 'c') c_json,
      range(JSONLength(c_json)) AS array_indices,
      arrayDistinct(arrayMap(i -> JSONExtractInt(c_json, i + 1, 'd'), array_indices)) AS d_uniq_values,
      arrayDistinct(arrayMap(i -> JSONExtractString(c_json, i + 1, 'e'), array_indices)) AS e_uniq_values
  FROM
  (
      /* test data */
      SELECT arrayJoin([
        '{}',
        '{"a":1,"b":2}',
        '{"b":1,"a":2}',
        '{"b":1}',
        '{"a":1,"b":2,"c":[]}',
        '{"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}]}',
        '{"b":1,"a":2,"c":[{"e":"3","d":1}, {"e":"4","d":2}]}',
        '{"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}, {"d":3,"e":"str_1"}, {"d":4,"e":"str_1"}, {"d":7,"e":"str_9"}]}'      
        ]) AS json
  ))
FORMAT Vertical;

/* Result:

Row 1:
──────
json:          {}
a_and_b:       (NULL,NULL)
d_uniq_values: []
e_uniq_values: []

Row 2:
──────
json:          {"a":1,"b":2}
a_and_b:       (1,2)
d_uniq_values: []
e_uniq_values: []

Row 3:
──────
json:          {"b":1,"a":2}
a_and_b:       (2,1)
d_uniq_values: []
e_uniq_values: []

Row 4:
──────
json:          {"b":1}
a_and_b:       (NULL,1)
d_uniq_values: []
e_uniq_values: []

Row 5:
──────
json:          {"a":1,"b":2,"c":[]}
a_and_b:       (1,2)
d_uniq_values: []
e_uniq_values: []

Row 6:
──────
json:          {"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}]}
a_and_b:       (1,2)
d_uniq_values: [3,4]
e_uniq_values: ['str_1','str_2']

Row 7:
──────
json:          {"b":1,"a":2,"c":[{"e":"3","d":1}, {"e":"4","d":2}]}
a_and_b:       (2,1)
d_uniq_values: [1,2]
e_uniq_values: ['3','4']

Row 8:
──────
json:          {"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}, {"d":3,"e":"str_1"}, {"d":4,"e":"str_1"}, {"d":7,"e":"str_9"}]}
a_and_b:       (1,2)
d_uniq_values: [3,4,7]
e_uniq_values: ['str_1','str_2','str_9']
*/