聚合来自 JSONB 数组的不同值并结合 SQL 分组依据
Aggregating distinct values from JSONB arrays combined with SQL group by
我正在尝试在 SQL GROUP BY
语句中聚合来自 JSONB 数组的不同值:
一个 dataset 有很多 cfiles,而一个 cfile 只属于一个 dataset。
-- Show the raw demo rows: one row per cfile with its dataset id and JSONB payload.
-- Columns are listed explicitly and rows are ordered by id so the listing is reproducible.
SELECT
    id,
    dataset_id,
    property_values
FROM cfiles
ORDER BY id;
id | dataset_id | property_values (jsonb)
----+------------+-----------------------------------------------
1 | 1 | {"Sample Names": ["SampA", "SampB", "SampC"]}
2 | 1 | {"Sample Names": ["SampA", "SampB", "SampD"]}
3 | 1 | {"Sample Names": ["SampE"]}
4 | 2 | {"Sample Names": ["SampA", "SampF"]}
5 | 2 | {"Sample Names": ["SampG"]}
此查询有效,返回的正是我想要的结果,但写得一团糟。
-- One row per dataset with its distinct sample names, sorted and joined by '; '.
-- Implemented purely by string manipulation of the JSONB array's text form:
--   1. ->> renders the "Sample Names" array as text
--   2. the nested REPLACE calls turn each '", "' separator into ';' and strip [, " and ]
--   3. STRING_AGG joins the per-cfile strings of a dataset with ';'
--   4. STRING_TO_ARRAY + unnest split that string back into individual names
--   5. the ARRAY(...) subquery de-duplicates and sorts the names
-- DISTINCT applies to the whole output row, not only to datasets.id.
SELECT DISTINCT
    datasets.id AS dataset_id,
    ARRAY_TO_STRING(
        ARRAY(
            SELECT DISTINCT sample.name
            FROM unnest(
                STRING_TO_ARRAY(
                    STRING_AGG(
                        DISTINCT REPLACE(
                            REPLACE(
                                REPLACE(
                                    REPLACE(
                                        cfiles.property_values ->> 'Sample Names',
                                        '",' || chr(32) || '"',
                                        ';'
                                    ),
                                    '[',
                                    ''
                                ),
                                '"',
                                ''
                            ),
                            ']',
                            ''
                        ),
                        ';'
                    ),
                    ';'
                )
            ) AS sample (name)
            ORDER BY sample.name
        ),
        '; '
    ) AS sample_names
FROM datasets
INNER JOIN cfiles
    ON datasets.id = cfiles.dataset_id
GROUP BY datasets.id
dataset_id | sample_names
------------+-----------------------------------
1 | SampA; SampB; SampC; SampD; SampE
2 | SampA; SampF; SampG
有没有更好的方法来编写这个查询而不需要所有的字符串操作?
我尝试了 jsonb_array_elements,但它报错:子查询使用了来自外部查询的未分组列“cfiles.property_values”。然后我将 cfiles.property_values 添加到 GROUP BY 中,但这样它就不再仅按 dataset_id 分组了。
不是我想要的结果:
-- Attempt that calls jsonb_array_elements inside a correlated ARRAY(...) subquery.
-- The subquery reads cfiles.property_values directly, so that column is listed in
-- GROUP BY; the grouping is therefore per (dataset id, property_values) pair, not
-- per dataset. jsonb_array_elements yields jsonb values, not text.
SELECT DISTINCT
    datasets.id AS dataset_id,
    ARRAY_TO_STRING(
        ARRAY(
            SELECT DISTINCT elem.sample_name
            FROM jsonb_array_elements(
                cfiles.property_values -> 'Sample Names'
            ) AS elem (sample_name)
            ORDER BY elem.sample_name
        ),
        '; '
    ) AS sample_names
FROM datasets
INNER JOIN cfiles
    ON datasets.id = cfiles.dataset_id
GROUP BY
    datasets.id,
    cfiles.property_values
dataset_id | sample_names
------------+---------------------------
1 | "SampA"; "SampB"; "SampC"
1 | "SampA"; "SampB"; "SampD"
1 | "SampE"
2 | "SampA"; "SampF"
2 | "SampG"
用于创建演示数据的 SQL:
-- Parent table of the demo: a dataset consists only of its integer id.
CREATE TABLE datasets (
    id integer,
    PRIMARY KEY (id)
);
-- Child table of the demo: each cfile row references one dataset through dataset_id
-- and carries a JSONB document in property_values.
CREATE TABLE cfiles (
    id integer,
    dataset_id integer REFERENCES datasets (id),
    property_values jsonb,
    PRIMARY KEY (id)
);
-- Seed the demo data. Column lists are spelled out so the statements do not depend
-- on the physical column order of the tables.
INSERT INTO datasets (id) VALUES (1), (2);

-- cfiles of dataset 1
INSERT INTO cfiles (id, dataset_id, property_values) VALUES
(1, 1, '{"Sample Names":["SampA", "SampB", "SampC"]}'),
(2, 1, '{"Sample Names":["SampA", "SampB", "SampD"]}'),
(3, 1, '{"Sample Names":["SampE"]}');

-- cfiles of dataset 2
INSERT INTO cfiles (id, dataset_id, property_values) VALUES
(4, 2, '{"Sample Names":["SampA", "SampF"]}'),
(5, 2, '{"Sample Names":["SampG"]}');
jsonb_array_elements 是一个集合返回函数,应该在 FROM 子句中使用。在 SELECT 列表中使用它会使事情不必要地复杂化:
-- One row per dataset with its distinct sample names, sorted and joined by '; '.
-- jsonb_array_elements_text is called in FROM, yielding one text row per element of
-- each cfile's "Sample Names" array, so a plain GROUP BY on dataset_id can aggregate them.
-- The aggregate is aliased so the result column is named sample_names instead of
-- the default string_agg.
SELECT
    c.dataset_id,
    string_agg(DISTINCT n.name, '; ' ORDER BY n.name) AS sample_names
FROM cfiles AS c
CROSS JOIN LATERAL jsonb_array_elements_text(c.property_values -> 'Sample Names') AS n (name)
GROUP BY c.dataset_id
ORDER BY c.dataset_id;
我正在尝试在 SQL GROUP BY
语句中聚合来自 JSONB 数组的不同值:
一个 dataset 有很多 cfiles,而一个 cfile 只属于一个 dataset。
-- Show the raw demo rows: one row per cfile with its dataset id and JSONB payload.
-- Columns are listed explicitly and rows are ordered by id so the listing is reproducible.
SELECT
    id,
    dataset_id,
    property_values
FROM cfiles
ORDER BY id;
id | dataset_id | property_values (jsonb)
----+------------+-----------------------------------------------
1 | 1 | {"Sample Names": ["SampA", "SampB", "SampC"]}
2 | 1 | {"Sample Names": ["SampA", "SampB", "SampD"]}
3 | 1 | {"Sample Names": ["SampE"]}
4 | 2 | {"Sample Names": ["SampA", "SampF"]}
5 | 2 | {"Sample Names": ["SampG"]}
此查询有效,返回的正是我想要的结果,但写得一团糟。
-- One row per dataset with its distinct sample names, sorted and joined by '; '.
-- Implemented purely by string manipulation of the JSONB array's text form:
--   1. ->> renders the "Sample Names" array as text
--   2. the nested REPLACE calls turn each '", "' separator into ';' and strip [, " and ]
--   3. STRING_AGG joins the per-cfile strings of a dataset with ';'
--   4. STRING_TO_ARRAY + unnest split that string back into individual names
--   5. the ARRAY(...) subquery de-duplicates and sorts the names
-- DISTINCT applies to the whole output row, not only to datasets.id.
SELECT DISTINCT
    datasets.id AS dataset_id,
    ARRAY_TO_STRING(
        ARRAY(
            SELECT DISTINCT sample.name
            FROM unnest(
                STRING_TO_ARRAY(
                    STRING_AGG(
                        DISTINCT REPLACE(
                            REPLACE(
                                REPLACE(
                                    REPLACE(
                                        cfiles.property_values ->> 'Sample Names',
                                        '",' || chr(32) || '"',
                                        ';'
                                    ),
                                    '[',
                                    ''
                                ),
                                '"',
                                ''
                            ),
                            ']',
                            ''
                        ),
                        ';'
                    ),
                    ';'
                )
            ) AS sample (name)
            ORDER BY sample.name
        ),
        '; '
    ) AS sample_names
FROM datasets
INNER JOIN cfiles
    ON datasets.id = cfiles.dataset_id
GROUP BY datasets.id
dataset_id | sample_names
------------+-----------------------------------
1 | SampA; SampB; SampC; SampD; SampE
2 | SampA; SampF; SampG
有没有更好的方法来编写这个查询而不需要所有的字符串操作?
我尝试了 jsonb_array_elements,但它报错:子查询使用了来自外部查询的未分组列“cfiles.property_values”。然后我将 cfiles.property_values 添加到 GROUP BY 中,但这样它就不再仅按 dataset_id 分组了。
不是我想要的结果:
-- Attempt that calls jsonb_array_elements inside a correlated ARRAY(...) subquery.
-- The subquery reads cfiles.property_values directly, so that column is listed in
-- GROUP BY; the grouping is therefore per (dataset id, property_values) pair, not
-- per dataset. jsonb_array_elements yields jsonb values, not text.
SELECT DISTINCT
    datasets.id AS dataset_id,
    ARRAY_TO_STRING(
        ARRAY(
            SELECT DISTINCT elem.sample_name
            FROM jsonb_array_elements(
                cfiles.property_values -> 'Sample Names'
            ) AS elem (sample_name)
            ORDER BY elem.sample_name
        ),
        '; '
    ) AS sample_names
FROM datasets
INNER JOIN cfiles
    ON datasets.id = cfiles.dataset_id
GROUP BY
    datasets.id,
    cfiles.property_values
dataset_id | sample_names
------------+---------------------------
1 | "SampA"; "SampB"; "SampC"
1 | "SampA"; "SampB"; "SampD"
1 | "SampE"
2 | "SampA"; "SampF"
2 | "SampG"
用于创建演示数据的 SQL:
-- Parent table of the demo: a dataset consists only of its integer id.
CREATE TABLE datasets (
    id integer,
    PRIMARY KEY (id)
);
-- Child table of the demo: each cfile row references one dataset through dataset_id
-- and carries a JSONB document in property_values.
CREATE TABLE cfiles (
    id integer,
    dataset_id integer REFERENCES datasets (id),
    property_values jsonb,
    PRIMARY KEY (id)
);
-- Seed the demo data. Column lists are spelled out so the statements do not depend
-- on the physical column order of the tables.
INSERT INTO datasets (id) VALUES (1), (2);

-- cfiles of dataset 1
INSERT INTO cfiles (id, dataset_id, property_values) VALUES
(1, 1, '{"Sample Names":["SampA", "SampB", "SampC"]}'),
(2, 1, '{"Sample Names":["SampA", "SampB", "SampD"]}'),
(3, 1, '{"Sample Names":["SampE"]}');

-- cfiles of dataset 2
INSERT INTO cfiles (id, dataset_id, property_values) VALUES
(4, 2, '{"Sample Names":["SampA", "SampF"]}'),
(5, 2, '{"Sample Names":["SampG"]}');
jsonb_array_elements 是一个集合返回函数,应该在 FROM 子句中使用。在 SELECT 列表中使用它会使事情不必要地复杂化:
-- One row per dataset with its distinct sample names, sorted and joined by '; '.
-- jsonb_array_elements_text is called in FROM, yielding one text row per element of
-- each cfile's "Sample Names" array, so a plain GROUP BY on dataset_id can aggregate them.
-- The aggregate is aliased so the result column is named sample_names instead of
-- the default string_agg.
SELECT
    c.dataset_id,
    string_agg(DISTINCT n.name, '; ' ORDER BY n.name) AS sample_names
FROM cfiles AS c
CROSS JOIN LATERAL jsonb_array_elements_text(c.property_values -> 'Sample Names') AS n (name)
GROUP BY c.dataset_id
ORDER BY c.dataset_id;