PostgreSQL 求和而不是串联
PostgreSQL sum instead of concatenation
我有以下查询:
-- Query exactly as posted in the question: per-branch sums for account 1,
-- with every join performed BEFORE the aggregation. The code is left
-- unchanged because the question is about the inflated totals it produces.
-- NOTE: GROUP BY already yields one row per (branch.id, branch.name), so
-- DISTINCT does not remove anything here.
SELECT DISTINCT
branch.id,
branch.name,
-- Each value is normalised before summing: ',' is replaced by '.', an empty
-- string or NULL falls back to zero, and the text is cast to double precision.
SUM(CAST(COALESCE(NULLIF(REPLACE(dia_anterior.col_9, ',', '.'), ''), '0.0') AS double precision)) AS total_venta,
SUM(CAST(COALESCE(NULLIF(REPLACE(dia_anterior.col_4, ',', '.'), ''), '0.0') AS double precision)) AS total_personas,
SUM(CAST(COALESCE(NULLIF(REPLACE(ocupacion_dia_anterior.col_3, ',', '.'), ''), '0') AS double precision)) AS total_ocupacion
FROM branches AS branch
-- The three joins to "queries" carry no condition tying them to "branch";
-- each one keeps the queries rows whose query_structure_id equals the id
-- found for the given slug. The scalar subquery must return at most one row.
INNER JOIN queries AS q_dia_anterior
ON q_dia_anterior.query_structure_id = (SELECT id FROM query_structures WHERE query_structures.slug = 'dia-anterior')
INNER JOIN queries AS q_ocupacion_dia_anterior
ON q_ocupacion_dia_anterior.query_structure_id = (SELECT id FROM query_structures WHERE query_structures.slug = 'ocupacion-dia-anterior')
INNER JOIN queries AS q_ventas_x_articulo_dia_anterior
ON q_ventas_x_articulo_dia_anterior.query_structure_id = (SELECT id FROM query_structures WHERE query_structures.slug = 'ventas-x-articulo-dia-anterior')
-- Three independent joins to branch_data for the same branch. The joined
-- result holds every combination of their matching rows, so a value taken
-- from one alias is summed once per matching row of the other two aliases.
INNER JOIN branch_data AS dia_anterior
ON dia_anterior.query_id = q_dia_anterior.id
AND dia_anterior.branch_id = branch.id
INNER JOIN branch_data AS ocupacion_dia_anterior
ON ocupacion_dia_anterior.query_id = q_ocupacion_dia_anterior.id
AND ocupacion_dia_anterior.branch_id = branch.id
-- No column of this alias appears in the SELECT list, yet its matching rows
-- still multiply the rows that feed the SUMs above.
INNER JOIN branch_data AS ventas_x_articulo_dia_anterior
ON ventas_x_articulo_dia_anterior.query_id = q_ventas_x_articulo_dia_anterior.id
AND ventas_x_articulo_dia_anterior.branch_id = branch.id
WHERE
branch.account_id = 1
GROUP BY
branch.id,
branch.name
-- Positional reference: output column 2 is branch.name.
ORDER BY
2
当我对结果进行分组以对列求和时,看起来不同行的值正在串联,从而产生巨大的值。
我是不是漏掉了什么?
为什么我得到多次求和的值?
虽然我认为这不能正确回答问题,但我通过以下查询得到了想要的结果:
-- Per-branch-name totals for account 1.
-- Each CTE is aggregated to ONE row per branch_id before it is joined, so the
-- two data sets can no longer multiply each other's rows when a branch has
-- several rows in either of them (aggregate first, then join).
WITH dia_actual AS (
    -- Sales and head-count totals per branch for the 'dia-actual' query structure.
    -- Text values are normalised first: ',' -> '.', empty string / NULL -> '0'.
    SELECT
        branch_data.branch_id,
        SUM(CAST(COALESCE(NULLIF(REPLACE(branch_data.col_9, ',', '.'), ''), '0') AS double precision)) AS total_venta,
        SUM(CAST(COALESCE(NULLIF(REPLACE(branch_data.col_4, ',', '.'), ''), '0') AS double precision)) AS total_personas
    FROM branch_data
    INNER JOIN queries AS query
        ON query.id = branch_data.query_id
    INNER JOIN query_structures AS query_structure
        ON query_structure.id = query.query_structure_id
    WHERE
        query_structure.slug = 'dia-actual'
    GROUP BY
        branch_data.branch_id
), ocupacion AS (
    -- Occupancy total per branch for the 'ocupacion-dia-anterior' query structure.
    SELECT
        branch_data.branch_id,
        SUM(CAST(COALESCE(NULLIF(REPLACE(branch_data.col_3, ',', '.'), ''), '0') AS double precision)) AS total_ocupacion
    FROM branch_data
    INNER JOIN queries AS query
        ON query.id = branch_data.query_id
    INNER JOIN query_structures AS query_structure
        ON query_structure.id = query.query_structure_id
    WHERE
        query_structure.slug = 'ocupacion-dia-anterior'
    GROUP BY
        branch_data.branch_id
)
SELECT
    branch.name,
    -- The outer SUMs are kept so that branches sharing a name are still
    -- combined into a single output row, as in the original query.
    SUM(dia_actual.total_venta) AS total_venta,
    SUM(dia_actual.total_personas) AS total_personas,
    SUM(ocupacion.total_ocupacion) AS total_ocupacion
FROM branches AS branch
-- INNER JOINs: a branch without rows in either CTE is omitted from the result.
INNER JOIN dia_actual
    ON dia_actual.branch_id = branch.id
INNER JOIN ocupacion
    ON ocupacion.branch_id = branch.id
WHERE
    branch.account_id = 1
GROUP BY
    branch.name
ORDER BY
    branch.name
使用 CTE 可以正确求和值。
在根据一些合理的猜测补全细节之后,这可能是您想要的查询:
-- Totals per branch of account 1. Each LATERAL subquery is an aggregate
-- without GROUP BY, so it returns exactly one row for the current branch;
-- attaching it therefore never multiplies branch rows.
SELECT
    br.name,
    -- SUM over no rows (or only NULL / empty inputs) is NULL; report 0 instead.
    COALESCE(dia_actual.total_venta, 0) AS total_venta,
    COALESCE(dia_actual.total_personas, 0) AS total_personas,
    COALESCE(ocupacion.total_ocupacion, 0) AS total_ocupacion
FROM branches AS br
LEFT JOIN LATERAL (
    -- Empty strings become NULL (ignored by SUM); ',' is turned into '.'
    -- before the text is cast to a floating-point number.
    SELECT
        SUM(CAST(REPLACE(NULLIF(d.col_9, ''), ',', '.') AS double precision)) AS total_venta,
        SUM(CAST(REPLACE(NULLIF(d.col_4, ''), ',', '.') AS double precision)) AS total_personas
    FROM branch_data AS d
    INNER JOIN queries AS qr
        ON qr.id = d.query_id
    INNER JOIN query_structures AS st
        ON st.id = qr.query_structure_id
    WHERE d.branch_id = br.id  -- lateral reference
      AND st.slug = 'dia-actual'
) AS dia_actual ON TRUE
LEFT JOIN LATERAL (
    SELECT
        SUM(CAST(REPLACE(NULLIF(d.col_3, ''), ',', '.') AS double precision)) AS total_ocupacion
    FROM branch_data AS d
    INNER JOIN queries AS qr
        ON qr.id = d.query_id
    INNER JOIN query_structures AS st
        ON st.id = qr.query_structure_id
    WHERE d.branch_id = br.id  -- lateral reference
      AND st.slug = 'ocupacion-dia-anterior'
) AS ocupacion ON TRUE
WHERE br.account_id = 1
ORDER BY br.name;
备注
先聚合(求和),再连接,以避免因变相的交叉连接(proxy cross join)而导致行数成倍增加。
- Two SQL LEFT JOINS produce incorrect result
使用 LEFT JOIN,以避免因右侧某个子查询中找不到值而丢失行。
不过,既然你只选取单个 branch_id,我改用了 LEFT JOIN LATERAL ... ON true,这里假设你只用到全部行中的一小部分。
不需要开销更大的 CTE。
还简化了从 text 到 double precision 的转换。
当然,最好一开始就将这些数字存储为数值类型(numeric data type),而不是 text。
我有以下查询:
-- Query exactly as posted in the question: per-branch sums for account 1,
-- with every join performed BEFORE the aggregation. The code is left
-- unchanged because the question is about the inflated totals it produces.
-- NOTE: GROUP BY already yields one row per (branch.id, branch.name), so
-- DISTINCT does not remove anything here.
SELECT DISTINCT
branch.id,
branch.name,
-- Each value is normalised before summing: ',' is replaced by '.', an empty
-- string or NULL falls back to zero, and the text is cast to double precision.
SUM(CAST(COALESCE(NULLIF(REPLACE(dia_anterior.col_9, ',', '.'), ''), '0.0') AS double precision)) AS total_venta,
SUM(CAST(COALESCE(NULLIF(REPLACE(dia_anterior.col_4, ',', '.'), ''), '0.0') AS double precision)) AS total_personas,
SUM(CAST(COALESCE(NULLIF(REPLACE(ocupacion_dia_anterior.col_3, ',', '.'), ''), '0') AS double precision)) AS total_ocupacion
FROM branches AS branch
-- The three joins to "queries" carry no condition tying them to "branch";
-- each one keeps the queries rows whose query_structure_id equals the id
-- found for the given slug. The scalar subquery must return at most one row.
INNER JOIN queries AS q_dia_anterior
ON q_dia_anterior.query_structure_id = (SELECT id FROM query_structures WHERE query_structures.slug = 'dia-anterior')
INNER JOIN queries AS q_ocupacion_dia_anterior
ON q_ocupacion_dia_anterior.query_structure_id = (SELECT id FROM query_structures WHERE query_structures.slug = 'ocupacion-dia-anterior')
INNER JOIN queries AS q_ventas_x_articulo_dia_anterior
ON q_ventas_x_articulo_dia_anterior.query_structure_id = (SELECT id FROM query_structures WHERE query_structures.slug = 'ventas-x-articulo-dia-anterior')
-- Three independent joins to branch_data for the same branch. The joined
-- result holds every combination of their matching rows, so a value taken
-- from one alias is summed once per matching row of the other two aliases.
INNER JOIN branch_data AS dia_anterior
ON dia_anterior.query_id = q_dia_anterior.id
AND dia_anterior.branch_id = branch.id
INNER JOIN branch_data AS ocupacion_dia_anterior
ON ocupacion_dia_anterior.query_id = q_ocupacion_dia_anterior.id
AND ocupacion_dia_anterior.branch_id = branch.id
-- No column of this alias appears in the SELECT list, yet its matching rows
-- still multiply the rows that feed the SUMs above.
INNER JOIN branch_data AS ventas_x_articulo_dia_anterior
ON ventas_x_articulo_dia_anterior.query_id = q_ventas_x_articulo_dia_anterior.id
AND ventas_x_articulo_dia_anterior.branch_id = branch.id
WHERE
branch.account_id = 1
GROUP BY
branch.id,
branch.name
-- Positional reference: output column 2 is branch.name.
ORDER BY
2
当我对结果进行分组以对列求和时,看起来不同行的值正在串联,从而产生巨大的值。
我是不是漏掉了什么?
为什么我得到多次求和的值?
虽然我认为这不能正确回答问题,但我通过以下查询得到了想要的结果:
-- Per-branch-name totals for account 1.
-- Each CTE is aggregated to ONE row per branch_id before it is joined, so the
-- two data sets can no longer multiply each other's rows when a branch has
-- several rows in either of them (aggregate first, then join).
WITH dia_actual AS (
    -- Sales and head-count totals per branch for the 'dia-actual' query structure.
    -- Text values are normalised first: ',' -> '.', empty string / NULL -> '0'.
    SELECT
        branch_data.branch_id,
        SUM(CAST(COALESCE(NULLIF(REPLACE(branch_data.col_9, ',', '.'), ''), '0') AS double precision)) AS total_venta,
        SUM(CAST(COALESCE(NULLIF(REPLACE(branch_data.col_4, ',', '.'), ''), '0') AS double precision)) AS total_personas
    FROM branch_data
    INNER JOIN queries AS query
        ON query.id = branch_data.query_id
    INNER JOIN query_structures AS query_structure
        ON query_structure.id = query.query_structure_id
    WHERE
        query_structure.slug = 'dia-actual'
    GROUP BY
        branch_data.branch_id
), ocupacion AS (
    -- Occupancy total per branch for the 'ocupacion-dia-anterior' query structure.
    SELECT
        branch_data.branch_id,
        SUM(CAST(COALESCE(NULLIF(REPLACE(branch_data.col_3, ',', '.'), ''), '0') AS double precision)) AS total_ocupacion
    FROM branch_data
    INNER JOIN queries AS query
        ON query.id = branch_data.query_id
    INNER JOIN query_structures AS query_structure
        ON query_structure.id = query.query_structure_id
    WHERE
        query_structure.slug = 'ocupacion-dia-anterior'
    GROUP BY
        branch_data.branch_id
)
SELECT
    branch.name,
    -- The outer SUMs are kept so that branches sharing a name are still
    -- combined into a single output row, as in the original query.
    SUM(dia_actual.total_venta) AS total_venta,
    SUM(dia_actual.total_personas) AS total_personas,
    SUM(ocupacion.total_ocupacion) AS total_ocupacion
FROM branches AS branch
-- INNER JOINs: a branch without rows in either CTE is omitted from the result.
INNER JOIN dia_actual
    ON dia_actual.branch_id = branch.id
INNER JOIN ocupacion
    ON ocupacion.branch_id = branch.id
WHERE
    branch.account_id = 1
GROUP BY
    branch.name
ORDER BY
    branch.name
使用 CTE 可以正确求和值。
在根据一些合理的猜测补全细节之后,这可能是您想要的查询:
-- Totals per branch of account 1. Each LATERAL subquery is an aggregate
-- without GROUP BY, so it returns exactly one row for the current branch;
-- attaching it therefore never multiplies branch rows.
SELECT
    br.name,
    -- SUM over no rows (or only NULL / empty inputs) is NULL; report 0 instead.
    COALESCE(dia_actual.total_venta, 0) AS total_venta,
    COALESCE(dia_actual.total_personas, 0) AS total_personas,
    COALESCE(ocupacion.total_ocupacion, 0) AS total_ocupacion
FROM branches AS br
LEFT JOIN LATERAL (
    -- Empty strings become NULL (ignored by SUM); ',' is turned into '.'
    -- before the text is cast to a floating-point number.
    SELECT
        SUM(CAST(REPLACE(NULLIF(d.col_9, ''), ',', '.') AS double precision)) AS total_venta,
        SUM(CAST(REPLACE(NULLIF(d.col_4, ''), ',', '.') AS double precision)) AS total_personas
    FROM branch_data AS d
    INNER JOIN queries AS qr
        ON qr.id = d.query_id
    INNER JOIN query_structures AS st
        ON st.id = qr.query_structure_id
    WHERE d.branch_id = br.id  -- lateral reference
      AND st.slug = 'dia-actual'
) AS dia_actual ON TRUE
LEFT JOIN LATERAL (
    SELECT
        SUM(CAST(REPLACE(NULLIF(d.col_3, ''), ',', '.') AS double precision)) AS total_ocupacion
    FROM branch_data AS d
    INNER JOIN queries AS qr
        ON qr.id = d.query_id
    INNER JOIN query_structures AS st
        ON st.id = qr.query_structure_id
    WHERE d.branch_id = br.id  -- lateral reference
      AND st.slug = 'ocupacion-dia-anterior'
) AS ocupacion ON TRUE
WHERE br.account_id = 1
ORDER BY br.name;
备注
先聚合(求和),再连接,以避免因变相的交叉连接(proxy cross join)而导致行数成倍增加。
- Two SQL LEFT JOINS produce incorrect result
使用 LEFT JOIN,以避免因右侧某个子查询中找不到值而丢失行。不过,既然你只选取单个 branch_id,我改用了 LEFT JOIN LATERAL ... ON true,这里假设你只用到全部行中的一小部分。不需要开销更大的 CTE。
还简化了从 text 到 double precision 的转换。
当然,最好一开始就将这些数字存储为数值类型(numeric data type),而不是 text。