SQL 查询帮助计算最大值
SQL query help to calculate max
我有一个关于我需要在 SQL 中执行的查询的问题(我使用 BQ)。
我有这个table:
train_no, wagon_no, weight, length, date, startpoint(km), endpoint(km)
1, 123, 1000, 20, 20190101, 0, 7
1, 234, 2000, 20, 20190101, 1, 2
1, 345, 3000, 30, 20190101, 1, 5
1, 456, 1000, 40, 20190101, 1, 6
2, 987, 1000, 10, 20190101, 0, 8
2, 876, 2000, 20, 20190101, 1, 2
2, 765, 3000, 20, 20190101, 1, 5
2, 654, 1000, 20, 20190101, 1, 6
table 显示了两列带货车的火车。每辆货车我们看到货车在什么时候被添加到火车上。因此,对于 1 号列车,我们看到货车 234 包含在从起点 = 1(公里 1)到终点 = 2(公里 2)的列车上,然后从列车上移除。我们还看到 train_no =1 的最大端点为 7,因此火车的最大行驶距离为 7 公里。
火车总长度和总重量随距离变化,我想计算在这段距离内达到的最大长度和最大重量。我如何在 SQL 中执行此操作?
如有任何建议,我们将不胜感激。
编辑:
添加图片以明确我要寻找的内容。
正如您在附图中看到的那样 Train_no =1 在点 1 和点 2 之间有一个最大重量。总重量为 7000,是该特定距离的火车中所有货车的总重量。此外,总长度为 110,这是所有货车加在一起的总长度。
以下查询 returns 每个 KM 标记处的长度按每列火车的降序排列。
-- Total train length at every km marker, longest first within each train.
with data as (
  -- Sample rows: one per wagon, with the km range over which it is attached.
  select 1 as train_no, 123 as wagon_no, 1000 as weight, 20 as length, 20190101 as date, 0 as startpoint, 7 as endpoint union all
  select 1, 234, 2000, 20, 20190101, 1, 2 union all
  select 1, 345, 3000, 30, 20190101, 1, 5 union all
  select 1, 456, 1000, 40, 20190101, 1, 6 union all
  select 2, 987, 1000, 10, 20190101, 0, 8 union all
  select 2, 876, 2000, 20, 20190101, 1, 2 union all
  select 2, 765, 3000, 20, 20190101, 1, 5 union all
  select 2, 654, 1000, 20, 20190101, 1, 6
),
km_array as (
  -- One row per km marker, spanning the smallest startpoint to the largest
  -- endpoint found in the data (no fixed upper bound to outgrow).
  select km
  from (
    select
      min(startpoint) as first_km,
      max(endpoint) as last_km
    from data
  ) as bounds
  cross join unnest(generate_array(bounds.first_km, bounds.last_km)) as km
),
joined as (
  -- Pair each km marker with every wagon attached there.
  -- `between` keeps both ends of the wagon's range inclusive.
  select
    k.km,
    d.train_no,
    d.length
  from km_array as k
  inner join data as d
    on k.km between d.startpoint and d.endpoint
),
train_length_at_each_km as (
  -- Sum of wagon lengths per train at each km marker.
  select
    km,
    train_no,
    sum(length) as length
  from joined
  group by km, train_no
)
select
  train_no,
  length,
  km
from train_length_at_each_km
-- km breaks ties between markers with equal length so the row order is stable.
order by train_no, length desc, km
获得最大重量将使用与 train_length_at_each_km
CTE 类似的逻辑。
以下适用于 BigQuery 标准 SQL
#standardSQL
-- Peak wagon count, total weight and total length reached by each
-- (train_no, dt) at any integer km point along its route.
WITH route_bounds AS (
  -- Overall km span per (train_no, dt): smallest startpoint to largest endpoint.
  SELECT
    train_no,
    dt,
    MIN(startpoint) AS startpoint,
    MAX(endpoint) AS endpoint
  FROM `project.dataset.table`
  GROUP BY train_no, dt
),
totals_per_point AS (
  -- Wagons attached at each km point, with their summed weight and length.
  SELECT
    r.train_no,
    r.dt,
    point,
    COUNT(t.wagon_no) AS wagons,
    SUM(t.weight) AS total_weight,
    SUM(t.len) AS total_len
  FROM route_bounds AS r
  CROSS JOIN UNNEST(GENERATE_ARRAY(r.startpoint, r.endpoint)) AS point
  -- Inner join on purpose: a point with no attached wagon cannot be a maximum,
  -- so such points are simply dropped.
  INNER JOIN `project.dataset.table` AS t
    ON t.train_no = r.train_no
    AND t.dt = r.dt
    -- Half-open range: a wagon counts from its startpoint up to, but not
    -- including, its endpoint.
    AND point >= t.startpoint
    AND point < t.endpoint
  GROUP BY r.train_no, r.dt, point
)
SELECT
  train_no,
  dt,
  MAX(wagons) AS max_wagons,
  MAX(total_weight) AS max_total_weight,
  MAX(total_len) AS max_total_len
FROM totals_per_point
GROUP BY train_no, dt
如果应用到您问题中的示例数据,如下例所示
#standardSQL
-- Peak wagon count, total weight and total length reached by each
-- (train_no, dt) at any integer km point along its route, run on sample data.
WITH `project.dataset.table` AS (
  -- Inline sample rows standing in for the real table: one row per wagon.
  SELECT 1 train_no, 123 wagon_no, 1000 weight, 20 len, '20190101' dt, 0 startpoint, 7 endpoint UNION ALL
  SELECT 1, 234, 2000, 20, '20190101', 1, 2 UNION ALL
  SELECT 1, 345, 3000, 30, '20190101', 1, 5 UNION ALL
  SELECT 1, 456, 1000, 40, '20190101', 1, 6 UNION ALL
  SELECT 2, 987, 1000, 10, '20190101', 0, 8 UNION ALL
  SELECT 2, 876, 2000, 20, '20190101', 1, 2 UNION ALL
  SELECT 2, 765, 3000, 20, '20190101', 1, 5 UNION ALL
  SELECT 2, 654, 1000, 20, '20190101', 1, 6
),
route_bounds AS (
  -- Overall km span per (train_no, dt): smallest startpoint to largest endpoint.
  SELECT
    train_no,
    dt,
    MIN(startpoint) AS startpoint,
    MAX(endpoint) AS endpoint
  FROM `project.dataset.table`
  GROUP BY train_no, dt
),
totals_per_point AS (
  -- Wagons attached at each km point, with their summed weight and length.
  SELECT
    r.train_no,
    r.dt,
    point,
    COUNT(t.wagon_no) AS wagons,
    SUM(t.weight) AS total_weight,
    SUM(t.len) AS total_len
  FROM route_bounds AS r
  CROSS JOIN UNNEST(GENERATE_ARRAY(r.startpoint, r.endpoint)) AS point
  -- Inner join on purpose: a point with no attached wagon cannot be a maximum,
  -- so such points are simply dropped.
  INNER JOIN `project.dataset.table` AS t
    ON t.train_no = r.train_no
    AND t.dt = r.dt
    -- Half-open range: a wagon counts from its startpoint up to, but not
    -- including, its endpoint.
    AND point >= t.startpoint
    AND point < t.endpoint
  GROUP BY r.train_no, r.dt, point
)
SELECT
  train_no,
  dt,
  MAX(wagons) AS max_wagons,
  MAX(total_weight) AS max_total_weight,
  MAX(total_len) AS max_total_len
FROM totals_per_point
GROUP BY train_no, dt
结果是
Row train_no dt max_wagons max_total_weight max_total_len
1 1 20190101 4 7000 110
2 2 20190101 4 7000 70
我有一个关于我需要在 SQL 中执行的查询的问题(我使用 BQ)。
我有这个table:
train_no, wagon_no, weight, length, date, startpoint(km), endpoint(km)
1, 123, 1000, 20, 20190101, 0, 7
1, 234, 2000, 20, 20190101, 1, 2
1, 345, 3000, 30, 20190101, 1, 5
1, 456, 1000, 40, 20190101, 1, 6
2, 987, 1000, 10, 20190101, 0, 8
2, 876, 2000, 20, 20190101, 1, 2
2, 765, 3000, 20, 20190101, 1, 5
2, 654, 1000, 20, 20190101, 1, 6
table 显示了两列带货车的火车。每辆货车我们看到货车在什么时候被添加到火车上。因此,对于 1 号列车,我们看到货车 234 包含在从起点 = 1(公里 1)到终点 = 2(公里 2)的列车上,然后从列车上移除。我们还看到 train_no =1 的最大端点为 7,因此火车的最大行驶距离为 7 公里。
火车总长度和总重量随距离变化,我想计算在这段距离内达到的最大长度和最大重量。我如何在 SQL 中执行此操作?
如有任何建议,我们将不胜感激。
编辑:
添加图片以明确我要寻找的内容。
正如您在附图中看到的那样 Train_no =1 在点 1 和点 2 之间有一个最大重量。总重量为 7000,是该特定距离的火车中所有货车的总重量。此外,总长度为 110,这是所有货车加在一起的总长度。
以下查询 returns 每个 KM 标记处的长度按每列火车的降序排列。
-- Total train length at every km marker, longest first within each train.
with data as (
  -- Sample rows: one per wagon, with the km range over which it is attached.
  select 1 as train_no, 123 as wagon_no, 1000 as weight, 20 as length, 20190101 as date, 0 as startpoint, 7 as endpoint union all
  select 1, 234, 2000, 20, 20190101, 1, 2 union all
  select 1, 345, 3000, 30, 20190101, 1, 5 union all
  select 1, 456, 1000, 40, 20190101, 1, 6 union all
  select 2, 987, 1000, 10, 20190101, 0, 8 union all
  select 2, 876, 2000, 20, 20190101, 1, 2 union all
  select 2, 765, 3000, 20, 20190101, 1, 5 union all
  select 2, 654, 1000, 20, 20190101, 1, 6
),
km_array as (
  -- One row per km marker, spanning the smallest startpoint to the largest
  -- endpoint found in the data (no fixed upper bound to outgrow).
  select km
  from (
    select
      min(startpoint) as first_km,
      max(endpoint) as last_km
    from data
  ) as bounds
  cross join unnest(generate_array(bounds.first_km, bounds.last_km)) as km
),
joined as (
  -- Pair each km marker with every wagon attached there.
  -- `between` keeps both ends of the wagon's range inclusive.
  select
    k.km,
    d.train_no,
    d.length
  from km_array as k
  inner join data as d
    on k.km between d.startpoint and d.endpoint
),
train_length_at_each_km as (
  -- Sum of wagon lengths per train at each km marker.
  select
    km,
    train_no,
    sum(length) as length
  from joined
  group by km, train_no
)
select
  train_no,
  length,
  km
from train_length_at_each_km
-- km breaks ties between markers with equal length so the row order is stable.
order by train_no, length desc, km
获得最大重量将使用与 train_length_at_each_km
CTE 类似的逻辑。
以下适用于 BigQuery 标准 SQL
#standardSQL
-- Peak wagon count, total weight and total length reached by each
-- (train_no, dt) at any integer km point along its route.
WITH route_bounds AS (
  -- Overall km span per (train_no, dt): smallest startpoint to largest endpoint.
  SELECT
    train_no,
    dt,
    MIN(startpoint) AS startpoint,
    MAX(endpoint) AS endpoint
  FROM `project.dataset.table`
  GROUP BY train_no, dt
),
totals_per_point AS (
  -- Wagons attached at each km point, with their summed weight and length.
  SELECT
    r.train_no,
    r.dt,
    point,
    COUNT(t.wagon_no) AS wagons,
    SUM(t.weight) AS total_weight,
    SUM(t.len) AS total_len
  FROM route_bounds AS r
  CROSS JOIN UNNEST(GENERATE_ARRAY(r.startpoint, r.endpoint)) AS point
  -- Inner join on purpose: a point with no attached wagon cannot be a maximum,
  -- so such points are simply dropped.
  INNER JOIN `project.dataset.table` AS t
    ON t.train_no = r.train_no
    AND t.dt = r.dt
    -- Half-open range: a wagon counts from its startpoint up to, but not
    -- including, its endpoint.
    AND point >= t.startpoint
    AND point < t.endpoint
  GROUP BY r.train_no, r.dt, point
)
SELECT
  train_no,
  dt,
  MAX(wagons) AS max_wagons,
  MAX(total_weight) AS max_total_weight,
  MAX(total_len) AS max_total_len
FROM totals_per_point
GROUP BY train_no, dt
如果应用到您问题中的示例数据,如下例所示
#standardSQL
-- Peak wagon count, total weight and total length reached by each
-- (train_no, dt) at any integer km point along its route, run on sample data.
WITH `project.dataset.table` AS (
  -- Inline sample rows standing in for the real table: one row per wagon.
  SELECT 1 train_no, 123 wagon_no, 1000 weight, 20 len, '20190101' dt, 0 startpoint, 7 endpoint UNION ALL
  SELECT 1, 234, 2000, 20, '20190101', 1, 2 UNION ALL
  SELECT 1, 345, 3000, 30, '20190101', 1, 5 UNION ALL
  SELECT 1, 456, 1000, 40, '20190101', 1, 6 UNION ALL
  SELECT 2, 987, 1000, 10, '20190101', 0, 8 UNION ALL
  SELECT 2, 876, 2000, 20, '20190101', 1, 2 UNION ALL
  SELECT 2, 765, 3000, 20, '20190101', 1, 5 UNION ALL
  SELECT 2, 654, 1000, 20, '20190101', 1, 6
),
route_bounds AS (
  -- Overall km span per (train_no, dt): smallest startpoint to largest endpoint.
  SELECT
    train_no,
    dt,
    MIN(startpoint) AS startpoint,
    MAX(endpoint) AS endpoint
  FROM `project.dataset.table`
  GROUP BY train_no, dt
),
totals_per_point AS (
  -- Wagons attached at each km point, with their summed weight and length.
  SELECT
    r.train_no,
    r.dt,
    point,
    COUNT(t.wagon_no) AS wagons,
    SUM(t.weight) AS total_weight,
    SUM(t.len) AS total_len
  FROM route_bounds AS r
  CROSS JOIN UNNEST(GENERATE_ARRAY(r.startpoint, r.endpoint)) AS point
  -- Inner join on purpose: a point with no attached wagon cannot be a maximum,
  -- so such points are simply dropped.
  INNER JOIN `project.dataset.table` AS t
    ON t.train_no = r.train_no
    AND t.dt = r.dt
    -- Half-open range: a wagon counts from its startpoint up to, but not
    -- including, its endpoint.
    AND point >= t.startpoint
    AND point < t.endpoint
  GROUP BY r.train_no, r.dt, point
)
SELECT
  train_no,
  dt,
  MAX(wagons) AS max_wagons,
  MAX(total_weight) AS max_total_weight,
  MAX(total_len) AS max_total_len
FROM totals_per_point
GROUP BY train_no, dt
结果是
Row train_no dt max_wagons max_total_weight max_total_len
1 1 20190101 4 7000 110
2 2 20190101 4 7000 70