计算指定的 2 行累积数据之间的差异
Calculate difference between 2 specified rows of cumulative data
数据库版本 10.5.6-MariaDB
操作系统 Linux XX 5.9.8-arch1-1 #1 SMP PREEMPT Tue, 10 Nov 2020 22:44:11 +0000 x86_64 GNU/Linux
我有一个自动填充累积数据的表,
我想要一条 SQL 来显示 2 个给定数据行之间的差异。
这是我当前的结果集
+------+------------+-----------+----------+----------+-----------+--------------+
| l_id | start_date | last_date | num_days | num_hits | num_sales | last_seen_on |
+------+------------+-----------+----------+----------+-----------+--------------+
| 1 | 3 | 13 | 10 | 3 | 3 | 5 |
| 3 | 3 | 13 | 10 | 3 | 3 | 5 |
| 4 | 3 | 13 | 10 | 3 | 3 | 5 |
| 5 | 3 | 13 | 10 | 3 | 3 | 5 |
| 6 | 3 | 13 | 10 | 3 | 3 | 5 |
| 7 | 13 | 13 | 0 | 0 | 0 | 5 |
+------+------------+-----------+----------+----------+-----------+--------------+
请注意,缺少产品 l_id = 2,因为它没有 l_date = 13 的记录。
Q1
下面显示的查询是获取数据的正确方法,还是有更好的方法?
Q2
是否可以从 WHERE 子句中获取开始日期(当前硬编码为 ed.data_date >= 3)和结束日期(当前硬编码为 ld.data_date <= 15)?
Q3
是否可以在结果集中获取产品id = 2的数据?
非常感谢
表定义
-- Snapshot table of cumulative per-product counters; each row holds one
-- product's counters as recorded at one data_date.
CREATE TABLE trc_warehouse_product (
    -- Snapshot time; defaults to the current Unix timestamp when omitted.
    data_date                INT DEFAULT UNIX_TIMESTAMP(),
    product_id               INT NOT NULL,
    -- Running totals as of data_date.
    cumulative_product_hits  INT,
    cumulative_product_sales INT,
    date_last_seen           INT,
    -- Rejects duplicate (data_date, product_id) pairs
    -- (rows with a NULL data_date are not constrained by a unique index).
    UNIQUE INDEX date_product (data_date, product_id)
);
仅供测试的示例数据
-- Test fixture: five snapshots (data_date 1, 3, 5, 8, 13) in which every
-- counter of a snapshot carries the same value (1..5).
INSERT INTO trc_warehouse_product
    (data_date, product_id, cumulative_product_hits, cumulative_product_sales, date_last_seen)
VALUES
    -- data_date = 1: products 1-6
    (1, 1, 1, 1, 1),
    (1, 2, 1, 1, 1),
    (1, 3, 1, 1, 1),
    (1, 4, 1, 1, 1),
    (1, 5, 1, 1, 1),
    (1, 6, 1, 1, 1),
    -- data_date = 3: products 1-6
    (3, 1, 2, 2, 2),
    (3, 2, 2, 2, 2),
    (3, 3, 2, 2, 2),
    (3, 4, 2, 2, 2),
    (3, 5, 2, 2, 2),
    (3, 6, 2, 2, 2),
    -- data_date = 5: products 1-7 (product 7 is new)
    (5, 1, 3, 3, 3),
    (5, 2, 3, 3, 3),
    (5, 3, 3, 3, 3),
    (5, 4, 3, 3, 3),
    (5, 5, 3, 3, 3),
    (5, 6, 3, 3, 3),
    (5, 7, 3, 3, 3),
    -- data_date = 8: products 1-7
    (8, 1, 4, 4, 4),
    (8, 2, 4, 4, 4),
    (8, 3, 4, 4, 4),
    (8, 4, 4, 4, 4),
    (8, 5, 4, 4, 4),
    (8, 6, 4, 4, 4),
    (8, 7, 4, 4, 4),
    -- data_date = 13: product 2 was removed, so it has no row here
    (13, 1, 5, 5, 5),
    (13, 3, 5, 5, 5),
    (13, 4, 5, 5, 5),
    (13, 5, 5, 5, 5),
    (13, 6, 5, 5, 5),
    (13, 7, 5, 5, 5);
查询
-- Per-product difference between two snapshots of the cumulative table:
--   * the "last" snapshot is the latest data_date not after the end bound,
--   * the "first" snapshot is the earliest data_date not before the start bound.
-- Only products present in the last snapshot are returned. A product with no
-- row in the first snapshot gets start_date = last_date and zero differences.
WITH latest_date AS
(
    -- Latest snapshot date inside the range (end bound is hard-coded).
    SELECT
        MAX(ld.data_date) AS data_date
    FROM
        trc_warehouse_product AS ld
    WHERE
        ld.data_date <= 15
)
, earliest_date AS
(
    -- Earliest snapshot date inside the range (start bound is hard-coded).
    SELECT
        MIN(ed.data_date) AS data_date
    FROM
        trc_warehouse_product AS ed
    WHERE
        ed.data_date >= 3
)
, t_last_date AS
(
    -- All product rows belonging to the last snapshot.
    SELECT
        l.data_date                AS l_date
      , l.product_id               AS l_id
      , l.cumulative_product_hits  AS l_hits
      , l.cumulative_product_sales AS l_sales
      , l.date_last_seen           AS l_last_seen
    FROM
        trc_warehouse_product AS l
    INNER JOIN
        latest_date
        ON l.data_date = latest_date.data_date
)
, t_first_date AS
(
    -- All product rows belonging to the first snapshot.
    SELECT
        f.data_date                AS f_date
      , f.product_id               AS f_id
      , f.cumulative_product_hits  AS f_hits
      , f.cumulative_product_sales AS f_sales
      , f.date_last_seen           AS f_last_seen
    FROM
        trc_warehouse_product AS f
    INNER JOIN
        earliest_date
        ON f.data_date = earliest_date.data_date
)
SELECT
    t_last_date.l_id
  , COALESCE(t_first_date.f_date, t_last_date.l_date) AS start_date
  , t_last_date.l_date AS last_date
  , CASE
        WHEN t_first_date.f_date IS NULL THEN 0
        ELSE t_last_date.l_date - t_first_date.f_date
    END AS num_days
  , CASE
        WHEN t_first_date.f_hits IS NULL THEN 0
        ELSE t_last_date.l_hits - t_first_date.f_hits
    END AS num_hits
  , CASE
        WHEN t_first_date.f_sales IS NULL THEN 0
        ELSE t_last_date.l_sales - t_first_date.f_sales
    END AS num_sales
  , t_last_date.l_last_seen AS last_seen_on
    -- For debugging, add t_last_date.* and t_first_date.* to the select list.
FROM
    t_last_date
LEFT JOIN
    t_first_date
    ON t_last_date.l_id = t_first_date.f_id
-- Explicit ordering: without it the row order of the result is unspecified.
ORDER BY
    t_last_date.l_id
;
根据我对你问题的理解,简单的聚合可以满足你的要求:
-- Per-product change of the cumulative counters within a data_date range.
-- Every product with at least one row in the range is returned, using its own
-- first and last snapshot inside the range.
-- NOTE(review): max - min equals "last - first" only if the cumulative columns
-- never decrease over time; confirm that holds for the real data.
select
    product_id,
    min(data_date) as start_date,
    max(data_date) as end_date,
    max(data_date) - min(data_date) as num_days,
    max(cumulative_product_hits) - min(cumulative_product_hits) as num_hits,
    max(cumulative_product_sales) - min(cumulative_product_sales) as num_sales,
    max(date_last_seen) as last_seen_on
from trc_warehouse_product
where data_date >= 3 and data_date <= 15
group by product_id
-- group by alone does not guarantee row order; sort explicitly
order by product_id;
对于您的示例数据,该查询的结果为:
product_id | start_date | end_date | num_days | num_hits | num_sales | last_seen_on
---------: | ---------: | -------: | -------: | -------: | --------: | -----------:
1 | 3 | 13 | 10 | 3 | 3 | 5
2 | 3 | 8 | 5 | 2 | 2 | 4
3 | 3 | 13 | 10 | 3 | 3 | 5
4 | 3 | 13 | 10 | 3 | 3 | 5
5 | 3 | 13 | 10 | 3 | 3 | 5
6 | 3 | 13 | 10 | 3 | 3 | 5
7 | 5 | 13 | 8 | 2 | 2 | 5
数据库版本 10.5.6-MariaDB
操作系统 Linux XX 5.9.8-arch1-1 #1 SMP PREEMPT Tue, 10 Nov 2020 22:44:11 +0000 x86_64 GNU/Linux
我有一个自动填充累积数据的表,我想要一条 SQL 来显示 2 个给定数据行之间的差异。
这是我当前的结果集
+------+------------+-----------+----------+----------+-----------+--------------+
| l_id | start_date | last_date | num_days | num_hits | num_sales | last_seen_on |
+------+------------+-----------+----------+----------+-----------+--------------+
| 1 | 3 | 13 | 10 | 3 | 3 | 5 |
| 3 | 3 | 13 | 10 | 3 | 3 | 5 |
| 4 | 3 | 13 | 10 | 3 | 3 | 5 |
| 5 | 3 | 13 | 10 | 3 | 3 | 5 |
| 6 | 3 | 13 | 10 | 3 | 3 | 5 |
| 7 | 13 | 13 | 0 | 0 | 0 | 5 |
+------+------------+-----------+----------+----------+-----------+--------------+
请注意,缺少产品 l_id = 2,因为它没有 l_date = 13 的记录。
Q1
下面显示的查询是获取数据的正确方法,还是有更好的方法?
Q2
是否可以从 WHERE 子句中获取开始日期(当前硬编码为 ed.data_date >= 3)和结束日期(当前硬编码为 ld.data_date <= 15)?
Q3
是否可以在结果集中获取产品id = 2的数据?
非常感谢
表定义
-- Snapshot table of cumulative per-product counters; each row holds one
-- product's counters as recorded at one data_date.
CREATE TABLE trc_warehouse_product (
    -- Snapshot time; defaults to the current Unix timestamp when omitted.
    data_date                INT DEFAULT UNIX_TIMESTAMP(),
    product_id               INT NOT NULL,
    -- Running totals as of data_date.
    cumulative_product_hits  INT,
    cumulative_product_sales INT,
    date_last_seen           INT,
    -- Rejects duplicate (data_date, product_id) pairs
    -- (rows with a NULL data_date are not constrained by a unique index).
    UNIQUE INDEX date_product (data_date, product_id)
);
仅供测试的示例数据
-- Test fixture: five snapshots (data_date 1, 3, 5, 8, 13) in which every
-- counter of a snapshot carries the same value (1..5).
INSERT INTO trc_warehouse_product
    (data_date, product_id, cumulative_product_hits, cumulative_product_sales, date_last_seen)
VALUES
    -- data_date = 1: products 1-6
    (1, 1, 1, 1, 1),
    (1, 2, 1, 1, 1),
    (1, 3, 1, 1, 1),
    (1, 4, 1, 1, 1),
    (1, 5, 1, 1, 1),
    (1, 6, 1, 1, 1),
    -- data_date = 3: products 1-6
    (3, 1, 2, 2, 2),
    (3, 2, 2, 2, 2),
    (3, 3, 2, 2, 2),
    (3, 4, 2, 2, 2),
    (3, 5, 2, 2, 2),
    (3, 6, 2, 2, 2),
    -- data_date = 5: products 1-7 (product 7 is new)
    (5, 1, 3, 3, 3),
    (5, 2, 3, 3, 3),
    (5, 3, 3, 3, 3),
    (5, 4, 3, 3, 3),
    (5, 5, 3, 3, 3),
    (5, 6, 3, 3, 3),
    (5, 7, 3, 3, 3),
    -- data_date = 8: products 1-7
    (8, 1, 4, 4, 4),
    (8, 2, 4, 4, 4),
    (8, 3, 4, 4, 4),
    (8, 4, 4, 4, 4),
    (8, 5, 4, 4, 4),
    (8, 6, 4, 4, 4),
    (8, 7, 4, 4, 4),
    -- data_date = 13: product 2 was removed, so it has no row here
    (13, 1, 5, 5, 5),
    (13, 3, 5, 5, 5),
    (13, 4, 5, 5, 5),
    (13, 5, 5, 5, 5),
    (13, 6, 5, 5, 5),
    (13, 7, 5, 5, 5);
查询
-- Per-product difference between two snapshots of the cumulative table:
--   * the "last" snapshot is the latest data_date not after the end bound,
--   * the "first" snapshot is the earliest data_date not before the start bound.
-- Only products present in the last snapshot are returned. A product with no
-- row in the first snapshot gets start_date = last_date and zero differences.
WITH latest_date AS
(
    -- Latest snapshot date inside the range (end bound is hard-coded).
    SELECT
        MAX(ld.data_date) AS data_date
    FROM
        trc_warehouse_product AS ld
    WHERE
        ld.data_date <= 15
)
, earliest_date AS
(
    -- Earliest snapshot date inside the range (start bound is hard-coded).
    SELECT
        MIN(ed.data_date) AS data_date
    FROM
        trc_warehouse_product AS ed
    WHERE
        ed.data_date >= 3
)
, t_last_date AS
(
    -- All product rows belonging to the last snapshot.
    SELECT
        l.data_date                AS l_date
      , l.product_id               AS l_id
      , l.cumulative_product_hits  AS l_hits
      , l.cumulative_product_sales AS l_sales
      , l.date_last_seen           AS l_last_seen
    FROM
        trc_warehouse_product AS l
    INNER JOIN
        latest_date
        ON l.data_date = latest_date.data_date
)
, t_first_date AS
(
    -- All product rows belonging to the first snapshot.
    SELECT
        f.data_date                AS f_date
      , f.product_id               AS f_id
      , f.cumulative_product_hits  AS f_hits
      , f.cumulative_product_sales AS f_sales
      , f.date_last_seen           AS f_last_seen
    FROM
        trc_warehouse_product AS f
    INNER JOIN
        earliest_date
        ON f.data_date = earliest_date.data_date
)
SELECT
    t_last_date.l_id
  , COALESCE(t_first_date.f_date, t_last_date.l_date) AS start_date
  , t_last_date.l_date AS last_date
  , CASE
        WHEN t_first_date.f_date IS NULL THEN 0
        ELSE t_last_date.l_date - t_first_date.f_date
    END AS num_days
  , CASE
        WHEN t_first_date.f_hits IS NULL THEN 0
        ELSE t_last_date.l_hits - t_first_date.f_hits
    END AS num_hits
  , CASE
        WHEN t_first_date.f_sales IS NULL THEN 0
        ELSE t_last_date.l_sales - t_first_date.f_sales
    END AS num_sales
  , t_last_date.l_last_seen AS last_seen_on
    -- For debugging, add t_last_date.* and t_first_date.* to the select list.
FROM
    t_last_date
LEFT JOIN
    t_first_date
    ON t_last_date.l_id = t_first_date.f_id
-- Explicit ordering: without it the row order of the result is unspecified.
ORDER BY
    t_last_date.l_id
;
根据我对你问题的理解,简单的聚合可以满足你的要求:
-- Per-product change of the cumulative counters within a data_date range.
-- Every product with at least one row in the range is returned, using its own
-- first and last snapshot inside the range.
-- NOTE(review): max - min equals "last - first" only if the cumulative columns
-- never decrease over time; confirm that holds for the real data.
select
    product_id,
    min(data_date) as start_date,
    max(data_date) as end_date,
    max(data_date) - min(data_date) as num_days,
    max(cumulative_product_hits) - min(cumulative_product_hits) as num_hits,
    max(cumulative_product_sales) - min(cumulative_product_sales) as num_sales,
    max(date_last_seen) as last_seen_on
from trc_warehouse_product
where data_date >= 3 and data_date <= 15
group by product_id
-- group by alone does not guarantee row order; sort explicitly
order by product_id;
对于您的示例数据,该查询的结果为:
product_id | start_date | end_date | num_days | num_hits | num_sales | last_seen_on
---------: | ---------: | -------: | -------: | -------: | --------: | -----------:
1 | 3 | 13 | 10 | 3 | 3 | 5
2 | 3 | 8 | 5 | 2 | 2 | 4
3 | 3 | 13 | 10 | 3 | 3 | 5
4 | 3 | 13 | 10 | 3 | 3 | 5
5 | 3 | 13 | 10 | 3 | 3 | 5
6 | 3 | 13 | 10 | 3 | 3 | 5
7 | 5 | 13 | 8 | 2 | 2 | 5