SQL:多列填充缺失值
SQL: Fill missing values multiple columns
给定以下数据和 SQL 命令:
# Asker's work-in-progress query (BigQuery Standard SQL).
# Builds the sample postings, pairs every item with every posting date, and
# left-joins the result onto a generated calendar.
WITH
  # Sample postings: only changed items are posted on a given day.
  TableItem AS (
    SELECT
      ItemNr,
      PostingDate,
      Quantity
    FROM
      UNNEST(ARRAY<STRUCT<ItemNr STRING, PostingDate TIMESTAMP, Quantity INT64>>[
        # 2018-04-30: Item18-0001 .. Item18-0004
        ('Item18-0001', TIMESTAMP '2018-04-30', 1),
        ('Item18-0002', TIMESTAMP '2018-04-30', 2),
        ('Item18-0003', TIMESTAMP '2018-04-30', 3),
        ('Item18-0004', TIMESTAMP '2018-04-30', 4),
        # 2018-05-01: no postings at all
        # 2018-05-02: Item18-0004 missing, Item18-0005 new
        ('Item18-0001', TIMESTAMP '2018-05-02', 1),
        ('Item18-0002', TIMESTAMP '2018-05-02', 1),
        ('Item18-0003', TIMESTAMP '2018-05-02', 1),
        ('Item18-0005', TIMESTAMP '2018-05-02', 5),
        # 2018-05-03: Item18-0004 and Item18-0005 missing, Item18-0006 new
        ('Item18-0001', TIMESTAMP '2018-05-03', 1),
        ('Item18-0002', TIMESTAMP '2018-05-03', 2),
        ('Item18-0003', TIMESTAMP '2018-05-03', 3),
        ('Item18-0006', TIMESTAMP '2018-05-03', 6),
        # 2018-05-04: only Item18-0002 and Item18-0003, each posted twice
        ('Item18-0002', TIMESTAMP '2018-05-04', 2),
        ('Item18-0002', TIMESTAMP '2018-05-04', 2),
        ('Item18-0003', TIMESTAMP '2018-05-04', 3),
        ('Item18-0003', TIMESTAMP '2018-05-04', 3),
        # 2018-05-05: some items missing, Item18-0007 new
        ('Item18-0001', TIMESTAMP '2018-05-05', 1),
        ('Item18-0003', TIMESTAMP '2018-05-05', 3),
        ('Item18-0005', TIMESTAMP '2018-05-05', 5),
        ('Item18-0007', TIMESTAMP '2018-05-05', 7)
      ])
  ),
  # Every distinct ItemNr paired with every distinct PostingDate found in the data.
  TableItemNrPostingDate AS (
    SELECT
      ItemNr,
      PostingDate
    FROM
      (SELECT DISTINCT ItemNr FROM TableItem) AS items
    CROSS JOIN
      (SELECT DISTINCT PostingDate FROM TableItem) AS postingDays
  ),
  # Calendar running from 2018-04-30 to the last day of the current month.
  # Days that have no posting date keep a single row with NULL ItemNr/PostingDate.
  TableCalenderDayItemNrPostingDate AS (
    SELECT
      CalenderDay,
      combos.ItemNr AS ItemNr,
      combos.PostingDate AS PostingDate
    FROM
      UNNEST(
        GENERATE_DATE_ARRAY(
          "2018-04-30",
          DATE_SUB(DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH), INTERVAL 1 DAY),
          INTERVAL 1 DAY)) AS CalenderDay
    LEFT JOIN
      TableItemNrPostingDate AS combos
    ON
      CalenderDay = DATE(combos.PostingDate)
    ORDER BY
      CalenderDay
  )
SELECT
  CalenderDay,
  # Unfinished fill attempt: within a partition keyed by ItemNr, the first
  # ItemNr is simply that same ItemNr, so nothing is filled here yet.
  FIRST_VALUE(ItemNr) OVER (
    PARTITION BY ItemNr
    ORDER BY CalenderDay ASC
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
  ItemNr,
  PostingDate
FROM
  TableCalenderDayItemNrPostingDate
ORDER BY
  CalenderDay, ItemNr
我想从最早的日期开始,为每个 ItemNr 和每个日历日期获取 ItemNr、PostingDate 和 Quantity。缺失的值必须用该项目之前最近的值来填充。
我的表只会在数量发生变化时才写入/更新相应的数量和日期。这意味着并非所有项目每天都会更新;新项目会在之后才进入表中,而且有些日子根本没有任何变化。
对于给定的数据,我希望得到如下结果(为了便于识别,Quantity 与 ItemNr 的编号相同):
Item18-0001 2018-04-30 1
Item18-0002 2018-04-30 2
Item18-0003 2018-04-30 3
Item18-0004 2018-04-30 4
Item18-0005 2018-04-30 0 (or null or empty row)
Item18-0006 2018-04-30 0 (or null or empty row)
Item18-0007 2018-04-30 0 (or null or empty row)
Item18-0001 2018-05-01 1
Item18-0002 2018-05-01 2
Item18-0003 2018-05-01 3
Item18-0004 2018-05-01 4
Item18-0005 2018-05-01 0 (or null or empty row)
Item18-0006 2018-05-01 0 (or null or empty row)
Item18-0007 2018-05-01 0 (or null or empty row)
Item18-0001 2018-05-02 1
Item18-0002 2018-05-02 2
Item18-0003 2018-05-02 3
Item18-0004 2018-05-02 4
Item18-0005 2018-05-02 5
Item18-0006 2018-05-02 0 (or null or empty row)
Item18-0007 2018-05-02 0 (or null or empty row)
Item18-0001 2018-05-03 1
Item18-0002 2018-05-03 2
Item18-0003 2018-05-03 3
Item18-0004 2018-05-03 4
Item18-0005 2018-05-03 5
Item18-0006 2018-05-03 6
Item18-0007 2018-05-03 0 (or null or empty row)
Item18-0001 2018-05-04 1
Item18-0002 2018-05-04 2
Item18-0003 2018-05-04 3
Item18-0004 2018-05-04 4
Item18-0005 2018-05-04 5
Item18-0006 2018-05-04 6
Item18-0007 2018-05-04 0 (or null or empty row)
Item18-0001 2018-05-05 1
Item18-0002 2018-05-05 2
Item18-0003 2018-05-05 3
Item18-0004 2018-05-05 4
Item18-0005 2018-05-05 5
Item18-0006 2018-05-05 6
Item18-0007 2018-05-05 7
我的 SQL 命令尚未完成,而且结果是错误的。我只是想展示我所做的尝试和我的意图。
以下适用于 BigQuery StandardSQL
#standardSQL
-- Forward-fills Quantity for every (calendar day, ItemNr) combination.
WITH DateBounds AS (
  -- First and last posting day present in the table.
  SELECT
    DATE(MIN(PostingDate)) AS minPostingDate,
    DATE(MAX(PostingDate)) AS maxPostingDate
  FROM `project.dataset.TableItem`
), TableDates AS (
  -- One row per calendar day in that range, cast back to TIMESTAMP so it can
  -- be joined against the TIMESTAMP-typed PostingDate column.
  -- CURRENT_DATE() can be used instead of maxPostingDate, depending on your needs.
  SELECT CAST(calendarDay AS TIMESTAMP) AS PostingDate
  FROM DateBounds
  CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(minPostingDate, maxPostingDate)) AS calendarDay
), TableItemNr AS (
  -- Every item that ever appears in the table.
  SELECT DISTINCT ItemNr
  FROM `project.dataset.TableItem`
)
SELECT
  i.ItemNr,
  d.PostingDate,
  t.Quantity,
  -- 1 when the row comes from the source table, 0 when it is a generated filler row.
  CASE WHEN t.ItemNr IS NULL THEN 0 ELSE 1 END AS original,
  -- Latest non-NULL Quantity of the item up to and including the current day;
  -- stays NULL until the item's first posting.
  LAST_VALUE(Quantity IGNORE NULLS) OVER (PARTITION BY ItemNr ORDER BY PostingDate) AS updatedQuantity
FROM TableDates AS d
CROSS JOIN TableItemNr AS i
-- NOTE(review): the equality join only matches postings stamped exactly at the
-- generated day boundary (midnight in the sample data) -- confirm for real data.
LEFT JOIN `project.dataset.TableItem` AS t
  USING (ItemNr, PostingDate)
-- Optional: ORDER BY PostingDate, ItemNr
您可以使用问题中的示例数据来测试/试验上面的查询:
#standardSQL
-- Self-contained demo: the forward-fill query run against the question's sample data.
WITH `project.dataset.TableItem` AS (
  -- Sample postings standing in for the real table.
  SELECT
    ItemNr,
    PostingDate,
    Quantity
  FROM UNNEST(ARRAY<STRUCT<ItemNr STRING, PostingDate TIMESTAMP, Quantity INT64>>[
    -- 2018-04-30: Item18-0001 .. Item18-0004
    ('Item18-0001', TIMESTAMP '2018-04-30', 1),
    ('Item18-0002', TIMESTAMP '2018-04-30', 2),
    ('Item18-0003', TIMESTAMP '2018-04-30', 3),
    ('Item18-0004', TIMESTAMP '2018-04-30', 4),
    -- 2018-05-01: no postings at all
    -- 2018-05-02: Item18-0004 missing, Item18-0005 new
    ('Item18-0001', TIMESTAMP '2018-05-02', 1),
    ('Item18-0002', TIMESTAMP '2018-05-02', 1),
    ('Item18-0003', TIMESTAMP '2018-05-02', 1),
    ('Item18-0005', TIMESTAMP '2018-05-02', 5),
    -- 2018-05-03: Item18-0004 and Item18-0005 missing, Item18-0006 new
    ('Item18-0001', TIMESTAMP '2018-05-03', 1),
    ('Item18-0002', TIMESTAMP '2018-05-03', 2),
    ('Item18-0003', TIMESTAMP '2018-05-03', 3),
    ('Item18-0006', TIMESTAMP '2018-05-03', 6),
    -- 2018-05-04: only Item18-0002 and Item18-0003, each posted twice
    ('Item18-0002', TIMESTAMP '2018-05-04', 2),
    ('Item18-0002', TIMESTAMP '2018-05-04', 2),
    ('Item18-0003', TIMESTAMP '2018-05-04', 3),
    ('Item18-0003', TIMESTAMP '2018-05-04', 3),
    -- 2018-05-05: some items missing, Item18-0007 new
    ('Item18-0001', TIMESTAMP '2018-05-05', 1),
    ('Item18-0003', TIMESTAMP '2018-05-05', 3),
    ('Item18-0005', TIMESTAMP '2018-05-05', 5),
    ('Item18-0007', TIMESTAMP '2018-05-05', 7)
  ])
), DateBounds AS (
  -- First and last posting day present in the data.
  SELECT
    DATE(MIN(PostingDate)) AS minPostingDate,
    DATE(MAX(PostingDate)) AS maxPostingDate
  FROM `project.dataset.TableItem`
), TableDates AS (
  -- One row per calendar day in that range, cast back to TIMESTAMP so it can
  -- be joined against the TIMESTAMP-typed PostingDate column.
  SELECT CAST(calendarDay AS TIMESTAMP) AS PostingDate
  FROM DateBounds
  CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(minPostingDate, maxPostingDate)) AS calendarDay
), TableItemNr AS (
  -- Every item that ever appears in the data.
  SELECT DISTINCT ItemNr
  FROM `project.dataset.TableItem`
)
SELECT
  i.ItemNr,
  d.PostingDate,
  t.Quantity,
  -- 1 when the row comes from the sample data, 0 when it is a generated filler row.
  CASE WHEN t.ItemNr IS NULL THEN 0 ELSE 1 END AS original,
  -- Latest non-NULL Quantity of the item up to and including the current day;
  -- stays NULL until the item's first posting.
  LAST_VALUE(Quantity IGNORE NULLS) OVER (PARTITION BY ItemNr ORDER BY PostingDate) AS updatedQuantity
FROM TableDates AS d
CROSS JOIN TableItemNr AS i
-- Duplicate source rows (2018-05-04) each match, so they appear twice in the output.
LEFT JOIN `project.dataset.TableItem` AS t
  USING (ItemNr, PostingDate)
ORDER BY PostingDate, ItemNr
结果为
Row ItemNr PostingDate Quantity original updatedQuantity
1 Item18-0001 2018-04-30 00:00:00.000 UTC 1 1 1
2 Item18-0002 2018-04-30 00:00:00.000 UTC 2 1 2
3 Item18-0003 2018-04-30 00:00:00.000 UTC 3 1 3
4 Item18-0004 2018-04-30 00:00:00.000 UTC 4 1 4
5 Item18-0005 2018-04-30 00:00:00.000 UTC null 0 null
6 Item18-0006 2018-04-30 00:00:00.000 UTC null 0 null
7 Item18-0007 2018-04-30 00:00:00.000 UTC null 0 null
8 Item18-0001 2018-05-01 00:00:00.000 UTC null 0 1
9 Item18-0002 2018-05-01 00:00:00.000 UTC null 0 2
10 Item18-0003 2018-05-01 00:00:00.000 UTC null 0 3
11 Item18-0004 2018-05-01 00:00:00.000 UTC null 0 4
12 Item18-0005 2018-05-01 00:00:00.000 UTC null 0 null
13 Item18-0006 2018-05-01 00:00:00.000 UTC null 0 null
14 Item18-0007 2018-05-01 00:00:00.000 UTC null 0 null
15 Item18-0001 2018-05-02 00:00:00.000 UTC 1 1 1
16 Item18-0002 2018-05-02 00:00:00.000 UTC 1 1 1
17 Item18-0003 2018-05-02 00:00:00.000 UTC 1 1 1
18 Item18-0004 2018-05-02 00:00:00.000 UTC null 0 4
19 Item18-0005 2018-05-02 00:00:00.000 UTC 5 1 5
20 Item18-0006 2018-05-02 00:00:00.000 UTC null 0 null
21 Item18-0007 2018-05-02 00:00:00.000 UTC null 0 null
22 Item18-0001 2018-05-03 00:00:00.000 UTC 1 1 1
23 Item18-0002 2018-05-03 00:00:00.000 UTC 2 1 2
24 Item18-0003 2018-05-03 00:00:00.000 UTC 3 1 3
25 Item18-0004 2018-05-03 00:00:00.000 UTC null 0 4
26 Item18-0005 2018-05-03 00:00:00.000 UTC null 0 5
27 Item18-0006 2018-05-03 00:00:00.000 UTC 6 1 6
28 Item18-0007 2018-05-03 00:00:00.000 UTC null 0 null
29 Item18-0001 2018-05-04 00:00:00.000 UTC null 0 1
30 Item18-0002 2018-05-04 00:00:00.000 UTC 2 1 2
31 Item18-0002 2018-05-04 00:00:00.000 UTC 2 1 2
32 Item18-0003 2018-05-04 00:00:00.000 UTC 3 1 3
33 Item18-0003 2018-05-04 00:00:00.000 UTC 3 1 3
34 Item18-0004 2018-05-04 00:00:00.000 UTC null 0 4
35 Item18-0005 2018-05-04 00:00:00.000 UTC null 0 5
36 Item18-0006 2018-05-04 00:00:00.000 UTC null 0 6
37 Item18-0007 2018-05-04 00:00:00.000 UTC null 0 null
38 Item18-0001 2018-05-05 00:00:00.000 UTC 1 1 1
39 Item18-0002 2018-05-05 00:00:00.000 UTC null 0 2
40 Item18-0003 2018-05-05 00:00:00.000 UTC 3 1 3
41 Item18-0004 2018-05-05 00:00:00.000 UTC null 0 4
42 Item18-0005 2018-05-05 00:00:00.000 UTC 5 1 5
43 Item18-0006 2018-05-05 00:00:00.000 UTC null 0 6
44 Item18-0007 2018-05-05 00:00:00.000 UTC 7 1 7
注意:出于某种原因,您的日期是 TIMESTAMP 数据类型,因此我不得不做一些额外的 CAST 转换。
给定以下数据和 SQL 命令:
# Asker's work-in-progress query (BigQuery Standard SQL).
# Builds the sample postings, pairs every item with every posting date, and
# left-joins the result onto a generated calendar.
WITH
  # Sample postings: only changed items are posted on a given day.
  TableItem AS (
    SELECT
      ItemNr,
      PostingDate,
      Quantity
    FROM
      UNNEST(ARRAY<STRUCT<ItemNr STRING, PostingDate TIMESTAMP, Quantity INT64>>[
        # 2018-04-30: Item18-0001 .. Item18-0004
        ('Item18-0001', TIMESTAMP '2018-04-30', 1),
        ('Item18-0002', TIMESTAMP '2018-04-30', 2),
        ('Item18-0003', TIMESTAMP '2018-04-30', 3),
        ('Item18-0004', TIMESTAMP '2018-04-30', 4),
        # 2018-05-01: no postings at all
        # 2018-05-02: Item18-0004 missing, Item18-0005 new
        ('Item18-0001', TIMESTAMP '2018-05-02', 1),
        ('Item18-0002', TIMESTAMP '2018-05-02', 1),
        ('Item18-0003', TIMESTAMP '2018-05-02', 1),
        ('Item18-0005', TIMESTAMP '2018-05-02', 5),
        # 2018-05-03: Item18-0004 and Item18-0005 missing, Item18-0006 new
        ('Item18-0001', TIMESTAMP '2018-05-03', 1),
        ('Item18-0002', TIMESTAMP '2018-05-03', 2),
        ('Item18-0003', TIMESTAMP '2018-05-03', 3),
        ('Item18-0006', TIMESTAMP '2018-05-03', 6),
        # 2018-05-04: only Item18-0002 and Item18-0003, each posted twice
        ('Item18-0002', TIMESTAMP '2018-05-04', 2),
        ('Item18-0002', TIMESTAMP '2018-05-04', 2),
        ('Item18-0003', TIMESTAMP '2018-05-04', 3),
        ('Item18-0003', TIMESTAMP '2018-05-04', 3),
        # 2018-05-05: some items missing, Item18-0007 new
        ('Item18-0001', TIMESTAMP '2018-05-05', 1),
        ('Item18-0003', TIMESTAMP '2018-05-05', 3),
        ('Item18-0005', TIMESTAMP '2018-05-05', 5),
        ('Item18-0007', TIMESTAMP '2018-05-05', 7)
      ])
  ),
  # Every distinct ItemNr paired with every distinct PostingDate found in the data.
  TableItemNrPostingDate AS (
    SELECT
      ItemNr,
      PostingDate
    FROM
      (SELECT DISTINCT ItemNr FROM TableItem) AS items
    CROSS JOIN
      (SELECT DISTINCT PostingDate FROM TableItem) AS postingDays
  ),
  # Calendar running from 2018-04-30 to the last day of the current month.
  # Days that have no posting date keep a single row with NULL ItemNr/PostingDate.
  TableCalenderDayItemNrPostingDate AS (
    SELECT
      CalenderDay,
      combos.ItemNr AS ItemNr,
      combos.PostingDate AS PostingDate
    FROM
      UNNEST(
        GENERATE_DATE_ARRAY(
          "2018-04-30",
          DATE_SUB(DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH), INTERVAL 1 DAY),
          INTERVAL 1 DAY)) AS CalenderDay
    LEFT JOIN
      TableItemNrPostingDate AS combos
    ON
      CalenderDay = DATE(combos.PostingDate)
    ORDER BY
      CalenderDay
  )
SELECT
  CalenderDay,
  # Unfinished fill attempt: within a partition keyed by ItemNr, the first
  # ItemNr is simply that same ItemNr, so nothing is filled here yet.
  FIRST_VALUE(ItemNr) OVER (
    PARTITION BY ItemNr
    ORDER BY CalenderDay ASC
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
  ItemNr,
  PostingDate
FROM
  TableCalenderDayItemNrPostingDate
ORDER BY
  CalenderDay, ItemNr
我想从最早的日期开始,为每个 ItemNr 和每个日历日期获取 ItemNr、PostingDate 和 Quantity。缺失的值必须用该项目之前最近的值来填充。
我的表只会在数量发生变化时才写入/更新相应的数量和日期。这意味着并非所有项目每天都会更新;新项目会在之后才进入表中,而且有些日子根本没有任何变化。
对于给定的数据,我希望得到如下结果(为了便于识别,Quantity 与 ItemNr 的编号相同):
Item18-0001 2018-04-30 1
Item18-0002 2018-04-30 2
Item18-0003 2018-04-30 3
Item18-0004 2018-04-30 4
Item18-0005 2018-04-30 0 (or null or empty row)
Item18-0006 2018-04-30 0 (or null or empty row)
Item18-0007 2018-04-30 0 (or null or empty row)
Item18-0001 2018-05-01 1
Item18-0002 2018-05-01 2
Item18-0003 2018-05-01 3
Item18-0004 2018-05-01 4
Item18-0005 2018-05-01 0 (or null or empty row)
Item18-0006 2018-05-01 0 (or null or empty row)
Item18-0007 2018-05-01 0 (or null or empty row)
Item18-0001 2018-05-02 1
Item18-0002 2018-05-02 2
Item18-0003 2018-05-02 3
Item18-0004 2018-05-02 4
Item18-0005 2018-05-02 5
Item18-0006 2018-05-02 0 (or null or empty row)
Item18-0007 2018-05-02 0 (or null or empty row)
Item18-0001 2018-05-03 1
Item18-0002 2018-05-03 2
Item18-0003 2018-05-03 3
Item18-0004 2018-05-03 4
Item18-0005 2018-05-03 5
Item18-0006 2018-05-03 6
Item18-0007 2018-05-03 0 (or null or empty row)
Item18-0001 2018-05-04 1
Item18-0002 2018-05-04 2
Item18-0003 2018-05-04 3
Item18-0004 2018-05-04 4
Item18-0005 2018-05-04 5
Item18-0006 2018-05-04 6
Item18-0007 2018-05-04 0 (or null or empty row)
Item18-0001 2018-05-05 1
Item18-0002 2018-05-05 2
Item18-0003 2018-05-05 3
Item18-0004 2018-05-05 4
Item18-0005 2018-05-05 5
Item18-0006 2018-05-05 6
Item18-0007 2018-05-05 7
我的 SQL 命令尚未完成,而且结果是错误的。我只是想展示我所做的尝试和我的意图。
以下适用于 BigQuery StandardSQL
#standardSQL
-- Forward-fills Quantity for every (calendar day, ItemNr) combination.
WITH DateBounds AS (
  -- First and last posting day present in the table.
  SELECT
    DATE(MIN(PostingDate)) AS minPostingDate,
    DATE(MAX(PostingDate)) AS maxPostingDate
  FROM `project.dataset.TableItem`
), TableDates AS (
  -- One row per calendar day in that range, cast back to TIMESTAMP so it can
  -- be joined against the TIMESTAMP-typed PostingDate column.
  -- CURRENT_DATE() can be used instead of maxPostingDate, depending on your needs.
  SELECT CAST(calendarDay AS TIMESTAMP) AS PostingDate
  FROM DateBounds
  CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(minPostingDate, maxPostingDate)) AS calendarDay
), TableItemNr AS (
  -- Every item that ever appears in the table.
  SELECT DISTINCT ItemNr
  FROM `project.dataset.TableItem`
)
SELECT
  i.ItemNr,
  d.PostingDate,
  t.Quantity,
  -- 1 when the row comes from the source table, 0 when it is a generated filler row.
  CASE WHEN t.ItemNr IS NULL THEN 0 ELSE 1 END AS original,
  -- Latest non-NULL Quantity of the item up to and including the current day;
  -- stays NULL until the item's first posting.
  LAST_VALUE(Quantity IGNORE NULLS) OVER (PARTITION BY ItemNr ORDER BY PostingDate) AS updatedQuantity
FROM TableDates AS d
CROSS JOIN TableItemNr AS i
-- NOTE(review): the equality join only matches postings stamped exactly at the
-- generated day boundary (midnight in the sample data) -- confirm for real data.
LEFT JOIN `project.dataset.TableItem` AS t
  USING (ItemNr, PostingDate)
-- Optional: ORDER BY PostingDate, ItemNr
您可以使用问题中的示例数据来测试/试验上面的查询:
#standardSQL
-- Self-contained demo: the forward-fill query run against the question's sample data.
WITH `project.dataset.TableItem` AS (
  -- Sample postings standing in for the real table.
  SELECT
    ItemNr,
    PostingDate,
    Quantity
  FROM UNNEST(ARRAY<STRUCT<ItemNr STRING, PostingDate TIMESTAMP, Quantity INT64>>[
    -- 2018-04-30: Item18-0001 .. Item18-0004
    ('Item18-0001', TIMESTAMP '2018-04-30', 1),
    ('Item18-0002', TIMESTAMP '2018-04-30', 2),
    ('Item18-0003', TIMESTAMP '2018-04-30', 3),
    ('Item18-0004', TIMESTAMP '2018-04-30', 4),
    -- 2018-05-01: no postings at all
    -- 2018-05-02: Item18-0004 missing, Item18-0005 new
    ('Item18-0001', TIMESTAMP '2018-05-02', 1),
    ('Item18-0002', TIMESTAMP '2018-05-02', 1),
    ('Item18-0003', TIMESTAMP '2018-05-02', 1),
    ('Item18-0005', TIMESTAMP '2018-05-02', 5),
    -- 2018-05-03: Item18-0004 and Item18-0005 missing, Item18-0006 new
    ('Item18-0001', TIMESTAMP '2018-05-03', 1),
    ('Item18-0002', TIMESTAMP '2018-05-03', 2),
    ('Item18-0003', TIMESTAMP '2018-05-03', 3),
    ('Item18-0006', TIMESTAMP '2018-05-03', 6),
    -- 2018-05-04: only Item18-0002 and Item18-0003, each posted twice
    ('Item18-0002', TIMESTAMP '2018-05-04', 2),
    ('Item18-0002', TIMESTAMP '2018-05-04', 2),
    ('Item18-0003', TIMESTAMP '2018-05-04', 3),
    ('Item18-0003', TIMESTAMP '2018-05-04', 3),
    -- 2018-05-05: some items missing, Item18-0007 new
    ('Item18-0001', TIMESTAMP '2018-05-05', 1),
    ('Item18-0003', TIMESTAMP '2018-05-05', 3),
    ('Item18-0005', TIMESTAMP '2018-05-05', 5),
    ('Item18-0007', TIMESTAMP '2018-05-05', 7)
  ])
), DateBounds AS (
  -- First and last posting day present in the data.
  SELECT
    DATE(MIN(PostingDate)) AS minPostingDate,
    DATE(MAX(PostingDate)) AS maxPostingDate
  FROM `project.dataset.TableItem`
), TableDates AS (
  -- One row per calendar day in that range, cast back to TIMESTAMP so it can
  -- be joined against the TIMESTAMP-typed PostingDate column.
  SELECT CAST(calendarDay AS TIMESTAMP) AS PostingDate
  FROM DateBounds
  CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(minPostingDate, maxPostingDate)) AS calendarDay
), TableItemNr AS (
  -- Every item that ever appears in the data.
  SELECT DISTINCT ItemNr
  FROM `project.dataset.TableItem`
)
SELECT
  i.ItemNr,
  d.PostingDate,
  t.Quantity,
  -- 1 when the row comes from the sample data, 0 when it is a generated filler row.
  CASE WHEN t.ItemNr IS NULL THEN 0 ELSE 1 END AS original,
  -- Latest non-NULL Quantity of the item up to and including the current day;
  -- stays NULL until the item's first posting.
  LAST_VALUE(Quantity IGNORE NULLS) OVER (PARTITION BY ItemNr ORDER BY PostingDate) AS updatedQuantity
FROM TableDates AS d
CROSS JOIN TableItemNr AS i
-- Duplicate source rows (2018-05-04) each match, so they appear twice in the output.
LEFT JOIN `project.dataset.TableItem` AS t
  USING (ItemNr, PostingDate)
ORDER BY PostingDate, ItemNr
结果为
Row ItemNr PostingDate Quantity original updatedQuantity
1 Item18-0001 2018-04-30 00:00:00.000 UTC 1 1 1
2 Item18-0002 2018-04-30 00:00:00.000 UTC 2 1 2
3 Item18-0003 2018-04-30 00:00:00.000 UTC 3 1 3
4 Item18-0004 2018-04-30 00:00:00.000 UTC 4 1 4
5 Item18-0005 2018-04-30 00:00:00.000 UTC null 0 null
6 Item18-0006 2018-04-30 00:00:00.000 UTC null 0 null
7 Item18-0007 2018-04-30 00:00:00.000 UTC null 0 null
8 Item18-0001 2018-05-01 00:00:00.000 UTC null 0 1
9 Item18-0002 2018-05-01 00:00:00.000 UTC null 0 2
10 Item18-0003 2018-05-01 00:00:00.000 UTC null 0 3
11 Item18-0004 2018-05-01 00:00:00.000 UTC null 0 4
12 Item18-0005 2018-05-01 00:00:00.000 UTC null 0 null
13 Item18-0006 2018-05-01 00:00:00.000 UTC null 0 null
14 Item18-0007 2018-05-01 00:00:00.000 UTC null 0 null
15 Item18-0001 2018-05-02 00:00:00.000 UTC 1 1 1
16 Item18-0002 2018-05-02 00:00:00.000 UTC 1 1 1
17 Item18-0003 2018-05-02 00:00:00.000 UTC 1 1 1
18 Item18-0004 2018-05-02 00:00:00.000 UTC null 0 4
19 Item18-0005 2018-05-02 00:00:00.000 UTC 5 1 5
20 Item18-0006 2018-05-02 00:00:00.000 UTC null 0 null
21 Item18-0007 2018-05-02 00:00:00.000 UTC null 0 null
22 Item18-0001 2018-05-03 00:00:00.000 UTC 1 1 1
23 Item18-0002 2018-05-03 00:00:00.000 UTC 2 1 2
24 Item18-0003 2018-05-03 00:00:00.000 UTC 3 1 3
25 Item18-0004 2018-05-03 00:00:00.000 UTC null 0 4
26 Item18-0005 2018-05-03 00:00:00.000 UTC null 0 5
27 Item18-0006 2018-05-03 00:00:00.000 UTC 6 1 6
28 Item18-0007 2018-05-03 00:00:00.000 UTC null 0 null
29 Item18-0001 2018-05-04 00:00:00.000 UTC null 0 1
30 Item18-0002 2018-05-04 00:00:00.000 UTC 2 1 2
31 Item18-0002 2018-05-04 00:00:00.000 UTC 2 1 2
32 Item18-0003 2018-05-04 00:00:00.000 UTC 3 1 3
33 Item18-0003 2018-05-04 00:00:00.000 UTC 3 1 3
34 Item18-0004 2018-05-04 00:00:00.000 UTC null 0 4
35 Item18-0005 2018-05-04 00:00:00.000 UTC null 0 5
36 Item18-0006 2018-05-04 00:00:00.000 UTC null 0 6
37 Item18-0007 2018-05-04 00:00:00.000 UTC null 0 null
38 Item18-0001 2018-05-05 00:00:00.000 UTC 1 1 1
39 Item18-0002 2018-05-05 00:00:00.000 UTC null 0 2
40 Item18-0003 2018-05-05 00:00:00.000 UTC 3 1 3
41 Item18-0004 2018-05-05 00:00:00.000 UTC null 0 4
42 Item18-0005 2018-05-05 00:00:00.000 UTC 5 1 5
43 Item18-0006 2018-05-05 00:00:00.000 UTC null 0 6
44 Item18-0007 2018-05-05 00:00:00.000 UTC 7 1 7
注意:出于某种原因,您的日期是 TIMESTAMP 数据类型,因此我不得不做一些额外的 CAST 转换。