SQL select 每天 30 天频率的流失客户
SQL select lapsed customers with 30 day frequency by day
目标是 select 在 2016 日历年的每一天之前的滚动 30 天内未进行购买的不同 customer_id 的计数。我创建了要加入的数据库中的日历 table。
这里有一个示例 table 供参考,假设您的客户订单标准化如下:
+-------------+------------+----------+
| customer_id | date | order_id |
+-------------+------------+----------+
| 123 | 01/25/2016 | 1000 |
+-------------+------------+----------+
| 123 | 04/27/2016 | 1025 |
+-------------+------------+----------+
| 444 | 02/02/2016 | 1010 |
+-------------+------------+----------+
| 521 | 01/23/2016 | 998 |
+-------------+------------+----------+
| 521 | 01/24/2016 | 999 |
+-------------+------------+----------+
目标输出实际上是一个日历,2016 年的每一天都有 1 行,计算当天有多少客户 "lapsed",这意味着他们最后一次购买是在 30 天或更长时间之前从一年中的那一天开始。最终输出将如下所示:
+------------+--------------+
| date | lapsed_count |
+------------+--------------+
| 01/01/2016 | 0 |
+------------+--------------+
| 01/02/2016 | 0 |
+------------+--------------+
| ... | ... |
+------------+--------------+
| 03/01/2016 | 12 |
+------------+--------------+
| 03/02/2016 | 9 |
+------------+--------------+
| 03/03/2016 | 7 |
+------------+--------------+
此数据在 2015 年不存在,因此 Jan-01-2016 无法计算流失客户的数量,因为那是可能进行购买的第一天。
所以对于 customer_id #123,他们是在 01/25/2016 和 04/27/2016 购买的。他们应该有 2 次失误计数,因为他们的购买间隔超过 30 天。一次失误发生在 2/24/2016,另一次失误发生在 05/27/2016。
Customer_id#444 只购买了一次,所以他们应该在 02/02/2016 之后的 30 天内在 03/02/2016 有一次失误。
Customer_id#521 很棘手,因为他们以 1 天的频率购买,我们不会计算 03/02/2016 的第一次购买,所以从他们最后一次购买 03/03/2016 开始只有一次失误.失效计数将发生在 04/02/2016(+30 天)。
如果您有 table 个日期,这是一种昂贵的方法:
select date,
sum(case when prev_date < date - 30 then 1 else 0 end) as lapsed
from (select c.date, o.customer_id, max(o.date) as prev_date
from calendar c cross join
(select distinct customer_id from orders) c left join
orders o
on o.date <= c.date and o.customer_id = c.customer_id
group by c.date, o.customer_id
) oc
group by date;
对于每个 date/customer 对,它确定客户在该日期之前进行的最新购买。然后它使用此信息来计算失效的数量。
老实说,这可能适用于少数几个日期,但不适用于一整年。
抱歉,我第一次没有正确阅读您的问题。这个查询会给你所有的失误。它获取每个订单并使用分析函数计算出下一个订单日期 - 如果间隔大于 30 天,则记录为失误
WITH
cust_orders (customer_id , order_date , order_id )
AS
(SELECT 1, TO_DATE('01/01/2016','DD/MM/YYYY'), 1001 FROM dual UNION ALL
SELECT 1, TO_DATE('29/01/2016','DD/MM/YYYY'), 1002 FROM dual UNION ALL
SELECT 1, TO_DATE('01/03/2016','DD/MM/YYYY'), 1003 FROM dual UNION ALL
SELECT 2, TO_DATE('01/01/2016','DD/MM/YYYY'), 1004 FROM dual UNION ALL
SELECT 2, TO_DATE('29/01/2016','DD/MM/YYYY'), 1005 FROM dual UNION ALL
SELECT 2, TO_DATE('01/04/2016','DD/MM/YYYY'), 1006 FROM dual UNION ALL
SELECT 2, TO_DATE('01/06/2016','DD/MM/YYYY'), 1007 FROM dual UNION ALL
SELECT 2, TO_DATE('01/08/2016','DD/MM/YYYY'), 1008 FROM dual UNION ALL
SELECT 3, TO_DATE('01/09/2016','DD/MM/YYYY'), 1009 FROM dual UNION ALL
SELECT 3, TO_DATE('01/12/2016','DD/MM/YYYY'), 1010 FROM dual UNION ALL
SELECT 3, TO_DATE('02/12/2016','DD/MM/YYYY'), 1011 FROM dual UNION ALL
SELECT 3, TO_DATE('03/12/2016','DD/MM/YYYY'), 1012 FROM dual UNION ALL
SELECT 3, TO_DATE('04/12/2016','DD/MM/YYYY'), 1013 FROM dual UNION ALL
SELECT 3, TO_DATE('05/12/2016','DD/MM/YYYY'), 1014 FROM dual UNION ALL
SELECT 3, TO_DATE('06/12/2016','DD/MM/YYYY'), 1015 FROM dual UNION ALL
SELECT 3, TO_DATE('07/12/2016','DD/MM/YYYY'), 1016 FROM dual
)
SELECT
customer_id
,order_date
,order_id
,next_order_date
,order_date + 30 lapse_date
FROM
(SELECT
customer_id
,order_date
,order_id
,LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) next_order_date
FROM
cust_orders
)
WHERE NVL(next_order_date,sysdate) - order_date > 30
;
现在将其加入一组日期和 运行 COUNT 函数(将年份参数输入为 YYYY):
WITH
cust_orders (customer_id , order_date , order_id )
AS
(SELECT 1, TO_DATE('01/01/2016','DD/MM/YYYY'), 1001 FROM dual UNION ALL
SELECT 1, TO_DATE('29/01/2016','DD/MM/YYYY'), 1002 FROM dual UNION ALL
SELECT 1, TO_DATE('01/03/2016','DD/MM/YYYY'), 1003 FROM dual UNION ALL
SELECT 2, TO_DATE('01/01/2016','DD/MM/YYYY'), 1004 FROM dual UNION ALL
SELECT 2, TO_DATE('29/01/2016','DD/MM/YYYY'), 1005 FROM dual UNION ALL
SELECT 2, TO_DATE('01/04/2016','DD/MM/YYYY'), 1006 FROM dual UNION ALL
SELECT 2, TO_DATE('01/06/2016','DD/MM/YYYY'), 1007 FROM dual UNION ALL
SELECT 2, TO_DATE('01/08/2016','DD/MM/YYYY'), 1008 FROM dual UNION ALL
SELECT 3, TO_DATE('01/09/2016','DD/MM/YYYY'), 1009 FROM dual UNION ALL
SELECT 3, TO_DATE('01/12/2016','DD/MM/YYYY'), 1010 FROM dual UNION ALL
SELECT 3, TO_DATE('02/12/2016','DD/MM/YYYY'), 1011 FROM dual UNION ALL
SELECT 3, TO_DATE('03/12/2016','DD/MM/YYYY'), 1012 FROM dual UNION ALL
SELECT 3, TO_DATE('04/12/2016','DD/MM/YYYY'), 1013 FROM dual UNION ALL
SELECT 3, TO_DATE('05/12/2016','DD/MM/YYYY'), 1014 FROM dual UNION ALL
SELECT 3, TO_DATE('06/12/2016','DD/MM/YYYY'), 1015 FROM dual UNION ALL
SELECT 3, TO_DATE('07/12/2016','DD/MM/YYYY'), 1016 FROM dual
)
,calendar (date_value)
AS
(SELECT TO_DATE('01/01/'||:P_year,'DD/MM/YYYY') + (rownum -1)
FROM all_tables
WHERE rownum < (TO_DATE('31/12/'||:P_year,'DD/MM/YYYY') - TO_DATE('01/01/'||:P_year,'DD/MM/YYYY')) + 2
)
SELECT
calendar.date_value
,COUNT(*)
FROM
(
SELECT
customer_id
,order_date
,order_id
,next_order_date
,order_date + 30 lapse_date
FROM
(SELECT
customer_id
,order_date
,order_id
,LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) next_order_date
FROM
cust_orders
)
WHERE NVL(next_order_date,sysdate) - order_date > 30
) lapses
,calendar
WHERE 1=1
AND calendar.date_value = TRUNC(lapses.lapse_date)
GROUP BY
calendar.date_value
;
或者如果你真的想要打印出每个日期,那么使用这个:
WITH
cust_orders (customer_id , order_date , order_id )
AS
(SELECT 1, TO_DATE('01/01/2016','DD/MM/YYYY'), 1001 FROM dual UNION ALL
SELECT 1, TO_DATE('29/01/2016','DD/MM/YYYY'), 1002 FROM dual UNION ALL
SELECT 1, TO_DATE('01/03/2016','DD/MM/YYYY'), 1003 FROM dual UNION ALL
SELECT 2, TO_DATE('01/01/2016','DD/MM/YYYY'), 1004 FROM dual UNION ALL
SELECT 2, TO_DATE('29/01/2016','DD/MM/YYYY'), 1005 FROM dual UNION ALL
SELECT 2, TO_DATE('01/04/2016','DD/MM/YYYY'), 1006 FROM dual UNION ALL
SELECT 2, TO_DATE('01/06/2016','DD/MM/YYYY'), 1007 FROM dual UNION ALL
SELECT 2, TO_DATE('01/08/2016','DD/MM/YYYY'), 1008 FROM dual UNION ALL
SELECT 3, TO_DATE('01/09/2016','DD/MM/YYYY'), 1009 FROM dual UNION ALL
SELECT 3, TO_DATE('01/12/2016','DD/MM/YYYY'), 1010 FROM dual UNION ALL
SELECT 3, TO_DATE('02/12/2016','DD/MM/YYYY'), 1011 FROM dual UNION ALL
SELECT 3, TO_DATE('03/12/2016','DD/MM/YYYY'), 1012 FROM dual UNION ALL
SELECT 3, TO_DATE('04/12/2016','DD/MM/YYYY'), 1013 FROM dual UNION ALL
SELECT 3, TO_DATE('05/12/2016','DD/MM/YYYY'), 1014 FROM dual UNION ALL
SELECT 3, TO_DATE('06/12/2016','DD/MM/YYYY'), 1015 FROM dual UNION ALL
SELECT 3, TO_DATE('07/12/2016','DD/MM/YYYY'), 1016 FROM dual
)
,lapses
AS
(SELECT
customer_id
,order_date
,order_id
,next_order_date
,order_date + 30 lapse_date
FROM
(SELECT
customer_id
,order_date
,order_id
,LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) next_order_date
FROM
cust_orders
)
WHERE NVL(next_order_date,sysdate) - order_date > 30
)
,calendar (date_value)
AS
(SELECT TO_DATE('01/01/'||:P_year,'DD/MM/YYYY') + (rownum -1)
FROM all_tables
WHERE rownum < (TO_DATE('31/12/'||:P_year,'DD/MM/YYYY') - TO_DATE('01/01/'||:P_year,'DD/MM/YYYY')) + 2
)
SELECT
calendar.date_value
,(SELECT COUNT(*)
FROM lapses
WHERE calendar.date_value = lapses.lapse_date
)
FROM
calendar
WHERE 1=1
ORDER BY
calendar.date_value
;
这是我的做法:
WITH your_table AS (SELECT 123 customer_id, to_date('24/01/2016', 'dd/mm/yyyy') order_date, 12345 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('24/01/2016', 'dd/mm/yyyy') order_date, 12346 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('25/01/2016', 'dd/mm/yyyy') order_date, 12347 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('24/02/2016', 'dd/mm/yyyy') order_date, 12347 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('16/03/2016', 'dd/mm/yyyy') order_date, 12348 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('18/04/2016', 'dd/mm/yyyy') order_date, 12349 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('20/02/2016', 'dd/mm/yyyy') order_date, 12350 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('01/03/2016', 'dd/mm/yyyy') order_date, 12351 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('03/03/2016', 'dd/mm/yyyy') order_date, 12352 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('18/04/2016', 'dd/mm/yyyy') order_date, 12353 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('20/05/2016', 'dd/mm/yyyy') order_date, 12354 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('23/06/2016', 'dd/mm/yyyy') order_date, 12355 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('19/01/2017', 'dd/mm/yyyy') order_date, 12356 order_id FROM dual),
-- end of mimicking your_table with data in it
lapsed_info AS (SELECT customer_id,
order_date,
CASE WHEN TRUNC(SYSDATE) - order_date <= 30 THEN NULL
WHEN COUNT(*) OVER (PARTITION BY customer_id ORDER BY order_date RANGE BETWEEN 1 FOLLOWING AND 30 FOLLOWING) = 0 THEN order_date+30
ELSE NULL
END lapsed_date
FROM your_table),
dates AS (SELECT to_date('01/01/2016', 'dd/mm/yyyy') + LEVEL -1 dt
FROM dual
CONNECT BY to_date('01/01/2016', 'dd/mm/yyyy') + LEVEL -1 <= TRUNC(SYSDATE))
SELECT dates.dt,
COUNT(li.lapsed_date) lapsed_count
FROM dates
LEFT OUTER JOIN lapsed_info li ON dates.dt = li.lapsed_date
GROUP BY dates.dt
ORDER BY dates.dt;
结果:
DT LAPSED_COUNT
---------- ------------
01/01/2016 0
<snip>
23/01/2016 0
24/01/2016 0
25/01/2016 0
26/01/2016 0
<snip>
19/02/2016 0
20/02/2016 0
21/02/2016 0
22/02/2016 0
23/02/2016 0
24/02/2016 1
25/02/2016 0
<snip>
29/02/2016 0
01/03/2016 0
02/03/2016 0
03/03/2016 0
04/03/2016 0
<snip>
15/03/2016 0
16/03/2016 0
17/03/2016 0
<snip>
20/03/2016 0
21/03/2016 0
22/03/2016 0
<snip>
30/03/2016 0
31/03/2016 0
01/04/2016 0
02/04/2016 1
03/04/2016 0
<snip>
14/04/2016 0
15/04/2016 1
16/04/2016 0
17/04/2016 0
18/04/2016 0
19/04/2016 0
<snip>
17/05/2016 0
18/05/2016 2
19/05/2016 0
20/05/2016 0
21/05/2016 0
<snip>
18/06/2016 0
19/06/2016 1
20/06/2016 0
21/06/2016 0
22/06/2016 0
23/06/2016 0
24/06/2016 0
<snip>
22/07/2016 0
23/07/2016 1
24/07/2016 0
<snip>
18/01/2017 0
19/01/2017 0
20/01/2017 0
<snip>
08/02/2017 0
这会获取您的数据,并使用分析计数函数计算出在当前行日期的 30 天内(但不包括)具有值的行数。
然后我们应用 case 表达式来确定如果该行的日期在今天日期的 30 天内,我们会将这些日期计为未失效。如果返回的计数为 0,则该行被视为失效,我们会将失效日期输出为 order_date 加上 30 天。任何其他计数结果表示该行没有失效。
以上都是在lapsed_info
子查询中计算出来的。
然后我们需要做的就是列出日期(参见 dates
子查询)并根据 lapsed_date 将 lapsed_info 子查询外连接到它,然后进行计数每天的失效日期。
目标是 select 在 2016 日历年的每一天之前的滚动 30 天内未进行购买的不同 customer_id 的计数。我创建了要加入的数据库中的日历 table。
这里有一个示例 table 供参考,假设您的客户订单标准化如下:
+-------------+------------+----------+
| customer_id | date | order_id |
+-------------+------------+----------+
| 123 | 01/25/2016 | 1000 |
+-------------+------------+----------+
| 123 | 04/27/2016 | 1025 |
+-------------+------------+----------+
| 444 | 02/02/2016 | 1010 |
+-------------+------------+----------+
| 521 | 01/23/2016 | 998 |
+-------------+------------+----------+
| 521 | 01/24/2016 | 999 |
+-------------+------------+----------+
目标输出实际上是一个日历,2016 年的每一天都有 1 行,计算当天有多少客户 "lapsed",这意味着他们最后一次购买是在 30 天或更长时间之前从一年中的那一天开始。最终输出将如下所示:
+------------+--------------+
| date | lapsed_count |
+------------+--------------+
| 01/01/2016 | 0 |
+------------+--------------+
| 01/02/2016 | 0 |
+------------+--------------+
| ... | ... |
+------------+--------------+
| 03/01/2016 | 12 |
+------------+--------------+
| 03/02/2016 | 9 |
+------------+--------------+
| 03/03/2016 | 7 |
+------------+--------------+
此数据在 2015 年不存在,因此 Jan-01-2016 无法计算流失客户的数量,因为那是可能进行购买的第一天。
所以对于 customer_id #123,他们是在 01/25/2016 和 04/27/2016 购买的。他们应该有 2 次失误计数,因为他们的购买间隔超过 30 天。一次失误发生在 2/24/2016,另一次失误发生在 05/27/2016。
Customer_id#444 只购买了一次,所以他们应该在 02/02/2016 之后的 30 天内在 03/02/2016 有一次失误。
Customer_id#521 很棘手,因为他们以 1 天的频率购买,我们不会计算 03/02/2016 的第一次购买,所以从他们最后一次购买 03/03/2016 开始只有一次失误.失效计数将发生在 04/02/2016(+30 天)。
如果您有 table 个日期,这是一种昂贵的方法:
select date,
sum(case when prev_date < date - 30 then 1 else 0 end) as lapsed
from (select c.date, o.customer_id, max(o.date) as prev_date
from calendar c cross join
(select distinct customer_id from orders) c left join
orders o
on o.date <= c.date and o.customer_id = c.customer_id
group by c.date, o.customer_id
) oc
group by date;
对于每个 date/customer 对,它确定客户在该日期之前进行的最新购买。然后它使用此信息来计算失效的数量。
老实说,这可能适用于少数几个日期,但不适用于一整年。
抱歉,我第一次没有正确阅读您的问题。这个查询会给你所有的失误。它获取每个订单并使用分析函数计算出下一个订单日期 - 如果间隔大于 30 天,则记录为失误
WITH
cust_orders (customer_id , order_date , order_id )
AS
(SELECT 1, TO_DATE('01/01/2016','DD/MM/YYYY'), 1001 FROM dual UNION ALL
SELECT 1, TO_DATE('29/01/2016','DD/MM/YYYY'), 1002 FROM dual UNION ALL
SELECT 1, TO_DATE('01/03/2016','DD/MM/YYYY'), 1003 FROM dual UNION ALL
SELECT 2, TO_DATE('01/01/2016','DD/MM/YYYY'), 1004 FROM dual UNION ALL
SELECT 2, TO_DATE('29/01/2016','DD/MM/YYYY'), 1005 FROM dual UNION ALL
SELECT 2, TO_DATE('01/04/2016','DD/MM/YYYY'), 1006 FROM dual UNION ALL
SELECT 2, TO_DATE('01/06/2016','DD/MM/YYYY'), 1007 FROM dual UNION ALL
SELECT 2, TO_DATE('01/08/2016','DD/MM/YYYY'), 1008 FROM dual UNION ALL
SELECT 3, TO_DATE('01/09/2016','DD/MM/YYYY'), 1009 FROM dual UNION ALL
SELECT 3, TO_DATE('01/12/2016','DD/MM/YYYY'), 1010 FROM dual UNION ALL
SELECT 3, TO_DATE('02/12/2016','DD/MM/YYYY'), 1011 FROM dual UNION ALL
SELECT 3, TO_DATE('03/12/2016','DD/MM/YYYY'), 1012 FROM dual UNION ALL
SELECT 3, TO_DATE('04/12/2016','DD/MM/YYYY'), 1013 FROM dual UNION ALL
SELECT 3, TO_DATE('05/12/2016','DD/MM/YYYY'), 1014 FROM dual UNION ALL
SELECT 3, TO_DATE('06/12/2016','DD/MM/YYYY'), 1015 FROM dual UNION ALL
SELECT 3, TO_DATE('07/12/2016','DD/MM/YYYY'), 1016 FROM dual
)
SELECT
customer_id
,order_date
,order_id
,next_order_date
,order_date + 30 lapse_date
FROM
(SELECT
customer_id
,order_date
,order_id
,LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) next_order_date
FROM
cust_orders
)
WHERE NVL(next_order_date,sysdate) - order_date > 30
;
现在将其加入一组日期和 运行 COUNT 函数(将年份参数输入为 YYYY):
WITH
cust_orders (customer_id , order_date , order_id )
AS
(SELECT 1, TO_DATE('01/01/2016','DD/MM/YYYY'), 1001 FROM dual UNION ALL
SELECT 1, TO_DATE('29/01/2016','DD/MM/YYYY'), 1002 FROM dual UNION ALL
SELECT 1, TO_DATE('01/03/2016','DD/MM/YYYY'), 1003 FROM dual UNION ALL
SELECT 2, TO_DATE('01/01/2016','DD/MM/YYYY'), 1004 FROM dual UNION ALL
SELECT 2, TO_DATE('29/01/2016','DD/MM/YYYY'), 1005 FROM dual UNION ALL
SELECT 2, TO_DATE('01/04/2016','DD/MM/YYYY'), 1006 FROM dual UNION ALL
SELECT 2, TO_DATE('01/06/2016','DD/MM/YYYY'), 1007 FROM dual UNION ALL
SELECT 2, TO_DATE('01/08/2016','DD/MM/YYYY'), 1008 FROM dual UNION ALL
SELECT 3, TO_DATE('01/09/2016','DD/MM/YYYY'), 1009 FROM dual UNION ALL
SELECT 3, TO_DATE('01/12/2016','DD/MM/YYYY'), 1010 FROM dual UNION ALL
SELECT 3, TO_DATE('02/12/2016','DD/MM/YYYY'), 1011 FROM dual UNION ALL
SELECT 3, TO_DATE('03/12/2016','DD/MM/YYYY'), 1012 FROM dual UNION ALL
SELECT 3, TO_DATE('04/12/2016','DD/MM/YYYY'), 1013 FROM dual UNION ALL
SELECT 3, TO_DATE('05/12/2016','DD/MM/YYYY'), 1014 FROM dual UNION ALL
SELECT 3, TO_DATE('06/12/2016','DD/MM/YYYY'), 1015 FROM dual UNION ALL
SELECT 3, TO_DATE('07/12/2016','DD/MM/YYYY'), 1016 FROM dual
)
,calendar (date_value)
AS
(SELECT TO_DATE('01/01/'||:P_year,'DD/MM/YYYY') + (rownum -1)
FROM all_tables
WHERE rownum < (TO_DATE('31/12/'||:P_year,'DD/MM/YYYY') - TO_DATE('01/01/'||:P_year,'DD/MM/YYYY')) + 2
)
SELECT
calendar.date_value
,COUNT(*)
FROM
(
SELECT
customer_id
,order_date
,order_id
,next_order_date
,order_date + 30 lapse_date
FROM
(SELECT
customer_id
,order_date
,order_id
,LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) next_order_date
FROM
cust_orders
)
WHERE NVL(next_order_date,sysdate) - order_date > 30
) lapses
,calendar
WHERE 1=1
AND calendar.date_value = TRUNC(lapses.lapse_date)
GROUP BY
calendar.date_value
;
或者如果你真的想要打印出每个日期,那么使用这个:
WITH
cust_orders (customer_id , order_date , order_id )
AS
(SELECT 1, TO_DATE('01/01/2016','DD/MM/YYYY'), 1001 FROM dual UNION ALL
SELECT 1, TO_DATE('29/01/2016','DD/MM/YYYY'), 1002 FROM dual UNION ALL
SELECT 1, TO_DATE('01/03/2016','DD/MM/YYYY'), 1003 FROM dual UNION ALL
SELECT 2, TO_DATE('01/01/2016','DD/MM/YYYY'), 1004 FROM dual UNION ALL
SELECT 2, TO_DATE('29/01/2016','DD/MM/YYYY'), 1005 FROM dual UNION ALL
SELECT 2, TO_DATE('01/04/2016','DD/MM/YYYY'), 1006 FROM dual UNION ALL
SELECT 2, TO_DATE('01/06/2016','DD/MM/YYYY'), 1007 FROM dual UNION ALL
SELECT 2, TO_DATE('01/08/2016','DD/MM/YYYY'), 1008 FROM dual UNION ALL
SELECT 3, TO_DATE('01/09/2016','DD/MM/YYYY'), 1009 FROM dual UNION ALL
SELECT 3, TO_DATE('01/12/2016','DD/MM/YYYY'), 1010 FROM dual UNION ALL
SELECT 3, TO_DATE('02/12/2016','DD/MM/YYYY'), 1011 FROM dual UNION ALL
SELECT 3, TO_DATE('03/12/2016','DD/MM/YYYY'), 1012 FROM dual UNION ALL
SELECT 3, TO_DATE('04/12/2016','DD/MM/YYYY'), 1013 FROM dual UNION ALL
SELECT 3, TO_DATE('05/12/2016','DD/MM/YYYY'), 1014 FROM dual UNION ALL
SELECT 3, TO_DATE('06/12/2016','DD/MM/YYYY'), 1015 FROM dual UNION ALL
SELECT 3, TO_DATE('07/12/2016','DD/MM/YYYY'), 1016 FROM dual
)
,lapses
AS
(SELECT
customer_id
,order_date
,order_id
,next_order_date
,order_date + 30 lapse_date
FROM
(SELECT
customer_id
,order_date
,order_id
,LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) next_order_date
FROM
cust_orders
)
WHERE NVL(next_order_date,sysdate) - order_date > 30
)
,calendar (date_value)
AS
(SELECT TO_DATE('01/01/'||:P_year,'DD/MM/YYYY') + (rownum -1)
FROM all_tables
WHERE rownum < (TO_DATE('31/12/'||:P_year,'DD/MM/YYYY') - TO_DATE('01/01/'||:P_year,'DD/MM/YYYY')) + 2
)
SELECT
calendar.date_value
,(SELECT COUNT(*)
FROM lapses
WHERE calendar.date_value = lapses.lapse_date
)
FROM
calendar
WHERE 1=1
ORDER BY
calendar.date_value
;
这是我的做法:
WITH your_table AS (SELECT 123 customer_id, to_date('24/01/2016', 'dd/mm/yyyy') order_date, 12345 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('24/01/2016', 'dd/mm/yyyy') order_date, 12346 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('25/01/2016', 'dd/mm/yyyy') order_date, 12347 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('24/02/2016', 'dd/mm/yyyy') order_date, 12347 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('16/03/2016', 'dd/mm/yyyy') order_date, 12348 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('18/04/2016', 'dd/mm/yyyy') order_date, 12349 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('20/02/2016', 'dd/mm/yyyy') order_date, 12350 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('01/03/2016', 'dd/mm/yyyy') order_date, 12351 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('03/03/2016', 'dd/mm/yyyy') order_date, 12352 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('18/04/2016', 'dd/mm/yyyy') order_date, 12353 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('20/05/2016', 'dd/mm/yyyy') order_date, 12354 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('23/06/2016', 'dd/mm/yyyy') order_date, 12355 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('19/01/2017', 'dd/mm/yyyy') order_date, 12356 order_id FROM dual),
-- end of mimicking your_table with data in it
lapsed_info AS (SELECT customer_id,
order_date,
CASE WHEN TRUNC(SYSDATE) - order_date <= 30 THEN NULL
WHEN COUNT(*) OVER (PARTITION BY customer_id ORDER BY order_date RANGE BETWEEN 1 FOLLOWING AND 30 FOLLOWING) = 0 THEN order_date+30
ELSE NULL
END lapsed_date
FROM your_table),
dates AS (SELECT to_date('01/01/2016', 'dd/mm/yyyy') + LEVEL -1 dt
FROM dual
CONNECT BY to_date('01/01/2016', 'dd/mm/yyyy') + LEVEL -1 <= TRUNC(SYSDATE))
SELECT dates.dt,
COUNT(li.lapsed_date) lapsed_count
FROM dates
LEFT OUTER JOIN lapsed_info li ON dates.dt = li.lapsed_date
GROUP BY dates.dt
ORDER BY dates.dt;
结果:
DT LAPSED_COUNT
---------- ------------
01/01/2016 0
<snip>
23/01/2016 0
24/01/2016 0
25/01/2016 0
26/01/2016 0
<snip>
19/02/2016 0
20/02/2016 0
21/02/2016 0
22/02/2016 0
23/02/2016 0
24/02/2016 1
25/02/2016 0
<snip>
29/02/2016 0
01/03/2016 0
02/03/2016 0
03/03/2016 0
04/03/2016 0
<snip>
15/03/2016 0
16/03/2016 0
17/03/2016 0
<snip>
20/03/2016 0
21/03/2016 0
22/03/2016 0
<snip>
30/03/2016 0
31/03/2016 0
01/04/2016 0
02/04/2016 1
03/04/2016 0
<snip>
14/04/2016 0
15/04/2016 1
16/04/2016 0
17/04/2016 0
18/04/2016 0
19/04/2016 0
<snip>
17/05/2016 0
18/05/2016 2
19/05/2016 0
20/05/2016 0
21/05/2016 0
<snip>
18/06/2016 0
19/06/2016 1
20/06/2016 0
21/06/2016 0
22/06/2016 0
23/06/2016 0
24/06/2016 0
<snip>
22/07/2016 0
23/07/2016 1
24/07/2016 0
<snip>
18/01/2017 0
19/01/2017 0
20/01/2017 0
<snip>
08/02/2017 0
这会获取您的数据,并使用分析计数函数计算出在当前行日期的 30 天内(但不包括)具有值的行数。
然后我们应用 case 表达式来确定如果该行的日期在今天日期的 30 天内,我们会将这些日期计为未失效。如果返回的计数为 0,则该行被视为失效,我们会将失效日期输出为 order_date 加上 30 天。任何其他计数结果表示该行没有失效。
以上都是在lapsed_info
子查询中计算出来的。
然后我们需要做的就是列出日期(参见 dates
子查询)并根据 lapsed_date 将 lapsed_info 子查询外连接到它,然后进行计数每天的失效日期。