COUNT(DISTINCT) 与 CASE 表达式的问题
Count distinct and case issue
我在 Snowflake 示例数据库上重现了在线上数据中遇到的问题。
CASE 表达式("Mark 1")的结果为 NULL(这符合预期)。我认为基于同一条件的 COUNT(DISTINCT ...)("Mark 2")应该返回 0,但它实际返回 1。使用 COUNT(DISTINCT ...) 还是 COUNT(...),结果没有区别。
我遗漏了什么?为什么在这种情况下 NULL 会被计入?
代码:
-- Repro from the question: a first-level aggregation per customer, followed by
-- a second-level COUNT(DISTINCT ...) over a CASE that depends on the
-- first-level count. Kept verbatim because the exact query shape is the repro.
WITH _table1 as(
SELECT
O_CUSTKEY,
-- Distinct order keys whose total price falls in the half-open range [50000, 100000).
COUNT(DISTINCT CASE WHEN O_TOTALPRICE>=50000 AND O_TOTALPRICE<100000 THEN O_ORDERKEY END) AS NO_SALES_OVER_50_TO_100,
-- NOTE(review): this counts distinct O_TOTALPRICE values, whereas the column
-- above counts O_ORDERKEY -- confirm whether that is intended. The column is
-- not referenced by the outer query.
COUNT(DISTINCT CASE WHEN O_TOTALPRICE>=100000 THEN O_TOTALPRICE END) AS NO_SALES_OVER_100
FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS
-- Positional reference: 1 = O_CUSTKEY.
GROUP BY 1
)
SELECT
O_CUSTKEY,
-- Mark 1: the bare CASE value; the question reports it as NULL for this customer.
CASE WHEN NO_SALES_OVER_50_TO_100>0 THEN O_CUSTKEY END, --Mark 1
-- Mark 2: COUNT(DISTINCT ...) over the same CASE; the question expects 0 but reports 1.
COUNT(DISTINCT CASE WHEN NO_SALES_OVER_50_TO_100>0 THEN O_CUSTKEY end) --Mark 2
FROM _table1
WHERE O_CUSTKEY=8117
-- Positional references: 1 = O_CUSTKEY, 2 = the Mark 1 CASE expression.
GROUP BY 1,2
这看起来像是一个 bug,因为如果用更简单的逻辑来验证这个情况,Mark 2 会得到 0。例如下面的查询:
-- Reduced repro, variant with the real aggregate: column2 is produced by
-- COUNT(DISTINCT CASE ...). The reported output for this form is
-- column2 = 0, mark1 = NULL, mark2 = 1.
WITH table1 as (
SELECT
100 as column1,
-- The constant alternative for column2 is disabled in this variant:
--0 as column2
COUNT(DISTINCT CASE WHEN O_TOTALPRICE>=50000 AND O_TOTALPRICE<100000 THEN O_ORDERKEY END) as column2
FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS
WHERE O_CUSTKEY = 8117
-- Positional reference: 1 = column1 (the literal 100).
GROUP BY 1
)
select column1, column2,
case when column2 >0 then column1 end as mark1,
-- mark1 is the alias defined on the previous line (table1 has no such column).
count(distinct mark1) as mark2
FROM table1
WHERE column1 = 100
-- Positional references: 1 = column1, 2 = column2.
group by 1,2;
结果为:
COLUMN1 COLUMN2 MARK1 MARK2
100 0 NULL 1
而下面的查询:
-- Reduced repro, variant with a constant: column2 is the literal 0 instead of
-- the aggregate. The reported output for this form is
-- column2 = 0, mark1 = NULL, mark2 = 0.
WITH table1 as (
SELECT
100 as column1,
0 as column2
-- The aggregate alternative for column2 is disabled in this variant:
--COUNT(DISTINCT CASE WHEN O_TOTALPRICE>=50000 AND O_TOTALPRICE<100000 THEN O_ORDERKEY END) as column2
FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS
WHERE O_CUSTKEY = 8117
-- Positional reference: 1 = column1 (the literal 100).
GROUP BY 1
)
select column1, column2,
case when column2 >0 then column1 end as mark1,
-- mark1 is the alias defined on the previous line (table1 has no such column).
count(distinct mark1) as mark2
FROM table1
WHERE column1 = 100
-- Positional references: 1 = column1, 2 = column2.
group by 1,2;
结果为:
COLUMN1 COLUMN2 MARK1 MARK2
100 0 NULL 0
因此,你应该把它作为 bug 提交给 Snowflake。
我在 Snowflake 示例数据库上重现了在线上数据中遇到的问题。CASE 表达式("Mark 1")的结果为 NULL(这符合预期)。我认为基于同一条件的 COUNT(DISTINCT ...)("Mark 2")应该返回 0,但它实际返回 1。使用 COUNT(DISTINCT ...) 还是 COUNT(...),结果没有区别。
我遗漏了什么?为什么在这种情况下 NULL 会被计入?
代码:
-- Repro from the question: a first-level aggregation per customer, followed by
-- a second-level COUNT(DISTINCT ...) over a CASE that depends on the
-- first-level count. Kept verbatim because the exact query shape is the repro.
WITH _table1 as(
SELECT
O_CUSTKEY,
-- Distinct order keys whose total price falls in the half-open range [50000, 100000).
COUNT(DISTINCT CASE WHEN O_TOTALPRICE>=50000 AND O_TOTALPRICE<100000 THEN O_ORDERKEY END) AS NO_SALES_OVER_50_TO_100,
-- NOTE(review): this counts distinct O_TOTALPRICE values, whereas the column
-- above counts O_ORDERKEY -- confirm whether that is intended. The column is
-- not referenced by the outer query.
COUNT(DISTINCT CASE WHEN O_TOTALPRICE>=100000 THEN O_TOTALPRICE END) AS NO_SALES_OVER_100
FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS
-- Positional reference: 1 = O_CUSTKEY.
GROUP BY 1
)
SELECT
O_CUSTKEY,
-- Mark 1: the bare CASE value; the question reports it as NULL for this customer.
CASE WHEN NO_SALES_OVER_50_TO_100>0 THEN O_CUSTKEY END, --Mark 1
-- Mark 2: COUNT(DISTINCT ...) over the same CASE; the question expects 0 but reports 1.
COUNT(DISTINCT CASE WHEN NO_SALES_OVER_50_TO_100>0 THEN O_CUSTKEY end) --Mark 2
FROM _table1
WHERE O_CUSTKEY=8117
-- Positional references: 1 = O_CUSTKEY, 2 = the Mark 1 CASE expression.
GROUP BY 1,2
这看起来像是一个 bug,因为如果用更简单的逻辑来验证这个情况,Mark 2 会得到 0。例如下面的查询:
-- Reduced repro, variant with the real aggregate: column2 is produced by
-- COUNT(DISTINCT CASE ...). The reported output for this form is
-- column2 = 0, mark1 = NULL, mark2 = 1.
WITH table1 as (
SELECT
100 as column1,
-- The constant alternative for column2 is disabled in this variant:
--0 as column2
COUNT(DISTINCT CASE WHEN O_TOTALPRICE>=50000 AND O_TOTALPRICE<100000 THEN O_ORDERKEY END) as column2
FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS
WHERE O_CUSTKEY = 8117
-- Positional reference: 1 = column1 (the literal 100).
GROUP BY 1
)
select column1, column2,
case when column2 >0 then column1 end as mark1,
-- mark1 is the alias defined on the previous line (table1 has no such column).
count(distinct mark1) as mark2
FROM table1
WHERE column1 = 100
-- Positional references: 1 = column1, 2 = column2.
group by 1,2;
结果为:
COLUMN1 COLUMN2 MARK1 MARK2
100 0 NULL 1
而下面的查询:
-- Reduced repro, variant with a constant: column2 is the literal 0 instead of
-- the aggregate. The reported output for this form is
-- column2 = 0, mark1 = NULL, mark2 = 0.
WITH table1 as (
SELECT
100 as column1,
0 as column2
-- The aggregate alternative for column2 is disabled in this variant:
--COUNT(DISTINCT CASE WHEN O_TOTALPRICE>=50000 AND O_TOTALPRICE<100000 THEN O_ORDERKEY END) as column2
FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS
WHERE O_CUSTKEY = 8117
-- Positional reference: 1 = column1 (the literal 100).
GROUP BY 1
)
select column1, column2,
case when column2 >0 then column1 end as mark1,
-- mark1 is the alias defined on the previous line (table1 has no such column).
count(distinct mark1) as mark2
FROM table1
WHERE column1 = 100
-- Positional references: 1 = column1, 2 = column2.
group by 1,2;
结果为:
COLUMN1 COLUMN2 MARK1 MARK2
100 0 NULL 0
因此,你应该把它作为 bug 提交给 Snowflake。