Oracle 正则表达式计算逗号包围的字符串的多次出现
Oracle regex count multiple occurrences of a string surrounded by commas
这个问题类似于我之前提过的一个问题。我正在寻找一种方法,用来统计 Oracle (11g) SQL 数据库某一列中以逗号分隔的值列表里某个字符串出现的次数。例如,假设我有以下数据:
-- Sample data: one comma-separated list per row, exposed as col1.
-- 'OSL' and 'SLR' are near-misses that must not be counted as 'SL'.
SELECT 'SL,PK'             AS col1 FROM dual UNION ALL
SELECT 'SL,CR,SL'          AS col1 FROM dual UNION ALL
SELECT 'PK,SL'             AS col1 FROM dual UNION ALL
SELECT 'SL,SL'             AS col1 FROM dual UNION ALL
SELECT 'SL'                AS col1 FROM dual UNION ALL
SELECT 'PK'                AS col1 FROM dual UNION ALL
SELECT 'PI,SL,PK'          AS col1 FROM dual UNION ALL
SELECT 'PI,SL,SL,PK'       AS col1 FROM dual UNION ALL
SELECT 'PI,SL,SL,SL,PK'    AS col1 FROM dual UNION ALL
SELECT 'PI,SL,SL,SL,SL,PK' AS col1 FROM dual UNION ALL
SELECT 'PI,OSL,SL,PK'      AS col1 FROM dual UNION ALL
SELECT 'PI,SL,SLR,PK'      AS col1 FROM dual
COL1
-----
SL,PK
SL,CR,SL
PK,SL
SL,SL
SL
PK
PI,SL,PK
PI,SL,SL,PK
PI,SL,SL,SL,PK
PI,SL,SL,SL,SL,PK
PI,OSL,SL,PK
PI,SL,SLR,PK
我希望严格计算子字符串 'SL' 的所有出现次数(即不包括 'OSL'、'SLR' 等)。 理想的结果应该是这样的:
COL1 COL2
----- -----
SL,PK 1
SL,CR,SL 2
PK,SL 1
SL,SL 2
SL 1
PK 0
PI,SL,PK 1
PI,SL,SL,PK 2
PI,SL,SL,SL,PK 3
PI,SL,SL,SL,SL,PK 4
PI,OSL,SL,PK 1
PI,SL,SLR,PK 1
我可以使用 length 和 regexp_replace 完成此操作:
-- Count the elements of col1 that are exactly 'SL' by deleting them and
-- measuring how much shorter the string becomes.
--   * '\1' and '\3' put the captured delimiters (line start/end or comma)
--     back, so each match removes only the two characters of 'SL'. Replacing
--     the whole match with '' would also drop the commas and yield
--     fractional counts (e.g. 'SL,PK' -> 3 / 2).
--   * The replacement runs twice: a comma consumed as the trailing delimiter
--     of one match cannot also be the leading delimiter of the next
--     ('SL,SL'), so the second pass removes the elements the first one skipped.
--   * NVL(..., 0) covers LENGTH returning NULL when nothing is left
--     (col1 = 'SL').
SELECT
    col1,
    (
        LENGTH(col1)
        - NVL(
              LENGTH(
                  REGEXP_REPLACE(
                      REGEXP_REPLACE(col1, '(^|,)(SL)($|,)', '\1\3', 1, 0, 'imn'),
                      '(^|,)(SL)($|,)', '\1\3', 1, 0, 'imn'
                  )
              ),
              0
          )
    ) / LENGTH('SL') AS col2
FROM (
    SELECT 'SL,PK' AS col1 FROM dual
    UNION ALL
    SELECT 'SL,CR,SL' AS col1 FROM dual
    UNION ALL
    SELECT 'PK,SL' AS col1 FROM dual
    UNION ALL
    SELECT 'SL,SL' AS col1 FROM dual
    UNION ALL
    SELECT 'SL' AS col1 FROM dual
    UNION ALL
    SELECT 'PK' AS col1 FROM dual
    UNION ALL
    SELECT 'PI,SL,PK' AS col1 FROM dual
    UNION ALL
    SELECT 'PI,SL,SL,PK' AS col1 FROM dual
    UNION ALL
    SELECT 'PI,SL,SL,SL,PK' AS col1 FROM dual
    UNION ALL
    SELECT 'PI,SL,SL,SL,SL,PK' AS col1 FROM dual
    UNION ALL
    SELECT 'PI,OSL,SL,PK' AS col1 FROM dual
    UNION ALL
    SELECT 'PI,SL,SLR,PK' AS col1 FROM dual
)
COL1 COL2
----- -----
SL,PK 1
SL,CR,SL 2
PK,SL 1
SL,SL 2
SL 1
PK 0
PI,SL,PK 1
PI,SL,SL,PK 2
PI,SL,SL,SL,PK 3
PI,SL,SL,SL,SL,PK 4
PI,OSL,SL,PK 1
PI,SL,SLR,PK 1
但我希望有一个更优雅的解决方案,也许可以使用 regexp_count。在其他支持单词边界 \b 的正则表达式实现中,我已经用 \bSL\b 实现了这个目标,但尚未找到适用于 Oracle 正则表达式的解决方案。
这是一种选择:
-- SQL*Plus transcript: "SQL>" and the leading line numbers are the client's
-- prompt, not part of the statement.
-- temp: emits one row per comma-separated element of col1. The CONNECT BY
--       subquery generates levels 1 .. (number of commas + 1), and
--       regexp_substr picks the column_value-th run of non-comma characters.
-- Final select: per col1, adds 1 for every element equal to 'SL'.
-- NOTE(review): table "test" is not defined in this listing; presumably it
-- holds the sample rows in a col1 column - confirm.
SQL> with temp as
2 (select col1,
3 regexp_substr(col1, '[^,]+', 1, column_value) val
4 from test cross join
5 table(cast(multiset(select level from dual
6 connect by level <= regexp_count(col1, ',') + 1
7 ) as sys.odcinumberlist))
8 )
9 select col1,
10 sum(case when val = 'SL' then 1 else 0 end) col2
11 From temp
12 group by col1;
COL1 COL2
----------------- ----------
PI,SL,SLR,PK 1
PK,SL 1
PK 0
SL,CR,SL 2
PI,OSL,SL,PK 1
SL,SL 2
PI,SL,SL,PK 2
PI,SL,SL,SL,PK 3
SL,PK 1
SL 1
PI,SL,PK 1
PI,SL,SL,SL,SL,PK 4
12 rows selected.
SQL>
它的作用是什么?
- temp CTE 将每个 col1 的值按逗号拆分成多行
- 最后的 select 只是统计每个 col1 中 SL 出现的次数
如果对字符串做一点小处理(hack),就可以使用 regexp_count():
-- Count the elements of col1 equal to 'SL' with REGEXP_COUNT.
-- Every comma is doubled first so that each element has its own leading and
-- trailing delimiter; otherwise the delimiter consumed by one match would not
-- be available to the next one ('SL,SL').
-- Note: \W matches any non-word character, not only a comma.
-- test_data is the sample table created elsewhere in this document; Oracle
-- requires a FROM clause, which the original one-liner lacked.
SELECT
    col1,
    REGEXP_COUNT(REPLACE(col1, ',', ',,'), '(^|\W)SL(\W|$)') AS col2
FROM test_data
这会把分隔符加倍,这样前一个匹配就不会把下一个匹配所需的分隔符消耗掉——从而绕开了根本问题,即 Oracle 正则表达式不支持先行断言(lookahead)。
这里是一个 db<>fiddle。
你可以用 XMLTABLE 拆分字符串,然后计数:
-- Split col1 on commas with XMLTABLE and count the elements equal to 'SL'.
-- REPLACE turns 'SL,PK' into the XQuery sequence "SL","PK", which XMLTABLE
-- returns as one row per element.
-- The value column is VARCHAR2(4000) rather than CHAR(2): a two-character
-- column cannot hold longer elements such as 'SLR' intact, which made
-- 'PI,SL,SLR,PK' count as 2 instead of 1.
SELECT col1,
       (
         SELECT COUNT(*)
         FROM   XMLTABLE(
                  ('"' || REPLACE( col1, ',', '","' ) || '"')
                  COLUMNS
                    value VARCHAR2(4000) PATH '.'
                )
         WHERE  value = 'SL'
       ) AS col2
FROM   test_data
因此,对于您的测试数据:
CREATE TABLE test_data ( col1 ) AS
SELECT 'SL,PK' FROM dual UNION ALL
SELECT 'SL,CR,SL' FROM dual UNION ALL
SELECT 'PK,SL' FROM dual UNION ALL
SELECT 'SL,SL' FROM dual UNION ALL
SELECT 'SL' FROM dual UNION ALL
SELECT 'PK' FROM dual UNION ALL
SELECT 'PI,SL,PK' FROM dual UNION ALL
SELECT 'PI,SL,SL,PK' FROM dual UNION ALL
SELECT 'PI,SL,SL,SL,PK' FROM dual UNION ALL
SELECT 'PI,SL,SL,SL,SL,PK' FROM dual UNION ALL
SELECT 'PI,OSL,SL,PK' FROM dual UNION ALL
SELECT 'PI,SL,SLR,PK' FROM dual
这输出:
COL1 | COL2
:---------------- | ---:
SL,PK | 1
SL,CR,SL | 2
PK,SL | 1
SL,SL | 2
SL | 1
PK | 0
PI,SL,PK | 1
PI,SL,SL,PK | 2
PI,SL,SL,SL,PK | 3
PI,SL,SL,SL,SL,PK | 4
PI,OSL,SL,PK | 1
PI,SL,SLR,PK | 1
db<>fiddle here
这个问题类似于我之前提过的一个问题。我正在寻找一种方法,用来统计 Oracle (11g) SQL 数据库某一列中以逗号分隔的值列表里某个字符串出现的次数。例如,假设我有以下数据:
-- Sample data: one comma-separated list per row, exposed as col1.
-- 'OSL' and 'SLR' are near-misses that must not be counted as 'SL'.
SELECT 'SL,PK'             AS col1 FROM dual UNION ALL
SELECT 'SL,CR,SL'          AS col1 FROM dual UNION ALL
SELECT 'PK,SL'             AS col1 FROM dual UNION ALL
SELECT 'SL,SL'             AS col1 FROM dual UNION ALL
SELECT 'SL'                AS col1 FROM dual UNION ALL
SELECT 'PK'                AS col1 FROM dual UNION ALL
SELECT 'PI,SL,PK'          AS col1 FROM dual UNION ALL
SELECT 'PI,SL,SL,PK'       AS col1 FROM dual UNION ALL
SELECT 'PI,SL,SL,SL,PK'    AS col1 FROM dual UNION ALL
SELECT 'PI,SL,SL,SL,SL,PK' AS col1 FROM dual UNION ALL
SELECT 'PI,OSL,SL,PK'      AS col1 FROM dual UNION ALL
SELECT 'PI,SL,SLR,PK'      AS col1 FROM dual
COL1
-----
SL,PK
SL,CR,SL
PK,SL
SL,SL
SL
PK
PI,SL,PK
PI,SL,SL,PK
PI,SL,SL,SL,PK
PI,SL,SL,SL,SL,PK
PI,OSL,SL,PK
PI,SL,SLR,PK
我希望严格计算子字符串 'SL' 的所有出现次数(即不包括 'OSL'、'SLR' 等)。 理想的结果应该是这样的:
COL1 COL2
----- -----
SL,PK 1
SL,CR,SL 2
PK,SL 1
SL,SL 2
SL 1
PK 0
PI,SL,PK 1
PI,SL,SL,PK 2
PI,SL,SL,SL,PK 3
PI,SL,SL,SL,SL,PK 4
PI,OSL,SL,PK 1
PI,SL,SLR,PK 1
我可以使用 length 和 regexp_replace 完成此操作:
-- Count the elements of col1 that are exactly 'SL' by deleting them and
-- measuring how much shorter the string becomes.
--   * '\1' and '\3' put the captured delimiters (line start/end or comma)
--     back, so each match removes only the two characters of 'SL'. Replacing
--     the whole match with '' would also drop the commas and yield
--     fractional counts (e.g. 'SL,PK' -> 3 / 2).
--   * The replacement runs twice: a comma consumed as the trailing delimiter
--     of one match cannot also be the leading delimiter of the next
--     ('SL,SL'), so the second pass removes the elements the first one skipped.
--   * NVL(..., 0) covers LENGTH returning NULL when nothing is left
--     (col1 = 'SL').
SELECT
    col1,
    (
        LENGTH(col1)
        - NVL(
              LENGTH(
                  REGEXP_REPLACE(
                      REGEXP_REPLACE(col1, '(^|,)(SL)($|,)', '\1\3', 1, 0, 'imn'),
                      '(^|,)(SL)($|,)', '\1\3', 1, 0, 'imn'
                  )
              ),
              0
          )
    ) / LENGTH('SL') AS col2
FROM (
    SELECT 'SL,PK' AS col1 FROM dual
    UNION ALL
    SELECT 'SL,CR,SL' AS col1 FROM dual
    UNION ALL
    SELECT 'PK,SL' AS col1 FROM dual
    UNION ALL
    SELECT 'SL,SL' AS col1 FROM dual
    UNION ALL
    SELECT 'SL' AS col1 FROM dual
    UNION ALL
    SELECT 'PK' AS col1 FROM dual
    UNION ALL
    SELECT 'PI,SL,PK' AS col1 FROM dual
    UNION ALL
    SELECT 'PI,SL,SL,PK' AS col1 FROM dual
    UNION ALL
    SELECT 'PI,SL,SL,SL,PK' AS col1 FROM dual
    UNION ALL
    SELECT 'PI,SL,SL,SL,SL,PK' AS col1 FROM dual
    UNION ALL
    SELECT 'PI,OSL,SL,PK' AS col1 FROM dual
    UNION ALL
    SELECT 'PI,SL,SLR,PK' AS col1 FROM dual
)
COL1 COL2
----- -----
SL,PK 1
SL,CR,SL 2
PK,SL 1
SL,SL 2
SL 1
PK 0
PI,SL,PK 1
PI,SL,SL,PK 2
PI,SL,SL,SL,PK 3
PI,SL,SL,SL,SL,PK 4
PI,OSL,SL,PK 1
PI,SL,SLR,PK 1
但我希望有一个更优雅的解决方案,也许可以使用 regexp_count。在其他支持单词边界 \b 的正则表达式实现中,我已经用 \bSL\b 实现了这个目标,但尚未找到适用于 Oracle 正则表达式的解决方案。
这是一种选择:
-- SQL*Plus transcript: "SQL>" and the leading line numbers are the client's
-- prompt, not part of the statement.
-- temp: emits one row per comma-separated element of col1. The CONNECT BY
--       subquery generates levels 1 .. (number of commas + 1), and
--       regexp_substr picks the column_value-th run of non-comma characters.
-- Final select: per col1, adds 1 for every element equal to 'SL'.
-- NOTE(review): table "test" is not defined in this listing; presumably it
-- holds the sample rows in a col1 column - confirm.
SQL> with temp as
2 (select col1,
3 regexp_substr(col1, '[^,]+', 1, column_value) val
4 from test cross join
5 table(cast(multiset(select level from dual
6 connect by level <= regexp_count(col1, ',') + 1
7 ) as sys.odcinumberlist))
8 )
9 select col1,
10 sum(case when val = 'SL' then 1 else 0 end) col2
11 From temp
12 group by col1;
COL1 COL2
----------------- ----------
PI,SL,SLR,PK 1
PK,SL 1
PK 0
SL,CR,SL 2
PI,OSL,SL,PK 1
SL,SL 2
PI,SL,SL,PK 2
PI,SL,SL,SL,PK 3
SL,PK 1
SL 1
PI,SL,PK 1
PI,SL,SL,SL,SL,PK 4
12 rows selected.
SQL>
它的作用是什么?
- temp CTE 将每个 col1 的值按逗号拆分成多行
- 最后的 select 只是统计每个 col1 中 SL 出现的次数
如果对字符串做一点小处理(hack),就可以使用 regexp_count():
-- Count the elements of col1 equal to 'SL' with REGEXP_COUNT.
-- Every comma is doubled first so that each element has its own leading and
-- trailing delimiter; otherwise the delimiter consumed by one match would not
-- be available to the next one ('SL,SL').
-- Note: \W matches any non-word character, not only a comma.
-- test_data is the sample table created elsewhere in this document; Oracle
-- requires a FROM clause, which the original one-liner lacked.
SELECT
    col1,
    REGEXP_COUNT(REPLACE(col1, ',', ',,'), '(^|\W)SL(\W|$)') AS col2
FROM test_data
这会把分隔符加倍,这样前一个匹配就不会把下一个匹配所需的分隔符消耗掉——从而绕开了根本问题,即 Oracle 正则表达式不支持先行断言(lookahead)。
这里是一个 db<>fiddle。
你可以用 XMLTABLE 拆分字符串,然后计数:
-- Split col1 on commas with XMLTABLE and count the elements equal to 'SL'.
-- REPLACE turns 'SL,PK' into the XQuery sequence "SL","PK", which XMLTABLE
-- returns as one row per element.
-- The value column is VARCHAR2(4000) rather than CHAR(2): a two-character
-- column cannot hold longer elements such as 'SLR' intact, which made
-- 'PI,SL,SLR,PK' count as 2 instead of 1.
SELECT col1,
       (
         SELECT COUNT(*)
         FROM   XMLTABLE(
                  ('"' || REPLACE( col1, ',', '","' ) || '"')
                  COLUMNS
                    value VARCHAR2(4000) PATH '.'
                )
         WHERE  value = 'SL'
       ) AS col2
FROM   test_data
因此,对于您的测试数据:
CREATE TABLE test_data ( col1 ) AS
SELECT 'SL,PK' FROM dual UNION ALL
SELECT 'SL,CR,SL' FROM dual UNION ALL
SELECT 'PK,SL' FROM dual UNION ALL
SELECT 'SL,SL' FROM dual UNION ALL
SELECT 'SL' FROM dual UNION ALL
SELECT 'PK' FROM dual UNION ALL
SELECT 'PI,SL,PK' FROM dual UNION ALL
SELECT 'PI,SL,SL,PK' FROM dual UNION ALL
SELECT 'PI,SL,SL,SL,PK' FROM dual UNION ALL
SELECT 'PI,SL,SL,SL,SL,PK' FROM dual UNION ALL
SELECT 'PI,OSL,SL,PK' FROM dual UNION ALL
SELECT 'PI,SL,SLR,PK' FROM dual
这输出:
COL1 | COL2
:---------------- | ---:
SL,PK | 1
SL,CR,SL | 2
PK,SL | 1
SL,SL | 2
SL | 1
PK | 0
PI,SL,PK | 1
PI,SL,SL,PK | 2
PI,SL,SL,SL,PK | 3
PI,SL,SL,SL,SL,PK | 4
PI,OSL,SL,PK | 1
PI,SL,SLR,PK | 1
db<>fiddle here