如何在 SQLite 中重用递归 WITH 子句?
How to reuse a recursive WITH clause in SQLite?
我有这个很好的递归 WITH 子句:
-- Split a '+'-delimited file name into its parts, one row per part.
-- The '.jpg' extension is swapped for a trailing '+' so that every part,
-- including the last one, is terminated by the delimiter.
WITH RECURSIVE split(seq, word, str) AS (
    -- Anchor: nothing extracted yet; str holds the text still to be consumed.
    SELECT
        0,
        NULL,
        replace('name+one+two+three.jpg', '.jpg', '+')
    UNION ALL
    -- Step: peel off the text before the first '+' and keep the remainder.
    SELECT
        seq + 1,
        substr(str, 1, instr(str, '+') - 1),
        substr(str, instr(str, '+') + 1)
    FROM split
    WHERE str <> ''
)
-- seq = 0 is the anchor row and seq = 1 is the leading 'name' part; skip both.
SELECT word
FROM split
WHERE seq > 1;
输出为:
one
two
three
现在,我该如何重用这个子句,用 SELECT name FROM Images 的查询结果来代替常量字符串 'name+one+two+three.jpg'?
目标是提取可在整组图像名称中找到的所有唯一“+后缀”字符串。例如,这是示例数据:
-- Sample data: three image names, each carrying '+'-separated suffixes.
-- The table is dropped first so the script can be re-run.
DROP TABLE IF EXISTS ImagesTemp;

CREATE TEMP TABLE ImagesTemp (
    name
);

INSERT INTO ImagesTemp (name)
VALUES
    ('IMG_0403+newport+malboro+kool.jpg'),
    ('IMG_0404+camel+newport.JPG'),
    ('IMG_0405+dunhill+doral+malboro.png');

-- Show what was loaded.
SELECT name
FROM ImagesTemp;
预期输出为:
word count
malboro 2
newport 2
kool 1
dunhill 1
doral 1
camel 1
可以把 CTE 的锚点成员(anchor)从常量改为对表的 SELECT 查询,如下所示:
-- Split every image name in `images` into its '+'-separated parts.
-- Only a lowercase '.jpg' extension is turned into a trailing '+'; any other
-- name (e.g. '.JPG', '.png') reaches the recursive step without a final
-- delimiter, so the step must cope with a str that contains no '+' at all.
-- In that case the extension stays attached to the last word.
WITH RECURSIVE split(seq, word, str) AS (
    -- Anchor: one row per image, nothing extracted yet.
    SELECT
        0,
        NULL,
        replace(name, '.jpg', '+')
    FROM images
    UNION ALL
    -- Step: take the text before the first '+'. When no '+' is left, the whole
    -- remainder is the last word and str becomes empty, which ends the
    -- recursion instead of re-emitting the same str forever.
    SELECT
        seq + 1,
        CASE
            WHEN instr(str, '+') > 0 THEN substr(str, 1, instr(str, '+') - 1)
            ELSE str
        END,
        CASE
            WHEN instr(str, '+') > 0 THEN substr(str, instr(str, '+') + 1)
            ELSE ''
        END
    FROM split
    WHERE str <> ''
)
-- seq = 0 is the anchor row and seq = 1 is the part before the first '+'.
SELECT word
FROM split
WHERE seq > 1;
想通了!这里的技巧是确保每个文件名都以“+”结尾,否则剩余字符串永远不会变为空,递归子句会陷入无限循环(挂起)。我还顺便把文件扩展名也计入了统计。
-- Count how often each '+'-separated part (and each file extension) occurs
-- across all names in ImagesTemp, ignoring letter case.
WITH RECURSIVE split(seq, word, str, filename) AS (
    -- Anchor: '+.' makes whatever follows a '.' its own part (every '.' in the
    -- name is affected), and the appended '+' guarantees the text ends with a
    -- delimiter so the recursion terminates.
    SELECT
        0,
        NULL,
        lower(replace(name, '.', '+.') || '+'),
        name
    FROM ImagesTemp
    UNION ALL
    -- Step: peel off the text before the first '+', carry the original name.
    SELECT
        seq + 1,
        substr(str, 1, instr(str, '+') - 1),
        substr(str, instr(str, '+') + 1),
        filename
    FROM split
    WHERE str <> ''
)
-- GROUP BY already yields one row per word, so no DISTINCT is needed.
-- `sample` is a bare column: SQLite takes it from an arbitrary row of the group.
-- Ties on count are broken by word (descending) so the row order is reproducible.
SELECT
    word,
    count(*) AS count,
    filename AS sample
FROM split
WHERE word <> ''
  AND seq > 1
GROUP BY word
ORDER BY count DESC, word DESC;
结果:
word count sample
newport 2 IMG_0403+newport+malboro+kool.jpg
malboro 2 IMG_0403+newport+malboro+kool.jpg
.jpg 2 IMG_0404+camel+newport.JPG
kool 1 IMG_0403+newport+malboro+kool.jpg
dunhill 1 IMG_0405+dunhill+doral+malboro.png
doral 1 IMG_0405+dunhill+doral+malboro.png
camel 1 IMG_0404+camel+newport.JPG
.png 1 IMG_0405+dunhill+doral+malboro.png
我有这个很好的递归 WITH 子句:
-- Split a '+'-delimited file name into its parts, one row per part.
-- The '.jpg' extension is swapped for a trailing '+' so that every part,
-- including the last one, is terminated by the delimiter.
WITH RECURSIVE split(seq, word, str) AS (
    -- Anchor: nothing extracted yet; str holds the text still to be consumed.
    SELECT
        0,
        NULL,
        replace('name+one+two+three.jpg', '.jpg', '+')
    UNION ALL
    -- Step: peel off the text before the first '+' and keep the remainder.
    SELECT
        seq + 1,
        substr(str, 1, instr(str, '+') - 1),
        substr(str, instr(str, '+') + 1)
    FROM split
    WHERE str <> ''
)
-- seq = 0 is the anchor row and seq = 1 is the leading 'name' part; skip both.
SELECT word
FROM split
WHERE seq > 1;
输出为:
one
two
three
现在,我该如何重用这个子句,用 SELECT name FROM Images 的查询结果来代替常量字符串 'name+one+two+three.jpg'?
目标是提取可在整组图像名称中找到的所有唯一“+后缀”字符串。例如,这是示例数据:
-- Sample data: three image names, each carrying '+'-separated suffixes.
-- The table is dropped first so the script can be re-run.
DROP TABLE IF EXISTS ImagesTemp;

CREATE TEMP TABLE ImagesTemp (
    name
);

INSERT INTO ImagesTemp (name)
VALUES
    ('IMG_0403+newport+malboro+kool.jpg'),
    ('IMG_0404+camel+newport.JPG'),
    ('IMG_0405+dunhill+doral+malboro.png');

-- Show what was loaded.
SELECT name
FROM ImagesTemp;
预期输出为:
word count
malboro 2
newport 2
kool 1
dunhill 1
doral 1
camel 1
可以把 CTE 的锚点成员(anchor)从常量改为对表的 SELECT 查询,如下所示:
-- Split every image name in `images` into its '+'-separated parts.
-- Only a lowercase '.jpg' extension is turned into a trailing '+'; any other
-- name (e.g. '.JPG', '.png') reaches the recursive step without a final
-- delimiter, so the step must cope with a str that contains no '+' at all.
-- In that case the extension stays attached to the last word.
WITH RECURSIVE split(seq, word, str) AS (
    -- Anchor: one row per image, nothing extracted yet.
    SELECT
        0,
        NULL,
        replace(name, '.jpg', '+')
    FROM images
    UNION ALL
    -- Step: take the text before the first '+'. When no '+' is left, the whole
    -- remainder is the last word and str becomes empty, which ends the
    -- recursion instead of re-emitting the same str forever.
    SELECT
        seq + 1,
        CASE
            WHEN instr(str, '+') > 0 THEN substr(str, 1, instr(str, '+') - 1)
            ELSE str
        END,
        CASE
            WHEN instr(str, '+') > 0 THEN substr(str, instr(str, '+') + 1)
            ELSE ''
        END
    FROM split
    WHERE str <> ''
)
-- seq = 0 is the anchor row and seq = 1 is the part before the first '+'.
SELECT word
FROM split
WHERE seq > 1;
想通了!这里的技巧是确保每个文件名都以“+”结尾,否则剩余字符串永远不会变为空,递归子句会陷入无限循环(挂起)。我还顺便把文件扩展名也计入了统计。
-- Count how often each '+'-separated part (and each file extension) occurs
-- across all names in ImagesTemp, ignoring letter case.
WITH RECURSIVE split(seq, word, str, filename) AS (
    -- Anchor: '+.' makes whatever follows a '.' its own part (every '.' in the
    -- name is affected), and the appended '+' guarantees the text ends with a
    -- delimiter so the recursion terminates.
    SELECT
        0,
        NULL,
        lower(replace(name, '.', '+.') || '+'),
        name
    FROM ImagesTemp
    UNION ALL
    -- Step: peel off the text before the first '+', carry the original name.
    SELECT
        seq + 1,
        substr(str, 1, instr(str, '+') - 1),
        substr(str, instr(str, '+') + 1),
        filename
    FROM split
    WHERE str <> ''
)
-- GROUP BY already yields one row per word, so no DISTINCT is needed.
-- `sample` is a bare column: SQLite takes it from an arbitrary row of the group.
-- Ties on count are broken by word (descending) so the row order is reproducible.
SELECT
    word,
    count(*) AS count,
    filename AS sample
FROM split
WHERE word <> ''
  AND seq > 1
GROUP BY word
ORDER BY count DESC, word DESC;
结果:
word count sample
newport 2 IMG_0403+newport+malboro+kool.jpg
malboro 2 IMG_0403+newport+malboro+kool.jpg
.jpg 2 IMG_0404+camel+newport.JPG
kool 1 IMG_0403+newport+malboro+kool.jpg
dunhill 1 IMG_0405+dunhill+doral+malboro.png
doral 1 IMG_0405+dunhill+doral+malboro.png
camel 1 IMG_0404+camel+newport.JPG
.png 1 IMG_0405+dunhill+doral+malboro.png