计算数组中前面重复项的数量
Count number of preceding repeating items in an array
我有以下查询:它遍历数组中的每个元素,并向前回溯,统计以该元素结尾的连续 'f' 的个数(包括该元素本身)。
它可以正常工作,但在处理大量行时会很慢——有没有更简洁的方法来处理数组中的序列?
-- For each element of `sequence`, emit (element, n) where n is the length of the run of
-- consecutive 'f' values ending at that element (the element itself included); any
-- element other than 'f' gets 0.
-- NOTE: every position rescans its own prefix, so the work per array grows quadratically
-- with the array length.
SELECT
    ['p','p','f','f','f','f','p','f', 'f', 'f'] AS sequence,
    arrayMap(
        (x, y) -> (
            x,
            if(
                x = 'f',
                -- Walk the prefix sequence[1..y] backwards: each 'f' adds 1 to the running
                -- sum, any other value subtracts its own position, which always drives the
                -- sum below zero so arrayCumSumNonNegative resets it to 0. The first 0
                -- therefore marks the first non-'f' element, and (its index - 1) is the
                -- run length.
                -- The '' appended by arrayPushBack is a sentinel that guarantees such a 0
                -- exists: without it, a run starting at the first array element makes
                -- arrayFirstIndex return 0 ("not found") and the count comes out as -1.
                arrayFirstIndex(
                    k -> k = 0,
                    arrayCumSumNonNegative(
                        (n, index) -> n = 'f' ? 1 : -index,
                        arrayPushBack(arrayReverse(arraySlice(sequence, 1, y)), '') AS arr,
                        arrayEnumerate(arr)
                    )
                ) - 1,
                0
            )
        ),
        sequence,
        arrayEnumerate(sequence)
    )
result:
[('p',0),('p',0),('f',1),('f',2),('f',3),('f',4),('p',0),('f',1),('f',2),('f',3)]
提前致谢
试试这个查询:
-- Number every element of `sequence` by its position inside a run of consecutive `ch`
-- values; elements that differ from `ch` are numbered 0.
WITH 'f' AS ch
SELECT
    -- Start a new part wherever "equals ch" flips relative to the previous element.
    arraySplit(
        (item, pos) -> (item = ch) != (sequence[pos - 1] = ch),
        sequence,
        arrayEnumerate(sequence)
    ) AS parts,
    -- Inside each part, pair every element with its 1-based offset when it equals `ch`,
    -- otherwise with 0.
    arrayMap(
        part -> arrayMap(
            (item, pos) -> (item, if(item = ch, pos, 0)),
            part,
            arrayEnumerate(part)
        ),
        parts
    ) AS parts_and_number,
    -- Merge the per-part pairs back into a single array.
    arrayFlatten(parts_and_number) AS result
FROM
(
    -- Sample inputs, one sequence per row.
    SELECT
        arrayJoin([
            ['p','p','f','f','f','f','p','f', 'f', 'f'],
            ['p','w','f','f','f','f','p','f', 'f', 'f'],
            ['f','f','f','f','p','f', 'f', 'f'],
            ['p','w'],
            ['f', 'f'],
            ['f']
        ]) AS sequence
)
/*
Row 1:
──────
parts: [['p','p'],['f','f','f','f'],['p'],['f','f','f']]
parts_and_number: [[('p',0),('p',0)],[('f',1),('f',2),('f',3),('f',4)],[('p',0)],[('f',1),('f',2),('f',3)]]
result: [('p',0),('p',0),('f',1),('f',2),('f',3),('f',4),('p',0),('f',1),('f',2),('f',3)]
Row 2:
──────
parts: [['p','w'],['f','f','f','f'],['p'],['f','f','f']]
parts_and_number: [[('p',0),('w',0)],[('f',1),('f',2),('f',3),('f',4)],[('p',0)],[('f',1),('f',2),('f',3)]]
result: [('p',0),('w',0),('f',1),('f',2),('f',3),('f',4),('p',0),('f',1),('f',2),('f',3)]
Row 3:
──────
parts: [['f','f','f','f'],['p'],['f','f','f']]
parts_and_number: [[('f',1),('f',2),('f',3),('f',4)],[('p',0)],[('f',1),('f',2),('f',3)]]
result: [('f',1),('f',2),('f',3),('f',4),('p',0),('f',1),('f',2),('f',3)]
Row 4:
──────
parts: [['p','w']]
parts_and_number: [[('p',0),('w',0)]]
result: [('p',0),('w',0)]
Row 5:
──────
parts: [['f','f']]
parts_and_number: [[('f',1),('f',2)]]
result: [('f',1),('f',2)]
Row 6:
──────
parts: [['f']]
parts_and_number: [[('f',1)]]
result: [('f',1)]
*/
我有以下查询:它遍历数组中的每个元素,并向前回溯,统计以该元素结尾的连续 'f' 的个数(包括该元素本身)。
它可以正常工作,但在处理大量行时会很慢——有没有更简洁的方法来处理数组中的序列?
-- For each element of `sequence`, emit (element, n) where n is the length of the run of
-- consecutive 'f' values ending at that element (the element itself included); any
-- element other than 'f' gets 0.
-- NOTE: every position rescans its own prefix, so the work per array grows quadratically
-- with the array length.
SELECT
    ['p','p','f','f','f','f','p','f', 'f', 'f'] AS sequence,
    arrayMap(
        (x, y) -> (
            x,
            if(
                x = 'f',
                -- Walk the prefix sequence[1..y] backwards: each 'f' adds 1 to the running
                -- sum, any other value subtracts its own position, which always drives the
                -- sum below zero so arrayCumSumNonNegative resets it to 0. The first 0
                -- therefore marks the first non-'f' element, and (its index - 1) is the
                -- run length.
                -- The '' appended by arrayPushBack is a sentinel that guarantees such a 0
                -- exists: without it, a run starting at the first array element makes
                -- arrayFirstIndex return 0 ("not found") and the count comes out as -1.
                arrayFirstIndex(
                    k -> k = 0,
                    arrayCumSumNonNegative(
                        (n, index) -> n = 'f' ? 1 : -index,
                        arrayPushBack(arrayReverse(arraySlice(sequence, 1, y)), '') AS arr,
                        arrayEnumerate(arr)
                    )
                ) - 1,
                0
            )
        ),
        sequence,
        arrayEnumerate(sequence)
    )
result:
[('p',0),('p',0),('f',1),('f',2),('f',3),('f',4),('p',0),('f',1),('f',2),('f',3)]
提前致谢
试试这个查询:
-- Number every element of `sequence` by its position inside a run of consecutive `ch`
-- values; elements that differ from `ch` are numbered 0.
WITH 'f' AS ch
SELECT
    -- Start a new part wherever "equals ch" flips relative to the previous element.
    arraySplit(
        (item, pos) -> (item = ch) != (sequence[pos - 1] = ch),
        sequence,
        arrayEnumerate(sequence)
    ) AS parts,
    -- Inside each part, pair every element with its 1-based offset when it equals `ch`,
    -- otherwise with 0.
    arrayMap(
        part -> arrayMap(
            (item, pos) -> (item, if(item = ch, pos, 0)),
            part,
            arrayEnumerate(part)
        ),
        parts
    ) AS parts_and_number,
    -- Merge the per-part pairs back into a single array.
    arrayFlatten(parts_and_number) AS result
FROM
(
    -- Sample inputs, one sequence per row.
    SELECT
        arrayJoin([
            ['p','p','f','f','f','f','p','f', 'f', 'f'],
            ['p','w','f','f','f','f','p','f', 'f', 'f'],
            ['f','f','f','f','p','f', 'f', 'f'],
            ['p','w'],
            ['f', 'f'],
            ['f']
        ]) AS sequence
)
/*
Row 1:
──────
parts: [['p','p'],['f','f','f','f'],['p'],['f','f','f']]
parts_and_number: [[('p',0),('p',0)],[('f',1),('f',2),('f',3),('f',4)],[('p',0)],[('f',1),('f',2),('f',3)]]
result: [('p',0),('p',0),('f',1),('f',2),('f',3),('f',4),('p',0),('f',1),('f',2),('f',3)]
Row 2:
──────
parts: [['p','w'],['f','f','f','f'],['p'],['f','f','f']]
parts_and_number: [[('p',0),('w',0)],[('f',1),('f',2),('f',3),('f',4)],[('p',0)],[('f',1),('f',2),('f',3)]]
result: [('p',0),('w',0),('f',1),('f',2),('f',3),('f',4),('p',0),('f',1),('f',2),('f',3)]
Row 3:
──────
parts: [['f','f','f','f'],['p'],['f','f','f']]
parts_and_number: [[('f',1),('f',2),('f',3),('f',4)],[('p',0)],[('f',1),('f',2),('f',3)]]
result: [('f',1),('f',2),('f',3),('f',4),('p',0),('f',1),('f',2),('f',3)]
Row 4:
──────
parts: [['p','w']]
parts_and_number: [[('p',0),('w',0)]]
result: [('p',0),('w',0)]
Row 5:
──────
parts: [['f','f']]
parts_and_number: [[('f',1),('f',2)]]
result: [('f',1),('f',2)]
Row 6:
──────
parts: [['f']]
parts_and_number: [[('f',1)]]
result: [('f',1)]
*/