Hive:如何消除重复的子串
Hive: how to eliminate the duplicated substrings
Hive 表:
-- Sample table: i is the grouping key, s holds &-delimited tokens.
CREATE TABLE T (
    i INT,
    s STRING
);
-- Two rows per key, with one token repeated across the rows of each key.
INSERT INTO T VALUES
    (1, "a1&b2"),
    (1, "b2&c3"),
    (2, "c1&d2"),
    (2, "c1");
s 列包含由 & 分隔的值。
所需的输出应按第 1 列(i)分组并拼接 s 列,但每个由 & 分隔的子字符串值只保留一次(去重):
i grouped_s
-- -------------
1 a1&b2&c3
2 c1&d2
这是我的尝试:
-- Attempt from the question: de-duplicate the &-separated tokens of s per i
-- by nesting one collect_set inside another within a single GROUP BY query.
-- Hive rejects this statement with SemanticException 10128 (the UDAF
-- collect_set is used in a place where it is not yet supported).
SELECT i,
concat_ws('&',
collect_set(
split(concat_ws('&' , collect_set(s)), "&" )
)
)
as grouped_s
FROM T group by i;
执行后得到如下错误:
FAILED: SemanticException [Error 10128]: Line 6:24 Not yet supported place for UDAF 'collect_set'
我也想在不使用嵌套 SQL 的情况下实现它。
-- Split every s value into its &-separated tokens (one row per token),
-- then rebuild a single de-duplicated string per i.
-- collect_set removes duplicates but does not promise any element order,
-- so sort_array is applied to make the joined string deterministic.
-- OUTER keeps rows whose s is NULL, so their i still appears in the output.
SELECT
    t.i,
    concat_ws('&', sort_array(collect_set(e.val))) AS grouped_s
FROM T t
LATERAL VIEW OUTER explode(split(t.s, '&')) e AS val
GROUP BY t.i;
结果:
t.i grouped_s
1 a1&b2&c3
2 c1&d2
Hive 表:
-- Sample table: i is the grouping key, s holds &-delimited tokens.
CREATE TABLE T (
    i INT,
    s STRING
);
-- Two rows per key, with one token repeated across the rows of each key.
INSERT INTO T VALUES
    (1, "a1&b2"),
    (1, "b2&c3"),
    (2, "c1&d2"),
    (2, "c1");
s 列包含由 & 分隔的值。
所需的输出应按第 1 列(i)分组并拼接 s 列,但每个由 & 分隔的子字符串值只保留一次(去重):
i grouped_s
-- -------------
1 a1&b2&c3
2 c1&d2
这是我的尝试:
-- Attempt from the question: de-duplicate the &-separated tokens of s per i
-- by nesting one collect_set inside another within a single GROUP BY query.
-- Hive rejects this statement with SemanticException 10128 (the UDAF
-- collect_set is used in a place where it is not yet supported).
SELECT i,
concat_ws('&',
collect_set(
split(concat_ws('&' , collect_set(s)), "&" )
)
)
as grouped_s
FROM T group by i;
执行后得到如下错误:
FAILED: SemanticException [Error 10128]: Line 6:24 Not yet supported place for UDAF 'collect_set'
我也想在不使用嵌套 SQL 的情况下实现它。
-- Split every s value into its &-separated tokens (one row per token),
-- then rebuild a single de-duplicated string per i.
-- collect_set removes duplicates but does not promise any element order,
-- so sort_array is applied to make the joined string deterministic.
-- OUTER keeps rows whose s is NULL, so their i still appears in the output.
SELECT
    t.i,
    concat_ws('&', sort_array(collect_set(e.val))) AS grouped_s
FROM T t
LATERAL VIEW OUTER explode(split(t.s, '&')) e AS val
GROUP BY t.i;
结果:
t.i grouped_s
1 a1&b2&c3
2 c1&d2