更新时间:2023-01-12 22:21:13
选项1:保留每个单词最后一次出现
这将保留每个单词的最后一次出现。
Eg 'hello,world,hello,world,hello'
将导致'world,hello'
-- Keep only the last occurrence of each comma-separated item in column_name.
-- An item (together with its trailing comma) is removed whenever the same
-- item appears again later in the string.
select
    regexp_replace(
        column_name,
        -- (?<=^|,)          the match starts at the beginning of an item
        -- (?<word>.*?),     capture the item, then consume its trailing comma
        -- (?=.* ... )       look ahead: the same text must occur again later,
        --   (?<=,)\\k<word>(?=,|$)   as a whole item (comma before it, comma or
        --                            end of string after it); \\k<word> is the
        --                            back-reference to the captured item,
        --                            written with a doubled backslash inside
        --                            the SQL string literal
        '(?<=^|,)(?<word>.*?),(?=.*(?<=,)\\k<word>(?=,|$))',
        -- replace the earlier duplicate with nothing
        ''
    )
from mytable;
+ ------------------- +
|枪,人|
|穿梭,敌人,奔跑|
|击中,追逐|
+ ------------------- +
这将保持每个单词的第一次出现。
Eg 'hello,world,hello,world,hello'
将导致'hello,world'
-- Keep only the first occurrence of each comma-separated item in column_name.
-- The string is reversed so that the first occurrence becomes the last one,
-- the keep-the-last-occurrence replacement is applied, and the result is
-- reversed back to the original reading direction.
select
    reverse(
        regexp_replace(
            reverse(column_name),
            -- Starting at the beginning of an item, capture it as "word",
            -- consume its trailing comma, and require (via look-ahead) that
            -- the same text occurs later as a whole item. \\k<word> is the
            -- back-reference to the captured item.
            '(?<=^|,)(?<word>.*?),(?=.*(?<=,)\\k<word>(?=,|$))',
            -- drop the matched duplicate
            ''
        )
    )
from mytable;
这将对单词排序并去除重复项。
Eg 'Cherry,Apple,Cherry,Cherry,Cherry,Banana,Apple'
将导致'Apple,Banana,Cherry'
-- Return the distinct comma-separated items of column_name in sorted order.
-- The string is split on commas, the resulting array is sorted so that equal
-- items become adjacent, the array is joined back with commas, and every run
-- of adjacent equal items is collapsed into a single item.
select
    regexp_replace(
        concat_ws(',', sort_array(split(column_name, ','))),
        -- (?<=^|,)                   the match starts at the beginning of an item
        -- (?<word>.*?)               capture the item
        -- (,\\k<word>(?=,|$))+       followed by one or more repetitions of a
        --                            comma plus the same item, each ending at a
        --                            comma or at the end of the string
        '(?<=^|,)(?<word>.*?)(,\\k<word>(?=,|$))+',
        -- keep a single copy of the repeated item
        '${word}'
    )
from mytable;
I have a string column that holds comma-separated values, some of which are duplicated. I want to remove the duplicates:
e.g.
column_name
-----------------
gun,gun,man,gun,man
shuttle,enemy,enemy,run
hit,chase
I want result like:
column_name
----------------
gun,man
shuttle,enemy,run
hit,chase
I am using a Hive database. Please help.
This will keep the last occurrence of every word.
E.g. 'hello,world,hello,world,hello'
will result in 'world,hello'
-- Keep only the last occurrence of each comma-separated item in column_name.
-- An item (together with its trailing comma) is removed whenever the same
-- item appears again later in the string.
select
    regexp_replace(
        column_name,
        -- (?<=^|,)          the match starts at the beginning of an item
        -- (?<word>.*?),     capture the item, then consume its trailing comma
        -- (?=.* ... )       look ahead: the same text must occur again later,
        --   (?<=,)\\k<word>(?=,|$)   as a whole item (comma before it, comma or
        --                            end of string after it); \\k<word> is the
        --                            back-reference to the captured item
        '(?<=^|,)(?<word>.*?),(?=.*(?<=,)\\k<word>(?=,|$))',
        -- replace the earlier duplicate with nothing
        ''
    )
from mytable;
+-------------------+
| gun,man |
| shuttle,enemy,run |
| hit,chase |
+-------------------+
This will keep the first occurrence of every word.
E.g. 'hello,world,hello,world,hello'
will result in 'hello,world'
-- Keep only the first occurrence of each comma-separated item in column_name.
-- The string is reversed so that the first occurrence becomes the last one,
-- the keep-the-last-occurrence replacement is applied, and the result is
-- reversed back to the original reading direction.
select
    reverse(
        regexp_replace(
            reverse(column_name),
            -- Starting at the beginning of an item, capture it as "word",
            -- consume its trailing comma, and require (via look-ahead) that
            -- the same text occurs later as a whole item. \\k<word> is the
            -- back-reference to the captured item.
            '(?<=^|,)(?<word>.*?),(?=.*(?<=,)\\k<word>(?=,|$))',
            -- drop the matched duplicate
            ''
        )
    )
from mytable;
This will sort the words and remove the duplicates.
E.g. 'Cherry,Apple,Cherry,Cherry,Cherry,Banana,Apple'
will result in 'Apple,Banana,Cherry'
-- Return the distinct comma-separated items of column_name in sorted order.
-- The string is split on commas, the resulting array is sorted so that equal
-- items become adjacent, the array is joined back with commas, and every run
-- of adjacent equal items is collapsed into a single item.
select
    regexp_replace(
        concat_ws(',', sort_array(split(column_name, ','))),
        -- (?<=^|,)                   the match starts at the beginning of an item
        -- (?<word>.*?)               capture the item
        -- (,\\k<word>(?=,|$))+       followed by one or more repetitions of a
        --                            comma plus the same item, each ending at a
        --                            comma or at the end of the string
        '(?<=^|,)(?<word>.*?)(,\\k<word>(?=,|$))+',
        -- keep a single copy of the repeated item
        '${word}'
    )
from mytable;