The table consists of the columns id, word and sentence. I am trying to replace the words in the sentences with a link (consisting of the word and its id) if the words exist in the word column. The code below does the replacement just fine, but I need help figuring out a way to identify the exact word to replace when the ids are the same but the text is different.
For example, id = 2 has two rows, with the words testing and test.
Right now, it replaces the first sentence as below. Both testing and test are replaced with http://localhost/2/<u>testing</u>
automtestingation http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u> is popular kind of http://localhost/2/<u>testing</u>
I am expecting it to be
automtestingation http://localhost/2/<u>testing</u> http://localhost/2/<u>test</u> is popular kind of http://localhost/2/<u>testing</u>
-- Demo table: each row carries a word (or phrase) to be linked and a
-- sentence in which words are to be replaced by links.
CREATE TABLE temp (
    id       NUMBER,          -- identifier embedded in the generated link; not unique (several words may share one id)
    word     VARCHAR2(1000),  -- word or phrase to search for
    sentence VARCHAR2(2000)   -- text in which matching words are replaced
);
-- Sample data. Note that id 2 is shared by two different words
-- ('testing' and 'test'), which is the case the question is about.
-- The column list is explicit so the insert keeps working if columns are
-- added or reordered, and the statement is terminated so it does not run
-- into the query that follows.
INSERT INTO temp (id, word, sentence)
SELECT 1, 'automation testing', 'automtestingation testing test is popular kind of testing' FROM DUAL UNION ALL
SELECT 2, 'testing', 'manual testing' FROM DUAL UNION ALL
SELECT 2, 'test', 'test' FROM DUAL UNION ALL
SELECT 3, 'manual testing', 'this is an old method of testing' FROM DUAL;
-- Replaces whole-word occurrences of temp.word inside temp.sentence with a
-- link of the form http://localhost/<id>/<u><word></u>.
-- Each word is applied in two steps: phase 1 swaps the word for a '{id}'
-- placeholder (regular expression, case-insensitive), phase 2 swaps the
-- placeholder for the final link (plain REPLACE).
-- Known limitation: the placeholder is built from id only, so two words that
-- share an id produce the same '{id}' placeholder. In phase 2 the link of the
-- word processed first (the longer one) replaces all of them.
--
-- words: one row per word with its search/replace strings.
--   search1  = the word with regex metacharacters escaped, delimited by
--              start/end of string or a non-word character
--   replace1 = the two captured delimiters with '{id}' between them
--   search2  = the literal '{id}' placeholder
--   replace2 = the final link text
with words(id, word, word_length, search1, replace1, search2, replace2) as (
select id, word, length(word),
'(^|\W)' || REGEXP_REPLACE(word, '([][)(}{|^$\.*+?])', '\\\1') || '($|\W)',
'\1{'|| id ||'}\2',
'{'|| id ||'}',
'http://localhost/' || id || '/<u>' || word || '</u>'
FROM temp
)
-- joined_data: pairs every sentence with every word found in it.
-- instr() on upper-cased text is a case-insensitive substring check;
-- regexp_like() then confirms a delimited match. No match parameter is passed
-- to regexp_like() here, unlike the 'i' used by the replacements further down.
-- rn ranks the words of each sentence from longest to shortest.
-- Sentences that contain none of the words produce no row (inner join).
, joined_data as (
select w.search1, w.replace1, w.search2, w.replace2,
s.rowid s_rid, s.sentence,
row_number() over(partition by s.rowid order by word_length desc) rn
from words w
join temp s
on instr(UPPER(s.sentence), UPPER(w.word)) > 0
and regexp_like(s.sentence, w.search1)
)
-- unpivoted_data: turns each (search1, replace1) / (search2, replace2) pair
-- into two rows tagged phase 1 and phase 2, then renumbers the steps per
-- sentence so that all phase-1 steps come before all phase-2 steps.
-- is_last = 1 marks the final step of each sentence.
, unpivoted_data as (
select S_RID, SENTENCE, PHASE, SEARCH_STRING, REPLACE_STRING,
row_number() over(partition by s_rid order by phase, rn) rn,
case when row_number() over(partition by s_rid order by phase, rn)
= count(*) over(partition by s_rid)
then 1
else 0
end is_last
from joined_data
unpivot(
(search_string, replace_string)
for phase in ( (search1, replace1) as 1, (search2, replace2) as 2 ))
)
-- replaced_data: recursive CTE that applies the steps one after another.
-- The anchor applies step 1 (always a phase-1 step, because phase sorts
-- first); each recursive level applies step rn + 1 to the output of step rn.
, replaced_data(S_RID, RN, is_last, SENTENCE) as (
select S_RID, RN, is_last,
regexp_replace(SENTENCE, search_string, replace_string,1,0,'i')
from unpivoted_data
where rn = 1
union all
select n.S_RID, n.RN, n.is_last,
case when n.phase = 1
then regexp_replace(o.SENTENCE, n.search_string, n.replace_string,1,0,'i')
else replace(o.SENTENCE, n.search_string, n.replace_string)
end
from unpivoted_data n
join replaced_data o
on o.s_rid = n.s_rid and n.rn = o.rn + 1
)
-- Keep only the sentence as it stands after its last step.
select s_rid, sentence from replaced_data
where is_last = 1
order by s_rid;
This is the same as the previous answer, with a couple of small changes: each matched word is first replaced with a placeholder built from a unique identifier
(already generated using the ROW_NUMBER analytic function), and then, when the placeholder is replaced with the URI, the word's id is used
again.
Merge:
-- Rewrites temp.sentence in place: every delimited occurrence of any
-- temp.word is turned into 'http://localhost/<id>/<u><word></u>'.
--
-- The work is done in two recursive passes over the word list:
--   1. sentences_with_ids   replaces each word with a '${rn}' placeholder,
--                           where rn is unique per word row (not per id), so
--                           words that share an id stay distinguishable;
--   2. sentences_with_words replaces each '${rn}' placeholder with the link
--                           built from that row's id and word.
-- Both passes start at rn = COUNT(*) + 1 and count down to 1, so words are
-- applied from the highest rn (longest word) to the lowest (shortest word),
-- and the row with rn = 1 holds the fully processed sentence.
MERGE INTO temp dst
USING (
  WITH ordered_words ( rn, id, word, regex_safe_word ) AS (
    -- Number the words shortest-first; regex_safe_word is the word with
    -- regular-expression metacharacters backslash-escaped.
    -- NULL words are skipped: concatenating NULL would reduce the search
    -- pattern below to '(^|\W)($|\W)', which matches bare punctuation and
    -- would inject links with an empty word into the sentences.
    SELECT ROW_NUMBER() OVER ( ORDER BY LENGTH( word ) ASC, word DESC ),
           id,
           word,
           REGEXP_REPLACE( word, '([][)(}{|^$\.*+?])', '\\\1' )
    FROM   temp
    WHERE  word IS NOT NULL
  ),
  sentences_with_ids ( rid, sentence, rn ) AS (
    -- Anchor: every sentence, untouched, positioned one step above the
    -- highest word number.
    SELECT ROWID,
           sentence,
           ( SELECT COUNT(*) + 1 FROM ordered_words )
    FROM   temp
  UNION ALL
    -- Step: replace the next word (rn - 1) with its '${rn}' placeholder.
    -- The replacement is applied twice because the delimiter consumed at the
    -- end of one match cannot also start the next match, so occurrences
    -- separated by a single character need a second pass.
    SELECT s.rid,
           REGEXP_REPLACE(
             REGEXP_REPLACE(
               s.sentence,
               '(^|\W)' || w.regex_safe_word || '($|\W)',
               '\1${'|| w.rn ||'}\2'
             ),
             '(^|\W)' || w.regex_safe_word || '($|\W)',
             '\1${' || w.rn || '}\2'
           ),
           s.rn - 1
    FROM   sentences_with_ids s
           INNER JOIN ordered_words w
           ON ( s.rn - 1 = w.rn )
  ),
  sentences_with_words ( rid, sentence, rn ) AS (
    -- Anchor: the sentences after all placeholders have been inserted.
    SELECT rid,
           sentence,
           ( SELECT COUNT(*) + 1 FROM ordered_words )
    FROM   sentences_with_ids
    WHERE  rn = 1
  UNION ALL
    -- Step: replace the '${rn}' placeholder of the next word with its link.
    SELECT s.rid,
           REPLACE(
             s.sentence,
             '${' || w.rn || '}',
             'http://localhost/' || w.id || '/<u>' || w.word || '</u>'
           ),
           s.rn - 1
    FROM   sentences_with_words s
           INNER JOIN ordered_words w
           ON ( s.rn - 1 = w.rn )
  )
  SELECT rid, sentence
  FROM   sentences_with_words
  WHERE  rn = 1
) src
ON ( dst.ROWID = src.RID )
WHEN MATCHED THEN
  UPDATE
  SET sentence = src.sentence;
Output:
-- Show the table after the MERGE. Columns are listed explicitly and the rows
-- are ordered so the output is stable between runs.
SELECT id, word, sentence FROM temp ORDER BY id;
ID | WORD                    | SENTENCE
-: | :---------------------- | :-------
 1 | automation testing      | automtestingation http://localhost/2/<u>testing</u> http://localhost/2/<u>test</u> is popular kind of http://localhost/2/<u>testing</u>
 2 | testing                 | http://localhost/3/<u>manual testing</u>
 2 | test                    | http://localhost/2/<u>test</u>
 3 | manual testing          | this is an old method of http://localhost/2/<u>testing</u>
 4 | punctuation             | http://localhost/1/<u>automation testing</u>,http://localhost/3/<u>manual testing</u>,http://localhost/4/<u>punctuation</u>,automanual http://localhost/2/<u>testing</u>-http://localhost/2/<u>testing</u>
 5 | B-number analysis       | http://localhost/6/<u>B-number analysis table</u>
 6 | B-number analysis table | http://localhost/2/<u>testing</u> http://localhost/5/<u>B-number analysis</u>
 7 | Not Matched             | http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u>
 8 | ^[($                    | http://localhost/2/<u>testing</u> characters http://localhost/8/<u>^[($</u> that need escaping in a regular expression
db<>fiddle here
For your code, use the same technique in the first sub-query factoring clause:
-- words: one row per word with the strings used by the two replacement phases.
--   search1  = the word with regex metacharacters escaped, delimited by
--              start/end of string or a non-word character
--   replace1 = the two captured delimiters with a '{n}' placeholder between
--   search2  = the literal '{n}' placeholder
--   replace2 = the final link text
-- n is a row number that is unique per word row (not per id), so words that
-- share an id still get distinct placeholders. It is computed once in the
-- inline view, with ROWID as a tie-breaker, so replace1 and search2 always
-- carry the same number even when two rows hold the same word.
with words(id, word, word_length, search1, replace1, search2, replace2) as (
  select id, word, length(word),
         '(^|\W)' || REGEXP_REPLACE(word, '([][)(}{|^$\.*+?])', '\\\1') || '($|\W)',
         '\1{'|| word_rn ||'}\2',
         '{'|| word_rn ||'}',
         'http://localhost/' || id || '/<u>' || word || '</u>'
  FROM (
    select id, word,
           ROW_NUMBER() OVER ( ORDER BY LENGTH( word ) DESC, word ASC, ROWID ) as word_rn
    from temp
  )
)