I would like to get results from the fields (path) in table URL where the last directory in the path field from URL table contains at least two matching words from the same row in the Locations table.
Table URL
WITH arrayJoin([
('/directory1/paris-oslo-london.htm', 0 ),
('/directory1/paris-oslo-granada.htm', 1 ),
('/directory1/london-moscow.htm', 2),
('/directory1/barcelona-moscow.htm', 3),
('/directory2/keyword1-keyword2-lisboa-london.htm', 4 ),
('/directory3/ny-anykeyword-milan.htm', 5),
('/directory3/anykeyword-berlin-milan-moscow', 6)] )as data
SELECT data.1 as path,data.2 as value
path | value |
---|---|
/directory1/paris-oslo-london.htm | 0 |
/directory1/paris-oslo-granada.htm | 1 |
/directory1/london-moscow.htm | 2 |
/directory1/barcelona-moscow.htm | 3 |
/directory1/keyword1-keyword2-lisboa-london.htm | 4 |
/directory3/ny-anykeyword-milan.htm | 5 |
WITH arrayJoin([
('paris','row 1'),
('oslo','row 2'),
('granada','row 3'),
('ny-paris-milan-granada','row 4'),
('london-moscow','row 5'),
('lisboa','row 6'),
('ny-aaaa-milan','row 7'),
('berlin-moscow-keyword3','row 8')] )as data
SELECT data.1 as location_name,data.2 as row_number
location_name | row_number |
---|---|
paris | 1 |
oslo | 2 |
granada | 3 |
ny-paris-milan-granada | 4 |
london-moscow | 5 |
lisboa | 6 |
ny-aaaa-milan | 7 |
berlin-moscow-keyword3 | 8 |
I try this SQL but dowcounts each keyword if exists in any rows, not in the same row
WITH arrayFlatten(groupUniqArray(splitByNonAlpha(arrayJoin([
('paris','row 1'),
('oslo','row 2'),
('granada','row 3'),
('ny-paris-milan-granada','row 4'),
('london-moscow','row 5'),
('lisboa','row 6'),
('ny-aaaa-milan','row 7'),
('berlin-moscow-keyword3','row 8')] ).1))) as location_namee
SELECT arrayJoin([
('/directory1/paris-oslo-london.htm', 0 ),
('/directory1/paris-oslo-granada.htm', 1 ),
('/directory1/london-moscow.htm', 2),
('/directory1/barcelona-moscow.htm', 3),
('/directory2/keyword1-keyword2-lisboa-london.htm', 4 ),
('/directory3/ny-anykeyword-milan.htm', 5),
('/directory3/anykeyword-berlin-milan-moscow', 6)] ).1 as path,
splitByNonAlpha(replaceRegexpAll(path,'^\/(.*)\/(.*)\.htm$', '\\2')) as words_in_directory,
arrayFilter(x -> has(location_namee,x),words_in_directory) as matchedWords
path | words_in_directory | matchedWords |
---|---|---|
/directory1/paris-oslo-london.htm | ['paris','oslo','london'] | ['paris','oslo','london'] |
/directory1/paris-oslo-granada.htm | ['paris','oslo','granada'] | ['paris','oslo','granada'] |
/directory1/london-moscow.htm | ['london','moscow'] | ['london','moscow'] |
/directory1/barcelona-moscow.htm | ['barcelona','moscow'] | ['moscow'] |
/directory2/keyword1-keyword2-lisboa-london.htm | ['keyword1','keyword2','lisboa','london'] | ['lisboa','london'] |
/directory3/ny-anykeyword-milan.htm | ['ny','anykeyword','milan'] | ['ny','milan'] |
/directory3/anykeyword-berlin-milan-moscow | ['directory3','anykeyword','berlin','milan','moscow'] | ['berlin','milan','moscow'] |
url | match | number_of_elements_matching_in_url | matchedWords | row_matched |
---|---|---|---|---|
directory1/paris-oslo-granada.htm | yes | 2 | 'paris','granada' | row 4 |
directory3/ny-anykeyword-otherkeyword-milan.htm | yes | 2 | 'ny','milan' | row 7 |
directory3/anykeyword-berlin-milan-moscow | yes | 2 | 'berlin','moscow' | row 8 |
directory1/barcelona-moscow.htm | not | 1 | '-' | - |
directory1/london-moscow.htm | yes | 2 | 'london','moscow' | row 5 |
directory1/paris-oslo-london. | not | 1 | '-' | - |
directory1/keyword1-keyword2-lisboa-london.htm | not | 1 | '-' | - |
What is wrong here?
select path, value, length(matchedWords) number_of_elements_matching_in_url, words_in_directory, matchedWords, row_number
from (
WITH ((splitByNonAlpha((arrayJoin([
('paris','row 1'),
('oslo','row 2'),
('granada','row 3'),
('ny-paris-milan-granada','row 4'),
('london-moscow','row 5'),
('lisboa','row 6'),
('ny-aaaa-milan','row 7'),
('berlin-moscow-keyword3','row 8')]) as data1).1)),data1.2) as data
select path, value, splitByNonAlpha(replaceRegexpAll(path,'^\/(.*)\/(.*)\.htm$', '\\2')) as words_in_directory,
data.1 location_name, data.2 as row_number,
arrayIntersect( words_in_directory, data.1 ) matchedWords
from
(WITH arrayJoin([
('/directory1/paris-oslo-london.htm', 0 ),
('/directory1/paris-oslo-granada.htm', 1 ),
('/directory1/london-moscow.htm', 2),
('/directory1/barcelona-moscow.htm', 3),
('/directory2/keyword1-keyword2-lisboa-london.htm', 4 ),
('/directory3/ny-anykeyword-milan.htm', 5),
('/directory3/anykeyword-berlin-milan-moscow', 6)] )as datax
SELECT datax.1 as path, datax.2 as value)
)
order by row_number
┌─path────────────────────────────────────────────┬─value─┬─number_of_elements_matching_in_url─┬─words_in_directory────────────────────────────────────┬─matchedWords────────┬─row_number─┐
│ /directory3/anykeyword-berlin-milan-moscow │ 6 │ 0 │ ['directory3','anykeyword','berlin','milan','moscow'] │ [] │ row 1 │
│ /directory1/barcelona-moscow.htm │ 3 │ 0 │ ['barcelona','moscow'] │ [] │ row 1 │
│ /directory2/keyword1-keyword2-lisboa-london.htm │ 4 │ 0 │ ['keyword1','keyword2','lisboa','london'] │ [] │ row 1 │
│ /directory1/london-moscow.htm │ 2 │ 0 │ ['london','moscow'] │ [] │ row 1 │
│ /directory3/ny-anykeyword-milan.htm │ 5 │ 0 │ ['ny','anykeyword','milan'] │ [] │ row 1 │
│ /directory1/paris-oslo-london.htm │ 0 │ 1 │ ['paris','oslo','london'] │ ['paris'] │ row 1 │
│ /directory1/paris-oslo-granada.htm │ 1 │ 1 │ ['paris','oslo','granada'] │ ['paris'] │ row 1 │
│ /directory3/ny-anykeyword-milan.htm │ 5 │ 0 │ ['ny','anykeyword','milan'] │ [] │ row 2 │
│ /directory3/anykeyword-berlin-milan-moscow │ 6 │ 0 │ ['directory3','anykeyword','berlin','milan','moscow'] │ [] │ row 2 │
│ /directory1/paris-oslo-granada.htm │ 1 │ 1 │ ['paris','oslo','granada'] │ ['oslo'] │ row 2 │
│ /directory1/london-moscow.htm │ 2 │ 0 │ ['london','moscow'] │ [] │ row 2 │
│ /directory2/keyword1-keyword2-lisboa-london.htm │ 4 │ 0 │ ['keyword1','keyword2','lisboa','london'] │ [] │ row 2 │
│ /directory1/paris-oslo-london.htm │ 0 │ 1 │ ['paris','oslo','london'] │ ['oslo'] │ row 2 │
│ /directory1/barcelona-moscow.htm │ 3 │ 0 │ ['barcelona','moscow'] │ [] │ row 2 │
│ /directory1/paris-oslo-granada.htm │ 1 │ 1 │ ['paris','oslo','granada'] │ ['granada'] │ row 3 │
│ /directory3/ny-anykeyword-milan.htm │ 5 │ 0 │ ['ny','anykeyword','milan'] │ [] │ row 3 │
│ /directory3/anykeyword-berlin-milan-moscow │ 6 │ 0 │ ['directory3','anykeyword','berlin','milan','moscow'] │ [] │ row 3 │
│ /directory1/london-moscow.htm │ 2 │ 0 │ ['london','moscow'] │ [] │ row 3 │
│ /directory2/keyword1-keyword2-lisboa-london.htm │ 4 │ 0 │ ['keyword1','keyword2','lisboa','london'] │ [] │ row 3 │
│ /directory1/paris-oslo-london.htm │ 0 │ 0 │ ['paris','oslo','london'] │ [] │ row 3 │
│ /directory1/barcelona-moscow.htm │ 3 │ 0 │ ['barcelona','moscow'] │ [] │ row 3 │
│ /directory1/paris-oslo-granada.htm │ 1 │ 2 │ ['paris','oslo','granada'] │ ['paris','granada'] │ row 4 │
│ /directory3/ny-anykeyword-milan.htm │ 5 │ 2 │ ['ny','anykeyword','milan'] │ ['ny','milan'] │ row 4 │
│ /directory3/anykeyword-berlin-milan-moscow │ 6 │ 1 │ ['directory3','anykeyword','berlin','milan','moscow'] │ ['milan'] │ row 4 │
│ /directory1/paris-oslo-london.htm │ 0 │ 1 │ ['paris','oslo','london'] │ ['paris'] │ row 4 │
│ /directory1/london-moscow.htm │ 2 │ 0 │ ['london','moscow'] │ [] │ row 4 │
│ /directory2/keyword1-keyword2-lisboa-london.htm │ 4 │ 0 │ ['keyword1','keyword2','lisboa','london'] │ [] │ row 4 │
│ /directory1/barcelona-moscow.htm │ 3 │ 0 │ ['barcelona','moscow'] │ [] │ row 4 │
│ /directory1/barcelona-moscow.htm │ 3 │ 1 │ ['barcelona','moscow'] │ ['moscow'] │ row 5 │
│ /directory1/london-moscow.htm │ 2 │ 2 │ ['london','moscow'] │ ['moscow','london'] │ row 5 │
│ /directory3/anykeyword-berlin-milan-moscow │ 6 │ 1 │ ['directory3','anykeyword','berlin','milan','moscow'] │ ['moscow'] │ row 5 │
│ /directory1/paris-oslo-london.htm │ 0 │ 1 │ ['paris','oslo','london'] │ ['london'] │ row 5 │
│ /directory3/ny-anykeyword-milan.htm │ 5 │ 0 │ ['ny','anykeyword','milan'] │ [] │ row 5 │
│ /directory1/paris-oslo-granada.htm │ 1 │ 0 │ ['paris','oslo','granada'] │ [] │ row 5 │
│ /directory2/keyword1-keyword2-lisboa-london.htm │ 4 │ 1 │ ['keyword1','keyword2','lisboa','london'] │ ['london'] │ row 5 │
│ /directory1/barcelona-moscow.htm │ 3 │ 0 │ ['barcelona','moscow'] │ [] │ row 6 │
│ /directory1/london-moscow.htm │ 2 │ 0 │ ['london','moscow'] │ [] │ row 6 │
│ /directory2/keyword1-keyword2-lisboa-london.htm │ 4 │ 1 │ ['keyword1','keyword2','lisboa','london'] │ ['lisboa'] │ row 6 │
│ /directory1/paris-oslo-granada.htm │ 1 │ 0 │ ['paris','oslo','granada'] │ [] │ row 6 │
│ /directory3/ny-anykeyword-milan.htm │ 5 │ 0 │ ['ny','anykeyword','milan'] │ [] │ row 6 │
│ /directory1/paris-oslo-london.htm │ 0 │ 0 │ ['paris','oslo','london'] │ [] │ row 6 │
│ /directory3/anykeyword-berlin-milan-moscow │ 6 │ 0 │ ['directory3','anykeyword','berlin','milan','moscow'] │ [] │ row 6 │
│ /directory2/keyword1-keyword2-lisboa-london.htm │ 4 │ 0 │ ['keyword1','keyword2','lisboa','london'] │ [] │ row 7 │
│ /directory1/paris-oslo-granada.htm │ 1 │ 0 │ ['paris','oslo','granada'] │ [] │ row 7 │
│ /directory1/london-moscow.htm │ 2 │ 0 │ ['london','moscow'] │ [] │ row 7 │
│ /directory3/ny-anykeyword-milan.htm │ 5 │ 2 │ ['ny','anykeyword','milan'] │ ['ny','milan'] │ row 7 │
│ /directory1/paris-oslo-london.htm │ 0 │ 0 │ ['paris','oslo','london'] │ [] │ row 7 │
│ /directory1/barcelona-moscow.htm │ 3 │ 0 │ ['barcelona','moscow'] │ [] │ row 7 │
│ /directory3/anykeyword-berlin-milan-moscow │ 6 │ 1 │ ['directory3','anykeyword','berlin','milan','moscow'] │ ['milan'] │ row 7 │
│ /directory2/keyword1-keyword2-lisboa-london.htm │ 4 │ 0 │ ['keyword1','keyword2','lisboa','london'] │ [] │ row 8 │
│ /directory1/paris-oslo-granada.htm │ 1 │ 0 │ ['paris','oslo','granada'] │ [] │ row 8 │
│ /directory1/london-moscow.htm │ 2 │ 1 │ ['london','moscow'] │ ['moscow'] │ row 8 │
│ /directory3/ny-anykeyword-milan.htm │ 5 │ 0 │ ['ny','anykeyword','milan'] │ [] │ row 8 │
│ /directory1/paris-oslo-london.htm │ 0 │ 0 │ ['paris','oslo','london'] │ [] │ row 8 │
│ /directory1/barcelona-moscow.htm │ 3 │ 1 │ ['barcelona','moscow'] │ ['moscow'] │ row 8 │
│ /directory3/anykeyword-berlin-milan-moscow │ 6 │ 2 │ ['directory3','anykeyword','berlin','milan','moscow'] │ ['moscow','berlin'] │ row 8 │
└─────────────────────────────────────────────────┴───────┴────────────────────────────────────┴───────────────────────────────────────────────────────┴─────────────────────┴────────────┘