This is a follow up question of How to extract multiple rows from a table based on values from multiple columns from another table and then concatenate in SQL?. I am new to SQL. I have 2 tables Table 1 and Table 2. Table 1 have columns ID, SSTART, SEND, UPSTART, UPEND, DNSTART, DNSTAND. Table 2 have columns Position and Seq.
Table 1
ID | UPSTART | UPEND | SStart | SEnd | DNSTART | DNEND |
---|---|---|---|---|---|---|
1 | 98 | 99 | 100 | 104 | 105 | 106 |
2 | 98 | 99 | 100 | 104 | 105 | 106 |
3 | 100 | 101 | 102 | 106 | 107 | 108 |
4 | 100 | 101 | 102 | 106 | 107 | 108 |
Table 2
Position | Seq |
---|---|
98 | M |
99 | N |
100 | A |
101 | T |
102 | C |
103 | T |
104 | G |
105 | T |
106 | T |
107 | G |
108 | T |
109 | G |
My final table needs Columns ID, SStart, SSEnd, FullSeq, UPStart, UPEnd, UPSeq, DNStart, DNStand, DnSeq as shown below:
ID | UpStart | UpEnd | UpSeq | SStart | SSEnd | FullSeq | DNStart | DNStand | DnSeq |
---|---|---|---|---|---|---|---|---|---|
1 | 98 | 99 | MN | 100 | 104 | ATCTG | 105 | 106 | TT |
2 | 98 | 99 | MN | 100 | 104 | ATCTG | 105 | 106 | TT |
3 | 100 | 101 | AT | 102 | 106 | CTGTT | 107 | 108 | GT |
4 | 100 | 101 | AT | 102 | 106 | CTGTT | 107 | 108 | GT |
I am using SQL server 2016. I tried
; WITH SequenceCTE AS(
SELECT
[ID] ,
[SStart],
[SSEnd],
[UpStart],
[UpEnd],
Seq,
[DnStart],
[DnEnd],
[Position]
FROM Table_1 a
JOIN Table_2 b
ON b.Position >= a.[UpStart] AND
b.Position <= a.[DnEnd]
)
SELECT DISTINCT
a.ID,
a.[UpStart],
a.[UpEnd],
UpSeq = (
SELECT STUFF(',' + Seq,1,1,'')
FROM SequenceCTE b
WHERE
a.ID = b.ID AND
a.[Position] > = b.[UpStart] AND
a.[Position] < = b.[UpEnd] AND
order by b.Position
FOR XML PATH ('')
) ,
a.[SStart],
a.[SSEnd],
FullSeq = (
SELECT STUFF(',' + Seq,1,1,'')
FROM SequenceCTE b
WHERE
a.ID = b.ID AND
a.[SStart] = b.[SStart] AND
a.[SSEnd] = b.[SSEnd]
order by b.Position
FOR XML PATH ('')
) ,
a.[DnStart],
a.[DnEnd],
DownSeq = (
SELECT STUFF(',' + Seq,1,1,'')
FROM SequenceCTE b
WHERE
a.ID = b.ID AND
a.[Position] > = b.[DnStart] AND
a.[Position] < = b.[DnEnd]
order by b.Position
FOR XML PATH ('')
)
FROM SequenceCTE a
But it didn't work. Then I tried
With FullSeqCTE as(
Select * ,
b1.Seq as FullSeq
from Table_1 a
join Table_2 b1 ON b1.Position >= a.SStart and b1.Position <= a.SSEnd
), UpperSeqCTE as(
Select * ,
b2.Seq as UpSeq
from Table_1 a
join Table_2 b2 ON b2.Position >= a.UpStart and b2.Position <= a.UpEnd
), LowerSeqCTE as (
Select * ,
b3.Seq as DownSeq
from Table_1 a
join Table_2 b3 ON b3.Position > = a.DnStart and b3.Position < = a.DnEnd
)
But I am not sure how to proceed. Thank you very much for all the help.
Create Statement Table 1
CREATE TABLE [Table_1](
[SStart] [int] NULL,
[SSend] [int] NULL,
[ID] [int] NULL,
[UpStart] [int] NULL,
[UpEnd] [int] NULL,
[DnStart] [int] NULL,
[DnEnd] [int] NULL
) ON [PRIMARY]
GO
Insert Statement Table 1
INSERT INTO [Table_1]
([ID]
,[UpStart]
,[UpEnd]
,[SStart]
,[SSend]
,[DnStart]
,[DnEnd])
VALUES
(1,98,99,100,104,105,106),
(2,98,99,100,104,105,106),
(3,100,101,102,106,107,108),
(4,100,101,102,106,107,108)
GO
Create Statement Table 2
CREATE TABLE [Table_2](
[Position] [int] NULL,
[Seq] [nvarchar](1) NULL
) ON [PRIMARY]
Go
Insert Statement for Table 2
INSERT INTO [dbo].[Table_2]
([Position]
,[Seq])
VALUES
(98,'M'),
(99,'N'),
(100,'A'),
(101,'T'),
(102,'C'),
(103,'T'),
(104,'G'),
(105,'T'),
(106,'T'),
(107,'G'),
(108,'T'),
(109,'G')
GO
I am using common table expressions (cte_Up
, cte_S
, cte_Dn
) to limit the required groupings.
Solution 1
When working with SQL Server 2017 or later, you can use the STRING_AGG()
function to concatenate columns.
with cte_Up as
(
select t1.Id, string_agg(t2.Seq, '') within group (order by t2.Position) as UpSeq
from table_1 t1
join table_2 t2
on t2.Position >= t1.UpStart
and t2.Position <= t1.UpEnd
group by t1.Id
),
cte_S as
(
select t1.Id, string_agg(t2.Seq, '') within group (order by t2.Position) as SSeq
from table_1 t1
join table_2 t2
on t2.Position >= t1.SStart
and t2.Position <= t1.SEnd
group by t1.Id
),
cte_Dn as
(
select t1.Id, string_agg(t2.Seq, '') within group (order by t2.Position) as DnSeq
from table_1 t1
join table_2 t2
on t2.Position >= t1.DnStart
and t2.Position <= t1.DnEnd
group by t1.Id
)
select t1.Id,
t1.UpStart,
t1.UpEnd,
u.UpSeq,
t1.SStart,
t1.SEnd,
s.SSeq,
t1.DnStart,
t1.DnEnd,
d.DnSeq
from table_1 t1
join cte_Up u
on u.Id = t1.Id
join cte_S s
on s.Id = t1.Id
join cte_Dn d
on d.Id = t1.Id;
Fiddle to see things in action.
Solution 2
Using the for xml
clause to achieve string concatenation when string_agg()
is not availabe.
with cte_Up as
(
select t1.Id,
( select '' + t2.Seq
from table_2 t2
where t2.Position >= t1.UpStart
and t2.Position <= t1.UpEnd
order by t2.Position
for xml path('') ) as UpSeq
from table_1 t1
),
cte_S as
(
select t1.Id,
( select '' + t2.Seq
from table_2 t2
where t2.Position >= t1.SStart
and t2.Position <= t1.SEnd
order by t2.Position
for xml path('') ) as SSeq
from table_1 t1
),
cte_Dn as
(
select t1.Id,
( select '' + t2.Seq
from table_2 t2
where t2.Position >= t1.DnStart
and t2.Position <= t1.DnEnd
order by t2.Position
for xml path('') ) as DnSeq
from table_1 t1
)
select t1.Id,
t1.UpStart,
t1.UpEnd,
u.UpSeq,
t1.SStart,
t1.SEnd,
s.SSeq,
t1.DnStart,
t1.DnEnd,
d.DnSeq
from table_1 t1
join cte_Up u
on u.Id = t1.Id
join cte_S s
on s.Id = t1.Id
join cte_Dn d
on d.Id = t1.Id;
Fiddle to see things in action.
Result
Id | UpStart UpEnd UpSeq | SStart SEnd SSeq | DnStart DnEnd DnSeq
-- | ------- ----- ----- | ------ ---- ----- | ------- ----- -----
1 | 98 99 MN | 100 104 ATCTG | 105 106 TT
2 | 98 99 MN | 100 104 ATCTG | 105 106 TT
3 | 100 101 AT | 102 106 CTGTT | 107 108 GT
4 | 100 101 AT | 102 106 CTGTT | 107 108 GT