Search code examples
sql-serversql-server-2005string-parsing

Merge two field into One with only new items


I have two tables which both contain 'pipe separated values'

e.g:

Table 1:

 DataField_A

 item 1|item 2|item 3|etc.....

Table 2:

 DataField_A

 item 7|item 5|item 3|etc.....

I need to merge Table 2 into table 1 such that Table 2 contains all items across both tables.

Doing this pro-grammatically would be a simple matter of looping through each item in table 2 and adding to table 1 if it does not exist in table 1.

How can this be done in SQL as a stored procedure?


Solution

  • I've used a parsing function (the example I'm using comes from here) to parse the string in Table1. Then I use that function in a CTE to find missing elements in Table2 and merge the data.

    /* Helper function to parse delimited string */
    CREATE FUNCTION [dbo].[fnParseStringTSQL] (@string NVARCHAR(MAX),@separator NCHAR(1))
    RETURNS @parsedString TABLE (string NVARCHAR(MAX))
    AS 
    BEGIN
       DECLARE @position int
       SET @position = 1
       SET @string = @string + @separator
       WHILE charindex(@separator,@string,@position) <> 0
          BEGIN
             INSERT into @parsedString
             SELECT substring(@string, @position, charindex(@separator,@string,@position) - @position)
             SET @position = charindex(@separator,@string,@position) + 1
          END
         RETURN
    END
    go
    
    /* Set up some sample data */
    declare @Table1 table (
        id int,
        DataField_1A varchar(500)
    )
    
    declare @Table2 table (
        id int,
        DataField_2A varchar(500)
    )
    
    insert into @Table1
        (id, DataField_1A)
        select 1, 'item 1|item 2|item 3'
        union
        select 2, 'item A|item B|item C|item D|item Z'
    
    insert into @Table2
        (id, DataField_2A)
        select 1, 'item 7|item 5|item 3'
        union
        select 2, 'item A|item Y|item Z'
    
    /* data before the update */
    select * from @Table2
    
    /* boolean to ensure loop executes at least once */
    declare @FirstLoop bit
    set @FirstLoop = 1
    
    /* Do the updates */
    while (@FirstLoop = 1 or @@ROWCOUNT <> 0) begin 
        set @FirstLoop = 0
    
        ;with cteMissingItems as (
        select t2.id, p.string
            from @Table2 t2
                inner join @Table1 t1
                    on t2.id = t1.id
                cross apply dbo.fnParseStringTSQL(t1.DataField_1A,'|') p
            where charindex(p.string, t2.DataField_2A) = 0
        )
        update t2
            set t2.DataField_2A = t2.DataField_2A + '|' + mi.string
            from @Table2 t2
                inner join cteMissingItems mi
                    on t2.id = mi.id
    end /* while */
    
    /* Prove the update worked */
    select * from @Table2
    
    /* Clean up */
    drop function dbo.fnParseStringTSQL