Search code examples
sqlsql-serverstringformatemail-address

How to put mailto around email addresses in text string


I am trying to figure out how to be able to select/find and format each email address contained in a piece of text.

Example string:

Notification: Organizer must notify at least 30 days prior to the event. Provide the event information, including: day of contact information, location, date, schedule, activities, etc. Paul T. Hall – paulhall@email.com - Mikel Zubizarreta – mikelzubizarreta@email.com

The output of the string should be:

Notification: Organizer must notify at least 30 days prior to the event. Provide the event information, including: day of contact information, location, date, schedule, activities, etc. Paul T. Hall – <a href='mailto:paulhall@email.com'> - paulhall@email.com</a> - Mikel Zubizarreta – <a href='mailto:mikelzubizarreta@email.com'>mikelzubizarreta@email.com</a>

This are the attempts I have come up with:

Within a select:

,   CASE 
        WHEN CHARINDEX('@',CONDITION) > 0 THEN 
            REPLACE(CONDITION, dbo.FN_GET_EMAIL_FROM_STRING(CONDITION), '<a href=''mailto:' + dbo.FN_GET_EMAIL_FROM_STRING(CONDITION) + '''>' + dbo.FN_GET_EMAIL_FROM_STRING(CONDITION) + '</a>')
        ELSE CONDITION
    END [CONDITION]

Contents of dbo.FN_GET_EMAIL_FROM_STRING(CONDITION):

ALTER FUNCTION [dbo].[FN_GET_EMAIL_FROM_STRING]
(
    @TextContainingEmail VARCHAR(1000)
)
RETURNS VARCHAR(1000)
AS
    BEGIN

        DECLARE @retval VARCHAR(1000);

        SELECT TOP 
            1 @retval = Items 
        FROM 
            dbo.FN_SPLIT_STRING(@TextContainingEmail, '')
        WHERE 
            Items LIKE '%@%';            

         RETURN @retval;         

    END;

Contents of: FN_SPLIT_STRING(@TextContainingEmail, '')

    ALTER FUNCTION [dbo].[FN_SPLIT_STRING]
    (
        @STRING NVARCHAR(4000)
    ,   @Delimiter CHAR(1)
    )

    RETURNS @Results TABLE(Items NVARCHAR(4000))

    AS
         BEGIN

             DECLARE @INDEX INT;
             DECLARE @SLICE NVARCHAR(4000);

             -- HAVE TO SET TO 1 SO IT DOESNT EQUAL ZERO FIRST TIME IN LOOP
             SELECT @INDEX = 1;

             IF @STRING IS NULL
                 RETURN;
             WHILE @INDEX != 0
                 BEGIN 
                     -- GET THE INDEX OF THE FIRST OCCURENCE OF THE SPLIT CHARACTER
                     SELECT 
                       @INDEX = CHARINDEX(@Delimiter, LTRIM(RTRIM(@STRING)));
                     -- NOW PUSH EVERYTHING TO THE LEFT OF IT INTO THE SLICE VARIABLE
                     IF @INDEX != 0
                         SELECT 
                           @SLICE = LEFT(@STRING, @INDEX - 1);
                         ELSE
                         SELECT 
                           @SLICE = @STRING;
                     -- PUT THE ITEM INTO THE RESULTS SET
                     INSERT INTO @Results
                     (
                       Items
                     )
                     VALUES(@SLICE);
                     -- CHOP THE ITEM REMOVED OFF THE MAIN STRING
                     SELECT 
                       @STRING = REPLACE(RIGHT(@STRING, LEN(@STRING) - @INDEX), ',', '');
                     -- BREAK OUT IF WE ARE DONE
                     IF LEN(@STRING) = 0
                         BREAK;
                 END;
             RETURN;
         END;

But the output for the string I used as an example at the top of this post, ends up looking like this:

Notification: Organizer must notify at least 30 days prior to the event. Provide the event information, including: day of contact information, location, date, schedule, activities, etc. Paul T. Hall – <a href='mailto:paulhall@email.com'>paulhall@email.com</a> - Mikel Zubizarreta – mikelzubizarreta@email.com

As you can see, it sort of works but it only ads the 'mailto' tag to the first email address and not the second one.


Solution

  • This solution uses the splitter function created by Eirikur Eiriksson based on the original function by Jeff Moden. The whole explanation of this function can be found here. I'll just copy the code for the function.

    CREATE FUNCTION [dbo].[DelimitedSplit8K_LEAD]
    --===== Define I/O parameters
            (@pString VARCHAR(8000), @pDelimiter CHAR(1))
    RETURNS TABLE WITH SCHEMABINDING AS
     RETURN
    --===== "Inline" CTE Driven "Tally Table” produces values from 0 up to 10,000...
         -- enough to cover VARCHAR(8000)
     WITH E1(N) AS (
                     SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL 
                     SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL 
                     SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
                    ),                          --10E+1 or 10 rows
           E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
           E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
     cteTally(N) AS (--==== This provides the "zero base" and limits the number of rows right up front
                         -- for both a performance gain and prevention of accidental "overruns"
                     SELECT 0 UNION ALL
                     SELECT TOP (DATALENGTH(ISNULL(@pString,1))) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
                    ),
    cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
                     SELECT t.N+1
                       FROM cteTally t
                      WHERE (SUBSTRING(@pString,t.N,1) = @pDelimiter OR t.N = 0) 
                    )
    --===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
     SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY s.N1),
            Item = SUBSTRING(@pString,s.N1,ISNULL(NULLIF((LEAD(s.N1,1,1) OVER (ORDER BY s.N1) - 1),0)-s.N1,8000))
       FROM cteStart s
    ;
    GO
    

    This way we can identify the email addresses independently and concatenate the string again using FOR XML.

    CREATE TABLE #SampleData(
        String  varchar(8000)
    )
    INSERT INTO #SampleData VALUES('Notification: Organizer must notify at least 30 days prior to the event. Provide the event information, including: day of contact information, location, date, schedule, activities, etc. Paul T. Hall – paulhall@email.com - Mikel Zubizarreta – mikelzubizarreta@email.com')
    
    SELECT STUFF(( SELECT ' ' + CASE WHEN s.Item LIKE '_%@_%._%' THEN '<a href=''mailto:' + s.Item + '''>' + s.Item + '</a>'
                                ELSE s.Item END
                FROM dbo.DelimitedSplit8K_LEAD( d.String, ' ') s
                ORDER BY s.ItemNumber
                FOR XML PATH(''), TYPE).value('./text()[1]', 'varchar(max)'), 1, 1, '')
    FROM #SampleData d