Search code examples
sqlsql-serversql-server-2008patindexcharindex

SQL- select only numbers from column which contains string


I have a table like this (first column contains id adn second column is varchar which contains string (some formulas)) -

column_id       column_formula
4686         4686 = 4684 - 4685 
4687         4687 = ( 4681 / 1.205 / 4684 * 1000 ) 
4717         4717 = ( 4711 + 4712 + 4713 + 4714 + 4715 + 4716 )/6 
4719         4719 = abs( 4716 - 4715 ) 
4787         4787 = max(max(max(max(max( 4780 , 4781 ), 4782 ), 4783 ), 4784 ), 4785 ) - min(min(min(min(min( 4780 , 4781 ), 4782 ), 4783 ), 4784 ), 4785 ) 

Now need some query which will give an output as follows -

col1    col2
4686    4684      
4686    4685      
4687    4681 
4687    4684 
4717    4711
4717    4712

and so on.. (only numbers except the numbers which are less than 4 digits like 1.205 or so .. and 1000)

Its very difficult to use patindex on this one

Can any1 give me a solution on this


Solution

  • You need to have a function for splitting comma-delimited strings into separate rows. Here is the DelimitedSplit8K function by Jeff Moden:

    CREATE FUNCTION [dbo].[DelimitedSplit8K](
        @pString NVARCHAR(4000), @pDelimiter NCHAR(1)
    )
    RETURNS TABLE WITH SCHEMABINDING AS
    RETURN
    WITH E1(N) AS (
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL 
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
    )
    ,E2(N) AS (SELECT 1 FROM E1 a, E1 b)
    ,E4(N) AS (SELECT 1 FROM E2 a, E2 b)
    ,cteTally(N) AS(
        SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
    )
    ,cteStart(N1) AS(
        SELECT 1 UNION ALL 
        SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
    ),
    cteLen(N1,L1) AS(
    SELECT 
        s.N1,
        ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
    FROM cteStart s
    )
    SELECT 
        ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
        Item       = SUBSTRING(@pString, l.N1, l.L1)
    FROM cteLen l
    

    The idea is to REPLACE first all mathematical characters: +, -, *, /, (, ), = with a comma ,. This is needed so we can split the operands later. Next call the splitter function to split the row using comma , as the delimiter. Once the split is done, filter all numeric values using NOT LIKE '[^0-9]%' AND Item <> ''. You want to INSERT the results of this to a Temp Table. Then, you SELECT from that Temp Table with the appropriate WHERE clause:

    ;WITH cteSanitized AS(
        SELECT
            t.Column_Id,
            Item = 
                REPLACE(
                    REPLACE(
                        REPLACE(
                            REPLACE(
                                REPLACE(
                                    REPLACE(
                                        REPLACE(
                                            SUBSTRING(t.Column_Formula, CHARINDEX('=', t.Column_Formula), LEN(t.Column_Formula) - CHARINDEX('=', t.Column_Formula) + 1)
                                            ,'+',','
                                        ), '-',','
                                    ), '/',','
                                ), '*',','
                            ), ')', ',)'
                        ), '(',','
                    ), '=',','
                )
        FROM Test t
    )
    ,CteSplitted AS(
        SELECT
            s.Column_Id,
            ItemNumber,
            Item = LTRIM(RTRIM(x.Item))
        FROM cteSanitized s
        CROSS APPLY dbo.DelimitedSplit8K(s.Item, ',') x
    )
    SELECT 
        Column_Id,
        ItemNumber,
        Item = CONVERT(NUMERIC, ITEM)
    INTO #TempTable
    FROM CteSplitted
    WHERE 
        Item  NOT LIKE '[^0-9]%'
        AND Item <> ''
    
    SELECT DISTINCT
        Col1 = Column_Id,
        Col2 = Item
    FROM #TempTable
    WHERE Item > 1000
    
    DROP TABLE #TempTable
    

    RESULT

    Col1        Col2
    ----------- ---------
    4686        4684
    4686        4685
    4687        4681
    4687        4684
    4717        4711
    4717        4712
    4717        4713
    4717        4714
    4717        4715
    4717        4716
    4719        4715
    4719        4716
    4787        4780
    4787        4781
    4787        4782
    4787        4783
    4787        4784
    4787        4785