Search code examples
sqlt-sqlazure-sql-databaseclustered-indexnon-clustered-index

Create clustered or nonclustered index on 1 million rows table for LIKE query?


I have a database table with postal codes, city, long, lat, province.

I have one use for this table which is to return suggested codes (plus city and province) for an autocomplete widget.

city and postalcode make up a unique record. I am starting the query at 3 characters.

This query is slow which makes the autocomplete experience bad. I am wondering given the information above what kind of Index would work best? I am using Azure SQL datastore and as such i am unable to run Query Analyzer/Tuning Advisor.

I have tried nonclustered index on postalCode and a 2 column clustered index on postalCode and city. Both yielded same results on a query:

Select * 
From PostalCode 
Where code LIKE '%L6J 0%'

I am not updating or inserting on this table.


Solution

  • As the length of postcode is fairly short and known (8), this is a good candidate for chunking. Break the postcode up into all its constituent chunks and store them with their starting position and lengths to enable index seeks.

    So for example, for a postcode such as 'OX1 1JZ', store all the following strings:

    start len postcodePart
    1   2   OX
    1   3   OX1
    1   4   OX11
    1   5   OX11J
    1   6   OX11JZ
    2   2   X1
    2   3   X11
    2   4   X11J
    2   5   X11JZ
    3   2   11
    3   3   11J
    3   4   11JZ
    4   2   1J
    4   3   1JZ
    5   2   JZ
    

    Here is a sample script demonstrating the technique and how to shred the postcodes, using 100 sample postcodes and a trigger.

    NB!! This is not production ready code, just a sample to show the technique.

    USE tempdb
    GO
    
    -- https://www.postcodelist.co.uk/
    --uk-postcodes.csv
    
    IF OBJECT_ID('dbo.postCodeParts') IS NOT NULL DROP TABLE dbo.postCodeParts
    IF OBJECT_ID('dbo.postCodes') IS NOT NULL DROP TABLE dbo.postCodes
    GO
    
    CREATE TABLE dbo.postCodes (
        postcodeId              INT IDENTITY CONSTRAINT PK_postCodes PRIMARY KEY,
        postcode                VARCHAR(8) NOT NULL
    
        --... the rest of your columns
    
        )
    GO
    
    
    CREATE TABLE dbo.postCodeParts (
        postcodePartId          INT IDENTITY CONSTRAINT PK_postCodeParts PRIMARY KEY NONCLUSTERED,
        postcodeId              INT NOT NULL FOREIGN KEY REFERENCES dbo.postCodes ( postcodeId ),
    
        totalLen                TINYINT NOT NULL,
        xStart                  TINYINT NOT NULL,
        xLen                    TINYINT NOT NULL,
        postcodePart            VARCHAR(8) NOT NULL INDEX cdx_postCodeParts CLUSTERED
    
        )
    GO
    
    
    -- Add a trimmed copy of the postcode to the parts table, chunked up.
    CREATE TRIGGER dbo.trg_postCodes
    ON dbo.postcodes
    FOR INSERT
    AS
    BEGIN
    
        ;WITH cte AS
        (
        SELECT *
        FROM (
            VALUES ( 1 ), ( 2 ), ( 3 ), ( 4 ), ( 5 ), ( 6 ), ( 7 ), ( 8 ) 
            ) x(y)
        )
        INSERT INTO dbo.postCodeParts ( postcodeId, totalLen, xStart, xLen, postcodePart )
        SELECT 
            p.postcodeId, 
            p.xTotalLen, 
            c1.y AS xstart, 
            c2.y AS xlen, 
            SUBSTRING( p.postCode, c1.y, c2.y ) AS xstring
        FROM ( 
            SELECT
                postcodeId,
                REPLACE( postcode, ' ', '' ) postCode, 
                LEN( REPLACE( postcode, ' ', '' ) ) AS xTotalLen 
            FROM inserted 
        ) p
            CROSS JOIN cte c1
                CROSS JOIN cte c2
        WHERE c2.y Between 2 And p.xTotalLen
          AND ( ( c2.y ) + ( c1.y - 1 ) ) <= p.xTotalLen
    
    END
    GO
    
    
    INSERT INTO dbo.postcodes ( postcode )
    VALUES
        ( 'OX1 1AA' ),( 'OX1 1AB' ),( 'OX1 1AD' ),( 'OX1 1AE' ),( 'OX1 1AF' ),( 'OX1 1AG' ),( 'OX1 1AN' ),( 'OX1 1AS' ),( 'OX1 1AW' ),( 'OX1 1AY' ),
        ( 'OX1 1AZ' ),( 'OX1 1BD' ),( 'OX1 1BE' ),( 'OX1 1BN' ),( 'OX1 1BP' ),( 'OX1 1BS' ),( 'OX1 1BT' ),( 'OX1 1BU' ),( 'OX1 1BX' ),( 'OX1 1BY' ),
        ( 'OX1 1BZ' ),( 'OX1 1DA' ),( 'OX1 1DB' ),( 'OX1 1DE' ),( 'OX1 1DF' ),( 'OX1 1DG' ),( 'OX1 1DJ' ),( 'OX1 1DL' ),( 'OX1 1DP' ),( 'OX1 1DQ' ),
        ( 'OX1 1DS' ),( 'OX1 1DW' ),( 'OX1 1DZ' ),( 'OX1 1EA' ),( 'OX1 1EF' ),( 'OX1 1EJ' ),( 'OX1 1EN' ),( 'OX1 1EP' ),( 'OX1 1EQ' ),( 'OX1 1ER' ),
        ( 'OX1 1ES' ),( 'OX1 1ET' ),( 'OX1 1EU' ),( 'OX1 1EW' ),( 'OX1 1EX' ),( 'OX1 1GA' ),( 'OX1 1GB' ),( 'OX1 1GD' ),( 'OX1 1GE' ),( 'OX1 1GF' ),
        ( 'OX1 1GH' ),( 'OX1 1GJ' ),( 'OX1 1GL' ),( 'OX1 1HB' ),( 'OX1 1HD' ),( 'OX1 1HF' ),( 'OX1 1HG' ),( 'OX1 1HH' ),( 'OX1 1HN' ),( 'OX1 1HP' ),
        ( 'OX1 1HQ' ),( 'OX1 1HR' ),( 'OX1 1HS' ),( 'OX1 1HT' ),( 'OX1 1HU' ),( 'OX1 1HW' ),( 'OX1 1HX' ),( 'OX1 1HY' ),( 'OX1 1HZ' ),( 'OX1 1JA' ),
        ( 'OX1 1JB' ),( 'OX1 1JD' ),( 'OX1 1JE' ),( 'OX1 1JF' ),( 'OX1 1JG' ),( 'OX1 1JH' ),( 'OX1 1JJ' ),( 'OX1 1JL' ),( 'OX1 1JP' ),( 'OX1 1JQ' ),
        ( 'OX1 1JR' ),( 'OX1 1JS' ),( 'OX1 1JT' ),( 'OX1 1JU' ),( 'OX1 1JW' ),( 'OX1 1JX' ),( 'OX1 1JY' ),( 'OX1 1JZ' ),( 'OX1 1LB' ),( 'OX1 1LD' ),
        ( 'OX1 1LE' ),( 'OX1 1LF' ),( 'OX1 1LG' ),( 'OX1 1LJ' ),( 'OX1 1LL' ),( 'OX1 1LQ' ),( 'OX1 1LT' ),( 'OX1 1LU' ),( 'OX1 1LY' ),( 'OX1 1ND' )
    GO
    
    SELECT * FROM dbo.postCodes
    SELECT * FROM dbo.postCodeParts ORDER BY xStart, xLen
    
    SELECT * 
    FROM dbo.postCodes pc
        INNER JOIN dbo.postCodeParts pcp ON pc.postcodeId = pcp.postcodeId
    WHERE postcodePart = '1J'
    ORDER BY xStart, xLen
    GO
    
    IF OBJECT_ID('dbo.usp_searchPostCodes') IS NOT NULL DROP PROC dbo.usp_searchPostCodes
    GO
    CREATE PROC dbo.usp_searchPostCodes
    
        @searchString   VARCHAR(8)
    
    AS
    
        SET NOCOUNT ON
    
        --!!TODO add error handling
        --!!TODO does not deal with middle wildcards or _ wildcard
    
        DECLARE @leadingWildCard BIT
        DECLARE @cleanSearchString VARCHAR(8)
    
        SELECT @leadingWildCard = CASE WHEN LEFT( @searchString, 1 ) = '%' THEN 1 ELSE 0 END
        SELECT @cleanSearchString = REPLACE( REPLACE( @searchString, ' ', '' ), '%', '' )
    
        -- Debugging
        --PRINT @leadingWildCard
        --PRINT @cleanSearchString
    
        IF @leadingWildCard = 0
    
            -- No leading wildcard, start at position 1
            SELECT pc.postcode
            FROM dbo.postCodes pc
                INNER JOIN dbo.postCodeParts pcp ON pc.postcodeId = pcp.postcodeId
            WHERE pcp.postcodePart = @cleanSearchString
              AND pcp.xstart = 1
            ORDER BY xStart, xLen
    
        ELSE
    
            -- Leading wildcard, return all positions
            SELECT pc.postcode
            FROM dbo.postCodes pc
                INNER JOIN dbo.postCodeParts pcp ON pc.postcodeId = pcp.postcodeId
            WHERE pcp.postcodePart = @cleanSearchString
            ORDER BY xStart, xLen
    
    RETURN
    GO
    
    
    EXEC dbo.usp_searchPostCodes 'OX1 1J%'
    EXEC dbo.usp_searchPostCodes '%X1 1J%'
    GO
    
    
    SELECT xStart, xLen, postcodePart
    FROM dbo.postCodes pc
        INNER JOIN dbo.postCodeParts pcp ON pc.postcodeId = pcp.postcodeId
    WHERE pc.postcode= 'OX1 1JZ'
    ORDER BY xStart, xLen