Search code examples
sql-serversql-graph

SQL Server : Graph-DB with soft delete isn't working as expected


I try to use the graph functionality in SQL Server. Now I have a problem with soft-delete.

I have the following graph

[1] -> [2] -> [9 (deleted)] -> [4]

When I run the following script

CREATE TABLE MyNode 
(
    [Id] [bigint] NOT NULL,
    [IsDeleted] [bit] NOT NULL,
) AS NODE;

CREATE TABLE MyEdge 
(
    State [int] NOT NULL
) AS EDGE;

INSERT INTO MyNode (Id, IsDeleted)
VALUES (1, 0), (2, 0), (4, 0), (9, 1);

INSERT INTO MyEdge
VALUES
( (SELECT $node_id FROM MyNode WHERE Id = 1), (SELECT $node_id FROM MyNode WHERE Id = 2), 1),
( (SELECT $node_id FROM MyNode WHERE Id = 2), (SELECT $node_id FROM MyNode WHERE Id = 9), 1),
( (SELECT $node_id FROM MyNode WHERE Id = 9), (SELECT $node_id FROM MyNode WHERE Id = 4), 1)
;

SELECT
    src.Id ID_SOURCE
,   LAST_VALUE(trgt.Id) WITHIN GROUP (GRAPH PATH) AS ID_TARGET
,   STRING_AGG(trgt.Id, '->') WITHIN GROUP (GRAPH PATH) AS ID_CHAIN
--, STRING_AGG(compare.State, '->') WITHIN GROUP (GRAPH PATH) AS STATE_CHAIN
--, STRING_AGG(trgt.IsDeleted, '->') WITHIN GROUP (GRAPH PATH) AS DELETED_CHAIN
FROM
    MyNode AS src
,   (   SELECT
            *
        FROM
            MyEdge
        WHERE
            State = 1
    ) FOR PATH AS compare
,   (   SELECT
            *
        FROM
            MyNode
        WHERE
            IsDeleted = 0
    ) FOR PATH AS trgt
WHERE
    MATCH ( SHORTEST_PATH( src(-(compare)->trgt)+ ) )
    AND src.Id = 1;

SELECT
    src.Id AS SOURCE_ID
,   ed.State AS EDGE_STATE
,   trgt.Id AS TARGET_ID
FROM
    MyNode AS src
,   MyEdge AS ed
,   MyNode AS trgt
WHERE
    MATCH( src-(ed)->trgt )
    AND src.Id = 2;


DROP TABLE MyNode;
DROP TABLE MyEdge;

(Hint: this only works on SQL-Server 2019)

I get the following result

ID_SOURCE ID_TARGET ID_CHAIN
1 2 2
1 4 2->4

There is no edge 2->4 but 2->9->4. But the node 9 is deleted so it is used for the graph traversal but is suppressed in the output.

Is this a bug in SQL-Server or am I doing something wrong?

Or is there another way I should use soft delete with Graph-DB?


Solution

  • You could filter out the edges from/to soft-deleted nodes

    ....
    (   
    SELECT e.*
    FROM MyEdge as e
    WHERE
        e.State = 1
        and exists(select * from MyNode as x where x.IsDeleted = 0 and x.$node_id = e.$to_id)
        --and exists(select * from MyNode as x where x.IsDeleted = 0 and x.$node_id = e.$from_id)
    ) FOR PATH AS compare
    ....
    

    Somehow the following query returns an error (after several secs of execution):

    SELECT
        src.Id ID_SOURCE
    ,   LAST_VALUE(trgt.Id) WITHIN GROUP (GRAPH PATH) AS ID_TARGET
    ,   STRING_AGG(trgt.Id, '->') WITHIN GROUP (GRAPH PATH) AS ID_CHAIN
    --, STRING_AGG(compare.State, '->') WITHIN GROUP (GRAPH PATH) AS STATE_CHAIN
    --, STRING_AGG(trgt.IsDeleted, '->') WITHIN GROUP (GRAPH PATH) AS DELETED_CHAIN
    FROM
        MyNode AS src
    ,   
    (   
    SELECT e.*
    FROM MyEdge as e
    WHERE
        e.State = 1
        and exists(select * from MyNode as x where x.IsDeleted = 0 and x.$node_id = e.$to_id)
        --and exists(select * from MyNode as x where x.IsDeleted = 0 and x.$node_id = e.$from_id)
    ) FOR PATH AS compare
    
    ,   (   SELECT
                *
            FROM
                MyNode
            WHERE
                IsDeleted = 0
        ) FOR PATH AS trgt
    WHERE
        MATCH ( SHORTEST_PATH( src(-(compare)->trgt)+ ) )
    

    ...and the error goes away by appending a condition/reference to src table, which is always true :

    AND src.Id = src.Id --or src.IsDeleted = src.IsDeleted
    

    There is definitely something going wrong [Microsoft SQL Server 2019 (RTM-CU8-GDR) (KB4583459)]..