Search code examples
gremlin, amazon-neptune

Transform project results into a single list in Gremlin query


I have a Gremlin query that I would like to return an array of user ids. Currently it returns an array of arrays, one array for each projection.

Is there a way to transform this array of arrays into a single array of user ids in the query, or is this something I need to handle at the application level?

Any help is very much appreciated.

// Returns the user ids connected to the vertex with id 'testUser', grouped per
// connection type, or an 'err' map when that vertex does not exist.
// fold() + coalesce(unfold()..., ...) is what lets the second branch run when
// V('testUser') finds nothing.
g.V('testUser').fold()
.coalesce(
    unfold().project('bi_directional_connection', 'single_directional_connection')
            .by(
                bothE('bi_directional_connection')
                    .has('status', 'ACCEPTED')
                    .otherV()
                    .has('active', true)
                    .values('user_id')
                    .fold()        // collapses every id into ONE list
                    .dedup()       // NOTE: runs on that single list, so duplicate ids inside it survive
                    .limit(100)    // NOTE: likewise limits the number of lists (always one), not the ids
            )
            .by(
                outE('single_directional_connection')
                    .otherV()
                    .values('user_id')
                    .fold()        // same fold-then-dedup/limit ordering as above
                    .dedup()
                    .limit(100)

            ).select(values),      // keeps the two projected lists -> an array of arrays
        
    project('err').by(constant("user does not exist"))
)

EDIT: Here is my sample data:

    // Set up test data: four active test users (joshTest1..joshTest4) and the
    // connection edges between them, created in a single chained traversal.
    // Each vertex uses its own name as label, id and 'user_id'.
    // Every bi-directional connection is stored as a PAIR of opposite edges, so
    // bothE('bi_directional_connection') from joshTest2 reaches each neighbour twice.
g.addV('joshTest1')
    .property(T.id, 'joshTest1')
    .property(single, 'user_id', 'joshTest1')
    .property(single, 'role', 'test-user')
    .property(single, 'active', true)
.addV('joshTest2')
    .property(T.id, 'joshTest2')
    .property(single, 'user_id', 'joshTest2')
    .property(single, 'role', 'test-user')
    .property(single, 'active', true)
.addV('joshTest3')
    .property(T.id, 'joshTest3')
    .property(single, 'user_id', 'joshTest3')
    .property(single, 'role', 'test-user')
    .property(single, 'active', true)
.addV('joshTest4')
    .property(T.id, 'joshTest4')
    .property(single, 'user_id', 'joshTest4')
    .property(single, 'role', 'test-user')
    .property(single, 'active', true)
.addE('single_directional_connection')    // joshTest2 -> joshTest1
    .from(V('joshTest2'))
    .to(V('joshTest1'))
    .property('status', 'ACCEPTED')
.addE('single_directional_connection')    // joshTest3 -> joshTest1
    .from(V('joshTest3'))
    .to(V('joshTest1'))
    .property('status', 'ACCEPTED')
.addE('bi_directional_connection')        // joshTest2 -> joshTest3
    .from(V('joshTest2'))
    .to(V('joshTest3'))
    .property('status', 'ACCEPTED')
.addE('bi_directional_connection')        // joshTest3 -> joshTest2 (reverse of the edge above)
    .from(V('joshTest3'))
    .to(V('joshTest2'))
    .property('status', 'ACCEPTED')
.addE('bi_directional_connection')        // joshTest2 -> joshTest4
    .from(V('joshTest2'))
    .to(V('joshTest4'))
    .property('status', 'ACCEPTED')
.addE('bi_directional_connection')        // joshTest4 -> joshTest2 (reverse of the edge above)
    .from(V('joshTest4'))
    .to(V('joshTest2'))
    .property('status', 'ACCEPTED')

Here is the response I get from running the query against the sample data (with the starting vertex set to 'joshTest2' rather than 'testUser'). I'm doing this in an AWS Jupyter notebook.

[['joshTest3', 'joshTest4', 'joshTest3', 'joshTest4'], ['joshTest1']]

Notice that I'm also getting duplicates, which I do not want.

What I would like to get is this:

['joshTest3', 'joshTest4', 'joshTest1']

Solution

  • Here is an answer to get you going. I moved the dedup before the fold and massaged the results at the end. I am going to study this query some more and will update this answer if I come up with a simpler query as an option.

    // Returns one flat, de-duplicated list of the user ids connected to
    // 'joshTest2', or an 'err' map when that vertex does not exist.
    // fold() + coalesce(unfold()..., ...) lets the second branch run when
    // V('joshTest2') finds nothing.
    g.V('joshTest2').fold()
    .coalesce(
        unfold().project('bi_directional_connection', 'single_directional_connection')
                .by(
                    bothE('bi_directional_connection')
                        .has('status', 'ACCEPTED')
                        .otherV()
                        .has('active', true)
                        .values('user_id')
                        .dedup()       // de-duplicate while the ids are still individual traversers
                        .limit(100)    // cap the ids BEFORE fold(); after fold() there is only one list
                        .fold()        // by() uses only the first result, so gather the ids into a list
                )
                .by(
                    outE('single_directional_connection')
                        .otherV()
                        .values('user_id')
                        .dedup()
                        .limit(100)
                        .fold()
                ).select(values)
                 .unfold()   // collection of the two lists -> each list
                 .unfold()   // each list -> individual user ids
                 .dedup()    // drop ids that were reached through both connection types
                 .fold(),    // back to a single list
            
        project('err').by(constant("user does not exist"))
    )
    

    This yields

    ['joshTest3', 'joshTest4', 'joshTest1']