Search code examples
gremlinazure-cosmosdb-gremlinapigremlinpython

Gremlin, Group By Edge Properties


I am using CosmosDB Gremlin Engine and trying to write a query that returns all EDGE properties, grouped by key, with a list of distinct values.

Here is an example of my schema;

g.V().has('server','id','AARCWIVPAP13056').fold().coalesce(unfold(), addV('server').property('id', 'AARCWIVPAP13056').property('record_type', 'server').property('OsName', 'Microsoft Windows Server 2012 R2 Standard').property('CsCaption', 'AARCWIVPAP13056').property('OsType', 'WINNT').property('OsOperatingSystemSKU', 'StandardServerEdition').property('OsInstallDate', '/Date(1478080755000)/').property('PSComputerName', 'None').property('WindowsEditionId', 'ServerStandard').property('WindowsProductName', 'Windows Server 2012 R2 Standard').property('BiosCaption', 'PhoenixBIOS 4.0 Release 6.0 ').property('CsNumberOfLogicalProcessors', '1').property('CsNumberOfProcessors', '1').property('CsTotalPhysicalMemory', '8589463552').property('TotalPhysicalMemory', '8'))

g.V().has('service','id','AdobeARMservice').fold().coalesce(unfold(), addV('service').property('id', 'AdobeARMservice').property('display_name', 'Adobe Acrobat Update Service').property('record_type', 'service').property('service_name', 'AdobeARMservice').property('name', 'AdobeARMservice'))
g.V('AARCWIVPAP13056').as('parent').V('AdobeARMservice').coalesce(__.inE('has_service').where(outV().as('parent')), addE('has_service').from('parent')).property('status', 'Stopped').property('service_account', 'LocalSystem').property('start_type', 'Disabled')
g.V('AdobeARMservice').as('parent').V('AARCWIVPAP13056').coalesce(__.inE('installed_on').where(outV().as('parent')), addE('installed_on').from('parent')).property('status', 'Stopped').property('service_account', 'LocalSystem').property('start_type', 'Disabled')

g.V().has('service','id','AdtAgent').fold().coalesce(unfold(), addV('service').property('id', 'AdtAgent').property('display_name', 'Microsoft Monitoring Agent Audit Forwarding').property('record_type', 'service').property('service_name', 'AdtAgent').property('name', 'AdtAgent'))
g.V('AARCWIVPAP13056').as('parent').V('AdtAgent').coalesce(__.inE('has_service').where(outV().as('parent')), addE('has_service').from('parent')).property('status', 'Stopped').property('service_account', 'NT AUTHORITY\\NetworkService').property('start_type', 'Disabled')
g.V('AdtAgent').as('parent').V('AARCWIVPAP13056').coalesce(__.inE('installed_on').where(outV().as('parent')), addE('installed_on').from('parent')).property('status', 'Stopped').property('service_account', 'NT AUTHORITY\\NetworkService').property('start_type', 'Disabled')

g.V().has('service','id','AeLookupSvc').fold().coalesce(unfold(), addV('service').property('id', 'AeLookupSvc').property('display_name', 'Application Experience').property('record_type', 'service').property('service_name', 'AeLookupSvc').property('name', 'AeLookupSvc'))
g.V('AARCWIVPAP13056').as('parent').V('AeLookupSvc').coalesce(__.inE('has_service').where(outV().as('parent')), addE('has_service').from('parent')).property('status', 'Stopped').property('service_account', 'localSystem').property('start_type', 'Manual')
g.V('AeLookupSvc').as('parent').V('AARCWIVPAP13056').coalesce(__.inE('installed_on').where(outV().as('parent')), addE('installed_on').from('parent')).property('status', 'Stopped').property('service_account', 'localSystem').property('start_type', 'Manual')

g.V().has('service','id','ALG').fold().coalesce(unfold(), addV('service').property('id', 'ALG').property('display_name', 'Application Layer Gateway Service').property('record_type', 'service').property('service_name', 'ALG').property('name', 'ALG'))
g.V('AARCWIVPAP13056').as('parent').V('ALG').coalesce(__.inE('has_service').where(outV().as('parent')), addE('has_service').from('parent')).property('status', 'Stopped').property('service_account', 'NT AUTHORITY\\LocalService').property('start_type', 'Manual')
g.V('ALG').as('parent').V('AARCWIVPAP13056').coalesce(__.inE('installed_on').where(outV().as('parent')), addE('installed_on').from('parent')).property('status', 'Stopped').property('service_account', 'NT AUTHORITY\\LocalService').property('start_type', 'Manual')



g.V().has('scheduled_task','id','computer_maintenance_-_sg_afi_compmaint_sa_0030_unv').fold().coalesce(unfold(), addV('scheduled_task').property('task_name', 'Computer Maintenance - SG_AFI_CompMaint_Sa_0030_Unv').property('id', 'computer_maintenance_-_sg_afi_compmaint_sa_0030_unv').property('record_type', 'scheduled_task').property('task_date', 'None').property('author', 'ASHLEYFURNITURE\\s_afiarcOMAct').property('task_path', '\\'))
g.V('AARCWIVPAP13056').as('parent').V('computer_maintenance_-_sg_afi_compmaint_sa_0030_unv').coalesce(__.inE('runs_task').where(outV().as('parent')), addE('runs_task').from('parent'))
g.V('computer_maintenance_-_sg_afi_compmaint_sa_0030_unv').as('parent').V('AARCWIVPAP13056').coalesce(__.inE('task_runs_on').where(outV().as('parent')), addE('task_runs_on').from('parent'))

g.V().has('scheduled_task','id','configmgr_client_health').fold().coalesce(unfold(), addV('scheduled_task').property('task_name', 'ConfigMgr Client Health').property('id', 'configmgr_client_health').property('record_type', 'scheduled_task').property('task_date', 'None').property('author', 'ASHLEYFURNITURE\\s_afiarcOMAct').property('task_path', '\\'))
g.V('AARCWIVPAP13056').as('parent').V('configmgr_client_health').coalesce(__.inE('runs_task').where(outV().as('parent')), addE('runs_task').from('parent'))
g.V('configmgr_client_health').as('parent').V('AARCWIVPAP13056').coalesce(__.inE('task_runs_on').where(outV().as('parent')), addE('task_runs_on').from('parent'))

The Properties on the Edges change based on what 2 Vertex's I am linking, so I cannot specify the property names in the query. Also, some Edges do not have extra properties.

The following query is getting me close to my desired output;

g.V().has('id', 'AARCWIVPAP13056')
.as('entity')
.map(outE().group().by(label))
.as('edge')
.select('entity', 'edge')

But it is returning all the raw Edges.

The ideal shape of my response would be;

[
  {
    "entity": {
      "id": "AARCWIVPAP13056",
      "label": "server",
      "type": "vertex",
      "properties": {
        "record_type": [
          {
            "id": "AARCWIVPAP13056|record_type",
            "value": "server"
          }
        ],
        "OsName": [
          {
            "id": "55c70a2a-afb3-4aec-a4e1-86bcbd360068",
            "value": "Microsoft Windows Server 2012 R2 Standard"
          }
        ],
        "CsCaption": [
          {
            "id": "819461e1-8833-4c65-858f-eeea1051edf9",
            "value": "AARCWIVPAP13056"
          }
        ],
        "OsType": [
          {
            "id": "f1928eee-fdd1-479b-8b8d-361666761481",
            "value": "WINNT"
          }
        ],
        "OsOperatingSystemSKU": [
          {
            "id": "bda0ceb5-d818-477d-8998-24537757225f",
            "value": "StandardServerEdition"
          }
        ],
        "OsInstallDate": [
          {
            "id": "487beea1-4d7c-4e68-be0f-a3171c1f86db",
            "value": "/Date(1478080755000)/"
          }
        ],
        "PSComputerName": [
          {
            "id": "2d3a9c6b-83c2-4058-a232-070d9928ed28",
            "value": "None"
          }
        ],
        "WindowsEditionId": [
          {
            "id": "b0a0378c-901f-4510-a157-7fbc363d0c27",
            "value": "ServerStandard"
          }
        ],
        "WindowsProductName": [
          {
            "id": "684f384c-2e19-427a-9c34-e7ee0a1c3c53",
            "value": "Windows Server 2012 R2 Standard"
          }
        ],
        "BiosCaption": [
          {
            "id": "a8ea593b-6cee-42b6-9275-5d06ca4dd6a2",
            "value": "PhoenixBIOS 4.0 Release 6.0     "
          }
        ],
        "CsNumberOfLogicalProcessors": [
          {
            "id": "a245cc43-d2d5-44e1-9696-f7fc585c6490",
            "value": "1"
          }
        ],
        "CsNumberOfProcessors": [
          {
            "id": "67de0a1f-0bab-4dae-90d0-70c46dc50775",
            "value": "1"
          }
        ],
        "CsTotalPhysicalMemory": [
          {
            "id": "32def833-079b-4184-a428-46a400af9cb1",
            "value": "8589463552"
          }
        ],
        "TotalPhysicalMemory": [
          {
            "id": "e6bfedb5-f8f5-460a-958e-1b287a407295",
            "value": "8"
          }
        ],
        "role": [
          {
            "id": "d2943f08-d3a8-4ccd-94e8-188fc66f90aa",
            "value": "APP"
          }
        ],
        "AppContact": [
          {
            "id": "a4c72bcc-f2c2-47c6-982f-f1819d7aa0ab",
            "value": "Keith Branes"
          }
        ],
        "test": [
          {
            "id": "fd37a6de-e60c-4589-901f-1473cfd416a9",
            "value": "tester"
          }
        ],
        "acl_test": [
          {
            "id": "aeb4d52b-5615-484f-917a-2a21f1a83260",
            "value": "acl_test_value",
            "properties": {
              "harvested": "private"
            }
          }
        ],
        "acl_test2": [
          {
            "id": "0b3b5216-3a1d-4817-bcb2-8acd2c9bcffa",
            "value": "acl_test_value2",
            "properties": {
              "read_only": "true"
            }
          }
        ],
        "tester": [
          {
            "id": "c0a1f3b0-4a26-40b2-a762-8a1e008dbcb0",
            "value": "rester"
          }
        ]
      }
    },
    "edge": {
      "has_service": [
        {
            "status": ["Running", "Stopped"],
            "start_type": ["Manual", "Disabled"],
            "service_account": [ "Network Service", "LocalSystem" ]
          }],
      "runs_task": []
    }
]

Solution

  • Your data and query still don't return me any data and it's a bit complex so I'm not sure where it is disconnected to fix it. In any case, judging from what you've written, I think you can do what you want with project():

    gremlin> g.V().has('person', 'name', 'marko').
    ......1>   project('entity', 'edgeData').
    ......2>     by(valueMap(true)).
    ......3>     by(outE().properties().group().by(key()).by(value().fold()))
    ==>[entity:[id:1,label:person,name:[marko],age:[29]],edgeData:[weight:[0.4,0.5,1.0]]]