Search code examples
sqldatabaseazureazure-data-explorerkql

KQL How to merge result rows post mv-apply to get the right count


Working with KQL

Given this data set

let MyTable = datatable (VMID:int, ID:string, Type:string, features:dynamic, scanner:dynamic)
[
    1, "ID-1", "Windows", dynamic([
        {"name": "name1", "value": true},
        {"name": "name2", "value": false},
        {"name": "name3", "value": true}
    ]), dynamic([
        {"name": "s1", "expiry": false},
        {"name": "s2", "expiry": true},
        {"name": "s3", "expiry": true},
        {"name": "s4", "expiry": false}
    ]),
    2, "ID-1", "Windows", dynamic([
        {"name": "name1", "value": true},
        {"name": "name2", "value": false},
        {"name": "name3", "value": true}
    ]), dynamic([
        {"name": "s1", "expiry": false},
        {"name": "s2", "expiry": true},
        {"name": "s3", "expiry": false},
        {"name": "s4", "expiry": true}
    ]),
    3, "ID-1", "Linux", dynamic([
        {"name": "name1", "value": true},
        {"name": "name2", "value": false},
        {"name": "name3", "value": true}
    ]), dynamic([
        {"name": "s1", "expiry": false},
        {"name": "s2", "expiry": false},
        {"name": "s3", "expiry": true},
        {"name": "s4", "expiry": false}
    ]),
    4, "ID-2", "Windows", dynamic([
        {"name": "name1", "value": true},
        {"name": "name2", "value": false},
        {"name": "name3", "value": true}
    ]), dynamic([
        {"name": "s1", "expiry": false},
        {"name": "s2", "expiry": true},
        {"name": "s3", "expiry": false},
        {"name": "s4", "expiry": true}
    ]),
    5, "ID-2", "Windows", dynamic([
        {"name": "name1", "value": true},
        {"name": "name2", "value": false},
        {"name": "name3", "value": true}
    ]), dynamic([
        {"name": "s1", "expiry": false},
        {"name": "s2", "expiry": true},
        {"name": "s3", "expiry": true},
        {"name": "s4", "expiry": true}
    ])
];

table input

I want to filter features by name1 and name2 and ScanState by s1 and s3 and then count the number of VMID for each Type and also give a list of VMID for

  1. has value == true for feature.name = name1
  2. has value = false for feature.name = name2
  3. has expiry = false for scanState.name = s1
  4. has expiry = true for scanState.name = s3

My main problem is when using mv-apply it breaks the JSON into multiple rows. And doing a count() leads to extra results.

MyTable
| mv-apply features, scanner on (where  features.name == "name1" or features.name == "name2" or scanner.name == "s1" or scanner.name == "s3" )
| extend feature1State = tobool(features.name == "name1" and features.value == true)
| extend feature2State = tobool(features.name == "name2" and features.value == false)
| extend scan1State = tobool(scanner.name == "s1" and scanner.expiry == false)
| extend scan2State = tobool(scanner.name == "s3" and scanner.expiry == true)
| summarize vmCount = count(VMID),
    f1count = countif(feature1State == true),
    f2count= countif(feature2State== true),
    scan1count = countif(scan1State == true),
    scan2count = countif(scan2State == true),
    f1FailVm = make_set_if(VMID, feature1State == false and isnotempty( VMID)),
    f2FailVm = make_set_if(VMID, feature2State== false and isnotempty( VMID)),
    scan1FailVm = make_set_if(VMID, scan1State == false and isnotempty( VMID)),
    scan2FailVm = make_set_if(VMID, scan2State == false and isnotempty( VMID))
    by ID, Type

The output of it result output

Do note that I get 6 VM Count for ID-1 which isn't right. This is due to the fact that mv-apply in essence created multiple rows. Since the number of entries in features and Scanner are different so it picks the maximum of them.

Is there a better way to solve this.

Update 1: Using count_distinct solve for VMID but how to get the right failed List value . Output using count_distinct count_distinct


Solution

    • Use vmCount = count_distinct( VMID) instead of count(VMID). The count_distinct function is used to count the number of distinct VMID values by Type and Id fields.
    • f1FailVm field is the list of VMIDs which does not have a true in feature1State field in Type and Id. For this case, take the first set as the set of VMID values that have a false value in the feature1State field, and the second set as the set of VMID values that have a true value in the feature1State field and then use set_difference between them.

    Code:

    MyTable
    | mv-apply features, scanner on (where features.name == "name1" or features.name == "name2" or scanner.name == "s1" or scanner.name == "s3" )
    | extend feature1State = tobool(features.name == "name1" and features.value == true)
    | extend feature2State = tobool(features.name == "name2" and features.value == false)
    | extend scan1State = tobool(scanner.name == "s1" and scanner.expiry == false)
    | extend scan2State = tobool(scanner.name == "s3" and scanner.expiry == true)
    | summarize vmCount = count_distinct( VMID),
    f1count = countif(feature1State == true),
    f2count= countif(feature2State== true),
    scan1count = countif(scan1State == true),
    scan2count = countif(scan2State == true),
    f1failVM= set_difference(make_set_if(VMID, feature1State == false), make_set_if(VMID, feature1State == true)),
    f2FailVM = set_difference(make_set_if(VMID, feature2State == false), make_set_if(VMID, feature2State == true)),
    scan1FailVm = set_difference(make_set_if(VMID, scan1State == false), make_set_if(VMID, scan1State == true)),
    scan2FailVm = set_difference(make_set_if(VMID, scan2State == false), make_set_if(VMID, scan2State == true))
    by ID, Type
    

    Output:

    ID Type vmCount f1count f2count scan1count scan2count f1failVM f2FailVM scan1FailVm scan2FailVm
    ID-1 Windows 2 2 2 2 1 [] [] [] [2]
    ID-1 Linux 1 1 1 1 1 [] [] [] []
    ID-2 Windows 2 2 2 2 1 [] [] [] [4]

    fiddle