Search code examples
Tags: sql, json, amazon-web-services, broadcastreceiver, amazon-athena

Find the owner of an EC2 instance using Athena and CloudTrail


To find out who owns each EC2 instance, I use Athena to query the CloudTrail logs stored in S3.

I have a table in Athena with the following structure:

-- External table over the CloudTrail log files under s3://<BUCKET>/AWSLogs/,
-- read through the CloudTrail SerDe and input format named below.
-- requestparameters and responseelements are declared as plain STRING columns,
-- so their contents have to be parsed at query time.
-- Partition columns: account, region, year.
CREATE EXTERNAL TABLE cloudtrail_logs (
    eventversion STRING,
    useridentity STRUCT<
        type:STRING,
        principalid:STRING,
        arn:STRING,
        accountid:STRING,
        invokedby:STRING,
        accesskeyid:STRING,
        userName:STRING,
        sessioncontext:STRUCT<
            attributes:STRUCT<
                mfaauthenticated:STRING,
                creationdate:STRING
            >,
            sessionissuer:STRUCT<
                type:STRING,
                principalId:STRING,
                arn:STRING,
                accountId:STRING,
                userName:STRING
            >
        >
    >,
    eventtime STRING,
    eventsource STRING,
    eventname STRING,
    awsregion STRING,
    sourceipaddress STRING,
    useragent STRING,
    errorcode STRING,
    errormessage STRING,
    requestparameters STRING,
    responseelements STRING,
    additionaleventdata STRING,
    requestid STRING,
    eventid STRING,
    resources ARRAY<
        STRUCT<
            ARN:STRING,
            accountId:STRING,
            type:STRING
        >
    >,
    eventtype STRING,
    apiversion STRING,
    readonly STRING,
    recipientaccountid STRING,
    serviceeventdetails STRING,
    sharedeventid STRING,
    vpcendpointid STRING
)
PARTITIONED BY (
    account STRING,
    region STRING,
    year STRING
)
ROW FORMAT SERDE 'com.amazon.emr.hive.serde.CloudTrailSerde'
STORED AS
    INPUTFORMAT 'com.amazon.emr.cloudtrail.CloudTrailInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://<BUCKET>/AWSLogs/';

I want to find the identity of the user who launched a given EC2 instance, so I need to parse the responseelements field and keep only the rows whose responseelements contains a particular instance ID.

The responseelements field looks like this:

{
"requestId":"cab34472-31cc-44cd-ae32-a84077e55cb6",
"reservationId":"r-05964c8549788ac50",
"ownerId":"xxxxxxxxxx",
"groupSet":{},
"instancesSet":{
    "items":[
    {"instanceId":"i-043543cb4c12",
    "imageId":"ami-078df974",
    "instanceState":{"code":0,"name":"pending"},
    "privateDnsName":"ip-444444.eu-west-1.compute.internal",
    "keyName":"key-dev","amiLaunchIndex":0,"productCodes":{},
    "instanceType":"t2.large",
    "launchTime":1488438050000,
    "placement":{"availabilityZone":"eu-west-1b","tenancy":"default"},
    "monitoring":{"state":"pending"},
    "subnetId":"subnet-d8fffff",
    "vpcId":"vpc-444435",
    "privateIpAddress":"10.0.42.49",
    "stateReason":{"code":"pending","message":"pending"},
    "architecture":"x86_64",
    "rootDeviceType":"ebs",
    "rootDeviceName":"/dev/xvda",
    "blockDeviceMapping":{},
    "virtualizationType":"hvm",
    "hypervisor":"xen",
    "clientToken":"c6e53004-c561-437d-a642-196489ff297c_subnet-fffffffff",
    "groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
    "sourceDestCheck":true,
    "networkInterfaceSet":{
        "items":[
        {"networkInterfaceId":"eni-b16b66f0",
        "subnetId":"subnet-dffffff",
        "vpcId":"vpc-50fffff35",
        "ownerId":"xxxxxxxx",
        "status":"in-use",
        "macAddress":"fdsfdsfsdfqdsf",
        "privateIpAddress":"10.0.42.34234213",
        "privateDnsName":"ip-1dddddd.eu-west-1.compute.internal",
        "sourceDestCheck":true,
        "groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
        "attachment":{"attachmentId":"eni-attach-45619121","deviceIndex":0,"status":"attaching","attachTime":1488438050000,"deleteOnTermination":true},
        "privateIpAddressesSet":{"item":[{"privateIpAddress":"10ffffff","privateDnsName":"ip-ffffff.eu-west-1.compute.internal","primary":true}]},
        "ipv6AddressesSet":{},
        "tagSet":{}}]}
    ,"iamInstanceProfile":{"arn":"arn:aws:iam::xxxxx:instance-profile/infra-EC2InstanceProfile-1D59C5YR0LIYJ","id":"eeeeeeeeeeeeeeeeee"},
    "ebsOptimized":false}
    ]
    },
    "requesterId":"226008221399"
}

This is the query I tried:

-- Lists the RunInstances events of one account together with the calling user.
-- NOTE: the path '$.instanceId' addresses the top level of responseelements; in the
-- sample document shown earlier, instanceId is nested under instancesSet.items[],
-- so this path finds nothing there.
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract(responseelements, '$.instanceId') AS instance_id
FROM cloudtrail_logs
WHERE
    account = 'xxxxxxxxxxxxxxx'
    AND eventname = 'RunInstances';

But this returns an empty instance_id column. How can I properly extract only the instance ID from responseelements?


Solution

  • I found the right query to find the owner of an EC2 instance. That might help someone!

    -- Finds who launched a specific EC2 instance from the CloudTrail RunInstances events.
    --
    -- A single RunInstances call can launch several instances, all listed in
    -- responseelements under instancesSet.items[]. The array is therefore expanded with
    -- UNNEST so that every launched instance yields its own row, rather than reading only
    -- items[0] (which would report the wrong id when the wanted instance is not the first).
    --
    -- json_extract_scalar returns the id as a plain varchar (json_extract would return a
    -- JSON value, i.e. the id wrapped in double quotes), which also allows an exact
    -- equality match instead of a substring match on the whole document.
    --
    -- Replace the account value and both occurrences of the instance id before running.
    SELECT DISTINCT
        eventsource,
        eventname,
        useridentity.userName,
        eventtime,
        json_extract_scalar(launched.item, '$.instanceId') AS instance_id
    FROM cloudtrail_logs
    CROSS JOIN UNNEST(
        CAST(json_extract(responseelements, '$.instancesSet.items') AS ARRAY(JSON))
    ) AS launched (item)
    WHERE account = 'xxxxxxx'
        AND eventname = 'RunInstances'
        -- Substring pre-filter kept from the original query; the equality test below
        -- is what actually decides which rows are returned.
        AND responseelements LIKE '%i-3434ecb4c12%'
        AND json_extract_scalar(launched.item, '$.instanceId') = 'i-3434ecb4c12'
    ;