Search code examples
python-3.6aws-cloudformationamazon-iamaws-glueaws-cdk

AWS cdk python, which IAM role for a glue crawler with a daily trigger?


I am trying to deploy a glue crawler for an s3. Unfortunately I cant manage to find an appropriate IAM role that allows the crawler to run. The permissions I need are just to read/write to S3, and logs:PutLogsEvent, but somehow I am not getting it right. Here is my code, it can be deployed but the crawler does not have permissions to run.

from aws_cdk import (
    aws_events as events,
    aws_lambda as lambda_,
    aws_events_targets as targets,
    aws_iam as iam,
    aws_glue as glue,
    core
)

class MyStack(core.Stack):

    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # what should I put in the role exactly?
        glue_role = iam.Role(
            self, 'Role__arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole',
            assumed_by=iam.ServicePrincipal('glue.amazonaws.com'),
        )
        glue_trigger = glue.CfnTrigger(self, "glue-daily-trigger",
            name = "etl-trigger",
            schedule = "cron(5 * * * ? *)", # every hour at X.05, every day
            type="SCHEDULED",
            actions=[
                {
                    "jobName": "glue_crawler-daily"
                }
            ],
            start_on_creation=True
        )
        crawler_name = 'crawler_units_data'
        glue_crawler = glue.CfnCrawler(
            self, crawler_name,
            name=crawler_name,
            database_name='data_science',
            role=glue_role.role_arn,#'arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole',
            targets={"s3Targets": [{"path": "s3://random_s3/units/"}]},
        )
        glue_trigger.add_depends_on(glue_crawler)

I tried several things and translating code from javascript examples like this one but the methods being called from javascript do not map 100% with python.

This role (created from the GUI) works correctly and has 2 policies.

  1. Policy to read and write from s3
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:GetObject",
                "s3:PutObject"
            ],
            "Resource": [
                "arn:aws:s3:::random_s3/units*"
            ]
        }
    ]
}
  1. AWSGlueServicePolicy
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "glue:*",
                "s3:GetBucketLocation",
                "s3:ListBucket",
                "s3:ListAllMyBuckets",
                "s3:GetBucketAcl",
                "ec2:DescribeVpcEndpoints",
                "ec2:DescribeRouteTables",
                "ec2:CreateNetworkInterface",
                "ec2:DeleteNetworkInterface",
                "ec2:DescribeNetworkInterfaces",
                "ec2:DescribeSecurityGroups",
                "ec2:DescribeSubnets",
                "ec2:DescribeVpcAttribute",
                "iam:ListRolePolicies",
                "iam:GetRole",
                "iam:GetRolePolicy",
                "cloudwatch:PutMetricData"
            ],
            "Resource": [
                "*"
            ]
        },
        {
            "Effect": "Allow",
            "Action": [
                "s3:CreateBucket"
            ],
            "Resource": [
                "arn:aws:s3:::aws-glue-*"
            ]
        },
        {
            "Effect": "Allow",
            "Action": [
                "s3:GetObject",
                "s3:PutObject",
                "s3:DeleteObject"
            ],
            "Resource": [
                "arn:aws:s3:::aws-glue-*/*",
                "arn:aws:s3:::*/*aws-glue-*/*"
            ]
        },
        {
            "Effect": "Allow",
            "Action": [
                "s3:GetObject"
            ],
            "Resource": [
                "arn:aws:s3:::crawler-public*",
                "arn:aws:s3:::aws-glue-*"
            ]
        },
        {
            "Effect": "Allow",
            "Action": [
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents"
            ],
            "Resource": [
                "arn:aws:logs:*:*:/aws-glue/*"
            ]
        },
        {
            "Effect": "Allow",
            "Action": [
                "ec2:CreateTags",
                "ec2:DeleteTags"
            ],
            "Condition": {
                "ForAllValues:StringEquals": {
                    "aws:TagKeys": [
                        "aws-glue-service-resource"
                    ]
                }
            },
            "Resource": [
                "arn:aws:ec2:*:*:network-interface/*",
                "arn:aws:ec2:*:*:security-group/*",
                "arn:aws:ec2:*:*:instance/*"
            ]
        }
    ]
}

Solution

  • As it turns out, I needed to pass the name and policy in a different way

        glue_role = iam.Role(
            self, 'glue_role_id2323',
            role_name = 'Rolename',
            assumed_by=iam.ServicePrincipal('glue.amazonaws.com'),
            managed_policies=[iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSGlueServiceRole')]
        )