Search code examples
Tags: amazon-web-services, terraform, aws-glue, terraform-provider-aws

How to run AWS Glue Crawler after resource update/created?


I have defined a resource in my Terraform configuration to create a Glue Crawler that I don't want to run on a schedule. Instead, I want it to run once after it is created and again whenever it is updated. I couldn't find anything in the documentation about how to trigger this.

# Glue crawler named "my_crawler" associated with the "my_db" database.
# No schedule argument is set here, so nothing in this block starts the crawler.
resource "aws_glue_crawler" "my_crawler" {
  database_name = "my_db"
  name          = "my_crawler"
  # ARN of the IAM role the crawler is configured with.
  role          = "arn:aws:iam::111111111111:role/service-role/someRole"

  # S3 location the crawler targets.
  s3_target {
    path = "s3://my_bucket/key/prefix"
  }

}

Solution

  • You could use a local-exec provisioner to use the AWS CLI to trigger your Glue crawler once it is created:

    # Glue crawler that is started through the AWS CLI by a provisioner
    # attached directly to the resource.
    resource "aws_glue_crawler" "my_crawler" {
      database_name = "my_db"
      name          = "my_crawler"
      # ARN of the IAM role the crawler is configured with.
      role          = "arn:aws:iam::111111111111:role/service-role/someRole"
    
      # S3 location the crawler targets.
      s3_target {
        path = "s3://my_bucket/key/prefix"
      }
    
      # Runs the command locally (on the machine executing Terraform), so the
      # AWS CLI must be available there. `self.name` refers to this crawler's
      # own `name` attribute.
      provisioner "local-exec" {
        command = "aws glue start-crawler --name ${self.name}"
      }
    }
    

    This would only be triggered when the crawler is first created, because provisioners run at resource creation time. It would not run at any other point, such as when you change the s3_target.path or any other argument.

    If you wanted to be able to trigger this when changing the s3_target.path you'd need to use a null_resource with a trigger:

    # Glue crawler definition; it carries no provisioner of its own.
    resource "aws_glue_crawler" "my_crawler" {
      database_name = "my_db"
      name          = "my_crawler"
      # ARN of the IAM role the crawler is configured with.
      role          = "arn:aws:iam::111111111111:role/service-role/someRole"
    
      # S3 location the crawler targets.
      s3_target {
        path = "s3://my_bucket/key/prefix"
      }
    }
    
    # Helper resource whose only job is to start the Glue crawler via the AWS CLI.
    resource "null_resource" "run_crawler" {
      # A change to any value in `triggers` forces this null_resource to be
      # replaced, which re-runs the provisioner below. Here the crawler is
      # re-run whenever its S3 target path changes; add further crawler
      # attributes to this map to re-run on other updates as well.
      triggers = {
        # First-class expression with index syntax (Terraform 0.12+); the
        # interpolation-only form "${...s3_target.0.path}" is deprecated.
        s3_path = aws_glue_crawler.my_crawler.s3_target[0].path
      }

      # Runs locally on the machine executing Terraform, so the AWS CLI must
      # be available there. Referencing the crawler's name also makes this
      # resource depend on the crawler being created first.
      provisioner "local-exec" {
        command = "aws glue start-crawler --name ${aws_glue_crawler.my_crawler.name}"
      }
    }