Search code examples
amazon-s3terraformamazon-cloudfront

How to configure cloudfront for two s3 web bucket origins using terraform


I have a static website in one S3 bucket and uploaded media in another S3 bucket, and I would like a CloudFront distribution that switches between the buckets based on either path or domain (I don't care which; I just want what works). I also have an API Gateway path for my Lambda-based service code (CloudFront forwards requests to this correctly).

The problem is that I cannot seem to get CloudFront to route requests to the second bucket. I'm getting 404 errors now, but I have mucked about with the config and managed to get 403s as well. My current Terraform looks like this:

# One CloudFront distribution with three origins:
#   - the frontend S3 website bucket   (default cache behavior)
#   - the resources S3 website bucket  (path pattern "resources*")
#   - the API Gateway stage            (path pattern "api*")
# Both var.web_domain and var.resources_domain are registered as aliases.
resource "aws_cloudfront_distribution" "blog" {
  enabled  = true
  price_class = "PriceClass_All"


# Origin 1: frontend bucket, addressed through its S3 website endpoint.
  origin {
    domain_name = "${aws_s3_bucket.frontend.id}.s3-website-${aws_s3_bucket.frontend.region}.amazonaws.com"
    origin_id   = "s3-${aws_s3_bucket.frontend.id}"

    # The website endpoint is configured as a custom origin reached over plain HTTP.
    custom_origin_config {
      http_port                = 80
      https_port               = 443
      origin_keepalive_timeout = 5
      origin_protocol_policy   = "http-only"
      origin_read_timeout      = 30
      origin_ssl_protocols     = ["TLSv1.2"]
    }
  }

  # Anything not matched by an ordered_cache_behavior below goes to the frontend bucket.
  default_cache_behavior {
    allowed_methods        = ["GET", "HEAD", "OPTIONS"]
    cached_methods         = ["GET", "HEAD", "OPTIONS"]
    target_origin_id       = "s3-${aws_s3_bucket.frontend.id}"
    viewer_protocol_policy = "redirect-to-https"
    compress               = true
    # Legacy forwarding settings: all query strings, all cookies and the CORS request headers.
    forwarded_values {
      query_string = true
      cookies {
        forward = "all"
      }
      headers = ["Access-Control-Request-Headers", "Access-Control-Request-Method", "Origin"]
    }
  }

# Origin 2: resources bucket.
# Although upload is through the API below, fetching the resources should work just like a website.
# Example of an object as it is currently stored (at the bucket root):
# https://s3.ap-southeast-2.amazonaws.com/resources.dablogga.zenithgamesfoundry.com/mike2_small.png
  origin {
    domain_name = "${aws_s3_bucket.resources.id}.s3-website-${aws_s3_bucket.resources.region}.amazonaws.com"
    origin_id   = "s3-${aws_s3_bucket.resources.id}"
    # NOTE(review): origin_path is presumably prepended to the viewer's request path,
    # so /resources/mike2_small.png would be fetched as /resources/resources/mike2_small.png.
    # The example object above lives at the bucket root, which would explain the 404 —
    # confirm against the CloudFront origin-path documentation.
    origin_path = "/resources"

    custom_origin_config {
      http_port                = 80
      https_port               = 443
      origin_keepalive_timeout = 5
      origin_protocol_policy   = "http-only"
      origin_read_timeout      = 30
      origin_ssl_protocols     = ["TLSv1.2"]
    }
  }
  # Routes "resources*" paths to the resources bucket origin.
  ordered_cache_behavior {
    allowed_methods = ["GET", "HEAD", "OPTIONS"]
    cached_methods  = ["GET", "HEAD", "OPTIONS"]
    # Intended for URLs like http://dablogga.zenithgamesfoundry.com/resources/*,
    # to be served from the S3 bucket named resources.dablogga.zenithgamesfoundry.com.
    # Note that '/resources' does not clash with anything that React builds in the website.
    path_pattern            = "resources*"
    target_origin_id        = "s3-${aws_s3_bucket.resources.id}"
    viewer_protocol_policy  = "redirect-to-https"
    compress                = true
    forwarded_values {
      query_string = true
      cookies {
        forward = "all"
      }
      headers = ["Access-Control-Request-Headers", "Access-Control-Request-Method", "Origin"]
    }
  }


# Origin 3: API Gateway (which fronts the Lambda code).
  origin {
    # Derive a bare host name from the stage invoke URL by stripping the
    # "https://" scheme and the "/api" suffix.
    domain_name = replace(
      replace(
        aws_apigatewayv2_stage.zgf_api_gw_api.invoke_url,
        "https://",
        ""
      ),
      "/api",
      ""
    )
    origin_id = "api-gateway-blog"
    custom_origin_config {
      http_port              = 80
      https_port             = 443
      origin_protocol_policy = "https-only"
      origin_ssl_protocols   = ["TLSv1.2"]
    }
  }


  # Routes "api*" paths to API Gateway; write methods are allowed here, and
  # caching/forwarding are governed by the two *_optimized policies.
  ordered_cache_behavior {
    allowed_methods = ["GET", "HEAD", "OPTIONS", "POST", "PUT", "DELETE", "PATCH"]
    cached_methods  = ["GET", "HEAD", "OPTIONS"]
    path_pattern = "api*"
    target_origin_id = "api-gateway-blog"
    viewer_protocol_policy = "https-only"
    cache_policy_id = aws_cloudfront_cache_policy.api_gateway_optimized.id
    origin_request_policy_id = aws_cloudfront_origin_request_policy.api_gateway_optimized.id
  }

  # Both the website domain and the resources domain resolve to this distribution.
  aliases = ["${var.web_domain}","${var.resources_domain}"]

  restrictions {
    geo_restriction {
      restriction_type = "none"
    }
  }

  # NOTE(review): a single certificate is used for both aliases — presumably it
  # covers both domain names; verify.
  viewer_certificate {
    acm_certificate_arn = "${var.resources_dablogga_certificate_arn}"
    ssl_support_method = "sni-only"
  }

  is_ipv6_enabled = true
}

# Cache policy attached to the "api*" behavior.
# All three TTLs are 0 and nothing is added to the cache key.
resource "aws_cloudfront_cache_policy" "api_gateway_optimized" {
  name = "ApiGatewayOptimized"

  min_ttl     = 0
  default_ttl = 0
  max_ttl     = 0

  parameters_in_cache_key_and_forwarded_to_origin {
    headers_config {
      header_behavior = "none"
    }

    cookies_config {
      cookie_behavior = "none"
    }

    query_strings_config {
      query_string_behavior = "none"
    }
  }
}

# Origin request policy attached to the "api*" behavior:
# forwards every query string and a fixed whitelist of headers, but no cookies.
resource "aws_cloudfront_origin_request_policy" "api_gateway_optimized" {
  name = "ApiGatewayOptimized"

  query_strings_config {
    query_string_behavior = "all"
  }

  headers_config {
    header_behavior = "whitelist"

    headers {
      items = ["Accept-Charset", "Accept", "User-Agent", "Referer"]
    }
  }

  cookies_config {
    cookie_behavior = "none"
  }
}


// This Route53 record will point at our CloudFront distribution.
// two route53 records pointing at the same cloudfront distribution, they will be distinguished by the
// two origin ids
# Alias A record resolving var.web_domain to the CloudFront distribution.
# NOTE(review): the distribution sets is_ipv6_enabled = true, but no matching
# AAAA alias record is visible in this file — consider adding one.
resource "aws_route53_record" "www" {
  zone_id = var.hosted_zone_id
  name    = var.web_domain
  type    = "A"

  # Route53 alias target: the distribution's generated domain name and the
  # hosted zone ID exported by the distribution resource.
  alias {
    name                   = aws_cloudfront_distribution.blog.domain_name
    zone_id                = aws_cloudfront_distribution.blog.hosted_zone_id
    evaluate_target_health = false
  }
}

# Alias A record resolving var.resources_domain to the same CloudFront
# distribution as the website record.
resource "aws_route53_record" "resources" {
  zone_id = var.hosted_zone_id
  name    = var.resources_domain
  type    = "A"

  # Route53 alias target: the distribution's generated domain name and the
  # hosted zone ID exported by the distribution resource.
  alias {
    name                   = aws_cloudfront_distribution.blog.domain_name
    zone_id                = aws_cloudfront_distribution.blog.hosted_zone_id
    evaluate_target_health = false
  }
}


The buckets are provisioned and are available, and as far as I can see, they work (note that there's a bucket for the lambda layer as well as the "frontend" and "resources" bucket).

For reference, here's the s3 configuration (I'm not expecting any trouble here):

# Bucket holding the static website; it is named after the website domain.
resource "aws_s3_bucket" "frontend" {
  bucket = var.web_domain

  # Boolean literal rather than the string "false".
  force_destroy = false
}

# Static-website hosting settings for the frontend bucket.
resource "aws_s3_bucket_website_configuration" "bucket-website-config" {
  bucket = aws_s3_bucket.frontend.id

  # index.html is also used as the error document
  # (presumably so client-side routes fall back to the app — confirm).
  error_document {
    key = "index.html"
  }

  index_document {
    suffix = "index.html"
  }
}

# Applies the canned "public-read" ACL to the frontend bucket.
resource "aws_s3_bucket_acl" "frontend" {
  bucket = aws_s3_bucket.frontend.id
  acl    = "public-read"

  # Apply the relaxed public access block first, so the public ACL is not
  # attempted while public ACLs are still blocked on the bucket.
  # NOTE(review): unlike the resources bucket, no aws_s3_bucket_ownership_controls
  # resource is visible for this bucket — confirm ACLs are enabled on it.
  depends_on = [aws_s3_bucket_public_access_block.frontend]
}

# Turns off all four public-access guards on the frontend bucket,
# which is served publicly as a website.
resource "aws_s3_bucket_public_access_block" "frontend" {
  bucket = aws_s3_bucket.frontend.id

  # ACL-related guards
  block_public_acls  = false
  ignore_public_acls = false

  # Policy-related guards
  block_public_policy     = false
  restrict_public_buckets = false
}

# for the AWS Lambda code
# Bucket for backend artifacts, named "backend.<project_name>".
resource "aws_s3_bucket" "backend" {
  bucket = "backend.${var.project_name}"

  # Boolean literal rather than the string "false".
  force_destroy = false
}

# Applies the canned "private" ACL to the backend bucket.
resource "aws_s3_bucket_acl" "backend" {
  acl    = "private"
  bucket = aws_s3_bucket.backend.id
}
# Enables all four public-access guards on the backend bucket.
resource "aws_s3_bucket_public_access_block" "backend" {
  bucket = aws_s3_bucket.backend.id

  # ACL-related guards
  block_public_acls  = true
  ignore_public_acls = true

  # Policy-related guards
  block_public_policy     = true
  restrict_public_buckets = true
}

# for the static image or resource files (such as the images uploaded in blogs)
# important not to destroy this on each build
# Bucket holding uploaded media; it is named after the resources domain.
resource "aws_s3_bucket" "resources" {
  bucket = var.resources_domain

  # Boolean literal rather than the string "false".
  force_destroy = false
}

# Static-website hosting settings for the resources bucket,
# using the same index/error documents as the frontend bucket.
resource "aws_s3_bucket_website_configuration" "bucket-resources-config" {
  bucket = aws_s3_bucket.resources.id

  error_document {
    key = "index.html"
  }

  index_document {
    suffix = "index.html"
  }
}

# Applies the canned "public-read" ACL to the resources bucket.
resource "aws_s3_bucket_acl" "resources" {
  # Ordered after the ownership controls resource for this bucket.
  depends_on = [aws_s3_bucket_ownership_controls.resources]

  bucket = aws_s3_bucket.resources.id
  acl    = "public-read"
}

# Sets object ownership on the resources bucket to "ObjectWriter".
resource "aws_s3_bucket_ownership_controls" "resources" {
  # Ordered after the public access block for this bucket.
  depends_on = [aws_s3_bucket_public_access_block.resources]

  bucket = aws_s3_bucket.resources.id

  rule {
    object_ownership = "ObjectWriter"
  }
}

# Turns off all four public-access guards on the resources bucket.
resource "aws_s3_bucket_public_access_block" "resources" {
  bucket = aws_s3_bucket.resources.id

  # ACL-related guards
  block_public_acls  = false
  ignore_public_acls = false

  # Policy-related guards
  block_public_policy     = false
  restrict_public_buckets = false
}

# the resources bucket is not in the same origin as the website bucket (frontend) so a request from that origin
# such as the blog image load, will be blocked without the below.
# CORS rule for the resources bucket: GET is allowed from any origin.
resource "aws_s3_bucket_cors_configuration" "resources" {
  bucket = aws_s3_bucket.resources.id

  cors_rule {
    allowed_origins = ["*"]
    allowed_methods = ["GET"]
  }
}

I have been working on this for days and I'm going nuts, why is it that a url to:

https://dablogga.zenithgamesfoundry.com/resources/mike2_small.png

results in a 404?

whereas anything on

https://dablogga.zenithgamesfoundry.com/*

is totally fine?

Alternatively, can I switch based on the domain, given that the second bucket is: resources.dablogga.zenithgamesfoundry.com

???

EDIT: Note that the origin for API Gateway somehow forwards to API Gateway (and then on to the Lambda) when the /api path is found in the request. But it's hard to see what is actually doing it; I assume that it's some kind of automagic occurring behind the scenes in some API Gateway CloudFront function (or something). In any case, it proves that forwarding based on a path is possible... somehow.


Solution

  • After what seemed like an endless process of trial and error, I have stumbled upon a solution. Essentially, the second web bucket needs an origin request policy and a cache policy. No idea why the first one doesn't need it, or why they have to be configured in totally different ways.

    Anyway, this is my new cloudfront terraform script:

    # One CloudFront distribution with three origins:
    #   - the frontend S3 website bucket   (default cache behavior)
    #   - the resources S3 website bucket  (path pattern "resources*")
    #   - the API Gateway stage            (path pattern "api*")
    resource "aws_cloudfront_distribution" "blog" {
      enabled         = true
      is_ipv6_enabled = true
      price_class     = "PriceClass_All"

      # Both the website domain and the resources domain resolve to this distribution.
      aliases = [var.web_domain, var.resources_domain]

      # Origin 1: frontend bucket, addressed through its S3 website endpoint
      # (a custom origin reached over plain HTTP).
      origin {
        domain_name = "${aws_s3_bucket.frontend.id}.s3-website-${aws_s3_bucket.frontend.region}.amazonaws.com"
        origin_id   = "s3-${aws_s3_bucket.frontend.id}"

        custom_origin_config {
          http_port                = 80
          https_port               = 443
          origin_keepalive_timeout = 5
          origin_protocol_policy   = "http-only"
          origin_read_timeout      = 30
          origin_ssl_protocols     = ["TLSv1.2"]
        }
      }

      # Anything not matched by an ordered_cache_behavior goes to the frontend bucket.
      default_cache_behavior {
        allowed_methods        = ["GET", "HEAD", "OPTIONS"]
        cached_methods         = ["GET", "HEAD", "OPTIONS"]
        target_origin_id       = "s3-${aws_s3_bucket.frontend.id}"
        viewer_protocol_policy = "redirect-to-https"
        compress               = true

        # Legacy forwarding settings: all query strings, all cookies and the
        # CORS request headers.
        forwarded_values {
          query_string = true
          headers      = ["Access-Control-Request-Headers", "Access-Control-Request-Method", "Origin"]

          cookies {
            forward = "all"
          }
        }
      }

      # Origin 2: resources bucket, also through its S3 website endpoint.
      # No origin_path is set, so objects are expected under a "resources/"
      # prefix inside the bucket to line up with the "resources*" path pattern.
      origin {
        domain_name = "${aws_s3_bucket.resources.id}.s3-website-${aws_s3_bucket.resources.region}.amazonaws.com"
        origin_id   = "s3-${aws_s3_bucket.resources.id}"

        custom_origin_config {
          http_port                = 80
          https_port               = 443
          origin_keepalive_timeout = 5
          origin_protocol_policy   = "http-only"
          origin_read_timeout      = 30
          origin_ssl_protocols     = ["TLSv1.2"]
        }
      }

      # URLs like https://dablogga.zenithgamesfoundry.com/resources/* are served
      # from the resources bucket. '/resources' does not clash with anything
      # that React builds in the website.
      ordered_cache_behavior {
        path_pattern             = "resources*"
        target_origin_id         = "s3-${aws_s3_bucket.resources.id}"
        allowed_methods          = ["GET", "HEAD", "OPTIONS"]
        cached_methods           = ["GET", "HEAD", "OPTIONS"]
        viewer_protocol_policy   = "redirect-to-https"
        compress                 = true
        cache_policy_id          = aws_cloudfront_cache_policy.resources.id
        origin_request_policy_id = aws_cloudfront_origin_request_policy.resources.id
      }

      # Origin 3: API Gateway (which fronts the Lambda code).
      origin {
        origin_id = "api-gateway-blog"

        # Derive a bare host name from the stage invoke URL by stripping the
        # "https://" scheme and the "/api" suffix.
        domain_name = replace(
          replace(
            aws_apigatewayv2_stage.zgf_api_gw_api.invoke_url,
            "https://",
            ""
          ),
          "/api",
          ""
        )

        custom_origin_config {
          http_port              = 80
          https_port             = 443
          origin_protocol_policy = "https-only"
          origin_ssl_protocols   = ["TLSv1.2"]
        }
      }

      # "api*" paths go to API Gateway; write methods are allowed here.
      ordered_cache_behavior {
        path_pattern             = "api*"
        target_origin_id         = "api-gateway-blog"
        allowed_methods          = ["GET", "HEAD", "OPTIONS", "POST", "PUT", "DELETE", "PATCH"]
        cached_methods           = ["GET", "HEAD", "OPTIONS"]
        viewer_protocol_policy   = "https-only"
        cache_policy_id          = aws_cloudfront_cache_policy.api_gateway_optimized.id
        origin_request_policy_id = aws_cloudfront_origin_request_policy.api_gateway_optimized.id
      }

      restrictions {
        geo_restriction {
          restriction_type = "none"
        }
      }

      # NOTE(review): a single certificate is used for both aliases — presumably
      # it covers both domain names; verify.
      viewer_certificate {
        acm_certificate_arn = var.resources_dablogga_certificate_arn
        ssl_support_method  = "sni-only"
      }
    }
    
    ## Request forwarding policies for resources
    ##
    ##
    # Cache policy attached to the "resources*" behavior.
    # All three TTLs are 0 and nothing is added to the cache key.
    # NOTE(review): zero TTLs on static media look copied from the API policy —
    # confirm that this is intended.
    resource "aws_cloudfront_cache_policy" "resources" {
      name = "resources"

      min_ttl     = 0
      default_ttl = 0
      max_ttl     = 0

      parameters_in_cache_key_and_forwarded_to_origin {
        headers_config {
          header_behavior = "none"
        }

        cookies_config {
          cookie_behavior = "none"
        }

        query_strings_config {
          query_string_behavior = "none"
        }
      }
    }
    
    # Origin request policy attached to the "resources*" behavior:
    # forwards every query string and a fixed whitelist of headers, but no cookies.
    resource "aws_cloudfront_origin_request_policy" "resources" {
      name = "resources"

      query_strings_config {
        query_string_behavior = "all"
      }

      headers_config {
        header_behavior = "whitelist"

        headers {
          items = ["Accept-Charset", "Accept", "User-Agent", "Referer"]
        }
      }

      cookies_config {
        cookie_behavior = "none"
      }
    }
    
    ## Request forwarding policies for API GW
    ##
    ##
    # Cache policy attached to the "api*" behavior.
    # All three TTLs are 0 and nothing is added to the cache key.
    resource "aws_cloudfront_cache_policy" "api_gateway_optimized" {
      name = "ApiGatewayOptimized"

      min_ttl     = 0
      default_ttl = 0
      max_ttl     = 0

      parameters_in_cache_key_and_forwarded_to_origin {
        headers_config {
          header_behavior = "none"
        }

        cookies_config {
          cookie_behavior = "none"
        }

        query_strings_config {
          query_string_behavior = "none"
        }
      }
    }
    
    # Origin request policy attached to the "api*" behavior:
    # forwards every query string and a fixed whitelist of headers, but no cookies.
    resource "aws_cloudfront_origin_request_policy" "api_gateway_optimized" {
      name = "ApiGatewayOptimized"

      query_strings_config {
        query_string_behavior = "all"
      }

      headers_config {
        header_behavior = "whitelist"

        headers {
          items = ["Accept-Charset", "Accept", "User-Agent", "Referer"]
        }
      }

      cookies_config {
        cookie_behavior = "none"
      }
    }
    
    
    ## This Route53 record will point at our CloudFront distribution.
    ## two route53 records pointing at the same cloudfront distribution, they will be distinguished by the
    ## two origin ids
    # Alias A record resolving var.web_domain to the CloudFront distribution.
    # NOTE(review): the distribution sets is_ipv6_enabled = true, but no matching
    # AAAA alias record is visible here — consider adding one.
    resource "aws_route53_record" "www" {
      zone_id = var.hosted_zone_id
      name    = var.web_domain
      type    = "A"

      # Route53 alias target: the distribution's generated domain name and the
      # hosted zone ID exported by the distribution resource.
      alias {
        name                   = aws_cloudfront_distribution.blog.domain_name
        zone_id                = aws_cloudfront_distribution.blog.hosted_zone_id
        evaluate_target_health = false
      }
    }
    
    # Alias A record resolving var.resources_domain to the same CloudFront
    # distribution as the website record.
    resource "aws_route53_record" "resources" {
      zone_id = var.hosted_zone_id
      name    = var.resources_domain
      type    = "A"

      # Route53 alias target: the distribution's generated domain name and the
      # hosted zone ID exported by the distribution resource.
      alias {
        name                   = aws_cloudfront_distribution.blog.domain_name
        zone_id                = aws_cloudfront_distribution.blog.hosted_zone_id
        evaluate_target_health = false
      }
    }
    

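    This seems really convoluted, but it works, as long as I put everything inside a "resources" directory on the new bucket. (With no origin_path set, CloudFront forwards the full request path, including the leading /resources, to the origin, so the object keys in the bucket have to start with resources/.)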

    go figure...