Search code examples
terraformterraform-provider-awsamazon-athena

Athen table specifying table properties using terraform script


Some of my athena tables are configured to partition based on a column called partition_date. When viewing the DDL of those tables, I see a number of table properties.

TBLPROPERTIES (
  'classification'='json', 
  'compressionType'='gzip', 
  'groupFiles'='inPartition', 
  'projection.enabled'='true', 
  'projection.partition_date.format'='yyyy-MM-dd', 
  'projection.partition_date.range'='2020-07-01,2025-12-31', 
  'projection.partition_date.type'='date', 
  'typeOfData'='file')

My question is: how do I implement these table properties using terraform script?

I have tried parameters field but no luck and received error from terraform planning:

│ Error: Reference to undeclared resource
│ 
│   on glue.tf line 19, in resource "aws_glue_catalog_table" "gu_play_and_earn_prod":
│   19:       projection.partition_date.type    = "date"
│ 
│ A managed resource "projection" "partition_date" has not been declared in the root module.


Solution

  • Please refer to below example using which you should be able to pass these properties with your glue_catalog_table. Here is a full example of the same.

    resource "aws_glue_catalog_table" "alb_logs_src" {
    
      // General
      name          = var.alb_logs_tbl_name
      database_name = var.athena_db_name
      table_type    = "EXTERNAL_TABLE"
      description   = "Application Load Balancer (ALB) logs From ${local.alb_src_s3_path}"
    
      // Table Properties
      parameters = {
    
        // General
        EXTERNAL                      = "TRUE"
        "has_encrypted_data"          = "false"
        "projection.enabled"          = tostring(var.enable_projected_partitions)
        "partition_filtering.enabled" = tostring(var.enable_partition_filtering)
    
        // Partition Projection - Date
        "projection.date.type"   = "date"
        "projection.date.range"  = "2022/01/01,NOW"
        "projection.date.format" = "yyyy/MM/dd"
    
        // Partition Projection - Region - All Active Regions
        "projection.region.type"   = "enum"
        "projection.region.values" = join(", ", var.organization_enabled_regions)
    
        // Partition Projection - Accounts in Org
        "projection.account_id.type"   = "enum"
        "projection.account_id.values" = join(", ", var.organization_account_ids != "" ? var.organization_account_ids : [data.aws_caller_identity.current.id])
    
        // Storage Location
        "storage.location.template" = "${local.alb_src_s3_path}AWSLogs/$${account_id}/elasticloadbalancing/$${region}/$${date}/"
    
      }
    
      // Partition Indexes
      partition_index {
        index_name = "date_partition_index"
        keys       = ["date"]
      }
    
      // Partition Columns
      partition_keys {
        name    = "account_id"
        type    = "string"
        comment = "The ID of the network interface for which the traffic is recorded"
      }
      ......
      ......
      ......