I am trying to create a database using terraform and this seems very complicated for a poor query...
Could you help me, please?
I have tried null_resource with local-exec and data "external" Python... I think I am looking the wrong way
ex which doesn't works in terraform 0.12
resource "null_resource" "create-endpoint" {
provisioner "local-exec" {
query = <<EOF
{
CREATE EXTERNAL TABLE `dashboard_loading_time`(
`timestamp_iso` string,
`app_identification` struct<service:string,app_name:string,app_type:string,stage:string>,
`user` struct<api_gateway_key:struct<id:string,name:string>,mashery_key:struct<id:string,name:string>,employee:struct<id:string,name:string>>,
`action` struct<action_type:string,path:string>,
`result` struct<status:string,http_status:string,response:struct<response:string>>)
PARTITIONED BY (
`year` int)
ROW FORMAT SERDE
'org.openx.data.jsonserde.JsonSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
's3://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/dev'
}
EOF
command = "aws athena start-query-execution --query-string "query""
}
}
I would like to find the simplest way to do this using terraform.
If you wanna make it for athena, need to make glue
resources.
try below code with terraform.
variable "service_name" {
default = "demo-service"
}
variable "workspace" {
default = "dev"
}
variable "columns" {
default = {
id = "int"
type = "string"
status = "int"
created_at = "timestamp"
}
}
resource "aws_glue_catalog_database" "athena" {
name = "${var.service_name}_db"
}
resource "aws_glue_catalog_table" "athena" {
name = "${var.service_name}_logs"
database_name = "${aws_glue_catalog_database.athena.name}"
table_type = "EXTERNAL_TABLE"
parameters = {
EXTERNAL = "TRUE"
}
storage_descriptor {
location = "s3://${var.service_name}-${var.workspace}-data-pipeline/log/"
input_format = "org.apache.hadoop.mapred.TextInputFormat"
output_format = "org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat"
ser_de_info {
name = "jsonserde"
serialization_library = "org.openx.data.jsonserde.JsonSerDe"
parameters = {
"serialization.format" = "1"
}
}
dynamic "columns" {
for_each = "${var.columns}"
content {
name = "${columns.key}"
type = "${columns.value}"
}
}
}
partition_keys {
name = "year"
type = "string"
}
partition_keys {
name = "month"
type = "string"
}
partition_keys {
name = "day"
type = "string"
}
partition_keys {
name = "hour"
type = "string"
}
}
refer to this repository : aws-serverless-data-pipeline-by-terraform