Search code examples
google-cloud-platformgoogle-cloud-dataflowgcloudgoogle-cloud-scheduler

Cloud Scheduler giving "INVALID_ARGUMENT" error to deploy Dataflow job (using flex template)


I have a Cloud Scheduler job that is supposed to deploy a Dataflow job using a flex template. I have Terraform changes that create the Cloud Scheduler job. However, when I try to run the job, it gives this error:

{"@type":"type.googleapis.com/google.cloud.scheduler.logging.AttemptFinished", "jobName":"projects/engineering-199721/locations/us-central1/jobs/dummy-batch-daily-job", "status":"INVALID_ARGUMENT", "targetType":"HTTP", "url":"https://dataflow.googleapis.com/v1b3/projects/engineering-199721/locations/us-central1/templates:launch?gcsPath=gs://inf_bigdata_dev15_temp_location/dataflow/templates/sample_batch_job_D3.1.7-47-g3c619"}

below is my tf code

  # NOTE(review): this is the original (failing) configuration quoted from the question.
  # The template being launched is a *flex* template (per the title), but the target
  # URI below calls the classic-templates endpoint — see the Solution section for the fix.
  resource "google_cloud_scheduler_job" "dummy-batch_daily"
   {
  name = "dummy-batch-daily-job"
  schedule = "0 2 * * *"  # 2 am daily
  region = var.GCP_REGION
  time_zone = "America/Los_Angeles"
  
  http_target {
    http_method = "POST"
    # NOTE(review): `templates:launch` is the classic-template endpoint. The Solution
    # section switches to `flexTemplates:launch` and moves the GCS path out of the
    # query string into the request body — the likely cause of INVALID_ARGUMENT here.
    uri = "https://dataflow.googleapis.com/v1b3/projects/${var.GCP_PROJECT}/locations/${var.GCP_REGION}/templates:launch?gcsPath=gs://xyz/dataflow/templates/sample_batch_job_D3.1.7-47-g3c619"

    oauth_token {
      service_account_email = "[email protected]"
    }
    
    # NOTE(review): for flexTemplates:launch this body must be wrapped in a
    # "launchParameter" object carrying "containerSpecGcsPath" (see Solution below).
    # Do not add comments inside the heredoc — its contents are the literal HTTP payload.
    body = base64encode(<<-EOT
    {
      "jobName": "dummy-batch",
      "parameters": {
        "project": "${var.GCP_PROJECT}",
        "deploymentName": "${var.INF_PURE_DEPLOYMENT}",
        "bigQueryProject": "${var.GCP_PROJECT}",
        "bigQueryTable": "${var.INF_BIGDATA_TELEMETRY_REPROCESS_BIGQUERY_TABLE}",
        "bigQueryDataset": "${var.INF_BIGDATA_BIGQUERY_DATASET}",
        "bigDataDisplayVersion": 1
      },
      "environment": {
        "subnetwork": "regions/${var.GCP_REGION}/subnetworks/${var.INF_BIGDATA_SUBNET}",
        "tempLocation": "${var.INF_BIGDATA_PIPELINE_TEMP_LOCATION}/dataflow",
        "numWorkers": ${var.INF_BIGDATA_START_NUM_WORKERS}
      }
    }
  EOT
    )
    headers = {
      "Content-Type" = "application/json"
    }
  }
}

I did try https://medium.com/@zhongchen/schedule-your-dataflow-batch-jobs-with-cloud-scheduler-8390e0e958e, and I also tried removing the 'parameters' block, but I still get the same error.

Any help would be appreciated.


Solution

  • Flex templates are launched through the `flexTemplates:launch` endpoint, not `templates:launch`, and the template's GCS path goes in the request body as `containerSpecGcsPath` (wrapped in a `launchParameter` object) rather than in the query string:

    # Scheduler job that launches the Dataflow *flex* template daily at 2 am Pacific.
    resource "google_cloud_scheduler_job" "dummy-batch_daily" {
      name      = "dummy-batch-daily-job"
      schedule  = "0 2 * * *" # 2 am daily
      # Bare expression — `${var.GCP_REGION}` outside a quoted string is invalid HCL.
      region    = var.GCP_REGION
      # Keep the time zone from the original job; without it the cron runs in UTC.
      time_zone = "America/Los_Angeles"

      http_target {
        http_method = "POST"

        # Flex templates use flexTemplates:launch; the template path moves from the
        # ?gcsPath= query parameter into the body as "containerSpecGcsPath".
        uri = "https://dataflow.googleapis.com/v1b3/projects/${var.GCP_PROJECT}/locations/${var.GCP_REGION}/flexTemplates:launch"

        oauth_token {
          service_account_email = "[email protected]"
        }

        # flexTemplates:launch expects jobName/parameters/environment wrapped in a
        # single top-level "launchParameter" object.
        body = base64encode(<<-EOT
        {
          "launchParameter": {
            "jobName": "dummy-batch",
            "containerSpecGcsPath": "gs://xyz/dataflow/templates/sample_batch_job_D3.1.7-47-g3c619",
            "parameters": {
              "project": "${var.GCP_PROJECT}",
              "deploymentName": "${var.INF_PURE_DEPLOYMENT}",
              "bigQueryProject": "${var.GCP_PROJECT}",
              "bigQueryTable": "${var.INF_BIGDATA_TELEMETRY_REPROCESS_BIGQUERY_TABLE}",
              "bigQueryDataset": "${var.INF_BIGDATA_BIGQUERY_DATASET}",
              "bigDataDisplayVersion": 1
            },
            "environment": {
              "subnetwork": "regions/${var.GCP_REGION}/subnetworks/${var.INF_BIGDATA_SUBNET}",
              "tempLocation": "${var.INF_BIGDATA_PIPELINE_TEMP_LOCATION}/dataflow",
              "numWorkers": ${var.INF_BIGDATA_START_NUM_WORKERS}
            }
          }
        }
        EOT
        )

        # The body is JSON; keep the Content-Type header from the original config.
        headers = {
          "Content-Type" = "application/json"
        }
      }
    }