Search code examples
node.jsgoogle-bigquery

Update BigQuery table with an array of objects where some of the values may be null


I need to update an exisitng table in BigQuery with custom array of objects. I am using Node.js with this client library: https://github.com/googleapis/nodejs-bigquery

Let's say I have a table with two "STRING" type columns to update it I use the following code.

import BigQueryConnection from "./utilities/BigQuery.connection";

const data = [
  {
    id: "1",
    country: "Germany"
  },
    {
    id: "2",
    country: "France"
  },
]


  const tableId = 'test_dataset.test_table';
  const mergeQuery = `
  MERGE ${tableId} t
  USING UNNEST(@rows) s
  ON t.id = s.id
  WHEN MATCHED THEN
  UPDATE SET 
    id = s.id,
    country = s.country
  WHEN NOT MATCHED THEN
    INSERT (id, country) VALUES (s.id, s.country)`;

  const options = {
    query: mergeQuery,
    params: {
      rows: data
    },
    types: {
      id: 'STRING',
      country: 'STRING',
    },
    useLegacySql: false
  };


  const response = await BigQueryConnection.bigquery.createQueryJob(options);
  const job = response[0];

  const [rows] = await job.getQueryResults(job);
  console.log(rows)

It works well but from time to time my data comes like this:

const data = [
  {
    id: "1",
    country: null
  },
  {
    id: "2",
    country: "France"
  },
]

it always gives the following error. Error: Parameter types must be provided for null values via the 'types' field in query options. Even though the parameter types are provided the error persist. Am I missing something or this just doesn't work with Big Query?


Solution

  • I created the script on my side with the following structure :

    enter image description here

    The query.js file :

    'use strict';
    
    function main() {
        // [START bigquery_query]
        // [START bigquery_client_default_credentials]
        // Import the Google Cloud client library using default credentials
        const {BigQuery} = require('@google-cloud/bigquery');
        const bigquery = new BigQuery();
    
        const data = [
            {
                "featureName": "featureReal",
                "jobName": "jobReal",
                "pipelineStep": "pipelineReal",
                "inputElement": "inputElementReal",
                "exceptionType": "myExceptionType",
                "stackTrace": "stackTraceReal",
                "componentType": "componentTypeReal",
                "dagOperator": "dagOperatorReal",
                "additionalInfo": "info Real"
            },
            {
                "featureName": "featurePSG",
                "jobName": "jobPSG2",
                "pipelineStep": "pipelinePSG",
                "inputElement": "inputElementPSG",
                "exceptionType": "myExceptionType",
                "stackTrace": "stackTracePSG",
                "componentType": "componentTypePSG",
                "dagOperator": "dagOperatorPSG",
                "additionalInfo": "info PSG"
            }
        ]
    
        // [END bigquery_client_default_credentials]
        async function query() {
            const tableId = "`gb-poc-373711.monitoring.job_failure`"
    
            const mergeQuery = `
              MERGE ${tableId} t
              USING UNNEST(@rows) s
              ON t.featureName = s.featureName
              WHEN MATCHED THEN
              UPDATE SET 
                jobName = s.jobName,
                pipelineStep = s.pipelineStep
              WHEN NOT MATCHED THEN
                INSERT (
                    featureName, 
                    jobName,
                    pipelineStep,
                    inputElement,
                    exceptionType,
                    stackTrace,
                    componentType,
                    dagOperator,
                    additionalInfo
                ) 
                VALUES (
                    s.featureName, 
                    s.jobName,
                    s.pipelineStep,
                    s.inputElement,
                    s.exceptionType,
                    s.stackTrace,
                    s.componentType,
                    s.dagOperator,
                    s.additionalInfo
                )`;
    
            // For all options, see https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
            const options = {
                query: mergeQuery,
                // Location must match that of the dataset(s) referenced in the query.
                location: 'EU',
                params: {rows: data}
            };
    
            // Run the query as a job
            const [job] = await bigquery.createQueryJob(options);
            console.log(`Job ${job.id} started.`);
    
            // Wait for the query to finish
            const [rows] = await job.getQueryResults();
    
            // Print the results
            console.log('Rows:');
            rows.forEach(row => console.log(row));
        }
    
        // [END bigquery_query]
        query();
    }
    
    main(...process.argv.slice(2));
    

    It worked correctly on my side.

    • The rows parameter is correctly passed to my query
    • I also tested the merge query on null fields and it worked correctly

    The package.json file :

    {
      "name": "bigquery-node-client-test",
      "version": "1.0.0",
      "description": "",
      "main": "index.js",
      "scripts": {
        "test": "echo \"Error: no test specified\" && exit 1"
      },
      "author": "",
      "license": "ISC",
      "dependencies": {
        "@google-cloud/bigquery": "^6.1.0"
      }
    }
    

    To have the details and reference documentation for the options, you can check this link.