Search code examples
jsongoogle-bigquerytypeform

How to Append to an Array in BigQuery or: How I Learned to Handle Optional Responses in Multiple Choice Typeform Payload


Working with Typeform Surveys

Here is the survey design for question 1: survey question 1

Here is the survey design for question 2: survey question 2

Here is the survey design for question 3: survey question 3

Response Data

I took the survey twice.

On the first submission:

  1. (favorite color) red, blue, Other: yellow
  2. (quest) To seek the holy grail
  3. (another question) what is your name

On the second submission:

  1. (favorite color) red, Other: orange
  2. (quest) Other: something else
  3. (another question) What... is the air-speed velocity of an unladen swallow?

Here is a sample JSON file, which I saved as option.json:

{   "event_id": "EVENT_ID_1",   "event_type": "form_response",   "form_response": {     "submitted_at": "2022-07-12T22:51:01Z",     "token": "TOKEN_1",     "calculated": null,     "answers": [{       "value": {         "boolean": null,         "file_url": null,         "url": null,         "phone_number": null,         "email": null,         "field": {           "ref": "QUESTION_REF_1",           "id": "QUESTION_ID_1",           "type": "multiple_choice"         },         "text": null,         "number": null,         "choices": {           "labels": [{             "value": "red"           }, {             "value": "blue"           }],           "other": "yellow"         },         "type": "choices",         "date": null,         "choice": null       }     }, {       "value": {         "boolean": null,         "file_url": null,         "url": null,         "phone_number": null,         "email": null,         "field": {           "ref": "QUESTION_REF_2",           "id": "QUESTION_ID_2",           "type": "multiple_choice"         },         "text": null,         "number": null,         "choices": {           "labels": [{             "value": "To seek the holy grail"           }],           "other": null         },         "type": "choices",         "date": null,         "choice": null       }     }, {       "value": {         "boolean": null,         "file_url": null,         "url": null,         "phone_number": null,         "email": null,         "field": {           "ref": "QUESTION_REF_3",           "id": "QUESTION_ID_3",           "type": "short_text"         },         "text": "what is your name",         "number": null,         "choices": null,         "type": "text",         "date": null,         "choice": null       }     }],     "form_id": "FORM_ID",     "variables": [],     "definition": {       "id": "FORM_ID",       "title": "Test Multiple Choice Other",       "fields": [{         "value": {           "ref": "QUESTION_REF_1",           "id": "QUESTION_ID_1",           "type": "multiple_choice",           "title": "What is your favorite color?",           "choices": [{             "value": {               "id": "CHOICE_ID_1",               "label": "red"             }           }, {             "value": {               "id": "CHOICE_ID_2",               "label": "blue"             }           }],           "allow_multiple_selections": "true",           "allow_other_choice": "true"         }       }, {         "value": {           "ref": "QUESTION_REF_2",           "id": "QUESTION_ID_2",           "type": "multiple_choice",           "title": "What is your quest?",           "choices": [{             "value": {               "id": "CHOICE_ID_3",               "label": "To seek the holy grail"             }           }],           "allow_multiple_selections": "true",           "allow_other_choice": "true"         }       }, {         "value": {           "ref": "QUESTION_REF_3",           "id": "QUESTION_ID_3",           "type": "short_text",           "title": "What else could I ask?",           "choices": [],           "allow_multiple_selections": null,           "allow_other_choice": null         }       }]     },     "hidden": null,     "landed_at": "2022-07-12T22:49:34Z"   },   "_sdc_received_at": "2022-07-12T22:51:33.248Z",   "_sdc_sequence": "1657666262148",   "_sdc_batched_at": "2022-07-12T22:57:17.785Z",   "_sdc_table_version": "0" }
{   "event_id": "EVENT_ID_2",   "event_type": "form_response",   "form_response": {     "submitted_at": "2022-07-12T22:53:08Z",     "token": "TOKEN_2",     "calculated": null,     "answers": [{       "value": {         "boolean": null,         "file_url": null,         "url": null,         "phone_number": null,         "email": null,         "field": {           "ref": "QUESTION_REF_1",           "id": "QUESTION_ID_1",           "type": "multiple_choice"         },         "text": null,         "number": null,         "choices": {           "labels": [{             "value": "red"           }],           "other": "orange"         },         "type": "choices",         "date": null,         "choice": null       }     }, {       "value": {         "boolean": null,         "file_url": null,         "url": null,         "phone_number": null,         "email": null,         "field": {           "ref": "QUESTION_REF_2",           "id": "QUESTION_ID_2",           "type": "multiple_choice"         },         "text": null,         "number": null,         "choices": {           "labels": [],           "other": "something else"         },         "type": "choices",         "date": null,         "choice": null       }     }, {       "value": {         "boolean": null,         "file_url": null,         "url": null,         "phone_number": null,         "email": null,         "field": {           "ref": "QUESTION_REF_3",           "id": "QUESTION_ID_3",           "type": "short_text"         },         "text": "What... is the air-speed velocity of an unladen swallow?",         "number": null,         "choices": null,         "type": "text",         "date": null,         "choice": null       }     }],     "form_id": "FORM_ID",     "variables": [],     "definition": {       "id": "FORM_ID",       "title": "Test Multiple Choice Other",       "fields": [{         "value": {           "ref": "QUESTION_REF_1",           "id": "QUESTION_ID_1",           "type": "multiple_choice",           "title": "What is your favorite color?",           "choices": [{             "value": {               "id": "CHOICE_ID_1",               "label": "red"             }           }, {             "value": {               "id": "CHOICE_ID_2",               "label": "blue"             }           }],           "allow_multiple_selections": "true",           "allow_other_choice": "true"         }       }, {         "value": {           "ref": "QUESTION_REF_2",           "id": "QUESTION_ID_2",           "type": "multiple_choice",           "title": "What is your quest?",           "choices": [{             "value": {               "id": "CHOICE_ID_3",               "label": "To seek the holy grail"             }           }],           "allow_multiple_selections": "true",           "allow_other_choice": "true"         }       }, {         "value": {           "ref": "QUESTION_REF_3",           "id": "QUESTION_ID_3",           "type": "short_text",           "title": "What else could I ask?",           "choices": [],           "allow_multiple_selections": null,           "allow_other_choice": null         }       }]     },     "hidden": null,     "landed_at": "2022-07-12T22:51:27Z"   },   "_sdc_received_at": "2022-07-12T22:53:38.604Z",   "_sdc_sequence": "1657666388772",   "_sdc_batched_at": "2022-07-12T22:57:17.818Z",   "_sdc_table_version": "0" }

Then I created a table in BigQuery using that option.json file:

BQ Create Table with Console

I am trying to flatten this in a way that I can easily share it with others.

  1. sequence for multiple choice responses: one issue is that I need to keep track of how many options someone selects for each multiple choice question.
  2. another problem is that I need to be able to handle the "other" option, which does not get saved in the same way as a regular multiple choice option.

What I have so far works fine as long as no one selects "optional" in the multiple choice types:

SELECT
  tf.event_id,
  a.value.field.id AS question_id,
  f.value.title AS question_title,
  a.value.type AS question_type,
  CASE
    WHEN a.value.type = 'choices' THEN c.value
    WHEN a.value.type = 'text' THEN a.value.text
  END AS value,
  IF( a.value.type = 'choices' , choices_index+1 , 1 ) AS sequence,
  COALESCE( ARRAY_LENGTH( a.value.choices.labels ) , 1 ) AS sequence_end,
FROM `<PROJECT_ID>.<DATASET_ID>.option` AS tf
LEFT JOIN UNNEST( tf.form_response.answers ) AS a
LEFT JOIN UNNEST( a.value.choices.labels ) AS c WITH OFFSET AS choices_index
INNER JOIN UNNEST( form_response.definition.fields ) f
  ON a.value.field.id = f.value.id

Result: query result

Result in JSON:

[{
  "event_id": "EVENT_ID_1",
  "question_id": "QUESTION_ID_1",
  "question_title": "What is your favorite color?",
  "question_type": "choices",
  "value": "red",
  "sequence": "1",
  "sequence_end": "2"
}, {
  "event_id": "EVENT_ID_1",
  "question_id": "QUESTION_ID_1",
  "question_title": "What is your favorite color?",
  "question_type": "choices",
  "value": "blue",
  "sequence": "2",
  "sequence_end": "2"
}, {
  "event_id": "EVENT_ID_1",
  "question_id": "QUESTION_ID_2",
  "question_title": "What is your quest?",
  "question_type": "choices",
  "value": "To seek the holy grail",
  "sequence": "1",
  "sequence_end": "1"
}, {
  "event_id": "EVENT_ID_1",
  "question_id": "QUESTION_ID_3",
  "question_title": "What else could I ask?",
  "question_type": "text",
  "value": "what is your name",
  "sequence": "1",
  "sequence_end": "1"
}, {
  "event_id": "EVENT_ID_2",
  "question_id": "QUESTION_ID_1",
  "question_title": "What is your favorite color?",
  "question_type": "choices",
  "value": "red",
  "sequence": "1",
  "sequence_end": "1"
}, {
  "event_id": "EVENT_ID_2",
  "question_id": "QUESTION_ID_2",
  "question_title": "What is your quest?",
  "question_type": "choices",
  "value": null,
  "sequence": null,
  "sequence_end": "0"
}, {
  "event_id": "EVENT_ID_2",
  "question_id": "QUESTION_ID_3",
  "question_title": "What else could I ask?",
  "question_type": "text",
  "value": "What... is the air-speed velocity of an unladen swallow?",
  "sequence": "1",
  "sequence_end": "1"
}]

I want the result to append the "optional" responses to the end of the list of the multiple-choice selections, which would mean my total query result should have 9 rows.

Specifically: EVENT_ID_1, QUESTION_ID_1 should have 3 rows:

  1. red (sequence: 1, sequence_end: 3)
  2. blue (sequence: 2, sequence_end: 3)
  3. yellow (sequence: 3, sequence_end: 3)

and EVENT_ID_2, QUESTION_ID_1 should have 2 rows:

  1. red (sequence: 1, sequence_end: 2)
  2. orange (sequence: 2, sequence_end: 2)

and EVENT_ID_3, QUESTION_ID_2 should have 1 non-null row:

  1. something else (sequence: 1, sequence_end: 1)

I need your brilliance. Could you please help?


Solution

  • Basic SQL approach can be:

    • Find all the normal response values.
    • Find all the other response values.
    • Combine them together using union all

    Below is an example from your shared data:

    with titles as (
    select distinct f.value.id question_id,f.value.title as question_title 
    from `TABLE_NAME` tf
    ,UNNEST( form_response.definition.fields ) f
    )
    ,response as(
     
    select tf.event_id 
    ,a.value.field.id as question_id
    ,a.value.type AS question_type
    ,CASE
        WHEN a.value.type = 'choices' THEN c.value
        WHEN a.value.type = 'text' THEN a.value.text
      END AS value
    from `TABLE_NAME` tf
    ,UNNEST( tf.form_response.answers ) AS a
    left join UNNEST( a.value.choices.labels ) AS c
     
    )
    ,optionals as(
      select distinct tf.event_id 
        ,a.value.field.id as question_id
        ,a.value.type AS question_type
        ,a.value.choices.other value
      from `TABLE_NAME` tf
      ,UNNEST( tf.form_response.answers ) AS a
      where a.value.choices.other is not null
     
    )
    select t.question_title
    ,f.*
    ,row_number() over (partition by event_id,f.question_id,question_type ) sequence
    ,count(*) over (partition by event_id,f.question_id,question_type) sequence_end
    from (
    select * from response where value is not null
    union all
    select * from optionals
    )f
    left join titles t on f.question_id = t.question_id
    order by event_id,question_id
    
    

    Above is just an example, your query also works fine all you have to do is get another result set which will contain only optional value and merge both results together.

    N.B Use Temp tables instead of CTE.