I want to create a flag in Bigquery which will return 1 when true and 0 when false. The statement works fine when it has to return the "else" value which is 0. However, when it satisfies the condition, it returns two rows with both 1 and 0 in them. Why is this happening?
Below is the code used:
table AS(
SELECT
id,
month,
ROUND((text/(month_days/7)), 2) AS value
FROM (SELECT id, extract(month FROM date) AS month,
(32 - EXTRACT(DAY FROM DATE_ADD(DATE_TRUNC(DATE(date), MONTH), INTERVAL 31 DAY))) AS month_days,
sum(text_sent) AS text
FROM table1
WHERE date BETWEEN '2020-01-01 00:00:00 UTC' AND '2020-06-30 00:00:00 UTC'
GROUP BY 1,2,3)),
table_flag AS(
SELECT
id,
CASE
WHEN month = 1 AND value > 100 THEN 1
WHEN month = 2 AND value > 150 THEN 1
WHEN month = 3 AND value > 130 THEN 1
WHEN month = 4 AND value > 200 THEN 1
WHEN month = 5 AND value > 235 THEN 1
WHEN month = 6 AND value > 125 THEN 1
WHEN month = 7 AND value > 324 THEN 1
WHEN month = 8 AND value > 160 THEN 1
WHEN month = 9 AND value > 350 THEN 1
WHEN month = 10 AND value > 80 THEN 1
WHEN month = 11 AND value > 245 THEN 1
ELSE 0
END AS value_flag
FROM
table)
SELECT
t.id,
t.value,
t.month,
tf.value_flag
FROM
table t
LEFT JOIN
table_flag tf
ON
t.id = tf.id
WHERE t.id IS NOT NULL
GROUP BY 1,2,3,4
ORDER BY 1
I have also tried nested IF, but that doesn't work either:
SELECT DISTINCT(id),
(IF((month = 1 AND value > 100), 1,
(IF((month = 2 AND value > 150), 1,
(IF((month = 3 AND value > 130), 1,
(IF((month = 4 AND value > 200), 1,
(IF((month = 5 AND value > 235), 1,
(IF((month = 6 AND value > 125), 1,
(IF((month = 7 AND value > 324), 1,
(IF((month = 8 AND value > 160), 1,
(IF((month = 9 AND value > 350), 1,
(IF((month = 10 AND value > 80), 1,
(IF((month = 11 AND value > 245), 1,0))))))))))))))))))))))
AS value_flag
FROM table)
This is how the output looks right now (This is NOT what I want):
The output is completely wrong. Please suggest alternate method (if any) to do it.
P.S.: This is my first question here, please let me know if any other information is needed. Thanks in advance for the help!
Both table
and table_flag
has several rows with identical id
. BigQuery for each row in table
finds several rows in table_flag
. To remove duplicates we could add month
to table_flag
and to the ON
clause. But we actually do not need the last LEFT JOIN
. Try this:
WITH table AS(
SELECT
id,
month,
ROUND((text/(month_days/7)), 2) AS value
FROM (
SELECT
id,
extract(month FROM date) AS month,
(32 - EXTRACT(DAY FROM DATE_ADD(DATE_TRUNC(DATE(date), MONTH), INTERVAL 31 DAY))) AS month_days,
sum(text_sent) AS text
FROM table1
WHERE
date BETWEEN '2020-01-01 00:00:00 UTC' AND '2020-06-30 00:00:00 UTC'
AND id IS NOT NULL
GROUP BY 1,2,3
)
)
SELECT
id,
value,
month,
CASE
WHEN month = 1 AND value > 100 THEN 1
WHEN month = 2 AND value > 150 THEN 1
WHEN month = 3 AND value > 130 THEN 1
WHEN month = 4 AND value > 200 THEN 1
WHEN month = 5 AND value > 235 THEN 1
WHEN month = 6 AND value > 125 THEN 1
WHEN month = 7 AND value > 324 THEN 1
WHEN month = 8 AND value > 160 THEN 1
WHEN month = 9 AND value > 350 THEN 1
WHEN month = 10 AND value > 80 THEN 1
WHEN month = 11 AND value > 245 THEN 1
ELSE 0
END AS value_flag
FROM table
ORDER BY 1
or this:
SELECT
id,
month,
ROUND((text/(month_days/7)), 2) AS value,
CASE
WHEN month = 1 AND ROUND((text/(month_days/7)), 2) > 100 THEN 1
WHEN month = 2 AND ROUND((text/(month_days/7)), 2) > 150 THEN 1
WHEN month = 3 AND ROUND((text/(month_days/7)), 2) > 130 THEN 1
WHEN month = 4 AND ROUND((text/(month_days/7)), 2) > 200 THEN 1
WHEN month = 5 AND ROUND((text/(month_days/7)), 2) > 235 THEN 1
WHEN month = 6 AND ROUND((text/(month_days/7)), 2) > 125 THEN 1
WHEN month = 7 AND ROUND((text/(month_days/7)), 2) > 324 THEN 1
WHEN month = 8 AND ROUND((text/(month_days/7)), 2) > 160 THEN 1
WHEN month = 9 AND ROUND((text/(month_days/7)), 2) > 350 THEN 1
WHEN month = 10 AND ROUND((text/(month_days/7)), 2) > 80 THEN 1
WHEN month = 11 AND ROUND((text/(month_days/7)), 2) > 245 THEN 1
ELSE 0
END AS value_flag
FROM (
SELECT
id,
extract(month FROM date) AS month,
(32 - EXTRACT(DAY FROM DATE_ADD(DATE_TRUNC(DATE(date), MONTH), INTERVAL 31 DAY))) AS month_days,
sum(text_sent) AS text
FROM table1
WHERE
date BETWEEN '2020-01-01 00:00:00 UTC' AND '2020-06-30 00:00:00 UTC'
AND id IS NOT NULL
GROUP BY 1,2,3
)
ORDER BY 1