Search code examples
sqlgoogle-bigqueryaggregate-functionsdate-arithmeticsql-null

BigQuery comparing DATE and TIMESTAMP


Here is my example I am using in MySQL. However, in BigQuery, my OnSite timestamp is a DATE and my Documents timestamp is a TIMESTAMP.

BigQuery is having trouble with the below query because I get the message:

No matching signature for function DATE for argument types: DATE. Supported signatures: DATE(TIMESTAMP, [STRING]); DATE(DATETIME); DATE(INT64, INT64, INT64) at [8:146]

Does anyone know what I need to do to make it so the query works with comparing DATEs and TIMESTAMPs?

Schema (MySQL v5.7)

CREATE TABLE OnSite
    (`uid` varchar(55), `worksite_id`  varchar(55), `timestamp` datetime)
;

INSERT INTO OnSite
    (`uid`, `worksite_id`, `timestamp`)
VALUES
  ("u12345", "worksite_1", '2019-01-01'),
  ("u12345", "worksite_1", '2019-01-02'),
  ("u12345", "worksite_1", '2019-01-03'),
  ("u12345", "worksite_1", '2019-01-04'),
  ("u12345", "worksite_1", '2019-01-05'),
  ("u12345", "worksite_1", '2019-01-06'),
  ("u1", "worksite_1", '2019-01-01'),
  ("u1", "worksite_1", '2019-01-02'),
  ("u1", "worksite_1", '2019-01-05'),
  ("u1", "worksite_1", '2019-01-06')

;


CREATE TABLE Documents
    (`document_id` varchar(55), `uid` varchar(55), `worksite_id`  varchar(55), `type` varchar(55), `timestamp` datetime)
;

INSERT INTO Documents
    (`document_id`, `uid`, `worksite_id`, `type`, `timestamp`)

VALUES
  ("1",     "u12345",   "worksite_1", 'work_permit',    '2019-01-01 00:00:00'),
  ("2",     "u12345",   "worksite_2", 'job',            '2019-01-02 00:00:00'),
  ("3",     "u12345",   "worksite_1", 'work_permit',    '2019-01-03 00:00:00'),
  ("4",     "u12345",   "worksite_2", 'job',            '2019-01-04 00:00:00'),
  ("5",     "u12345",   "worksite_1", 'work_permit',    '2019-01-05 00:00:00'),
  ("6",     "u12345",   "worksite_2", 'job',            '2019-01-06 00:00:00'),
  ("7",     "u12345",   "worksite_1", 'work_permit',    '2019-01-07 00:00:00'),
  ("8",     "u12345",   "worksite_2", 'work_permit',    '2019-01-09 00:00:00'),
  ("9",     "u12345",   "worksite_1", 'job',            '2019-01-09 00:00:00'),
  ("10",    "u12345",   "worksite_2", 'work_permit',    '2019-01-09 00:00:00'),
  ("11",    "u12345",   "worksite_1", 'work_permit',    '2019-01-09 00:00:00'),
  ("12",    "u12345",   "worksite_2", 'work_permit',    '2019-01-09 00:00:00'),
  ("13",    "u12345",   "worksite_1", 'job',            '2019-01-09 00:00:00'),
  ("14",    "u12345",   "worksite_2", 'work_permit',    '2019-01-09 00:00:00'),
  ("15",    "u12345",   "worksite_1", 'work_permit',    '2019-01-09 00:00:00')

;

Query #1

SELECT
  IFNULL(OnSite.worksite_id, Documents.worksite_id) as `Worksite`,
  DATE(IFNULL(OnSite.timestamp, Documents.timestamp)) as `Date`,
  COUNT(Documents.worksite_id) as `Users_on_Site`,
  COUNT(DISTINCT OnSite.uid) as `Completed`

FROM OnSite
  LEFT JOIN Documents ON OnSite.worksite_id = Documents.worksite_id AND DATE(OnSite.timestamp) = DATE(Documents.timestamp)
GROUP BY `Date`, `Worksite`;

| Worksite   | Date       | Users_on_Site | Completed |
| ---------- | ---------- | ------------- | --------- |
| worksite_1 | 2019-01-01 | 2             | 2         |
| worksite_1 | 2019-01-02 | 0             | 2         |
| worksite_1 | 2019-01-03 | 1             | 1         |
| worksite_1 | 2019-01-04 | 0             | 1         |
| worksite_1 | 2019-01-05 | 2             | 2         |
| worksite_1 | 2019-01-06 | 0             | 2         |

View on DB Fiddle


Solution

  • Below is for BigQuery Standard SQL

    #standardSQL
    SELECT
      IFNULL(OnSite.worksite_id, Documents.worksite_id) AS `Worksite`,
      IFNULL(OnSite.timestamp, DATE(Documents.timestamp)) AS `DATE`,
      COUNT(Documents.worksite_id) AS `Users_on_Site`,
      COUNT(DISTINCT OnSite.uid) AS `Completed`
    FROM `project.dataset.OnSite` OnSite
    LEFT JOIN `project.dataset.Documents` Documents 
    ON OnSite.worksite_id = Documents.worksite_id 
    AND OnSite.timestamp = DATE(Documents.timestamp)
    GROUP BY `DATE`, `Worksite`
    

    if to apply to sample data from your question

    WITH `project.dataset.OnSite` AS (
      SELECT "u12345" uid, "worksite_1" worksite_id, DATE '2019-01-01' `TIMESTAMP` UNION ALL
      SELECT "u12345", "worksite_1", '2019-01-02' UNION ALL
      SELECT "u12345", "worksite_1", '2019-01-03' UNION ALL
      SELECT "u12345", "worksite_1", '2019-01-04' UNION ALL
      SELECT "u12345", "worksite_1", '2019-01-05' UNION ALL
      SELECT "u12345", "worksite_1", '2019-01-06' UNION ALL
      SELECT "u1", "worksite_1", '2019-01-01' UNION ALL
      SELECT "u1", "worksite_1", '2019-01-02' UNION ALL
      SELECT "u1", "worksite_1", '2019-01-05' UNION ALL
      SELECT "u1", "worksite_1", '2019-01-06' 
    ), `project.dataset.Documents` AS (
      SELECT "1" document_id,     "u12345" uid,   "worksite_1" worksite_id, 'work_permit' type,    TIMESTAMP '2019-01-01 00:00:00' `TIMESTAMP` UNION ALL
      SELECT "2",     "u12345",   "worksite_2", 'job',            '2019-01-02 00:00:00' UNION ALL
      SELECT "3",     "u12345",   "worksite_1", 'work_permit',    '2019-01-03 00:00:00' UNION ALL
      SELECT "4",     "u12345",   "worksite_2", 'job',            '2019-01-04 00:00:00' UNION ALL
      SELECT "5",     "u12345",   "worksite_1", 'work_permit',    '2019-01-05 00:00:00' UNION ALL
      SELECT "6",     "u12345",   "worksite_2", 'job',            '2019-01-06 00:00:00' UNION ALL
      SELECT "7",     "u12345",   "worksite_1", 'work_permit',    '2019-01-07 00:00:00' UNION ALL
      SELECT "8",     "u12345",   "worksite_2", 'work_permit',    '2019-01-09 00:00:00' UNION ALL
      SELECT "9",     "u12345",   "worksite_1", 'job',            '2019-01-09 00:00:00' UNION ALL
      SELECT "10",    "u12345",   "worksite_2", 'work_permit',    '2019-01-09 00:00:00' UNION ALL
      SELECT "11",    "u12345",   "worksite_1", 'work_permit',    '2019-01-09 00:00:00' UNION ALL
      SELECT "12",    "u12345",   "worksite_2", 'work_permit',    '2019-01-09 00:00:00' UNION ALL
      SELECT "13",    "u12345",   "worksite_1", 'job',            '2019-01-09 00:00:00' UNION ALL
      SELECT "14",    "u12345",   "worksite_2", 'work_permit',    '2019-01-09 00:00:00' UNION ALL
      SELECT "15",    "u12345",   "worksite_1", 'work_permit',    '2019-01-09 00:00:00' 
    )
    

    result will be as expected

    Row Worksite    Date        Users_on_Site   Completed    
    1   worksite_1  2019-01-01  2               2    
    2   worksite_1  2019-01-02  0               2    
    3   worksite_1  2019-01-03  1               1    
    4   worksite_1  2019-01-04  0               1    
    5   worksite_1  2019-01-05  2               2    
    6   worksite_1  2019-01-06  0               2