sql segmentation-fault snowflake-cloud-data-platform polynomials

How to use the polynomial function equation to extract coefficients from all segments in all channels using SQL query

I am looking to query the data for a list of channels from Snowflake using SQL query and then used the polynomial function equation to each segment in each channel. The number of rows for each segment is 100 rows. I tried the SQL query below, but I have received this error:

Error: invalid identifier 'engine_temperature_1' (line 32) -->

AVG(engine_temperature_1) - (SUM(x * CAST(TIME AS FLOAT) * CAST(TIME AS FLOAT)) / SUM(CAST(TIME AS FLOAT) * CAST(TIME AS FLOAT))) - (SUM(x * CAST(TIME AS FLOAT)) / SUM(CAST(TIME AS FLOAT) * CAST(TIME AS FLOAT))) AS c

This is the query:

WITH C1 AS (
SELECT
    SERIAL_NUMBER,
    EVENT_DATE,
    EVENT_TS AS TIME,
    CAST (VIMS:_engine_speed AS FLOAT) AS ENGSPD,
    CAST (VIMS:_engine_load AS FLOAT) AS Engine_load,
    CAST (VIMS:_engine_temperature_1 AS FLOAT) AS engine_temperature_1,
    CAST (VIMS:_engine_temperature_2) AS engine_temperature_2,
    CAST (VIMS:_engine_temperature_3 AS FLOAT) AS engine_temperature_3,
    CAST (VIMS:_engine_temperature_4 AS FLOAT) AS engine_temperature_4,
    FLOOR((ROW_NUMBER() OVER (ORDER BY EVENT_TS) - 1) / 100) + 1 AS 
segment_id
FROM "HELIOS_TDH_VIMS_PROD_DB"."BASE"."DATA_LOGGER"
WHERE
    SERIAL_NUMBER in ('KSN00205')
    AND EVENT_DATE = '2022-11-18'
ORDER BY
    SERIAL_NUMBER, EVENT_TS
)
SELECT C1.TIME,
    C1.segment_id,
    AVG(C1.engine_temperature_1) AS avg_temp,
    AVG(C1.engine_temperature_1) - (a * POWER(AVG(C1.TIME), 2) + b * 
AVG(C1.TIME) + c) AS residual
FROM C1
CROSS JOIN (
    SELECT segment_id,
    SUM(x) / COUNT(*) AS a,
    SUM(x * TIME) / SUM(TIME * TIME) AS b,
    AVG(engine_temperature_1) - (SUM(x * CAST(TIME AS FLOAT) * CAST(TIME AS 
FLOAT)) / SUM(CAST(TIME AS FLOAT) * CAST(TIME AS FLOAT))) - (SUM(x * 
CAST(TIME AS FLOAT)) / SUM(CAST(TIME AS FLOAT) * CAST(TIME AS FLOAT))) AS c
FROM (SELECT
    segment_id,
    engine_temperature_1 - AVG(engine_temperature_1) AS x,
    TIME
    FROM C1)
GROUP BY
    segment_id) 
GROUP BY C1.TIME, C1.segment_id

What should change in that query to fix the error above.

Solution

You are getting the error because when you pull things from C1 in your CROSS JOIN SUB-SELECT you don't pull it in:

thus:

WITH c1 AS (
    SELECT
        serial_number,
        event_date,
        event_ts AS time,
        vims:_engine_speed::float AS ENGSPD,
        vims:_engine_load::float AS Engine_load,
        vims:_engine_temperature_1::float AS engine_temperature_1,
        vims:_engine_temperature_2::float AS engine_temperature_2,
        vims:_engine_temperature_3::float) AS engine_temperature_3,
        vims:_engine_temperature_4::float) AS engine_temperature_4,
        FLOOR((ROW_NUMBER() OVER (ORDER BY event_ts) - 1) / 100) + 1 AS segment_id
    FROM helios_tdh_vims_prod_db.base.data_logger
    WHERE serial_number in ('KSN00205')
        AND event_date = '2022-11-18'
    ORDER BY serial_number, event_ts
)
SELECT 
    c1.time,
    c1.segment_id,
    AVG(C1.engine_temperature_1) AS avg_temp,
    AVG(C1.engine_temperature_1) - (a * POWER(AVG(C1.TIME), 2) + b * AVG(C1.TIME) + c) AS residual
FROM C1
CROSS JOIN (
    SELECT 
        segment_id,
        SUM(x) / COUNT(*) AS a,
        SUM(x * TIME) / SUM(TIME * TIME) AS b,
        AVG(engine_temperature_1) - (SUM(x * TIME::float * TIME::float) / SUM(TIME::float * TIME::float)) - (SUM(x * TIME::float) / SUM(TIME::float * TIME::float)) AS c
    FROM (
        SELECT
            segment_id,
            engine_temperature_1 - AVG(engine_temperature_1) AS x,
            engine_temperature_1,
            TIME
        FROM C1
    )
    GROUP BY segment_id
) 
GROUP BY c1.time, c1.segment_id

should work. I would be inclined to to the float and squaring once, to make the code more readable. As the optimizer should do this anyways.

WITH c1 AS (
    SELECT
        serial_number,
        event_date,
        event_ts AS time,
        vims:_engine_speed::float AS ENGSPD,
        vims:_engine_load::float AS Engine_load,
        vims:_engine_temperature_1::float AS engine_temperature_1,
        vims:_engine_temperature_2::float AS engine_temperature_2,
        vims:_engine_temperature_3::float) AS engine_temperature_3,
        vims:_engine_temperature_4::float) AS engine_temperature_4,
        FLOOR((ROW_NUMBER() OVER (ORDER BY event_ts) - 1) / 100) + 1 AS segment_id
    FROM helios_tdh_vims_prod_db.base.data_logger
    WHERE serial_number in ('KSN00205')
        AND event_date = '2022-11-18'
    ORDER BY serial_number, event_ts
)
SELECT 
    c1.time,
    c1.segment_id,
    AVG(C1.engine_temperature_1) AS avg_temp,
    AVG(C1.engine_temperature_1) - (a * POWER(AVG(c1.time), 2) + b * AVG(c1.time) + c) AS residual
FROM C1
CROSS JOIN (
    SELECT 
        segment_id,
        SUM(x) / COUNT(*) AS a,
        SUM(x * time) / SUM(time * time) AS b,
        AVG(engine_temperature_1) - (SUM(x * time_f_sq) / SUM(time_f_sq)) - (SUM(x * time_f) / SUM(time_f_sq)) AS c
    FROM (
        SELECT
            segment_id,
            engine_temperature_1 - AVG(engine_temperature_1) AS x,
            engine_temperature_1,
            time,
            time * time as time_sq,
            time::float as time_f,
            time_f * time_f as time_f_sq
        FROM C1
    )
    GROUP BY segment_id
) 
GROUP BY c1.time, c1.segment_id

now that I run it:

WITH fake_data(event_ts, _engine_temperature_1) as (
    select * from values
    (current_timestamp, 50.0),
    (current_timestamp, 100.0),
    (current_timestamp, 150.0)
),c1 AS (
    SELECT
        event_ts AS time,
        _engine_temperature_1 AS engine_temperature_1,
        FLOOR((ROW_NUMBER() OVER (ORDER BY event_ts) - 1) / 100) + 1 AS segment_id
    FROM fake_data
), c2 as (
    SELECT
        segment_id,
        engine_temperature_1 - AVG(engine_temperature_1) AS x,
        engine_temperature_1,
        time,
        time * time as time_sq,
        time::float as time_f,
        time_f * time_f as time_f_sq
    FROM C1
), c3 as (
    SELECT 
        segment_id,
        SUM(x) / COUNT(*) AS a,
        SUM(x * time) / SUM(time_sq) AS b,
        AVG(engine_temperature_1) - (SUM(x * time_f_sq) / SUM(time_f_sq)) - (SUM(x * time_f) / SUM(time_f_sq)) AS c
    FROM c2
    GROUP BY segment_id
)
SELECT 
    c1.time,
    c1.segment_id,
    AVG(C1.engine_temperature_1) AS avg_temp,
    AVG(C1.engine_temperature_1) - (a * POWER(AVG(c1.time), 2) + b * AVG(c1.time) + c) AS residual
FROM C1
CROSS JOIN c3 
GROUP BY c1.time, c1.segment_id

it begs the question what is time_ts unit? because time does not normally be squared.

ok, so if I change to just int's for time, and just make more data that makes sense, and comment out that final maths part, because Snowflake is correct "it just makes no sense"

WITH fake_data(time, engine_temperature_1, segment_id) as (
    select *  from values
    (10, 50.0, 0),
    (13, 100.0, 0),
    (15, 150.0, 0),
    (111, 51.0, 1),
    (114, 101.0, 1),
    (116, 151.0, 1)
),c1 AS (
    select * from fake_data
    --SELECT
    --    event_ts AS time,
    --    _engine_temperature_1 AS engine_temperature_1,
    --    FLOOR((ROW_NUMBER() OVER (ORDER BY event_ts) - 1) / 100) + 1 AS segment_id
    --FROM fake_data
), c2 as (
    SELECT
        segment_id,
        engine_temperature_1 - AVG(engine_temperature_1) over(partition by segment_id) AS x,
        engine_temperature_1,
        time,
        time * time as time_sq,
        time::float as time_f,
        time_f * time_f as time_f_sq
    FROM C1
), c3 as (
    SELECT 
        segment_id,
        SUM(x) / COUNT(*) AS a,
        SUM(x * time) / SUM(time_sq) AS b,
        AVG(engine_temperature_1) - (SUM(x * time_f_sq) / SUM(time_f_sq)) - (SUM(x * time_f) / SUM(time_f_sq)) AS c
    FROM c2
    GROUP BY segment_id
)
SELECT 
    c1.time,
    c1.segment_id,
    *
    --AVG(C1.engine_temperature_1) AS avg_temp,
   --avg_temp - (c3.a * POWER(AVG(c1.time), 2) + c3.b * AVG(c1.time) + c3.c) AS residual
FROM C1
CROSS JOIN c3 
--GROUP BY c1.time, c1.segment_id

gives:

which makes me think you shouldn't be doing a cross join, but still what you really are wanting/expecting is missing, and guess the gaps is too hard.