Search code examples
mysqlmedian

how to count median properly in mysql 5.7


this is my fiddle https://dbfiddle.uk/?rdbms=mysql_5.7&fiddle=7946871d9c25cd8914353c70fde1fe8d

so this is my queries select count(user_id) as itung, user_Id from

(SELECT t1.user_id, 
       t1.createdAt cretecompare1, 
       t2.createdAt cretecompare2,
       DATEDIFF(t2.createdAt, t1.createdAt) diff
-- table for a transaction
FROM test t1
-- table for prev. transaction
JOIN test t2 ON t1.user_id = t2.user_id 
            AND t1.createdAt < t2.createdAt
            AND 7 NOT IN (t1.status_id, t2.status_id)
JOIN (SELECT t3.user_id
      FROM test t3
      WHERE t3.status_id != 7
      GROUP BY t3.user_id
      HAVING SUM(t3.createdAt < '2020-04-01') > 1
         AND SUM(t3.createdAt BETWEEN '2020-02-01' AND '2020-04-01')) t4 ON t1.user_id = t4.user_id
WHERE NOT EXISTS (SELECT NULL
                   FROM test t5
                   WHERE t1.user_id = t5.user_id
                     AND t5.status_id != 7
                     AND t1.createdAt < t5.createdAt
                     AND t5.createdAt < t2.createdAt)
HAViNG cretecompare2  BETWEEN '2020-02-01' AND '2020-04-01') aa
group by user_Id
output table:
    +--------+---------+
    |  itung | user_Id |
    +--------+---------+
    |      1 |      13 |
    |      2 |      14 |
    +--------+---------+

based on that table i want to find out the max(itung), min(itung), and the median(itung), with this query

select max(itung), min(itung), format(avg(itung), 2),  IF(count(*)%2 = 1, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ',')
, ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL), ROUND((CAST(SUBSTRING_INDEX(SUBSTRING_INDEX
( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100
* COUNT(*) + 1), ',', -1) AS DECIMAL) + CAST(SUBSTRING_INDEX(SUBSTRING_INDEX
( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100
* COUNT(*)), ',', -1) AS DECIMAL)) / 2)) as median from
(select count(user_id) as itung, user_Id from 
(SELECT t1.user_id, 
       t1.createdAt cretecompare1, 
       t2.createdAt cretecompare2,
       DATEDIFF(t2.createdAt, t1.createdAt) diff
-- table for a transaction
FROM test t1
-- table for prev. transaction
JOIN test t2 ON t1.user_id = t2.user_id 
            AND t1.createdAt < t2.createdAt
            AND 7 NOT IN (t1.status_id, t2.status_id)
JOIN (SELECT t3.user_id
      FROM test t3
      WHERE t3.status_id != 7
      GROUP BY t3.user_id
      HAVING SUM(t3.createdAt < '2020-04-01') > 1
         AND SUM(t3.createdAt BETWEEN '2020-02-01' AND '2020-04-01')) t4 ON t1.user_id = t4.user_id
WHERE NOT EXISTS (SELECT NULL
                   FROM test t5
                   WHERE t1.user_id = t5.user_id
                     AND t5.status_id != 7
                     AND t1.createdAt < t5.createdAt
                     AND t5.createdAt < t2.createdAt)
HAViNG cretecompare2  BETWEEN '2020-02-01' AND '2020-04-01') aa
group by user_Id) ab

output table:
+------------+------------+-----------------------+--------+
| max(itung) | min(itung) | format(avg(itung), 2) | median |
+------------+------------+-----------------------+--------+
|          2 |          1 |                  1.50 |      2 |
+------------+------------+-----------------------+--------+

you know that's wrong query for the median because median should be 1,5 not 2. where my wrong at in my median query?


Solution

  • You have ROUND() there to round the reported median to an integer. If you don't want that, remove it:

    select max(itung), min(itung), format(avg(itung), 2),  IF(count(*)%2 = 1, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ',') , ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL), (CAST(SUBSTRING_INDEX(SUBSTRING_INDEX ( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100 * COUNT(*) + 1), ',', -1) AS DECIMAL) + CAST(SUBSTRING_INDEX(SUBSTRING_INDEX ( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL)) / 2) as median
    

    or leave in the round and add a number of decimal places to round to, here 3:

    select max(itung), min(itung), format(avg(itung), 2),  IF(count(*)%2 = 1, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ',') , ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL), ROUND((CAST(SUBSTRING_INDEX(SUBSTRING_INDEX ( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100 * COUNT(*) + 1), ',', -1) AS DECIMAL) + CAST(SUBSTRING_INDEX(SUBSTRING_INDEX ( GROUP_CONCAT(itung ORDER BY itung SEPARATOR ','), ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL)) / 2, 3)) as median
    

    Be aware that looking up the median values from a GROUP_CONCAT comma-separated list of all the values only works if there are not too many rows, since GROUP_CONCAT will be truncated at @@group_concat_max_len, which defaults to 1024 characters on MySQL or on MariaDB before 10.2.