Search code examples
javascriptsqlmysqltypescript

Implement Custom aggregation in Mysql Query


Here is the table

id | module_id | rating

1  |        421   | 3          
2  |        421   | 5        
3. |        5321  | 4          
4  |        5321  | 5           
5  |        5321  | 4              
6  |        641   | 2   
7  |        641   | 3           
8  |        641   | 3            
9  |        641   | 2          
10 |        7201  | 4          
11 |        7201  | 4               
12 |        7201  | 3           
+------------+-----------------+------------------+-----------------------+---------------------------------------------+

In my code, i'm fecthing ratings of a module id and sorting them using a custom formula. and sorting the module ids in descending order with respect to the formula value

here is my code

let arr = [421,5321,641,7201];
let temp = [], ci_bounds = [];
for(let j=0; j<arr.length; j++){
    let id = arr[j];
    let ratings = execute("select * from table where module_id = " + id);
    if(ratings.length>0){
        for(let k=0; k<ratings.length; k++){
            const rat = ratings[k];
            if(rat.rating > 3){
                pos++;
            }else if(rat.rating <= 3){
                neg++;
            }
        }
        let lower_bound = ((pos + 1.9208) / (pos + neg) - 1.96 * Math.sqrt((pos * neg) / (pos + neg) + 0.9604) / (pos + neg)) / (1 + 3.8416 / (pos + neg));
        ci_bounds[j] = lower_bound;
        temp[j] = [arr[j], ci_bounds[j]];
        pos=0; neg=0;
    }else{
        temp[j] = [arr[j], 0];
    }
   let un_empty = temp.filter(function (el) {
      return el != null;
   });
}

Here in temp array i have sub-arrays with first element is module_id and second element is lowerbound.

temp value for above data
[
    [
        421,
        0.09452865480086611
    ],
    [
        5321,
        0.43849391955098216
    ],
    [
        641,
        2.8316237110415135e-17
    ],
    [
        7201,
        0.20765495512648788
    ]
]

and i'm sorting the above input arr with these lower_bounds. so that my final answer array should be reordered format of above input arr according to lower_bound in desc order.


let sortedArray = un_empty.sort(function(a, b) {
    return b[1] - a[1];
});

sortedArray output - 
[
    [
        5321,
        0.43849391955098216
    ],
    [
        7201,
        0.20765495512648788
    ],
    [
        421,
        0.09452865480086611
    ],
    [
        641,
        2.8316237110415135e-17
    ] 
]
arr = sortedArray;
let sorting = [];
for(let i=0; i<arr.length; i++){
    const id_arr = arr[i][1];
    for(let j=0; j<id_arr.length; j++){
        y[j] = id_arr[j][0];
    }
    sorting.push(y);
    y = [];
}
Final ans:  sorting = [5321, 7201, 421, 641];

Can we do this above aggregation in query itself?.


Solution

  • You could do this with a subquery to calculate the values for pos and neg for each module_id, then an outer query to calculate lower_bound:

    SELECT
        module_id,
        ((pos + 1.9208) / (pos + neg) - 1.96 * SQRT((pos * neg) / (pos + neg) + 0.9604) / (pos + neg)) / (1 + 3.8416 / (pos + neg)) as lower_bound
    FROM (
        SELECT module_id, SUM(rating > 3) AS pos, SUM(rating <= 3) AS neg
        FROM ratings
        GROUP BY module_id
    ) posneg
    ORDER BY lower_bound DESC;
    

    That gives a result like this:

    +-----------+------------------------+
    | module_id | lower_bound            |
    +-----------+------------------------+
    |      5321 |    0.43849391932274523 |
    |      7201 |    0.20765495508553936 |
    |       421 |    0.09452865480086611 |
    |       641 | 2.8316237110415135e-17 |
    +-----------+------------------------+
    

    If you just want the module_id values, then you can shift the calculation of lower_bound to the ORDER BY clause:

    SELECT
        module_id
    FROM (
        SELECT module_id, SUM(rating > 3) AS pos, SUM(rating <= 3) AS neg
        FROM ratings
        GROUP BY module_id
    ) posneg
    ORDER BY
        ((pos + 1.9208) / (pos + neg) - 1.96 * SQRT((pos * neg) / (pos + neg) + 0.9604) / (pos + neg)) / (1 + 3.8416 / (pos + neg))
        DESC;
    

    produces:

    +-----------+
    | module_id |
    +-----------+
    |      5321 |
    |      7201 |
    |       421 |
    |       641 |
    +-----------+
    

    If you want to specify particular module_id values, then you can insert these (using JavaScript) into your SQL query. You'll also need to modify the SQL query a little bit - see the example below which shows 5321, 421, 641, 7201 and 26 (which is not present in the ratings table, so gets a lower_bound value of zero):

    SELECT
        module_id
    FROM (
        SELECT t.column_0 as module_id, SUM(rating > 3) AS pos, SUM(rating <= 3) AS neg
        FROM (VALUES
            ROW(421),
            ROW(5321),
            ROW(641),
            ROW(7201),
            ROW(26)
        ) t
        LEFT JOIN ratings ON t.column_0 = ratings.module_id
        GROUP BY t.column_0
    ) posneg
    ORDER BY
        ((pos + 1.9208) / (pos + neg) - 1.96 * SQRT((pos * neg) / (pos + neg) + 0.9604) / (pos + neg)) / (1 + 3.8416 / (pos + neg))
        DESC;