Search code examples
mysqlwindow-functions

Select country has the highest account balance


I am new to SQL.I have two tables Customers and deposit_transactions. I am trying to select the country which has the highest account balance for each date. I am joining deposit_transactions table(to get the sum of amount based on customer_id) and joining with customers table to get the Country

Schema (MySQL v8.0)

CREATE TABLE deposit_transactions (
  `deposit_id` VARCHAR(7),
  `customer_id` VARCHAR(5),
  `date` TEXT,
  `transaction_type` VARCHAR(7),
  `amount` INTEGER,
  `currency` VARCHAR(3)
);

INSERT INTO deposit_transactions
  (`deposit_id`, `customer_id`, `date`, `transaction_type`, `amount`, `currency`)
VALUES
  ('DE47653', 'DO900', '1/1/2019', 'pay_in', '10000', 'EUR'),
  ('DE47654', 'DO901', '2/1/2019', 'pay_in', '10000', 'EUR'),
  ('DE47655', 'DO902', '2/1/2019', 'pay_in', '10000', 'EUR'),
  ('UK47656', 'DO903', '3/1/2019', 'pay_in', '10000', 'GBP'),
  ('UK47657', 'DO904', '3/1/2019', 'pay_in', '130000', 'GBP'),
  ('DE47658', 'DO905', '3/1/2019', 'pay_in', '140000', 'EUR'),
  ('DE47659', 'DO906', '3/1/2019', 'pay_in', '10000', 'EUR'),
  ('DE47660', 'DO907', '3/1/2019', 'pay_in', '10000', 'EUR'),
  ('DE47661', 'DO908', '3/1/2019', 'pay_in', '100000', 'EUR'),
  ('DE47662', 'DO909', '4/1/2019', 'pay_in', '10000', 'EUR'),
  ('UK47663', 'DO910', '4/1/2019', 'pay_in', '10000', 'GBP'),
  ('UK47664', 'DO911', '4/1/2019', 'pay_in', '5000', 'GBP'),
  ('UK47665', 'DO912', '4/1/2019', 'pay_in', '4000', 'GBP'),
  ('DE47666', 'DO913', '5/1/2019', 'pay_in', '10000', 'EUR'),
  ('DE47661', 'DO908', '5/1/2019', 'pay_out', '50000', 'EUR'),
  ('DE47667', 'DO914', '11/1/2019', 'pay_in', '30000', 'EUR'),
  ('DE47668', 'DO915', '11/1/2019', 'pay_in', '10000', 'EUR'),
  ('DE47669', 'DO916', '11/1/2019', 'pay_in', '25000', 'EUR'),
  ('DE47670', 'DO917', '11/1/2019', 'pay_in', '50000', 'EUR'),
  ('DE47667', 'DO914', '12/1/2019', 'pay_out', '7000', 'EUR'),
  ('DE47667', 'DO914', '12/15/2019', 'pay_in', '12000', 'EUR'),
  ('DE47671', 'DO918', '1/1/2021', 'pay_in', '9000', 'EUR'),
  ('DE47672', 'DO919', '1/1/2021', 'pay_in', '10000', 'EUR'),
  ('DE47673', 'DO920', '1/1/2021', 'pay_in', '11000', 'EUR'),
  ('DE47674', 'DO921', '1/1/2021', 'pay_in', '12000', 'EUR'),
  ('DE47675', 'DO922', '1/1/2021', 'pay_in', '13000', 'EUR'),
  ('DE47676', 'DO923', '1/1/2021', 'pay_in', '14000', 'EUR'),
  ('DE47677', 'DO924', '1/1/2021', 'pay_in', '30000', 'EUR'),
  ('DE47678', 'DO925', '1/1/2021', 'pay_in', '16000', 'EUR');

CREATE TABLE customers (
  `customer_id` VARCHAR(5),
  `country` VARCHAR(7)
);

INSERT INTO customers
  (`customer_id`, `country`)
VALUES
  ('DO900', 'Germany'),
  ('DO901', 'Germany'),
  ('DO902', 'Spain'),
  ('DO903', 'UK'),
  ('DO904', 'UK'),
  ('DO905', 'Austria'),
  ('DO906', 'Germany'),
  ('DO907', 'Germany'),
  ('DO908', 'Germany'),
  ('DO909', 'Germany'),
  ('DO910', 'UK'),
  ('DO911', 'UK'),
  ('DO912', 'UK'),
  ('DO913', 'Germany'),
  ('DO914', 'Austria'),
  ('DO915', 'Germany'),
  ('DO916', 'Austria'),
  ('DO917', 'Germany'),
  ('DO918', 'Germany'),
  ('DO919', 'Spain'),
  ('DO920', 'Germany'),
  ('DO921', 'Spain'),
  ('DO922', 'Germany'),
  ('DO923', 'Germany'),
  ('DO924', 'Germany'),
  ('DO925', 'Spain'),
  ('DO926', 'Germany'),
  ('DO927', 'Germany'),
  ('DO928', 'Germany'),
  ('DO929', 'Germany'),
  ('DO711', 'UK'),
  ('DO712', 'UK'),
  ('DO713', 'Germany'),
  ('DO714', 'Austria'),
  ('DO715', 'Germany'),
  ('DO716', 'Austria'),
  ('DO717', 'Germany'),
  ('DO718', 'Germany'),
  ('DO719', 'Spain'),
  ('DO720', 'Germany'),
  ('DO721', 'Spain'),
  ('DO722', 'Germany'),
  ('DO723', 'Germany'),
  ('DO724', 'Germany'),
  ('DO725', 'Spain'),
  ('DO726', 'Germany'),
  ('DO727', 'Germany'),
  ('DO728', 'Germany'),
  ('DO729', 'Germany');

Below is the sample query I created. But I am not sure if its correct

Query

SELECT t.customer_id,
       c.country,t.total_amount
FROM  (SELECT customer_id,
              date,
              transaction_type,
              Sum(amount)
                OVER (
                  partition BY customer_id ) AS total_amount_transfered,
              Row_number()
                OVER (
                  partition BY customer_id
                  ORDER BY date DESC)        AS n
       FROM   deposit_transactions)t
      INNER JOIN customers AS c
              ON c.customer_id = t.customer_id
WHERE  t.n = 1
       AND t.transaction_type = 'pay_in' order by total_amount desc limit 1;


Solution

  • Step 1. Finding the total balance amount for each country.

    Since you need to get the balance and you have two kinds of transactions, I'd rather transform the "pay-out" transactions into negative balance and leave "pay-in" transactions as are. Also you should take into account the different currencies, because "GBP" currency has a higher value than "EUR" currency (GBP ~ EUR*0.85). If you don't want to consider "pay-out" negative impact, you can transform them to 0. This balance wrangling is done here by a CASE statement.

    Then the balances are aggregated with a SUM function over the corresponding countries (accessible because of the JOIN operation) and grouped over the "country" and "date" field.

    SELECT country,
           date AS transaction_date,
    --     STR_TO_DATE(date, '%m/%d/%Y') AS transaction_date,
           SUM(CASE WHEN transaction_type = 'pay_in' AND currency = 'EUR' THEN amount 
                    WHEN transaction_type = 'pay_in' AND currency = 'GBP' THEN amount/0.85
    --              WHEN transaction_type = 'pay_out'                      THEN 0
                    WHEN transaction_type = 'pay_out' AND currency = 'EUR' THEN -amount
                    WHEN transaction_type = 'pay_out' AND currency = 'GBP' THEN -amount/0.85 
               END) AS amount_eur
    FROM       deposit_transactions dt
    INNER JOIN customers c
            ON dt.customer_id = c.customer_id
    GROUP BY country,
             transaction_date
    

    Optionally in this step, you could also transform the "date" field from the TEXT type to the DATE type using the STR_TO_DATE function.


    Step 2. Ranking the countries over the summed amounts.

    In order to apply a rank, you use the ROW_NUMBER window function, to partition on the transaction date and order by the balance amount descendently.

    SELECT *, ROW_NUMBER() OVER(
                  PARTITION BY transaction_date
                  ORDER     BY amount_eur DESC ) AS rn
    FROM   total_amounts_per_country
    

    Step 3. Selecting the country having the highest balance.

    You can just all rows which have the rank equal to 1, given that there will be a 1 for each date. Then select the interesting fields, namely "transaction_date" and "country".

    SELECT transaction_date, 
           country
    FROM ranked_countries
    WHERE rn = 1
    

    Full Query. It uses MySQL common table expressions to combine the three steps.

    WITH total_amounts_per_country AS(
        SELECT country,
               date AS transaction_date,
    --         STR_TO_DATE(date, '%m/%d/%Y') AS transaction_date,
               SUM(CASE WHEN transaction_type = 'pay_in' AND currency = 'EUR' THEN amount 
                        WHEN transaction_type = 'pay_in' AND currency = 'GBP' THEN amount/0.85
    --                  WHEN transaction_type = 'pay_out'                      THEN 0
                        WHEN transaction_type = 'pay_out' AND currency = 'EUR' THEN -amount
                        WHEN transaction_type = 'pay_out' AND currency = 'GBP' THEN -amount/0.85               END) AS amount_eur
        FROM       deposit_transactions dt
        INNER JOIN customers c
                ON dt.customer_id = c.customer_id
        GROUP BY country,
                 transaction_date
    ), ranked_countries AS (
        SELECT *, ROW_NUMBER() OVER(
                      PARTITION BY transaction_date
                      ORDER     BY amount_eur DESC ) AS rn
        FROM   total_amounts_per_country
    )
    SELECT transaction_date, 
           country
    FROM ranked_countries
    WHERE rn = 1
    

    Check the demo here.