SQL | question brushing | cumulative summation problem

1, The maximum number of visits per user in a single month as of the end of each month and the total number of visits accumulated to that month

1. Create table

CREATE TABLE IF NOT EXISTS visits(
userid VARCHAR(10),
datetime1 DATETIME,
visits INT)ENGINE=InnoDB DEFAULT CHARSET=utf8;

2. Insert data

INSERT INTO visits VALUES('A','2021-01-03 14:59:43',5);
INSERT INTO visits VALUES('A','2021-01-03 14:59:43',15);
INSERT INTO visits VALUES('B','2021-01-03 14:59:43',5);
INSERT INTO visits VALUES('A','2021-01-03 14:59:43',8);
INSERT INTO visits VALUES('B','2021-01-03 14:59:43',25);
INSERT INTO visits VALUES('A','2021-01-03 14:59:43',5);
INSERT INTO visits VALUES('A','2021-02-03 14:59:43',4);
INSERT INTO visits VALUES('A','2021-02-03 14:59:43',6);
INSERT INTO visits VALUES('B','2021-02-03 14:59:43',10);
INSERT INTO visits VALUES('B','2021-02-03 14:59:43',5);
INSERT INTO visits VALUES('A',NOW(),16);
INSERT INTO visits VALUES('A',NOW(),22);
INSERT INTO visits VALUES('B',NOW(),23);
INSERT INTO visits VALUES('B',NOW(),10);
INSERT INTO visits VALUES('B',NOW(),1);

3. Logical SQL
(1) Find the number of visits per user per month
First, extract the month and year (year_month from datetime1) from datetime,
According to the grouping of users and months, calculate the total number of visits per user and month

mysql> SELECT userid,month, SUM(visits) sum_vistits
    -> FROM (SELECT userid,EXTRACT(YEAR_MONTH FROM datetime1) AS month ,visits
    ->       FROM visits) a
    -> GROUP BY userid,month;
+--------+--------+-------------+
| userid | month  | sum_vistits |
+--------+--------+-------------+
| A      | 202101 |          33 |
| B      | 202101 |          30 |
| A      | 202102 |          10 |
| B      | 202102 |          15 |
| A      | 202104 |          38 |
| B      | 202104 |          34 |
+--------+--------+-------------+
6 rows in set (0.01 sec)

(2) Use the window functions MAX() OVER() and SUM() OVER() to find the maximum and cumulative visits by the end of the month

mysql> SELECT userid,month,sum_vistits AS 'user Number of visits in the month',MAX(sum_vistits) OVER(PARTITION BY userid ORDER BY month) AS 'user Maximum visits as of the month' ,SUM(sum_vistits) OVER(PARTITION BY userid ORDER BY month) AS 'user Cumulative visits'
    -> FROM (SELECT userid,month, SUM(visits) sum_vistits
    ->       FROM (SELECT userid,EXTRACT(YEAR_MONTH FROM datetime1) AS month ,visits
    ->             FROM visits) a
    ->       GROUP BY userid,month) A;
+--------+--------+----------------+------------------------+------------------+
| userid | month  | user Number of visits in the month | user Maximum visits as of the month  | user Cumulative visits  |
+--------+--------+----------------+-------------------------+-----------------+
| A      | 202101 |             33 |                      33 |               33 |
| A      | 202102 |             10 |                      33 |               43 |
| A      | 202104 |             38 |                      38 |               81 |
| B      | 202101 |             30 |                      30 |               30 |
| B      | 202102 |             15 |                      30 |               45 |
| B      | 202104 |             34 |                      34 |               79 |
+--------+--------+----------------+-------------------------+------------------+
6 rows in set (0.00 sec)

2, Calculate the number of views and cumulative viewing time of each column

1. Create table

CREATE TABLE IF NOT EXISTS vedio(
	uid INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
	channl VARCHAR(20),
	min INT
)AUTO_INCREMENT=1,
ENGINE=InnoDB DEFAULT CHARSET=utf8;

2. Insert data

INSERT INTO vedio (channl,min) VALUES ('1',23);
INSERT INTO vedio (channl,min) VALUES ('1',12);
INSERT INTO vedio (channl,min) VALUES ('1',12);
INSERT INTO vedio (channl,min) VALUES ('1',32);
INSERT INTO vedio (channl,min) VALUES ('1',342);
INSERT INTO vedio (channl,min) VALUES ('2',13);
INSERT INTO vedio (channl,min) VALUES ('2',34);
INSERT INTO vedio (channl,min) VALUES ('2',13);
INSERT INTO vedio (channl,min) VALUES ('2',134);

3. Logical SQL

mysql> SELECT channl,COUNT(1) AS 'Viewing times' ,SUM(min) AS 'Cumulative viewing time'
    -> FROM vedio
    -> GROUP BY channl;
+--------+--------------+--------------------+
| channl | Viewing times     | Cumulative viewing time       |
+--------+--------------+--------------------+
| 1      |            5 |                421 |
| 2      |            4 |                194 |
+--------+--------------+--------------------+
2 rows in set (0.00 sec)

3, The current month sales of each store and the total sales accumulated to the current month

1. Create table

CREATE TABLE IF NOT EXISTS t1_v(
store VARCHAR(10),
month VARCHAR(10),
sales_amount FLOAT
);

2. Insert data

INSERT INTO t1_v VALUES('a',01,150);
INSERT INTO t1_v VALUES('a',01,200);
INSERT INTO t1_v VALUES('b',01,1000);
INSERT INTO t1_v VALUES('b',01,800);
INSERT INTO t1_v VALUES('c',01,250);
INSERT INTO t1_v VALUES('c',01,220);
INSERT INTO t1_v VALUES('b',01,6000);
INSERT INTO t1_v VALUES('a',02,2000);
INSERT INTO t1_v VALUES('a',02,3000);
INSERT INTO t1_v VALUES('b',02,1000);
INSERT INTO t1_v VALUES('b',02,1500);
INSERT INTO t1_v VALUES('c',02,350);
INSERT INTO t1_v VALUES('c',02,280);
INSERT INTO t1_v VALUES('a',03,350);
INSERT INTO t1_v VALUES('a',03,250);

3. Logical SQL
(1) Find out the monthly sales of each store

SELECT store,month,sum(sales_amount) sum
FROM t1_v
GROUP BY store,month;

(2) Use the window function SUM() OVER() to calculate the cumulative sales of the current month

mysql> SELECT store,month,sum AS 'Current month sales' ,SUM(sum) OVER(PARTITION BY store ORDER BY month) as 'Cumulative sales'
    -> FROM (SELECT store,month,sum(sales_amount) sum
    ->       FROM t1_v
    ->       GROUP BY store,month) t1;
+-------+-------+-----------------+-----------------+
| store | month | Current month sales       | Cumulative sales       |
+-------+-------+-----------------+-----------------+
| a     | 1     |             350 |             350 |
| a     | 2     |            5000 |            5350 |
| a     | 3     |             600 |            5950 |
| b     | 1     |            7800 |            7800 |
| b     | 2     |            2500 |           10300 |
| c     | 1     |             470 |             470 |
| c     | 2     |             630 |            1100 |
+-------+-------+-----------------+-----------------+
7 rows in set (0.01 sec)

4, The two basketball teams had a fierce game and the scores rose alternately. After the game, get a score table, which records the team, player number, player name, score and score time. Now the team should reward the outstanding players in the game. Count the names of players who help their teams surpass the score in the game and the corresponding time

Column: team, player number, player name, score, score time
team,player_id,player_name,score,datetime1
1. Create table

CREATE TABLE IF NOT EXISTS ball_score(
team VARCHAR(10),
player_id INT,
player_name VARCHAR(10),
score INT,
datetime1 DATETIME
);

2. Insert data

INSERT INTO ball_score VALUES('A',1,'A1',1,'2021/03/02 10:01:00');
INSERT INTO ball_score VALUES('A',5,'A5',1,'2021/03/02 10:02:00');
INSERT INTO ball_score VALUES('B',4,'B4',3,'2021/03/02 10:03:00');
INSERT INTO ball_score VALUES('A',4,'A4',3,'2021/03/02 10:04:00');
INSERT INTO ball_score VALUES('B',1,'B1',3,'2021/03/02 10:05:00');
INSERT INTO ball_score VALUES('A',3,'A3',3,'2021/03/02 10:06:00');
INSERT INTO ball_score VALUES('A',4,'A4',3,'2021/03/02 10:07:00');
INSERT INTO ball_score VALUES('B',1,'B1',2,'2021/03/02 10:08:00');
INSERT INTO ball_score VALUES('B',2,'B2',2,'2021/03/02 10:09:00');

3. Logical SQL
At the beginning of the logic error, the following SQL is written. This SQL is only used to screen the teams with high cumulative scores in this game, but it is not necessarily anti super. It is likely that the scores have been high before

SELECT player_name,datetime1
FROM (SELECT *,LAG(team) OVER() AS lag_team,LAG(sum) OVER() AS lag_sum
	  FROM (SELECT *,SUM(score) OVER(PARTITION BY team ORDER BY datetime1) AS sum
	  	    FROM ball_score
	  	    ORDER BY datetime1) t1) t2
WHERE team<>lag_team
AND sum>lag_sum;

Adjusted ideas:
(1) Calculate the cumulative scores of team AB according to the time. If team A scores 0, team B scores 0, and team A scores 0

mysql> SELECT *,
    ->          (CASE WHEN team='A' THEN score ELSE 0 END) AS 'a_score',
    ->          (CASE WHEN team='B' THEN score ELSE 0 END) AS 'b_score'
    -> FROM ball_score;
+------+-----------+-------------+-------+---------------------+---------+---------+
| team | player_id | player_name | score | datetime1           | a_score | b_score |
+------+-----------+-------------+-------+---------------------+---------+---------+
| A    |         1 | A1          |     1 | 2021-03-02 10:01:00 |       1 |       0 |
| A    |         5 | A5          |     1 | 2021-03-02 10:02:00 |       1 |       0 |
| B    |         4 | B4          |     3 | 2021-03-02 10:03:00 |       0 |       3 |
| A    |         4 | A4          |     3 | 2021-03-02 10:04:00 |       3 |       0 |
| B    |         1 | B1          |     3 | 2021-03-02 10:05:00 |       0 |       3 |
| A    |         3 | A3          |     3 | 2021-03-02 10:06:00 |       3 |       0 |
| A    |         4 | A4          |     3 | 2021-03-02 10:07:00 |       3 |       0 |
| B    |         1 | B1          |     2 | 2021-03-02 10:08:00 |       0 |       2 |
| B    |         2 | B2          |     2 | 2021-03-02 10:09:00 |       0 |       2 |
+------+-----------+-------------+-------+---------------------+---------+---------+
9 rows in set (0.00 sec)

(2) Based on the above table, calculate the cumulative score of each time point
Note here that there can be no division by team in SUM() OVER(), so the sum is accumulated according to time, not by component
Otherwise, when team B scores, team A's cumulative score is 0, which is not the result we want.

mysql> SELECT *,SUM(a_score) OVER(ORDER BY datetime1) AS a_sum_score,SUM(b_score) OVER(ORDER BY datetime1) AS b_sum_score
    -> FROM (SELECT *,
    ->      		(CASE WHEN team='A' THEN score ELSE 0 END) AS 'a_score',
    ->     			(CASE WHEN team='B' THEN score ELSE 0 END) AS 'b_score'
    ->       FROM ball_score) t1
    -> ORDER BY datetime1;
+------+-----------+-------------+-------+---------------------+---------+---------+-------------+-------------+
| team | player_id | player_name | score | datetime1           | a_score | b_score | a_sum_score | b_sum_score |
+------+-----------+-------------+-------+---------------------+---------+---------+-------------+-------------+
| A    |         1 | A1          |     1 | 2021-03-02 10:01:00 |       1 |       0 |           1 |           0 |
| A    |         5 | A5          |     1 | 2021-03-02 10:02:00 |       1 |       0 |           2 |           0 |
| B    |         4 | B4          |     3 | 2021-03-02 10:03:00 |       0 |       3 |           2 |           3 |
| A    |         4 | A4          |     3 | 2021-03-02 10:04:00 |       3 |       0 |           5 |           3 |
| B    |         1 | B1          |     3 | 2021-03-02 10:05:00 |       0 |       3 |           5 |           6 |
| A    |         3 | A3          |     3 | 2021-03-02 10:06:00 |       3 |       0 |           8 |           6 |
| A    |         4 | A4          |     3 | 2021-03-02 10:07:00 |       3 |       0 |          11 |           6 |
| B    |         1 | B1          |     2 | 2021-03-02 10:08:00 |       0 |       2 |          11 |           8 |
| B    |         2 | B2          |     2 | 2021-03-02 10:09:00 |       0 |       2 |          11 |          10 |
+------+-----------+-------------+-------+---------------------+---------+---------+-------------+-------------+
9 rows in set (0.00 sec)

(3) There are two definitions of anti superelevation

  (a)Cumulative score by the end of playing A team>B Team, accumulated scores by the end of the game A team<B Team, count B Team anti super
     Cumulative score by the end of playing A team<B Team, accumulated scores by the end of the game A team>B Team, count A Team anti super
     The difference between the above field and the field must be positive or negative, and the multiplication is less than 0
     
  (b)Cumulative score by last game A team<B team,Cumulative score of playing A team=B Team, accumulated scores by the end of the game A team>B Team, count A Team anti super
     Cumulative score by last game A team>B team,Cumulative score of playing A team=B Team, accumulated scores by the end of the game A team<B Team, count B Team anti super
     The difference between the above field and the field is 0. The difference between the field and the field is positive or negative, and the multiplication is less than 0

On the basis of trademark, add three columns, the difference of team A and team B, the difference of playing and the difference of playing

mysql> SELECT *,(a_sum_score-b_sum_score) AS sum_dev,LAG(a_sum_score-b_sum_score,1) OVER() AS sum_dev1,LAG(a_sum_score-b_sum_score,2) OVER() AS sum_dev2
    -> FROM (SELECT *,SUM(a_score) OVER(ORDER BY datetime1) AS a_sum_score,SUM(b_score) OVER(ORDER BY datetime1) AS b_sum_score
    ->       FROM (SELECT *,
    ->      				(CASE WHEN team='A' THEN score ELSE 0 END) AS 'a_score',
    ->      				(CASE WHEN team='B' THEN score ELSE 0 END) AS 'b_score'
    ->        	   FROM ball_score) t1
    ->     	 ORDER BY datetime1) t2;
+------+-----------+-------------+-------+---------------------+---------+---------+-------------+-------------+---------+----------+----------+
| team | player_id | player_name | score | datetime1           | a_score | b_score | a_sum_score | b_sum_score | sum_dev | sum_dev1 | sum_dev2 |
+------+-----------+-------------+-------+---------------------+---------+---------+-------------+-------------+---------+----------+----------+
| A    |         1 | A1          |     1 | 2021-03-02 10:01:00 |       1 |       0 |           1 |           0 |       1 |     NULL |     NULL |
| A    |         5 | A5          |     1 | 2021-03-02 10:02:00 |       1 |       0 |           2 |           0 |       2 |        1 |     NULL |
| B    |         4 | B4          |     3 | 2021-03-02 10:03:00 |       0 |       3 |           2 |           3 |      -1 |        2 |        1 |
| A    |         4 | A4          |     3 | 2021-03-02 10:04:00 |       3 |       0 |           5 |           3 |       2 |       -1 |        2 |
| B    |         1 | B1          |     3 | 2021-03-02 10:05:00 |       0 |       3 |           5 |           6 |      -1 |        2 |       -1 |
| A    |         3 | A3          |     3 | 2021-03-02 10:06:00 |       3 |       0 |           8 |           6 |       2 |       -1 |        2 |
| A    |         4 | A4          |     3 | 2021-03-02 10:07:00 |       3 |       0 |          11 |           6 |       5 |        2 |       -1 |
| B    |         1 | B1          |     2 | 2021-03-02 10:08:00 |       0 |       2 |          11 |           8 |       3 |        5 |        2 |
| B    |         2 | B2          |     2 | 2021-03-02 10:09:00 |       0 |       2 |          11 |          10 |       1 |        3 |        5 |
+------+-----------+-------------+-------+---------------------+---------+---------+-------------+-------------+---------+----------+----------+
9 rows in set (0.00 sec)

Add the WHERE condition and take out the player's name and scoring time

mysql> SELECT player_name AS 'Player name',datetime1 AS 'Scoring time'
    -> FROM (SELECT *,(a_sum_score-b_sum_score) AS sum_dev,LAG(a_sum_score-b_sum_score,1) OVER() AS sum_dev1,LAG(a_sum_score-b_sum_score,2) OVER() AS sum_dev2
    ->       FROM (SELECT *,SUM(a_score) OVER(ORDER BY datetime1) AS a_sum_score,SUM(b_score) OVER(ORDER BY datetime1) AS b_sum_score
    ->             FROM (SELECT *,
    ->      					(CASE WHEN team='A' THEN score ELSE 0 END) AS 'a_score',
    ->     						(CASE WHEN team='B' THEN score ELSE 0 END) AS 'b_score'
    ->       			 FROM ball_score) t1
    ->    		   ORDER BY datetime1) t2)t3
    -> WHERE sum_dev*sum_dev1<0 
    -> OR (sum_dev*sum_dev1=0 AND sum_dev*sum_dev2<0);
+--------------+---------------------+
| Player name     | Scoring time            |
+--------------+---------------------+
| B4           | 2021-03-02 10:03:00 |
| A4           | 2021-03-02 10:04:00 |
| B1           | 2021-03-02 10:05:00 |
| A3           | 2021-03-02 10:06:00 |
+--------------+---------------------+
4 rows in set (0.00 sec)

Keywords: SQL

Added by xeonman13 on Thu, 17 Feb 2022 23:38:08 +0200