Mysql窗口函数
一、什么是窗口函数?
窗口函数是一种在查询结果的"窗口"上执行计算的函数,它不会像常规聚合函数那样将多行合并为一行,而是为每一行返回一个值,同时保持原始行数不变。
基本语法
<窗口函数>(<参数>) OVER (
[PARTITION BY <分区表达式>]
[ORDER BY <排序表达式> [ASC | DESC]]
[ROWS/RANGE <窗口范围>]
)
- <窗口函数>:可以是聚合函数(如SUM、AVG)或专用函数(如ROW_NUMBER、RANK)。
- OVER():必需子句,定义窗口框架。
- PARTITION BY:可选,用于分组数据;若省略,窗口覆盖整个结果集。
- ORDER BY:可选,指定窗口内行的排序顺序。
- 窗口范围:可选,默认ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW。
二、 准备测试数据
-- 创建销售数据表
CREATE TABLE sales (
sale_id INT PRIMARY KEY,
salesperson VARCHAR(50),
region VARCHAR(50),
sale_date DATE,
amount DECIMAL(10,2)
);
INSERT INTO sales VALUES
(1, '张三', '北京', '2023-01-15', 5000),
(2, '李四', '上海', '2023-01-16', 8000),
(3, '王五', '北京', '2023-01-17', 6000),
(4, '张三', '北京', '2023-02-01', 7000),
(5, '李四', '上海', '2023-02-02', 9000),
(6, '赵六', '广州', '2023-02-03', 4000),
(7, '王五', '北京', '2023-02-04', 5500),
(8, '张三', '北京', '2023-03-01', 8500);
-- 创建员工薪资表
CREATE TABLE employee_salary (
emp_id INT,
name VARCHAR(50),
department VARCHAR(50),
salary DECIMAL(10,2),
hire_date DATE
);
INSERT INTO employee_salary VALUES
(1, '张三', '技术部', 15000, '2020-01-15'),
(2, '李四', '技术部', 12000, '2021-03-20'),
(3, '王五', '销售部', 8000, '2022-06-10'),
(4, '赵六', '销售部', 7500, '2022-08-05'),
(5, '钱七', '技术部', 13000, '2020-11-30'),
(6, '孙八', '人事部', 9000, '2021-09-15'),
(7, '周九', '技术部', 16000, '2019-05-20');
1. 排名函数
ROW_NUMBER()
SELECT
salesperson,
region,
amount,
ROW_NUMBER() OVER (PARTITION BY region ORDER BY amount DESC) as row_num
FROM sales;
结果:为每个地区的销售按金额降序分配唯一序号
RANK() 和 DENSE_RANK()
SELECT
name,
department,
salary,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) as rank_pos,
DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dense_rank_pos
FROM employee_salary;
区别:
RANK(): 相同值相同排名,后续排名跳过 (1,2,2,4)
DENSE_RANK(): 相同值相同排名,后续排名不跳过 (1,2,2,3)
NTILE() - 数据分桶
SELECT
name,
salary,
NTILE(4) OVER (ORDER BY salary DESC) as quartile
FROM employee_salary;
结果:将员工按薪资分为4个分组,用于四分位分析
2. 分布函数
PERCENT_RANK()
SELECT
name,
salary,
ROUND(PERCENT_RANK() OVER (ORDER BY salary) * 100, 2) as percentile
FROM employee_salary;
结果:计算每个薪资在总体中的百分比排名 (0-1之间)
CUME_DIST()
SELECT
name,
salary,
ROUND(CUME_DIST() OVER (ORDER BY salary) * 100, 2) as cumulative_dist
FROM employee_salary;
结果:计算累计分布(小于等于当前值的行数比例)
3. 前后行函数
LAG() 和 LEAD()
SELECT
salesperson,
sale_date,
amount,
LAG(amount, 1) OVER (PARTITION BY salesperson ORDER BY sale_date) as prev_amount,
LEAD(amount, 1) OVER (PARTITION BY salesperson ORDER BY sale_date) as next_amount,
amount - LAG(amount, 1) OVER (PARTITION BY salesperson ORDER BY sale_date) as growth
FROM sales;
应用:计算环比增长、访问前后期数据
FIRST_VALUE() 和 LAST_VALUE()
SELECT
salesperson,
sale_date,
amount,
FIRST_VALUE(amount) OVER (PARTITION BY salesperson ORDER BY sale_date) as first_sale,
LAST_VALUE(amount) OVER (PARTITION BY salesperson ORDER BY sale_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as last_sale
FROM sales;
注意:LAST_VALUE需要指定正确的窗口框架
4. 聚合窗口函数
累计计算
SELECT
salesperson,
sale_date,
amount,
SUM(amount) OVER (PARTITION BY salesperson ORDER BY sale_date) as running_total,
AVG(amount) OVER (PARTITION BY salesperson ORDER BY sale_date) as running_avg
FROM sales;
移动平均
SELECT
sale_date,
amount,
AVG(amount) OVER (ORDER BY sale_date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) as moving_avg_3,
AVG(amount) OVER (ORDER BY sale_date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) as moving_avg_7
FROM sales;
5. 窗口框架详解
框架语法
ROWS BETWEEN frame_start AND frame_end
常用框架范围
SELECT
salesperson,
sale_date,
amount,
-- 从开始到当前行
SUM(amount) OVER (PARTITION BY salesperson ORDER BY sale_date ROWS UNBOUNDED PRECEDING) as total_to_date,
-- 前1行到当前行
AVG(amount) OVER (PARTITION BY salesperson ORDER BY sale_date ROWS 1 PRECEDING) as avg_last_2,
-- 前1行到后1行
SUM(amount) OVER (PARTITION BY salesperson ORDER BY sale_date ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as sum_3_rows
FROM sales;
6. 高级窗口函数应用
复杂业务分析
-- 销售员绩效综合分析
WITH sales_stats AS (
SELECT
salesperson,
region,
sale_date,
amount,
-- 排名分析
ROW_NUMBER() OVER (PARTITION BY region ORDER BY amount DESC) as region_rank,
-- 累计分析
SUM(amount) OVER (PARTITION BY salesperson ORDER BY sale_date) as ytd_sales,
-- 趋势分析
LAG(amount, 1) OVER (PARTITION BY salesperson ORDER BY sale_date) as prev_month,
-- 占比分析
amount * 100.0 / SUM(amount) OVER (PARTITION BY salesperson) as pct_of_total
FROM sales
)
SELECT *,
CASE
WHEN region_rank = 1 THEN '地区冠军'
WHEN region_rank <= 3 THEN '地区前三'
ELSE '其他'
END as performance_level
FROM sales_stats;
员工薪资深度分析
SELECT
name,
department,
salary,
-- 部门内分析
AVG(salary) OVER (PARTITION BY department) as dept_avg,
salary - AVG(salary) OVER (PARTITION BY department) as diff_from_dept_avg,
-- 公司整体分析
AVG(salary) OVER () as company_avg,
salary - AVG(salary) OVER () as diff_from_company_avg,
-- 排名分析
RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank,
RANK() OVER (ORDER BY salary DESC) as company_rank,
-- 分布分析
PERCENT_RANK() OVER (PARTITION BY department ORDER BY salary) as dept_percentile,
CUME_DIST() OVER (ORDER BY salary) as company_cume_dist
FROM employee_salary
ORDER BY department, salary DESC;
7. 性能优化技巧
使用窗口函数替代复杂子查询
不推荐:
SELECT
s1.salesperson,
s1.amount,
(SELECT COUNT(*)
FROM sales s2
WHERE s2.region = s1.region AND s2.amount > s1.amount) + 1 as rank
FROM sales s1;
推荐:
SELECT
salesperson,
amount,
RANK() OVER (PARTITION BY region ORDER BY amount DESC) as rank
FROM sales;
合理使用PARTITION BY
-- 只在需要时使用PARTITION BY
SELECT
salesperson,
region,
amount,
-- 需要分区:按销售员计算累计
SUM(amount) OVER (PARTITION BY salesperson ORDER BY sale_date) as personal_running_total,
-- 不需要分区:总体排名
RANK() OVER (ORDER BY amount DESC) as overall_rank
FROM sales;
8. 实际业务场景
场景1:销售排行榜
SELECT
salesperson,
region,
total_sales,
region_rank,
company_rank
FROM (
SELECT
salesperson,
region,
SUM(amount) as total_sales,
RANK() OVER (PARTITION BY region ORDER BY SUM(amount) DESC) as region_rank,
RANK() OVER (ORDER BY SUM(amount) DESC) as company_rank
FROM sales
GROUP BY salesperson, region
) ranked_sales
WHERE region_rank <= 3 OR company_rank <= 10;
场景2:员工薪资调整分析
SELECT
name,
department,
salary,
dept_avg,
CASE
WHEN salary < dept_avg * 0.9 THEN '低于平均-建议调整'
WHEN salary > dept_avg * 1.2 THEN '高于平均-表现优秀'
ELSE '在正常范围内'
END as adjustment_recommendation
FROM (
SELECT
name,
department,
salary,
AVG(salary) OVER (PARTITION BY department) as dept_avg
FROM employee_salary
) with_avg;
三、总结
窗口函数是MySQL中极其强大的功能,它能够:
简化复杂查询,避免多重子查询
提高查询性能
实现高级分析功能
保持数据的原始粒度
掌握窗口函数能够显著提升数据分析和报表开发的效率