GROUP BY 是 SQL 中用于将数据按照一个或多个列进行分组的子句,通常与聚合函数(如 COUNT, SUM, AVG 等)一起使用。
SELECT column1, aggregate_function(column2)
FROM table_name
WHERE condition
GROUP BY column1;
按照指定列将数据分成多个组:
-- 按部门分组
SELECT department, COUNT(*) as employee_count
FROM employees
GROUP BY department;
可以按多个列进行分组:
-- 按部门和职位分组
SELECT department, job_title, COUNT(*) as count
FROM employees
GROUP BY department, job_title;
GROUP BY 常与以下聚合函数配合使用:
COUNT()
- 计数SUM()
- 求和AVG()
- 平均值MAX()
- 最大值MIN()
- 最小值-- 计算每个部门的平均薪资
SELECT department, AVG(salary) as avg_salary
FROM employees
GROUP BY department;
HAVING 用于过滤分组后的结果:
-- 筛选员工数超过5人的部门
SELECT department, COUNT(*) as employee_count
FROM employees
GROUP BY department
HAVING COUNT(*) > 5;
-- 按部门平均薪资降序排列
SELECT department, AVG(salary) as avg_salary
FROM employees
GROUP BY department
ORDER BY avg_salary DESC;
ROLLUP
生成分组的小计和总计行CUBE
生成所有可能的分组组合-- 使用 ROLLUP 生成小计
SELECT department, job_title, SUM(salary) as total_salary
FROM employees
GROUP BY ROLLUP(department, job_title);
错误示例:
-- 错误:name 不在 GROUP BY 子句中
SELECT department, name, AVG(salary)
FROM employees
GROUP BY department;
解决方案:
-- 正确:要么包含在 GROUP BY 中,要么使用聚合函数
SELECT department, STRING_AGG(name, ', ') as employees, AVG(salary)
FROM employees
GROUP BY department;
-- WHERE 在分组前过滤行
SELECT department, AVG(salary)
FROM employees
WHERE hire_date > '2020-01-01'
GROUP BY department;
-- HAVING 在分组后过滤组
SELECT department, AVG(salary)
FROM employees
GROUP BY department
HAVING AVG(salary) > 50000;
GROUP BY 会将 NULL 值视为相同的分组:
-- NULL 部门会被分到同一组
SELECT department, COUNT(*)
FROM employees
GROUP BY department;
-- 按产品和年份分析销售额
SELECT
product_id,
EXTRACT(YEAR FROM sale_date) as year,
SUM(amount) as total_sales,
COUNT(*) as transaction_count
FROM sales
GROUP BY product_id, EXTRACT(YEAR FROM sale_date)
ORDER BY product_id, year;
-- 按日期和页面统计访问量
SELECT
visit_date,
page_url,
COUNT(DISTINCT user_id) as unique_visitors,
COUNT(*) as page_views
FROM web_visits
GROUP BY visit_date, page_url;
GROUP BY 是 SQL 中强大的数据汇总工具,掌握它能显著提高数据分析的效率和质量。