CREATE TABLE tnp ( id INTNOT NULL AUTO_INCREMENT, refBIGINTNOT NULL, name VARCHAR(255), PRIMARY KEY pk (id), UNIQUE KEY uk (name) ); -- 表本身可以正常创建
-- 尝试按 id 分区:唯一键 uk(name) 不包含列 id,报错 ALTER TABLE tnp PARTITIONBY HASH(id) PARTITIONS 4; -- ERROR 1503 (HY000): A UNIQUE INDEX must include all columns -- in this partitioned table's partitioning function
-- 尝试按 name 分区:主键 pk(id) 不包含列 name,报错 ALTER TABLE tnp PARTITIONBY KEY(name) PARTITIONS 4; -- ERROR 1503 (HY000): A PRIMARY KEY must include all columns -- in this partitioned table's partitioning function
解决方式:
将 name 加入主键:主键改为 (id, name),唯一键变为仅 (id),此时可以按 name 进行 KEY 分区
将 id 加入唯一键:唯一键改为 (id, name),主键仍为 (id),此时可以按 id 进行 HASH 分区
移除唯一键:只保留主键,直接按 id 分区
1.3 分区的优势
容纳更多数据:单表数据量可以超过单个磁盘分区的容量
快速删除旧数据:通过 DROP PARTITION 删除整个分区,比 DELETE 高效得多
查询优化:分区裁剪使查询只扫描相关分区,大幅提升性能
灵活维护:可以单独对某个分区执行检查、修复、优化操作
二、分区类型
MySQL 8.4 支持以下分区类型:RANGE、LIST、HASH、KEY,以及基于它们的 COLUMNS 变体和 LINEAR 变体。
2.1 RANGE 分区
按连续范围划分分区,使用 VALUES LESS THAN 定义。范围必须严格递增,不重叠。建议在最后使用 MAXVALUE 作为”兜底”分区,接收所有超出定义范围的行。
语法格式:
1 2 3 4 5 6
PARTITIONBYRANGE (expression) ( PARTITION partition_name VALUES LESS THAN (expr), PARTITION partition_name VALUES LESS THAN (expr), ... PARTITION partition_name VALUES LESS THAN (MAXVALUE) );
以下按 store_id 将员工表分为 4 个分区:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
CREATE TABLE employees ( id INTNOT NULL, fname VARCHAR(30), lname VARCHAR(30), hired DATENOT NULLDEFAULT'1970-01-01', separated DATENOT NULLDEFAULT'9999-12-31', job_code INTNOT NULL, store_id INTNOT NULL ) PARTITIONBYRANGE (store_id) ( PARTITION p0 VALUES LESS THAN (6), PARTITION p1 VALUES LESS THAN (11), PARTITION p2 VALUES LESS THAN (16), PARTITION p3 VALUES LESS THAN (21) );
SELECT PARTITION_NAME, TABLE_ROWS FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME ='employees';
添加带 MAXVALUE 的兜底分区,避免插入超出范围的 store_id 时报错:
1 2 3 4 5 6
ALTER TABLE employees PARTITIONBYRANGE (store_id) ( PARTITION p0 VALUES LESS THAN (6), PARTITION p1 VALUES LESS THAN (11), PARTITION p2 VALUES LESS THAN (16), PARTITION p3 VALUES LESS THAN MAXVALUE );
按年份删除离职员工数据(删除分区即删除数据,比 DELETE 高效得多):
1 2
-- 按年分区后,删除 1991 年前离职的员工 ALTER TABLE employees DROPPARTITION p0;
VALUES LESS THAN 中也可以使用表达式,只要表达式结果可与 < 比较:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
CREATE TABLE employees ( id INTNOT NULL, fname VARCHAR(30), lname VARCHAR(30), hired DATENOT NULLDEFAULT'1970-01-01', separated DATENOT NULLDEFAULT'9999-12-31', job_code INT, store_id INT ) PARTITIONBYRANGE (YEAR(separated)) ( PARTITION p0 VALUES LESS THAN (1991), PARTITION p1 VALUES LESS THAN (1996), PARTITION p2 VALUES LESS THAN (2001), PARTITION p3 VALUES LESS THAN MAXVALUE );
2.2 LIST 分区
按离散值列表划分分区,使用 VALUES IN 定义。分区之间不需要有序,但同一值不能出现在多个分区中。
与 RANGE 不同,LIST 没有类似 MAXVALUE 的兜底机制。如果插入的值不在任何分区列表中,语句报错。使用 IGNORE 关键字可以静默跳过不匹配的行:
1 2 3 4 5 6 7 8 9 10 11 12
-- 插入不匹配的值会报错:store_id=21 不属于任何地区分区 INSERT INTO employees VALUES (99, 'Zara', 'Lee', '2024-01-01', '2024-01-01', 999, 21); -- ERROR 1526 (HY000): Table has no partition for value 21
RANGE COLUMNS 的核心语义与 RANGE 不同:比较的是元组(元组即多列值的列表),而不是标量值。
以下示例演示元组比较语义——插入 (5,10)、(5,11)、(5,12) 三个元组:
1 2 3 4 5 6 7 8 9 10 11
CREATE TABLE rc1 (a INT, b INT) PARTITIONBYRANGE COLUMNS(a, b) ( PARTITION p0 VALUES LESS THAN (5, 12), PARTITION p3 VALUES LESS THAN (MAXVALUE, MAXVALUE) );
INSERT INTO rc1 VALUES (5,10), (5,11), (5,12);
SELECT PARTITION_NAME, TABLE_ROWS FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME ='rc1';
子分区(Subpartitioning,组合分区)在 RANGE 或 LIST 分区的基础上,再按 HASH 或 KEY 划分。总分区数 = 外层分区数 × 子分区数。
语法格式:
1 2 3 4 5 6 7 8 9
PARTITIONBYRANGE (expression) SUBPARTITION BY HASH (expression) SUBPARTITIONS num ( PARTITION partition_name VALUES LESS THAN (expr) ( SUBPARTITION subpartition_name, SUBPARTITION subpartition_name ), ... );
以下示例演示数据分布过程:外层按年份 RANGE 分区,内层按 HASH(TO_DAYS(purchased)) 再分:
1 2 3 4 5 6 7 8
CREATE TABLE ts (id INT, purchased DATE) PARTITIONBYRANGE(YEAR(purchased)) SUBPARTITION BY HASH(TO_DAYS(purchased)) SUBPARTITIONS 2 ( PARTITION p0 VALUES LESS THAN (1990), PARTITION p1 VALUES LESS THAN (2000), PARTITION p2 VALUES LESS THAN MAXVALUE );
CREATE TABLE ts (id INT, purchased DATE) PARTITIONBYRANGE(YEAR(purchased)) SUBPARTITION BY HASH(TO_DAYS(purchased)) ( PARTITION p0 VALUES LESS THAN (1990) ( SUBPARTITION s0, SUBPARTITION s1 ), PARTITION p1 VALUES LESS THAN (2000) ( SUBPARTITION s2, SUBPARTITION s3 ), PARTITION p2 VALUES LESS THAN MAXVALUE ( SUBPARTITION s4, SUBPARTITION s5 ) );
注意事项:
所有外层分区的子分区数必须相同
子分区名称在整个表中必须唯一
子分区只能是 HASH 或 KEY,RANGE/LIST 分区不能作为子分区
SUBPARTITION BY KEY 必须显式指定列(不能省略,这是与 PARTITION BY KEY 的差异)
三、NULL 值处理
MySQL 分区将 NULL 视为小于任何非 NULL 值,但不同分区类型处理方式不同。
3.1 RANGE 分区中的 NULL
NULL 存入编号最小的分区(第一个分区):
1 2 3 4 5 6 7 8 9 10 11
CREATE TABLE t1 ( c1 INT, c2 VARCHAR(20) ) PARTITIONBYRANGE(c1) ( PARTITION p0 VALUES LESS THAN (0), PARTITION p1 VALUES LESS THAN (10), PARTITION p2 VALUES LESS THAN MAXVALUE );
INSERT INTO t1 VALUES (NULL, 'mothra');
查询验证:
1 2 3
SELECT PARTITION_NAME, TABLE_ROWS FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME ='t1';
INSERT INTO th VALUES (NULL, 'mothra'), (0, 'gigan');
PARTITION_NAME
TABLE_ROWS
p0
2
p1
0
NULL 和 0 都被存入 p0。
3.4 RANGE/LIST 分区管理
删除分区(DROP PARTITION)会删除分区及其所有数据:
1 2 3 4 5 6 7 8 9 10
CREATE TABLE tr (id INT, name VARCHAR(50), purchased DATE) PARTITIONBYRANGE(YEAR(purchased)) ( PARTITION p0 VALUES LESS THAN (1990), PARTITION p1 VALUES LESS THAN (1995), PARTITION p2 VALUES LESS THAN (2000), PARTITION p3 VALUES LESS THAN (2005) );
-- 删除 1990-1994 年的分区 ALTER TABLE tr DROPPARTITION p1;
添加分区(ADD PARTITION)只能添加到 RANGE 分区的末端,或 LIST 分区的末尾:
1 2 3 4 5 6 7 8 9 10 11 12 13
CREATE TABLE tr_add ( id INT, name VARCHAR(50), purchased DATE ) PARTITIONBYRANGE(YEAR(purchased)) ( PARTITION p0 VALUES LESS THAN (1990), PARTITION p1 VALUES LESS THAN (2000), PARTITION p2 VALUES LESS THAN (2005) );
-- RANGE 分区:只能添加在高端 ALTER TABLE tr_add ADDPARTITION ( PARTITION p3 VALUES LESS THAN (2010) );
尝试在中间添加分区会报错:
1 2 3 4
ALTER TABLE tr_add ADDPARTITION ( PARTITION p4 VALUES LESS THAN (1970) ); -- ERROR 1493 (HY000): VALUES LESS THAN value must be strictly increasing for each partition
需要在中间插入分区时,使用 REORGANIZE PARTITION 重新组织现有分区:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
CREATE TABLE tr_split ( id INT, name VARCHAR(50), purchased DATE ) PARTITIONBYRANGE(YEAR(purchased)) ( PARTITION p0 VALUES LESS THAN (1990), PARTITION p1 VALUES LESS THAN (2000), PARTITION p2 VALUES LESS THAN (2010) );
INSERT INTO tr_split VALUES (1, 'item1', '1985-06-15'); INSERT INTO tr_split VALUES (2, 'item2', '1988-12-20'); INSERT INTO tr_split VALUES (3, 'item3', '1995-03-10');
SELECT PARTITION_NAME, TABLE_ROWS FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME ='tr_split';
PARTITION_NAME
TABLE_ROWS
p0
2
p1
1
p2
0
将 p0 拆分为两个分区:
1 2 3 4
ALTER TABLE tr_split REORGANIZE PARTITION p0 INTO ( PARTITION n0 VALUES LESS THAN (1970), PARTITION n1 VALUES LESS THAN (1990) );
拆分后验证(注意执行 ANALYZE TABLE 刷新统计):
1 2 3 4
ANALYZE TABLE tr_split;
SELECT PARTITION_NAME, TABLE_ROWS FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME ='tr_split';
CREATE TABLE tr_merge ( id INT, name VARCHAR(50), purchased DATE ) PARTITIONBYRANGE(YEAR(purchased)) ( PARTITION p0 VALUES LESS THAN (1970), PARTITION p1 VALUES LESS THAN (1980), PARTITION p2 VALUES LESS THAN (2010) );
INSERT INTO tr_merge VALUES (1, 'old', '1965-01-01'); INSERT INTO tr_merge VALUES (2, 'mid1', '1975-06-15'); INSERT INTO tr_merge VALUES (3, 'mid2', '1978-12-20');
ALTER TABLE tr_merge REORGANIZE PARTITION p0, p1 INTO ( PARTITION p_merged VALUES LESS THAN (1980) );
拆分分区:REORGANIZE PARTITION 既可以合并分区,也可以将一个分区拆分为多个。
减少 LIST/HASH/KEY 分区的数量:
1 2 3 4 5 6 7 8 9
CREATE TABLE clients ( id INT, signed DATE ) PARTITIONBY HASH(MONTH(signed)) PARTITIONS 12;
-- 将分区数从 12 减少到 8 ALTER TABLE clients COALESCE PARTITION4;
增加分区数:
1 2
-- 增加 6 个分区 ALTER TABLE clients ADDPARTITION PARTITIONS 6;
四、分区裁剪
分区裁剪(Partition Pruning)是 MySQL 分区最重要的查询优化手段。裁剪的原理:查询时只扫描可能包含匹配行的分区,排除不相关分区。执行效果可能使查询快一个数量级。
CREATE TABLE t2 ( fname VARCHAR(50), lname VARCHAR(50), region_code TINYINT UNSIGNED, dob DATE ) PARTITIONBYRANGE(YEAR(dob)) ( PARTITION d0 VALUES LESS THAN (1970), PARTITION d1 VALUES LESS THAN (1975), PARTITION d2 VALUES LESS THAN (1980), PARTITION d3 VALUES LESS THAN (1985), PARTITION d4 VALUES LESS THAN (1990), PARTITION d5 VALUES LESS THAN (2000), PARTITION d6 VALUES LESS THAN (2005), PARTITION d7 VALUES LESS THAN MAXVALUE );
以下查询均可触发裁剪:
1 2 3 4 5 6 7 8
-- 等值查询 SELECT*FROM t2 WHERE dob ='1982-06-23';
-- 范围查询 UPDATE t2 SET region_code =8 WHERE dob BETWEEN'1991-02-15'AND'1997-04-25';
DELETEFROM t2 WHERE dob >='1984-06-21'AND dob <='1999-06-21';
CREATE TABLE employees ( id INTNOT NULL AUTO_INCREMENT PRIMARY KEY, fname VARCHAR(25), lname VARCHAR(25), store_id INT, department_id INT ) PARTITIONBYRANGE(id) ( PARTITION p0 VALUES LESS THAN (5), -- id 1-4 PARTITION p1 VALUES LESS THAN (10), -- id 5-9 PARTITION p2 VALUES LESS THAN (15), -- id 10-14 PARTITION p3 VALUES LESS THAN MAXVALUE -- id 15+ );
SELECT*FROM employees PARTITION (p0, p2) WHERE lname LIKE'S%';
id
fname
lname
store_id
department_id
4
Jim
Smith
2
4
11
Jill
Stone
1
4
结合排序和聚合:
1 2
SELECT id, CONCAT(fname, ' ', lname) AS name FROM employees PARTITION (p0) ORDERBY lname;
id
name
3
Ellen Johnson
4
Jim Smith
1
Bob Taylor
2
Frank Williams
1 2 3
SELECT store_id, COUNT(department_id) AS c FROM employees PARTITION (p1, p2, p3) GROUPBY store_id HAVING c >4;
store_id
c
2
5
5.3 DML 语句中的分区选择
分区选择支持 SELECT、DELETE、INSERT、REPLACE、UPDATE、LOAD DATA、LOAD XML 等多种语句。
在 INSERT 中指定源分区:
1 2 3 4 5 6 7 8 9 10 11 12 13
DROPTABLE IF EXISTS employees_copy; CREATE TABLE employees_copy LIKE employees; ALTER TABLE employees_copy REMOVE PARTITIONING; INSERT INTO employees_copy SELECT*FROM employees WHERE id <=4; -- 模拟已存有其他分区数据
SELECT'复制前:'AS stage; SELECT*FROM employees_copy;
INSERT INTO employees_copy SELECT*FROM employees PARTITION (p2);
SELECT'INSERT ... PARTITION (p2) 后:'AS stage; SELECT*FROM employees_copy WHERE id BETWEEN10AND14;
id
fname
lname
store_id
department_id
10
Lou
Waters
2
4
11
Jill
Stone
1
4
12
Roger
White
3
2
13
Howard
Andrews
1
2
14
Fred
Goldberg
3
3
在 DELETE 中指定分区:
1 2 3
DELETEFROM employees PARTITION (p0) WHERE lname ='Smith';
SELECT*FROM employees PARTITION (p0);
id
fname
lname
store_id
department_id
1
Bob
Taylor
3
2
2
Frank
Williams
1
2
3
Ellen
Johnson
3
4
id=4 (Jim Smith) 被删除,其他分区不受影响。
在 UPDATE 中指定分区:
1 2 3 4
UPDATE employees PARTITION (p2) SET store_id =99WHERE id =10;
SELECT'UPDATE PARTITION (p2) SET store_id=99 WHERE id=10:'AS stage; SELECT id, fname, store_id FROM employees WHERE id =10;
id
fname
store_id
10
Lou
99
5.4 子分区选择
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
CREATE TABLE employees_sub ( id INTNOT NULL, fname VARCHAR(25), lname VARCHAR(25), store_id INT ) PARTITIONBYRANGE(id) SUBPARTITION BY KEY(id) SUBPARTITIONS 2 ( PARTITION p0 VALUES LESS THAN (5), PARTITION p1 VALUES LESS THAN (10), PARTITION p2 VALUES LESS THAN MAXVALUE );
INSERT INTO employees_sub VALUES (1,'Bob','Taylor',3),(2,'Frank','Williams',1),(3,'Ellen','Johnson',3), (4,'Jim','Smith',2),(5,'Mary','Jones',1),(6,'Linda','Black',2), (7,'Ed','Jones',2),(8,'June','Wilson',3), (10,'Alice','Rogers',2),(11,'Tom','Harris',3),(12,'Kate','Lee',1),(13,'Sam','Clark',2);
查询各子分区的数据分布:
1 2 3 4
SELECT PARTITION_NAME, SUBPARTITION_NAME, TABLE_ROWS FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME ='employees_sub' ORDERBY PARTITION_NAME, SUBPARTITION_NAME;