Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner/core: keep sort operator when ordered by tablesample (#48315) #48449

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -913,12 +913,16 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty, planCounter
prop.CanAddEnforcer = true
}
ds.storeTask(prop, t)
<<<<<<< HEAD:planner/core/find_best_task.go
if ds.SampleInfo != nil && !t.invalid() {
if _, ok := t.plan().(*PhysicalTableSample); !ok {
warning := expression.ErrInvalidTableSample.GenWithStackByArgs("plan not supported")
ds.ctx.GetSessionVars().StmtCtx.AppendWarning(warning)
}
}
=======
err = validateTableSamplePlan(ds, t, err)
>>>>>>> 83e39bc83ab (planner/core: keep sort operator when ordered by tablesample (#48315)):pkg/planner/core/find_best_task.go
}()

t, err = ds.tryToGetDualTask()
Expand Down Expand Up @@ -2122,10 +2126,8 @@ func (ds *DataSource) convertToSampleTable(prop *property.PhysicalProperty,
return invalidTask, nil
}
if candidate.isMatchProp {
// TableSample on partition table can't keep order.
if ds.tableInfo.GetPartitionInfo() != nil {
return invalidTask, nil
}
// Disable keep order property for sample table path.
return invalidTask, nil
}
p := PhysicalTableSample{
TableSampleInfo: ds.SampleInfo,
Expand Down Expand Up @@ -2478,3 +2480,15 @@ func pushDownNot(ctx sessionctx.Context, conds []expression.Expression) []expres
}
return conds
}

// validateTableSamplePlan checks the task chosen for a data source that may
// carry a TABLESAMPLE clause.
//
// A non-nil err is handed back unchanged. Otherwise, when ds.SampleInfo is set
// and t is a valid task, the plan of t must be a *PhysicalTableSample; any
// other plan produces ErrInvalidTableSample with the argument
// "plan not supported". In all remaining cases nil is returned.
func validateTableSamplePlan(ds *DataSource, t task, err error) error {
	if err != nil {
		return err
	}
	// Nothing to verify without sample info or for an invalid task.
	if ds.SampleInfo == nil || t.invalid() {
		return nil
	}
	if _, isSample := t.plan().(*PhysicalTableSample); isSample {
		return nil
	}
	return expression.ErrInvalidTableSample.GenWithStackByArgs("plan not supported")
}
191 changes: 191 additions & 0 deletions tests/integrationtest/r/executor/sample.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
set @@global.tidb_scatter_region=1
drop table if exists t;
set tidb_enable_clustered_index = on;
create table t (a varchar(255) primary key, b bigint);
insert into t values ('b', 100), ('y', 100);
split table t between ('a') and ('z') regions 2;
select a from t tablesample regions();
a
b
y
drop table t;
create table t (a varchar(255), b int, c decimal, primary key (a, b, c));
split table t between ('a', 0, 0) and ('z', 100, 100) regions 2;
insert into t values ('b', 10, 100), ('y', 100, 10);
select * from t tablesample regions();
a b c
b 10 100
y 100 10
drop table t;
create table t (a bigint primary key, b int default 10);
split table t between (1) and (100000) regions 4;
insert into t(a) values (200), (25600), (50300), (99900), (99901);
select a from t tablesample regions();
a
200
25600
50300
99900
drop table t;
create table t (a bigint, b int default 10);
split table t between (0) and (100000) regions 4;
insert into t(a) values (1), (2), (3);
select a from t tablesample regions();
a
1
set tidb_enable_clustered_index=default;
drop table if exists t;
create table t (a int, b varchar(255));
insert into t values (1, 'abc');
create view v as select * from t;
select * from v tablesample regions();
Error 8128 (HY000): Invalid TABLESAMPLE: Unsupported TABLESAMPLE in views
select * from information_schema.tables tablesample regions();
Error 8128 (HY000): Invalid TABLESAMPLE: Unsupported TABLESAMPLE in virtual tables
select a from t tablesample system();
Error 8128 (HY000): Invalid TABLESAMPLE: Only supports REGIONS sampling method
select a from t tablesample bernoulli(10 percent);
Error 8128 (HY000): Invalid TABLESAMPLE: Only supports REGIONS sampling method
select a from t as t1 tablesample regions(), t as t2 tablesample system();
Error 8128 (HY000): Invalid TABLESAMPLE: Only supports REGIONS sampling method
select a from t tablesample ();
Error 8128 (HY000): Invalid TABLESAMPLE: Only supports REGIONS sampling method
drop table if exists t;
create table t (a int, b varchar(255));
insert into t values (1, 'abc');
select _tidb_rowid from t tablesample regions();
_tidb_rowid
1
select a, _tidb_rowid from t tablesample regions();
a _tidb_rowid
1 1
select _tidb_rowid, b from t tablesample regions();
_tidb_rowid b
1 abc
select b, _tidb_rowid, a from t tablesample regions();
b _tidb_rowid a
abc 1 1
drop table if exists t;
create table t (a int, b varchar(255), primary key (a)) partition by hash(a) partitions 2;
insert into t values (1, '1'), (2, '2'), (3, '3');
select count(*) from t tablesample regions();
count(*)
2
delete from t;
insert into t values (1, '1');
select count(*) from t partition (p0) tablesample regions();
count(*)
0
select count(*) from t partition (p1) tablesample regions();
count(*)
1
drop table if exists t;
create table t (a int, b int, unique key idx(a)) partition by range (a) (
partition p0 values less than (0),
partition p1 values less than (10),
partition p2 values less than (30),
partition p3 values less than (maxvalue));
insert into t values (2, 2), (31, 31), (12, 12);
select _tidb_rowid from t tablesample regions() order by _tidb_rowid;
_tidb_rowid
1
2
3
drop table if exists t;
create table t (a int primary key, b int as (a + 1), c int as (b + 1), d int as (c + 1));
split table t between (0) and (10000) regions 4;
insert into t(a) values (1), (2), (2999), (4999), (9999);
select a from t tablesample regions();
a
1
2999
9999
select c from t tablesample regions();
c
3
3001
10001
select a, b from t tablesample regions();
a b
1 2
2999 3000
9999 10000
select d, c from t tablesample regions();
d c
4 3
3002 3001
10002 10001
select a, d from t tablesample regions();
a d
1 4
2999 3002
9999 10002
drop table if exists t;
create table t (a int primary key);
split table t between (0) and (40000) regions 4;
insert into t values (1), (1000), (10002);
select * from t tablesample regions();
a
1
10002
begin;
insert into t values (20006), (50000);
select * from t tablesample regions();
a
1
10002
delete from t where a = 1;
select * from t tablesample regions();
a
1
10002
commit;
select * from t tablesample regions();
a
1000
10002
20006
50000
drop table if exists t;
create table t (a int primary key);
split table t between (0) and (40000) regions 4;
insert into t values (1), (1000), (10002);
begin;
select * from t tablesample regions();
a
1
10002
insert into t values (20006), (50000);
select * from t tablesample regions();
a
1
10002
commit;
select * from t tablesample regions();
a
1
10002
20006
50000
drop table if exists t;
create table t (a int primary key, b int, c varchar(255));
split table t between (0) and (10000) regions 5;
insert into t values (1000, 1, '1'), (1001, 1, '1'), (2100, 2, '2'), (4500, 3, '3');
create index idx_0 on t (b);
select a from t tablesample regions() order by a;
a
1000
2100
4500
select a from t use index (idx_0) tablesample regions() order by a;
Error 8128 (HY000): Invalid TABLESAMPLE: plan not supported
DROP TABLE IF EXISTS a;
CREATE TABLE a (pk bigint unsigned primary key clustered, v text);
INSERT INTO a WITH RECURSIVE b(pk) AS (SELECT 1 UNION ALL SELECT pk+1 FROM b WHERE pk < 1000) SELECT pk, 'a' FROM b;
INSERT INTO a WITH RECURSIVE b(pk) AS (SELECT 1 UNION ALL SELECT pk+1 FROM b WHERE pk < 1000) SELECT pk + (1<<63), 'b' FROM b;
SPLIT TABLE a BY (500);
SELECT * FROM a TABLESAMPLE REGIONS() ORDER BY pk;
pk v
500 a
9223372036854775809 b
set @@global.tidb_scatter_region=default;
133 changes: 133 additions & 0 deletions tests/integrationtest/t/executor/sample.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
set @@global.tidb_scatter_region=1

# TestTableSampleSchema
# Runs tablesample regions() against tables with different primary key
# layouts while tidb_enable_clustered_index is switched on for the session.
drop table if exists t;
set tidb_enable_clustered_index = on;
# Case 1: single-column varchar primary key, split with "regions 2".
create table t (a varchar(255) primary key, b bigint);
insert into t values ('b', 100), ('y', 100);
split table t between ('a') and ('z') regions 2;
select a from t tablesample regions();
drop table t;
# Case 2: composite (varchar, int, decimal) primary key, split with "regions 2".
create table t (a varchar(255), b int, c decimal, primary key (a, b, c));
split table t between ('a', 0, 0) and ('z', 100, 100) regions 2;
insert into t values ('b', 10, 100), ('y', 100, 10);
select * from t tablesample regions();
drop table t;
# Case 3: bigint primary key, split with "regions 4", five rows inserted.
create table t (a bigint primary key, b int default 10);
split table t between (1) and (100000) regions 4;
insert into t(a) values (200), (25600), (50300), (99900), (99901);
select a from t tablesample regions();
drop table t;
# Case 4: no primary key declared, split with "regions 4".
create table t (a bigint, b int default 10);
split table t between (0) and (100000) regions 4;
insert into t(a) values (1), (2), (3);
select a from t tablesample regions();
# Put the session variable back to its default value.
set tidb_enable_clustered_index=default;

# TestTableSampleInvalid
# Every query in this section is expected to fail with error 8128.
drop table if exists t;
create table t (a int, b varchar(255));
insert into t values (1, 'abc');
create view v as select * from t;
# Sampling a view.
-- error 8128
select * from v tablesample regions();
# Sampling an information_schema table.
-- error 8128
select * from information_schema.tables tablesample regions();
# Sampling method system().
-- error 8128
select a from t tablesample system();
# Sampling method bernoulli() with a percentage argument.
-- error 8128
select a from t tablesample bernoulli(10 percent);
# Two references to t where only the second uses a method other than regions().
-- error 8128
select a from t as t1 tablesample regions(), t as t2 tablesample system();
# Empty sampling method.
-- error 8128
select a from t tablesample ();

# TestTableSampleWithTiDBRowID
# Selects _tidb_rowid from a sampled table that declares no primary key:
# alone, after a regular column, before one, and between two.
drop table if exists t;
create table t (a int, b varchar(255));
insert into t values (1, 'abc');
select _tidb_rowid from t tablesample regions();
select a, _tidb_rowid from t tablesample regions();
select _tidb_rowid, b from t tablesample regions();
select b, _tidb_rowid, a from t tablesample regions();

# TestTableSampleWithPartition
# Hash-partitioned table with 2 partitions: count the sampled rows of the
# whole table first.
drop table if exists t;
create table t (a int, b varchar(255), primary key (a)) partition by hash(a) partitions 2;
insert into t values (1, '1'), (2, '2'), (3, '3');
select count(*) from t tablesample regions();
# Leave a single row in the table, then sample each partition separately
# through the "partition (...)" clause.
delete from t;
insert into t values (1, '1');
select count(*) from t partition (p0) tablesample regions();
select count(*) from t partition (p1) tablesample regions();
## Test https://github.com/pingcap/tidb/issues/27349
# Range-partitioned table with a unique key, sampled with an ORDER BY on
# _tidb_rowid. The three inserted rows have values of a that fall in the
# ranges of p1, p3 and p2 respectively.
drop table if exists t;
create table t (a int, b int, unique key idx(a)) partition by range (a) (
partition p0 values less than (0),
partition p1 values less than (10),
partition p2 values less than (30),
partition p3 values less than (maxvalue));
insert into t values (2, 2), (31, 31), (12, 12);
select _tidb_rowid from t tablesample regions() order by _tidb_rowid;

# TestTableSampleGeneratedColumns
# b, c and d are generated columns, each defined as the previous column
# plus one. The queries sample different combinations of the stored column a
# and the generated columns.
drop table if exists t;
create table t (a int primary key, b int as (a + 1), c int as (b + 1), d int as (c + 1));
split table t between (0) and (10000) regions 4;
insert into t(a) values (1), (2), (2999), (4999), (9999);
select a from t tablesample regions();
select c from t tablesample regions();
select a, b from t tablesample regions();
select d, c from t tablesample regions();
select a, d from t tablesample regions();

# TestTableSampleUnionScanIgnorePendingKV
# Samples the table before a transaction, inside it after an uncommitted
# insert and after an uncommitted delete, and once more after commit.
drop table if exists t;
create table t (a int primary key);
split table t between (0) and (40000) regions 4;
insert into t values (1), (1000), (10002);
# Baseline sample outside any explicit transaction.
select * from t tablesample regions();
begin;
# Sample with pending (uncommitted) inserts in the transaction.
insert into t values (20006), (50000);
select * from t tablesample regions();
# Sample with a pending (uncommitted) delete in the transaction.
delete from t where a = 1;
select * from t tablesample regions();
commit;
# Sample after the insert and delete have been committed.
select * from t tablesample regions();

# TestTableSampleTransactionConsistency
# Samples inside a transaction on the default connection before and after
# rows are inserted through a second connection, then again after commit.
drop table if exists t;
create table t (a int primary key);
split table t between (0) and (40000) regions 4;
insert into t values (1), (1000), (10002);
begin;
# First sample inside the open transaction.
select * from t tablesample regions();

# NOTE(review): the insert below presumably runs on conn1, outside the
# transaction opened above - confirm against the tester's connect semantics.
connect (conn1, localhost, root,, executor__sample);
insert into t values (20006), (50000);
connection default;

# Second sample, still inside the transaction on the default connection.
select * from t tablesample regions();
commit;
# Sample after the transaction on the default connection has ended.
select * from t tablesample regions();
disconnect conn1;

# TestTableSampleNotSupportedPlanWarning
# Despite the "Warning" in the section name, the second query below is
# expected to fail with error 8128 rather than succeed with a warning.
drop table if exists t;
create table t (a int primary key, b int, c varchar(255));
split table t between (0) and (10000) regions 5;
insert into t values (1000, 1, '1'), (1001, 1, '1'), (2100, 2, '2'), (4500, 3, '3');
create index idx_0 on t (b);
# Sampling with an ORDER BY on the primary key and no index hint is expected
# to succeed.
select a from t tablesample regions() order by a;
# Same query with a "use index (idx_0)" hint, expected to be rejected.
-- error 8128
select a from t use index (idx_0) tablesample regions() order by a;

# TestTableSampleUnsignedIntHandle
# Clustered bigint unsigned primary key. The first insert generates pk values
# 1 through 1000 with a recursive CTE. The second adds (1<<63) to the same
# sequence, giving pk values beyond the signed bigint maximum. The table is
# then split at 500 and sampled with an ORDER BY on pk.
DROP TABLE IF EXISTS a;
CREATE TABLE a (pk bigint unsigned primary key clustered, v text);
INSERT INTO a WITH RECURSIVE b(pk) AS (SELECT 1 UNION ALL SELECT pk+1 FROM b WHERE pk < 1000) SELECT pk, 'a' FROM b;
INSERT INTO a WITH RECURSIVE b(pk) AS (SELECT 1 UNION ALL SELECT pk+1 FROM b WHERE pk < 1000) SELECT pk + (1<<63), 'b' FROM b;
SPLIT TABLE a BY (500);
SELECT * FROM a TABLESAMPLE REGIONS() ORDER BY pk;

set @@global.tidb_scatter_region=default;