Skip to content

Commit

Permalink
Support for sampling tables with generated columns
Browse files Browse the repository at this point in the history
This change adds support for sampling tables with generated columns
and tests to check the new feature is working correctly.

Fixes mla#52
  • Loading branch information
nicholasd-ff committed Feb 15, 2024
1 parent 5ab4d7b commit ba4ff66
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 8 deletions.
27 changes: 21 additions & 6 deletions pg_sample
Original file line number Diff line number Diff line change
Expand Up @@ -684,15 +684,30 @@ while (my $row = lower_keys($sth->fetchrow_hashref)) {
notice "No candidate key found for '$table'; ignoring --ordered";
}
}

# Only extract non-generated columns, ORDER BY necessary because later
# joins rely on column order to be identical between tables.
my @cols_to_copy = map { $_->{column_name} } $dbh->selectall_array(qq{
SELECT column_name
FROM information_schema.columns
WHERE table_name=?
AND table_schema=?
AND is_generated='NEVER'
ORDER BY ordinal_position
}, { Slice => {} }, ($table->table, $table->schema) );
notice "only copying cols [@cols_to_copy] ";
# Quote the column names just in case.
my $quoted_cols_to_copy = join(
', ',
map { $dbh->quote_identifier($_) } @cols_to_copy
);
$dbh->do(qq{
CREATE $unlogged TABLE $sample_table AS
SELECT *
FROM ONLY $table
SELECT $quoted_cols_to_copy
FROM ONLY $table
$tablesample
WHERE $where
$order
$limit
WHERE $where
$order
$limit
});

if ($opt{verbose}) {
Expand Down
33 changes: 31 additions & 2 deletions t/pg_sample.t
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use warnings;
use Carp;
use DBI;
use Getopt::Long qw/ GetOptions :config no_ignore_case /;
use Test::More tests => 15;
use Test::More tests => 26;

$| = 1;

Expand Down Expand Up @@ -273,6 +273,27 @@ $dbh->do(qq{CREATE INDEX "my_index" ON "test_ordered" (name);});
$dbh->do(qq{INSERT INTO "test_ordered" VALUES (1, 'b'), (2, 'a');});
$dbh->do(qq{CLUSTER "test_ordered" USING "my_index"; -- with this, default SELECT will return 2,1;});

### Generated Columns
# We have 3 columns here to catch problems with failing to order by our ordinal position.
$dbh->do(qq{
CREATE TABLE some_numbers(
id int PRIMARY KEY GENERATED ALWAYS AS IDENTITY
, base_val int
, double_val int GENERATED ALWAYS AS (base_val*2) STORED
, other_val text
);
});
$dbh->do(qq{
INSERT INTO some_numbers(base_val, other_val) (
SELECT
generate_series AS base_val,
RPAD('a', generate_series, 'x') AS other_val
FROM generate_series(1, 10)
);
});

### End Generated Columns

# Perform code coverage analysis? Requires Devel::Cover module.
if ($opt{cover}) {
$ENV{PERL5OPT} .= ' -MDevel::Cover=+select,pg_sample,+ignore,.*';
Expand Down Expand Up @@ -315,7 +336,15 @@ is($ord, '2,1', "ordered test case broken, this should return by clustered order
($cnt) = $dbh->selectrow_array("SELECT count(*) FROM $long_schema.$long_name");
is($cnt, 10, "long table name should have 10 rows");


# Check the generated table loaded properly
my @generated_rows = $dbh->selectall_array(
qq{ SELECT base_val, double_val FROM some_numbers; },
{ Slice => {} }
);
is(scalar @generated_rows, 10, "some_numbers table should have 10 rows");
foreach my $row (@generated_rows) {
is($row->{double_val}, $row->{base_val}*2, "The double_val column is not 2x the base_val");
}

@opts = (@base_opts, '--ordered');
$cmd = "pg_sample @opts $opt{db_name} > sample_ordered.sql";
Expand Down

0 comments on commit ba4ff66

Please sign in to comment.