Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for generated columns #53

Merged
merged 3 commits into from
Feb 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM perl:5.28.3
FROM perl:5.38

WORKDIR /app

Expand Down
62 changes: 53 additions & 9 deletions pg_sample
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ BEGIN {
return bless {
schema => shift,
table => shift,
columns => shift,
};
}

Expand All @@ -251,6 +252,31 @@ BEGIN {
$self->{schema} = shift if @_;
return $self->{schema};
}

sub columns {
my $self = shift;
return $self->{columns};
}

sub quote_column {
my $self = shift;
my $column = shift;
my $alias = shift;
if (defined($alias)) {
return "$alias." . $self->dbh->quote_identifier($column);
} else {
return $self->dbh->quote_identifier($column);
}
}

sub columns_quoted {
my $self = shift;
my $alias = shift;
return join(
', ',
map { $self->quote_column($_, $alias) } @{$self->{columns}}
);
}

sub table {
my $self = shift;
Expand Down Expand Up @@ -411,7 +437,7 @@ sub quote_constant (@) {
$name;
};

return Table->new($opt{sample_schema}, $sample_table);
return Table->new($opt{sample_schema}, $sample_table, $table->columns);
}
}

Expand Down Expand Up @@ -611,7 +637,9 @@ notice "[limit] $_->[0] = $_->[1]\n" foreach @limits;
my @tables;
my %sample_tables; # real table name -> sample table name
my $sth = $dbh->table_info(undef, undef, undef, 'TABLE');
while (my $row = lower_keys($sth->fetchrow_hashref)) {
my $table_info = $sth->fetchall_arrayref({});
foreach my $row (@{$table_info}) {
$row = lower_keys($row);
next unless uc $row->{table_type} eq 'TABLE'; # skip SYSTEM TABLE values
next if $row->{table_schem} eq 'information_schema'; # special pg schema
next if $opt{schema} && $row->{table_schem} ne $opt{schema};
Expand All @@ -622,7 +650,16 @@ while (my $row = lower_keys($sth->fetchrow_hashref)) {
my $tname = $row->{pg_table} || unquote_identifier($row->{TABLE_NAME})
or die "no pg_table or TABLE_NAME value?!";

my $table = Table->new($sname, $tname);
my $columns = [map { $_->{column_name} } $dbh->selectall_array(qq{
SELECT column_name
FROM information_schema.columns
WHERE table_name=?
AND table_schema=?
AND is_generated='NEVER'
ORDER BY ordinal_position
}, { Slice => {} }, ($tname, $sname) ) ];

my $table = Table->new($sname, $tname, $columns);
push @tables, $table;

my $sample_table = sample_table($table);
Expand Down Expand Up @@ -684,15 +721,20 @@ while (my $row = lower_keys($sth->fetchrow_hashref)) {
notice "No candidate key found for '$table'; ignoring --ordered";
}
}
# Only extract non-generated columns, ORDER BY necessary because later
# joins rely on column order to be identical between tables.
my $quoted_cols = $table->columns_quoted;
notice "copying cols [$quoted_cols] ";
# Quote the column names just in case.

$dbh->do(qq{
CREATE $unlogged TABLE $sample_table AS
SELECT *
FROM ONLY $table
SELECT $quoted_cols
FROM ONLY $table
$tablesample
WHERE $where
$order
$limit
WHERE $where
$order
$limit
});

if ($opt{verbose}) {
Expand All @@ -718,6 +760,7 @@ foreach my $table (@tables) {
my $fk_table = Table->new(
unquote_identifier($row->{fk_table_schem}),
unquote_identifier($row->{fk_table_name}),
$table->columns,
);
my $fk_name = "$fk_table.$row->{fk_name}"; # unique key to group FK rows
my $fk_col = $row->{fk_column_name};
Expand Down Expand Up @@ -764,9 +807,10 @@ while ($num_rows) {

# Insert into the sample table all the rows needed to
# satisfy the fk table, except those already present.
my $quoted_cols = $target_table->columns_quoted("t1");
my $query = qq{
INSERT INTO $target_sample_table
SELECT DISTINCT t1.*
SELECT DISTINCT $quoted_cols
FROM $target_table t1
JOIN $sample_fk_table f1 ON ($join1)
LEFT JOIN $target_sample_table s1 ON ($join2)
Expand Down
68 changes: 60 additions & 8 deletions t/pg_sample.t
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use warnings;
use Carp;
use DBI;
use Getopt::Long qw/ GetOptions :config no_ignore_case /;
use Test::More tests => 15;
use Test::More tests => 27;

$| = 1;

Expand Down Expand Up @@ -273,13 +273,52 @@ $dbh->do(qq{CREATE INDEX "my_index" ON "test_ordered" (name);});
$dbh->do(qq{INSERT INTO "test_ordered" VALUES (1, 'b'), (2, 'a');});
$dbh->do(qq{CLUSTER "test_ordered" USING "my_index"; -- with this, default SELECT will return 2,1;});

### Generated Columns
# We have 3 columns here to catch problems with failing to order by our ordinal position.
$dbh->do(qq{
CREATE TABLE some_numbers(
number_id int PRIMARY KEY GENERATED ALWAYS AS IDENTITY
, base_val int
, double_val int GENERATED ALWAYS AS (base_val*2) STORED
, other_val text
);
});

$dbh->do(qq{
INSERT INTO some_numbers(base_val, other_val) (
SELECT
generate_series AS base_val,
RPAD('a', generate_series, 'x') AS other_val
FROM generate_series(1, 10)
);
});

$dbh->do(qq{
CREATE TABLE some_number_ref(
ref_id int PRIMARY KEY GENERATED ALWAYS AS IDENTITY
, number_id int REFERENCES some_numbers(number_id)
, note text
);
});

$dbh->do(qq{
INSERT INTO some_number_ref(number_id, note) (
SELECT number_id, RPAD('b',base_val,'y') FROM some_numbers
)
});

### End Generated Columns

# Perform code coverage analysis? Requires Devel::Cover module.
if ($opt{cover}) {
$ENV{PERL5OPT} .= ' -MDevel::Cover=+select,pg_sample,+ignore,.*';
}

my @opts = ('--limit=100');
push @opts, '--verbose' if $opt{verbose};
my @base_opts = ();
push @base_opts, '--db_pass='.$opt{db_pass} if $opt{db_pass};
push @base_opts, '--verbose' if $opt{verbose};
my @opts = (@base_opts, '--limit=100');

my $cmd = "pg_sample @opts $opt{db_name} > sample.sql";
system($cmd) == 0 or die "pg_sample failed: $?";

Expand All @@ -306,16 +345,29 @@ my $row = $dbh->selectrow_hashref(qq{
is($row->{name}, "\\.", "escaping");

# without --ordered, test_ordered returns as per clustered order
my($ord) = $dbh->selectrow_array(qq{ SELECT STRING_AGG(id::text, ',') FROM "test_ordered" GROUP BY TRUE });
my($ord) = $dbh->selectrow_array(qq{ SELECT STRING_AGG(id::text, ',') FROM "test_ordered" });
is($ord, '2,1', "ordered test case broken, this should return by clustered order");

($cnt) = $dbh->selectrow_array("SELECT count(*) FROM $long_schema.$long_name");
is($cnt, 10, "long table name should have 10 rows");

# Check the generated table loaded properly
my @generated_rows = $dbh->selectall_array(
qq{ SELECT base_val, double_val FROM some_numbers; },
{ Slice => {} }
);
is(scalar @generated_rows, 10, "some_numbers table should have 10 rows");
foreach my $row (@generated_rows) {
is($row->{double_val}, $row->{base_val}*2, "The double_val column is not 2x the base_val");
}
# Check generated table fk referential integrity
my @reference_rows = $dbh->selectall_array(
qq{ SELECT * FROM some_number_ref; },
{ Slice => {} }
);
is(scalar @reference_rows, 10, "The some_number_ref table should have 10 rows");


@opts = ('--ordered');
push @opts, '--verbose' if $opt{verbose};
@opts = (@base_opts, '--ordered');
$cmd = "pg_sample @opts $opt{db_name} > sample_ordered.sql";
system($cmd) == 0 or die "pg_sample failed: $?";

Expand All @@ -328,7 +380,7 @@ $dbh = connect_db();
$cmd = "psql -q -X -v ON_ERROR_STOP=1 $opt{db_name} < sample_ordered.sql";
system($cmd) == 0 or die "pg_sample failed: $?";

$ord = $dbh->selectrow_array(qq{ SELECT STRING_AGG(id::text, ',') FROM "test_ordered" GROUP BY TRUE });
$ord = $dbh->selectrow_array(qq{ SELECT STRING_AGG(id::text, ',') FROM "test_ordered" });
is($ord, '1,2', "results should be ordered");

$dbh->disconnect;
Expand Down