-
Notifications
You must be signed in to change notification settings - Fork 310
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
OL facets - PR2 - read facets from views based on lineage_events table #2355
Merged
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
b4b1903
OL facets - PR2 - read facets from views pointing to lineage_events t…
pawel-big-lebowski 45ecf15
add dataset_version_uuid to dataset_versions
pawel-big-lebowski 88b8c7d
OL facets - PR3 - migrate data to facet tables (#2359)
pawel-big-lebowski b835721
Merge branch 'main' into ol-facets/PR2-read-data-from-views
wslulciuc File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
/* | ||
* Copyright 2018-2022 contributors to the Marquez project | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package marquez.cli; | ||
|
||
import io.dropwizard.Application; | ||
import io.dropwizard.cli.EnvironmentCommand; | ||
import io.dropwizard.db.DataSourceFactory; | ||
import io.dropwizard.db.ManagedDataSource; | ||
import io.dropwizard.jdbi3.JdbiFactory; | ||
import io.dropwizard.setup.Environment; | ||
import javax.sql.DataSource; | ||
import lombok.extern.slf4j.Slf4j; | ||
import marquez.db.migrations.V57_1__BackfillFacets; | ||
import net.sourceforge.argparse4j.inf.Namespace; | ||
import net.sourceforge.argparse4j.inf.Subparser; | ||
import org.jdbi.v3.core.Jdbi; | ||
import org.jdbi.v3.jackson2.Jackson2Plugin; | ||
import org.jdbi.v3.postgres.PostgresPlugin; | ||
import org.jdbi.v3.sqlobject.SqlObjectPlugin; | ||
|
||
/** | ||
* A command to manually run database migrations when needed. This migration requires a heavy DB | ||
* operation which can be done asynchronously (with limited API downtime) due to separate migration | ||
* command. | ||
*/ | ||
@Slf4j | ||
public class DbMigrationCommand<MarquezConfig> extends EnvironmentCommand<marquez.MarquezConfig> { | ||
|
||
private static final String DB_MIGRATE = "db-migrate"; | ||
private static final String MIGRATION_V57_DESCRIPTION = | ||
""" | ||
A command to manually run V57 database migration. | ||
Please refer to https://github.com/MarquezProject/marquez/blob/main/api/src/main/resources/marquez/db/migration/V57__readme.md for more details. | ||
"""; | ||
|
||
private static final String COMMAND_DESCRIPTION = | ||
""" | ||
A command to manually run database migrations. | ||
Extra parameters are required to specify the migration to run. | ||
"""; | ||
|
||
/** | ||
* Creates a new environment command. | ||
* | ||
* @param application the application providing this command | ||
*/ | ||
public DbMigrationCommand(Application<marquez.MarquezConfig> application) { | ||
super(application, DB_MIGRATE, COMMAND_DESCRIPTION); | ||
} | ||
|
||
@Override | ||
public void configure(Subparser subparser) { | ||
subparser | ||
.addArgument("--chunkSize") | ||
.dest("chunkSize") | ||
.type(Integer.class) | ||
.required(false) | ||
.setDefault(V57_1__BackfillFacets.DEFAULT_CHUNK_SIZE) | ||
.help("amount of lineage_events rows processed in a single SQL query and transaction."); | ||
|
||
subparser | ||
.addArgument("--version") | ||
.dest("version") | ||
.type(String.class) | ||
.required(true) | ||
.help("migration version to apply like 'v57'"); | ||
|
||
addFileArgument(subparser); | ||
} | ||
|
||
@Override | ||
protected void run( | ||
Environment environment, Namespace namespace, marquez.MarquezConfig configuration) | ||
throws Exception { | ||
|
||
final DataSourceFactory sourceFactory = configuration.getDataSourceFactory(); | ||
final DataSource source = sourceFactory.build(environment.metrics(), "MarquezApp-source"); | ||
final JdbiFactory factory = new JdbiFactory(); | ||
|
||
Jdbi jdbi = | ||
factory | ||
.build( | ||
environment, | ||
configuration.getDataSourceFactory(), | ||
(ManagedDataSource) source, | ||
"postgresql-command") | ||
.installPlugin(new SqlObjectPlugin()) | ||
.installPlugin(new PostgresPlugin()) | ||
.installPlugin(new Jackson2Plugin()); | ||
|
||
MarquezMigrations.valueOf(namespace.getString("version")).run(jdbi, namespace); | ||
} | ||
|
||
enum MarquezMigrations { | ||
v57 { | ||
public void run(Jdbi jdbi, Namespace namespace) throws Exception { | ||
log.info("Running V57_1__BackfillFacets migration"); | ||
V57_1__BackfillFacets migration = new V57_1__BackfillFacets(); | ||
migration.setManual(true); | ||
migration.setJdbi(jdbi); | ||
migration.setChunkSize(namespace.getInt("chunkSize")); | ||
migration.migrate(null); | ||
} | ||
}; | ||
|
||
public void run(Jdbi jdbi, Namespace namespace) throws Exception { | ||
throw new UnsupportedOperationException(); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The only purpose of this CTE is to determine which runs have input or output facets for the current dataset version. Given that the `dataset_facets_view` now has `dataset_version_uuid`, I think we can drop this whole subquery and join directly on `df.dataset_version_uuid = d.current_version_uuid` below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PR #2407