Skip to content

Commit

Permalink
[source-mysql] No empty sample (#48545)
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaohansong authored Nov 18, 2024
1 parent 3d99fde commit 4b39143
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-mysql/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ data:
connectorSubtype: database
connectorType: source
definitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad
dockerImageTag: 3.9.0-rc.14
dockerImageTag: 3.9.0-rc.15
dockerRepository: airbyte/source-mysql
documentationUrl: https://docs.airbyte.com/integrations/sources/mysql
githubIssueLabel: source-mysql
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,12 @@ class MysqlSourceOperations :
// of table on very large tables (> 100s of millions of rows)
val greatestRate: String = 0.00005.toString()
// Quick approximation to "select count(*) from table" which doesn't require
// full table scan.
// full table scan. However, note that this could return stale summary info about a
// table, so a new table could be treated as empty even though rows were recently added.
// To prevent that from happening, which would result in skipping the table altogether,
// the minimum count is set to 10.
val quickCount =
"SELECT table_rows FROM information_schema.tables WHERE table_schema = '$namespace' AND table_name = '$name'"
"SELECT GREATEST(10, table_rows) FROM information_schema.tables WHERE table_schema = '$namespace' AND table_name = '$name'"
val greatest = "GREATEST($greatestRate, $sampleSize / ($quickCount))"
// Rand returns a value between 0 and 1
val where = "WHERE RAND() < $greatest "
Expand Down

0 comments on commit 4b39143

Please sign in to comment.