Skip to content

Commit

Permalink
Use explicit values for MaxResults in all Glue APIs
Browse files Browse the repository at this point in the history
Most Glue listing APIs accept a MaxResults parameter, which determines
how many objects are returned by a single API call. The default values
are not documented, and the observed behaviour is that they vary
according to some undocumented criterion.

This commit explicitly sets MaxResults to the maximum allowed value in
every API call that supports it.

In some cases this may reduce the number of Glue calls needed to list
tables, databases or functions.

This is similar to 4f22b0e and
45dc37d.
  • Loading branch information
hashhar committed Dec 28, 2023
1 parent ee7046b commit f124589
Showing 1 changed file with 9 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ public class GlueHiveMetastore
// NOTE(review): the usages of the next three constants are not visible in this excerpt; by name they
// bound the batch size of partition create/update calls and the MaxResults of GetPartitions — confirm.
private static final int BATCH_CREATE_PARTITION_MAX_PAGE_SIZE = 100;
private static final int BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE = 100;
private static final int AWS_GLUE_GET_PARTITIONS_MAX_RESULTS = 1000;
// Explicit MaxResults values for the paginated Glue listing requests, set so that the page size does
// not depend on the service-side defaults. Intended to be the maximum the APIs accept — verify
// against the AWS Glue API reference before changing.
// Passed via withMaxResults(...) to GetDatabasesRequest in getAllDatabases().
private static final int AWS_GLUE_GET_DATABASES_MAX_RESULTS = 100;
// Passed via withMaxResults(...) to GetUserDefinedFunctionsRequest in getFunctionsByPattern().
private static final int AWS_GLUE_GET_FUNCTIONS_MAX_RESULTS = 100;
// Passed via withMaxResults(...) to GetTablesRequest in getGlueTables().
private static final int AWS_GLUE_GET_TABLES_MAX_RESULTS = 100;
private static final Comparator<Iterable<String>> PARTITION_VALUE_COMPARATOR = lexicographical(String.CASE_INSENSITIVE_ORDER);
private static final Predicate<com.amazonaws.services.glue.model.Table> SOME_KIND_OF_VIEW_FILTER = table -> VIRTUAL_VIEW.name().equals(getTableTypeNullable(table));
private static final RetryPolicy<?> CONCURRENT_MODIFICATION_EXCEPTION_RETRY_POLICY = RetryPolicy.builder()
Expand Down Expand Up @@ -253,7 +256,8 @@ public List<String> getAllDatabases()
try {
List<String> databaseNames = getPaginatedResults(
glueClient::getDatabases,
new GetDatabasesRequest(),
new GetDatabasesRequest()
.withMaxResults(AWS_GLUE_GET_DATABASES_MAX_RESULTS),
GetDatabasesRequest::setNextToken,
GetDatabasesResult::getNextToken,
stats.getGetDatabases())
Expand Down Expand Up @@ -1291,7 +1295,8 @@ private Collection<LanguageFunction> getFunctionsByPattern(String databaseName,
glueClient::getUserDefinedFunctions,
new GetUserDefinedFunctionsRequest()
.withDatabaseName(databaseName)
.withPattern(functionNamePattern),
.withPattern(functionNamePattern)
.withMaxResults(AWS_GLUE_GET_FUNCTIONS_MAX_RESULTS),
GetUserDefinedFunctionsRequest::setNextToken,
GetUserDefinedFunctionsResult::getNextToken,
stats.getGetUserDefinedFunctions())
Expand Down Expand Up @@ -1370,7 +1375,8 @@ private Stream<com.amazonaws.services.glue.model.Table> getGlueTables(String dat
return getPaginatedResults(
glueClient::getTables,
new GetTablesRequest()
.withDatabaseName(databaseName),
.withDatabaseName(databaseName)
.withMaxResults(AWS_GLUE_GET_TABLES_MAX_RESULTS),
GetTablesRequest::setNextToken,
GetTablesResult::getNextToken,
stats.getGetTables())
Expand Down

0 comments on commit f124589

Please sign in to comment.