From f124589b17bd2fc4abf3ddde499f3ead82c1398f Mon Sep 17 00:00:00 2001 From: Ashhar Hasan Date: Thu, 21 Dec 2023 14:15:45 +0530 Subject: [PATCH] Use explicit values for MaxResults in all Glue APIs Most Glue listing APIs accept a MaxResults parameter which decides how many objects are returned in a single API call. The default values are not documented, and the observed behaviour is that they are not constant. This commit explicitly sets MaxResults to the maximum allowed value in every API which supports it. In some cases this reduces the number of Glue calls needed when listing tables, databases or functions. This is similar to commits 4f22b0ebb52c71e07370b41f785fa88dde289c22 and 45dc37d4b68b6e7e72d2b50810fe96ebab08c0c3. --- .../hive/metastore/glue/GlueHiveMetastore.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java index e7ba1fab181c7..e25d21898776e 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java @@ -185,6 +185,9 @@ public class GlueHiveMetastore private static final int BATCH_CREATE_PARTITION_MAX_PAGE_SIZE = 100; private static final int BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE = 100; private static final int AWS_GLUE_GET_PARTITIONS_MAX_RESULTS = 1000; + private static final int AWS_GLUE_GET_DATABASES_MAX_RESULTS = 100; + private static final int AWS_GLUE_GET_FUNCTIONS_MAX_RESULTS = 100; + private static final int AWS_GLUE_GET_TABLES_MAX_RESULTS = 100; private static final Comparator> PARTITION_VALUE_COMPARATOR = lexicographical(String.CASE_INSENSITIVE_ORDER); private static final Predicate SOME_KIND_OF_VIEW_FILTER = table -> VIRTUAL_VIEW.name().equals(getTableTypeNullable(table)); 
private static final RetryPolicy CONCURRENT_MODIFICATION_EXCEPTION_RETRY_POLICY = RetryPolicy.builder() @@ -253,7 +256,8 @@ public List getAllDatabases() try { List databaseNames = getPaginatedResults( glueClient::getDatabases, - new GetDatabasesRequest(), + new GetDatabasesRequest() + .withMaxResults(AWS_GLUE_GET_DATABASES_MAX_RESULTS), GetDatabasesRequest::setNextToken, GetDatabasesResult::getNextToken, stats.getGetDatabases()) @@ -1291,7 +1295,8 @@ private Collection getFunctionsByPattern(String databaseName, glueClient::getUserDefinedFunctions, new GetUserDefinedFunctionsRequest() .withDatabaseName(databaseName) - .withPattern(functionNamePattern), + .withPattern(functionNamePattern) + .withMaxResults(AWS_GLUE_GET_FUNCTIONS_MAX_RESULTS), GetUserDefinedFunctionsRequest::setNextToken, GetUserDefinedFunctionsResult::getNextToken, stats.getGetUserDefinedFunctions()) @@ -1370,7 +1375,8 @@ private Stream getGlueTables(String dat return getPaginatedResults( glueClient::getTables, new GetTablesRequest() - .withDatabaseName(databaseName), + .withDatabaseName(databaseName) + .withMaxResults(AWS_GLUE_GET_TABLES_MAX_RESULTS), GetTablesRequest::setNextToken, GetTablesResult::getNextToken, stats.getGetTables())