From a49b574e61b12b9f01ec30f6fb9117b176e7dc2e Mon Sep 17 00:00:00 2001 From: Joe Wang Date: Sat, 22 Jun 2024 17:33:54 -0400 Subject: [PATCH 01/10] feat: parse BUILD.bazel to find qualified commit --- library_generation/model/config_change.py | 32 +++++++++++++++++++++-- library_generation/model/gapic_inputs.py | 23 +++++++++++++--- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/library_generation/model/config_change.py b/library_generation/model/config_change.py index bfb83a87e6..b826ac5942 100644 --- a/library_generation/model/config_change.py +++ b/library_generation/model/config_change.py @@ -16,6 +16,8 @@ from enum import Enum from typing import Optional from git import Commit, Repo + +from library_generation.model.gapic_inputs import parse_build_str from library_generation.model.generation_config import GenerationConfig from library_generation.model.library_config import LibraryConfig from library_generation.utils.utilities import sh_util @@ -144,10 +146,14 @@ def __create_qualified_commit( """ libraries = set() for file in commit.stats.files.keys(): - if file.endswith("BUILD.bazel"): - continue versioned_proto_path = find_versioned_proto_path(file) if versioned_proto_path in proto_paths: + if file.endswith("BUILD.bazel") and ( + not ConfigChange.__qualified_build_change( + commit=commit, build_file_path=file + ) + ): + continue # Even though a commit usually only changes one # library, we don't want to miss generating a # library because the commit may change multiple @@ -156,3 +162,25 @@ def __create_qualified_commit( if len(libraries) == 0: return None return QualifiedCommit(commit=commit, libraries=libraries) + + @staticmethod + def __qualified_build_change(commit: Commit, build_file_path: str) -> bool: + """ + Checks if the given commit containing a BUILD.bazel change is a + qualified commit. + + :param commit: a GitHub commit object. + :param build_file_path: the path of the BUILD.bazel + :return: True if the commit is a qualified commit; False otherwise. + """ + versioned_proto_path = find_versioned_proto_path(build_file_path) + build = str((commit.tree / build_file_path).data_stream.read()) + parent_commit = commit.parents[0] + parent_build = str((parent_commit.tree / build_file_path).data_stream.read()) + inputs = parse_build_str(build, versioned_proto_path) + parent_inputs = parse_build_str(parent_build, versioned_proto_path) + # If the GapicInputs objects parsed from BUILD.bazel (on the given + # commit and its parent) are different, there are changes in fields + # that used in library generation, then the given commit is a + # qualified commit. + return inputs != parent_inputs diff --git a/library_generation/model/gapic_inputs.py b/library_generation/model/gapic_inputs.py index 992e4c2e3c..5c07777965 100644 --- a/library_generation/model/gapic_inputs.py +++ b/library_generation/model/gapic_inputs.py @@ -72,6 +72,20 @@ def __init__( self.service_yaml = service_yaml self.include_samples = include_samples + def __eq__(self, other): + if not isinstance(other, GapicInputs): + return False + return ( + self.proto_only == other.proto_only + and self.additional_protos == other.additional_protos + and self.transport == other.transport + and self.rest_numeric_enum == other.rest_numeric_enum + and self.gapic_yaml == other.gapic_yaml + and self.service_config == other.service_config + and self.service_yaml == other.service_yaml + and self.include_samples == other.include_samples + ) + def parse( build_path: Path, versioned_path: str, build_file_name: str = "BUILD.bazel" @@ -86,16 +100,19 @@ def parse( """ with open(f"{build_path}/{build_file_name}") as build: content = build.read() + return parse_build_str(build_str=content, versioned_path=versioned_path) + +def parse_build_str(build_str: str, versioned_path: str) -> GapicInputs: proto_library_target = re.compile( proto_library_pattern, re.DOTALL | re.VERBOSE - ).findall(content) + ).findall(build_str) additional_protos = "" if len(proto_library_target) > 0: additional_protos = __parse_additional_protos(proto_library_target[0]) - gapic_target = re.compile(gapic_pattern, re.DOTALL | re.VERBOSE).findall(content) + gapic_target = re.compile(gapic_pattern, re.DOTALL | re.VERBOSE).findall(build_str) assembly_target = re.compile(assembly_pattern, re.DOTALL | re.VERBOSE).findall( - content + build_str ) include_samples = "false" if len(assembly_target) > 0: From 378ae558d8546290a68f7dc3c956171c60ca221b Mon Sep 17 00:00:00 2001 From: Joe Wang Date: Sat, 22 Jun 2024 17:34:04 -0400 Subject: [PATCH 02/10] add unit test --- .../test/model/config_change_unit_tests.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/library_generation/test/model/config_change_unit_tests.py b/library_generation/test/model/config_change_unit_tests.py index 6207c2100a..c715aca473 100644 --- a/library_generation/test/model/config_change_unit_tests.py +++ b/library_generation/test/model/config_change_unit_tests.py @@ -208,6 +208,28 @@ def test_get_qualified_commits_success(self): qualified_commits[2].commit.hexsha, ) + def test_get_qualified_commits_build_only_commit_returns_a_commit(self): + baseline_commit = "45694d2bad602c52170096072d325aa644d550e5" + latest_commit = "758f0d1217d9c7fe398aa5efb1057ce4b6409e55" + config_change = ConfigChange( + change_to_libraries={}, + baseline_config=ConfigChangeTest.__get_a_gen_config( + googleapis_commitish=baseline_commit + ), + current_config=ConfigChangeTest.__get_a_gen_config( + googleapis_commitish=latest_commit, + libraries=[ + ConfigChangeTest.__get_a_library_config( + library_name="container", + gapic_configs=[GapicConfig(proto_path="google/container/v1")], + ) + ], + ), + ) + # one commit between latest_commit and baseline_commit which only + # changed BUILD.bazel. + self.assertTrue(len(config_change.get_qualified_commits()) == 1) + def test_get_qualified_commits_build_only_commit_returns_empty_list(self): baseline_commit = "bdda0174f68a738518ec311e05e6fd9bbe19cd78" latest_commit = "c9a5050ef225b0011603e1109cf53ab1de0a8e53" From 22208fa007a5e0021911d93d2f2ca4b1199a2570 Mon Sep 17 00:00:00 2001 From: Joe Wang Date: Sat, 22 Jun 2024 17:37:58 -0400 Subject: [PATCH 03/10] add comments --- library_generation/test/model/config_change_unit_tests.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/library_generation/test/model/config_change_unit_tests.py b/library_generation/test/model/config_change_unit_tests.py index c715aca473..57dce7d254 100644 --- a/library_generation/test/model/config_change_unit_tests.py +++ b/library_generation/test/model/config_change_unit_tests.py @@ -228,6 +228,7 @@ def test_get_qualified_commits_build_only_commit_returns_a_commit(self): ) # one commit between latest_commit and baseline_commit which only # changed BUILD.bazel. + # this commit changed `rest_numeric_enums`. self.assertTrue(len(config_change.get_qualified_commits()) == 1) def test_get_qualified_commits_build_only_commit_returns_empty_list(self): @@ -250,6 +251,7 @@ def test_get_qualified_commits_build_only_commit_returns_empty_list(self): ) # one commit between latest_commit and baseline_commit which only # changed BUILD.bazel. + # this commit didn't change fields used in library generation. self.assertTrue(len(config_change.get_qualified_commits()) == 0) @staticmethod From c2907bcc1bc32495da9fd61439f87182ca8091dc Mon Sep 17 00:00:00 2001 From: Joe Wang Date: Mon, 24 Jun 2024 15:07:30 -0400 Subject: [PATCH 04/10] early return for a new client commit --- library_generation/model/config_change.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/library_generation/model/config_change.py b/library_generation/model/config_change.py index b826ac5942..5ad9650821 100644 --- a/library_generation/model/config_change.py +++ b/library_generation/model/config_change.py @@ -79,6 +79,7 @@ def get_changed_libraries(self) -> Optional[list[str]]: Returns a unique, sorted list of library name of changed libraries. None if there is a repository level change, which means all libraries in the current_config will be generated. + :return: library names of change libraries. """ if ChangeType.REPO_LEVEL_CHANGE in self.change_to_libraries: @@ -176,7 +177,16 @@ def __qualified_build_change(commit: Commit, build_file_path: str) -> bool: versioned_proto_path = find_versioned_proto_path(build_file_path) build = str((commit.tree / build_file_path).data_stream.read()) parent_commit = commit.parents[0] - parent_build = str((parent_commit.tree / build_file_path).data_stream.read()) + # If BUILD.bazel doesn't exist in the parent commit, i.e., it is added + # in the current commit, `parent_commit.tree / build_file_path` will + # raise KeyError and the function should return early because the + # library is already generated through a new client generation request. + try: + parent_build = str( + (parent_commit.tree / build_file_path).data_stream.read() + ) + except KeyError: + return False inputs = parse_build_str(build, versioned_proto_path) parent_inputs = parse_build_str(parent_build, versioned_proto_path) # If the GapicInputs objects parsed from BUILD.bazel (on the given From 23e66d5303f6db2d0da7f8c702d8ed95770818d5 Mon Sep 17 00:00:00 2001 From: Joe Wang Date: Mon, 24 Jun 2024 15:25:47 -0400 Subject: [PATCH 05/10] keep the new client commit --- library_generation/model/config_change.py | 5 ++-- .../test/model/config_change_unit_tests.py | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/library_generation/model/config_change.py b/library_generation/model/config_change.py index 5ad9650821..263845c85e 100644 --- a/library_generation/model/config_change.py +++ b/library_generation/model/config_change.py @@ -179,14 +179,13 @@ def __qualified_build_change(commit: Commit, build_file_path: str) -> bool: parent_commit = commit.parents[0] # If BUILD.bazel doesn't exist in the parent commit, i.e., it is added # in the current commit, `parent_commit.tree / build_file_path` will - # raise KeyError and the function should return early because the - # library is already generated through a new client generation request. + # raise KeyError and the function should return early. try: parent_build = str( (parent_commit.tree / build_file_path).data_stream.read() ) except KeyError: - return False + return True inputs = parse_build_str(build, versioned_proto_path) parent_inputs = parse_build_str(parent_build, versioned_proto_path) # If the GapicInputs objects parsed from BUILD.bazel (on the given diff --git a/library_generation/test/model/config_change_unit_tests.py b/library_generation/test/model/config_change_unit_tests.py index 57dce7d254..dae67d4349 100644 --- a/library_generation/test/model/config_change_unit_tests.py +++ b/library_generation/test/model/config_change_unit_tests.py @@ -254,6 +254,32 @@ def test_get_qualified_commits_build_only_commit_returns_empty_list(self): # this commit didn't change fields used in library generation. self.assertTrue(len(config_change.get_qualified_commits()) == 0) + def test_get_qualified_commits_new_client_commit_returns_a_commit(self): + baseline_commit = "d007ca1b3cc820651530d44d5388533047ae1414" + latest_commit = "05d889e7dfe087fc2ddc9de9579f01d4e1c2f35e" + config_change = ConfigChange( + change_to_libraries={}, + baseline_config=ConfigChangeTest.__get_a_gen_config( + googleapis_commitish=baseline_commit + ), + current_config=ConfigChangeTest.__get_a_gen_config( + googleapis_commitish=latest_commit, + libraries=[ + ConfigChangeTest.__get_a_library_config( + library_name="cloudcontrolspartner", + gapic_configs=[ + GapicConfig( + proto_path="google/cloud/cloudcontrolspartner/v1" + ) + ], + ) + ], + ), + ) + # one commit between latest_commit and baseline_commit which added + # google/cloud/cloudcontrolspartner/v1. + self.assertTrue(len(config_change.get_qualified_commits()) == 1) + @staticmethod def __get_a_gen_config( googleapis_commitish="", libraries: list[LibraryConfig] = None From 2a5cf365a65e14460774548f8377a543f2eaa64c Mon Sep 17 00:00:00 2001 From: Joe Wang Date: Mon, 24 Jun 2024 15:39:14 -0400 Subject: [PATCH 06/10] only consider commit that only changed BUILD.bazel --- library_generation/model/config_change.py | 24 +++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/library_generation/model/config_change.py b/library_generation/model/config_change.py index 263845c85e..8ebe722bb6 100644 --- a/library_generation/model/config_change.py +++ b/library_generation/model/config_change.py @@ -146,12 +146,19 @@ def __create_qualified_commit( :return: qualified commits. """ libraries = set() + file_change_num = len(commit.stats.files.keys()) for file in commit.stats.files.keys(): versioned_proto_path = find_versioned_proto_path(file) if versioned_proto_path in proto_paths: - if file.endswith("BUILD.bazel") and ( - not ConfigChange.__qualified_build_change( - commit=commit, build_file_path=file + # determine a commit that only contains `BUILD.bazel` + # separately. + if ( + file.endswith("BUILD.bazel") + and file_change_num == 1 + and ( + not ConfigChange.__qualified_build_change( + commit=commit, build_file_path=file + ) ) ): continue @@ -179,13 +186,10 @@ def __qualified_build_change(commit: Commit, build_file_path: str) -> bool: parent_commit = commit.parents[0] # If BUILD.bazel doesn't exist in the parent commit, i.e., it is added # in the current commit, `parent_commit.tree / build_file_path` will - # raise KeyError and the function should return early. - try: - parent_build = str( - (parent_commit.tree / build_file_path).data_stream.read() - ) - except KeyError: - return True + # raise KeyError. + # However, there's unlikely that a BUILD.bazel is the only file that + # added in a commit. + parent_build = str((parent_commit.tree / build_file_path).data_stream.read()) inputs = parse_build_str(build, versioned_proto_path) parent_inputs = parse_build_str(parent_build, versioned_proto_path) # If the GapicInputs objects parsed from BUILD.bazel (on the given From c7ab2672cc683411ef33b7e0033fb6f6cd73c23c Mon Sep 17 00:00:00 2001 From: Joe Wang Date: Mon, 24 Jun 2024 17:09:39 -0400 Subject: [PATCH 07/10] change comments --- library_generation/model/config_change.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/library_generation/model/config_change.py b/library_generation/model/config_change.py index 8ebe722bb6..fa82dd203a 100644 --- a/library_generation/model/config_change.py +++ b/library_generation/model/config_change.py @@ -150,13 +150,14 @@ def __create_qualified_commit( for file in commit.stats.files.keys(): versioned_proto_path = find_versioned_proto_path(file) if versioned_proto_path in proto_paths: - # determine a commit that only contains `BUILD.bazel` - # separately. + # Skip a commit if the only changed file is `BUILD.bazel` + # and the change doesn't contain fields used in library + # generation, e.g., transport, rest_numeric_enum. if ( file.endswith("BUILD.bazel") and file_change_num == 1 and ( - not ConfigChange.__qualified_build_change( + not ConfigChange.__is_qualified_build_change( commit=commit, build_file_path=file ) ) @@ -172,11 +173,16 @@ def __create_qualified_commit( return QualifiedCommit(commit=commit, libraries=libraries) @staticmethod - def __qualified_build_change(commit: Commit, build_file_path: str) -> bool: + def __is_qualified_build_change(commit: Commit, build_file_path: str) -> bool: """ Checks if the given commit containing a BUILD.bazel change is a qualified commit. + The commit is a qualified commit if the + :class:`library_generation.model.gapic_inputs.GapicInputs` objects + parsed from the commit and its parent are different, since there are + changes in fields that used in library generation. + :param commit: a GitHub commit object. :param build_file_path: the path of the BUILD.bazel :return: True if the commit is a qualified commit; False otherwise. @@ -192,8 +198,4 @@ def __qualified_build_change(commit: Commit, build_file_path: str) -> bool: parent_build = str((parent_commit.tree / build_file_path).data_stream.read()) inputs = parse_build_str(build, versioned_proto_path) parent_inputs = parse_build_str(parent_build, versioned_proto_path) - # If the GapicInputs objects parsed from BUILD.bazel (on the given - # commit and its parent) are different, there are changes in fields - # that used in library generation, then the given commit is a - # qualified commit. return inputs != parent_inputs From 06176fa3f5f72339cbb7c9ba49a1d7c165baf817 Mon Sep 17 00:00:00 2001 From: Joe Wang Date: Mon, 24 Jun 2024 20:37:54 -0400 Subject: [PATCH 08/10] qualify a commit that adding BUILD.bazel --- library_generation/model/config_change.py | 24 ++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/library_generation/model/config_change.py b/library_generation/model/config_change.py index fa82dd203a..1b65b004e2 100644 --- a/library_generation/model/config_change.py +++ b/library_generation/model/config_change.py @@ -23,6 +23,9 @@ from library_generation.utils.utilities import sh_util from library_generation.utils.proto_path_utils import find_versioned_proto_path +INSERTIONS = "insertions" +LINES = "lines" + class ChangeType(Enum): GOOGLEAPIS_COMMIT = 1 @@ -146,16 +149,16 @@ def __create_qualified_commit( :return: qualified commits. """ libraries = set() - file_change_num = len(commit.stats.files.keys()) - for file in commit.stats.files.keys(): + for file, changes in commit.stats.files.items(): versioned_proto_path = find_versioned_proto_path(file) if versioned_proto_path in proto_paths: - # Skip a commit if the only changed file is `BUILD.bazel` - # and the change doesn't contain fields used in library - # generation, e.g., transport, rest_numeric_enum. if ( file.endswith("BUILD.bazel") - and file_change_num == 1 + # Qualify a commit if the commit only added BUILD.bazel + # because it's very unlikely that a commit added BUILD.bazel + # without adding proto files. Therefore, the commit is + # qualified duo to the proto change eventually. + and (not ConfigChange.__is_added(changes)) and ( not ConfigChange.__is_qualified_build_change( commit=commit, build_file_path=file @@ -172,6 +175,10 @@ def __create_qualified_commit( return None return QualifiedCommit(commit=commit, libraries=libraries) + @staticmethod + def __is_added(changes: dict[str, int]) -> bool: + return changes[INSERTIONS] == changes[LINES] + @staticmethod def __is_qualified_build_change(commit: Commit, build_file_path: str) -> bool: """ @@ -190,11 +197,6 @@ def __is_qualified_build_change(commit: Commit, build_file_path: str) -> bool: versioned_proto_path = find_versioned_proto_path(build_file_path) build = str((commit.tree / build_file_path).data_stream.read()) parent_commit = commit.parents[0] - # If BUILD.bazel doesn't exist in the parent commit, i.e., it is added - # in the current commit, `parent_commit.tree / build_file_path` will - # raise KeyError. - # However, there's unlikely that a BUILD.bazel is the only file that - # added in a commit. parent_build = str((parent_commit.tree / build_file_path).data_stream.read()) inputs = parse_build_str(build, versioned_proto_path) parent_inputs = parse_build_str(parent_build, versioned_proto_path) From 00e96e42ad65a7515e10214b5af7a47768a4de8c Mon Sep 17 00:00:00 2001 From: Joe Wang Date: Tue, 25 Jun 2024 17:47:49 -0400 Subject: [PATCH 09/10] change test name --- .../test/model/config_change_unit_tests.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/library_generation/test/model/config_change_unit_tests.py b/library_generation/test/model/config_change_unit_tests.py index dae67d4349..147753ec99 100644 --- a/library_generation/test/model/config_change_unit_tests.py +++ b/library_generation/test/model/config_change_unit_tests.py @@ -208,7 +208,9 @@ def test_get_qualified_commits_success(self): qualified_commits[2].commit.hexsha, ) - def test_get_qualified_commits_build_only_commit_returns_a_commit(self): + def test_get_qualified_commits_rest_numeric_enum_change_returns_a_qualified_commit( + self, + ): baseline_commit = "45694d2bad602c52170096072d325aa644d550e5" latest_commit = "758f0d1217d9c7fe398aa5efb1057ce4b6409e55" config_change = ConfigChange( @@ -231,7 +233,9 @@ def test_get_qualified_commits_build_only_commit_returns_a_commit(self): # this commit changed `rest_numeric_enums`. self.assertTrue(len(config_change.get_qualified_commits()) == 1) - def test_get_qualified_commits_build_only_commit_returns_empty_list(self): + def test_get_qualified_commits_irrelevant_build_field_change_returns_empty_list( + self, + ): baseline_commit = "bdda0174f68a738518ec311e05e6fd9bbe19cd78" latest_commit = "c9a5050ef225b0011603e1109cf53ab1de0a8e53" config_change = ConfigChange( @@ -254,7 +258,7 @@ def test_get_qualified_commits_build_only_commit_returns_empty_list(self): # this commit didn't change fields used in library generation. self.assertTrue(len(config_change.get_qualified_commits()) == 0) - def test_get_qualified_commits_new_client_commit_returns_a_commit(self): + def test_get_qualified_commits_add_build_file_returns_a_qualified_commit(self): baseline_commit = "d007ca1b3cc820651530d44d5388533047ae1414" latest_commit = "05d889e7dfe087fc2ddc9de9579f01d4e1c2f35e" config_change = ConfigChange( From fd7318f3c79ae82efff74fb9d3739edd225eb860 Mon Sep 17 00:00:00 2001 From: Joe Wang Date: Wed, 26 Jun 2024 08:16:53 -0400 Subject: [PATCH 10/10] refactor --- library_generation/model/config_change.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/library_generation/model/config_change.py b/library_generation/model/config_change.py index 1b65b004e2..8a5e813244 100644 --- a/library_generation/model/config_change.py +++ b/library_generation/model/config_change.py @@ -149,11 +149,11 @@ def __create_qualified_commit( :return: qualified commits. """ libraries = set() - for file, changes in commit.stats.files.items(): - versioned_proto_path = find_versioned_proto_path(file) + for file_path, changes in commit.stats.files.items(): + versioned_proto_path = find_versioned_proto_path(file_path) if versioned_proto_path in proto_paths: if ( - file.endswith("BUILD.bazel") + file_path.endswith("BUILD.bazel") # Qualify a commit if the commit only added BUILD.bazel # because it's very unlikely that a commit added BUILD.bazel # without adding proto files. Therefore, the commit is @@ -161,7 +161,7 @@ def __create_qualified_commit( and (not ConfigChange.__is_added(changes)) and ( not ConfigChange.__is_qualified_build_change( - commit=commit, build_file_path=file + commit=commit, build_file_path=file_path ) ) ):