Skip to content

Commit

Permalink
Feature / Documentation and code generator updates (#445)
Browse files Browse the repository at this point in the history
* Simple API for exposing batch job status in the runtime

* Update runtime proto version

* Update GCP dependencies to work with later proto version

* Fix output path for pb2 generated files in build_runtime.py

* Add support for python gRPC to the code generator

* Allow specifying runtime API server params in the CLI

* Allow specifying model class as a string in dev mode for run model jobs

* Update codegen scripts to handle gRPC gen for Python

* Add gRPC dependencies to the runtime (optional in distribution)

* Add a placeholder package for the generated gRPC code

* Stub implementation of the runtime API server, with start / stop control

* Do not depend on config in the api package (this dependency could be added but doesn't exist at present)

* Fix for proto build ordering

* Add gRPC status lib as a runtime dependency

* Codec module to convert to/from gRPC messages

* Add a job list API call to the runtime API

* Auto pre-start the runtime if warm up not called before start()

* First basic test talking to the runtime API

* Allow static modules in _impl.grpc for dist builds (generated code does not overwrite static modules)

* Include gRPC tools in runtime dev dependencies

* Fix logic for model selectors in dev mode translation

* Do some housekeeping in dev_mode.py

* Update web API builder to handle nested packages and exclude internal APIs

* Fix order forcing in API proto files

* Remove redundant proto imports

* Enable package grouping in the TRAC protoc generator plugin

* Fix protoc-ctrl for Windows

* Fix handling of non-public APIs in protoc-ctrl.py

* Use the same public API filters for JavaScript and Python

* Fix generated enums and enum doc comments

* Add basic doc comments to PartType (prevent strange behaviour in Sphinx)

* Make the gRPC server use the new Enum behavior and the associated test expect that

* Remove capability to start the runtime server and do not include it in the runtime CLI

* Fill in missing top-level doc comments in the metadata model

* Fill in missing top-level doc comments in the TRAC platform APIs

* Fill in missing top-level doc comments in the runtime launch module

* Fix markup in proto file doc comments
  • Loading branch information
Martin Traverse authored Sep 1, 2024
1 parent 2680a3c commit f3b849d
Show file tree
Hide file tree
Showing 29 changed files with 892 additions and 214 deletions.
13 changes: 7 additions & 6 deletions dev/codegen/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ class TracGenerator:
'{ENUM_VALUES}')

ENUM_VALUE_TEMPLATE = (
'{INDENT}{ENUM_VALUE_NAME} = {ENUM_VALUE_NUMBER}, {QUOTED_COMMENT}\n\n')
'{INDENT}{ENUM_VALUE_NAME} = {ENUM_VALUE_NUMBER}\n\n'
'{DOC_COMMENT}')

DATA_CLASS_TEMPLATE = (
'{INDENT}@_dc.dataclass\n'
Expand Down Expand Up @@ -193,11 +194,11 @@ class TracGenerator:
'{INDENT}"""\n\n')

ENUM_COMMENT_SINGLE_LINE = \
'"""{COMMENT}"""'
'{INDENT}"""{COMMENT}"""\n\n'

ENUM_COMMENT_MULTI_LINE = \
'"""{COMMENT}\n' \
'{INDENT}"""'
'{INDENT}"""{COMMENT}\n' \
'{INDENT}"""\n\n'

def __init__(self, options: tp.Dict[str, tp.Any] = None):

Expand Down Expand Up @@ -667,9 +668,9 @@ def generate_enum_value(self, ctx: LocationContext, descriptor: pb_desc.EnumValu
# Populate the template
return self.ENUM_VALUE_TEMPLATE \
.replace("{INDENT}", self.INDENT_TEMPLATE * ctx.indent) \
.replace("{QUOTED_COMMENT}", formatted_comment) \
.replace("{ENUM_VALUE_NAME}", descriptor.name) \
.replace("{ENUM_VALUE_NUMBER}", str(descriptor.number))
.replace("{ENUM_VALUE_NUMBER}", str(descriptor.number)) \
.replace("{DOC_COMMENT}", formatted_comment)

# Python type hints

Expand Down
192 changes: 158 additions & 34 deletions dev/codegen/protoc-ctrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import fileinput
import os
import pathlib
import platform
import re
import shutil
import subprocess as sp
import argparse
import logging
import sys
import tempfile

import protoc
Expand All @@ -42,8 +46,26 @@
_log = logging.getLogger(SCRIPT_NAME)


# Path patterns excluded from the public API surface:
# "internal" proto packages and *_trusted.proto service definitions
# (both / and \ separators are accepted so the check works on Windows)
PUBLIC_API_EXCLUSIONS = [
    re.compile(r".*[/\\]internal$"),
    re.compile(r".*_trusted\.proto$")]


def is_public_api(path: pathlib.Path):

    """Return True if the given proto path is part of the public API, False if excluded."""

    path_str = str(path)

    for exclusion in PUBLIC_API_EXCLUSIONS:
        if exclusion.match(path_str):
            return False

    return True

def is_in_packages(path: pathlib.Path, packages):

    """Return True if the path falls under any of the given package paths (substring match)."""

    # Normalize to forward slashes so package filters match on any platform
    normalized_path = str(path).replace(os.sep, "/")

    return any(pkg in normalized_path for pkg in packages)


def _copytree(src, dst):

_log.info(f"Copying {src} -> {dst}")

# In shutil.copytree, dir_exists_ok is only available from Python 3.8, but we need Python 3.7
# Codegen is part of the core build tools so needs to match supported Python versions of the TRAC runtime

Expand All @@ -57,64 +79,140 @@ def _copytree(src, dst):
rel_item = src_item.relative_to(src_dir)
dst_item = dst_dir.joinpath(rel_item)

if src_item.name == "__pycache__":
continue

if src_item.is_dir():
_copytree(src_item, dst_item)
else:
if not dst_item.exists() or src_item.stat().st_mtime > dst_item.stat().st_mtime:
shutil.copy2(src_item, dst_item)


class ProtoApiExtensions:
class ProtoCtrlContext:

# Provide some key extension protos from Google to handle web api annotations
# The googleapis package would have the venv root as its namespace, so we need to copy to a temp dir

def __init__(self):
self._temp_dir = tempfile.TemporaryDirectory()
self.temp_dir_name = ""
self.proto_path = pathlib.Path()
self.support_path = pathlib.Path()

def __enter__(self):

self.temp_dir_name = self._temp_dir.__enter__()
self._temp_dir.__enter__()

temp_dir = pathlib.Path(self._temp_dir.name)
proto_dir = temp_dir.joinpath("proto")
proto_dir.mkdir()
support_dir = temp_dir.joinpath("support")
support_dir.mkdir()

self.proto_path = proto_dir
self.support_path = support_dir

return self

def __exit__(self, exc_type, exc_val, exc_tb):
self._temp_dir.__exit__(exc_type, exc_val, exc_tb)

def copy_common_protos(self):

# Core protos used by the protoc compiler itself
protoc_inc_src = pathlib.Path(protoc.PROTOC_INCLUDE_DIR)
protoc_inc_dst = pathlib.Path(self.temp_dir_name)
protoc_inc_dst = pathlib.Path(self.support_path)

_log.info(f"Copying {protoc_inc_src} -> {protoc_inc_dst}")
_copytree(protoc_inc_src, protoc_inc_dst)

def copy_google_api_protos(self):

# Google API protos for annotating web services
gapi_src = pathlib.Path(gapi_http_module.__file__).parent
gapi_dst = pathlib.Path(self.temp_dir_name).joinpath("google/api")
gapi_dst = pathlib.Path(self.support_path).joinpath("google/api")

_log.info(f"Copying {gapi_src} -> {gapi_dst}")
_copytree(gapi_src, gapi_dst)

return self.temp_dir_name

def __exit__(self, exc_type, exc_val, exc_tb):
self._temp_dir.__exit__(exc_type, exc_val, exc_tb)
def relocate_proto_package(proto_path: pathlib.Path, relocate):

if isinstance(proto_path, str):
proto_path = pathlib.Path(proto_path)

source, target = relocate.split(":")

# Move source -> temp -> target
# Avoid conflicts if target is a sub-package of source

source_pkg = proto_path.joinpath(source)
temp_pkg = proto_path.joinpath("__temp")
target_pkg = proto_path.joinpath(target)

_log.info(f"Moving {source_pkg} -> {target_pkg}")

shutil.move(source_pkg, temp_pkg)

if not target_pkg.parent.exists():
target_pkg.parent.mkdir(parents=True)

shutil.move(temp_pkg, target_pkg)

def find_proto_files(proto_paths, ignore_trusted=False):
_log.info(f"Relocating imports for {source} -> {target}")

match = re.compile(rf"import \"{source}/", re.MULTILINE)
replace = f"import \"{target}/"

_relocate_proto_imports(target_pkg, match, replace)


def _relocate_proto_imports(proto_path: pathlib.Path, match: re.Pattern, replace: str):

for dir_entry in proto_path.iterdir():

if dir_entry.name.endswith(".proto"):
for line in fileinput.input(dir_entry, inplace=True):
print(re.sub(match, replace, line), end="")

elif dir_entry.is_dir():
_relocate_proto_imports(dir_entry, match, replace)


def find_proto_files(proto_paths, packages, no_internal=False):

proto_path_list = proto_paths if isinstance(proto_paths, list) else [proto_paths]

for proto_path in proto_path_list:

for entry in proto_path.iterdir():
for entry in find_proto_files_in_dir(proto_path, proto_path, packages, no_internal):
yield entry


def find_proto_files_in_dir(proto_path, root_proto_path, packages, no_internal):

# Do not include trusted (private) parts of the API when generating for API docs
if ignore_trusted and "_trusted.proto" in entry.name:
continue
package_paths = list(map(lambda p: p.replace(".", "/"), packages)) if packages else None

path_str = str(proto_path)

if "=" in path_str:
proto_path_ = pathlib.Path(path_str[path_str.index("=") + 1:])
else:
proto_path_ = pathlib.Path(path_str)

if entry.is_file() and entry.name.endswith(".proto"):
yield proto_path.joinpath(entry.name)
for entry in proto_path_.iterdir():

elif entry.is_dir():
for sub_entry in find_proto_files(proto_path.joinpath(entry.name), ignore_trusted):
yield sub_entry
# Do not include internal parts of the API when generating for API docs
if no_internal and not is_public_api(entry):
_log.info(f"Excluding non-public API: [{entry.relative_to(root_proto_path)}]")
continue

if entry.is_dir():
sub_path = proto_path_.joinpath(entry.name)
for sub_entry in find_proto_files_in_dir(sub_path, root_proto_path, packages, no_internal):
yield sub_entry

elif entry.is_file() and entry.name.endswith(".proto"):
if packages is None or is_in_packages(entry, package_paths):
yield proto_path_.joinpath(entry.name)


def platform_args(base_args, proto_files):
Expand Down Expand Up @@ -142,13 +240,20 @@ def build_protoc_args(generator, proto_paths, output_location, packages):

proto_path_args = list(map(lambda pp: f"--proto_path={pp}", proto_paths))

packages_option = "packages=" + ",".join(map(str, packages)) if packages else ""
packages_option = "packages=" + ",".join(packages) if packages else ""

if generator == "python_proto":

proto_args = [
f"--plugin=python",
f"--python_out={output_location}"
f"--python_out={output_location}",
f"--pyi_out={output_location}"
]

elif generator == "python_grpc":

proto_args = [
f"--grpc_python_out={output_location}"
]

else:
Expand Down Expand Up @@ -187,7 +292,7 @@ def cli_args():

parser.add_argument(
"generator", type=str, metavar="generator",
choices=["python_proto", "python_runtime", "python_doc", "api_doc"],
choices=["python_proto", "python_grpc", "python_runtime", "python_doc", "api_doc"],
help="The documentation targets to build")

parser.add_argument(
Expand All @@ -199,36 +304,55 @@ def cli_args():
help="Location where output files will be generated, relative to the repository root")

parser.add_argument(
"--package", type=pathlib.Path, action="append", dest="packages",
"--package", type=str, action="append", dest="packages",
help="Filter packages to include in generated output (TRAC generator only, default = generate all packages)")

parser.add_argument(
"--relocate", type=str, required=False, dest="relocate",
help="Relocate packages in the generated code (source:dest e.g. tracdap:tracdap.rt._grpc)")

parser.add_argument(
"--no-internal", default=False, action="store_true", dest="no_internal",
help="Ignore internal messages and APIs (for producing public-facing APIs and documentation)")

return parser.parse_args()


def main():

script_args = cli_args()
proto_paths = list(map(lambda pp: ROOT_DIR.joinpath(pp), script_args.proto_paths))
output_dir = ROOT_DIR.joinpath(script_args.out)
packages = script_args.packages

# Provide some key extension protos from Google to handle web api annotations
with ProtoApiExtensions() as proto_ext_path:
with ProtoCtrlContext() as context:

# Include all available proto paths when generating proto args, so they're available to protoc if referenced
all_proto_paths = proto_paths + [proto_ext_path]
protoc_args = build_protoc_args(script_args.generator, all_proto_paths, output_dir, packages)
context.copy_common_protos()
context.copy_google_api_protos()

if script_args.generator == "api_doc":
ignore_trusted_api = True
if script_args.relocate:
for proto_path in script_args.proto_paths:
_copytree(proto_path, context.proto_path)
relocate_proto_package(context.proto_path, script_args.relocate)
proto_paths = [context.proto_path]
else:
ignore_trusted_api = False
proto_paths = [ROOT_DIR.joinpath(pp) for pp in script_args.proto_paths]

# Only look for files to generate that were explicitly specified
protoc_files = list(find_proto_files(proto_paths, ignore_trusted_api))
protoc_files = list(find_proto_files(proto_paths, script_args.packages, script_args.no_internal))

# Now add supporting proto paths (needed during generation)
proto_paths.append(context.support_path)

protoc_args = build_protoc_args(script_args.generator, proto_paths, output_dir, packages)
protoc_argv = platform_args(protoc_args, protoc_files)

if script_args.generator == "python_grpc":
protoc_executable = sys.executable
protoc_argv = [sys.executable, "-m", "grpc_tools.protoc"] + protoc_argv[1:]
else:
protoc_executable = protoc.PROTOC_EXE

newline = "\n"
_log.info(f"Running protoc: {newline.join(map(str, protoc_argv))}")

Expand All @@ -237,7 +361,7 @@ def main():

# Always run protoc from the codegen folder
# This makes finding the TRAC protoc plugin much easier
result = sp.run(executable=protoc.PROTOC_EXE, args=protoc_argv, cwd=SCRIPT_DIR, stdout=sp.PIPE)
result = sp.run(executable=protoc_executable, args=protoc_argv, cwd=SCRIPT_DIR, stdout=sp.PIPE)

# We are not piping stdout/stderr
# Logs and errors will show up as protoc is running
Expand Down
7 changes: 3 additions & 4 deletions dev/codegen/protoc-gen-trac.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@

class TracPlugin:

# TODO: Pass this in as an option
TARGET_PACKAGE = "tracdap.rt"

def __init__(self, pb_request: pb_plugin.CodeGeneratorRequest):

logging_format = f"%(levelname)s %(name)s: %(message)s"
Expand Down Expand Up @@ -71,7 +68,9 @@ def generate(self):
# Build a static type map in a separate first pass
type_map = generator.build_type_map(self._request.proto_file)

sorted_files = input_files # sorted(input_files, key=lambda f: f.package)
# Sort files by package, so all the files in a package are processed at once
# Required for package aggregation, when a whole package must be output as a single file
sorted_files = sorted(input_files, key=lambda f: f.package)
packages = it.groupby(sorted_files, lambda f: f.package)

for package, files in packages:
Expand Down
3 changes: 2 additions & 1 deletion dev/docgen/docgen-ctrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ def main_codegen(self):
"--proto_path", "tracdap-api/tracdap-services/src/main/proto",
"--proto_path", "tracdap-api/tracdap-metadata/src/main/proto",
"--out", "build/doc/code/platform_api",
"--package", "tracdap"]
"--package", "tracdap",
"--no-internal"]

self._run_subprocess(codegen_exe, codegen_args)

Expand Down
3 changes: 2 additions & 1 deletion dev/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Requirements for all the Python build scripts

# Protoc is required for the codegen scripts which work off the API proto files
protobuf ~= 4.25.3
protobuf ~= 5.27.4
protoc-wheel-0 ~= 25.0
grpcio-tools ~= 1.66.1
googleapis-common-protos ~= 1.63

# Doc generation
Expand Down
Loading

0 comments on commit f3b849d

Please sign in to comment.