Torch-MLIR LTC Backend Lowering Codegen #621
@@ -0,0 +1,296 @@
import argparse
import hashlib
import os
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from shutil import which
from textwrap import dedent

import yaml

TORCH_MLIR_DIR = Path(__file__).parent.parent.resolve()
TORCH_DIR = TORCH_MLIR_DIR.parent.joinpath("pytorch")

sys.path.append(str(TORCH_DIR.joinpath("tools")))

# PyTorch's LTC backend autogen script
import codegen.dest.lazy_ir
import codegen.gen_lazy_tensor
from codegen.api.lazy import LazyIrSchema
from codegen.gen import get_grouped_native_functions, parse_native_yaml
from codegen.model import NativeFunctionsGroup
from codegen.gen_backend_stubs import parse_backend_yaml
from codegen.api.types import kernel_signature
from codegen.dest.lazy_ir import ComputeShapeSignature
from codegen.gen_lazy_tensor import parse_full_codegen_ops


def generate_native_functions(aten_ops_file: Path, out_file: Path):
    print("Generating Native Functions Yaml")

    native_yaml_path = TORCH_DIR.joinpath(
        "aten", "src", "ATen", "native", "native_functions.yaml"
    )

    parsed_yaml = parse_native_yaml(native_yaml_path)
    native_functions = parsed_yaml.native_functions
    grouped_native_functions = get_grouped_native_functions(native_functions)

    def get_native_function_name(f):
        func = f.func if hasattr(f, "func") else f.functional.func
        return str(func.name)

    aten_funcs = set(map(get_native_function_name, grouped_native_functions))

    # Ops that currently break LTC autogen; the specific failure is noted inline
    blacklist = {
        "arange",  # Error: Code below assumes there is at least one tensor arg
        "bernoulli",  # Error: TODO add support for type BaseType(name=<BaseTy.Generator: 1>)
        "bernoulli_",  # Error: TODO add support for type BaseType(name=<BaseTy.Generator: 1>)
        "cat",  # Error: TODO not sure if there are other valid types to handle here
        "clone",  # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
        "contiguous",  # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
        "empty_like",  # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
        "empty.memory_format",  # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
        "index.Tensor",  # Error: TODO not sure if there are other valid types to handle here
        "index_put",  # Error: TODO not sure if there are other valid types to handle here
        "index_put_",  # Error: TODO not sure if there are other valid types to handle here
        "ones",  # Error: Code below assumes there is at least one tensor arg
        "ones_like",  # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
        "resize_",  # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
        "stack",  # Error: TODO not sure if there are other valid types to handle here
        "to.dtype",  # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
        "to.other",  # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
        "uniform_",  # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
        "zeros",  # Error: Code below assumes there is at least one tensor arg
        "zeros_like",  # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
    }

    # Additional ops for which autogen is supported but which don't compile yet
    blacklist |= {"item", "size", "where"}

    # Supported ops that we don't want to run the full codegen for,
    # primarily view ops
    supported = {
        "expand",
        # "native_batch_norm_backward",
        "native_batch_norm",
        "permute",
        "repeat",
        "squeeze",
        "t",
        "unsqueeze",
        "view",
    }

    if which("rg") is not None:  # use ripgrep if available, as it's much faster
        cmd = ["rg", "-o", "-N", r"aten::[0-9a-zA-Z_\.]+"]
    else:
        cmd = ["grep", "-o", r"aten::[0-9a-zA-Z_\.]\+"]

    output = (
        subprocess.check_output(
            cmd + [str(aten_ops_file)],
            encoding="utf-8",
        )
        .strip()
        .split(os.linesep)
    )

    # Process the extracted op list
    ops = []
    supported_ops = []
    skipped = []

    for op in output:
        op = op[6:]  # strip the "aten::" prefix
        opname = op.split(".")[0]

        if opname in blacklist or op in blacklist:
            continue

        if opname in supported:
            supported_ops.append(op)
            continue

        if op not in aten_funcs:
            skipped.append(op)
            continue

        ops.append(op)

    opnames = sorted(set(ops))

    with out_file.open("w") as f:
        yaml.dump(
            {
                "backend": "Lazy",
                "cpp_namespace": "torch_lazy_tensors",
                "full_codegen": opnames,
                "supported": sorted(supported_ops),
            },
            f,
            default_flow_style=False,
        )
        f.write(
            dedent(
                """

                # Skipped ops (supported by Torch-MLIR but no equivalent native function)
                """
            )
            + os.linesep.join(f"# - {op}" for op in sorted(skipped))
        )

    return parsed_yaml, grouped_native_functions


@dataclass(frozen=True)
class MlirLazyIr(codegen.gen_lazy_tensor.dest.LazyIR):
    lowering_function_type: str = "torch::lazy::MlirFunction"
    lowering_context_type: str = "torch::lazy::MlirLoweringContext*"
    lowering_return_type: str = "torch::lazy::MlirOpVector"

    def lowering_body(self, f):
        func = (
            f.functional.func if isinstance(f, NativeFunctionsGroup) else f.func
        )
        schema = LazyIrSchema(func)

        return f"""
        UNIMPLEMENTED_ERROR(
            "'{func}' lowering not yet implemented"
        );
        """.rstrip()


def generate_backend(
    source_yaml: Path,
    backend_path: Path,
    parsed_yaml: dict,
    grouped_native_functions: list,
):
    print("Running Lazy Tensor Autogen")

    # No fallback code allowed
    def gen_fallback_code(*args, **kwargs):
        return ""

    codegen.dest.lazy_ir.gen_fallback_code = gen_fallback_code

    codegen.gen_lazy_tensor.run(
        source_yaml=str(source_yaml),
        output_dir=str(backend_path),
        dry_run=False,
        impl_path=str(backend_path.joinpath("aten_ltc_mlir_type.cpp")),
        gen_ts_lowerings=False,
        node_base="torch::lazy::MlirNode",
        node_base_hdr=str(backend_path.joinpath("mlir_node.h")),
        tensor_class="torch::lazy::LazyTensor",
        tensor_class_hdr="torch/csrc/lazy/core/tensor.h",
        shape_inference_hdr=str(backend_path.joinpath("LazyShapeInference.h")),
        lazy_ir_cls=MlirLazyIr,
    )
    # Remove lazy_tensor_core imports
    subprocess.check_call(
        [
            "sed",
            "-i",
            "/lazy_tensor_core/d",
            str(backend_path.joinpath("LazyNativeFunctions.cpp")),
        ]
    )

[Review comment] Let me know if you have a proposal for how to make this cleaner. I'm working on getting lazy tensor/TorchScript codegen running in the main PyTorch build and can consider making some API changes to help here.

[Reply] In general, removing all …

    # Autogenerate shape inference placeholders
    import re

    sig_re = re.compile(
        r"std::vector<Shape> (?P<name>[_a-zA-Z0-9]+)\((?P<signature>.+)\);"
    )
    upstream_shape_inference_decls = set(
        (name, signature)
        for name, signature in sig_re.findall(
            TORCH_DIR.joinpath(
                "torch", "csrc", "lazy", "core", "shape_inference.h"
            ).read_text()
        )
    )
    shape_inference_decls = backend_path.joinpath("LazyShapeInference.h").read_text()

    shape_inference_defs = []
    for name, signature in sig_re.findall(shape_inference_decls):
        if (name, signature) in upstream_shape_inference_decls:
            continue

        shape_inference_defs.append(
            dedent(
                f"""
                std::vector<Shape> {name}({signature}) {{
                    UNIMPLEMENTED_ERROR("{name}");
                }}
                """
            )
        )
backend_path.joinpath("LazyShapeInference.cpp").write_text( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note that I've just landed pytorch/pytorch#72756 so you may need to update this part. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. e.g. you probably shouldn't have to do anything at all for shape inference, other than contribute new functions to it if found missing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @wconstab are lazy shape inference declarations not being autogenerated at all anymore? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. correct: the new way is the header and impl is handwritten, but the generator spits out a message saying 'missing this function please add it' and gives you a signature you can copy/paste into the header. This way backends can share the inference functions and there isn't an issue that each backend generates a different header that compiles against a shared implementation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Got it. Thanks! |
||
dedent( | ||
""" | ||
// This file contains autogenerated Lazy Shape Inference placeholders | ||
// for ops that dont have a corresponding structured kernel | ||
#include "LazyShapeInference.h" | ||
#include "../utils/exception.h" | ||
|
||
|
||
namespace torch {{ | ||
namespace lazy {{ | ||
|
||
{} | ||
|
||
}} // namespace lazy | ||
}} // namespace torch | ||
""" | ||
).format("".join(shape_inference_defs)) | ||
) | ||
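A minimal sketch of that copy/paste workflow, under stated assumptions: the op (relu) and the exact wording are illustrative only, not taken from the generator's actual output.

// Hypothetical copy/paste target: the upstream generator reports a missing
// shape function and prints a declaration like this, which goes into
// torch/csrc/lazy/core/shape_inference.h:
std::vector<Shape> compute_shape_relu(const at::Tensor& self);

// Handwritten implementation in shape_inference.cpp. relu is elementwise,
// so the output shape and dtype simply mirror the input:
std::vector<Shape> compute_shape_relu(const at::Tensor& self) {
  return {Shape(self.scalar_type(), self.sizes().vec())};
}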

def main(args):
    script_path = Path(__file__).resolve()
    aten_ops_file = TORCH_MLIR_DIR.joinpath(
        "include", "torch-mlir", "Dialect", "Torch", "IR", "GeneratedAtenOps.td"
    )
    assert aten_ops_file.exists()
    native_functions = TORCH_MLIR_DIR.joinpath(
        "generated_native_functions.yaml"
    )

    prev_hash = None
    hash_file = TORCH_MLIR_DIR.joinpath("generated_backend.hash")
    if hash_file.exists():
        prev_hash = hash_file.read_text().strip()

    # Hash the script and its inputs so codegen only reruns when something changed
    m = hashlib.sha256()
    m.update(script_path.read_bytes())
    m.update(aten_ops_file.read_bytes())
    if native_functions.exists():
        m.update(native_functions.read_bytes())

    new_hash = m.hexdigest().strip()

    if args.force or new_hash != prev_hash:
        hash_file.write_text(new_hash)
        parsed_yaml, grouped_native_functions = generate_native_functions(
            aten_ops_file, native_functions
        )

        backend_path = TORCH_MLIR_DIR.joinpath(
            "python", "torch_mlir", "csrc", "backend"
        )
        generate_backend(
            native_functions, backend_path, parsed_yaml, grouped_native_functions
        )


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
    )
    main(parser.parse_args())
[Review comment] I'm curious, how do you plan to code-generate the lowerings to MLIR? I guess you have the aten-mlir dialect and can piggy-back off that?

[Reply] The design for that is still up in the air. However, I was thinking that I could do something similar to what you did here:
https://github.com/pytorch/pytorch/blob/d9896b8b4f198fff43ff8003553c13a26381c7ce/tools/codegen/dest/lazy_ts_lowering.py#L37
and add a generic LowerMlirBuiltin function that handles lowering to MLIR. There is a Torch-MLIR ATen dialect that will be used here, yes.
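As a rough sketch of that proposal (not code from this PR): a generated Lower method could gather its operands and defer to one shared helper. AddNode, LowerMlirBuiltin, and the operand-handling details are hypothetical names here; the torch::lazy::Mlir* types are the ones declared by MlirLazyIr above.

// Illustrative only: what codegen might emit for a node's Lower method.
torch::lazy::MlirOpVector AddNode::Lower(
    torch::lazy::MlirFunction function,
    torch::lazy::MlirLoweringContext* loctx) const {
  std::vector<torch::jit::NamedValue> arguments;
  // Fetch the already-lowered value for each of this node's operands.
  for (const torch::lazy::Output& operand : operands()) {
    arguments.emplace_back(loctx->GetOutputOp(operand));
  }
  // A single generic helper (analogous to the TS backend's LowerBuiltin)
  // would map the ATen op name to the matching Torch-MLIR ATen dialect op.
  return LowerMlirBuiltin(function, loctx, "aten::add", arguments);
}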
[Reply] Yeah, I'm less concerned with how you call or generate some function; I'm more curious where the source of truth for the lowering is maintained (and whether you have some way of actually generating the lowering, as opposed to hand-implementing it).

[Reply] I would definitely prefer if we went through the JIT IR path rather than directly creating Lazy -> MLIR. That would let us more easily share op decompositions and other infra built at the JIT IR level. Ideally upstream would allow us to share all the ts_backend code and just need to implement the runtime bits and TS -> MLIR bits, but not LTC Node -> MLIR. cc @Chillee

[Reply] cc @eellison

[Reply] I think(?) this makes sense? Torch-MLIR has spent the most time investing in a TorchScript => MLIR lowering, and LazyTensor has spent a lot of effort on an LTC => TorchScript lowering. Is there any reason these couldn't be (relatively) trivially combined, instead of adding a whole new lowering path for MLIR? I don't know the other design restrictions here, but that makes sense to me.

[Reply] Yeah, I think this should be the best plan, and it was our intent, aside from one issue I'm aware of: we haven't landed the entire TS backend yet, and it's tough for the MLIR team to take a C++ build dep on our code until we land it all and they can pick it up in nightlies. I'm trying to push for landing it sooner rather than later, but it's at least a month out still.
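If that combined path pans out, the glue could be as thin as the following sketch. Everything here is hypothetical: lowerLazyGraph, compileToTsGraph (standing in for the TS backend's existing LTC -> TorchScript lowering), and importTsGraphToMlir (standing in for Torch-MLIR's existing TorchScript importer) are placeholder names, not real APIs.

// Hypothetical glue for the LTC -> TorchScript -> MLIR path discussed above.
mlir::ModuleOp lowerLazyGraph(
    const std::vector<const torch::lazy::Node*>& post_order,
    torch::lazy::TSLoweringContext* loctx) {
  // Stage 1 (would come from upstream): lower LTC nodes to a torch::jit::Graph.
  std::shared_ptr<torch::jit::Graph> graph = compileToTsGraph(post_order, loctx);
  // Stage 2 (would come from Torch-MLIR): import the TorchScript graph as MLIR.
  return importTsGraphToMlir(graph);
}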