Torch-MLIR LTC Backend Lowering Codegen #621

Merged
11 changes: 11 additions & 0 deletions .gitignore
@@ -17,3 +17,14 @@ __pycache__
*.egg-info
*.whl
/wheelhouse


# Autogenerated files
/generated_native_functions.yaml
/generated_backend.hash
/python/torch_mlir/csrc/backend/LazyLazyIr.h
/python/torch_mlir/csrc/backend/LazyNativeFunctions.cpp
/python/torch_mlir/csrc/backend/LazyNativeFunctions.h
/python/torch_mlir/csrc/backend/LazyShapeInference.h
/python/torch_mlir/csrc/backend/LazyShapeInference.cpp
/python/torch_mlir/csrc/backend/RegisterLazy.cpp
290 changes: 290 additions & 0 deletions build_tools/autogen_ltc_backend.py
@@ -0,0 +1,290 @@
import argparse
import hashlib
import os
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from shutil import which
from textwrap import dedent

import yaml

TORCH_MLIR_DIR = Path(__file__).parent.parent.resolve()
TORCH_DIR = TORCH_MLIR_DIR.parent.joinpath("pytorch")

sys.path.append(str(TORCH_DIR.joinpath("tools")))

# PyTorch's LTC backend autogen script
import codegen.dest.lazy_ir
import codegen.gen_lazy_tensor
from codegen.api.lazy import LazyIrSchema
from codegen.gen import get_grouped_native_functions, parse_native_yaml
from codegen.model import NativeFunctionsGroup
from codegen.gen_backend_stubs import parse_backend_yaml
from codegen.api.types import kernel_signature
from codegen.dest.lazy_ir import ComputeShapeSignature
from codegen.gen_lazy_tensor import parse_full_codegen_ops


def generate_native_functions(aten_ops_file: Path, out_file: Path):
print("Generating Native Functions Yaml")

native_yaml_path = TORCH_DIR.joinpath(
"aten", "src", "ATen", "native", "native_functions.yaml"
)

parsed_yaml = parse_native_yaml(native_yaml_path)
native_functions = parsed_yaml.native_functions
grouped_native_functions = get_grouped_native_functions(native_functions)

def get_native_function_name(f):
func = f.func if hasattr(f, "func") else f.functional.func
return str(func.name)

aten_funcs = set(map(get_native_function_name, grouped_native_functions))

# Ops that LTC autogen cannot handle yet; each entry notes the codegen error it triggers
blacklist = {
"arange", # Error: Code below assumes there is at least one tensor arg
"bernoulli", # Error: TODO add support for type BaseType(name=<BaseTy.Generator: 1>)
"bernoulli_", # Error: TODO add support for type BaseType(name=<BaseTy.Generator: 1>)
"cat", # Error: TODO not sure if there are other valid types to handle here
"clone", # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
"contiguous", # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
"empty_like", # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
"empty.memory_format", # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
"index.Tensor", # Error: TODO not sure if there are other valid types to handle here
"index_put", # Error: TODO not sure if there are other valid types to handle here
"index_put_", # Error: TODO not sure if there are other valid types to handle here
"ones", # Error: Code below assumes there is at least one tensor arg
"ones_like", # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
"resize_", # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
"stack", # Error: TODO not sure if there are other valid types to handle here
"to.dtype", # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
"to.other", # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
"uniform_", # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
"zeros", # Error: Code below assumes there is at least one tensor arg
"zeros_like", # Error: TODO add support for type BaseType(name=<BaseTy.MemoryFormat: 12>)
}

# Additional ops for which autogen is supported but which don't compile yet
blacklist |= {"item", "size", "where"}

# List of supported ops that we don't want to do the full codegen for
# primarily view ops
supported = {
"expand",
# "native_batch_norm_backward",
"native_batch_norm",
"permute",
"repeat",
"squeeze",
"t",
"unsqueeze",
"view",
}

if which("rg") is not None:  # use ripgrep if available, as it's much faster
cmd = ["rg", "-o", "-N", r"aten::[0-9a-zA-Z_\.]+"]
else:
cmd = ["grep", "-o", r"aten::[0-9a-zA-Z_\.]\+"]

output = (
subprocess.check_output(
cmd + [str(aten_ops_file)],
encoding="utf-8",
)
.strip()
.split(os.linesep)
)

# process ops list
ops = []
supported_ops = []
skipped = []

for op in output:
op = op[6:]
opname = op.split(".")[0]

if opname in blacklist or op in blacklist:
continue

if opname in supported:
supported_ops.append(op)
continue

if op not in aten_funcs:
skipped.append(op)
continue

ops.append(op)

opnames = sorted(set(ops))

with out_file.open("w") as f:
yaml.dump(
{
"backend": "Lazy",
"cpp_namespace": "torch_lazy_tensors",
"full_codegen": opnames,
"supported": sorted(supported_ops),
},
f,
default_flow_style=False,
)
f.write(
dedent(
"""

# Skipped ops (supported by Torch-MLIR but no equivalent native function)
"""
)
+ os.linesep.join(f"# - {op}" for op in sorted(skipped))
)

return parsed_yaml, grouped_native_functions


@dataclass(frozen=True)
class MlirLazyIr(codegen.gen_lazy_tensor.dest.LazyIR):
lowering_function_type: str = "torch::lazy::MlirFunction"
lowering_context_type: str = "torch::lazy::MlirLoweringContext*"
lowering_return_type: str = "torch::lazy::MlirOpVector"

def lowering_body(self, f):
func = (
f.functional.func if isinstance(f, NativeFunctionsGroup) else f.func
)
schema = LazyIrSchema(func)

return f"""
UNIMPLEMENTED_ERROR(
Collaborator

I'm curious, how do you plan to code-generate the lowerings to MLIR? I guess you have the aten-mlir dialect and can piggy-back off that?

Collaborator Author

The design for that is still up in the air. However, I was thinking that I could do something similar to what you did here:
https://github.com/pytorch/pytorch/blob/d9896b8b4f198fff43ff8003553c13a26381c7ce/tools/codegen/dest/lazy_ts_lowering.py#L37
and add a generic LowerMlirBuiltin function that handles the lowering to MLIR. There is a Torch-MLIR ATen dialect that will be used here, yes.
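A minimal sketch of what such a generated lowering body could look like, assuming a generic torch::lazy::LowerMlirBuiltin helper analogous to the TS backend's LowerTSBuiltin; the helper, the GetOutputOp method on MlirLoweringContext, and the exact signature below are assumptions, not anything defined in this PR:

// Hypothetical generated lowering body; only the Mlir* type names come from
// the MlirLazyIr configuration above, everything else is assumed.
torch::lazy::MlirOpVector Lower(torch::lazy::MlirFunction function,
                                torch::lazy::MlirLoweringContext* loctx) const {
  std::vector<torch::jit::NamedValue> arguments;
  // Collect this node's operands as inputs to the generic builtin lowering.
  for (const torch::lazy::Output& operand : operands()) {
    arguments.emplace_back(loctx->GetOutputOp(operand));
  }
  // A single shared helper would map the ATen builtin onto the Torch-MLIR
  // ATen dialect instead of hand-writing one lowering per op.
  return torch::lazy::LowerMlirBuiltin(function, op().op, arguments);
}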

Collaborator

Yeah, I'm less concerned with how you call or generate some function; I'm more curious where the source of truth for the lowering is maintained (and whether you had some way of actually generating the lowering, as opposed to hand-implementing it).

Contributor
@silvasean (Feb 28, 2022)

I would definitely prefer if we went through the JIT IR path rather than directly creating Lazy -> MLIR. That would let us more easily share op decompositions and other infra built at the JIT IR level.

Ideally upstream would allow us to share all the ts_backend code and just need to implement the runtime bits and TS -> MLIR bits, but not LTC Node -> MLIR.

cc @Chillee

Contributor

> I would definitely prefer if we went through the JIT IR path rather than directly creating Lazy -> MLIR.

I think(?) this makes sense? Torch-MLIR has spent the most time investing in a TorchScript => MLIR lowering, and LazyTensor has spent a lot of effort on an LTC => TorchScript lowering. Is there any reason these couldn't be (relatively) trivially combined, instead of adding a whole new lowering path for MLIR?

I don't know the other design restrictions here, but that makes sense to me.

Collaborator

Yeah, aside from one issue I'm aware of, I think this should be the best plan and was our intent. The issue is that we haven't landed the entire TS backend yet, and it's tough for the MLIR team to take a C++ build dep on our code until we land it all and they can pick it up in nightlies. I'm trying to push for landing it sooner rather than later, but it's still at least a month out.

"'{func}' lowering not yet implemented"
);
""".rstrip()


def generate_backend(
source_yaml: Path, backend_path: Path, parsed_yaml: dict, grouped_native_functions: list
):
print("Running Lazy Tensor Autogen")

# No fallback code allowed
def gen_fallback_code(*args, **kwargs):
return ""

codegen.dest.lazy_ir.gen_fallback_code = gen_fallback_code

codegen.gen_lazy_tensor.run(
source_yaml=str(source_yaml),
output_dir=str(backend_path),
dry_run=False,
impl_path=str(backend_path.joinpath("aten_ltc_mlir_type.cpp")),
gen_ts_lowerings=False,
node_base="torch::lazy::MlirNode",
node_base_hdr=str(backend_path.joinpath("mlir_node.h")),
tensor_class="torch::lazy::LazyTensor",
tensor_class_hdr="torch/csrc/lazy/core/tensor.h",
lazy_ir_cls=MlirLazyIr,
)

# Remove lazy_tensor_core imports
subprocess.check_call(
Collaborator

Let me know if you have a proposal for how to make this cleaner. I'm working on getting lazy tensor/TorchScript codegen running in the main PyTorch build and can consider making some API changes to help here.

Collaborator Author

In general, removing all lazy_tensor_core includes would be helpful, I think. I don't think any backend besides the TorchScript backend needs to include anything from that directory; the only includes a generic vendor backend should need are in torch/csrc/lazy.

[
"sed",
"-i",
"/lazy_tensor_core/d",
str(backend_path.joinpath("LazyNativeFunctions.cpp")),
]
)

# Autogenerate shape inference placeholders
import re

sig_re = re.compile(r"std::vector<Shape> (?P<name>[_a-zA-Z0-9]+)\((?P<signature>.+)\);")
shape_inference_decls = backend_path.joinpath("LazyShapeInference.h").read_text()

shape_inference_defs = []
for name, signature in sig_re.findall(shape_inference_decls):
shape_inference_defs.append(
dedent(
f"""
std::vector<Shape> {name}({signature}) {{
UNIMPLEMENTED_ERROR("{name}");
}}
"""
)
)

backend_path.joinpath("LazyShapeInference.cpp").write_text(
Collaborator

Note that I've just landed pytorch/pytorch#72756, so you may need to update this part.

Collaborator

E.g., you probably shouldn't have to do anything at all for shape inference, other than contribute new functions when any are found to be missing.

Collaborator Author

@wconstab Are lazy shape inference declarations not being autogenerated at all anymore?

Collaborator

Correct: the new way is that the header and impl are handwritten, but the generator spits out a message saying "missing this function, please add it" and gives you a signature you can copy/paste into the header. This way backends can share the inference functions, and we avoid the issue of each backend generating a different header that compiles against a shared implementation.
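To make that workflow concrete, here is a minimal sketch; the op, function name, and Shape construction are illustrative assumptions, not taken from this change. The generator reports the missing signature, you copy it into the shared handwritten header, and you add one definition that every backend can reuse:

// Declaration copied from the generator's "missing this function" message
// into the shared handwritten header (illustrative op):
std::vector<torch::lazy::Shape> compute_shape_relu(const at::Tensor& self);

// Handwritten definition in the shared implementation file; for an
// elementwise op the output shape and dtype simply match the input.
std::vector<torch::lazy::Shape> compute_shape_relu(const at::Tensor& self) {
  return {torch::lazy::Shape(self.scalar_type(), self.sizes().vec())};
}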

Collaborator Author

Got it. Thanks!

dedent(
"""
// This file contains autogenerated Lazy Shape Inference placeholders
// for ops that don't have a corresponding structured kernel
#include "LazyShapeInference.h"
#include "../utils/exception.h"


namespace torch_lazy_tensors {{
namespace ir {{
namespace ops {{

using Shape = torch::lazy::Shape;

{}

}} // namespace ops
}} // namespace ir
}} // namespace torch_lazy_tensors
"""
).format("".join(shape_inference_defs))
)


def main(args):
script_path = Path(__file__).resolve()
aten_ops_file = TORCH_MLIR_DIR.joinpath(
"include", "torch-mlir", "Dialect", "Torch", "IR", "GeneratedAtenOps.td"
)
assert aten_ops_file.exists()
native_functions = TORCH_MLIR_DIR.joinpath(
"generated_native_functions.yaml"
)

prev_hash = None
hash_file = TORCH_MLIR_DIR.joinpath("generated_backend.hash")
if hash_file.exists():
prev_hash = hash_file.read_text().strip()

m = hashlib.sha256()
m.update(script_path.read_bytes())
m.update(aten_ops_file.read_bytes())
if native_functions.exists():
m.update(native_functions.read_bytes())

new_hash = m.hexdigest().strip()

if args.force or new_hash != prev_hash:
hash_file.write_text(new_hash)
parsed_yaml, grouped_native_functions = generate_native_functions(
aten_ops_file, native_functions
)

backend_path = TORCH_MLIR_DIR.joinpath(
"python", "torch_mlir", "csrc", "backend"
)
generate_backend(
native_functions, backend_path, parsed_yaml, grouped_native_functions
)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"-f",
"--force",
action="store_true",
)
main(parser.parse_args())
16 changes: 11 additions & 5 deletions python/torch_mlir/csrc/CMakeLists.txt
@@ -20,9 +20,14 @@ link_directories("${TORCH_INSTALL_PREFIX}/lib")


add_library(torch_mlir_ltc_backend SHARED
-  backend/backend_impl.cc
-  backend/mlir_lowering_context.cc
-  backend/mlir_node.cc
+  backend/aten_eager_fallback.cpp
+  backend/aten_ltc_mlir_type.cpp
+  backend/backend_impl.cpp
+  backend/LazyNativeFunctions.cpp
+  backend/LazyShapeInference.cpp
+  backend/mlir_lowering_context.cpp
+  backend/mlir_node.cpp
+  backend/RegisterLazy.cpp
)

target_link_libraries(torch_mlir_ltc_backend
@@ -32,12 +32,13 @@ target_link_libraries(torch_mlir_ltc_backend
torch_python
)

-message(STATUS "TORCH_CXXFLAGS=${TORCH_CXXFLAGS} -Wpedantic")
+message(STATUS "TORCH_CXXFLAGS=${TORCH_CXXFLAGS} -Wno-pedantic")
set_target_properties(torch_mlir_ltc_backend PROPERTIES
LIBRARY_OUTPUT_DIRECTORY "${TORCH_MLIR_PYTHON_PACKAGES_DIR}/torch_mlir/"
OUTPUT_NAME _MLIR_LTC
PREFIX "${PYTHON_MODULE_PREFIX}"
SUFFIX "${PYTHON_MODULE_EXTENSION}"
CXX_VISIBILITY_PRESET "hidden"
-  COMPILE_FLAGS "${TORCH_CXXFLAGS} -Wpedantic"
+  COMPILE_FLAGS "${TORCH_CXXFLAGS} -Wno-pedantic"
)

38 changes: 38 additions & 0 deletions python/torch_mlir/csrc/backend/aten_eager_fallback.cpp
@@ -0,0 +1,38 @@
//===- aten_eager_fallback.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Also available under a BSD-style license. See LICENSE.
//
//===----------------------------------------------------------------------===//
// This file is adapted from pytorch/pytorch
// https://github.com/pytorch/pytorch/blob/lazy_tensor_staging/lazy_tensor_core/lazy_tensor_core/csrc/ts_backend/aten_eager_fallback.cpp
//===----------------------------------------------------------------------===//

#include <unordered_map>

#include <torch/csrc/lazy/backend/backend_interface.h>
#include <torch/csrc/lazy/core/metrics.h>

#include "aten_eager_fallback.h"
#include "../utils/exception.h"

namespace torch_lazy_tensors {

static std::unordered_map<std::string, ::torch::lazy::Counter*> _eager_fallback_counters;

bool force_eager_fallback(c10::Symbol op) {
return false; // Never force eager fallback
}

void ltc_eager_fallback(const c10::OperatorHandle& op,
torch::jit::Stack* stack) {
UNSUPPORTED_ERROR("ltc_eager_fallback is not supported");
}

TORCH_LIBRARY_IMPL(_, Lazy, m) {
m.fallback(torch::CppFunction::makeFromBoxedFunction<&ltc_eager_fallback>());
}

} // namespace torch_lazy_tensors