Skip to content

Commit

Permalink
[Doc] Quick Start
Browse files Browse the repository at this point in the history
This PR introduces a new quick start tutorial to the documentation.
  • Loading branch information
Hzfengsy committed Aug 22, 2024
1 parent 1ca9833 commit 39f3ccb
Show file tree
Hide file tree
Showing 6 changed files with 204 additions and 1 deletion.
1 change: 0 additions & 1 deletion docs/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
doxygen
modules
tutorials
6 changes: 6 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ def jupyter_notebook(script_blocks, gallery_conf, target_dir, real_func):
from sphinx_gallery.sorting import ExplicitOrder

examples_dirs = [
# legacy tutorial structure under gallery folder
tvm_path.joinpath("gallery", "tutorial"),
tvm_path.joinpath("gallery", "how_to", "compile_models"),
tvm_path.joinpath("gallery", "how_to", "deploy_models"),
Expand All @@ -419,9 +420,12 @@ def jupyter_notebook(script_blocks, gallery_conf, target_dir, real_func):
tvm_path.joinpath("gallery", "how_to", "work_with_microtvm"),
tvm_path.joinpath("gallery", "how_to", "extend_tvm"),
tvm_path.joinpath("vta", "tutorials"),
# New tutorial structure under docs folder
tvm_path.joinpath("docs", "get_started", "tutorials"),
]

gallery_dirs = [
# legacy tutorial structure under gallery folder
"tutorial",
"how_to/compile_models",
"how_to/deploy_models",
Expand All @@ -433,6 +437,8 @@ def jupyter_notebook(script_blocks, gallery_conf, target_dir, real_func):
"how_to/work_with_microtvm",
"how_to/extend_tvm",
"topic/vta/tutorials",
# New tutorial structure under docs folder
"get_started/tutorials/",
]


Expand Down
2 changes: 2 additions & 0 deletions docs/get_started/tutorials/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Get Started
-----------
193 changes: 193 additions & 0 deletions docs/get_started/tutorials/quick_start.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
.. _quick_start:
Quick Start
===========
This tutorial is for people who are new to Apache TVM. It uses a simple example
to show how to use Apache TVM to compile a simple neural network.
.. contents:: Table of Contents
:local:
:depth: 2
"""

################################################################################
# Overview
# --------
# Apache TVM is a machine learning compilation framework, following the principle of
# **Python-first development** and **universal deployment**. It takes in pre-trained
# machine learning models, compiles and generates deployable modules that can be embedded
# and run everywhere.
# Apache TVM also enables customizing optimization processes to introduce new optimizations,
# libraries, codegen and more.
#
# Apache TVM can help to:
#
# - **Optimize** performance of ML workloads, composing libraries and codegen.
# - **Deploy** ML workloads to a diverse set of new environments, including new runtime and new
# hardware.
# - **Continuously improve and customize** ML deployment pipeline in Python by quickly customizing
# library dispatching, bringing in customized operators and code generation.

################################################################################
# Overall Flow
# ------------
# Next, we show the overall flow of using Apache TVM to compile a neural network model,
# including how to optimize, deploy and run the model.
# The overall flow is illustrated in the figure below:
#
# .. figure:: https://raw.githubusercontent.com/tlc-pack/web-data/main/images/design/tvm_overall_flow.svg
# :align: center
# :width: 80%
#
# The overall flow consists of the following steps:
#
# - **Construct or Import a Model**: Construct a neural network model or import a pre-trained
# model from other frameworks (e.g. PyTorch, ONNX), and create the TVM IRModule, which contains
# all the information needed for compilation, including high-level Relax functions for the
# computational graph, and low-level TensorIR functions for tensor programs.
# - **Perform Composable Optimizations**: Perform a series of optimization transformations,
# such as graph optimizations, tensor program optimizations, and library dispatching.
# - **Build and Universal Deployment**: Build the optimized model into a deployable module for the
# universal runtime, and execute it on different devices, such as CPU, GPU, or other accelerators.

################################################################################
# Construct or Import a Model
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Before we get started, let's construct a neural network model first.
# In this tutorial, to make things simple, we will define a two-layer MLP network
# directly in this script with the TVM Relax frontend, which has an API similar to PyTorch.
#

import tvm
from tvm import relax
from tvm.relax.frontend import nn


class MLPModel(nn.Module):
    """Two-layer MLP: Linear(784, 256) -> ReLU -> Linear(256, 10)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Apply fc1, then the ReLU activation, then fc2 to ``x``."""
        hidden = self.relu1(self.fc1(x))
        return self.fc2(hidden)


################################################################################
# Then we can export the model to TVM IRModule, which is the central intermediate representation
# in TVM.

# Describe the ``forward`` entry point: one float32 input ``x`` of shape (1, 784).
forward_spec = {"forward": {"x": nn.spec.Tensor((1, 784), "float32")}}
mod, param_spec = MLPModel().export_tvm(spec=forward_spec)
# Print the exported IRModule.
mod.show()

################################################################################
# Perform Optimization Transformations
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Apache TVM leverages ``pipeline`` to transform and optimize programs.
# The pipeline encapsulates a collection of transformations that achieves two goals (at the same level):
#
# - **Model optimizations**: such as operator fusion, layout rewrites.
# - **Tensor program optimization**: Map the operators to low-level implementations
# (either library or codegen)
#
# .. note::
# The two are goals, not stages of the pipeline. The two optimizations can be performed
# **at the same level**, or separately in two stages.
#
# .. note::
# In this tutorial we only demonstrate the overall flow, by leveraging the ``zero`` optimization
# pipeline, instead of optimizing for any specific target.

# Look up the pipeline registered under the name "zero", then run it on the module.
zero_pipeline = relax.get_pipeline("zero")
mod = zero_pipeline(mod)


################################################################################
# Build and Universal Deployment
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# After the optimization, we can build the model to a deployable module and run it on
# different devices.


import numpy as np

# Build the module for the LLVM target and load it into a VM on the CPU device.
target = tvm.target.Target("llvm")
ex = relax.build(mod, target)
device = tvm.cpu()
vm = relax.VirtualMachine(ex, device)

# Random float32 input matching the (1, 784) shape declared at export time.
data = np.random.rand(1, 784).astype("float32")
tvm_data = tvm.nd.array(data, device=device)

# One random float32 array per entry of ``param_spec``, placed on the device.
params = [
    tvm.nd.array(np.random.rand(*param.shape).astype("float32"), device=device)
    for _, param in param_spec
]

# Invoke the compiled ``forward`` function and print the result as a NumPy array.
output = vm["forward"](tvm_data, *params)
print(output.numpy())

################################################################################
# Our goal is to bring machine learning to the application with any language of interest,
# with the minimum runtime support.
#
# - Each function in IRModule becomes a runnable function in the runtime. For example in LLM
# cases, we can call ``prefill`` and ``decode`` functions directly.
#
# .. code-block:: Python
#
# prefill_logits = vm["prefill"](inputs, weight, kv_cache)
# decoded_logits = vm["decode"](inputs, weight, kv_cache)
#
# - TVM runtime comes with native data structures, such as NDArray, which can also have
# zero-copy exchange with the existing ecosystem (DLPack exchange with PyTorch)
#
# .. code-block:: Python
#
# # Convert PyTorch tensor to TVM NDArray
# x_tvm = tvm.nd.from_dlpack(x_torch.to_dlpack())
# # Convert TVM NDArray to PyTorch tensor
# x_torch = torch.from_dlpack(x_tvm.to_dlpack())
#
# - TVM runtime works in non-Python environments, so it works in settings such as mobile
#
# .. code-block:: C++
#
# // C++ snippet
# runtime::Module vm = ex.GetFunction("load_executable")();
# vm.GetFunction("init")(...);
# NDArray out = vm.GetFunction("prefill")(data, weight, kv_cache);
#
# .. code-block:: Java
#
# // Java snippet
# Module vm = ex.getFunction("load_executable").invoke();
# vm.getFunction("init").pushArg(...).invoke();
# NDArray out = vm.getFunction("prefill").pushArg(data).pushArg(weight).pushArg(kv_cache).invoke();
#

################################################################################
# Read next
# ---------
# This tutorial demonstrates the overall flow of using Apache TVM to compile a neural network model.
# For more advanced or specific topics, please refer to the following tutorials
#
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ driving its costs down.
:caption: Getting Started

install/index
get_started/tutorials/quick_start
contribute/index

.. toctree::
Expand Down
2 changes: 2 additions & 0 deletions tests/scripts/task_python_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ IGNORED_WARNINGS=(
'absl:For model inputs containing unsupported operations which cannot be quantized, the `inference_input_type` attribute will default to the original type.'
'absl:Found untraced functions such as _jit_compiled_convolution_op'
'You are using pip version'
# Tutorial READMEs can be ignored, but other docs should be included
"tutorials/README.rst: WARNING: document isn't included in any toctree"
)

JOINED_WARNINGS=$(join_by '|' "${IGNORED_WARNINGS[@]}")
Expand Down

0 comments on commit 39f3ccb

Please sign in to comment.