From 39f3ccb74c2ca5d636806424881df63855220e74 Mon Sep 17 00:00:00 2001
From: Siyuan Feng
Date: Wed, 21 Aug 2024 18:24:20 +0800
Subject: [PATCH] [Doc] Quick Start

This PR introduces a new quick start tutorial to the documentation.
---
 docs/.gitignore                           |   1 -
 docs/conf.py                              |   6 +
 docs/get_started/tutorials/README.txt     |   2 +
 docs/get_started/tutorials/quick_start.py | 193 ++++++++++++++++++++++
 docs/index.rst                            |   1 +
 tests/scripts/task_python_docs.sh         |   2 +
 6 files changed, 204 insertions(+), 1 deletion(-)
 create mode 100644 docs/get_started/tutorials/README.txt
 create mode 100644 docs/get_started/tutorials/quick_start.py

diff --git a/docs/.gitignore b/docs/.gitignore
index 84b247d3699c..041cf3588799 100644
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -1,3 +1,2 @@
 doxygen
 modules
-tutorials
diff --git a/docs/conf.py b/docs/conf.py
index be1ba11aa091..c3472c15de91 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -408,6 +408,7 @@ def jupyter_notebook(script_blocks, gallery_conf, target_dir, real_func):
 from sphinx_gallery.sorting import ExplicitOrder
 
 examples_dirs = [
+    # legacy tutorial structure under gallery folder
     tvm_path.joinpath("gallery", "tutorial"),
     tvm_path.joinpath("gallery", "how_to", "compile_models"),
     tvm_path.joinpath("gallery", "how_to", "deploy_models"),
@@ -419,9 +420,12 @@
     tvm_path.joinpath("gallery", "how_to", "work_with_microtvm"),
     tvm_path.joinpath("gallery", "how_to", "extend_tvm"),
     tvm_path.joinpath("vta", "tutorials"),
+    # New tutorial structure under docs folder
+    tvm_path.joinpath("docs", "get_started", "tutorials"),
 ]
 
 gallery_dirs = [
+    # legacy tutorial structure under gallery folder
     "tutorial",
     "how_to/compile_models",
     "how_to/deploy_models",
@@ -433,6 +437,8 @@
     "how_to/work_with_microtvm",
     "how_to/extend_tvm",
     "topic/vta/tutorials",
+    # New tutorial structure under docs folder
+    "get_started/tutorials/",
 ]
diff --git a/docs/get_started/tutorials/README.txt b/docs/get_started/tutorials/README.txt
new file mode 100644
index 000000000000..62e2c7b770fb
--- /dev/null
+++ b/docs/get_started/tutorials/README.txt
@@ -0,0 +1,2 @@
+Get Started
+-----------
diff --git a/docs/get_started/tutorials/quick_start.py b/docs/get_started/tutorials/quick_start.py
new file mode 100644
index 000000000000..a4edf0b7c4fe
--- /dev/null
+++ b/docs/get_started/tutorials/quick_start.py
@@ -0,0 +1,193 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+.. _quick_start:
+
+Quick Start
+===========
+
+This tutorial is for people who are new to Apache TVM. It walks through a simple
+example that shows how to use Apache TVM to compile a neural network.
+
+.. contents:: Table of Contents
+    :local:
+    :depth: 2
+
+"""
+
+################################################################################
+# Overview
+# --------
+# Apache TVM is a machine learning compilation framework that follows the principles of
+# **Python-first development** and **universal deployment**. It takes in pre-trained
+# machine learning models, then compiles and generates deployable modules that can be
+# embedded and run anywhere.
+# Apache TVM also enables customizing the optimization process to introduce new
+# optimizations, libraries, codegen, and more.
+#
+# Apache TVM can help to:
+#
+# - **Optimize** the performance of ML workloads by composing libraries and codegen.
+# - **Deploy** ML workloads to a diverse set of new environments, including new runtimes
+#   and new hardware.
+# - **Continuously improve and customize** the ML deployment pipeline in Python by quickly
+#   customizing library dispatching and bringing in customized operators and code generation.
+
+################################################################################
+# Overall Flow
+# ------------
+# Next, we show the overall flow of using Apache TVM to compile a neural network model,
+# covering how to optimize, deploy, and run the model.
+# The overall flow is illustrated in the following figure:
+#
+# .. figure:: https://raw.githubusercontent.com/tlc-pack/web-data/main/images/design/tvm_overall_flow.svg
+#    :align: center
+#    :width: 80%
+#
+# The overall flow consists of the following steps:
+#
+# - **Construct or Import a Model**: Construct a neural network model or import a pre-trained
+#   model from another framework (e.g. PyTorch, ONNX), and create the TVM IRModule, which contains
+#   all the information needed for compilation, including high-level Relax functions for the
+#   computational graph and low-level TensorIR functions for tensor programs.
+# - **Perform Composable Optimizations**: Perform a series of optimization transformations,
+#   such as graph optimizations, tensor program optimizations, and library dispatching.
+# - **Build and Universal Deployment**: Build the optimized model into a deployable module for the
+#   universal runtime, and execute it on different devices, such as CPUs, GPUs, or other
+#   accelerators.
+
+################################################################################
+# Construct or Import a Model
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Before we get started, let's construct a neural network model first.
+# To keep things simple, this tutorial defines a two-layer MLP network directly in this
+# script with the TVM Relax frontend, whose API is similar to PyTorch's.
+
+import tvm
+from tvm import relax
+from tvm.relax.frontend import nn
+
+
+class MLPModel(nn.Module):
+    def __init__(self):
+        super(MLPModel, self).__init__()
+        self.fc1 = nn.Linear(784, 256)
+        self.relu1 = nn.ReLU()
+        self.fc2 = nn.Linear(256, 10)
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.relu1(x)
+        x = self.fc2(x)
+        return x
+
+
+################################################################################
+# Then we can export the model to a TVM IRModule, which is the central intermediate
+# representation in TVM.
+
+mod, param_spec = MLPModel().export_tvm(
+    spec={"forward": {"x": nn.spec.Tensor((1, 784), "float32")}}
+)
+mod.show()
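+
+################################################################################
+# As noted above, models can also be imported from other frameworks instead of being
+# written by hand. The sketch below is illustrative rather than part of this tutorial's
+# flow: it assumes a TVM build that ships the ``torch.fx``-based importer
+# ``tvm.relax.frontend.torch.from_fx``, and the exact importer API may differ across
+# TVM versions.
+#
+# .. code-block:: Python
+#
+#    import torch
+#    from torch import fx
+#    from tvm.relax.frontend.torch import from_fx
+#
+#    # Trace an equivalent PyTorch MLP into a torch.fx GraphModule ...
+#    torch_model = torch.nn.Sequential(
+#        torch.nn.Linear(784, 256), torch.nn.ReLU(), torch.nn.Linear(256, 10)
+#    )
+#    graph_model = fx.symbolic_trace(torch_model)
+#    # ... then import it, describing each input by its shape and dtype.
+#    mod_from_torch = from_fx(graph_model, [((1, 784), "float32")])
+#    mod_from_torch.show()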
+
+################################################################################
+# Perform Optimization Transformations
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Apache TVM leverages ``pipeline`` to transform and optimize programs.
+# A pipeline encapsulates a collection of transformations that pursue two goals:
+#
+# - **Model optimization**: e.g. operator fusion and layout rewrites.
+# - **Tensor program optimization**: mapping operators to low-level implementations
+#   (either libraries or codegen).
+#
+# .. note::
+#   These are two goals of a pipeline, not its stages. The two kinds of optimization can
+#   be performed **at the same level**, or separately in two stages.
+#
+# .. note::
+#   In this tutorial we only demonstrate the overall flow, using the built-in ``zero``
+#   optimization pipeline, instead of optimizing for any specific target.
+
+mod = relax.get_pipeline("zero")(mod)
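+
+################################################################################
+# Pipelines are composable, so you are not limited to the built-in ones. The sketch
+# below shows the general shape of a hand-assembled pipeline; treat it as a sketch
+# only, since the set of useful passes and their names can vary across TVM versions.
+#
+# .. code-block:: Python
+#
+#    # Compose individual Relax passes into one transformation and apply it.
+#    custom_pipeline = tvm.ir.transform.Sequential(
+#        [
+#            # Lower high-level operators into TensorIR functions.
+#            relax.transform.LegalizeOps(),
+#            # Fold constant expressions at compile time.
+#            relax.transform.FoldConstant(),
+#        ]
+#    )
+#    mod = custom_pipeline(mod)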
+
+################################################################################
+# Build and Universal Deployment
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# After the optimization, we can build the model into a deployable module and run it on
+# different devices.
+
+import numpy as np
+
+# Build the IRModule for the LLVM CPU target and load it into the Relax virtual machine.
+target = tvm.target.Target("llvm")
+ex = relax.build(mod, target)
+device = tvm.cpu()
+vm = relax.VirtualMachine(ex, device)
+# Prepare random input data and randomly initialized parameters, then run the model.
+data = np.random.rand(1, 784).astype("float32")
+tvm_data = tvm.nd.array(data, device=device)
+params = [np.random.rand(*param.shape).astype("float32") for _, param in param_spec]
+params = [tvm.nd.array(param, device=device) for param in params]
+print(vm["forward"](tvm_data, *params).numpy())
+
+################################################################################
+# Our goal is to bring machine learning to applications in any language of interest,
+# with the minimum runtime support.
+#
+# - Each function in an IRModule becomes a runnable function in the runtime. For example,
+#   in LLM cases, we can call the ``prefill`` and ``decode`` functions directly.
+#
+# .. code-block:: Python
+#
+#    prefill_logits = vm["prefill"](inputs, weight, kv_cache)
+#    decoded_logits = vm["decode"](inputs, weight, kv_cache)
+#
+# - The TVM runtime comes with native data structures, such as NDArray, and also supports
+#   zero-copy exchange with the existing ecosystem (DLPack exchange with PyTorch).
+#
+# .. code-block:: Python
+#
+#    # Convert a PyTorch tensor to a TVM NDArray
+#    x_tvm = tvm.nd.from_dlpack(torch.utils.dlpack.to_dlpack(x_torch))
+#    # Convert a TVM NDArray to a PyTorch tensor
+#    x_torch = torch.utils.dlpack.from_dlpack(x_tvm.to_dlpack())
+#
+# - The TVM runtime works in non-Python environments, so it also works in settings such
+#   as mobile.
+#
+# .. code-block:: C++
+#
+#    // C++ snippet
+#    runtime::Module vm = ex.GetFunction("load_executable")();
+#    vm.GetFunction("init")(...);
+#    NDArray out = vm.GetFunction("prefill")(data, weight, kv_cache);
+#
+# .. code-block:: Java
+#
+#    // Java snippet
+#    Module vm = ex.getFunction("load_executable").invoke();
+#    vm.getFunction("init").pushArg(...).invoke();
+#    NDArray out = vm.getFunction("prefill").pushArg(data).pushArg(weight).pushArg(kv_cache).invoke();
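+
+################################################################################
+# For deployment, the built module is usually saved to disk and loaded back in the
+# target environment. Below is a minimal sketch of that flow in Python, assuming the
+# common shared-library export path of the Relax executable (the exact API may differ
+# across TVM versions):
+#
+# .. code-block:: Python
+#
+#    # Save the compiled executable as a shared library ...
+#    ex.export_library("mlp.so")
+#    # ... then, e.g. in another process, reload it and rebuild the virtual machine.
+#    loaded_mod = tvm.runtime.load_module("mlp.so")
+#    vm = relax.VirtualMachine(loaded_mod, tvm.cpu())
+#    print(vm["forward"](tvm_data, *params).numpy())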
+
+################################################################################
+# Read next
+# ---------
+# This tutorial demonstrates the overall flow of using Apache TVM to compile a neural
+# network model. For more advanced or specific topics, please refer to the following
+# tutorials.
+#
diff --git a/docs/index.rst b/docs/index.rst
index 95b1937671ea..7f13101f741e 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -33,6 +33,7 @@ driving its costs down.
    :caption: Getting Started
 
    install/index
+   get_started/tutorials/quick_start
    contribute/index
 
 .. toctree::
diff --git a/tests/scripts/task_python_docs.sh b/tests/scripts/task_python_docs.sh
index 9690c330c0df..2a213ddd1843 100755
--- a/tests/scripts/task_python_docs.sh
+++ b/tests/scripts/task_python_docs.sh
@@ -90,6 +90,8 @@ IGNORED_WARNINGS=(
     'absl:For model inputs containing unsupported operations which cannot be quantized, the `inference_input_type` attribute will default to the original type.'
     'absl:Found untraced functions such as _jit_compiled_convolution_op'
     'You are using pip version'
+    # Tutorial READMEs can be ignored, but other docs should be included
+    "tutorials/README.rst: WARNING: document isn't included in any toctree"
 )
 
 JOINED_WARNINGS=$(join_by '|' "${IGNORED_WARNINGS[@]}")