2024-10-17 nightly release (ad0e5e8)

pytorchbot committed Oct 17, 2024
1 parent 400150b commit 1aaeaa7
Showing 174 changed files with 1,918 additions and 1,321 deletions.
4 changes: 2 additions & 2 deletions .ci/scripts/build_llama_android.sh
@@ -48,9 +48,9 @@ build_llama_runner() {
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-Bcmake-android-out/examples/models/llama2 examples/models/llama2
-Bcmake-android-out/examples/models/llama examples/models/llama

cmake --build cmake-android-out/examples/models/llama2 -j4 --config Release
cmake --build cmake-android-out/examples/models/llama -j4 --config Release
}
install_flatc_from_source
install_executorch_and_backend_lib
8 changes: 4 additions & 4 deletions .ci/scripts/test_llama.sh
@@ -125,7 +125,7 @@ cmake_install_executorch_libraries() {

cmake_build_llama_runner() {
echo "Building llama runner"
dir="examples/models/llama2"
dir="examples/models/llama"
retry cmake \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Debug \
@@ -206,7 +206,7 @@ if [[ "${QNN}" == "ON" ]]; then
EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
fi
# Add dynamically linked library location
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}

# Create tokenizer.bin.
echo "Creating tokenizer.bin"
@@ -219,15 +219,15 @@ echo "Running ${EXPORTED_MODEL_NAME} in portable mode"
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
# Run model.
# shellcheck source=/dev/null
$BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
$BUCK run examples/models/llama:main -- ${RUNTIME_ARGS} > result.txt
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
cmake_install_executorch_libraries
cmake_build_llama_runner
# Run llama runner
NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
# shellcheck source=/dev/null
cmake-out/examples/models/llama2/llama_main ${RUNTIME_ARGS} > result.txt
cmake-out/examples/models/llama/llama_main ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"
else
6 changes: 3 additions & 3 deletions .ci/scripts/test_model.sh
@@ -75,9 +75,9 @@ run_portable_executor_runner() {
test_model() {
if [[ "${MODEL_NAME}" == "llama2" ]]; then
# Install requirements for export_llama
bash examples/models/llama2/install_requirements.sh
# Test export_llama script: python3 -m examples.models.llama2.export_llama
"${PYTHON_EXECUTABLE}" -m examples.models.llama2.export_llama -c examples/models/llama2/params/demo_rand_params.pth -p examples/models/llama2/params/demo_config.json
bash examples/models/llama/install_requirements.sh
# Test export_llama script: python3 -m examples.models.llama.export_llama
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama -c examples/models/llama/params/demo_rand_params.pth -p examples/models/llama/params/demo_config.json
run_portable_executor_runner
rm "./${MODEL_NAME}.pte"
fi
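
For readers following the rename outside of CI, a minimal Python sketch equivalent to the updated shell invocation above; it assumes the repository root is the working directory and that install_requirements.sh has already been run:

import subprocess
import sys

def export_demo_llama(python: str = sys.executable) -> None:
    # Mirrors the test_model.sh call: the module and param paths now live under
    # examples/models/llama (formerly examples/models/llama2).
    subprocess.run(
        [
            python, "-m", "examples.models.llama.export_llama",
            "-c", "examples/models/llama/params/demo_rand_params.pth",
            "-p", "examples/models/llama/params/demo_config.json",
        ],
        check=True,
    )

if __name__ == "__main__":
    export_demo_llama()
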
2 changes: 1 addition & 1 deletion .github/workflows/android-perf.yml
@@ -160,7 +160,7 @@ jobs:
if [[ ${{ matrix.model }} =~ ^stories* ]]; then
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
DELEGATE_CONFIG="xnnpack+custom+qe"
2 changes: 1 addition & 1 deletion .github/workflows/apple-perf.yml
@@ -162,7 +162,7 @@ jobs:
if [[ ${{ matrix.model }} =~ ^stories* ]]; then
# Install requirements for export_llama
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
bash examples/models/llama2/install_requirements.sh
bash examples/models/llama/install_requirements.sh
# Test llama2
if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
12 changes: 9 additions & 3 deletions .github/workflows/pull.yml
@@ -98,6 +98,12 @@ jobs:
- dtype: bf16
build-tool: buck2
mode: portable
- dtype: bf16
build-tool: cmake
mode: custom
- dtype: bf16
build-tool: buck2
mode: custom
fail-fast: false
with:
runner: linux.2xlarge
@@ -117,7 +123,7 @@
# Setup executorch
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
@@ -216,7 +222,7 @@ jobs:
bash install_requirements.sh --pybind xnnpack
# install Llava requirements
bash examples/models/llama2/install_requirements.sh
bash examples/models/llama/install_requirements.sh
bash examples/models/llava/install_requirements.sh
# run python unittest
@@ -411,7 +417,7 @@ jobs:
# Setup executorch
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
10 changes: 6 additions & 4 deletions .github/workflows/trunk.yml
@@ -227,6 +227,8 @@ jobs:
include:
- dtype: bf16
mode: portable
- dtype: bf16
mode: custom
fail-fast: false
with:
runner: macos-m1-stable
@@ -255,7 +257,7 @@
fi
# Install requirements for export_llama
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M cmake "${DTYPE}" "${MODE}"
@@ -279,7 +281,7 @@
# GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

# # install Llava requirements
# ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
# ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
# ${CONDA_RUN} bash examples/models/llava/install_requirements.sh

# # run python unittest
@@ -385,7 +387,7 @@ jobs:
cmake --build cmake-out -j9 --target install --config Release
echo "Build llama runner"
dir="examples/models/llama2"
dir="examples/models/llama"
cmake \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
@@ -437,5 +439,5 @@ jobs:
python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
cmake-out/examples/models/llama2/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
echo "::endgroup::"
4 changes: 2 additions & 2 deletions README.md
@@ -22,10 +22,10 @@ please visit our documentation website [for the latest release](https://pytorch.

Check out the [Getting Started](https://pytorch.org/executorch/stable/getting-started-setup.html#quick-setup-colab-jupyter-notebook-prototype) page for a quick spin.

Check out the examples of [Llama](./examples/models/llama2/README.md), [Llava](./examples/models/llava/README.md) and [other models](./examples/README.md) running on edge devices using ExecuTorch.
Check out the examples of [Llama](./examples/models/llama/README.md), [Llava](./examples/models/llava/README.md) and [other models](./examples/README.md) running on edge devices using ExecuTorch.


**[UPDATE - 09/25]** We have added support for running [Llama 3.2 1B/3B](./examples/models/llama2/README.md) models via ExecuTorch.
**[UPDATE - 09/25]** We have added support for running [Llama 3.2 1B/3B](./examples/models/llama/README.md) models via ExecuTorch.

## Feedback

12 changes: 6 additions & 6 deletions backends/apple/coreml/runtime/inmemoryfs/inmemory_filesystem.cpp
@@ -253,11 +253,11 @@ bool write_directory_node(InMemoryDirectoryNode* node,
return false;
}

for (const auto& [_, node]: node->get_items()) {
if (node.get()->isDirectory() && !recursive) {
for (const auto& [_, node_2]: node->get_items()) {
if (node_2.get()->isDirectory() && !recursive) {
continue;
}
if (!write_node(node.get(), dir_path, recursive, error)) {
if (!write_node(node_2.get(), dir_path, recursive, error)) {
return false;
}
}
@@ -383,9 +383,9 @@ FlattenedInMemoryNode::unflatten(const std::vector<FlattenedInMemoryNode>& flatt
case InMemoryFileSystem::InMemoryNode::Kind::Directory: {
std::unordered_map<std::string, std::unique_ptr<InMemoryFileSystem::InMemoryNode>> items;
items.reserve(flattened_node_metadata.child_name_to_indices_map.size());
for (const auto& [name, index]: flattened_node_metadata.child_name_to_indices_map) {
auto moveIt = std::make_move_iterator(nodes.begin() + index);
items[name] = *moveIt;
for (const auto& [name_2, index_2]: flattened_node_metadata.child_name_to_indices_map) {
auto moveIt = std::make_move_iterator(nodes.begin() + index_2);
items[name_2] = *moveIt;
}
auto directory_node =
std::make_unique<InMemoryDirectoryNode>(std::move(name), std::move(attributes), std::move(items));
28 changes: 24 additions & 4 deletions backends/qualcomm/_passes/annotate_quant_attrs.py
@@ -27,9 +27,12 @@ class AnnotateQuantAttrs(ExportPass):
generated after quatization process.
"""

def __init__(self, edge_program: torch.export.ExportedProgram):
def __init__(
self, edge_program: torch.export.ExportedProgram, skip_advanced_requat: bool
):
super(AnnotateQuantAttrs, self).__init__()
self.edge_program = edge_program
self.skip_advanced_requant = skip_advanced_requat

def _annotate_source_nodes(
self, quant_node: torch.fx.Node, quant_attrs: Dict[str, Any]
@@ -68,9 +71,26 @@ def _annotate_requant(self, n):

# TODO: Store multiple pairs of requantize attributes when we have an op builder
# that has multiple outputs that requires quant attributes.
if q_attrs["dtype"] != dq_attrs["dtype"]:
dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING]
n.args[0].meta[QCOM_REQUANTIZE] = dq_attrs
if self.skip_advanced_requant:
if q_attrs["dtype"] != dq_attrs["dtype"]:
dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING]
n.args[0].meta[QCOM_REQUANTIZE] = dq_attrs
else:
# When dtype is the same but other specs such as scale and offset are different,
# insert requant to improve accuracy.
# Users can turn this feature off if any inference speed drop is observed.
if any(
q_attrs[attr] != dq_attrs[attr]
for attr in [
"scale",
"zero_point",
"quant_min",
"quant_max",
"dtype",
]
):
dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING]
n.args[0].meta[QCOM_REQUANTIZE] = dq_attrs

# Dequant all the fold_quant parameters back to fp32.
# If an operation is not supported by QNN and got fallback, it will expect a fp32 param.
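
As a standalone illustration, the new comparison can be reduced to a small predicate. This is only a restatement of the logic added above, with q_attrs and dq_attrs standing in for the quantize/dequantize attribute dicts:

def needs_requantize(q_attrs: dict, dq_attrs: dict) -> bool:
    # With skip_advanced_requant=False, a requantize is recorded whenever any
    # quantization spec differs, not only the dtype.
    keys = ("scale", "zero_point", "quant_min", "quant_max", "dtype")
    return any(q_attrs[k] != dq_attrs[k] for k in keys)

# Example: same dtype but different scales still triggers a requantize.
q = {"scale": 0.02, "zero_point": 0, "quant_min": -128, "quant_max": 127, "dtype": "int8"}
dq = {"scale": 0.03, "zero_point": 0, "quant_min": -128, "quant_max": 127, "dtype": "int8"}
assert needs_requantize(q, dq)
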
3 changes: 2 additions & 1 deletion backends/qualcomm/aot/ir/targets.bzl
@@ -4,6 +4,7 @@ load(
)
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
load("@fbsource//xplat/executorch/backends/qualcomm:targets.bzl", "generate_schema_header")
load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_verision")

QCIR_NAME = "qcir"
INPUT_QCIR = QCIR_NAME + ".fbs"
@@ -55,7 +56,7 @@ def define_common_targets():
platforms = [ANDROID],
visibility = ["@EXECUTORCH_CLIENTS"],
deps = [
"fbsource//third-party/qualcomm/qnn:api",
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
"//executorch/runtime/backend:interface",
"//executorch/runtime/core:core",
"//executorch/backends/qualcomm/aot/wrappers:wrappers",
7 changes: 4 additions & 3 deletions backends/qualcomm/aot/python/targets.bzl
@@ -3,6 +3,7 @@ load(
"ANDROID",
)
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_verision")

PYTHON_MODULE_NAME = "PyQnnManagerAdaptor"

@@ -32,7 +33,7 @@ def define_common_targets():
"//executorch/backends/qualcomm:schema",
"//executorch/backends/qualcomm/aot/ir:qcir_utils",
"//executorch/backends/qualcomm/runtime:runtime",
"fbsource//third-party/qualcomm/qnn:api",
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
],
external_deps = [
"pybind11",
@@ -65,7 +66,7 @@ def define_common_targets():
"//executorch/backends/qualcomm:schema",
"//executorch/backends/qualcomm/aot/ir:qcir_utils",
"//executorch/backends/qualcomm/runtime:runtime",
"fbsource//third-party/qualcomm/qnn:api",
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
],
external_deps = [
"pybind11",
@@ -92,7 +93,7 @@ def define_common_targets():
"//executorch/backends/qualcomm:schema",
"//executorch/backends/qualcomm/aot/ir:qcir_utils",
"//executorch/backends/qualcomm/runtime:runtime",
"fbsource//third-party/qualcomm/qnn:api",
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
],
external_deps = [
"pybind11",
3 changes: 2 additions & 1 deletion backends/qualcomm/aot/wrappers/targets.bzl
@@ -3,6 +3,7 @@ load(
"ANDROID",
)
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_verision")

def define_common_targets():
"""Defines targets that should be shared between fbcode and xplat.
@@ -22,7 +23,7 @@ def define_common_targets():
platforms = [ANDROID],
visibility = ["@EXECUTORCH_CLIENTS"],
deps = [
"fbsource//third-party/qualcomm/qnn:api",
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
"//executorch/runtime/backend:interface",
"//executorch/runtime/core:core",
],
2 changes: 0 additions & 2 deletions backends/qualcomm/qnn_preprocess.py
@@ -11,7 +11,6 @@
import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager

import torch # noqa: F401
from executorch.backends.qualcomm._passes.convert_to_linear import ConvertToLinear
from executorch.backends.qualcomm._passes.fuse_consecutive_transpose import (
FuseConsecutiveTranspose,
)
@@ -49,7 +48,6 @@ def preprocess(
# QNN Delegate Specific Passes
qnn_compiler_passes = PassManager(
passes=[
ConvertToLinear(),
InsertRequantize(edge_program),
InsertIOQDQ(edge_program),
LayoutTransform(edge_program, insert_permute=True),
2 changes: 1 addition & 1 deletion backends/qualcomm/quantizer/utils.py
@@ -364,7 +364,7 @@ def get_ptq_per_channel_quant_config(
quant_min=torch.iinfo(act_dtype).min,
quant_max=torch.iinfo(act_dtype).max,
qscheme=torch.per_tensor_affine,
observer_or_fake_quant_ctr=MinMaxObserver.with_args(**extra_args),
observer_or_fake_quant_ctr=MovingAverageMinMaxObserver.with_args(**extra_args),
)

weight_quantization_spec = QuantizationSpec(
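
The observer swap above changes how activation ranges are collected during PTQ calibration. A short sketch of the behavioural difference, using the public torch.ao.quantization observers (illustrative values only):

import torch
from torch.ao.quantization.observer import MinMaxObserver, MovingAverageMinMaxObserver

minmax = MinMaxObserver(dtype=torch.quint8)
moving = MovingAverageMinMaxObserver(averaging_constant=0.01, dtype=torch.quint8)

for _ in range(8):
    batch = torch.randn(4, 16)
    minmax(batch)   # keeps the absolute min/max seen across all batches
    moving(batch)   # keeps an exponential moving average of per-batch min/max

print(minmax.calculate_qparams())  # scale/zero_point from the raw extremes
print(moving.calculate_qparams())  # typically less sensitive to outlier batches
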
4 changes: 2 additions & 2 deletions backends/qualcomm/runtime/targets.bzl
@@ -24,7 +24,7 @@ def define_common_targets():
platforms = [ANDROID],
visibility = ["@EXECUTORCH_CLIENTS"],
deps = [
"fbsource//third-party/qualcomm/qnn:api",
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
"//executorch/runtime/backend:interface",
],
exported_deps = [
@@ -53,7 +53,7 @@
exclude = ["Logging.h"],
),
define_static_target = True,
link_whole = True, # needed for executorch/examples/models/llama2:main to register QnnBackend
link_whole = True, # needed for executorch/examples/models/llama:main to register QnnBackend
platforms = [ANDROID],
visibility = ["@EXECUTORCH_CLIENTS"],
resources = {
6 changes: 2 additions & 4 deletions backends/qualcomm/targets.bzl
@@ -3,6 +3,7 @@ load(
"ANDROID",
)
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_verision")

# Construct the input and output file names. All input and output files rely on scalar_type file.
SCHEMA_NAME = "schema"
@@ -83,7 +84,7 @@ def define_common_targets():
define_static_target = True,
visibility = ["@EXECUTORCH_CLIENTS"],
deps = [
"fbsource//third-party/qualcomm/qnn:api",
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
"//executorch/runtime/backend:interface",
"//executorch/runtime/core:core",
"//executorch/backends/qualcomm/runtime:runtime",
@@ -92,6 +93,3 @@ def define_common_targets():
":schema",
],
)

def get_qnn_library_verision():
return "2.26"