Codemod examples/models/llama2 to examples/models/llama
Summary:
Squash of D64446460, D64446461, D64446462, D64446459

allow-large-files

Differential Revision: D64446900
mergennachin authored and facebook-github-bot committed Oct 16, 2024
1 parent 35aeaca commit 9b4f038
Showing 118 changed files with 637 additions and 923 deletions.
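The change is mechanical: every reference to the examples/models/llama2 directory and the examples.models.llama2 Python module is rewritten to examples/models/llama and examples.models.llama. For reference, a minimal sketch of how a rename of this shape could be reproduced in a plain git checkout is shown below; it assumes GNU sed and is not the internal codemod tooling referenced in the Differential Revision above.

```bash
# Sketch only: reproduce a directory rename codemod of this shape.
# Assumes a clean git checkout and GNU sed; the actual change was produced
# by internal tooling (see the Differential Revision in the summary).
git mv examples/models/llama2 examples/models/llama

# Rewrite path-style and module-style references in tracked files.
# Naive substitution: review the result before committing.
git grep -z -l -E 'examples[/.]models[/.]llama2' \
  | xargs -0 sed -i \
      -e 's#examples/models/llama2#examples/models/llama#g' \
      -e 's#examples\.models\.llama2#examples.models.llama#g'

git commit -am "Codemod examples/models/llama2 to examples/models/llama"
```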
4 changes: 2 additions & 2 deletions .ci/scripts/build_llama_android.sh
@@ -48,9 +48,9 @@ build_llama_runner() {
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-Bcmake-android-out/examples/models/llama2 examples/models/llama2
-Bcmake-android-out/examples/models/llama examples/models/llama

cmake --build cmake-android-out/examples/models/llama2 -j4 --config Release
cmake --build cmake-android-out/examples/models/llama -j4 --config Release
}
install_flatc_from_source
install_executorch_and_backend_lib
8 changes: 4 additions & 4 deletions .ci/scripts/test_llama.sh
@@ -125,7 +125,7 @@ cmake_install_executorch_libraries() {

cmake_build_llama_runner() {
echo "Building llama runner"
dir="examples/models/llama2"
dir="examples/models/llama"
retry cmake \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Debug \
@@ -206,7 +206,7 @@ if [[ "${QNN}" == "ON" ]]; then
EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
fi
# Add dynamically linked library location
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}

# Create tokenizer.bin.
echo "Creating tokenizer.bin"
@@ -219,15 +219,15 @@ echo "Running ${EXPORTED_MODEL_NAME} in portable mode"
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
# Run model.
# shellcheck source=/dev/null
$BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
$BUCK run examples/models/llama:main -- ${RUNTIME_ARGS} > result.txt
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
cmake_install_executorch_libraries
cmake_build_llama_runner
# Run llama runner
NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
# shellcheck source=/dev/null
cmake-out/examples/models/llama2/llama_main ${RUNTIME_ARGS} > result.txt
cmake-out/examples/models/llama/llama_main ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"
else
6 changes: 3 additions & 3 deletions .ci/scripts/test_model.sh
@@ -75,9 +75,9 @@ run_portable_executor_runner() {
test_model() {
if [[ "${MODEL_NAME}" == "llama2" ]]; then
# Install requirements for export_llama
bash examples/models/llama2/install_requirements.sh
# Test export_llama script: python3 -m examples.models.llama2.export_llama
"${PYTHON_EXECUTABLE}" -m examples.models.llama2.export_llama -c examples/models/llama2/params/demo_rand_params.pth -p examples/models/llama2/params/demo_config.json
bash examples/models/llama/install_requirements.sh
# Test export_llama script: python3 -m examples.models.llama.export_llama
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama -c examples/models/llama/params/demo_rand_params.pth -p examples/models/llama/params/demo_config.json
run_portable_executor_runner
rm "./${MODEL_NAME}.pte"
fi
2 changes: 1 addition & 1 deletion .github/workflows/android-perf.yml
@@ -160,7 +160,7 @@ jobs:
if [[ ${{ matrix.model }} =~ ^stories* ]]; then
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
DELEGATE_CONFIG="xnnpack+custom+qe"
2 changes: 1 addition & 1 deletion .github/workflows/apple-perf.yml
@@ -162,7 +162,7 @@ jobs:
if [[ ${{ matrix.model }} =~ ^stories* ]]; then
# Install requirements for export_llama
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
bash examples/models/llama2/install_requirements.sh
bash examples/models/llama/install_requirements.sh
# Test llama2
if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
6 changes: 3 additions & 3 deletions .github/workflows/pull.yml
@@ -117,7 +117,7 @@ jobs:
# Setup executorch
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
@@ -216,7 +216,7 @@ jobs:
bash install_requirements.sh --pybind xnnpack
# install Llava requirements
bash examples/models/llama2/install_requirements.sh
bash examples/models/llama/install_requirements.sh
bash examples/models/llava/install_requirements.sh
# run python unittest
@@ -411,7 +411,7 @@ jobs:
# Setup executorch
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
8 changes: 4 additions & 4 deletions .github/workflows/trunk.yml
@@ -255,7 +255,7 @@ jobs:
fi
# Install requirements for export_llama
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M cmake "${DTYPE}" "${MODE}"
@@ -279,7 +279,7 @@ jobs:
# GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

# # install Llava requirements
# ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
# ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
# ${CONDA_RUN} bash examples/models/llava/install_requirements.sh

# # run python unittest
@@ -385,7 +385,7 @@ jobs:
cmake --build cmake-out -j9 --target install --config Release
echo "Build llama runner"
dir="examples/models/llama2"
dir="examples/models/llama"
cmake \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
@@ -437,5 +437,5 @@ jobs:
python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
cmake-out/examples/models/llama2/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
echo "::endgroup::"
4 changes: 2 additions & 2 deletions README.md
@@ -22,10 +22,10 @@ please visit our documentation website [for the latest release](https://pytorch.

Check out the [Getting Started](https://pytorch.org/executorch/stable/getting-started-setup.html#quick-setup-colab-jupyter-notebook-prototype) page for a quick spin.

Check out the examples of [Llama](./examples/models/llama2/README.md), [Llava](./examples/models/llava/README.md) and [other models](./examples/README.md) running on edge devices using ExecuTorch.
Check out the examples of [Llama](./examples/models/llama/README.md), [Llava](./examples/models/llava/README.md) and [other models](./examples/README.md) running on edge devices using ExecuTorch.


**[UPDATE - 09/25]** We have added support for running [Llama 3.2 1B/3B](./examples/models/llama2/README.md) models via ExecuTorch.
**[UPDATE - 09/25]** We have added support for running [Llama 3.2 1B/3B](./examples/models/llama/README.md) models via ExecuTorch.

## Feedback

2 changes: 1 addition & 1 deletion backends/qualcomm/runtime/targets.bzl
@@ -53,7 +53,7 @@ def define_common_targets():
exclude = ["Logging.h"],
),
define_static_target = True,
link_whole = True, # needed for executorch/examples/models/llama2:main to register QnnBackend
link_whole = True, # needed for executorch/examples/models/llama:main to register QnnBackend
platforms = [ANDROID],
visibility = ["@EXECUTORCH_CLIENTS"],
resources = {
4 changes: 2 additions & 2 deletions backends/qualcomm/tests/test_qnn_delegate.py
@@ -37,7 +37,7 @@
skip_annotation,
)

from executorch.examples.models.llama2.llama_transformer import (
from executorch.examples.models.llama.llama_transformer import (
ModelArgs,
MOEFeedForward,
)
@@ -51,7 +51,7 @@
from executorch.examples.models.inception_v3 import InceptionV3Model
from executorch.examples.models.inception_v4 import InceptionV4Model

# from executorch.examples.models.llama2 import Llama2Model
# from executorch.examples.models.llama import Llama2Model
from executorch.examples.models.mobilebert import MobileBertModelExample
from executorch.examples.models.mobilenet_v2 import MV2Model
from executorch.examples.models.mobilenet_v3 import MV3Model
12 changes: 6 additions & 6 deletions backends/vulkan/docs/android_demo.md
@@ -57,7 +57,7 @@ partially lower the Llama model to Vulkan.

```shell
# The files will usually be downloaded to ~/.llama
python -m examples.models.llama2.export_llama \
python -m examples.models.llama.export_llama \
--disable_dynamic_shape --vulkan -kv --use_sdpa_with_kv_cache -d fp32 \
-c ~/.llama/checkpoints/Llama3.2-1B/consolidated.00.pth \
-p ~/.llama/checkpoints/Llama3.2-1B/params.json \
@@ -95,23 +95,23 @@ binary using the Android NDK toolchain.
cmake --build cmake-android-out -j16 --target install)

# Build LLaMA Runner library
(rm -rf cmake-android-out/examples/models/llama2 && \
cmake examples/models/llama2 \
(rm -rf cmake-android-out/examples/models/llama && \
cmake examples/models/llama \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=$ANDROID_ABI \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
-DPYTHON_EXECUTABLE=python \
-Bcmake-android-out/examples/models/llama2 && \
cmake --build cmake-android-out/examples/models/llama2 -j16)
-Bcmake-android-out/examples/models/llama && \
cmake --build cmake-android-out/examples/models/llama -j16)
```

Finally, push and run the llama runner binary on your Android device. Note that
your device must have sufficient GPU memory to execute the model.

```shell
adb push cmake-android-out/examples/models/llama2/llama_main /data/local/tmp/llama_main
adb push cmake-android-out/examples/models/llama/llama_main /data/local/tmp/llama_main

adb shell /data/local/tmp/llama_main \
--model_path=/data/local/tmp/vulkan_llama2.pte \
2 changes: 1 addition & 1 deletion backends/xnnpack/test/TARGETS
@@ -58,7 +58,7 @@ runtime.python_test(
"fbsource//third-party/pypi/torchsr:torchsr", # @manual
"fbsource//third-party/pypi/transformers:transformers", # @manual
"//executorch/backends/xnnpack/test/tester:tester",
"//executorch/examples/models/llama2:llama2_model",
"//executorch/examples/models/llama:llama2_model",
"//pytorch/audio/src:torchaudio_core",
"//pytorch/vision:torchvision", # @manual
],
2 changes: 1 addition & 1 deletion backends/xnnpack/test/models/llama2_et_example.py
@@ -9,7 +9,7 @@
import torch

from executorch.backends.xnnpack.test.tester import Tester
from executorch.examples.models.llama2.model import Llama2Model
from executorch.examples.models.llama.model import Llama2Model


class TestLlama2ETExample(unittest.TestCase):
2 changes: 1 addition & 1 deletion build/cmake_deps.toml
@@ -383,7 +383,7 @@ deps = [

[targets.llama_runner]
buck_targets = [
"//examples/models/llama2/runner:runner",
"//examples/models/llama/runner:runner",
]
filters = [
".cpp$",

@@ -6,7 +6,7 @@ This tutorial demonstrates how to export Llama 3 8B Instruct for Qualcomm AI Eng

- Set up your ExecuTorch repo and environment if you haven’t done so by following [the Setting up ExecuTorch](../getting-started-setup.md) to set up the repo and dev environment.
- Read [the Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend page](../build-run-qualcomm-ai-engine-direct-backend.md) to understand how to export and run a model with Qualcomm AI Engine Direct Backend on Qualcomm device.
- Follow [the README for executorch llama](https://github.com/pytorch/executorch/tree/main/examples/models/llama2) to know how to run a llama model on mobile via ExecuTorch.
- Follow [the README for executorch llama](https://github.com/pytorch/executorch/tree/main/examples/models/llama) to know how to run a llama model on mobile via ExecuTorch.
- A Qualcomm device with 16GB RAM
- We are continuing to optimize our memory usage to ensure compatibility with lower memory devices.
- The version of [Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk) is 2.26.0 or above.
@@ -39,7 +39,7 @@ To export Llama 3 8B instruct with the Qualcomm AI Engine Direct Backend, ensure

```bash
# Please note that calibration_data must include the prompt template for special tokens.
python -m examples.models.llama2.export_llama -t <path_to_tokenizer.model>
python -m examples.models.llama.export_llama -t <path_to_tokenizer.model>
llama3/Meta-Llama-3-8B-Instruct/tokenizer.model -p <path_to_params.json> -c <path_to_checkpoint_for_Meta-Llama-3-8B-Instruct> --use_kv_cache --qnn --pt2e_quantize qnn_16a4w --disable_dynamic_shape --num_sharding 8 --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --optimized_rotation_path <path_to_optimized_matrix> --calibration_data "<|start_header_id|>system<|end_header_id|>\n\nYou are a funny chatbot.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nCould you tell me about Facebook?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
```

@@ -76,9 +76,9 @@ llama3/Meta-Llama-3-8B-Instruct/tokenizer.model -p <path_to_params.json> -c <pat
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-Bcmake-android-out/examples/models/llama2 examples/models/llama2
-Bcmake-android-out/examples/models/llama examples/models/llama
cmake --build cmake-android-out/examples/models/llama2 -j16 --config Release
cmake --build cmake-android-out/examples/models/llama -j16 --config Release
```
3. Run on Android via adb shell
*Pre-requisite*: Make sure you enable USB debugging via developer options on your phone
@@ -105,7 +105,7 @@ adb push ${QNN_SDK_ROOT}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so ${DEVICE_D
adb push <model.pte> ${DEVICE_DIR}
adb push <tokenizer.model> ${DEVICE_DIR}
adb push cmake-android-out/lib/libqnn_executorch_backend.so ${DEVICE_DIR}
adb push cmake-out-android/examples/models/llama2/llama_main ${DEVICE_DIR}
adb push cmake-out-android/examples/models/llama/llama_main ${DEVICE_DIR}
```

**3.4 Run model**
2 changes: 1 addition & 1 deletion docs/source/llm/llama.md
@@ -1,5 +1,5 @@
# Llama on ExecuTorch

See
[Llama readme](https://github.com/pytorch/executorch/blob/main/examples/models/llama2/README.md)
[Llama readme](https://github.com/pytorch/executorch/blob/main/examples/models/llama/README.md)
for detailed information about running Llama on ExecuTorch.
2 changes: 1 addition & 1 deletion examples/README.md
@@ -39,7 +39,7 @@ For specific details related to models and backend, you can explore the various

### Llama Models

[This page](./models/llama2/README.md) demonstrates how to run Llama 3.2 (1B, 3B), Llama 3.1 (8B), Llama 3 (8B), and Llama 2 7B models on mobile via ExecuTorch. We use XNNPACK, QNNPACK, MediaTek, and MPS to accelerate the performance and 4-bit groupwise PTQ quantization to fit the model on Android and iOS mobile phones.
[This page](./models/llama/README.md) demonstrates how to run Llama 3.2 (1B, 3B), Llama 3.1 (8B), Llama 3 (8B), and Llama 2 7B models on mobile via ExecuTorch. We use XNNPACK, QNNPACK, MediaTek, and MPS to accelerate the performance and 4-bit groupwise PTQ quantization to fit the model on Android and iOS mobile phones.

### Llava1.5 7B

2 changes: 1 addition & 1 deletion examples/apple/mps/executor_runner/mps_executor_runner.mm
@@ -372,7 +372,7 @@ HierarchicalAllocator planned_memory(
strstr(model_path, "emformer_transcribe") ||
strstr(model_path, "emformer_join") ||
strstr(model_path, "edsr") ||
strstr(model_path, "llama2") ||
strstr(model_path, "llama") ||
strstr(model_path, "ic3") ||
strstr(model_path, "ic4")) {
atol = 1e-04;
2 changes: 1 addition & 1 deletion examples/cadence/models/babyllama.py
@@ -14,7 +14,7 @@

from executorch.backends.cadence.aot.export_example import export_model

from executorch.examples.models.llama2.llama_transformer import ModelArgs, Transformer
from executorch.examples.models.llama.llama_transformer import ModelArgs, Transformer


FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
The remaining changed files in this commit are not rendered in this view.
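After a rename like this lands, scripts or docs that still point at the old location fail at build or import time. A quick check, sketched below under the assumption that it is run from the repository root with git available, lists candidate leftovers for manual review.

```bash
# Sketch only: list remaining references to the old directory or module path
# so they can be reviewed by hand (some hits, e.g. file names that merely
# contain "llama2", may be intentional).
git grep -n -e 'models/llama2' -e 'models\.llama2' \
  || echo "No references to the old llama2 paths found."
```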
