fix llama eager runner and add ci
ghstack-source-id: 8a2df48b583f5c32976e6550b1ea94bb7b8afbe0
Pull Request resolved: #6344
helunwencser committed Oct 18, 2024
1 parent 5add88d commit 30959de
Showing 4 changed files with 96 additions and 5 deletions.
62 changes: 62 additions & 0 deletions .ci/scripts/test_llama_runner_eager.sh
@@ -0,0 +1,62 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

# Download and prepare stories model artifacts
prepare_model_artifacts() {
  echo "Preparing stories model artifacts"
  wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
  wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
}

run_and_verify() {
  NOW=$(date +"%H:%M:%S")
  echo "Starting to run llama runner in eager mode at ${NOW}"
  if [[ ! -f "stories110M.pt" ]]; then
    echo "stories110M.pt is missing."
    exit 1
  fi
  if [[ ! -f "tokenizer.model" ]]; then
    echo "tokenizer.model is missing."
    exit 1
  fi
  if [[ ! -f "params.json" ]]; then
    echo "params.json is missing."
    exit 1
  fi
  $PYTHON_EXECUTABLE -m examples.models.llama.runner.eager \
    -c stories110M.pt \
    -p params.json \
    -t tokenizer.model \
    -kv \
    -d fp32 \
    --max_seq_length 32 \
    --temperature 0 \
    --prompt "Once upon a time," > result.txt

  # Verify result.txt: greedy decoding makes the generated text deterministic
  RESULT=$(cat result.txt)
  EXPECTED_RESULT="there was a little girl"
  if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
    echo "Actual result: ${RESULT}"
    echo "Success"
    exit 0
  else
    echo "Actual result: ${RESULT}"
    echo "Failure; result does not contain the expected string"
    exit 1
  fi
}

prepare_model_artifacts
run_and_verify
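
Why can the test compare against a fixed string? The runner is invoked with `--temperature 0`, which makes decoding greedy and therefore deterministic across runs. A minimal sketch of the sampling rule that flag implies (illustrative only, not the repository's actual `next_token`; top-p filtering is omitted):

```python
import torch


def next_token_sketch(logits: torch.Tensor, temperature: float) -> int:
    """Pick the next token id from the logits of the last position."""
    if temperature == 0:
        # Greedy: always take the highest-scoring token, so repeated runs
        # on the same prompt produce identical text -- which is what the
        # string check in run_and_verify relies on.
        return int(torch.argmax(logits[-1]).item())
    # Otherwise sample from the temperature-scaled distribution.
    probs = torch.softmax(logits[-1] / temperature, dim=-1)
    return int(torch.multinomial(probs, num_samples=1).item())
```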
27 changes: 27 additions & 0 deletions .github/workflows/pull.yml
@@ -501,3 +501,30 @@ jobs:
         # run eval_llama mmlu task
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh
+
+  test-llama_runner_eager-linux:
+    name: test-llama_runner_eager-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
+
+        # install pybind
+        bash install_requirements.sh --pybind xnnpack
+
+        # install llama requirements
+        bash examples/models/llama/install_requirements.sh
+
+        # run llama runner in eager mode
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh
4 changes: 2 additions & 2 deletions examples/models/llama/runner/eager.py
@@ -11,11 +11,11 @@
 import torch
 
 from examples.models.llama.llama_transformer import ModelArgs
-from executorch.examples.models.llama2.export_llama_lib import (
+from executorch.examples.models.llama.export_llama_lib import (
     _prepare_for_llama_export,
     build_args_parser as _build_args_parser,
 )
-from executorch.examples.models.llama2.runner.generation import LlamaRunner
+from executorch.examples.models.llama.runner.generation import LlamaRunner
 from executorch.extension.llm.export import LLMEdgeManager


8 changes: 5 additions & 3 deletions examples/models/llama/runner/generation.py
@@ -10,7 +10,7 @@
 import torch
 
 from executorch.examples.models.llama.llama_transformer import ModelArgs
-from executorch.examples.models.llama.tokenizer.tiktoken import Tokenizer
+from executorch.extension.llm.tokenizer.utils import get_tokenizer
 
 
 class CompletionPrediction(TypedDict, total=False):
@@ -53,7 +53,7 @@ def next_token(logits: torch.Tensor, temperature: float, top_p: float) -> int:
 class LlamaRunner(ABC):
     def __init__(self, tokenizer_path: str, model_args: ModelArgs):
         self.params = model_args
-        self.tokenizer = Tokenizer(tokenizer_path)
+        self.tokenizer = get_tokenizer(tokenizer_path)
         assert model_args.vocab_size == self.tokenizer.n_words
 
     @abstractmethod
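
The switch from the hard-coded Tiktoken `Tokenizer` to `get_tokenizer` lets the same runner load either tokenizer format: stories/llama2-style checkpoints ship a SentencePiece `tokenizer.model`, while llama3-style models use a Tiktoken file. A plausible sketch of such a dispatch under a try/fallback assumption (not the verbatim body of `executorch.extension.llm.tokenizer.utils.get_tokenizer`):

```python
from sentencepiece import SentencePieceProcessor


def get_tokenizer_sketch(tokenizer_path: str):
    """Illustrative dispatch; the real utility returns wrappers that
    expose n_words, eos_id, encode/decode, and so on."""
    try:
        # SentencePiece format, e.g. the stories110M tokenizer.model.
        return SentencePieceProcessor(model_file=tokenizer_path)
    except Exception:
        # Fall back to the Tiktoken wrapper used by llama3-style models.
        from executorch.examples.models.llama.tokenizer.tiktoken import Tokenizer

        return Tokenizer(tokenizer_path)
```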
@@ -93,7 +93,9 @@ def generate( # noqa: C901
             else:
                 logits = self.forward(tokens=torch.tensor([tokens], dtype=torch.long))
             current_token = next_token(logits, temperature, top_p)
-            if current_token in self.tokenizer.stop_tokens:
+            if current_token == self.tokenizer.eos_id or (
+                hasattr(self, "stop_tokens") and current_token in self.stop_tokens
+            ):
                 break
             tokens.append(current_token)
 
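The widened stop condition covers both backends: every tokenizer exposes `eos_id`, but only some runners define an additional `stop_tokens` set (for example end-of-turn ids in chat models), hence the `hasattr` guard. A self-contained sketch of the check, with hypothetical token ids:

```python
class _FakeTokenizer:
    eos_id = 2  # hypothetical EOS id


class _FakeRunner:
    def __init__(self, stop_tokens=None):
        self.tokenizer = _FakeTokenizer()
        if stop_tokens is not None:
            # Only some runners define this attribute, hence hasattr below.
            self.stop_tokens = stop_tokens

    def should_stop(self, current_token: int) -> bool:
        # Mirrors the condition added to generate() above.
        return current_token == self.tokenizer.eos_id or (
            hasattr(self, "stop_tokens") and current_token in self.stop_tokens
        )


assert _FakeRunner().should_stop(2)  # EOS always stops generation
assert _FakeRunner(stop_tokens={128009}).should_stop(128009)  # extra stop id
assert not _FakeRunner().should_stop(42)  # ordinary token continues
```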
