Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
cad-audio authored Oct 19, 2024
2 parents 4e89e2a + 324f021 commit e9d9f6c
Show file tree
Hide file tree
Showing 101 changed files with 2,168 additions and 531 deletions.
2 changes: 0 additions & 2 deletions .ci/scripts/build_llama_android.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ install_executorch_and_backend_lib() {
cmake -DBUCK2="${BUCK2}" \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
-DANDROID_ABI="${ANDROID_ABI}" \
-DANDROID_PLATFORM=android-23 \
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
Expand All @@ -41,7 +40,6 @@ build_llama_runner() {
cmake -DBUCK2="${BUCK2}" \
-DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake \
-DANDROID_ABI="${ANDROID_ABI}" \
-DANDROID_PLATFORM=android-23 \
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
-DCMAKE_BUILD_TYPE=Release -DPYTHON_EXECUTABLE=python \
-DEXECUTORCH_BUILD_XNNPACK=ON \
Expand Down
64 changes: 64 additions & 0 deletions .ci/scripts/test_eval_llama_mmlu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

# Default to python3 when the caller has not supplied an interpreter.
PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"

# Download the stories110M checkpoint and its tokenizer, and write the
# matching model hyperparameters to params.json in the current directory.
prepare_model_artifacts() {
  echo "Preparing stories model artifacts"
  local ckpt_url="https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
  local tok_url="https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
  wget -O stories110M.pt "${ckpt_url}"
  wget -O tokenizer.model "${tok_url}"
  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
}

# Run eval_llama on the mmlu task against the stories110M checkpoint and
# verify that the output reports the task with an "acc" metric.
# Exits 0 on success, 1 on missing artifacts or unexpected output.
run_and_verify() {
  echo "Starting to run eval_llama at $(date +"%H:%M:%S")"

  # All three artifacts must have been produced by prepare_model_artifacts.
  local artifact
  for artifact in stories110M.pt tokenizer.model params.json; do
    if [[ ! -f "${artifact}" ]]; then
      echo "${artifact} is missing."
      exit 1
    fi
  done

  $PYTHON_EXECUTABLE -m examples.models.llama.eval_llama \
    -c stories110M.pt \
    -p params.json \
    -t tokenizer.model \
    -kv \
    -d fp32 \
    --tasks mmlu \
    -f 5 \
    --max_seq_length 2048 \
    --limit 5 > result.txt

  # Success iff the output starts with 'mmlu: {' and mentions an 'acc' key.
  local result
  result=$(cat result.txt)
  if [[ "${result}" == "mmlu: {"*"acc"* ]]; then
    echo "Actual result: ${result}"
    echo "Success"
    exit 0
  else
    echo "Actual result: ${result}"
    echo "Failure; results not the same"
    exit 1
  fi
}

# Entry point: fetch model artifacts, then run the eval and verify output.
prepare_model_artifacts
run_and_verify
62 changes: 62 additions & 0 deletions .ci/scripts/test_eval_llama_wikitext.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

# Default to python3 when the caller has not supplied an interpreter.
PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"

# Download the stories110M checkpoint and its tokenizer, and write the
# matching model hyperparameters to params.json in the current directory.
prepare_model_artifacts() {
  echo "Preparing stories model artifacts"
  local ckpt_url="https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
  local tok_url="https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
  wget -O stories110M.pt "${ckpt_url}"
  wget -O tokenizer.model "${tok_url}"
  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
}

# Run eval_llama (default task: wikitext) against the stories110M checkpoint
# and verify that the output reports the task with a "word_perplexity" metric.
# Exits 0 on success, 1 on missing artifacts or unexpected output.
run_and_verify() {
  echo "Starting to run eval_llama at $(date +"%H:%M:%S")"

  # All three artifacts must have been produced by prepare_model_artifacts.
  local artifact
  for artifact in stories110M.pt tokenizer.model params.json; do
    if [[ ! -f "${artifact}" ]]; then
      echo "${artifact} is missing."
      exit 1
    fi
  done

  $PYTHON_EXECUTABLE -m examples.models.llama.eval_llama \
    -c stories110M.pt \
    -p params.json \
    -t tokenizer.model \
    -kv \
    -d fp32 \
    --max_seq_length 2048 \
    --limit 5 > result.txt

  # Success iff the output starts with 'wikitext: {' and mentions
  # a 'word_perplexity' key.
  local result
  result=$(cat result.txt)
  if [[ "${result}" == "wikitext: {"*"word_perplexity"* ]]; then
    echo "Actual result: ${result}"
    echo "Success"
    exit 0
  else
    echo "Actual result: ${result}"
    echo "Failure; results not the same"
    exit 1
  fi
}

# Entry point: fetch model artifacts, then run the eval and verify output.
prepare_model_artifacts
run_and_verify
62 changes: 62 additions & 0 deletions .ci/scripts/test_llama_runner_eager.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

# Default to python3 when the caller has not supplied an interpreter.
PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"

# Download the stories110M checkpoint and its tokenizer, and write the
# matching model hyperparameters to params.json in the current directory.
prepare_model_artifacts() {
  echo "Preparing stories model artifacts"
  local ckpt_url="https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
  local tok_url="https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
  wget -O stories110M.pt "${ckpt_url}"
  wget -O tokenizer.model "${tok_url}"
  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
}

# Run the eager-mode llama runner on a fixed prompt with temperature 0 and
# verify the deterministic completion appears in the output.
# Exits 0 on success, 1 on missing artifacts or unexpected output.
run_and_verify() {
  NOW=$(date +"%H:%M:%S")
  # Bug fix: this script runs the eager runner, not eval_llama; the old log
  # line was copy-pasted from the eval scripts and was misleading in CI logs.
  echo "Starting to run llama runner eager at ${NOW}"

  # All three artifacts must have been produced by prepare_model_artifacts.
  for artifact in stories110M.pt tokenizer.model params.json; do
    if [[ ! -f "${artifact}" ]]; then
      echo "${artifact} is missing."
      exit 1
    fi
  done

  # temperature 0 makes generation greedy/deterministic, so the expected
  # substring check below is stable across runs.
  $PYTHON_EXECUTABLE -m examples.models.llama.runner.eager \
    -c stories110M.pt \
    -p params.json \
    -t tokenizer.model \
    -kv \
    -d fp32 \
    --max_seq_length 32 \
    --temperature 0 \
    --prompt "Once upon a time," > result.txt

  # Verify result.txt contains the expected deterministic continuation.
  RESULT=$(cat result.txt)
  EXPECTED_RESULT="there was a little girl"
  if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
    echo "Actual result: ${RESULT}"
    echo "Success"
    exit 0
  else
    echo "Actual result: ${RESULT}"
    echo "Failure; results not the same"
    exit 1
  fi
}

# Entry point: fetch model artifacts, then run the eager runner and verify.
prepare_model_artifacts
run_and_verify
2 changes: 0 additions & 2 deletions .ci/scripts/test_llava.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ cmake_install_executorch_libraries_for_android() {
cmake \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-23 \
${EXECUTORCH_COMMON_CMAKE_ARGS} \
-B${BUILD_DIR} .

Expand Down Expand Up @@ -93,7 +92,6 @@ cmake_build_llava_runner_for_android() {
cmake \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-23 \
${LLAVA_COMMON_CMAKE_ARGS} \
-DCMAKE_PREFIX_PATH="$python_lib" \
-DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON \
Expand Down
135 changes: 135 additions & 0 deletions .github/scripts/propose_ghstack_orig_pr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import os
import re

from typing import List

# Provided by the PyGithub pip package.
from github import Auth, Github
from github.Repository import Repository


def parse_args(argv=None):
    """Parse command-line arguments for the merge-bot script.

    Args:
        argv: Optional list of argument strings. Defaults to ``sys.argv[1:]``
            when None, preserving the original CLI behavior; passing an
            explicit list makes the function unit-testable.

    Returns:
        argparse.Namespace with ``repo`` (str) and ``pr`` (int) attributes.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "--repo",
        type=str,
        help='The github repo to modify: e.g. "pytorch/executorch".',
        required=True,
    )
    parser.add_argument(
        "--pr",
        type=int,
        help="Number of the PR in the stack to check and create corresponding PR",
        required=True,
    )
    return parser.parse_args(argv)


def extract_stack_from_body(pr_body: str) -> List[int]:
    """Extracts a list of PR numbers from a ghexport-generated PR body.

    The base of the stack is in index 0. Parsing starts at the ghstack header
    line and stops at the first blank line after it, so bullets like "* #N"
    appearing later in the free-form PR description are ignored.

    Expected format (this example returns [1, 2, 3]; the ``__->__`` marker may
    appear on any line)::

        Stack from [ghstack](https://github.com/ezyang/ghstack) (oldest at bottom):
        * #3
        * __->__ #2
        * #1

        <PR description details>
    """
    ghstack_begin = (
        "Stack from [ghstack](https://github.com/ezyang/ghstack) (oldest at bottom):"
    )
    prs: List[int] = []
    in_stack = False
    for line in pr_body.splitlines():
        if ghstack_begin in line:
            in_stack = True
            continue
        if not in_stack:
            continue
        if not line.strip():
            # Bug fix: the original comment promised to stop at the blank
            # line but the code never did, so "* #N" bullets in the PR
            # description were wrongly treated as stack entries.
            break
        match = re.match(r"\*(?:.*?)? #(\d+)", line)
        if match:
            # It's a bullet followed by an integer.
            prs.append(int(match.group(1)))
    return list(reversed(prs))


def get_pr_stack_from_number(pr_number: int, repo: Repository) -> List[int]:
    """Return the ghstack PR stack (base first) parsed from the PR's body.

    Raises:
        Exception: if the PR body does not contain a ghstack stack listing.
    """
    body = repo.get_pull(pr_number).body
    stack = extract_stack_from_body(body)
    if not stack:
        raise Exception(
            f"Could not find PR stack in body of #{pr_number}. "
            "Please make sure that the PR was created with ghstack."
        )
    return stack


def create_prs_for_orig_branch(pr_stack: List[int], repo: Repository):
    """Walk the merged ghstack stack bottom-up and open one PR per entry that
    merges its gh/user/x/orig branch into the previous entry's orig branch
    (or main for the base of the stack).

    Stops early (without creating anything) as soon as an unmerged PR is
    found, so partial stacks are never proposed.
    """
    # For the first PR, we want to merge to `main` branch, and we will update
    # as we go through the stack
    orig_branch_merge_base = "main"
    for i in range(len(pr_stack)):
        pr = repo.get_pull(pr_stack[i])
        if not pr.is_merged():
            print("The PR (and stack above) is not merged yet, skipping")
            return
        # Check for invariant: For the current PR, it must be gh/user/x/base <- gh/user/x/head
        assert pr.base.ref.replace("base", "head") == pr.head.ref
        # The PR we want to create is then "branch_to_merge" <- gh/user/x/orig
        # gh/user/x/orig is the clean diff between gh/user/x/base <- gh/user/x/head
        orig_branch_merge_head = pr.base.ref.replace("base", "orig")
        bot_metadata = f"""This PR was created by the merge bot to help merge the original PR into the main branch.
ghstack PR number: https://github.com/pytorch/executorch/pull/{pr.number}
^ Please use this as the source of truth for the PR details, comments, and reviews
ghstack PR base: https://github.com/pytorch/executorch/tree/{pr.base.ref}
ghstack PR head: https://github.com/pytorch/executorch/tree/{pr.head.ref}
Merge bot PR base: https://github.com/pytorch/executorch/tree/{orig_branch_merge_base}
Merge bot PR head: https://github.com/pytorch/executorch/tree/{orig_branch_merge_head}"""

        # Idempotence guard: re-running the workflow must not open duplicates.
        existing_orig_pr = repo.get_pulls(
            head="pytorch:" + orig_branch_merge_head,
            base=orig_branch_merge_base,
            state="open",
        )
        if existing_orig_pr.totalCount > 0:
            print(
                f"PR for {orig_branch_merge_head} already exists {existing_orig_pr[0]}"
            )
            # We don't need to create/edit because the head PR is merged and orig is finalized.
        else:
            repo.create_pull(
                base=orig_branch_merge_base,
                head=orig_branch_merge_head,
                title=pr.title,
                body=bot_metadata,
            )
        # Advance the base for the next PR
        orig_branch_merge_base = orig_branch_merge_head


def main():
    """Entry point: propose orig-branch PRs for the stack containing --pr."""
    args = parse_args()
    token = Auth.Token(os.environ["GITHUB_TOKEN"])
    with Github(auth=token) as gh:
        target_repo = gh.get_repo(args.repo)
        stack = get_pr_stack_from_number(args.pr, target_repo)
        create_prs_for_orig_branch(stack, target_repo)


# Allow the script to be imported (e.g. for testing) without side effects.
if __name__ == "__main__":
    main()
40 changes: 40 additions & 0 deletions .github/workflows/ghstack_land.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Runs when a PR targeting a ghstack base branch is closed; if the stack is
# fully merged, propose_ghstack_orig_pr.py opens PRs that land the /orig
# branches into main.
name: Propose to merge ghstack orig PRs to main
on:
  pull_request:
    types: [closed]
    # Only ghstack base branches of these allow-listed users trigger the job.
    branches:
      - 'gh/cccclai/[0-9]+/base'
      - 'gh/dbort/[0-9]+/base'
      - 'gh/guangy10/[0-9]+/base'
      - 'gh/helunwencser/[0-9]+/base'
      - 'gh/jorgep31415/[0-9]+/base'
      - 'gh/kimishpatel/[0-9]+/base'
      - 'gh/kirklandsign/[0-9]+/base'
      - 'gh/larryliu0820/[0-9]+/base'
      - 'gh/manuelcandales/[0-9]+/base'
      - 'gh/mcr229/[0-9]+/base'
      - 'gh/swolchok/[0-9]+/base'
      - 'gh/SS-JIA/[0-9]+/base'

jobs:
  ghstack_merge_to_main:
    name: Try to create a PR with ghstack /orig branch
    runs-on: ubuntu-22.04
    # Environment gating: the bot token is only available in this environment.
    environment: cherry-pick-bot
    steps:
      - uses: actions/checkout@v3
        with:
          # Full history is needed so ghstack branches are available locally.
          fetch-depth: '0'
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Try to merge PR to main
        run: |
          pip install pygithub
          # The triggering ref looks like refs/pull/<number>/...; extract the
          # PR number from it. NOTE(review): grep takes the first numeric run
          # in the ref — confirm this is always the PR number.
          PR_NUMBER=$(echo "$GITHUB_REF" | grep -oE '[0-9]+')
          python .github/scripts/propose_ghstack_orig_pr.py --pr $PR_NUMBER --repo pytorch/executorch
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PYTORCHBOT_CHERRY_PICK_TOKEN }}
          GITHUB_REF: ${{ github.ref }}
Loading

0 comments on commit e9d9f6c

Please sign in to comment.