Skip to content

Commit

Permalink
Merge pull request #432 from facebookresearch/unrolling-example-service
Browse files Browse the repository at this point in the history
Added an example service that focuses on loop unrolling optimization
  • Loading branch information
mostafaelhoushi authored Oct 14, 2021
2 parents 6fe92d6 + aa24d2c commit 00ae8c0
Show file tree
Hide file tree
Showing 20 changed files with 1,086 additions and 49 deletions.
45 changes: 5 additions & 40 deletions compiler_gym/envs/llvm/llvm_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,21 @@
import os
import random
import subprocess
import sys
import tempfile
from concurrent.futures import as_completed
from datetime import datetime
from pathlib import Path
from signal import Signals
from typing import Iterable, List, Optional, Union

from compiler_gym.datasets import Benchmark, BenchmarkInitError
from compiler_gym.third_party import llvm
from compiler_gym.util.commands import communicate, run_command
from compiler_gym.util.runfiles_path import transient_cache_path
from compiler_gym.util.thread_pool import get_thread_pool_executor

logger = logging.getLogger(__name__)


def _communicate(process, input=None, timeout=None):
    """Run ``process.communicate()`` and kill the subprocess if it times out.

    Args:
        process: A ``subprocess.Popen``-like object exposing ``communicate()``
            and ``kill()``.
        input: Data forwarded to ``process.communicate()``, or ``None``.
        timeout: Timeout in seconds forwarded to ``process.communicate()``.

    Returns:
        The ``(stdout, stderr)`` tuple returned by ``process.communicate()``.

    Raises:
        subprocess.TimeoutExpired: If the process did not finish within
            ``timeout``. The process is killed before the error propagates.
    """
    try:
        return process.communicate(input=input, timeout=timeout)
    except subprocess.TimeoutExpired:
        # Popen.kill() exists on every Python 3 release, so no version check
        # or terminate() fallback is required here.
        process.kill()
        # Wait for shutdown to complete (and drain the pipes) before
        # re-raising the original timeout error.
        process.communicate(timeout=timeout)
        raise


def get_compiler_includes(compiler: str) -> Iterable[Path]:
"""Run the system compiler in verbose mode on a dummy input to get the
system header search path.
Expand All @@ -58,7 +43,7 @@ def get_compiler_includes(compiler: str) -> Iterable[Path]:
f"Is there a working system compiler?\n"
f"Error: {e}"
) from e
_, stderr = _communicate(process, input="", timeout=30)
_, stderr = communicate(process, input="", timeout=30)
if process.returncode:
raise OSError(
f"Failed to invoke {compiler}. "
Expand Down Expand Up @@ -190,26 +175,6 @@ def from_c_file(
)


def _run_command(cmd: List[str], timeout: int):
    """Run a compilation command, raising BenchmarkInitError if it fails.

    The command's stdout is discarded; stderr is captured as text so that it
    can be included in the error message.

    Args:
        cmd: The command to run, as a list of arguments.
        timeout: Timeout in seconds passed to ``_communicate()``.

    Raises:
        BenchmarkInitError: If the command exits with a nonzero returncode.
    """
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    _, stderr = _communicate(process, timeout=timeout)

    if not process.returncode:
        return

    status = process.returncode
    try:
        # Annotate the code with a signal name where one matches. Signal
        # returncodes are negative, hence the abs().
        status = f"{status} ({Signals(abs(status)).name})"
    except ValueError:
        # Not a valid signal number: report the bare returncode.
        pass

    raise BenchmarkInitError(
        f"Compilation job failed with returncode {status}\n"
        f"Command: {' '.join(cmd)}\n"
        f"Stderr: {stderr.strip()}"
    )


def make_benchmark(
inputs: Union[str, Path, ClangInvocation, List[Union[str, Path, ClangInvocation]]],
copt: Optional[List[str]] = None,
Expand Down Expand Up @@ -369,10 +334,10 @@ def _add_path(path: Path):

# Fire off the clang and llvm-as jobs.
futures = [
executor.submit(_run_command, job.command(out), job.timeout)
executor.submit(run_command, job.command(out), job.timeout)
for job, out in zip(clang_jobs, clang_outs)
] + [
executor.submit(_run_command, command, timeout)
executor.submit(run_command, command, timeout)
for command in llvm_as_commands
]

Expand Down Expand Up @@ -404,7 +369,7 @@ def _add_path(path: Path):
llvm_link = subprocess.Popen(
llvm_link_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
bitcode, stderr = _communicate(llvm_link, timeout=timeout)
bitcode, stderr = communicate(llvm_link, timeout=timeout)
if llvm_link.returncode:
raise BenchmarkInitError(
f"Failed to link LLVM bitcodes with error: {stderr.decode('utf-8')}"
Expand Down
5 changes: 3 additions & 2 deletions compiler_gym/service/runtime/BenchmarkCache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ const Benchmark* BenchmarkCache::get(const std::string& uri) const {

void BenchmarkCache::add(const Benchmark&& benchmark) {
const size_t benchmarkSize = benchmark.ByteSizeLong();
VLOG(3) << "Caching benchmark " << benchmark.uri() << " (" << benchmarkSize
<< " bytes). Cache size = " << sizeInBytes() << " bytes, " << size() << " items";

// Remove any existing value to keep the cache size consistent.
const auto it = benchmarks_.find(benchmark.uri());
Expand All @@ -52,6 +50,9 @@ void BenchmarkCache::add(const Benchmark&& benchmark) {

benchmarks_.insert({benchmark.uri(), std::move(benchmark)});
sizeInBytes_ += benchmarkSize;

VLOG(3) << "Cached benchmark " << benchmark.uri() << " (" << benchmarkSize
<< " bytes). Cache size = " << sizeInBytes() << " bytes, " << size() << " items";
}

void BenchmarkCache::evictToCapacity(std::optional<size_t> targetSize) {
Expand Down
14 changes: 7 additions & 7 deletions compiler_gym/service/runtime/benchmark_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,6 @@ def __contains__(self, uri: str):

def __setitem__(self, uri: str, benchmark: Benchmark):
"""Add benchmark to cache."""
logger.debug(
"Caching benchmark %s. Cache size = %d bytes, %d items",
uri,
self.size_in_bytes,
self.size,
)

# Remove any existing value to keep the cache size consistent.
if uri in self._benchmarks:
self._size_in_bytes -= self._benchmarks[uri].ByteSize()
Expand Down Expand Up @@ -80,6 +73,13 @@ def __setitem__(self, uri: str, benchmark: Benchmark):
self._benchmarks[uri] = benchmark
self._size_in_bytes += size

logger.debug(
"Cached benchmark %s. Cache size = %d bytes, %d items",
uri,
self.size_in_bytes,
self.size,
)

def evict_to_capacity(self, target_size_in_bytes: Optional[int] = None) -> None:
"""Evict benchmarks randomly to reduce the capacity below 50%."""
evicted = 0
Expand Down
10 changes: 10 additions & 0 deletions compiler_gym/third_party/llvm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,21 @@ def lli_path() -> Path:
return download_llvm_files() / "bin/lli"


def llc_path() -> Path:
    """Return the path of llc.

    The path is ``bin/llc`` under the directory returned by
    ``download_llvm_files()``.
    """
    llvm_root = download_llvm_files()
    return llvm_root / "bin/llc"


def llvm_as_path() -> Path:
    """Return the path of llvm-as.

    The path is ``bin/llvm-as`` under the directory returned by
    ``download_llvm_files()``.
    """
    llvm_root = download_llvm_files()
    return llvm_root / "bin/llvm-as"


def llvm_dis_path() -> Path:
    """Return the path of llvm-dis."""
    return download_llvm_files() / "bin/llvm-dis"


def llvm_link_path() -> Path:
    """Return the path of llvm-link.

    The path is ``bin/llvm-link`` under the directory returned by
    ``download_llvm_files()``.
    """
    llvm_root = download_llvm_files()
    return llvm_root / "bin/llvm-link"
Expand Down
5 changes: 5 additions & 0 deletions compiler_gym/third_party/neuro-vectorizer/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
filegroup(
name = "header",
srcs = ["header.h"],
visibility = ["//visibility:public"],
)
29 changes: 29 additions & 0 deletions compiler_gym/third_party/neuro-vectorizer/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
BSD 3-Clause License

Copyright (c) 2019, Ameer Haj Ali (UC Berkeley), and Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51 changes: 51 additions & 0 deletions compiler_gym/third_party/neuro-vectorizer/header.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
Copyright (c) 2019, Ameer Haj Ali (UC Berkeley), and Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdbool.h>
#include <stdio.h>
#include <sys/time.h>

/**
 * Warm up, then time ITER executions of RUN_LINE and print the elapsed time.
 *
 * Parameters:
 *   NAME        - not referenced anywhere in the macro body.
 *   RUN_LINE    - statement under test. It is executed once before timing
 *                 starts (the warmup) and then ITER times between the two
 *                 gettimeofday() calls.
 *   ITER        - number of timed executions of RUN_LINE.
 *   DIGEST_LINE - expression evaluated once after timing; its value is stored
 *                 in a local `unsigned r` that the macro does not use further.
 *
 * The elapsed time between the two gettimeofday() samples is converted to
 * milliseconds, rounded by adding 0.5 before conversion to long, and written
 * to stdout with printf("%ld") (no trailing newline).
 *
 * The macro expands to a brace-enclosed block that declares its own locals
 * (Start, End, r, mtime, s, us, and the loop counter i).
 *
 * Adapted from Neurovectorizer's implementation:
 * https://github.com/intel/neuro-vectorizer/blob/d1b068998c08865c59f1586845bb947229f70a51/training_data/header.h
 *
 * Which was in turn adapted from LLVM:
 * https://github.com/llvm/llvm-test-suite/blob/7eca159e29ca4308256ef6e35560a2d884ac6b01/SingleSource/UnitTests/Vectorizer/gcc-loops.cpp#L330-L336
 */
#define BENCH(NAME, RUN_LINE, ITER, DIGEST_LINE) \
{ \
struct timeval Start, End; \
RUN_LINE; \
gettimeofday(&Start, 0); \
for (int i = 0; i < (ITER); ++i) RUN_LINE; \
gettimeofday(&End, 0); \
unsigned r = DIGEST_LINE; \
long mtime, s, us; \
s = End.tv_sec - Start.tv_sec; \
us = End.tv_usec - Start.tv_usec; \
mtime = (s * 1000 + us / 1000.0) + 0.5; \
printf("%ld", mtime); \
}
1 change: 1 addition & 0 deletions compiler_gym/util/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ py_library(
srcs = [
"__init__.py",
"capture_output.py",
"commands.py",
"debug_util.py",
"decorators.py",
"download.py",
Expand Down
44 changes: 44 additions & 0 deletions compiler_gym/util/commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import subprocess
import sys
from signal import Signals
from typing import List


def run_command(cmd: List[str], timeout: int):
    """Run a command and return its stdout, raising OSError if it fails.

    Both stdout and stderr are captured as text.

    Args:
        cmd: The command to run, as a list of arguments.
        timeout: Timeout in seconds passed to ``communicate()``.

    Returns:
        The captured stdout of the command.

    Raises:
        OSError: If the command exits with a nonzero returncode.
    """
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    stdout, stderr = communicate(process, timeout=timeout)

    if not process.returncode:
        return stdout

    status = process.returncode
    try:
        # Annotate the code with a signal name where one matches. Signal
        # returncodes are negative, hence the abs().
        status = f"{status} ({Signals(abs(status)).name})"
    except ValueError:
        # Not a valid signal number: report the bare returncode.
        pass

    raise OSError(
        f"Compilation job failed with returncode {status}\n"
        f"Command: {' '.join(cmd)}\n"
        f"Stderr: {stderr.strip()}"
    )


def communicate(process, input=None, timeout=None):
    """Run ``process.communicate()`` and kill the subprocess if it times out.

    Args:
        process: A ``subprocess.Popen``-like object exposing ``communicate()``
            and ``kill()``.
        input: Data forwarded to ``process.communicate()``, or ``None``.
        timeout: Timeout in seconds forwarded to ``process.communicate()``.

    Returns:
        The ``(stdout, stderr)`` tuple returned by ``process.communicate()``.

    Raises:
        subprocess.TimeoutExpired: If the process did not finish within
            ``timeout``. The process is killed before the error propagates.
    """
    try:
        return process.communicate(input=input, timeout=timeout)
    except subprocess.TimeoutExpired:
        # Popen.kill() exists on every Python 3 release, so no version check
        # or terminate() fallback is required here.
        process.kill()
        # Wait for shutdown to complete (and drain the pipes) before
        # re-raising the original timeout error.
        process.communicate(timeout=timeout)
        raise
38 changes: 38 additions & 0 deletions examples/example_unrolling_service/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
load("@rules_python//python:defs.bzl", "py_library", "py_test")

py_library(
name = "example_unrolling_service",
srcs = ["__init__.py"],
data = [
"//examples/example_unrolling_service/benchmarks",
"//examples/example_unrolling_service/service_py:example-unrolling-service-py",
],
visibility = ["//visibility:public"],
deps = [
"//compiler_gym/envs/llvm",
"//compiler_gym/util",
],
)

py_test(
name = "env_tests",
srcs = ["env_tests.py"],
deps = [
":example_unrolling_service",
"//compiler_gym",
"//tests:test_main",
],
)

py_binary(
name = "example",
srcs = ["example.py"],
deps = [
":example_unrolling_service",
"//compiler_gym",
],
)
30 changes: 30 additions & 0 deletions examples/example_unrolling_service/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Unrolling CompilerGym Service Example

This is an example of how to create your own CompilerGym environment. All paths listed below are relative to the path of this README file.

* Actions: this environment focuses on the unrolling optimization. The actions are the different unrolling factors.
- The actions are listed in `action_spaces` struct in `service_py/example_service.py`
- The actions are implemented in `apply_action(...)` function in `service_py/example_service.py`
* Observations: the available observations are the textual form of the LLVM IR, statistical features of the different types of IR instructions, execution runtime, and code size.
- The observations are listed in `observation_spaces` struct in `service_py/example_service.py`.
- The observations are implemented in `get_observation(...)` function in `service_py/example_service.py`
* Rewards: the rewards could be runtime or code size.
- The rewards are implemented in `__init__.py` and they reuse the runtime and code size observations mentioned above
* Benchmarks: this environment expects your benchmarks to follow the templates from the [NeuroVectorizer repo](https://github.com/intel/neuro-vectorizer/tree/master/training_data), which was in turn adapted from the [LLVM loop test suite](https://github.com/llvm/llvm-test-suite/blob/main/SingleSource/UnitTests/Vectorizer/gcc-loops.cpp).
- To implement your benchmark, you need to: include the `header.h` file, implement your benchmark in a custom function, then invoke it using `BENCH` macro inside the `main()` function.
- Following this template is necessary in order for the benchmark to measure the execution runtime and write it to stdout, which is in turn parsed by this environment to measure the runtime reward.
- You can view and add examples of benchmarks in `benchmarks` directory
- Also, when adding your own benchmark, you need to add it to the `UnrollingDataset` class in `__init__.py`

## Usage

Run `example.py` example:
```sh
$ bazel run //examples/example_unrolling_service:example
```

Run `env_tests.py` unit tests:

```sh
$ bazel test //examples/example_unrolling_service:env_tests
```
Loading

0 comments on commit 00ae8c0

Please sign in to comment.