Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement LLVM-based Lexer for IR #742

Merged
merged 18 commits into from
Aug 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions compiler_gym/envs/llvm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,17 @@ py_library(
],
)

# Python library for lexed_ir.py, which defines the LexedToken tuple type.
# Listed in the deps of :llvm_env, which imports LexedToken from it.
py_library(
    name = "lexed_ir",
    srcs = ["lexed_ir.py"],
)

py_library(
name = "llvm_env",
srcs = ["llvm_env.py"],
deps = [
":benchmark_from_command_line",
":lexed_ir",
":llvm_benchmark",
":llvm_rewards",
"//compiler_gym/datasets",
Expand Down
8 changes: 8 additions & 0 deletions compiler_gym/envs/llvm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ cg_py_library(
PUBLIC
)

# Python library for lexed_ir.py, which defines the LexedToken tuple type.
cg_py_library(
  NAME
    lexed_ir
  SRCS
    "lexed_ir.py"
  PUBLIC
)

cg_py_library(
NAME
llvm_env
Expand Down
13 changes: 13 additions & 0 deletions compiler_gym/envs/llvm/lexed_ir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Utilities for LexedIRTuple derived observation space."""
import typing


class LexedToken(typing.NamedTuple):
    """A single lexed token, the element type of the LexedIrTuple observation space.

    Each field is taken from the same position of one of the four aligned
    lists in the LexedIr base observation.
    """

    # Token ID: an element of the base observation's "token_id" list.
    ID: int
    # Token kind: an element of the base observation's "token_kind" list.
    kind: str
    # Token category: an element of the base observation's "token_category" list.
    category: str
    # Token value: an element of the base observation's "token_value" list.
    value: str
25 changes: 25 additions & 0 deletions compiler_gym/envs/llvm/llvm_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from compiler_gym.datasets import Benchmark, Dataset
from compiler_gym.envs.llvm.benchmark_from_command_line import BenchmarkFromCommandLine
from compiler_gym.envs.llvm.datasets import get_llvm_datasets
from compiler_gym.envs.llvm.lexed_ir import LexedToken
from compiler_gym.envs.llvm.llvm_benchmark import (
ClangInvocation,
get_system_library_flags,
Expand Down Expand Up @@ -309,6 +310,30 @@ def __init__(
for name, val in zip(AUTOPHASE_FEATURE_NAMES, base_observation)
},
},
{
"id": "LexedIrTuple",
"base_id": "LexedIr",
"space": Sequence(
name="LexedToken",
size_range=(0, None),
dtype=LexedToken,
),
"translate": lambda base_observation: [
LexedToken(tid, kind, cat, val)
for tid, kind, cat, val in zip(
base_observation["token_id"],
base_observation["token_kind"],
base_observation["token_category"],
base_observation["token_value"],
)
],
"default_value": {
"token_id": [],
"token_kind": [],
"token_category": [],
"token_value": [],
},
},
],
)

Expand Down
1 change: 1 addition & 0 deletions compiler_gym/envs/llvm/service/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ cc_library(
":Cost",
":ObservationSpaces",
"//compiler_gym/service/proto:compiler_gym_service_cc_grpc",
"//compiler_gym/third_party/LexedIr",
"//compiler_gym/third_party/autophase:InstCount",
"//compiler_gym/third_party/cpuinfo",
"//compiler_gym/util:GrpcStatusMacros",
Expand Down
1 change: 1 addition & 0 deletions compiler_gym/envs/llvm/service/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ cg_cc_library(
::ObservationSpaces
compiler_gym::service::proto::compiler_gym_service_cc_grpc
compiler_gym::third_party::autophase::InstCount
compiler_gym::third_party::LexedIr::LexedIr
compiler_gym::util::GrpcStatusMacros
ABS_DEPS
CpuInfo::cpuinfo
Expand Down
35 changes: 35 additions & 0 deletions compiler_gym/envs/llvm/service/Observation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "compiler_gym/envs/llvm/service/Benchmark.h"
#include "compiler_gym/envs/llvm/service/Cost.h"
#include "compiler_gym/envs/llvm/service/ObservationSpaces.h"
#include "compiler_gym/third_party/LexedIr/lexed_ir.h"
#include "compiler_gym/third_party/autophase/InstCount.h"
#include "compiler_gym/third_party/llvm/InstCount.h"
#include "compiler_gym/util/GrpcStatusMacros.h"
Expand Down Expand Up @@ -228,6 +229,40 @@ Status setObservation(LlvmObservationSpace space, const fs::path& workingDirecto
case LlvmObservationSpace::BUILDTIME: {
return benchmark.computeBuildtime(reply);
}
case LlvmObservationSpace::LEXED_IR: {
  // Serialize the LLVM module to a textual IR string for the lexer.
  std::string ir;
  llvm::raw_string_ostream rso(ir);
  benchmark.module().print(rso, /*AAW=*/nullptr);
  rso.flush();

  // LexIR() returns the token lists as nested pairs:
  // ((ids, kinds), (categories, values)). Bind them by reference: the lists
  // hold one element per token of the module, so copying each of them just to
  // read it back out is wasted work.
  const auto lexed = LexedIr::LexIR(ir);
  const auto& token_id = lexed.first.first;
  const auto& token_kind = lexed.first.second;
  const auto& token_cat = lexed.second.first;
  const auto& token_values = lexed.second.second;

  // Build one tensor event per list. Each tensor has a single dimension whose
  // extent is the length of its list.
  Event token_id_ev, token_kind_ev, token_cat_ev, token_values_ev;
  token_id_ev.mutable_int64_tensor()->add_shape(token_id.size());
  *token_id_ev.mutable_int64_tensor()->mutable_value() = {token_id.begin(), token_id.end()};

  token_kind_ev.mutable_string_tensor()->add_shape(token_kind.size());
  *token_kind_ev.mutable_string_tensor()->mutable_value() = {token_kind.begin(),
                                                             token_kind.end()};

  token_cat_ev.mutable_string_tensor()->add_shape(token_cat.size());
  *token_cat_ev.mutable_string_tensor()->mutable_value() = {token_cat.begin(), token_cat.end()};

  token_values_ev.mutable_string_tensor()->add_shape(token_values.size());
  *token_values_ev.mutable_string_tensor()->mutable_value() = {token_values.begin(),
                                                               token_values.end()};

  // Publish the events under the keys read by the Python-side LexedIrTuple
  // translation. Swap() replaces each map entry with the populated event
  // without deep-copying the tensors a second time; the locals are discarded
  // at the end of this scope.
  auto& events = *reply.mutable_event_dict()->mutable_event();
  events["token_id"].Swap(&token_id_ev);
  events["token_kind"].Swap(&token_kind_ev);
  events["token_category"].Swap(&token_cat_ev);
  events["token_value"].Swap(&token_values_ev);
  break;
}
}

return Status::OK;
Expand Down
6 changes: 6 additions & 0 deletions compiler_gym/envs/llvm/service/ObservationSpaces.cc
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ std::vector<ObservationSpace> getLlvmObservationSpaceList() {
observationSpace.mutable_default_observation()->set_int64_value(0);
break;
}
case LlvmObservationSpace::LEXED_IR: {
space.mutable_string_value()->mutable_length_range()->set_min(0);
observationSpace.set_deterministic(true);
observationSpace.set_platform_dependent(false);
break;
}
}
observationSpaces.push_back(observationSpace);
}
Expand Down
6 changes: 6 additions & 0 deletions compiler_gym/envs/llvm/service/ObservationSpaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ enum class LlvmObservationSpace {
* benchmarks. When not available, a list of zeros are returned.
*/
BUILDTIME,
/** The tokens of the input IR, as produced by the LLVM lexer.
*
* Returns a dictionary of aligned lists (token_id, token_kind, token_category,
* token_value), with one list element for every token lexed from the IR.
*/
LEXED_IR,
};

/** Return the list of available observation spaces. */
Expand Down
1 change: 1 addition & 0 deletions compiler_gym/envs/llvm/specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class observation_spaces(Enum):
InstCountNorm = "InstCountNorm"
InstCountNormDict = "InstCountNormDict"
AutophaseDict = "AutophaseDict"
LexedIr = "LexedIr"


class reward_spaces(Enum):
Expand Down
30 changes: 30 additions & 0 deletions compiler_gym/third_party/LexedIr/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# load("@rules_python//python:defs.bzl", "py_library")
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")

# C++ library that lexes textual LLVM-IR into tokens. lexed_ir.h is the header
# included by the LLVM service's Observation.cc, which depends on this target.
cc_library(
    name = "LexedIr",
    srcs = [
        "LLLexer.cc",
        "llvm_lexer_token_info.cc",
    ],
    hdrs = [
        "LLLexer.h",
        "LLToken.h",
        "escape.h",
        "lexed_ir.h",
        "llvm_lexer.h",
        "llvm_lexer_token_info.h",
    ],
    # Compiled without RTTI, with GOOGLE_PROTOBUF_NO_RTTI defined alongside it.
    # NOTE(review): presumably needed to link against an LLVM built without
    # RTTI - confirm.
    copts = [
        "-DGOOGLE_PROTOBUF_NO_RTTI",
        "-fno-rtti",
    ],
    visibility = ["//visibility:public"],
    deps = [
        "@llvm//10.0.0",
    ],
)
29 changes: 29 additions & 0 deletions compiler_gym/third_party/LexedIr/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# NOTE(review): presumably adds every subdirectory of this directory to the
# build - confirm against the definition of the cg_* helpers.
cg_add_all_subdirs()

# Map the LLVM components this library links against to their library names,
# stored in _LLVM_LIBS.
llvm_map_components_to_libnames(_LLVM_LIBS
  core support bitwriter
)
# CMake counterpart of the Bazel //compiler_gym/third_party/LexedIr target:
# the LLVM-IR lexer library consumed by the LLVM service.
cg_cc_library(
  NAME LexedIr
  SRCS
    LLLexer.cc
    llvm_lexer_token_info.cc
  HDRS
    escape.h
    LLLexer.h
    LLToken.h
    llvm_lexer_token_info.h
    llvm_lexer.h
    lexed_ir.h
  ABS_DEPS
    ${_LLVM_LIBS}
  INCLUDES
    ${LLVM_INCLUDE_DIRS}
  DEFINES
    ${LLVM_DEFINITIONS}
)
Loading