Skip to content

Commit

Permalink
Merge pull request #742 from fivosts/lexed_IR
Browse files Browse the repository at this point in the history
Implement LLVM-based Lexer for IR
  • Loading branch information
ChrisCummins authored Aug 8, 2022
2 parents b58e83b + 48c0ec5 commit 8d25c10
Show file tree
Hide file tree
Showing 22 changed files with 2,983 additions and 1 deletion.
6 changes: 6 additions & 0 deletions compiler_gym/envs/llvm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,17 @@ py_library(
],
)

# Python library target wrapping lexed_ir.py. It has no deps of its own and is
# referenced as ":lexed_ir" by other targets in this package.
py_library(
name = "lexed_ir",
srcs = ["lexed_ir.py"],
)

py_library(
name = "llvm_env",
srcs = ["llvm_env.py"],
deps = [
":benchmark_from_command_line",
":lexed_ir",
":llvm_benchmark",
":llvm_rewards",
"//compiler_gym/datasets",
Expand Down
8 changes: 8 additions & 0 deletions compiler_gym/envs/llvm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ cg_py_library(
PUBLIC
)

# Declares the lexed_ir Python library from lexed_ir.py; this mirrors the
# Bazel py_library target of the same name.
# NOTE(review): PUBLIC presumably makes the target visible outside this
# directory -- confirm against the cg_py_library macro definition.
cg_py_library(
NAME
lexed_ir
SRCS
"lexed_ir.py"
PUBLIC
)

cg_py_library(
NAME
llvm_env
Expand Down
13 changes: 13 additions & 0 deletions compiler_gym/envs/llvm/lexed_ir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Utilities for LexedIRTuple derived observation space."""
import typing


class LexedToken(typing.NamedTuple):
    """A single token of lexed LLVM-IR.

    Instances are the elements of the ``LexedIrTuple`` observation. Each one
    is built from the entries at the same index of the aligned ``token_id``,
    ``token_kind``, ``token_category`` and ``token_value`` lists of the base
    ``LexedIr`` observation.
    """

    # Integer token ID, taken from the "token_id" list.
    ID: int
    # Token kind string, taken from the "token_kind" list.
    kind: str
    # Token category string, taken from the "token_category" list.
    category: str
    # Token value string, taken from the "token_value" list.
    value: str
25 changes: 25 additions & 0 deletions compiler_gym/envs/llvm/llvm_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from compiler_gym.datasets import Benchmark, Dataset
from compiler_gym.envs.llvm.benchmark_from_command_line import BenchmarkFromCommandLine
from compiler_gym.envs.llvm.datasets import get_llvm_datasets
from compiler_gym.envs.llvm.lexed_ir import LexedToken
from compiler_gym.envs.llvm.llvm_benchmark import (
ClangInvocation,
get_system_library_flags,
Expand Down Expand Up @@ -309,6 +310,30 @@ def __init__(
for name, val in zip(AUTOPHASE_FEATURE_NAMES, base_observation)
},
},
{
"id": "LexedIrTuple",
"base_id": "LexedIr",
"space": Sequence(
name="LexedToken",
size_range=(0, None),
dtype=LexedToken,
),
"translate": lambda base_observation: [
LexedToken(tid, kind, cat, val)
for tid, kind, cat, val in zip(
base_observation["token_id"],
base_observation["token_kind"],
base_observation["token_category"],
base_observation["token_value"],
)
],
"default_value": {
"token_id": [],
"token_kind": [],
"token_category": [],
"token_value": [],
},
},
],
)

Expand Down
1 change: 1 addition & 0 deletions compiler_gym/envs/llvm/service/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ cc_library(
":Cost",
":ObservationSpaces",
"//compiler_gym/service/proto:compiler_gym_service_cc_grpc",
"//compiler_gym/third_party/LexedIr",
"//compiler_gym/third_party/autophase:InstCount",
"//compiler_gym/third_party/cpuinfo",
"//compiler_gym/util:GrpcStatusMacros",
Expand Down
1 change: 1 addition & 0 deletions compiler_gym/envs/llvm/service/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ cg_cc_library(
::ObservationSpaces
compiler_gym::service::proto::compiler_gym_service_cc_grpc
compiler_gym::third_party::autophase::InstCount
compiler_gym::third_party::LexedIr::LexedIr
compiler_gym::util::GrpcStatusMacros
ABS_DEPS
CpuInfo::cpuinfo
Expand Down
35 changes: 35 additions & 0 deletions compiler_gym/envs/llvm/service/Observation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "compiler_gym/envs/llvm/service/Benchmark.h"
#include "compiler_gym/envs/llvm/service/Cost.h"
#include "compiler_gym/envs/llvm/service/ObservationSpaces.h"
#include "compiler_gym/third_party/LexedIr/lexed_ir.h"
#include "compiler_gym/third_party/autophase/InstCount.h"
#include "compiler_gym/third_party/llvm/InstCount.h"
#include "compiler_gym/util/GrpcStatusMacros.h"
Expand Down Expand Up @@ -228,6 +229,40 @@ Status setObservation(LlvmObservationSpace space, const fs::path& workingDirecto
case LlvmObservationSpace::BUILDTIME: {
return benchmark.computeBuildtime(reply);
}
case LlvmObservationSpace::LEXED_IR: {
// Serialize the LLVM module to an IR string.
std::string ir;
llvm::raw_string_ostream rso(ir);
benchmark.module().print(rso, /*AAW=*/nullptr);
rso.flush();

const auto lexed = LexedIr::LexIR(ir);
const auto token_id = lexed.first.first;
const auto token_kind = lexed.first.second;
const auto token_cat = lexed.second.first;
const auto token_values = lexed.second.second;

Event token_id_ev, token_kind_ev, token_cat_ev, token_values_ev;
token_id_ev.mutable_int64_tensor()->add_shape(token_id.size());
*token_id_ev.mutable_int64_tensor()->mutable_value() = {token_id.begin(), token_id.end()};

token_kind_ev.mutable_string_tensor()->add_shape(token_kind.size());
*token_kind_ev.mutable_string_tensor()->mutable_value() = {token_kind.begin(),
token_kind.end()};

token_cat_ev.mutable_string_tensor()->add_shape(token_cat.size());
*token_cat_ev.mutable_string_tensor()->mutable_value() = {token_cat.begin(), token_cat.end()};

token_values_ev.mutable_string_tensor()->add_shape(token_values.size());
*token_values_ev.mutable_string_tensor()->mutable_value() = {token_values.begin(),
token_values.end()};

(*reply.mutable_event_dict()->mutable_event())["token_id"] = token_id_ev;
(*reply.mutable_event_dict()->mutable_event())["token_kind"] = token_kind_ev;
(*reply.mutable_event_dict()->mutable_event())["token_category"] = token_cat_ev;
(*reply.mutable_event_dict()->mutable_event())["token_value"] = token_values_ev;
break;
}
}

return Status::OK;
Expand Down
6 changes: 6 additions & 0 deletions compiler_gym/envs/llvm/service/ObservationSpaces.cc
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ std::vector<ObservationSpace> getLlvmObservationSpaceList() {
observationSpace.mutable_default_observation()->set_int64_value(0);
break;
}
case LlvmObservationSpace::LEXED_IR: {
space.mutable_string_value()->mutable_length_range()->set_min(0);
observationSpace.set_deterministic(true);
observationSpace.set_platform_dependent(false);
break;
}
}
observationSpaces.push_back(observationSpace);
}
Expand Down
6 changes: 6 additions & 0 deletions compiler_gym/envs/llvm/service/ObservationSpaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ enum class LlvmObservationSpace {
* benchmarks. When not available, a list of zeros are returned.
*/
BUILDTIME,
/** The LLVM-lexer token IDs of the input IR.
*
* Returns a dictionary of aligned lists (token_id, token_kind, token_category, token_value),
* with one list element for every tokenized word in the IR.
*/
LEXED_IR,
};

/** Return the list of available observation spaces. */
Expand Down
1 change: 1 addition & 0 deletions compiler_gym/envs/llvm/specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class observation_spaces(Enum):
InstCountNorm = "InstCountNorm"
InstCountNormDict = "InstCountNormDict"
AutophaseDict = "AutophaseDict"
LexedIr = "LexedIr"


class reward_spaces(Enum):
Expand Down
30 changes: 30 additions & 0 deletions compiler_gym/third_party/LexedIr/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# load("@rules_python//python:defs.bzl", "py_library")
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")

# C++ library that lexes LLVM-IR. It compiles the lexer (LLLexer.cc) and the
# token-info tables (llvm_lexer_token_info.cc), and exports lexed_ir.h along
# with the other headers listed below.
cc_library(
name = "LexedIr",
srcs = [
"LLLexer.cc",
"llvm_lexer_token_info.cc",
],
hdrs = [
"LLLexer.h",
"LLToken.h",
"escape.h",
"lexed_ir.h",
"llvm_lexer.h",
"llvm_lexer_token_info.h",
],
# Compile without RTTI.
# NOTE(review): presumably required to link against an LLVM build that has
# RTTI disabled -- confirm.
copts = [
"-DGOOGLE_PROTOBUF_NO_RTTI",
"-fno-rtti",
],
visibility = ["//visibility:public"],
# Built against the pinned LLVM 10.0.0 external repository target.
deps = [
"@llvm//10.0.0",
],
)
29 changes: 29 additions & 0 deletions compiler_gym/third_party/LexedIr/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# NOTE(review): project macro; presumably adds every subdirectory of this
# directory to the build -- confirm against its definition.
cg_add_all_subdirs()

# Resolve the LLVM components used by the lexer (core, support, bitwriter)
# into concrete library names, stored in _LLVM_LIBS.
llvm_map_components_to_libnames(_LLVM_LIBS
core support bitwriter
)
# C++ library that lexes LLVM-IR; CMake counterpart of the Bazel cc_library
# "LexedIr". It links against the LLVM libraries resolved above and uses the
# include directories and compile definitions exported by the LLVM CMake
# package.
cg_cc_library(
NAME LexedIr
SRCS
LLLexer.cc
llvm_lexer_token_info.cc
HDRS
escape.h
LLLexer.h
LLToken.h
llvm_lexer_token_info.h
llvm_lexer.h
lexed_ir.h
ABS_DEPS
${_LLVM_LIBS}
INCLUDES
${LLVM_INCLUDE_DIRS}
DEFINES
${LLVM_DEFINITIONS}
)
Loading

0 comments on commit 8d25c10

Please sign in to comment.