Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolved stability issues #121

Merged
merged 2 commits into from
May 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions .github/workflows/rake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,11 @@ jobs:
rake:
name: test on ruby-${{ matrix.ruby }} ${{ matrix.os }}
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.experimental }}
strategy:
fail-fast: false
matrix:
ruby: [ '3.1', '3.0', '2.7' ]
os: [ ubuntu-latest, windows-latest, macos-latest ]
experimental: [ false ]
include:
- ruby: 'head'
os: 'ubuntu-latest'
experimental: true
- ruby: 'head'
os: 'windows-latest'
experimental: true
- ruby: 'head'
os: 'macos-latest'
experimental: true

steps:
- name: Checkout
Expand Down
8 changes: 8 additions & 0 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,14 @@ Run the test suite
bin/rspec
----

=== Grammar updates
The EXPRESS grammar is linked as a git submodule at ext/express-grammar.
Should you update it, run ```rake generate```. This command will generate the source code for the updated native extension using the antlr4-native gem.
Please note that we create several classes on top of the antlr4-native output, so using the embedded rake task is a real requirement.

When the new extension is generated and tested, please check the updated C++ files in to git (```rake generate``` is NOT a CI step;
extension source files are pulled from the repo).

== Installation

Add this line to your application's `Gemfile`:
Expand Down
100 changes: 88 additions & 12 deletions ext/express-parser/express_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ Class rb_cParser;
Class rb_cParseTree;
Class rb_cTerminalNode;
Class rb_cContextProxy;
Class rb_cParserExt;
Class rb_cTokenExt;

namespace Rice::detail {
template <>
Expand Down Expand Up @@ -258,6 +260,28 @@ namespace Rice::detail {
};
}

// Lightweight Ruby-facing wrapper around a single ANTLR Token.
// Exposes only the attributes needed on the Ruby side (text, channel,
// token index). It does NOT own the wrapped Token — the pointer is
// presumably owned by the CommonTokenStream that produced it, so this
// proxy must not outlive that stream (TODO confirm against callers).
class TokenProxy : public Object {
public:
    // Stores a borrowed pointer; no copy of the token is made.
    TokenProxy(Token* orig) {
        this -> orig = orig;
    }

    // Source text covered by this token.
    std::string getText() {
        return orig->getText();
    }

    // ANTLR channel number (e.g. default channel vs. hidden channel).
    size_t getChannel() {
        return orig->getChannel();
    }

    // Index of this token within its token stream.
    size_t getTokenIndex() {
        return orig->getTokenIndex();
    }

private:
    Token * orig = nullptr;
};

class ContextProxy {
public:
ContextProxy(tree::ParseTree* orig) {
Expand Down Expand Up @@ -17256,17 +17280,6 @@ class ParserProxy {
return detail::To_Ruby<SyntaxContextProxy>().convert(proxy);
}

Array getTokens() {
Array a;

std::vector<Token*> tokens = this -> tokens -> getTokens();

for (auto &token : tokens) {
a.push(token);
}

return a;
}
Object visit(VisitorProxy* visitor) {
auto result = visitor -> visit(this -> parser -> syntax());

Expand Down Expand Up @@ -18121,6 +18134,59 @@ Object ContextProxy::wrapParseTree(tree::ParseTree* node) {
}
}

// Self-contained parser facade exposed to Ruby as ExpressParser::ParserExt.
// Unlike the generated ParserProxy, it owns the entire ANTLR pipeline
// (input stream, lexer, token stream, parser), which allows the token
// stream to be exposed to Ruby via getTokens().
class ParserProxyExt : public Object {
public:
    // NOTE(review): `self` is the Ruby receiver supplied by Rice's
    // Constructor binding; it is intentionally unused here.
    ParserProxyExt(Object self, string file) {
        ifstream stream;
        stream.open(file);
        // ANTLRInputStream buffers the whole file on construction,
        // so the ifstream can be closed immediately afterwards.
        input = new ANTLRInputStream(stream);
        lexer = new ExpressLexer(input);
        tokens = new CommonTokenStream(lexer);
        parser = new ExpressParser(tokens);
        stream.close();
    };

    // Tear down the pipeline in reverse construction order.
    // NOTE(review): raw owning pointers plus a user-defined destructor but
    // no deleted copy operations — copying this object would double-free.
    // Presumably Rice never copies it; worth confirming.
    ~ParserProxyExt() {
        delete parser;
        delete tokens;
        delete lexer;
        delete input;
    }

    // Parses the input and returns the root syntax context wrapped for Ruby.
    Object syntax() {
        auto ctx = parser -> syntax();

        SyntaxContextProxy proxy((ExpressParser::SyntaxContext*) ctx);
        return detail::To_Ruby<SyntaxContextProxy>().convert(proxy);
    }

    // Returns the full token stream as an Array of TokenProxy objects.
    // NOTE(review): TokenProxy instances are heap-allocated here and handed
    // to the Rice Array — assumes Rice takes ownership of the pointers;
    // otherwise this leaks. TODO confirm against Rice's To_Ruby semantics.
    Array getTokens() {
        Array a;
        for (auto token : tokens -> getTokens()) {
            a.push(new TokenProxy(token));
        }
        return a;
    }

    // Runs a visitor over a fresh parse of the input, then resets the
    // lexer and parser so the object can be reused.
    Object visit(VisitorProxy* visitor) {
        auto result = visitor -> visit(parser -> syntax());

        lexer -> reset();
        parser -> reset();

        // any_cast throws std::bad_any_cast (derived from std::bad_cast)
        // when the visitor did not produce a Ruby Object; map that to nil.
        // NOTE(review): catching by value slices the exception — it still
        // works, but `catch (const std::bad_any_cast&)` would be cleaner.
        try {
            return std::any_cast<Object>(result);
        } catch(std::bad_cast) {
            return Qnil;
        }
    }

private:
    ANTLRInputStream* input;
    ExpressLexer* lexer;
    CommonTokenStream* tokens;
    ExpressParser* parser;
};

extern "C"
void Init_express_parser() {
Expand Down Expand Up @@ -18353,9 +18419,19 @@ void Init_express_parser() {
.define_singleton_function("parse", &ParserProxy::parse)
.define_singleton_function("parse_file", &ParserProxy::parseFile)
.define_method("syntax", &ParserProxy::syntax, Return().keepAlive())
.define_method("tokens", &ParserProxy::getTokens)
.define_method("visit", &ParserProxy::visit, Return().keepAlive());

rb_cTokenExt = define_class_under<TokenProxy>(rb_mExpressParser, "TokenExt")
.define_method("text", &TokenProxy::getText)
.define_method("channel", &TokenProxy::getChannel)
.define_method("token_index", &TokenProxy::getTokenIndex);

rb_cParserExt = define_class_under<ParserProxyExt>(rb_mExpressParser, "ParserExt")
.define_constructor(Constructor<ParserProxyExt, Object, string>())
.define_method("syntax", &ParserProxyExt::syntax, Return().keepAlive())
.define_method("tokens", &ParserProxyExt::getTokens)
.define_method("visit", &ParserProxyExt::visit, Return().keepAlive());

rb_cAttributeRefContext = define_class_under<AttributeRefContextProxy, ContextProxy>(rb_mExpressParser, "AttributeRefContext")
.define_method("attribute_id", &AttributeRefContextProxy::attributeId);

Expand Down
2 changes: 1 addition & 1 deletion ext/express-parser/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
# workaround for LoadError: 127: The specified procedure could not be found.
$DLDFLAGS << " -static-libgcc -static-libstdc++"
when /darwin/
$CXXFLAGS << " -mmacosx-version-min=10.14 -Wno-register -fno-c++-static-destructors"
$CXXFLAGS << " -mmacosx-version-min=10.14 -fno-c++-static-destructors"
$DLDFLAGS << " -mmacosx-version-min=10.14"
end
else
Expand Down
7 changes: 3 additions & 4 deletions lib/expressir/express/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,12 @@ class Parser
# @param [Boolean] include_source attach original source code to model elements
# @return [Model::Repository]
def self.from_file(file, skip_references: nil, include_source: nil)
@input = File.read(file)

# An important note re memory management
# parse, syntax, visitor methods return complex tree structures created in netive (C++) extension
# parse, syntax, visitor methods return complex tree structures created in native (C++) extension
# visit method references nodes and leaves of these structures, but that is completely opaque to the Ruby garbage collector
# so in this class we keep those C++ structure marked for GC so theu are not freed
@parser = ::ExpressParser::Parser.parse(@input)
# so in this class we keep those C++ structures marked for GC so they are not freed
@parser = ::ExpressParser::ParserExt.new(file.to_s)
@parse_tree = @parser.syntax()

@visitor = Visitor.new(@parser.tokens, include_source: include_source)
Expand Down
146 changes: 122 additions & 24 deletions rakelib/antlr4-native.rake
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,138 @@ require "fileutils"
require "antlr4-native"
require "rake"

def create_tokens_api(parser_source_lines)
# - add ParserProxy tokens method, simple compensation for missing exposed BufferedTokenStream
i = parser_source_lines.index { |x| x == " Object syntax() {" }
parser_source_lines[i + 6] += <<~CPP.split("\n").map { |x| x == "" ? x : " #{x}" }.join("\n")
def create_class_declarations(parser_source_lines)
i = parser_source_lines.index { |x| x == "Class rb_cContextProxy;" }
parser_source_lines[i] += <<~CPP.split("\n").map { |x| x == "" ? x : x.to_s }.join("\n")

Array getTokens() {
Array a;
Class rb_cParserExt;
Class rb_cTokenExt;

std::vector<Token*> tokens = this -> tokens -> getTokens();
CPP
end

# Injects the TokenProxy class definition into the generated parser source.
# Anchors on the generated "class ContextProxy {" line and appends the C++
# snippet two lines above it, mutating +parser_source_lines+ in place.
# NOTE(review): raises NoMethodError on nil if the anchor line is absent —
# presumably acceptable since the input is always antlr4-native output.
def create_tp_class_definition(parser_source_lines)
  i = parser_source_lines.index { |x| x == "class ContextProxy {" }
  # The heredoc below is C++ source and must be inserted verbatim.
  parser_source_lines[i - 2] += <<~CPP.split("\n").map { |x| x == "" ? x : x.to_s }.join("\n")


    class TokenProxy : public Object {
    public:
    TokenProxy(Token* orig) {
    this -> orig = orig;
    }

    std::string getText() {
    return orig->getText();
    }

    size_t getChannel() {
    return orig->getChannel();
    }

    size_t getTokenIndex() {
    return orig->getTokenIndex();
    }

    private:
    Token * orig = nullptr;
    };

  CPP
end

def create_pp_class_definition(parser_source_lines)
i = parser_source_lines.index { |x| x == "extern \"C\"" }
parser_source_lines[i - 2] += <<~CPP.split("\n").map { |x| x == "" ? x : x.to_s }.join("\n")

class ParserProxyExt : public Object {
public:
ParserProxyExt(Object self, string file) {
ifstream stream;
stream.open(file);
input = new ANTLRInputStream(stream);
lexer = new ExpressLexer(input);
tokens = new CommonTokenStream(lexer);
parser = new ExpressParser(tokens);
stream.close();
};

~ParserProxyExt() {
delete parser;
delete tokens;
delete lexer;
delete input;
}

Object syntax() {
auto ctx = parser -> syntax();

SyntaxContextProxy proxy((ExpressParser::SyntaxContext*) ctx);
return detail::To_Ruby<SyntaxContextProxy>().convert(proxy);
}

for (auto &token : tokens) {
a.push(token);
}
Array getTokens() {
Array a;
for (auto token : tokens -> getTokens()) {
a.push(new TokenProxy(token));
}
return a;
}

Object visit(VisitorProxy* visitor) {
auto result = visitor -> visit(parser -> syntax());

lexer -> reset();
parser -> reset();

try {
return std::any_cast<Object>(result);
} catch(std::bad_cast) {
return Qnil;
}
}

private:
ANTLRInputStream* input;
ExpressLexer* lexer;
CommonTokenStream* tokens;
ExpressParser* parser;
};

return a;
}

CPP
end

def create_tokens_method(parser_source_lines)
i = parser_source_lines.index { |x| x == ' .define_method("syntax", &ParserProxy::syntax, Return().keepAlive())' }
parser_source_lines[i] += <<~CPP.split("\n").map { |x| x == "" ? x : " #{x}" }.join("\n")
def create_class_api(parser_source_lines)
i = parser_source_lines.index { |x| x == " .define_method(\"visit\", &ParserProxy::visit, Return().keepAlive());" }
parser_source_lines[i] += <<~CPP.split("\n").map { |x| x == "" ? x : " #{x}" }.join("\n")


rb_cTokenExt = define_class_under<TokenProxy>(rb_mExpressParser, "TokenExt")
.define_method("text", &TokenProxy::getText)
.define_method("channel", &TokenProxy::getChannel)
.define_method("token_index", &TokenProxy::getTokenIndex);

rb_cParserExt = define_class_under<ParserProxyExt>(rb_mExpressParser, "ParserExt")
.define_constructor(Constructor<ParserProxyExt, Object, string>())
.define_method("syntax", &ParserProxyExt::syntax, Return().keepAlive())
.define_method("tokens", &ParserProxyExt::getTokens)
.define_method("visit", &ParserProxyExt::visit, Return().keepAlive());

.define_method("tokens", &ParserProxy::getTokens)
CPP
end

# Post-processes the antlr4-native generated C++ source in place, injecting
# the TokenProxy / ParserProxyExt classes and their Rice bindings so Ruby
# code gains access to the token stream.
def generate_extended_parser
  # Generate the extended parser that provides Ruby access to the token stream.
  parser_source_file = File.join("ext", "express-parser", "express_parser.cpp")
  parser_source_lines = File.read(parser_source_file).split("\n")
  # Each helper mutates parser_source_lines in place at a known anchor line.
  create_class_declarations(parser_source_lines)
  create_tp_class_definition(parser_source_lines)
  create_pp_class_definition(parser_source_lines)
  create_class_api(parser_source_lines)
  # Re-join with a trailing newline to keep the file POSIX-terminated.
  File.write(parser_source_file, "#{parser_source_lines.join("\n")}\n")
end

desc "Generate parser (Usage: 'rake generate <grammar_file>')"
task "generate" do
grammar_file = ARGV[1]
Expand All @@ -38,7 +142,6 @@ task "generate" do
end

puts "Generating parser from '#{grammar_file}'"

# ANTLR does weird things if the grammar file isn't in the current working directory
temp_grammar_file = File.join(FileUtils.pwd, File.basename(grammar_file))
FileUtils.cp(grammar_file, temp_grammar_file)
Expand All @@ -51,13 +154,8 @@ task "generate" do
)
generator.generate

# fix issues with generated parser
parser_source_file = File.join("ext", "express-parser", "express_parser.cpp")
parser_source_lines = File.read(parser_source_file).split("\n")
create_tokens_api(parser_source_lines)
create_tokens_method(parser_source_lines)
File.write(parser_source_file, "#{parser_source_lines.join("\n")}\n")

puts "Generating extended parser"
generate_extended_parser
# cleanup
FileUtils.rm(temp_grammar_file)
end
Loading