Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Reader] Support escape codes for any character #125

Merged
merged 1 commit into from
Nov 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 81 additions & 29 deletions common/goos/Reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,45 @@
#include "Reader.h"
#include "third-party/linenoise.h"
#include "common/util/FileUtil.h"
#include "third-party/fmt/core.h"

namespace goos {

namespace {
/*!
 * Can a decimal integer literal begin with this character?
 * True for the digits '0' through '9' and for the minus sign.
 */
bool decimal_start(char c) {
  if (c == '-') {
    return true;
  }
  return '0' <= c && c <= '9';
}

/*!
 * Can a floating point literal begin with this character?
 * True for the digits '0' through '9', the minus sign, and the decimal point.
 */
bool float_start(char c) {
  switch (c) {
    case '-':
    case '.':
      return true;
    default:
      return '0' <= c && c <= '9';
  }
}

/*!
 * Is this character a hexadecimal digit?
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.
 */
bool hex_char(char c) {
  const bool is_decimal_digit = '0' <= c && c <= '9';
  const bool is_lower_hex = 'a' <= c && c <= 'f';
  const bool is_upper_hex = 'A' <= c && c <= 'F';
  return is_decimal_digit || is_lower_hex || is_upper_hex;
}

/*!
 * Report whether the character c occurs anywhere in str.
 * An empty string never contains anything.
 */
bool str_contains(const std::string& str, char c) {
  return str.find(c) != std::string::npos;
}
} // namespace

/*!
* Advance a TextStream through any comments or whitespace.
* This will leave the stream at the next non-whitespace character (or at the end)
Expand Down Expand Up @@ -464,6 +500,7 @@ bool Reader::try_token_as_symbol(const Token& tok, Object& obj) {
/*!
* Read a string and escape. Start on the first char after the first double quote.
* Supported escapes are \n, \t, \\ and \", and they work like they do in C.
* An arbitrary character can be entered as \c12 where the "12" is hexadecimal.
*/
bool Reader::read_string(TextStream& stream, Object& obj) {
bool got_close_quote = false;
Expand Down Expand Up @@ -493,6 +530,24 @@ bool Reader::read_string(TextStream& stream, Object& obj) {
} else if (stream.peek() == '"') {
stream.read();
str.push_back('"');
} else if (stream.peek() == 'c') {
stream.read();
if (!stream.text_remains(2)) {
throw_reader_error(stream, "incomplete string escape code", -1);
}
auto first = stream.read();
auto second = stream.read();
if (!hex_char(first) || !hex_char(second)) {
throw_reader_error(stream, "invalid character escape hex number", -3);
}
char hex_num[3] = {first, second, '\0'};
std::size_t end = 0;
auto value = std::stoul(hex_num, &end, 16);
if (end != 2) {
throw_reader_error(stream, "invalid character escape", -2);
}
assert(value < 256);
str.push_back(char(value));
} else {
throw_reader_error(stream, "unknown string escape code", -1);
}
Expand All @@ -504,34 +559,6 @@ bool Reader::read_string(TextStream& stream, Object& obj) {
return got_close_quote;
}

namespace {
/*!
 * Can a decimal integer literal begin with this character?
 * True for the digits '0' through '9' and for the minus sign.
 */
bool decimal_start(char c) {
  if (c == '-') {
    return true;
  }
  return '0' <= c && c <= '9';
}

/*!
 * Can a floating point literal begin with this character?
 * True for the digits '0' through '9', the minus sign, and the decimal point.
 */
bool float_start(char c) {
  switch (c) {
    case '-':
    case '.':
      return true;
    default:
      return '0' <= c && c <= '9';
  }
}

/*!
 * Report whether the character c occurs anywhere in str.
 * An empty string never contains anything.
 */
bool str_contains(const std::string& str, char c) {
  return str.find(c) != std::string::npos;
}
} // namespace

/*!
* Try decoding as a float. Must have a "." in it.
* Otherwise all combinations of leading zeros, "."'s, negative signs, etc are ok.
Expand Down Expand Up @@ -604,7 +631,7 @@ bool Reader::try_token_as_hex(const Token& tok, Object& obj) {
// it means that the number is too big or too small, and we should error
for (size_t offset = 2; offset < tok.text.size(); offset++) {
char c = tok.text.at(offset);
if ((c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F')) {
if (!hex_char(c)) {
return false;
}
}
Expand Down Expand Up @@ -697,4 +724,29 @@ void Reader::throw_reader_error(TextStream& here, const std::string& err, int se
/*!
 * Get the Reader's source directory.
 * This forwards to file_util::get_project_path() and returns its result unchanged.
 */
std::string Reader::get_source_dir() {
return file_util::get_project_path();
}

/*!
 * Build an escaped copy of a NUL-terminated C string that the reader can parse back.
 *  - backslash and double quote are always written as \\ and \"
 *  - any other character accepted by file_util::is_printable_char is copied unchanged
 *  - of the remaining characters, newline becomes \n and tab becomes \t
 *  - everything else is written as \cXX, where XX is the byte value as two lowercase hex digits
 */
std::string get_readable_string(const char* in) {
  std::string escaped;
  for (const char* cursor = in; *cursor != '\0'; ++cursor) {
    const char ch = *cursor;
    if (ch == '\\') {
      escaped += "\\\\";
    } else if (ch == '"') {
      escaped += "\\\"";
    } else if (file_util::is_printable_char(ch)) {
      escaped.push_back(ch);
    } else if (ch == '\n') {
      escaped += "\\n";
    } else if (ch == '\t') {
      escaped += "\\t";
    } else {
      // go through uint8_t so bytes above 0x7f are formatted as a number in 0x80-0xff
      escaped += fmt::format("\\c{:02x}", static_cast<uint8_t>(ch));
    }
  }
  return escaped;
}
} // namespace goos
2 changes: 2 additions & 0 deletions common/goos/Reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ class Reader {

std::unordered_map<std::string, std::string> reader_macros;
};

/*!
 * Convert a NUL-terminated C string into escaped text that the reader can parse back.
 * The escaping rules are described at the definition in Reader.cpp.
 */
std::string get_readable_string(const char* in);
} // namespace goos

#endif // JAK1_READER_H
1 change: 1 addition & 0 deletions doc/goal_doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ There is an escape code `\` for string:
- `\t` tab character
- `\\` the `\` character
- `\"` the `"` character
- `\cXX` where `XX` is a two-digit hexadecimal number: inserts the character with that value.
- Any other character following a `\` is an error.

OpenGOAL stores strings in the same segment of the function which uses the string. I believe GOAL does the same.
Expand Down
22 changes: 22 additions & 0 deletions test/test_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,28 @@ TEST(GoosReader, String) {
EXPECT_ANY_THROW(reader.read_from_string("\"\\w\"")); // "\w" invalid escape
}

TEST(GoosReader, StringWithNumberEscapes) {
  Reader reader;

  // every non-zero 8-bit value, in increasing order
  std::string all_chars;
  for (int code = 1; code < 256; code++) {
    all_chars.push_back(static_cast<char>(code));
  }

  // escape it and wrap it in double quotes so it reads back as one string literal
  std::string readable = "\"" + goos::get_readable_string(all_chars.data()) + "\"";

  // round trip: reading the escaped form must give back the original bytes
  EXPECT_TRUE(check_first_string(reader.read_from_string(readable), all_chars));

  // malformed \c escapes must throw: no digits, one digit, a non-hex digit, a minus sign
  const char* const bad_inputs[] = {
      "\"\\c\"", "\"\\c1\"", "\"\\cag\"", "\"\\c-1\"", "\"\\c-2\"",
  };
  for (const char* bad : bad_inputs) {
    EXPECT_ANY_THROW(reader.read_from_string(bad));
  }
}

TEST(GoosReader, Symbol) {
std::vector<std::string> test_symbols = {
"test", "test-two", "__werid-sym__", "-a", "-", "/", "*", "+", "a", "#f"};
Expand Down