From f630a1e26c301122f904004037a9b8a275fbfb49 Mon Sep 17 00:00:00 2001 From: water Date: Tue, 17 Nov 2020 15:14:23 -0500 Subject: [PATCH] support escaping any character --- common/goos/Reader.cpp | 110 ++++++++++++++++++++++++++++++----------- common/goos/Reader.h | 2 + doc/goal_doc.md | 1 + test/test_reader.cpp | 22 +++++++++ 4 files changed, 106 insertions(+), 29 deletions(-) diff --git a/common/goos/Reader.cpp b/common/goos/Reader.cpp index a830564831..ec348f0957 100644 --- a/common/goos/Reader.cpp +++ b/common/goos/Reader.cpp @@ -12,9 +12,45 @@ #include "Reader.h" #include "third-party/linenoise.h" #include "common/util/FileUtil.h" +#include "third-party/fmt/core.h" namespace goos { +namespace { +/*! + * Is this a valid character to start a decimal integer number? + */ +bool decimal_start(char c) { + return (c >= '0' && c <= '9') || c == '-'; +} + +/*! + * Is this a valid character to start a floating point number? + */ +bool float_start(char c) { + return (c >= '0' && c <= '9') || c == '-' || c == '.'; +} + +/*! + * Is this a valid character for a hex number? + */ +bool hex_char(char c) { + return !((c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F')); +} + +/*! + * Does the given string contain c? + */ +bool str_contains(const std::string& str, char c) { + for (auto& x : str) { + if (x == c) { + return true; + } + } + return false; +} +} // namespace + /*! * Advance a TextStream through any comments or whitespace. * This will leave the stream at the next non-whitespace character (or at the end) @@ -464,6 +500,7 @@ bool Reader::try_token_as_symbol(const Token& tok, Object& obj) { /*! * Read a string and escape. Start on the first char after the first double quote. * Supported escapes are \n, \t, \\ and work like they do in C. + * An arbitrary character can be entered as \c12 where the "12" is hexadecimal. */ bool Reader::read_string(TextStream& stream, Object& obj) { bool got_close_quote = false; @@ -493,6 +530,24 @@ bool Reader::read_string(TextStream& stream, Object& obj) { } else if (stream.peek() == '"') { stream.read(); str.push_back('"'); + } else if (stream.peek() == 'c') { + stream.read(); + if (!stream.text_remains(2)) { + throw_reader_error(stream, "incomplete string escape code", -1); + } + auto first = stream.read(); + auto second = stream.read(); + if (!hex_char(first) || !hex_char(second)) { + throw_reader_error(stream, "invalid character escape hex number", -3); + } + char hex_num[3] = {first, second, '\0'}; + std::size_t end = 0; + auto value = std::stoul(hex_num, &end, 16); + if (end != 2) { + throw_reader_error(stream, "invalid character escape", -2); + } + assert(value < 256); + str.push_back(char(value)); } else { throw_reader_error(stream, "unknown string escape code", -1); } @@ -504,34 +559,6 @@ bool Reader::read_string(TextStream& stream, Object& obj) { return got_close_quote; } -namespace { -/*! - * Is this a valid character to start a decimal integer number? - */ -bool decimal_start(char c) { - return (c >= '0' && c <= '9') || c == '-'; -} - -/*! - * Is this a valid character to start a floating point number? - */ -bool float_start(char c) { - return (c >= '0' && c <= '9') || c == '-' || c == '.'; -} - -/*! - * Does the given string contain c? - */ -bool str_contains(const std::string& str, char c) { - for (auto& x : str) { - if (x == c) { - return true; - } - } - return false; -} -} // namespace - /*! * Try decoding as a float. Must have a "." in it. * Otherwise all combinations of leading zeros, "."'s, negative signs, etc are ok. @@ -604,7 +631,7 @@ bool Reader::try_token_as_hex(const Token& tok, Object& obj) { // it means that the number is too big or too small, and we should error for (size_t offset = 2; offset < tok.text.size(); offset++) { char c = tok.text.at(offset); - if ((c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F')) { + if (!hex_char(c)) { return false; } } @@ -697,4 +724,29 @@ void Reader::throw_reader_error(TextStream& here, const std::string& err, int se std::string Reader::get_source_dir() { return file_util::get_project_path(); } + +/*! + * Convert any string into one that can be read. + * Unprintable characters become escape sequences, including tab and newline. + */ +std::string get_readable_string(const char* in) { + std::string result; + while (*in) { + if (file_util::is_printable_char(*in) && *in != '\\' && *in != '"') { + result.push_back(*in); + } else if (*in == '\n') { + result += "\\n"; + } else if (*in == '\t') { + result += "\\t"; + } else if (*in == '\\') { + result += "\\\\"; + } else if (*in == '"') { + result += "\\\""; + } else { + result += fmt::format("\\c{:02x}", uint8_t(*in)); + } + in++; + } + return result; +} } // namespace goos diff --git a/common/goos/Reader.h b/common/goos/Reader.h index f5aafec45e..4ae716859a 100644 --- a/common/goos/Reader.h +++ b/common/goos/Reader.h @@ -101,6 +101,8 @@ class Reader { std::unordered_map reader_macros; }; + +std::string get_readable_string(const char* in); } // namespace goos #endif // JAK1_READER_H diff --git a/doc/goal_doc.md b/doc/goal_doc.md index 8518c55a18..1535156eec 100644 --- a/doc/goal_doc.md +++ b/doc/goal_doc.md @@ -776,6 +776,7 @@ There is an escape code `\` for string: - `\t` tab character - `\\` the `\` character - `\"` the `"` character +- `\cXX` where `XX` is a two character hex number: insert this character. - Any other character following a `\` is an error. OpenGOAL stores strings in the same segment of the function which uses the string. I believe GOAL does the same. diff --git a/test/test_reader.cpp b/test/test_reader.cpp index 5b5e6b885e..06fcf9a82e 100644 --- a/test/test_reader.cpp +++ b/test/test_reader.cpp @@ -171,6 +171,28 @@ TEST(GoosReader, String) { EXPECT_ANY_THROW(reader.read_from_string("\"\\w\"")); // "\w" invalid escape } +TEST(GoosReader, StringWithNumberEscapes) { + Reader reader; + + // build a weird test string + std::string str; + for (int i = 1; i < 256; i++) { + str.push_back(i); + } + + // create a readable string: + std::string readable = "\""; + readable += goos::get_readable_string(str.data()); + readable.push_back('"'); + + EXPECT_TRUE(check_first_string(reader.read_from_string(readable), str)); + EXPECT_ANY_THROW(reader.read_from_string("\"\\c\"")); + EXPECT_ANY_THROW(reader.read_from_string("\"\\c1\"")); + EXPECT_ANY_THROW(reader.read_from_string("\"\\cag\"")); + EXPECT_ANY_THROW(reader.read_from_string("\"\\c-1\"")); + EXPECT_ANY_THROW(reader.read_from_string("\"\\c-2\"")); +} + TEST(GoosReader, Symbol) { std::vector test_symbols = { "test", "test-two", "__werid-sym__", "-a", "-", "/", "*", "+", "a", "#f"};