From e4667229a0145008c6afde6c4e34d6e7bdb5080f Mon Sep 17 00:00:00 2001
From: Raiki Tamura
Date: Sun, 18 Jun 2023 23:19:11 +0900
Subject: [PATCH] modify InputSource [skip ci]

---
 gcc/rust/lex/rust-lex.h | 67 +++++++++++++++++++++++++++++++++--------
 1 file changed, 55 insertions(+), 12 deletions(-)

diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h
index 837e95b5282..a6aa2cc018a 100644
--- a/gcc/rust/lex/rust-lex.h
+++ b/gcc/rust/lex/rust-lex.h
@@ -234,11 +234,47 @@ class Lexer
   // Input source wrapper thing.
   class InputSource
   {
+  private:
+    unsigned int pos;
+    std::vector chars;
+
+    // Overload operator () to return next char from input stream.
+    virtual int next_byte () = 0;
+
+    int next ()
+    {
+      if (pos >= chars.size ())
+	return EOF;
+      else
+	{
+	  int c = chars[pos];
+	  pos++;
+	  return c;
+	}
+    }
+
   public:
+    InputSource () : pos (0), chars ({}) {}
+
     virtual ~InputSource () {}
 
-    // Overload operator () to return next char from input stream.
-    virtual int next () = 0;
+    // Check if the input source is valid as utf-8 and buffer all characters to
+    // `chars`.
+    void init ()
+    {
+      // TODO remove
+      std::cout << "Checking if input is valid as utf-8." << std::endl;
+
+      // TODO skip UTF BOM
+
+      int c = next_byte ();
+      while (c != EOF)
+	{
+	  // TODO validate utf-8 encoding and push one codepoint to `chars`
+	  chars.push_back (c);
+	  c = next_byte ();
+	}
+    }
   };
 
   class FileInputSource : public InputSource
@@ -247,11 +283,15 @@ class Lexer
     // Input source file.
     FILE *input;
 
+    int next_byte () override { return fgetc (input); }
+
   public:
     // Create new input source from file.
-    FileInputSource (FILE *input) : input (input) {}
-
-    int next () override { return fgetc (input); }
+    FileInputSource (FILE *input) : InputSource (), input (input)
+    {
+      // TODO make this better
+      init ();
+    }
   };
 
   class BufferInputSource : public InputSource
@@ -260,19 +300,22 @@ class Lexer
     const std::string &buffer;
     size_t offs;
 
-  public:
-    // Create new input source from file.
-    BufferInputSource (const std::string &b, size_t offset)
-      : buffer (b), offs (offset)
-    {}
-
-    int next () override
+    int next_byte () override
     {
       if (offs >= buffer.size ())
 	return EOF;
 
       return buffer.at (offs++);
     }
+
+  public:
+    // Create new input source from file.
+    BufferInputSource (const std::string &b, size_t offset)
+      : InputSource (), buffer (b), offs (offset)
+    {
+      // TODO make this better
+      init ();
+    }
   };
 
   // The input source for the lexer.