From 739c16f6506a89db2c44111baa5fad0ac1f48f04 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Wed, 22 Mar 2023 00:17:24 +0300 Subject: [PATCH] Infer format specifier enclosing automatically --- Parser/tokenizer.c | 20 +++++++------------- Parser/tokenizer.h | 4 ---- 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 5b069f7af4ed37..f40d78ea1ba298 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2190,7 +2190,6 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t current_tok->last_expr_buffer = NULL; current_tok->last_expr_size = 0; current_tok->last_expr_end = -1; - current_tok->format_spec = 0; switch (*tok->start) { case 'F': @@ -2326,7 +2325,6 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t if (c == ':' && cursor == mark) { current_tok->kind = TOK_FSTRING_MODE; - current_tok->format_spec = 1; p_start = tok->start; p_end = tok->cur; return MAKE_TOKEN(_PyToken_OneChar(c)); @@ -2400,12 +2398,6 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t if (tok->tok_mode_stack_index > 0) { current_tok->bracket_stack--; if (c == '}' && current_tok->bracket_stack == current_tok->bracket_mark[current_tok->bracket_mark_index]) { - // When the expression is complete, we can exit the format - // spec mode (no matter if we were in it or not). 
- if (current_tok->bracket_mark_index <= 0) { - current_tok->format_spec = 0; - } - current_tok->bracket_mark_index--; current_tok->kind = TOK_FSTRING_MODE; } @@ -2504,11 +2496,13 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct return MAKE_TOKEN(FSTRING_MIDDLE); } char peek = tok_nextc(tok); - if (peek == '}' && current_tok->bracket_mark_index <= 0 - // We can not have }} inside the format spec, so we are going to assume - // this that the first closing brace belongs to the f-string expression - // and the second one needs to deal with later (e.g. f"{1:<3}}}"). - && !current_tok->format_spec) { + + // The tokenizer can only be in the format spec if we have already completed the expression + // scanning (indicated by the end of the expression being set) and we are not at the top level + // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double + // brackets, we can bypass it here. + int in_format_spec = current_tok->last_expr_end != -1 && current_tok->bracket_mark_index >= 0; + if (peek == '}' && !in_format_spec) { p_start = tok->start; p_end = tok->cur - 1; } else { diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 5e7a065133a0a9..0c542775572bd6 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -43,10 +43,6 @@ enum tokenizer_mode_kind_t { typedef struct _tokenizer_mode { enum tokenizer_mode_kind_t kind; - // TODO: we probably can infer this without storing it - // from the other information available here. - int format_spec; - int bracket_stack; int bracket_mark[MAX_EXPR_NEXTING]; int bracket_mark_index;