Skip to content

Commit

Permalink
gh-104825: Remove implicit newline in the line attribute in tokens emitted in the tokenize module (#104846)
Browse files Browse the repository at this point in the history
  • Loading branch information
pablogsal authored May 24, 2023
1 parent c45701e commit c8cf9b4
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 8 deletions.
4 changes: 2 additions & 2 deletions Lib/idlelib/idle_test/test_editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ def test_searcher(self):
test_info = (# text, (block, indent))
("", (None, None)),
("[1,", (None, None)), # TokenError
("if 1:\n", ('if 1:\n', None)),
("if 1:\n 2\n 3\n", ('if 1:\n', ' 2\n')),
("if 1:\n", ('if 1:', None)),
("if 1:\n 2\n 3\n", ('if 1:', ' 2')),
)
for code, expected_pair in test_info:
with self.subTest(code=code):
Expand Down
8 changes: 4 additions & 4 deletions Lib/test/test_tabnanny.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def test_when_nannynag_error_verbose(self):
"""
with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as file_path:
out = f"{file_path!r}: *** Line 3: trouble in tab city! ***\n"
out += "offending line: '\\tprint(\"world\")\\n'\n"
out += "offending line: '\\tprint(\"world\")'\n"
out += "inconsistent use of tabs and spaces in indentation\n"

tabnanny.verbose = 1
Expand All @@ -231,7 +231,7 @@ def test_when_nannynag_error_verbose(self):
def test_when_nannynag_error(self):
"""A python source code file eligible for raising `tabnanny.NannyNag`."""
with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as file_path:
out = f"{file_path} 3 '\\tprint(\"world\")\\n'\n"
out = f"{file_path} 3 '\\tprint(\"world\")'\n"
self.verify_tabnanny_check(file_path, out=out)

def test_when_no_file(self):
Expand Down Expand Up @@ -341,14 +341,14 @@ def test_verbose_mode(self):
"""Should display more error information if verbose mode is on."""
with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as path:
stdout = textwrap.dedent(
"offending line: '\\tprint(\"world\")\\n'"
"offending line: '\\tprint(\"world\")'"
).strip()
self.validate_cmd("-v", path, stdout=stdout, partial=True)

def test_double_verbose_mode(self):
"""Should display detailed error information if double verbose is on."""
with TemporaryPyFile(SOURCE_CODES["nannynag_errored"]) as path:
stdout = textwrap.dedent(
"offending line: '\\tprint(\"world\")\\n'"
"offending line: '\\tprint(\"world\")'"
).strip()
self.validate_cmd("-vv", path, stdout=stdout, partial=True)
4 changes: 2 additions & 2 deletions Lib/test/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def k(x):
e.exception.msg,
'unindent does not match any outer indentation level')
self.assertEqual(e.exception.offset, 9)
self.assertEqual(e.exception.text, ' x += 5\n')
self.assertEqual(e.exception.text, ' x += 5')

def test_int(self):
# Ordinary integers and binary operators
Expand Down Expand Up @@ -1157,7 +1157,7 @@ def readline():

# skip the initial encoding token and the end tokens
tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2]
expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
self.assertEqual(tokens, expected_tokens,
"bytes not decoded with encoding")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Tokens emitted by the :mod:`tokenize` module no longer include an implicit
``\n`` character in the ``line`` attribute. Patch by Pablo Galindo.
4 changes: 4 additions & 0 deletions Python/Python-tokenize.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ _tokenizer_error(struct tok_state *tok)
int result = 0;

Py_ssize_t size = tok->inp - tok->buf;
assert(tok->buf[size-1] == '\n');
size -= 1; // Remove the newline character from the end of the line
error_line = PyUnicode_DecodeUTF8(tok->buf, size, "replace");
if (!error_line) {
result = -1;
Expand Down Expand Up @@ -193,6 +195,8 @@ tokenizeriter_next(tokenizeriterobject *it)
}

Py_ssize_t size = it->tok->inp - it->tok->buf;
assert(it->tok->buf[size-1] == '\n');
size -= 1; // Remove the newline character from the end of the line
PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
if (line == NULL) {
Py_DECREF(str);
Expand Down

0 comments on commit c8cf9b4

Please sign in to comment.