-
-
Notifications
You must be signed in to change notification settings - Fork 30.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-99554: Pack location tables more effectively #99556
Changes from 4 commits
2b90c42
6d96e8c
f191f95
24ce84f
be7eb0a
37909a4
d616c2d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Pack debugging location tables more efficiently during bytecode compilation. |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -140,6 +140,18 @@ location_is_after(location loc1, location loc2) { | |
(loc1.col_offset > loc2.end_col_offset)); | ||
} | ||
|
||
static inline bool | ||
same_location(location a, location b) | ||
{ | ||
if (a.lineno < 0 && b.lineno < 0) { | ||
return true; | ||
} | ||
return a.lineno == b.lineno && | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. According to `git diff`, you have a trailing space here. Surprised CI didn't catch it? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wow, nice catch! I'll fix it. |
||
a.end_lineno == b.end_lineno && | ||
a.col_offset == b.col_offset && | ||
a.end_col_offset == b.end_col_offset; | ||
} | ||
|
||
#define LOC(x) SRC_LOCATION_FROM_AST(x) | ||
|
||
typedef struct jump_target_label_ { | ||
|
@@ -7759,15 +7771,15 @@ write_location_info_oneline_form(struct assembler* a, int length, int line_delta | |
} | ||
|
||
static void | ||
write_location_info_long_form(struct assembler* a, struct instr* i, int length) | ||
write_location_info_long_form(struct assembler* a, location loc, int length) | ||
{ | ||
assert(length > 0 && length <= 8); | ||
write_location_first_byte(a, PY_CODE_LOCATION_INFO_LONG, length); | ||
write_location_signed_varint(a, i->i_loc.lineno - a->a_lineno); | ||
assert(i->i_loc.end_lineno >= i->i_loc.lineno); | ||
write_location_varint(a, i->i_loc.end_lineno - i->i_loc.lineno); | ||
write_location_varint(a, i->i_loc.col_offset + 1); | ||
write_location_varint(a, i->i_loc.end_col_offset + 1); | ||
write_location_signed_varint(a, loc.lineno - a->a_lineno); | ||
assert(loc.end_lineno >= loc.lineno); | ||
write_location_varint(a, loc.end_lineno - loc.lineno); | ||
write_location_varint(a, loc.col_offset + 1); | ||
write_location_varint(a, loc.end_col_offset + 1); | ||
} | ||
|
||
static void | ||
|
@@ -7786,7 +7798,7 @@ write_location_info_no_column(struct assembler* a, int length, int line_delta) | |
#define THEORETICAL_MAX_ENTRY_SIZE 25 /* 1 + 6 + 6 + 6 + 6 */ | ||
|
||
static int | ||
write_location_info_entry(struct assembler* a, struct instr* i, int isize) | ||
write_location_info_entry(struct assembler* a, location loc, int isize) | ||
{ | ||
Py_ssize_t len = PyBytes_GET_SIZE(a->a_linetable); | ||
if (a->a_location_off + THEORETICAL_MAX_ENTRY_SIZE >= len) { | ||
|
@@ -7795,49 +7807,51 @@ write_location_info_entry(struct assembler* a, struct instr* i, int isize) | |
return 0; | ||
} | ||
} | ||
if (i->i_loc.lineno < 0) { | ||
if (loc.lineno < 0) { | ||
write_location_info_none(a, isize); | ||
return 1; | ||
} | ||
int line_delta = i->i_loc.lineno - a->a_lineno; | ||
int column = i->i_loc.col_offset; | ||
int end_column = i->i_loc.end_col_offset; | ||
int line_delta = loc.lineno - a->a_lineno; | ||
int column = loc.col_offset; | ||
int end_column = loc.end_col_offset; | ||
assert(column >= -1); | ||
assert(end_column >= -1); | ||
if (column < 0 || end_column < 0) { | ||
if (i->i_loc.end_lineno == i->i_loc.lineno || i->i_loc.end_lineno == -1) { | ||
if (loc.end_lineno == loc.lineno || loc.end_lineno == -1) { | ||
write_location_info_no_column(a, isize, line_delta); | ||
a->a_lineno = i->i_loc.lineno; | ||
a->a_lineno = loc.lineno; | ||
return 1; | ||
} | ||
} | ||
else if (i->i_loc.end_lineno == i->i_loc.lineno) { | ||
else if (loc.end_lineno == loc.lineno) { | ||
if (line_delta == 0 && column < 80 && end_column - column < 16 && end_column >= column) { | ||
write_location_info_short_form(a, isize, column, end_column); | ||
return 1; | ||
} | ||
if (line_delta >= 0 && line_delta < 3 && column < 128 && end_column < 128) { | ||
write_location_info_oneline_form(a, isize, line_delta, column, end_column); | ||
a->a_lineno = i->i_loc.lineno; | ||
a->a_lineno = loc.lineno; | ||
return 1; | ||
} | ||
} | ||
write_location_info_long_form(a, i, isize); | ||
a->a_lineno = i->i_loc.lineno; | ||
write_location_info_long_form(a, loc, isize); | ||
a->a_lineno = loc.lineno; | ||
return 1; | ||
} | ||
|
||
static int | ||
assemble_emit_location(struct assembler* a, struct instr* i) | ||
assemble_emit_location(struct assembler* a, location loc, int isize) | ||
{ | ||
int isize = instr_size(i); | ||
if (isize == 0) { | ||
return 1; | ||
} | ||
while (isize > 8) { | ||
if (!write_location_info_entry(a, i, 8)) { | ||
if (!write_location_info_entry(a, loc, 8)) { | ||
return 0; | ||
} | ||
isize -= 8; | ||
} | ||
return write_location_info_entry(a, i, isize); | ||
return write_location_info_entry(a, loc, isize); | ||
} | ||
|
||
/* assemble_emit() | ||
|
@@ -8860,10 +8874,22 @@ assemble(struct compiler *c, int addNone) | |
|
||
/* Emit location info */ | ||
a.a_lineno = c->u->u_firstlineno; | ||
location loc = NO_LOCATION; | ||
int size = 0; | ||
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { | ||
for (int j = 0; j < b->b_iused; j++) | ||
if (!assemble_emit_location(&a, &b->b_instr[j])) | ||
goto error; | ||
for (int j = 0; j < b->b_iused; j++) { | ||
if (!same_location(loc, b->b_instr[j].i_loc)) { | ||
if (!assemble_emit_location(&a, loc, size)) { | ||
goto error; | ||
} | ||
loc = b->b_instr[j].i_loc; | ||
size = 0; | ||
} | ||
size += instr_size(&b->b_instr[j]); | ||
} | ||
} | ||
if (!assemble_emit_location(&a, loc, size)) { | ||
goto error; | ||
} | ||
|
||
if (!assemble_exception_table(&a, g->g_entryblock)) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I’d also check that they are the same value, in case we ever want to distinguish between ‘no location’ and ‘unknown location’ or something like that.
(I’m assuming you don’t rely on the full equality check because we’re inconsistent about what the other three values — end_lineno, col_offset and end_col_offset — are.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should an "unknown location" ever exist? That sounds like a compiler bug.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm thinking of cases where we just want to take the location of the previous instruction. Currently we just say "NO_LOCATION", but would we ever want to distinguish between this case and the case where there truly is no location (because the instructions are fake)? I don't know, but we might not want to rule it out.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, it appears that we can remove this special case entirely. Just using the full equality check results in the exact same PYC sizes for the entire stdlib.