Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-99554: Pack location tables more effectively #99556

Merged
merged 7 commits into from
Dec 22, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Pack debugging location tables more efficiently during bytecode compilation.
19 changes: 8 additions & 11 deletions Programs/test_frozenmain.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

74 changes: 50 additions & 24 deletions Python/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,18 @@ location_is_after(location loc1, location loc2) {
(loc1.col_offset > loc2.end_col_offset));
}

static inline bool
same_location(location a, location b)
{
if (a.lineno < 0 && b.lineno < 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’d also check that they are the same value, in case we ever want to distinguish between ‘no location’ and ‘unknown location’ or something like that.

(I’m assuming you don’t rely on the full equality check because we’re inconsistent about what the other 3 values are).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should an "unknown location" ever exist? That sounds like a compiler bug.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking of cases where we just want to take the location of the previous instruction. Currently we just say "NO_LOCATION", but would we ever want to distinguish between this case and the case where there truly is no location (because the instructions are fake)? I don't know, but we might not want to rule it out.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I’m assuming you don’t rely on the full equality check because we’re inconsistent about what the other 3 values are).

Actually, it appears that we can remove this special case entirely. Just using the full equality check results in the exact same PYC sizes for the entire stdlib.

return true;
}
return a.lineno == b.lineno &&
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to git diff, you have a trailing space here. I'm surprised CI didn't catch it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, nice catch! I'll fix it.

a.end_lineno == b.end_lineno &&
a.col_offset == b.col_offset &&
a.end_col_offset == b.end_col_offset;
}

#define LOC(x) SRC_LOCATION_FROM_AST(x)

typedef struct jump_target_label_ {
Expand Down Expand Up @@ -7759,15 +7771,15 @@ write_location_info_oneline_form(struct assembler* a, int length, int line_delta
}

static void
write_location_info_long_form(struct assembler* a, struct instr* i, int length)
write_location_info_long_form(struct assembler* a, location loc, int length)
{
assert(length > 0 && length <= 8);
write_location_first_byte(a, PY_CODE_LOCATION_INFO_LONG, length);
write_location_signed_varint(a, i->i_loc.lineno - a->a_lineno);
assert(i->i_loc.end_lineno >= i->i_loc.lineno);
write_location_varint(a, i->i_loc.end_lineno - i->i_loc.lineno);
write_location_varint(a, i->i_loc.col_offset + 1);
write_location_varint(a, i->i_loc.end_col_offset + 1);
write_location_signed_varint(a, loc.lineno - a->a_lineno);
assert(loc.end_lineno >= loc.lineno);
write_location_varint(a, loc.end_lineno - loc.lineno);
write_location_varint(a, loc.col_offset + 1);
write_location_varint(a, loc.end_col_offset + 1);
}

static void
Expand All @@ -7786,7 +7798,7 @@ write_location_info_no_column(struct assembler* a, int length, int line_delta)
#define THEORETICAL_MAX_ENTRY_SIZE 25 /* 1 + 6 + 6 + 6 + 6 */

static int
write_location_info_entry(struct assembler* a, struct instr* i, int isize)
write_location_info_entry(struct assembler* a, location loc, int isize)
{
Py_ssize_t len = PyBytes_GET_SIZE(a->a_linetable);
if (a->a_location_off + THEORETICAL_MAX_ENTRY_SIZE >= len) {
Expand All @@ -7795,49 +7807,51 @@ write_location_info_entry(struct assembler* a, struct instr* i, int isize)
return 0;
}
}
if (i->i_loc.lineno < 0) {
if (loc.lineno < 0) {
write_location_info_none(a, isize);
return 1;
}
int line_delta = i->i_loc.lineno - a->a_lineno;
int column = i->i_loc.col_offset;
int end_column = i->i_loc.end_col_offset;
int line_delta = loc.lineno - a->a_lineno;
int column = loc.col_offset;
int end_column = loc.end_col_offset;
assert(column >= -1);
assert(end_column >= -1);
if (column < 0 || end_column < 0) {
if (i->i_loc.end_lineno == i->i_loc.lineno || i->i_loc.end_lineno == -1) {
if (loc.end_lineno == loc.lineno || loc.end_lineno == -1) {
write_location_info_no_column(a, isize, line_delta);
a->a_lineno = i->i_loc.lineno;
a->a_lineno = loc.lineno;
return 1;
}
}
else if (i->i_loc.end_lineno == i->i_loc.lineno) {
else if (loc.end_lineno == loc.lineno) {
if (line_delta == 0 && column < 80 && end_column - column < 16 && end_column >= column) {
write_location_info_short_form(a, isize, column, end_column);
return 1;
}
if (line_delta >= 0 && line_delta < 3 && column < 128 && end_column < 128) {
write_location_info_oneline_form(a, isize, line_delta, column, end_column);
a->a_lineno = i->i_loc.lineno;
a->a_lineno = loc.lineno;
return 1;
}
}
write_location_info_long_form(a, i, isize);
a->a_lineno = i->i_loc.lineno;
write_location_info_long_form(a, loc, isize);
a->a_lineno = loc.lineno;
return 1;
}

static int
assemble_emit_location(struct assembler* a, struct instr* i)
assemble_emit_location(struct assembler* a, location loc, int isize)
{
int isize = instr_size(i);
if (isize == 0) {
return 1;
}
while (isize > 8) {
if (!write_location_info_entry(a, i, 8)) {
if (!write_location_info_entry(a, loc, 8)) {
return 0;
}
isize -= 8;
}
return write_location_info_entry(a, i, isize);
return write_location_info_entry(a, loc, isize);
}

/* assemble_emit()
Expand Down Expand Up @@ -8860,10 +8874,22 @@ assemble(struct compiler *c, int addNone)

/* Emit location info */
a.a_lineno = c->u->u_firstlineno;
location loc = NO_LOCATION;
int size = 0;
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
for (int j = 0; j < b->b_iused; j++)
if (!assemble_emit_location(&a, &b->b_instr[j]))
goto error;
for (int j = 0; j < b->b_iused; j++) {
if (!same_location(loc, b->b_instr[j].i_loc)) {
if (!assemble_emit_location(&a, loc, size)) {
goto error;
}
loc = b->b_instr[j].i_loc;
size = 0;
}
size += instr_size(&b->b_instr[j]);
}
}
if (!assemble_emit_location(&a, loc, size)) {
goto error;
}

if (!assemble_exception_table(&a, g->g_entryblock)) {
Expand Down