-
-
Notifications
You must be signed in to change notification settings - Fork 30.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-101291: Rearrange the size bits in PyLongObject #102464
Changes from 29 commits
0ec07e4
292b9d0
5c54894
029aaa4
b56e6da
91269fc
c48e825
449c0e2
c5ba601
4b3a3e8
9ef9d2c
9c408c1
548d656
3e3fefd
391fb51
df8c7d3
bc14fa6
54c6f1b
ce6bfb2
4c1956b
301158b
1aa1891
bf2a9af
169f521
90f9072
f143443
a0d661e
145a2e4
638a98f
7f5acc0
b06bb6f
a19b0a7
87f49b2
f764aa8
9843ac0
d6cb917
469d26f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,8 +82,6 @@ PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right); | |
PyObject *_PyLong_Multiply(PyLongObject *left, PyLongObject *right); | ||
PyObject *_PyLong_Subtract(PyLongObject *left, PyLongObject *right); | ||
|
||
int _PyLong_AssignValue(PyObject **target, Py_ssize_t value); | ||
|
||
/* Used by Python/mystrtoul.c, _PyBytes_FromHex(), | ||
_PyBytes_DecodeEscape(), etc. */ | ||
PyAPI_DATA(unsigned char) _PyLong_DigitValue[256]; | ||
|
@@ -110,25 +108,149 @@ PyAPI_FUNC(char*) _PyLong_FormatBytesWriter( | |
int base, | ||
int alternate); | ||
|
||
/* Long value tag bits: | ||
* 0-1: Sign bits value = (1-sign), ie. negative=2, positive=0, zero=1. | ||
* 2: Reserved for immortality bit | ||
Comment on lines
+112
to
+113
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we need an immortality flag here, but we do need a static flag (immortality should be marked by the refcount and this marks if the object is static or not. Using this, we can do the static check at dealloc time to prevent the deallocation of the objects |
||
* 3+ Unsigned digit count | ||
*/ | ||
/* Mask selecting the two sign bits (bits 0-1) of lv_tag. */
#define SIGN_MASK 3 | ||
/* Sign-bit value stored for zero (1 - sign, with sign == 0). */
#define SIGN_ZERO 1 | ||
/* Sign-bit value stored for negative ints (1 - sign, with sign == -1). */
#define SIGN_NEGATIVE 2 | ||
/* Number of low tag bits that are not part of the digit count. */
#define NON_SIZE_BITS 3 | ||
|
||
/* All "single digit" values are guaranteed to fit into | ||
* a Py_ssize_t with at least one bit to spare. | ||
*/ | ||
|
||
/* Return 1 if the argument is positive single digit int */ | ||
static inline int | ||
_PyLong_IsPositiveSingleDigit(PyObject* sub) { | ||
/* For a positive single digit int, the value of Py_SIZE(sub) is 0 or 1. | ||
|
||
We perform a fast check using a single comparison by casting from int | ||
to uint which casts negative numbers to large positive numbers. | ||
For details see Section 14.2 "Bounds Checking" in the Agner Fog | ||
optimization manual found at: | ||
https://www.agner.org/optimize/optimizing_cpp.pdf | ||
|
||
The function is not affected by -fwrapv, -fno-wrapv and -ftrapv | ||
compiler options of GCC and clang | ||
*/ | ||
assert(PyLong_CheckExact(sub)); | ||
Py_ssize_t signed_size = Py_SIZE(sub); | ||
return ((size_t)signed_size) <= 1; | ||
_PyLong_IsNonNegativeCompact(const PyLongObject* op) { | ||
assert(PyLong_Check(op)); | ||
return op->long_value.lv_tag <= (1 << NON_SIZE_BITS); | ||
gvanrossum marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't work if we set the second (immortal/static) bit, i.e: the immortal small int 1 since it will have an lv_tag of I'll create a new PR to restructure this a bit to make it work with the new bit flag. |
||
} | ||
|
||
static inline int | ||
_PyLong_IsCompact(const PyLongObject* op) { | ||
assert(PyLong_Check(op)); | ||
return op->long_value.lv_tag < (2 << NON_SIZE_BITS); | ||
} | ||
|
||
static inline int | ||
_PyLong_BothAreCompact(const PyLongObject* a, const PyLongObject* b) { | ||
assert(PyLong_Check(a)); | ||
assert(PyLong_Check(b)); | ||
return (a->long_value.lv_tag | b->long_value.lv_tag) < (2 << NON_SIZE_BITS); | ||
} | ||
|
||
/* The value returned by this function will have at least one bit to spare, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "one bit to spare" feels ambiguous, since the return type is signed -- is the spare bit the sign bit, or should there be at least one additional spare bit? (I know in practice we have 4 spare bits including the sign, but still, I'm not sure whether a 63-bit digit would be acceptable or not, from this description (or others).) |
||
* so that addition and subtraction can be performed on the values | ||
* without risk of overflow. | ||
*/ | ||
static inline Py_ssize_t | ||
_PyLong_CompactValue(const PyLongObject *op) | ||
{ | ||
assert(PyLong_Check(op)); | ||
assert(_PyLong_IsCompact(op)); | ||
Py_ssize_t sign = 1 - (op->long_value.lv_tag & SIGN_MASK); | ||
return sign * (Py_ssize_t)op->long_value.ob_digit[0]; | ||
} | ||
|
||
static inline bool | ||
_PyLong_IsZero(const PyLongObject *op) | ||
{ | ||
return (op->long_value.lv_tag & SIGN_MASK) == SIGN_ZERO; | ||
} | ||
|
||
static inline bool | ||
_PyLong_IsNegative(const PyLongObject *op) | ||
{ | ||
return (op->long_value.lv_tag & SIGN_MASK) == SIGN_NEGATIVE; | ||
} | ||
|
||
static inline bool | ||
_PyLong_IsPositive(const PyLongObject *op) | ||
{ | ||
return (op->long_value.lv_tag & SIGN_MASK) == 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not have There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I want these functions to be the only way to determine the sign. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, fine. Next question: maybe we also need a |
||
} | ||
|
||
static inline Py_ssize_t | ||
_PyLong_DigitCount(const PyLongObject *op) | ||
{ | ||
assert(PyLong_Check(op)); | ||
return op->long_value.lv_tag >> NON_SIZE_BITS; | ||
} | ||
|
||
/* Equivalent to _PyLong_DigitCount(op) * _PyLong_NonZeroSign(op) */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I take it this is for algorithms where the old "signed size" representation worked well? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is for code that uses the "signed size" representation. |
||
static inline Py_ssize_t | ||
_PyLong_SignedDigitCount(const PyLongObject *op) | ||
{ | ||
assert(PyLong_Check(op)); | ||
Py_ssize_t sign = 1 - (op->long_value.lv_tag & SIGN_MASK); | ||
return sign * (Py_ssize_t)(op->long_value.lv_tag >> NON_SIZE_BITS); | ||
} | ||
|
||
/* Like _PyLong_DigitCount but asserts that op is non-negative */ | ||
static inline Py_ssize_t | ||
_PyLong_UnsignedDigitCount(const PyLongObject *op) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not excited about this name; I keep having to look up how it differs from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I needed this for the extra check during implementation. I'll remove it. |
||
{ | ||
assert(PyLong_Check(op)); | ||
assert(!_PyLong_IsNegative(op)); | ||
return op->long_value.lv_tag >> NON_SIZE_BITS; | ||
} | ||
|
||
static inline int | ||
_PyLong_NonZeroSign(const PyLongObject *op) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The name confused me -- why not just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because it shouldn't be called with It probably should be called on compact ints either. I'll check if it is, and rename it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've renamed it to |
||
{ | ||
assert(PyLong_Check(op)); | ||
return 1 - (op->long_value.lv_tag & SIGN_MASK); | ||
} | ||
|
||
/* Do a and b have the same sign? */ | ||
static inline int | ||
_PyLong_SameSign(const PyLongObject *a, const PyLongObject *b) | ||
{ | ||
return (a->long_value.lv_tag & SIGN_MASK) == (b->long_value.lv_tag & SIGN_MASK); | ||
} | ||
|
||
/* Build an lv_tag value: `sign` (-1, 0 or 1) is stored as 1 - sign in the
 * low bits, and `size` is shifted left past the NON_SIZE_BITS low bits.
 * NOTE(review): pass only a literal or a size_t as `size`, so that the
 * shift is applied to a non-negative value of a known width. */
#define TAG_FROM_SIGN_AND_SIZE(sign, size) ((1 - (sign)) | ((size) << NON_SIZE_BITS)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I also haven't checked the assembly here, but I don't really know what happens when OR-ing a signed 64-bit int with a signed 32-bit int, and if this is doing work that's not strictly necessary. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is only in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So maybe add a comment that this macro should only be used with literal or size_t arguments? |
||
|
||
static inline void | ||
_PyLong_SetSignAndSize(PyLongObject *op, int sign, Py_ssize_t size) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't this use DigitCount instead of Size, for consistency with earlier APIs? Same for the next one. |
||
{ | ||
assert(size >= 0); | ||
assert(-1 <= sign && sign <= 1); | ||
assert(sign != 0 || size == 0); | ||
op->long_value.lv_tag = TAG_FROM_SIGN_AND_SIZE(sign, (size_t)size); | ||
} | ||
|
||
static inline void | ||
_PyLong_SetSize(PyLongObject *op, Py_ssize_t size) | ||
{ | ||
assert(size >= 0); | ||
op->long_value.lv_tag = (((size_t)size) << NON_SIZE_BITS) | (op->long_value.lv_tag & SIGN_MASK); | ||
} | ||
|
||
static inline void | ||
_PyLong_FlipSign(PyLongObject *op) { | ||
unsigned int flipped_sign = 2 - (op->long_value.lv_tag & SIGN_MASK); | ||
op->long_value.lv_tag &= ~7; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you want to use some defined name instead of hardcoding 7? Perhaps |
||
op->long_value.lv_tag |= flipped_sign; | ||
} | ||
|
||
/* Static initializer for an int object holding the single-digit value `val`.
 * The object header is set up with _PyObject_IMMORTAL_INIT(&PyLong_Type).
 * The tag is built from the sign of `val` (-1, 0 or 1) and a digit count of
 * 0 for zero and 1 otherwise; the single digit stores the absolute value.
 * `val` is expanded several times, so it should be a side-effect-free
 * expression. */
#define _PyLong_DIGIT_INIT(val) \ | ||
{ \ | ||
.ob_base = _PyObject_IMMORTAL_INIT(&PyLong_Type), \ | ||
.long_value = { \ | ||
.lv_tag = TAG_FROM_SIGN_AND_SIZE( \ | ||
(val) == 0 ? 0 : ((val) < 0 ? -1 : 1), \ | ||
(val) == 0 ? 0 : 1), \ | ||
{ ((val) >= 0 ? (val) : -(val)) }, \ | ||
} \ | ||
} | ||
|
||
/* Tag for False: sign 0 with zero digits. */
#define _PyLong_FALSE_TAG TAG_FROM_SIGN_AND_SIZE(0, 0) | ||
/* Tag for True: sign 1 with one digit. */
#define _PyLong_TRUE_TAG TAG_FROM_SIGN_AND_SIZE(1, 1) | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,7 @@ | ||||||
Rearrange bits in first field (after header) of PyLongObject. * Bits 0 and 1: | ||||||
1 - sign. I.e. 0 for positive numbers, 1 for zero and 2 for negative numbers. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use consistent spacing around binary
Suggested change
|
||||||
* Bit 2 reserved (probably for the immortal bit) * Bits 3+ the unsigned | ||||||
size. | ||||||
|
||||||
This makes a few operations slightly more efficient, and will enable a more | ||||||
compact and faster 2s-complement representation of most ints in future. |
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -32,6 +32,8 @@ Copyright (C) 1994 Steen Lumholt. | |||||||||||
# include "pycore_fileutils.h" // _Py_stat() | ||||||||||||
#endif | ||||||||||||
|
||||||||||||
#include "pycore_long.h" | ||||||||||||
|
||||||||||||
#ifdef MS_WINDOWS | ||||||||||||
#include <windows.h> | ||||||||||||
#endif | ||||||||||||
|
@@ -887,8 +889,8 @@ asBignumObj(PyObject *value) | |||||||||||
PyObject *hexstr; | ||||||||||||
const char *hexchars; | ||||||||||||
mp_int bigValue; | ||||||||||||
|
||||||||||||
neg = Py_SIZE(value) < 0; | ||||||||||||
assert(PyLong_Check(value)); | ||||||||||||
neg = _PyLong_IsNegative((PyLongObject *)value); | ||||||||||||
Comment on lines
+891
to
+892
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please put the blank line back.
Suggested change
|
||||||||||||
hexstr = _PyLong_Format(value, 16); | ||||||||||||
if (hexstr == NULL) | ||||||||||||
return NULL; | ||||||||||||
|
@@ -1960,7 +1962,7 @@ _tkinter_tkapp_getboolean(TkappObject *self, PyObject *arg) | |||||||||||
int v; | ||||||||||||
|
||||||||||||
if (PyLong_Check(arg)) { /* int or bool */ | ||||||||||||
return PyBool_FromLong(Py_SIZE(arg) != 0); | ||||||||||||
return PyBool_FromLong(!_PyLong_IsZero((PyLongObject *)arg)); | ||||||||||||
} | ||||||||||||
|
||||||||||||
if (PyTclObject_Check(arg)) { | ||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You didn't update this comment that documents `_longobject`; it's still talking about `ob_size` and `PyVarObject`.