python · markshannon · Mar 22, 2023 · Jan 25, 2023 · Jan 25, 2023 · Feb 28, 2023
diff --git a/Include/cpython/longintrepr.h b/Include/cpython/longintrepr.h
@@ -80,7 +80,7 @@ typedef long stwodigits; /* signed variant of twodigits */
 */
 
 typedef struct _PyLongValue {
-    Py_ssize_t ob_size; /* Number of items in variable part */
+    uintptr_t lv_tag; /* Number of digits, sign and flags */
     digit ob_digit[1];
 } _PyLongValue;
 
@@ -94,6 +94,10 @@ PyAPI_FUNC(PyLongObject *) _PyLong_New(Py_ssize_t);
 /* Return a copy of src. */
 PyAPI_FUNC(PyObject *) _PyLong_Copy(PyLongObject *src);
 
+PyAPI_FUNC(PyLongObject *)
+_PyLong_FromDigits(int negative, Py_ssize_t digit_count, digit *digits);
+
+
 #ifdef __cplusplus
 }
 #endif

diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h
@@ -82,8 +82,6 @@ PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right);
 PyObject *_PyLong_Multiply(PyLongObject *left, PyLongObject *right);
 PyObject *_PyLong_Subtract(PyLongObject *left, PyLongObject *right);
 
-int _PyLong_AssignValue(PyObject **target, Py_ssize_t value);
-
 /* Used by Python/mystrtoul.c, _PyBytes_FromHex(),
    _PyBytes_DecodeEscape(), etc. */
 PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];
@@ -110,25 +108,149 @@ PyAPI_FUNC(char*) _PyLong_FormatBytesWriter(
     int base,
     int alternate);
 
+/* Long value tag bits:
+ * 0-1: Sign bits; value = (1 - sign), i.e. negative=2, positive=0, zero=1.
+ * 2: Reserved for immortality bit
+ * 3+: Unsigned digit count
+ */
+#define SIGN_MASK 3
+#define SIGN_ZERO 1
+#define SIGN_NEGATIVE 2
+#define NON_SIZE_BITS 3
+
+/* All "single digit" values are guaranteed to fit into
+ * a Py_ssize_t with at least one bit to spare.
+ */
+
 /* Return 1 if the argument is positive single digit int */
 static inline int
-_PyLong_IsPositiveSingleDigit(PyObject* sub) {
-    /*  For a positive single digit int, the value of Py_SIZE(sub) is 0 or 1.
-
-        We perform a fast check using a single comparison by casting from int
-        to uint which casts negative numbers to large positive numbers.
-        For details see Section 14.2 "Bounds Checking" in the Agner Fog
-        optimization manual found at:
-        https://www.agner.org/optimize/optimizing_cpp.pdf
-
-        The function is not affected by -fwrapv, -fno-wrapv and -ftrapv
-        compiler options of GCC and clang
-    */
-    assert(PyLong_CheckExact(sub));
-    Py_ssize_t signed_size = Py_SIZE(sub);
-    return ((size_t)signed_size) <= 1;
+_PyLong_IsNonNegativeCompact(const PyLongObject* op) {
+    assert(PyLong_Check(op));
+    return op->long_value.lv_tag <= (1 << NON_SIZE_BITS);
+}
+
+static inline int
+_PyLong_IsCompact(const PyLongObject* op) {
+    assert(PyLong_Check(op));
+    return op->long_value.lv_tag < (2 << NON_SIZE_BITS);
+}
+
+static inline int
+_PyLong_BothAreCompact(const PyLongObject* a, const PyLongObject* b) {
+    assert(PyLong_Check(a));
+    assert(PyLong_Check(b));
+    return (a->long_value.lv_tag | b->long_value.lv_tag) < (2 << NON_SIZE_BITS);
+}
+
+/* The value returned by this function will have at least one bit to spare,
+ * so that addition and subtraction can be performed on the values
+ * without risk of overflow.
+ */
+static inline Py_ssize_t
+_PyLong_CompactValue(const PyLongObject *op)
+{
+    assert(PyLong_Check(op));
+    assert(_PyLong_IsCompact(op));
+    Py_ssize_t sign = 1 - (op->long_value.lv_tag & SIGN_MASK);
+    return sign * (Py_ssize_t)op->long_value.ob_digit[0];
+}
+
+static inline bool
+_PyLong_IsZero(const PyLongObject *op)
+{
+    return (op->long_value.lv_tag & SIGN_MASK) == SIGN_ZERO;
+}
+
+static inline bool
+_PyLong_IsNegative(const PyLongObject *op)
+{
+    return (op->long_value.lv_tag & SIGN_MASK) == SIGN_NEGATIVE;
+}
+
+static inline bool
+_PyLong_IsPositive(const PyLongObject *op)
+{
+    return (op->long_value.lv_tag & SIGN_MASK) == 0;
+}
+
+static inline Py_ssize_t
+_PyLong_DigitCount(const PyLongObject *op)
+{
+    assert(PyLong_Check(op));
+    return op->long_value.lv_tag >> NON_SIZE_BITS;
+}
+
+/* Equivalent to _PyLong_DigitCount(op) * _PyLong_NonZeroSign(op) */
+static inline Py_ssize_t
+_PyLong_SignedDigitCount(const PyLongObject *op)
+{
+    assert(PyLong_Check(op));
+    Py_ssize_t sign = 1 - (op->long_value.lv_tag & SIGN_MASK);
+    return sign * (Py_ssize_t)(op->long_value.lv_tag >> NON_SIZE_BITS);
+}
+
+/* Like _PyLong_DigitCount but asserts that op is non-negative */
+static inline Py_ssize_t
+_PyLong_UnsignedDigitCount(const PyLongObject *op)
+{
+    assert(PyLong_Check(op));
+    assert(!_PyLong_IsNegative(op));
+    return op->long_value.lv_tag >> NON_SIZE_BITS;
+}
+
+static inline int
+_PyLong_NonZeroSign(const PyLongObject *op)
+{
+    assert(PyLong_Check(op));
+    return 1 - (op->long_value.lv_tag & SIGN_MASK);
+}
+
+/* Do a and b have the same sign? */
+static inline int
+_PyLong_SameSign(const PyLongObject *a, const PyLongObject *b)
+{
+    return (a->long_value.lv_tag & SIGN_MASK) == (b->long_value.lv_tag & SIGN_MASK);
 }
 
+#define TAG_FROM_SIGN_AND_SIZE(sign, size) ((1 - (sign)) | ((size) << NON_SIZE_BITS))
+
+static inline void
+_PyLong_SetSignAndSize(PyLongObject *op, int sign, Py_ssize_t size)
+{
+    assert(size >= 0);
+    assert(-1 <= sign && sign <= 1);
+    assert(sign != 0 || size == 0);
+    op->long_value.lv_tag = TAG_FROM_SIGN_AND_SIZE(sign, (size_t)size);
+}
+
+static inline void
+_PyLong_SetSize(PyLongObject *op, Py_ssize_t size)
+{
+    assert(size >= 0);
+    op->long_value.lv_tag = (((size_t)size) << NON_SIZE_BITS) | (op->long_value.lv_tag & SIGN_MASK);
+}
+
+static inline void
+_PyLong_FlipSign(PyLongObject *op) {
+    unsigned int flipped_sign = 2 - (op->long_value.lv_tag & SIGN_MASK);
+    op->long_value.lv_tag &= ~7;
+    op->long_value.lv_tag |= flipped_sign;
+}
+
+#define _PyLong_DIGIT_INIT(val) \
+    { \
+        .ob_base = _PyObject_IMMORTAL_INIT(&PyLong_Type), \
+        .long_value  = { \
+            .lv_tag = TAG_FROM_SIGN_AND_SIZE( \
+                (val) == 0 ? 0 : ((val) < 0 ? -1 : 1), \
+                (val) == 0 ? 0 : 1), \
+            { ((val) >= 0 ? (val) : -(val)) }, \
+        } \
+    }
+
+#define _PyLong_FALSE_TAG TAG_FROM_SIGN_AND_SIZE(0, 0)
+#define _PyLong_TRUE_TAG TAG_FROM_SIGN_AND_SIZE(1, 1)
+
 #ifdef __cplusplus
 }
 #endif

diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
@@ -118,8 +118,9 @@ static inline void
 _PyObject_InitVar(PyVarObject *op, PyTypeObject *typeobj, Py_ssize_t size)
 {
     assert(op != NULL);
-    Py_SET_SIZE(op, size);
+    assert(typeobj != &PyLong_Type);
     _PyObject_Init((PyObject *)op, typeobj);
+    Py_SET_SIZE(op, size);
 }
 
 

@@ -8,6 +8,7 @@ extern "C" {
 #  error "this header requires Py_BUILD_CORE define"
 #endif
 
+#include "pycore_long.h"
 #include "pycore_object.h"
 #include "pycore_parser.h"
 #include "pycore_pymem_init.h"
@@ -134,15 +135,6 @@ extern "C" {
 
 // global objects
 
-#define _PyLong_DIGIT_INIT(val) \
-    { \
-        .ob_base = _PyObject_IMMORTAL_INIT(&PyLong_Type), \
-        .long_value  = { \
-            ((val) == 0 ? 0 : ((val) > 0 ? 1 : -1)), \
-            { ((val) >= 0 ? (val) : -(val)) }, \
-        } \
-    }
-
 #define _PyBytes_SIMPLE_INIT(CH, LEN) \
     { \
         _PyVarObject_IMMORTAL_INIT(&PyBytes_Type, (LEN)), \

diff --git a/Include/object.h b/Include/object.h
@@ -138,8 +138,13 @@ static inline PyTypeObject* Py_TYPE(PyObject *ob) {
 #  define Py_TYPE(ob) Py_TYPE(_PyObject_CAST(ob))
 #endif
 
+PyAPI_DATA(PyTypeObject) PyLong_Type;
+PyAPI_DATA(PyTypeObject) PyBool_Type;
+
 // bpo-39573: The Py_SET_SIZE() function must be used to set an object size.
 static inline Py_ssize_t Py_SIZE(PyObject *ob) {
+    assert(ob->ob_type != &PyLong_Type);
+    assert(ob->ob_type != &PyBool_Type);
     PyVarObject *var_ob = _PyVarObject_CAST(ob);
     return var_ob->ob_size;
 }
@@ -171,8 +176,9 @@ static inline void Py_SET_TYPE(PyObject *ob, PyTypeObject *type) {
 #  define Py_SET_TYPE(ob, type) Py_SET_TYPE(_PyObject_CAST(ob), type)
 #endif
 
-
 static inline void Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) {
+    assert(ob->ob_base.ob_type != &PyLong_Type);
+    assert(ob->ob_base.ob_type != &PyBool_Type);
     ob->ob_size = size;
 }
 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000

diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-03-06-10-02-22.gh-issue-101291.0FT2QS.rst b/Misc/NEWS.d/next/Core and Builtins/2023-03-06-10-02-22.gh-issue-101291.0FT2QS.rst
@@ -0,0 +1,7 @@
+Rearrange bits in first field (after header) of PyLongObject. * Bits 0 and 1:
+1 - sign. I.e. 0 for positive numbers, 1 for zero and 2 for negative numbers.
+* Bit 2 reserved (probably for the immortal bit) * Bits 3+ the unsigned
+size.
+
+This makes a few operations slightly more efficient, and will enable a more
+compact and faster 2s-complement representation of most ints in future.
diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c
@@ -30,6 +30,7 @@
 #endif
 
 #include <Python.h>
+#include "pycore_long.h"          // _PyLong_IsZero()
 #include "pycore_pystate.h"       // _PyThreadState_GET()
 #include "complexobject.h"
 #include "mpdecimal.h"
@@ -2146,35 +2147,25 @@ dec_from_long(PyTypeObject *type, PyObject *v,
 {
     PyObject *dec;
     PyLongObject *l = (PyLongObject *)v;
-    Py_ssize_t ob_size;
-    size_t len;
-    uint8_t sign;
 
     dec = PyDecType_New(type);
     if (dec == NULL) {
         return NULL;
     }
 
-    ob_size = Py_SIZE(l);
-    if (ob_size == 0) {
+    if (_PyLong_IsZero(l)) {
         _dec_settriple(dec, MPD_POS, 0, 0);
         return dec;
     }
 
-    if (ob_size < 0) {
-        len = -ob_size;
-        sign = MPD_NEG;
-    }
-    else {
-        len = ob_size;
-        sign = MPD_POS;
-    }
+    uint8_t sign = _PyLong_IsNegative(l) ? MPD_NEG :  MPD_POS;
 
-    if (len == 1) {
-        _dec_settriple(dec, sign, *l->long_value.ob_digit, 0);
+    if (_PyLong_IsCompact(l)) {
+        _dec_settriple(dec, sign, l->long_value.ob_digit[0], 0);
         mpd_qfinalize(MPD(dec), ctx, status);
         return dec;
     }
+    size_t len = _PyLong_DigitCount(l);
 
 #if PYLONG_BITS_IN_DIGIT == 30
     mpd_qimport_u32(MPD(dec), l->long_value.ob_digit, len, sign, PyLong_BASE,
@@ -3482,7 +3473,6 @@ dec_as_long(PyObject *dec, PyObject *context, int round)
     PyLongObject *pylong;
     digit *ob_digit;
     size_t n;
-    Py_ssize_t i;
     mpd_t *x;
     mpd_context_t workctx;
     uint32_t status = 0;
@@ -3536,26 +3526,9 @@ dec_as_long(PyObject *dec, PyObject *context, int round)
     }
 
     assert(n > 0);
-    pylong = _PyLong_New(n);
-    if (pylong == NULL) {
-        mpd_free(ob_digit);
-        mpd_del(x);
-        return NULL;
-    }
-
-    memcpy(pylong->long_value.ob_digit, ob_digit, n * sizeof(digit));
+    assert(!mpd_iszero(x));
+    pylong = _PyLong_FromDigits(mpd_isnegative(x), n, ob_digit);
     mpd_free(ob_digit);
-
-    i = n;
-    while ((i > 0) && (pylong->long_value.ob_digit[i-1] == 0)) {
-        i--;
-    }
-
-    Py_SET_SIZE(pylong, i);
-    if (mpd_isnegative(x) && !mpd_iszero(x)) {
-        Py_SET_SIZE(pylong, -i);
-    }
-
     mpd_del(x);
     return (PyObject *) pylong;
 }

diff --git a/Modules/_testcapi/mem.c b/Modules/_testcapi/mem.c
@@ -347,7 +347,7 @@ test_pyobject_new(PyObject *self, PyObject *Py_UNUSED(ignored))
 {
     PyObject *obj;
     PyTypeObject *type = &PyBaseObject_Type;
-    PyTypeObject *var_type = &PyLong_Type;
+    PyTypeObject *var_type = &PyBytes_Type;
 
     // PyObject_New()
     obj = PyObject_New(PyObject, type);

diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c
@@ -32,6 +32,8 @@ Copyright (C) 1994 Steen Lumholt.
 #  include "pycore_fileutils.h"   // _Py_stat()
 #endif
 
+#include "pycore_long.h"
+
 #ifdef MS_WINDOWS
 #include <windows.h>
 #endif
@@ -887,8 +889,8 @@ asBignumObj(PyObject *value)
     PyObject *hexstr;
     const char *hexchars;
     mp_int bigValue;
-
-    neg = Py_SIZE(value) < 0;
+    assert(PyLong_Check(value));
+    neg = _PyLong_IsNegative((PyLongObject *)value);
     hexstr = _PyLong_Format(value, 16);
     if (hexstr == NULL)
         return NULL;
@@ -1960,7 +1962,7 @@ _tkinter_tkapp_getboolean(TkappObject *self, PyObject *arg)
     int v;
 
     if (PyLong_Check(arg)) { /* int or bool */
-        return PyBool_FromLong(Py_SIZE(arg) != 0);
+        return PyBool_FromLong(!_PyLong_IsZero((PyLongObject *)arg));
     }
 
     if (PyTclObject_Check(arg)) {