Skip to content
This repository has been archived by the owner on Jan 30, 2023. It is now read-only.

Commit

Permalink
Add new string conversion utilities.
Browse files Browse the repository at this point in the history
  • Loading branch information
embray committed Nov 20, 2017
1 parent a5d5219 commit 627d4c7
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/sage/misc/string.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# C-level declarations of the string conversion helpers implemented in
# string.pyx, so that other Cython modules can cimport them.
# ``encoding=*`` declares an optional argument; the actual default value
# is given by the definition in the .pyx file.
cdef str char_to_str(char* c, encoding=*)
cpdef str bytes_to_str(bytes b, encoding=*)
cpdef bytes str_to_bytes(str s, encoding=*)
108 changes: 108 additions & 0 deletions src/sage/misc/string.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# -*- encoding: utf-8 -*-
"""
String conversion and encoding/decoding utilities, in particular for Python 2/3
compatibility.
"""

#*****************************************************************************
# Copyright (C) 2017 Erik M. Bray <erik.bray@lri.fr>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# http://www.gnu.org/licenses/
#*****************************************************************************

from __future__ import absolute_import

import locale
import six
import sys

from libc.string cimport strlen

from cpython.bytes cimport PyBytes_AsString as PyString_AsString
if six.PY2:
from cpython.string cimport PyString_FromString
else:
from cpython.bytes cimport PyBytes_FromString as PyString_FromString
from cpython.unicode cimport (PyUnicode_Decode, PyUnicode_AsEncodedString)
from cpython.version cimport PY_MAJOR_VERSION

cdef extern from "Python.h":
# Missing from cpython.unicode
char* PyUnicode_AsUTF8(object unicode)


# Encoding used as the default ``encoding`` argument of the conversion
# functions in this module.  It is looked up once, when the module is
# imported; rebinding this name later does not change the defaults that
# were already bound in the function signatures.
DEFAULT_ENCODING = locale.getpreferredencoding()
# Encoding reported by the interpreter for file system names.
FS_ENCODING = sys.getfilesystemencoding()


cdef inline str char_to_str(char* c, encoding=DEFAULT_ENCODING):
    """
    Build a Python `str` from the NUL-terminated C string ``c``.

    On Python 2 the bytes are copied as they are and ``encoding`` is not
    used.  On Python 3 the bytes up to (not including) the first NUL are
    decoded with ``encoding``, using the ``surrogateescape`` error handler.

    ``c`` must point to a valid NUL-terminated buffer: its length is
    determined by scanning for the terminator.
    """

    if PY_MAJOR_VERSION > 2:
        return PyUnicode_Decode(c, strlen(c), PyUnicode_AsUTF8(encoding),
                                "surrogateescape")

    # Only reached on Python 2.  The <str> cast is there purely to keep
    # Cython's type checking quiet: on Python 2 the result really is a
    # str, while on Python 3 (where the cast would be meaningless) the
    # branch above has already returned.
    return <str>PyString_FromString(c)


cpdef inline str bytes_to_str(bytes b, encoding=DEFAULT_ENCODING):
    r"""
    Convert `bytes` to `str`.

    On Python 2 this is a no-op since ``bytes is str``; ``b`` is returned
    unchanged.  On Python 3 this decodes the given `bytes` to a Python 3
    unicode `str` using the specified encoding and the ``surrogateescape``
    error handler.

    The full length of ``b`` is used, so embedded NUL bytes are preserved
    rather than being treated as a terminator.

    INPUT:

    - ``b`` -- the `bytes` object to convert

    - ``encoding`` -- name of the encoding used to decode ``b`` on
      Python 3 (default: the locale's preferred encoding, determined when
      this module was imported)

    OUTPUT: a `str`

    EXAMPLES::

        sage: from six import PY2
        sage: from sage.misc.string import bytes_to_str
        sage: s = bytes_to_str(b'\xe2\x98\x83', encoding='utf-8')
        sage: if PY2:
        ....:     s == b'\xe2\x98\x83'
        ....: else:
        ....:     s == u'☃'
        True

    Embedded NUL bytes do not truncate the result::

        sage: len(bytes_to_str(b'a\x00b', encoding='utf-8'))
        3
    """

    if PY_MAJOR_VERSION <= 2:
        # bytes and str are the same type on Python 2; the cast only
        # informs Cython of that fact.
        return <str>b
    else:
        # Decode using the object's own length instead of strlen(), which
        # would stop at the first NUL byte inside the data.
        return PyUnicode_Decode(PyString_AsString(b), len(b),
                                PyUnicode_AsUTF8(encoding),
                                "surrogateescape")


cpdef inline bytes str_to_bytes(str s, encoding=DEFAULT_ENCODING):
    r"""
    Convert `str` to `bytes`.

    On Python 2 this is a no-op since ``str is bytes``.  On Python 3
    this encodes the given `str` to a Python 3 `bytes` using the
    specified encoding and the ``surrogateescape`` error handler.

    INPUT:

    - ``s`` -- the `str` object to convert

    - ``encoding`` -- name of the encoding used to encode ``s`` on
      Python 3 (default: the locale's preferred encoding, determined when
      this module was imported)

    OUTPUT: a `bytes` object

    EXAMPLES::

        sage: from six import PY2
        sage: from sage.misc.string import str_to_bytes
        sage: if PY2:
        ....:     b = str_to_bytes('\xe2\x98\x83', encoding='utf-8')
        ....: else:
        ....:     b = str_to_bytes(u'☃', encoding='utf-8')
        sage: b == b'\xe2\x98\x83'
        True
    """
    if PY_MAJOR_VERSION <= 2:
        # str and bytes are the same type on Python 2; the cast only
        # informs Cython of that fact.
        return <bytes>s
    else:
        return PyUnicode_AsEncodedString(s, PyUnicode_AsUTF8(encoding),
                                         "surrogateescape")

0 comments on commit 627d4c7

Please sign in to comment.