This repository has been archived by the owner on Jan 30, 2023. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add new string conversion utilities.
- Loading branch information
Showing
2 changed files
with
110 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# -*- encoding: utf-8 -*- | ||
#***************************************************************************** | ||
# Copyright (C) 2017 Erik M. Bray <erik.bray@lri.fr> | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 2 of the License, or | ||
# (at your option) any later version. | ||
# http://www.gnu.org/licenses/ | ||
#***************************************************************************** | ||
from __future__ import absolute_import | ||
|
||
from libc.string cimport strlen | ||
|
||
from cpython.bytes cimport (PyBytes_AsString as PyString_AsString, | ||
PyBytes_FromString as PyString_FromString) | ||
from cpython.unicode cimport PyUnicode_Decode, PyUnicode_AsEncodedString | ||
|
||
IF PY_MAJOR_VERSION >= 3:
    cdef extern from "Python.h":
        # Unicode C-API functions missing from cpython.unicode.
        #
        # PyUnicode_AsUTF8 returns NULL with a Python exception set when it
        # fails (for example when handed a non-str object).  Declaring it
        # ``except NULL`` makes Cython propagate that exception instead of
        # letting a NULL pointer flow on as a codec name.
        char* PyUnicode_AsUTF8(object unicode) except NULL

        # Both return new references; Cython checks object-returning
        # functions for NULL automatically.
        object PyUnicode_DecodeLocale(const char* str, const char* errors)
        object PyUnicode_EncodeLocale(object unicode, const char* errors)
|
||
|
||
cdef inline str char_to_str(char* c, encoding=None):
    # Turn the NUL-terminated C string ``c`` into a native ``str``.
    #
    # Python 2: the bytes are copied as-is into a ``str`` and ``encoding``
    # is ignored.
    # Python 3: the bytes are decoded with the "surrogateescape" error
    # handler, through PyUnicode_DecodeLocale when ``encoding`` is None and
    # through the codec named by ``encoding`` otherwise.
    IF PY_MAJOR_VERSION >= 3:
        if encoding is not None:
            # The length is taken with strlen(), so decoding stops at the
            # first NUL byte.
            return PyUnicode_Decode(c, strlen(c), PyUnicode_AsUTF8(encoding),
                                    "surrogateescape")

        return PyUnicode_DecodeLocale(c, "surrogateescape")
    ELSE:
        return <str>PyString_FromString(c)
|
||
|
||
cpdef inline str bytes_to_str(bytes b, encoding=None):
    """
    Converts `bytes` to `str`.

    On Python 2 the input is returned unchanged, since ``bytes is str``.
    On Python 3 the data is decoded to a unicode `str` by ``char_to_str``:
    with the given ``encoding`` if one is supplied, otherwise with the
    locale decoder.  Undecodable bytes are handled with the
    "surrogateescape" error handler.

    .. NOTE::

        On Python 3 the data is handed over as a NUL-terminated C string,
        so anything after an embedded NUL byte is not decoded.

    EXAMPLES::

        sage: import six
        sage: from sage.cpython.string import bytes_to_str
        sage: s = bytes_to_str(b'\xe2\x98\x83')
        sage: if six.PY2:
        ....:     s == b'\xe2\x98\x83'
        ....: else:
        ....:     s == u'☃'
        True
    """
    IF PY_MAJOR_VERSION >= 3:
        return char_to_str(PyString_AsString(b), encoding)
    ELSE:
        return <str>b
|
||
|
||
cpdef inline bytes str_to_bytes(str s, encoding=None):
    """
    Converts `str` to `bytes`.

    On Python 2 the input is returned unchanged, since ``str is bytes``.
    On Python 3 the unicode `str` is encoded to `bytes`: with the given
    ``encoding`` if one is supplied, otherwise with the locale encoder.
    Surrogate escapes are handled with the "surrogateescape" error
    handler.

    EXAMPLES::

        sage: import six
        sage: from sage.cpython.string import str_to_bytes
        sage: if six.PY2:
        ....:     b = str_to_bytes('\xe2\x98\x83')
        ....: else:
        ....:     b = str_to_bytes(u'☃')
        sage: b == b'\xe2\x98\x83'
        True
    """
    IF PY_MAJOR_VERSION >= 3:
        if encoding is not None:
            return PyUnicode_AsEncodedString(s, PyUnicode_AsUTF8(encoding),
                                             "surrogateescape")

        return PyUnicode_EncodeLocale(s, "surrogateescape")
    ELSE:
        return <bytes>s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# -*- encoding: utf-8 -*- | ||
#***************************************************************************** | ||
# Copyright (C) 2017 Erik M. Bray <erik.bray@lri.fr> | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 2 of the License, or | ||
# (at your option) any later version. | ||
# http://www.gnu.org/licenses/ | ||
#***************************************************************************** | ||
from __future__ import absolute_import | ||
|
||
import sys | ||
|
||
|
||
# Provide this as a shortcut to calling sys.getfilesystemencoding(), which | ||
# after interpreter initialization is constant. | ||
FS_ENCODING = sys.getfilesystemencoding() | ||
|
||
# Functions in this module are implemented in the .pxd file for inlining. |