From 9403629235ffb078c0eda9a46154d882c62760b2 Mon Sep 17 00:00:00 2001 From: Matthias Koeppe Date: Sun, 20 Sep 2020 22:21:22 -0700 Subject: [PATCH 1/2] build/bin/sage-system-python: Work around LC_ALL=C --- build/bin/sage-system-python | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/build/bin/sage-system-python b/build/bin/sage-system-python index a72b1527df5..fdf0ad7444d 100755 --- a/build/bin/sage-system-python +++ b/build/bin/sage-system-python @@ -28,6 +28,16 @@ fi # is accessible by this python; this is to guard on Cygwin against Pythons # installed somewhere else in Windows. +# Trac #30008: Make it work even if the environment tries to sabotage UTF-8 +# operation in Python 3.0.x-3.6.x by setting LC_ALL=C or similar. + +if [ "$LC_ALL" = "C" -o "$LANG" = "C" -o "$LC_CTYPE" = "C" ]; then + LC_ALL=$(locale -a | grep -E -i '^(c|en_us)[-.]utf-?8$' | head -n 1) + LANG=$LC_ALL + export LC_ALL + export LANG +fi + PYTHONS="python python3 python3.8 python3.7 python2.7 python3.6 python2" for PY in $PYTHONS; do PYTHON="$(PATH="$SAGE_ORIG_PATH" command -v $PY)" From ff0dbc622b8b3f4ce467a0324c3fdbd134df24e3 Mon Sep 17 00:00:00 2001 From: Matthias Koeppe Date: Mon, 21 Sep 2020 14:28:36 -0700 Subject: [PATCH 2/2] build/sage_bootstrap/uncompress/tar_file.py: Fix encoding to utf-8 --- build/sage_bootstrap/uncompress/tar_file.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/build/sage_bootstrap/uncompress/tar_file.py b/build/sage_bootstrap/uncompress/tar_file.py index cf707e78a3a..9777d265b4d 100644 --- a/build/sage_bootstrap/uncompress/tar_file.py +++ b/build/sage_bootstrap/uncompress/tar_file.py @@ -28,8 +28,10 @@ class SageBaseTarFile(tarfile.TarFile): """ - Sage as tarfile.TarFile, but applies a reasonable umask (0022) to the - permissions of all extracted files and directories. + Same as tarfile.TarFile, but applies a reasonable umask (0022) to the + permissions of all extracted files and directories, and fixes + the encoding of file names in the tarball to be 'utf-8' instead of + depending on locale settings. Previously this applied the user's current umask per the default behavior of the ``tar`` utility, but this did not provide sufficiently reliable @@ -46,6 +48,9 @@ class SageBaseTarFile(tarfile.TarFile): umask = 0o022 def __init__(self, *args, **kwargs): + + kwargs['encoding'] = 'utf-8' + # Unfortunately the only way to get the current umask is to set it # and then restore it super(SageBaseTarFile, self).__init__(*args, **kwargs)