Skip to content

Commit

Permalink
Explicitly use utf-8 when decoding bytestrings
Browse files Browse the repository at this point in the history
While Python 3 defaults to utf-8 in `bytes.decode()`, Python 2's
equivalent (`str.decode()`) will use the default encoding as set by
site.py (which is almost always ascii).

From looking at the code, it seems that these decodes have just sort of
been fixed piecemeal (likely when someone realized that pygit2 was
failing to handle unicode properly), but any decodes which run on Python
2 that don't specify utf-8 as the encoding are a ticking time bomb. I
personally noticed this was a problem when I encountered a traceback in
the RemoteCallbacks while fetching a new branch which contained utf-8
characters. During the fetch, when `pygit2.remote.maybe_string()` was
invoked by `_update_tips_cb()` with a pointer to a bytestring containing
unicode, the decode failed because the default encoding is ascii. As it
turns out, that particular decode was already fixed in master, but there
are a number of other decodes which still have no explicit encoding.

This commit explicitly uses utf-8 for all remaining bytestring decodes
which do not have an encoding specified, aside from one in PY3-specific
code where doing so would be redundant.
  • Loading branch information
terminalmage committed Feb 7, 2018
1 parent 368730f commit 6e71992
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pygit2/blame.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def orig_path(self):
if not path:
return None

return ffi.string(path).decode()
return ffi.string(path).decode('utf-8')


class Blame(object):
Expand Down
2 changes: 1 addition & 1 deletion pygit2/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def _from_found_config(fn):
buf = ffi.new('git_buf *', (ffi.NULL, 0))
err = fn(buf)
check_error(err, True)
cpath = ffi.string(buf.ptr).decode()
cpath = ffi.string(buf.ptr).decode('utf-8')
C.git_buf_free(buf)

return Config(cpath)
Expand Down
8 changes: 4 additions & 4 deletions pygit2/refspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ def __init__(self, owner, ptr):
@property
def src(self):
"""Source or lhs of the refspec"""
return ffi.string(C.git_refspec_src(self._refspec)).decode()
return ffi.string(C.git_refspec_src(self._refspec)).decode('utf-8')

@property
def dst(self):
"""Destinaton or rhs of the refspec"""
return ffi.string(C.git_refspec_dst(self._refspec)).decode()
return ffi.string(C.git_refspec_dst(self._refspec)).decode('utf-8')

@property
def force(self):
Expand All @@ -58,7 +58,7 @@ def force(self):
@property
def string(self):
"""String which was used to create this refspec"""
return ffi.string(C.git_refspec_string(self._refspec)).decode()
return ffi.string(C.git_refspec_string(self._refspec)).decode('utf-8')

@property
def direction(self):
Expand All @@ -82,7 +82,7 @@ def _transform(self, ref, fn):
check_error(err)

try:
return ffi.string(buf.ptr).decode()
return ffi.string(buf.ptr).decode('utf-8')
finally:
C.git_buf_free(buf)

Expand Down
2 changes: 1 addition & 1 deletion pygit2/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def _sideband_progress_cb(string, length, data):
return 0

try:
s = ffi.string(string, length).decode()
s = ffi.string(string, length).decode('utf-8')
progress(s)
except Exception as e:
self._stored_exception = e
Expand Down
2 changes: 1 addition & 1 deletion pygit2/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
def strarray_to_strings(arr):
l = [None] * arr.count
for i in range(arr.count):
l[i] = ffi.string(arr.strings[i]).decode()
l[i] = ffi.string(arr.strings[i]).decode('utf-8')

return l

Expand Down

0 comments on commit 6e71992

Please sign in to comment.