Skip to content

Commit

Permalink
#1040: on py 3.6 use sys.getfilesystemencodeerrors() to determine th…
Browse files Browse the repository at this point in the history
…e default error handler instead of guessing it
  • Loading branch information
giampaolo committed May 4, 2017
1 parent a1f2a09 commit 982f255
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 15 deletions.
8 changes: 5 additions & 3 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2250,11 +2250,13 @@ The notes below apply to *any* API returning a string such as
:meth:`Process.exe` or :meth:`Process.cwd`, including non-filesystem related
methods such as :meth:`Process.username` or :meth:`WindowsService.description`:

* all strings are encoded by using the OS filesystem encoding which varies
depending on the platform (e.g. UTF-8 on Linux, mbcs on Win)
* all strings are encoded by using the OS filesystem encoding
(``sys.getfilesystemencoding()``) which varies depending on the platform
(e.g. "UTF-8" on OSX, "mbcs" on Win)
* no API call is supposed to crash with ``UnicodeDecodeError``
* instead, in case of badly encoded data returned by the OS, the following error handlers are used to replace the corrupted characters in the string:
* Python 3: ``"surrogateescape"`` on POSIX and ``"replace"`` on Windows
* Python 3: ``sys.getfilesystemencodeerrors()`` (PY 3.6+) or
``"surrogateescape"`` on POSIX and ``"replace"`` on Windows
* Python 2: ``"replace"``
* on Python 2 all APIs return bytes (``str`` type), never ``unicode``
* on Python 2, you can go back to ``unicode`` by doing:
Expand Down
13 changes: 13 additions & 0 deletions psutil/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
except ImportError:
AF_UNIX = None

from psutil._compat import PY3

if sys.version_info >= (3, 4):
import enum
else:
Expand Down Expand Up @@ -132,6 +134,17 @@ class BatteryTime(enum.IntEnum):

globals().update(BatteryTime.__members__)

# --- others

# Encoding reported by the interpreter for the OS filesystem; shared by
# the platform modules when decoding / encoding strings.
ENCODING = sys.getfilesystemencoding()
# Error handler to pair with ENCODING when data is badly encoded.
if not PY3:
# Python 2: always "replace".
ENCODING_ERRS = "replace"
else:
try:
# Ask the interpreter for its own error handler instead of guessing it.
ENCODING_ERRS = sys.getfilesystemencodeerrors() # py 3.6
except AttributeError:
# The function is missing before Python 3.6: fall back to
# "surrogateescape" on POSIX and "replace" elsewhere.
ENCODING_ERRS = "surrogateescape" if POSIX else "replace"


# ===================================================================
# --- namedtuples
Expand Down
13 changes: 6 additions & 7 deletions psutil/_pslinux.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@
from . import _psposix
from . import _psutil_linux as cext
from . import _psutil_posix as cext_posix
from ._common import ENCODING
from ._common import ENCODING_ERRS
from ._common import isfile_strict
from ._common import memoize
from ._common import memoize_when_activated
from ._common import parse_environ_block
from ._common import NIC_DUPLEX_FULL
from ._common import NIC_DUPLEX_HALF
from ._common import NIC_DUPLEX_UNKNOWN
from ._common import parse_environ_block
from ._common import path_exists_strict
from ._common import supports_ipv6
from ._common import usage_percent
Expand Down Expand Up @@ -84,9 +86,6 @@
BIGGER_FILE_BUFFERING = -1 if PY3 else 8192
LITTLE_ENDIAN = sys.byteorder == 'little'
SECTOR_SIZE_FALLBACK = 512
if PY3:
FS_ENCODING = sys.getfilesystemencoding()
ENCODING_ERRORS_HANDLER = 'surrogateescape'
if enum is None:
AF_LINK = socket.AF_PACKET
else:
Expand Down Expand Up @@ -200,14 +199,14 @@ def open_text(fname, **kwargs):
# See:
# https://github.com/giampaolo/psutil/issues/675
# https://github.com/giampaolo/psutil/pull/733
kwargs.setdefault('encoding', FS_ENCODING)
kwargs.setdefault('errors', ENCODING_ERRORS_HANDLER)
kwargs.setdefault('encoding', ENCODING)
kwargs.setdefault('errors', ENCODING_ERRS)
return open(fname, "rt", **kwargs)


if PY3:
def decode(s):
return s.decode(encoding=FS_ENCODING, errors=ENCODING_ERRORS_HANDLER)
return s.decode(encoding=ENCODING, errors=ENCODING_ERRS)
else:
def decode(s):
return s
Expand Down
12 changes: 7 additions & 5 deletions psutil/_pswindows.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@
raise

from ._common import conn_tmap
from ._common import ENCODING
from ._common import ENCODING_ERRS
from ._common import isfile_strict
from ._common import memoize_when_activated
from ._common import parse_environ_block
from ._common import sockfam_to_enum
from ._common import socktype_to_enum
from ._common import memoize_when_activated
from ._common import usage_percent
from ._compat import long
from ._compat import lru_cache
Expand Down Expand Up @@ -71,8 +73,6 @@
# --- globals
# =====================================================================

FS_ENCODING = sys.getfilesystemencoding()
PY2_ENCODING_ERRS = "replace"
CONN_DELETE_TCB = "DELETE_TCB"
WAIT_TIMEOUT = 0x00000102 # 258 in decimal
ACCESS_DENIED_SET = frozenset([errno.EPERM, errno.EACCES,
Expand Down Expand Up @@ -198,7 +198,7 @@ def py2_strencode(s):
if isinstance(s, str):
return s
else:
return s.encode(FS_ENCODING, errors=PY2_ENCODING_ERRS)
return s.encode(ENCODING, errors=ENCODING_ERRS)


# =====================================================================
Expand Down Expand Up @@ -240,7 +240,9 @@ def swap_memory():
def disk_usage(path):
"""Return disk usage associated with path."""
if PY3 and isinstance(path, bytes):
path = path.decode(FS_ENCODING)
# XXX: do we want to use "strict"? Probably yes, in order
# to fail immediately. After all we are accepting input here...
path = path.decode(ENCODING, errors="strict")
total, free = cext.disk_usage(path)
used = total - free
percent = usage_percent(used, total, _round=1)
Expand Down

0 comments on commit 982f255

Please sign in to comment.