Skip to content

Commit

Permalink
CLN: Remove PY2/3 references io directory (#25886)
Browse files Browse the repository at this point in the history
  • Loading branch information
gfyoung authored and jreback committed Mar 28, 2019
1 parent 31b4019 commit 96a128e
Show file tree
Hide file tree
Showing 23 changed files with 111 additions and 473 deletions.
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# pylint: disable=E1101,W0232

from shutil import get_terminal_size
import textwrap
from warnings import warn

Expand Down Expand Up @@ -38,7 +39,6 @@
from pandas.core.sorting import nargsort

from pandas.io.formats import console
from pandas.io.formats.terminal import get_terminal_size

from .base import ExtensionArray, _extension_array_shared_docs

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Data structure for 1-dimensional cross-sectional and time series data
"""
from collections import OrderedDict
from shutil import get_terminal_size
from textwrap import dedent
import warnings

Expand Down Expand Up @@ -47,7 +48,6 @@
from pandas.core.tools.datetimes import to_datetime

import pandas.io.formats.format as fmt
from pandas.io.formats.terminal import get_terminal_size
import pandas.plotting._core as gfx

# pylint: disable=E1101,E1103
Expand Down
12 changes: 3 additions & 9 deletions pandas/io/clipboard/clipboards.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import subprocess

from pandas.compat import PY2, text_type

from .exceptions import PyperclipException

EXCEPT_MSG = """
Expand Down Expand Up @@ -66,7 +64,7 @@ def copy_qt(text):

def paste_qt():
cb = app.clipboard()
return text_type(cb.text())
return str(cb.text())

return copy_qt, paste_qt

Expand Down Expand Up @@ -135,11 +133,7 @@ class ClipboardUnavailable(object):
def __call__(self, *args, **kwargs):
    # Stub clipboard: any attempted operation fails loudly with the
    # module-level EXCEPT_MSG explaining how to install a real backend.
    raise PyperclipException(EXCEPT_MSG)

if PY2:
def __nonzero__(self):
return False
else:
def __bool__(self):
return False
def __bool__(self):
return False

return ClipboardUnavailable(), ClipboardUnavailable()
31 changes: 11 additions & 20 deletions pandas/io/clipboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import warnings

import pandas.compat as compat
from pandas.compat import PY2, PY3, StringIO
from pandas.compat import StringIO

from pandas.core.dtypes.generic import ABCDataFrame

Expand Down Expand Up @@ -36,16 +36,14 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
from pandas.io.parsers import read_csv
text = clipboard_get()

# try to decode (if needed on PY3)
# Strange. linux py33 doesn't complain, win py33 does
if PY3:
try:
text = compat.bytes_to_str(
text, encoding=(kwargs.get('encoding') or
get_option('display.encoding'))
)
except AttributeError:
pass
# Try to decode (if needed, as "text" might already be a string here).
try:
text = compat.bytes_to_str(
text, encoding=(kwargs.get('encoding') or
get_option('display.encoding'))
)
except AttributeError:
pass

# Excel copies into clipboard with \t separation
# inspect no more then the 10 first lines, if they
Expand Down Expand Up @@ -75,13 +73,6 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
warnings.warn('read_clipboard with regex separator does not work'
' properly with c engine')

# In PY2, the c table reader first encodes text with UTF-8 but Python
# table reader uses the format of the passed string. For consistency,
# encode strings for python engine so that output from python and c
# engines produce consistent results
if kwargs.get('engine') == 'python' and PY2:
text = text.encode('utf-8')

return read_csv(StringIO(text), sep=sep, **kwargs)


Expand Down Expand Up @@ -123,11 +114,11 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover
if sep is None:
sep = '\t'
buf = StringIO()

# clipboard_set (pyperclip) expects unicode
obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs)
text = buf.getvalue()
if PY2:
text = text.decode('utf-8')

clipboard_set(text)
return
except TypeError:
Expand Down
175 changes: 30 additions & 145 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,26 @@

import bz2
import codecs
from contextlib import closing, contextmanager
import csv
import gzip
from http.client import HTTPException # noqa
import lzma
import mmap
import os
from urllib.error import URLError # noqa
from urllib.parse import ( # noqa
urlencode, urljoin, urlparse as parse_url, uses_netloc, uses_params,
uses_relative)
from urllib.request import pathname2url, urlopen
import zipfile

import pandas.compat as compat
from pandas.compat import BytesIO, StringIO, string_types, text_type
from pandas.compat import BytesIO, string_types, text_type
from pandas.errors import ( # noqa
AbstractMethodError, DtypeWarning, EmptyDataError, ParserError,
ParserWarning)

from pandas.core.dtypes.common import is_file_like, is_number

from pandas.io.formats.printing import pprint_thing
from pandas.core.dtypes.common import is_file_like

# gh-12665: Alias for now and remove later.
CParserError = ParserError
Expand All @@ -31,31 +34,6 @@
'-nan', ''}


if compat.PY3:
from urllib.request import urlopen, pathname2url
_urlopen = urlopen
from urllib.parse import urlparse as parse_url
from urllib.parse import (uses_relative, uses_netloc, uses_params,
urlencode, urljoin)
from urllib.error import URLError
from http.client import HTTPException # noqa
else:
from urllib2 import urlopen as _urlopen
from urllib import urlencode, pathname2url # noqa
from urlparse import urlparse as parse_url
from urlparse import uses_relative, uses_netloc, uses_params, urljoin
from urllib2 import URLError # noqa
from httplib import HTTPException # noqa
from contextlib import contextmanager, closing # noqa
from functools import wraps # noqa

# @wraps(_urlopen)
@contextmanager
def urlopen(*args, **kwargs):
with closing(_urlopen(*args, **kwargs)) as f:
yield f


_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard('')

Expand All @@ -72,10 +50,6 @@ def __next__(self):
raise AbstractMethodError(self)


if not compat.PY3:
BaseIterator.next = lambda self: self.__next__()


def _is_url(url):
"""Check to see if a URL has a valid protocol.
Expand Down Expand Up @@ -189,7 +163,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
----------
filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
or buffer
encoding : the encoding to use to decode py3 bytes, default is 'utf-8'
compression : {{'gzip', 'bz2', 'zip', 'xz', None}}, optional
encoding : the encoding to use to decode bytes, default is 'utf-8'
mode : str, optional
Returns
Expand All @@ -202,7 +177,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
filepath_or_buffer = _stringify_path(filepath_or_buffer)

if _is_url(filepath_or_buffer):
req = _urlopen(filepath_or_buffer)
req = urlopen(filepath_or_buffer)
content_encoding = req.headers.get('Content-Encoding', None)
if content_encoding == 'gzip':
# Override compression based on Content-Encoding header
Expand Down Expand Up @@ -361,10 +336,6 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,

if compression:

if compat.PY2 and not is_path and encoding:
msg = 'compression with encoding is not yet supported in Python 2'
raise ValueError(msg)

# GZ Compression
if compression == 'gzip':
if is_path:
Expand All @@ -376,11 +347,6 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
elif compression == 'bz2':
if is_path:
f = bz2.BZ2File(path_or_buf, mode)
elif compat.PY2:
# Python 2's bz2 module can't take file objects, so have to
# run through decompress manually
f = StringIO(bz2.decompress(path_or_buf.read()))
path_or_buf.close()
else:
f = bz2.BZ2File(path_or_buf)

Expand Down Expand Up @@ -415,24 +381,19 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
handles.append(f)

elif is_path:
if compat.PY2:
# Python 2
mode = "wb" if mode == "w" else mode
f = open(path_or_buf, mode)
elif encoding:
# Python 3 and encoding
if encoding:
# Encoding
f = open(path_or_buf, mode, encoding=encoding, newline="")
elif is_text:
# Python 3 and no explicit encoding
# No explicit encoding
f = open(path_or_buf, mode, errors='replace', newline="")
else:
# Python 3 and binary mode
# Binary mode
f = open(path_or_buf, mode)
handles.append(f)

# in Python 3, convert BytesIO or fileobjects passed with an encoding
if (compat.PY3 and is_text and
(compression or isinstance(f, need_text_wrapping))):
# Convert BytesIO or file objects passed with an encoding
if is_text and (compression or isinstance(f, need_text_wrapping)):
from io import TextIOWrapper
f = TextIOWrapper(f, encoding=encoding, newline='')
handles.append(f)
Expand Down Expand Up @@ -499,11 +460,9 @@ def __iter__(self):
def __next__(self):
newline = self.mmap.readline()

# readline returns bytes, not str, in Python 3,
# but Python's CSV reader expects str, so convert
# the output to str before continuing
if compat.PY3:
newline = compat.bytes_to_str(newline)
# readline returns bytes, not str, but Python's CSV reader
# expects str, so convert the output to str before continuing
newline = compat.bytes_to_str(newline)

# mmap doesn't raise if reading past the allocated
# data but instead returns an empty string, so raise
Expand All @@ -513,14 +472,10 @@ def __next__(self):
return newline


if not compat.PY3:
MMapWrapper.next = lambda self: self.__next__()


class UTF8Recoder(BaseIterator):

"""
Iterator that reads an encoded stream and reencodes the input to UTF-8
Iterator that reads an encoded stream and re-encodes the input to UTF-8
"""

def __init__(self, f, encoding):
Expand All @@ -536,82 +491,12 @@ def next(self):
return next(self.reader).encode("utf-8")


if compat.PY3: # pragma: no cover
def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
# ignore encoding
return csv.reader(f, dialect=dialect, **kwds)

def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
return csv.writer(f, dialect=dialect, **kwds)
else:
class UnicodeReader(BaseIterator):

"""
A CSV reader which will iterate over lines in the CSV file "f",
which is encoded in the given encoding.
On Python 3, this is replaced (below) by csv.reader, which handles
unicode.
"""

def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
f = UTF8Recoder(f, encoding)
self.reader = csv.reader(f, dialect=dialect, **kwds)

def __next__(self):
row = next(self.reader)
return [compat.text_type(s, "utf-8") for s in row]

class UnicodeWriter(object):

"""
A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding.
"""

def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
self.quoting = kwds.get("quoting", None)

def writerow(self, row):
def _check_as_is(x):
return (self.quoting == csv.QUOTE_NONNUMERIC and
is_number(x)) or isinstance(x, str)

row = [x if _check_as_is(x)
else pprint_thing(x).encode("utf-8") for x in row]

self.writer.writerow([s for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and re-encode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)

def writerows(self, rows):
def _check_as_is(x):
return (self.quoting == csv.QUOTE_NONNUMERIC and
is_number(x)) or isinstance(x, str)

for i, row in enumerate(rows):
rows[i] = [x if _check_as_is(x)
else pprint_thing(x).encode("utf-8") for x in row]

self.writer.writerows([[s for s in row] for row in rows])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and re-encode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
# Keeping these functions for now because they provide a necessary
# convenience for "dropping" the "encoding" argument from our I/O
# arguments when creating a Unicode I/O object.
def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
    """Thin shim around ``csv.reader``.

    The *encoding* argument is accepted purely for interface
    compatibility and is ignored: ``csv.reader`` already consumes
    text (str) streams directly.
    """
    reader = csv.reader(f, dialect=dialect, **kwds)
    return reader


def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
    """Thin shim around ``csv.writer``.

    The *encoding* argument is accepted purely for interface
    compatibility and is ignored: ``csv.writer`` already emits
    text (str) to the target stream.
    """
    writer = csv.writer(f, dialect=dialect, **kwds)
    return writer
Loading

0 comments on commit 96a128e

Please sign in to comment.