Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PR: Fix error when viewing dataframe with a non-ascii index #7242

Merged
merged 6 commits on Jun 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 33 additions & 29 deletions spyder/widgets/variableexplorer/dataframeeditor.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@
from qtpy.compat import from_qvariant, to_qvariant
from qtpy.QtCore import QAbstractTableModel, QModelIndex, Qt, Signal, Slot
from qtpy.QtGui import QColor, QCursor
from qtpy.QtWidgets import (QApplication, QCheckBox, QDialogButtonBox, QDialog,
QGridLayout, QHBoxLayout, QInputDialog, QLineEdit,
QMenu, QMessageBox, QPushButton, QTableView,
QHeaderView)
from qtpy.QtWidgets import (QApplication, QCheckBox, QDialog, QGridLayout,
QHBoxLayout, QInputDialog, QLineEdit, QMenu,
QMessageBox, QPushButton, QTableView, QHeaderView)

from pandas import DataFrame, DatetimeIndex, Series
try:
Expand All @@ -34,9 +33,8 @@
from spyder.config.base import _
from spyder.config.fonts import DEFAULT_SMALL_DELTA
from spyder.config.gui import get_font, config_shortcut
from spyder.py3compat import (io, is_text_string, PY2, to_text_string,
TEXT_TYPES)
from spyder.utils import encoding
from spyder.py3compat import (io, is_text_string, is_type_text_string, PY2,
to_text_string)
from spyder.utils import icon_manager as ima
from spyder.utils.qthelpers import (add_actions, create_action,
keybinding, qapplication)
Expand Down Expand Up @@ -195,31 +193,26 @@ def headerData(self, section, orientation, role=Qt.DisplayRole):
if section == 0:
return 'Index'
elif section == 1 and PY2:
header = self.df_header[0]
# Get rid of possible BOM utf-8 data present at the
# beginning of a file, which gets attached to the first
# column header when headers are present in the first
# row.
# Fixes Issue 2514
try:
header = to_text_string(self.df_header[0],
encoding='utf-8-sig')
header = to_text_string(header, encoding='utf-8-sig')
except:
header = to_text_string(self.df_header[0])
# Don't perform any conversion on strings because it
# leads to differences between the data present in
# the dataframe and what is shown by Spyder
if not is_type_text_string(header):
header = to_text_string(header)
return to_qvariant(header)
elif isinstance(self.df_header[section-1], TEXT_TYPES):
# Get the proper encoding of the text in the header.
# Fixes Issue 3896
if not PY2:
try:
header = self.df_header[section-1].encode('utf-8')
coding = 'utf-8-sig'
except:
header = self.df_header[section-1].encode('utf-8')
coding = encoding.get_coding(header)
else:
header = self.df_header[section-1]
coding = encoding.get_coding(header)
return to_qvariant(to_text_string(header, encoding=coding))
elif is_type_text_string(self.df_header[section-1]):
# Don't perform any conversion on strings because it
# leads to differences between the data present in
# the dataframe and what is shown by Spyder
return to_qvariant(self.df_header[section-1])
else:
return to_qvariant(to_text_string(self.df_header[section-1]))
else:
Expand Down Expand Up @@ -280,7 +273,15 @@ def data(self, index, role=Qt.DisplayRole):
column = index.column()
row = index.row()
if column == 0:
return to_qvariant(to_text_string(self.df_index[row]))
df_idx = self.df_index[row]
if is_type_text_string(df_idx):
# Don't perform any conversion on strings
# because it leads to differences between
# the data present in the dataframe and
# what is shown by Spyder
return df_idx
else:
return to_qvariant(to_text_string(df_idx))
else:
value = self.get_value(row, column-1)
if isinstance(value, float):
Expand All @@ -290,11 +291,14 @@ def data(self, index, role=Qt.DisplayRole):
# may happen if format = '%d' and value = NaN;
# see issue 4139
return to_qvariant(DEFAULT_FORMAT % value)
elif is_type_text_string(value):
# Don't perform any conversion on strings
# because it leads to differences between
# the data present in the dataframe and
# what is shown by Spyder
return value
else:
try:
return to_qvariant(to_text_string(value))
except UnicodeDecodeError:
return to_qvariant(encoding.to_unicode(value))
return to_qvariant(to_text_string(value))
elif role == Qt.BackgroundColorRole:
return to_qvariant(self.get_bgcolor(index))
elif role == Qt.FontRole:
Expand Down
2 changes: 2 additions & 0 deletions spyder/widgets/variableexplorer/tests/issue_5833.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Это,кодирование
пример,файла
26 changes: 25 additions & 1 deletion spyder/widgets/variableexplorer/tests/test_dataframeeditor.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def test_header_encoding():
model = editor.dataModel
assert model.headerData(0, orientation=Qt.Horizontal) == "Index"
assert model.headerData(1, orientation=Qt.Horizontal) == "Unnamed: 0"
assert model.headerData(2, orientation=Qt.Horizontal) == "Unieke_Idcode"
assert "Unieke_Idcode" in model.headerData(2, orientation=Qt.Horizontal)
assert model.headerData(3, orientation=Qt.Horizontal) == "a"
assert model.headerData(4, orientation=Qt.Horizontal) == "b"
assert model.headerData(5, orientation=Qt.Horizontal) == "c"
Expand Down Expand Up @@ -492,5 +492,29 @@ def test_dataframeeditor_edit_bool(qtbot, monkeypatch):
len(expected_df))


def test_non_ascii_index():
    """Check that a dataframe with a non-ascii index can be displayed.

    Loads ``issue_5833.csv`` using its first column as the index, wraps
    the result in a ``DataFrameModel`` and verifies that the value the
    model reports for row 0, column 0 is the expected Cyrillic text.
    """
    csv_path = os.path.join(FILES_PATH, 'issue_5833.csv')
    model = DataFrameModel(read_csv(csv_path, index_col=0))
    # Column 0 of the model is served from the dataframe index.
    index_cell = data(model, 0, 0)
    assert index_cell == 'пример'


def test_no_convert_strings_to_unicode():
    """Check that header, index and data strings are shown unconverted.

    The fixture is read with the ``koi8_r`` encoding, so the strings held
    by the dataframe are expected to differ from the Cyrillic literals
    below (presumably because the file is not stored in that encoding --
    TODO confirm). The model must hand those strings back as they are
    rather than re-encoding them into the "correct" text.
    """
    csv_path = os.path.join(FILES_PATH, 'issue_5833.csv')
    frame = read_csv(csv_path, index_col=0, encoding='koi8_r')
    model = DataFrameModel(frame)

    # Header of the first data column.
    first_header = model.headerData(1, orientation=Qt.Horizontal)
    assert first_header != u"Это"

    # Index cell of the first row.
    index_cell = data(model, 0, 0)
    assert index_cell != u'пример'

    # First data cell of the first row.
    value_cell = data(model, 0, 1)
    assert value_cell != u'файла'


if __name__ == "__main__":
pytest.main()