Skip to content

Commit

Permalink
Partially revert d52a483
Browse files Browse the repository at this point in the history
  • Loading branch information
BoboTiG committed Oct 5, 2021
1 parent fd5be7d commit 93590b1
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 5 deletions.
8 changes: 5 additions & 3 deletions httpie/encoding.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Union

from charset_normalizer import from_bytes
+from charset_normalizer.constant import TOO_SMALL_SEQUENCE

UTF8 = 'utf-8'

Expand All @@ -20,9 +21,10 @@ def detect_encoding(content: ContentBytes) -> str:
']"foo"'
"""
     encoding = UTF8
-    match = from_bytes(bytes(content)).best()
-    if match:
-        encoding = match.encoding
+    if len(content) > TOO_SMALL_SEQUENCE:
+        match = from_bytes(bytes(content)).best()
+        if match:
+            encoding = match.encoding
     return encoding


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
'wheel',
]
install_requires = [
-    'charset_normalizer~=2.0.5',
+    'charset_normalizer>=2.0.0',
'defusedxml>=0.6.0',
'requests[socks]>=2.22.0',
'Pygments>=2.5.2',
Expand Down
4 changes: 3 additions & 1 deletion tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""
import pytest
import responses
+from charset_normalizer.constant import TOO_SMALL_SEQUENCE

from httpie.cli.constants import PRETTY_MAP
from httpie.encoding import UTF8
Expand All @@ -12,7 +13,8 @@
from .fixtures import UNICODE


-CZECH_TEXT = 'Všichni lidé jsou si rovni.'
+CZECH_TEXT = 'Všichni lidé jsou si rovni. Všichni lidé jsou si rovni.'
+assert len(CZECH_TEXT) > TOO_SMALL_SEQUENCE
CZECH_TEXT_SPECIFIC_CHARSET = 'windows-1250'
ENCODINGS = [UTF8, CZECH_TEXT_SPECIFIC_CHARSET]

Expand Down

0 comments on commit 93590b1

Please sign in to comment.