Skip to content

Commit

Permalink
Refactors
Browse files Browse the repository at this point in the history
  • Loading branch information
ppizarror committed Jan 15, 2024
1 parent d901bed commit fef33d6
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 17 deletions.
13 changes: 6 additions & 7 deletions PyMultiDictionary/_goslate.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ class Goslate(object):
>>> print(gs_roman.translate('hello', 'zh'))
Nín hǎo
"""

_MAX_LENGTH_PER_QUERY = 1800

def __init__(self, writing=WRITING_NATIVE, opener=None, retry_times=4, executor=_g_executor,
Expand Down Expand Up @@ -295,7 +294,7 @@ def get_languages(self):
self._languages = languages
return self._languages

_SEPERATORS = [quote_plus(i.encode('utf-8')) for i in
_SEPERATORS = [quote_plus(i.encode()) for i in
u'.!?,;。,?!::"“”’‘#$%&()()*×+/<=>@#¥[\]…[]^`{|}{}~~\n\r\t ']

def _translate_single_text(self, text, target_language, source_lauguage):
Expand Down Expand Up @@ -332,7 +331,7 @@ def translate(self, text, target_language, source_language='auto'):
- Input all source strings at once. Goslate will batch and fetch concurrently to maximize speed.
- `futures <https://pypi.python.org/pypi/futures>`_ is required for best performance.
- It returns generator on batch input in order to better fit pipeline architecture
- It returns generator on batch input to better fit pipeline architecture
:param text: The source text(s) to be translated. Batch translation is supported via sequence input
:type text: UTF-8 str; unicode; string sequence (list, tuple, iterator, generator)
Expand Down Expand Up @@ -394,17 +393,17 @@ def translate(self, text, target_language, source_language='auto'):

if not _is_sequence(text):
if isinstance(text, str):
text = text.encode('utf-8')
text = text.encode()
return _unwrapper_single_element(self._translate_single_text(text, target_language, source_language))

JOINT = u'\u26ff'
UTF8_JOINT = (u'\n%s\n' % JOINT).encode('utf-8')
UTF8_JOINT = (u'\n%s\n' % JOINT).encode()

def join_texts(texts):
def convert_to_utf8(texts):
for i in texts:
if isinstance(i, str):
i = i.encode('utf-8')
i = i.encode()
yield i.strip()

texts = convert_to_utf8(texts)
Expand Down Expand Up @@ -443,7 +442,7 @@ def detect(self, text):
- Input all source strings at once. Goslate will detect concurrently to maximize speed.
- `futures <https://pypi.python.org/pypi/futures>`_ is required for best performance.
- It returns generator on batch input in order to better fit pipeline architecture.
- It returns generator on batch input to better fit pipeline architecture.
:param text: The source text(s) whose language you want to identify.
Batch detection is supported via sequence input
Expand Down
1 change: 0 additions & 1 deletion PyMultiDictionary/_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ class RegexpTokenizer(TokenizerI):
:param flags: The regexp flags used to compile this
tokenizer's pattern. By default, the following flags are
used: `re.UNICODE | re.MULTILINE | re.DOTALL`.
"""

def __init__(
Expand Down
24 changes: 16 additions & 8 deletions PyMultiDictionary/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,14 @@
('no', 'Norvegese'), ('pl', 'Polacco'), ('pt', 'Portoghese'), ('ro', 'Rumeno'), ('ru', 'Russo'),
('sv', 'Svedese'), ('ta', 'Tamil'), ('tr', 'Turco'), ('uk', 'Ucraino'), ('vi', 'Vietnamita'),
('zh', 'Cinese')],
'ja': [('af', 'アフリカーンス語'), ('ar', 'アラビア語'), ('bn', 'ベンガル語'), ('de', 'ドイツ語'), ('el', 'ギリシャ語'), ('en', '英語'),
('es', 'スペイン語'), ('fr', 'フランス語'), ('hi', 'ヒンディー語'), ('it', 'イタリア語'), ('ja', '日本語'), ('jv', 'ジャワ語'),
('ko', '韓国語'), ('mr', 'マラーティー語'), ('ms', 'マレー語'), ('no', 'ノルウェー語'), ('pl', 'ポーランド語'), ('pt', 'ポルトガル語'),
('ro', 'ルーマニア語'), ('ru', 'ロシア語'), ('sv', 'スウェーデン語'), ('ta', 'タミル語'), ('tr', 'トルコ語'), ('uk', 'ウクライナ語'),
'ja': [('af', 'アフリカーンス語'), ('ar', 'アラビア語'), ('bn', 'ベンガル語'), ('de', 'ドイツ語'),
('el', 'ギリシャ語'), ('en', '英語'),
('es', 'スペイン語'), ('fr', 'フランス語'), ('hi', 'ヒンディー語'), ('it', 'イタリア語'), ('ja', '日本語'),
('jv', 'ジャワ語'),
('ko', '韓国語'), ('mr', 'マラーティー語'), ('ms', 'マレー語'), ('no', 'ノルウェー語'),
('pl', 'ポーランド語'), ('pt', 'ポルトガル語'),
('ro', 'ルーマニア語'), ('ru', 'ロシア語'), ('sv', 'スウェーデン語'), ('ta', 'タミル語'), ('tr', 'トルコ語'),
('uk', 'ウクライナ語'),
('vi', 'ベトナム語'), ('zh', '中国語')],
'jv': [('af', 'Basa afrikaans'), ('ar', 'Basa arab'), ('bn', 'Basa bengali'), ('de', 'Basa jerman'),
('el', 'Basa yunani'), ('en', 'Basa inggris'), ('es', 'Basa spanyol'), ('fr', 'Basa prancis'),
Expand Down Expand Up @@ -130,10 +134,14 @@
('no', 'Норвезька'), ('pl', 'Польська'), ('pt', 'Португальська'), ('ro', 'Румунська'), ('ru', 'Російська'),
('sv', 'Шведська'), ('ta', 'Тамільська'), ('tr', 'Турецька'), ('uk', 'Українська'), ('vi', 'В’єтнамська'),
('zh', 'Китайська')],
'zh': [('af', '布尔语(南非荷兰语)'), ('ar', '阿拉伯语'), ('bn', '孟加拉语'), ('de', '德语'), ('el', '希腊语'), ('en', '英语'),
('es', '西班牙语'), ('fr', '法语'), ('hi', '印地语'), ('it', '意大利语'), ('ja', '日语'), ('jv', '印尼爪哇语'), ('ko', '韩语'),
('mr', '马拉地语'), ('ms', '马来语'), ('no', '挪威语'), ('pl', '波兰语'), ('pt', '葡萄牙语'), ('ro', '罗马尼亚语'), ('ru', '俄语'),
('sv', '瑞典语'), ('ta', '泰米尔语'), ('tr', '土耳其语'), ('uk', '乌克兰语'), ('vi', '越南语'), ('zh', '中文')]
'zh': [('af', '布尔语(南非荷兰语)'), ('ar', '阿拉伯语'), ('bn', '孟加拉语'), ('de', '德语'), ('el', '希腊语'),
('en', '英语'),
('es', '西班牙语'), ('fr', '法语'), ('hi', '印地语'), ('it', '意大利语'), ('ja', '日语'),
('jv', '印尼爪哇语'), ('ko', '韩语'),
('mr', '马拉地语'), ('ms', '马来语'), ('no', '挪威语'), ('pl', '波兰语'), ('pt', '葡萄牙语'),
('ro', '罗马尼亚语'), ('ru', '俄语'),
('sv', '瑞典语'), ('ta', '泰米尔语'), ('tr', '土耳其语'), ('uk', '乌克兰语'), ('vi', '越南语'),
('zh', '中文')]

}

Expand Down
2 changes: 1 addition & 1 deletion PyMultiDictionary/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class Version(tuple):
__slots__ = ()
fields = 'major', 'minor', 'patch'

def __new__(cls, major, minor, patch) -> 'Version':
def __new__(cls, major, minor, patch) -> tuple:
return tuple.__new__(cls, (major, minor, patch))

def __repr__(self) -> str:
Expand Down

0 comments on commit fef33d6

Please sign in to comment.