Skip to content

Commit

Permalink
🔀 Merge pull request
Browse files Browse the repository at this point in the history
Merge pull request #19 from kaplanPRO/development
  • Loading branch information
csengor authored Feb 22, 2022
2 parents 996c63a + 80641ed commit 472c546
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 7 deletions.
4 changes: 2 additions & 2 deletions kaplan/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.14.2'
__version__ = '0.15.0'

def can_process(input_file):
    '''
    Reports whether a file has one of the supported extensions.

    The check is case-insensitive and looks only at the end of the path;
    the file itself is never opened.

    Args:
        input_file: Path to a file (a string or a path-like object).

    Returns:
        True if the path ends with a supported extension, otherwise False.
    '''
    supported_extensions = (
        '.docx', '.json', '.kxliff', '.odp', '.ods', '.odt', '.po',
        '.sdlxliff', '.txt', '.xliff',
    )
    # str() lets pathlib.Path (and other path-like) arguments through;
    # it is a no-op for plain strings.
    return str(input_file).lower().endswith(supported_extensions)
Expand Down
85 changes: 85 additions & 0 deletions kaplan/kxliff.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from copy import deepcopy
from datetime import datetime
import html
import json
from pathlib import Path
import random
import string
Expand Down Expand Up @@ -560,6 +561,59 @@ def add_text(last_span, text):
else:
outfile.write(segment.find('source', self.nsmap).text)

elif source_filename.lower().endswith('.json'):
target_list = []
for trans_unit in source_file.findall('.//unit', self.nsmap):
target_segment = ''
for segment in trans_unit.xpath('.//xliff:segment|.//xliff:ignorable', namespaces={'xliff':self.nsmap[None]}):
target = segment.find('target', self.nsmap)
if target is not None and target.text is not None:
target_segment += target.text

else:
target_segment += segment.find('source', self.nsmap).text

key = trans_unit.attrib['{{{0}}}key'.format(self.nsmap['kaplan'])]
target_list.append([key, target_segment])

for i in range(1, len(target_list)):
p_i = i - 1
while target_list[i][0] == target_list[p_i][0]:
if target_list[p_i][1] is None:
p_i -= 1
continue
if isinstance(target_list[p_i][1], list):
target_list[p_i][1].append(target_list[i][1])
else:
target_list[p_i][1] = [target_list[p_i][1], target_list[i][1]]
target_list[i][1] = None
break

target_list = list(filter(lambda x: x[1] is not None, target_list))

for i, segment in enumerate(target_list):
segment_keys = segment[0].split('.')
segment_keys.reverse()
segment_new = {segment_keys[0]: segment[1]}
for segment_key in segment_keys[1:]:
segment_new = {segment_key: segment_new}
target_list[i] = segment_new

def _mergedict(to_dict, from_dict):
    '''
    Recursively merges from_dict into to_dict and returns to_dict.

    Keys missing from to_dict are copied over. When both sides hold a
    dictionary under the same key, the two are merged level by level.
    Any other collision (at least one side is not a dictionary) is
    resolved in favour of the value from from_dict, so a clash between a
    leaf value and a nested object no longer raises.

    Args:
        to_dict: Dictionary that receives the entries; modified in place.
        from_dict: Dictionary whose entries are merged in.

    Returns:
        to_dict, after the merge.
    '''
    for k, v in from_dict.items():
        existing = to_dict.get(k)
        if isinstance(existing, dict) and isinstance(v, dict):
            # Both sides are nested objects: descend instead of replacing.
            _mergedict(existing, v)
        else:
            # New key, or a collision that cannot be merged structurally.
            to_dict[k] = v
    return to_dict

target_dict = {}
for segment in target_list:
target_dict = _mergedict(target_dict, segment)

with open((output_directory / target_filename), 'w') as outfile:
json.dump(target_dict, outfile, indent=4)

else:
raise ValueError('Filetype incompatible for this task!')

Expand Down Expand Up @@ -1117,6 +1171,37 @@ def entry_checkpoint(entry, entry_metadata, entries):

_source.text = line

elif name.lower().endswith('.json'):
def _create_unit(source, key):
    '''
    Appends a copy of the unit template for one JSON value.

    Args:
        source: Text of the JSON value; written to the unit's source
            element.
        key: Sequence of JSON keys leading to the value; stored on the
            unit, joined with dots, in the kaplan-namespaced "key"
            attribute.
    '''
    # NOTE(review): source.strip() assumes every value is a string;
    # numbers, booleans or null in the JSON would raise here - confirm
    # whether such input is expected.
    unit = deepcopy(_tu_template)
    joined_key = '.'.join(key)
    unit.attrib['{{{0}}}key'.format(nsmap['kaplan'])] = joined_key
    source_file_reference.append(unit)

    source_element = unit.find('.//xliff:source', nsmap)
    target_element = unit.find('.//xliff:target', nsmap)

    if source.strip() != '':
        # Only non-blank units get an id; the count is taken after this
        # unit has already been appended.
        unit_count = len(source_file_reference.findall('xliff:unit', nsmap))
        unit.attrib['id'] = str(unit_count)
    else:
        # Whitespace-only values: retag the unit's first child as
        # ignorable and drop its target element.
        first_child = unit[0]
        first_child.tag = '{{{0}}}ignorable'.format(nsmap['xliff'])
        first_child.remove(target_element)

    source_element.text = source

def _extract_json(kaynak, keys=None):
    '''
    Walks a parsed JSON object and creates one unit per leaf value.

    Nested dictionaries are descended into recursively. Each item of a
    list is passed to _create_unit with the same key path as its
    siblings; every other value is passed to _create_unit directly.

    Args:
        kaynak: Dictionary to walk (the parsed JSON object, or one of its
            nested dictionaries).
        keys: List of keys leading from the top of the document to
            kaynak. Defaults to an empty path.
    '''
    # A None sentinel replaces the mutable default list.
    keys = [] if keys is None else keys

    for k, v in kaynak.items():
        path = keys + [k]
        if isinstance(v, dict):
            _extract_json(v, path)
        elif isinstance(v, list):
            # NOTE(review): list items go straight to _create_unit, so
            # dictionaries or lists nested inside a list are not
            # descended into - confirm whether such input is expected.
            for child_v in v:
                _create_unit(child_v, path)
        else:
            _create_unit(v, path)

with open(source_file_path, 'rb') as source_file:
_extract_json(json.load(source_file))

if not segmentation:
for tu in source_file_reference.findall('xliff:unit', nsmap):
segment = tu.find('xliff:segment', nsmap)
Expand Down
7 changes: 2 additions & 5 deletions kaplan/xliff.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from datetime import datetime
import difflib
import html
from io import BytesIO
from pathlib import Path

nsmap = {
Expand Down Expand Up @@ -155,10 +154,8 @@ def open_bilingualfile(cls, bilingualfile):
Opens an .xliff file.
'''
xml_root = etree.parse(bilingualfile).getroot()
if isinstance(bilingualfile, BytesIO): # TODO remove BytesIO for 0.15.0
name = bilingualfile.name
else:
name = Path(bilingualfile).name

name = Path(bilingualfile).name

return cls(name, xml_root)

Expand Down

0 comments on commit 472c546

Please sign in to comment.