🔀 Merge pull request

Merge pull request #19 from kaplanPRO/development
kaplanPRO · Feb 22, 2022 · 472c546
2 parents 996c63a + 80641ed
commit 472c546
Show file tree

Hide file tree

Showing 3 changed files with 89 additions and 7 deletions.
diff --git a/kaplan/__init__.py b/kaplan/__init__.py
@@ -1,4 +1,4 @@
-__version__ = '0.14.2'
+__version__ = '0.15.0'
 
 def can_process(input_file):
     '''
@@ -7,7 +7,7 @@ def can_process(input_file):
     Args:
         input_file: Path to a file.
     '''
-    if input_file.lower().endswith(('.docx', '.kxliff', '.odp', '.ods', '.odt', '.po', '.sdlxliff', '.txt', '.xliff')):
+    if input_file.lower().endswith(('.docx', '.json', '.kxliff', '.odp', '.ods', '.odt', '.po', '.sdlxliff', '.txt', '.xliff')):
         return True
     else:
         return False

diff --git a/kaplan/kxliff.py b/kaplan/kxliff.py
@@ -6,6 +6,7 @@
 from copy import deepcopy
 from datetime import datetime
 import html
+import json
 from pathlib import Path
 import random
 import string
@@ -560,6 +561,59 @@ def add_text(last_span, text):
                         else:
                             outfile.write(segment.find('source', self.nsmap).text)
 
+        elif source_filename.lower().endswith('.json'):
+            target_list = []
+            for trans_unit in source_file.findall('.//unit', self.nsmap):
+                target_segment = ''
+                for segment in trans_unit.xpath('.//xliff:segment|.//xliff:ignorable', namespaces={'xliff':self.nsmap[None]}):
+                    target = segment.find('target', self.nsmap)
+                    if target is not None and target.text is not None:
+                        target_segment += target.text
+
+                    else:
+                        target_segment += segment.find('source', self.nsmap).text
+
+                key = trans_unit.attrib['{{{0}}}key'.format(self.nsmap['kaplan'])]
+                target_list.append([key, target_segment])
+
+            for i in range(1, len(target_list)):
+                p_i = i - 1
+                while target_list[i][0] == target_list[p_i][0]:
+                    if target_list[p_i][1] is None:
+                        p_i -= 1
+                        continue
+                    if isinstance(target_list[p_i][1], list):
+                        target_list[p_i][1].append(target_list[i][1])
+                    else:
+                        target_list[p_i][1] = [target_list[p_i][1], target_list[i][1]]
+                    target_list[i][1] = None
+                    break
+
+            target_list = list(filter(lambda x: x[1] is not None, target_list))
+
+            for i, segment in enumerate(target_list):
+                segment_keys = segment[0].split('.')
+                segment_keys.reverse()
+                segment_new = {segment_keys[0]: segment[1]}
+                for segment_key in segment_keys[1:]:
+                    segment_new = {segment_key: segment_new}
+                target_list[i] = segment_new
+
+            def _mergedict(to_dict, from_dict):
+                for k, v in from_dict.items():
+                    if k not in to_dict:
+                        to_dict[k] = v
+                    else:
+                        to_dict[k] = _mergedict(to_dict[k], from_dict[k])
+                return to_dict
+
+            target_dict = {}
+            for segment in target_list:
+                target_dict = _mergedict(target_dict, segment)
+
+            with open((output_directory / target_filename), 'w') as outfile:
+                json.dump(target_dict, outfile, indent=4)
+
         else:
             raise ValueError('Filetype incompatible for this task!')
 
@@ -1117,6 +1171,37 @@ def entry_checkpoint(entry, entry_metadata, entries):
 
                     _source.text = line
 
+        elif name.lower().endswith('.json'):
+            def _create_unit(source, key):
+
+                _tu = deepcopy(_tu_template)
+                _tu.attrib['{{{0}}}key'.format(nsmap['kaplan'])] = '.'.join(key)
+                source_file_reference.append(_tu)
+
+                _source = _tu.find('.//xliff:source', nsmap)
+                _target = _tu.find('.//xliff:target', nsmap)
+
+                if source.strip() == '':
+                    _tu[0].tag = '{{{0}}}ignorable'.format(nsmap['xliff'])
+                    _tu[0].remove(_target)
+                else:
+                    _tu.attrib['id'] = str(len(source_file_reference.findall('xliff:unit', nsmap)))
+
+                _source.text = source
+
+            def _extract_json(kaynak, keys=[]):
+                for k, v in kaynak.items():
+                    if isinstance(v, dict):
+                        _extract_json(v, (keys + [k]))
+                    elif isinstance(v, list):
+                        for child_v in v:
+                            _create_unit(child_v, (keys + [k]))
+                    else:
+                        _create_unit(v, (keys + [k]))
+
+            with open(source_file_path, 'rb') as source_file:
+                _extract_json(json.load(source_file))
+
         if not segmentation:
             for tu in source_file_reference.findall('xliff:unit', nsmap):
                 segment = tu.find('xliff:segment', nsmap)

diff --git a/kaplan/xliff.py b/kaplan/xliff.py
@@ -7,7 +7,6 @@
 from datetime import datetime
 import difflib
 import html
-from io import BytesIO
 from pathlib import Path
 
 nsmap = {
@@ -155,10 +154,8 @@ def open_bilingualfile(cls, bilingualfile):
         Opens an .xliff file.
         '''
         xml_root = etree.parse(bilingualfile).getroot()
-        if isinstance(bilingualfile, BytesIO): # TODO remove BytesIO for 0.15.0
-            name = bilingualfile.name
-        else:
-            name = Path(bilingualfile).name
+
+        name = Path(bilingualfile).name
 
         return cls(name, xml_root)