openvinotoolkit · zhiltsov-max · Jun 23, 2021 · Jun 22, 2021 · Jun 23, 2021 · Jun 23, 2021
@@ -100,8 +100,8 @@ def write(self, path):
                 ann['id'] = next_id
                 next_id += 1
 
-        with open(path, 'w') as outfile:
-            json.dump(self._data, outfile)
+        with open(path, 'w', encoding='utf-8') as outfile:
+            json.dump(self._data, outfile, ensure_ascii=False)
 
     @property
     def annotations(self):
@@ -461,8 +461,8 @@ class _StuffConverter(_InstancesConverter):
 
 class _PanopticConverter(_TaskConverter):
     def write(self, path):
-        with open(path, 'w') as outfile:
-            json.dump(self._data, outfile)
+        with open(path, 'w', encoding='utf-8') as outfile:
+            json.dump(self._data, outfile, ensure_ascii=False)
 
     def save_categories(self, dataset):
         label_categories = dataset.categories().get(AnnotationType.label)

@@ -100,8 +100,9 @@ def write_categories(self, categories):
             self.categories[ann_type.name] = converted_desc
 
     def write(self, save_dir):
-        with open(osp.join(save_dir, '%s.json' % self._name), 'w') as f:
-            json.dump(self._data, f)
+        with open(osp.join(save_dir, '%s.json' % self._name), 'w',
+                encoding='utf-8') as f:
+            json.dump(self._data, f, ensure_ascii=False)
 
     def _convert_annotation(self, obj):
         assert isinstance(obj, Annotation)
@@ -290,4 +291,4 @@ def convert(cls, extractor, save_dir, **kwargs):
         DatumaroConverter.convert(extractor,
             save_dir=osp.join(
                 project.config.project_dir, project.config.dataset_dir),
-            **kwargs)
+            **kwargs)
@@ -18,6 +18,7 @@ class Requirements:
 
     # GitHub issues (not bugs)
     # https://github.com/openvinotoolkit/datumaro/issues
+    DATUM_231 = "Readable formats for CJK"
     DATUM_244 = "Add Snyk integration"
     DATUM_267 = "Add Image zip format"
     DATUM_280 = "Support KITTI dataset formats"

@@ -718,6 +718,36 @@ def test_can_save_and_load_images(self):
             self._test_save_and_load(expected_dataset,
                 CocoImageInfoConverter.convert, test_dir)
 
+    @mark_requirement(Requirements.DATUM_231)
+    def test_can_save_dataset_with_cjk_categories(self):
+        expected_dataset = Dataset.from_iterable([
+            DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
+                annotations=[
+                    Bbox(0, 1, 2, 2,
+                        label=0, group=1, id=1,
+                        attributes={ 'is_crowd': False }),
+                ], attributes={'id': 1}),
+            DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
+                annotations=[
+                    Bbox(1, 0, 2, 2, label=1, group=2, id=2,
+                        attributes={ 'is_crowd': False }),
+                ], attributes={'id': 2}),
+
+            DatasetItem(id=3, subset='train', image=np.ones((4, 4, 3)),
+                annotations=[
+                    Bbox(0, 1, 2, 2, label=2, group=3, id=3,
+                        attributes={ 'is_crowd': False }),
+                ], attributes={'id': 3}),
+            ],
+            categories=[
+                "고양이", "ネコ", "猫"
+            ]
+        )
+
+        with TestDir() as test_dir:
+            self._test_save_and_load(expected_dataset,
+                CocoInstancesConverter.convert, test_dir)
+
     @mark_requirement(Requirements.DATUM_GENERAL_REQ)
     def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
         expected_dataset = Dataset.from_iterable([

@@ -111,6 +111,37 @@ def test_relative_paths(self):
             self._test_save_and_load(test_dataset,
                 partial(DatumaroConverter.convert, save_images=True), test_dir)
 
+
+    @mark_requirement(Requirements.DATUM_231)
+    def test_can_save_dataset_with_cjk_categories(self):
+        expected = Dataset.from_iterable([
+            DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)),
+                annotations=[
+                    Bbox(0, 1, 2, 2,
+                        label=0, group=1, id=1,
+                        attributes={ 'is_crowd': False }),
+                ], attributes={'id': 1}),
+            DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)),
+                annotations=[
+                    Bbox(1, 0, 2, 2, label=1, group=2, id=2,
+                        attributes={ 'is_crowd': False }),
+                ], attributes={'id': 2}),
+
+            DatasetItem(id=3, subset='train', image=np.ones((4, 4, 3)),
+                annotations=[
+                    Bbox(0, 1, 2, 2, label=2, group=3, id=3,
+                        attributes={ 'is_crowd': False }),
+                ], attributes={'id': 3}),
+            ],
+            categories=[
+                "고양이", "ネコ", "猫"
+            ]
+        )
+
+        with TestDir() as test_dir:
+            self._test_save_and_load(expected,
+                partial(DatumaroConverter.convert, save_images=True), test_dir)
+
     @mark_requirement(Requirements.DATUM_GENERAL_REQ)
     def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
         test_dataset = Dataset.from_iterable([