diff --git a/docker/Dockerfile b/docker/Dockerfile
index 2d3a1919f..771f25e9b 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -13,6 +13,7 @@ RUN apt-get update && \
         git \
         libarchive-dev \
         libboost-python-dev \
+        libicu-dev \
         libosmpbf-dev \
         libprotobuf-dev \
         locales \
@@ -23,6 +24,7 @@ RUN apt-get update && \
         postgresql-client \
         protobuf-compiler \
         python3-dev \
+        python3-icu \
         python3-pip \
         python3-setuptools \
         python3-wheel \
diff --git a/mypy.ini b/mypy.ini
index f5b4fd823..f28589069 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -10,12 +10,16 @@ ignore_errors = True
 # External
 [mypy-pandas.*]
 ignore_missing_imports = True
+[mypy-icu.*]
+ignore_missing_imports = True
 [mypy-ipyleaflet.*]
 ignore_missing_imports = True
 [mypy-ipywidgets.*]
 ignore_missing_imports = True
 [mypy-antlr4.*]
 ignore_missing_imports = True
+[mypy-myanmartools.*]
+ignore_missing_imports = True
 [mypy-Pyro.*]
 ignore_missing_imports = True
 [mypy-shapely.*]
diff --git a/plugins/TagFix_ZawgyiBurmese.py b/plugins/TagFix_ZawgyiBurmese.py
new file mode 100644
index 000000000..8f688518a
--- /dev/null
+++ b/plugins/TagFix_ZawgyiBurmese.py
@@ -0,0 +1,118 @@
+#-*- coding: utf-8 -*-
+
+###########################################################################
+##                                                                       ##
+## Copyrights Sascha Brawer 2025                                         ##
+##                                                                       ##
+## This program is free software: you can redistribute it and/or modify  ##
+## it under the terms of the GNU General Public License as published by  ##
+## the Free Software Foundation, either version 3 of the License, or     ##
+## (at your option) any later version.                                   ##
+##                                                                       ##
+## This program is distributed in the hope that it will be useful,       ##
+## but WITHOUT ANY WARRANTY; without even the implied warranty of        ##
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         ##
+## GNU General Public License for more details.                          ##
+##                                                                       ##
+## You should have received a copy of the GNU General Public License     ##
+## along with this program.  If not, see <http://www.gnu.org/licenses/>. ##
+##                                                                       ##
+###########################################################################
+
+from modules.OsmoseTranslation import T_
+from plugins.Plugin import Plugin
+
+import myanmartools
+import icu
+
+
+# https://en.wikipedia.org/wiki/Zawgyi_font
+
+class TagFix_ZawgyiBurmese(Plugin):
+    """Flag tag values written in the obsolete Zawgyi encoding of Burmese.
+
+    Every offending value is reported as class 50706 together with a fix
+    that replaces it by the output of ICU's 'Zawgyi-my' transliterator.
+    """
+
+    only_for = ['MM']
+
+    def init(self, logger):
+        """Register error class 50706 and create the detector and converter."""
+        Plugin.init(self, logger)
+        self.errors[50706] = self.def_class(
+            item = 5070,
+            level = 2,
+            tags = ['value', 'fix:chair'],
+            title = T_('Value contains Zawgyi-encoded Burmese characters'),
+            detail = T_(
+'''Tag values should be stored in Unicode. However, this
+value contains Burmese characters in the obsolete “Zawgyi” font encoding.
+As long as this value is stored in a non-standard way, modern devices cannot
+display it correctly. Please change the text to be encoded in Unicode.'''),
+        )
+        self.detector = myanmartools.ZawgyiDetector()
+        self.converter = icu.Transliterator.createInstance('Zawgyi-my')
+
+    def node(self, data, tags):
+        """Return one class-50706 error, with a fix, per Zawgyi-looking value.
+
+        `data` is not used; `tags` maps tag keys to their string values.
+        """
+        errs = []
+        for key, value in tags.items():
+            # Cheap pre-filter: skip values that contain no code point from
+            # the Myanmar Unicode block (U+1000..U+109F).
+            if not any(0x1000 <= ord(c) <= 0x109F for c in value):
+                continue
+            # Only report when the detector's Zawgyi probability is >= 0.8.
+            score = self.detector.get_zawgyi_probability(value)
+            if score < 0.8:
+                continue
+            # A conversion that changes nothing leaves nothing to fix.
+            fixed_value = self.converter.transliterate(value)
+            if value == fixed_value:
+                continue
+            errs.append({'class': 50706, 'subclass': 0, 'fix': {key: fixed_value}})
+        return errs
+
+    def way(self, data, tags, nodes):
+        """Run the same check as node(); `nodes` is not used."""
+        return self.node(data, tags)
+
+    def relation(self, data, tags, members):
+        """Run the same check as node(); `members` is not used."""
+        return self.node(data, tags)
+
+
+###########################################################################
+from plugins.Plugin import TestPluginCommon
+
+
+class Test(TestPluginCommon):
+    def test(self):
+        a = TagFix_ZawgyiBurmese(None)
+        a.init(None)
+        for name in [
+            '',
+            'foo',
+            'ဘားအံ',
+            'ကျိုက်မရော အဝေးပြေးလမ်း',
+        ]:
+            assert not a.node(None, {'name': name}), name
+            assert not a.way(None, {'name': name}, nodes=None), name
+            assert not a.relation(None, {'name': name}, members=None), name
+
+        for zawgyi, uni in [('မ္း', 'မ်း'), ('က္ေ', 'က်ေ')]:
+            self.check_err(
+                a.node(None, {'addr:street': zawgyi}),
+                {'class': 50706, 'subclass': 0, 'fix': {'addr:street': uni}},
+            )
+            self.check_err(
+                a.way(None, {'addr:city': zawgyi}, nodes=None),
+                {'class': 50706, 'subclass': 0, 'fix': {'addr:city': uni}},
+            )
+            self.check_err(
+                a.relation(None, {'fixme': zawgyi}, members=None),
+                {'class': 50706, 'subclass': 0, 'fix': {'fixme': uni}},
+            )
diff --git a/requirements.txt b/requirements.txt
index f53d5e9be..f89106d0c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,6 +17,8 @@ tiletanic
 sentry-sdk
 wikitextparser
 pycountry
+myanmartools
+PyICU
 
 # Tests
 pytest == 7.4.4 # In v8 it skips the plugins folder, see our issue #2266 and https://github.com/pytest-dev/pytest/issues/12605