def _ensure_translation_keeps_format_string_syntax_similar(translator):
    """Decorator that rejects translations whose str.format() syntax differs from the source.

    The wrapped translator is called as usual. Its result is then compared with
    the source string `msg`; the translation is returned only if:
      - it can be parsed as a format string,
      - it contains the same number of replacement fields as `msg`, and
      - it uses the same set of field names as `msg` (re-ordering is allowed).
    Otherwise the rejection is logged and `msg` itself is returned.

    Only replacement fields are compared. Literal text around them is ignored,
    so a translation may move a placeholder to/from the start or end of the string.

    Raises ValueError (from the returned wrapper) if the *source* string `msg`
    is itself a malformed format string.
    """
    sf = string.Formatter()

    def _replacement_field_names(format_string: str) -> list:
        # Formatter.parse() yields tuples (literal_text, field_name, format_spec, conversion).
        # A chunk of trailing literal text is reported with field_name=None; such tuples
        # are not replacement fields and must not take part in the comparison.
        return [tupl[1] for tupl in sf.parse(format_string) if tupl[1] is not None]

    @functools.wraps(translator)
    def safe_translator(msg: str, **kwargs):
        translation = translator(msg, **kwargs)
        # deliberately not guarded: a malformed source string is a programming error and should raise
        field_names1 = _replacement_field_names(msg)
        try:
            field_names2 = _replacement_field_names(translation)
        except ValueError:  # malformed format string in translation
            _logger.info(f"rejected translation string: failed to parse. original={msg!r}. {translation=!r}")
            return msg
        # num of replacement fields must match:
        if len(field_names1) != len(field_names2):
            _logger.info(f"rejected translation string: num replacement fields mismatch. original={msg!r}. {translation=!r}")
            return msg
        # set of "field_name"s must not change. (re-ordering is explicitly allowed):
        if set(field_names1) != set(field_names2):
            _logger.info(f"rejected translation string: set of field_names mismatch. original={msg!r}. {translation=!r}")
            return msg
        # checks done.
        return translation
    return safe_translator
class TestSyntaxChecks(ElectrumTestCase):
    """Tests for the format-string syntax check that wraps translator callables.

    Each case pairs a source string with the string a stub translator returns
    for it, and asserts whether the decorated translator keeps the translation
    ("accepted") or falls back to the source string ("rejected").
    """
    # convention: source strings are lowercase, dest strings are uppercase

    def _run_check(self, src, dst):
        """Return what a decorated stub translator (always answering `dst`) gives for `src`."""
        def stub_translator(x):
            return dst
        return syntax_check_decorator(stub_translator)(src)

    def _check_accepted(self, cases):
        """Assert that for every (src, dst) pair the translation `dst` is returned."""
        for src, dst in cases:
            self.assertEqual(dst, self._run_check(src, dst))

    def _check_rejected(self, cases):
        """Assert that for every (src, dst) pair the source string `src` is returned."""
        for src, dst in cases:
            self.assertEqual(src, self._run_check(src, dst))

    def test_no_format(self):
        self._check_accepted([
            ("hello there", "HELLO THEEEEERE"),
        ])

    def test_malformed_src_string_raises(self):
        with self.assertRaises(ValueError):
            self._run_check("hel{lo there", "HELLO THE{}RE")

    def test_malformed_dst_string_gets_rejected(self):
        self._check_rejected([
            ("hel{}lo there", "HELLO THE{RE"),
            ("hello there", "HELLO THE{RE"),
            ("hello there", "HELLO THE{{}RE"),
        ])

    def test_simple_substitution(self):
        self._check_accepted([
            ("hel{}lo there", "HELLO THE{}RE"),
            ("hel{}lo {} there {}", "HELLO {} THE{}RE {}"),
        ])

    def test_positional_substitution(self):
        self._check_accepted([
            ("hel{0}lo there", "HELLO THE{0}RE"),
            ("hel{0}lo there {1}", "HELLO THE{0}RE {1}"),
            ("hel{0}lo {2} there {1}", "HELLO THE{0}RE {2} {1}"),
        ])

    def test_keyword_substitution(self):
        self._check_accepted([
            ("hello there {title}. {name}. welc", "HELLO THERE {title}. {name}. WELC"),
        ])

    def test_mixed_sub(self):
        self._check_accepted([
            ("{1} aaa {qq} {0} bbb", "{1} AAA {qq} {0} BBB"),
            ("{1} aaa {pp} {qq} {0} bbb", "{1} AAA {pp} {qq} {0} BBB"),
        ])

    def test_allow_reordering_replacement_fields(self):  # language-flexibility
        self._check_accepted([
            ("time left: {0} minutes, {1} seconds", "TIME LEFT: {1} SECONDS, {0} MINUTES"),
            ("{1} aaa {pp} {qq} {0} bbb", "{qq} AAA {0} {1} {pp} BBB"),
        ])

    def test_replacement_field_name_cannot_change(self):
        # rejects:
        self._check_rejected([
            ("hel{}lo there", "HELLO THE{RE}"),
            ("hel{}lo there", "HELLO THE{0}"),
            ("hel{0}lo there", "HELLO THE{}"),
            ("hel{0}lo there", "HELLO THE{RE}"),
            ("hel{RE}lo there", "HELLO THE{}"),
            ("hel{RE}lo there", "HELLO THE{0}"),
        ])
        # we only check the set of field_names is invariant, so this is allowed:
        self._check_accepted([
            ("hello there {} {} {} {p} {q}", "HELLO THERE {} {q} {q} {p} {q}"),
        ])

    def test_replacement_field_count_cannot_change(self):
        # rejects:
        self._check_rejected([
            ("hello there", "HELLO THERE {}"),
            ("hello there", "HELLO {} {} THERE"),
            ("hello {} there", "HELLO THERE {} {}"),
            ("hello there {}", "HELLO THERE"),
            ("hello there {p} {q} {r}", "HELLO THERE {p} {q}"),
            ("hello there {p} {q} {r}", "HELLO THERE {p} {q} {r} {s}"),
            ("hello there {p} {0}", "HELLO THERE {p}"),
        ])