i18n: syntax-check translations at runtime
We often call str.format() on translated strings.
E.g. `_("time left: {} seconds").format(t1)`
If the translated string has a different format syntax, this can raise at runtime.
This PR adds some runtime checks that try to ensure the source string and the translated string
have a similar format syntax. If the checks fail, `_()` will "reject" the translation by
returning the source string.
fixes https://github.com/spesmilo/electrum/issues/10010
ref https://github.com/spesmilo/electrum/issues/10007#issue-3203378250
This commit is contained in:
@@ -22,7 +22,9 @@
|
||||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
import functools
|
||||
import os
|
||||
import string
|
||||
from typing import Optional
|
||||
|
||||
import gettext
|
||||
@@ -44,6 +46,35 @@ def _get_null_translations():
|
||||
_language = _get_null_translations()
|
||||
|
||||
|
||||
def _ensure_translation_keeps_format_string_syntax_similar(translator):
    """Decorate *translator* so that syntactically incompatible translations are rejected.

    The wrapped translator compares the str.format() replacement fields of the
    source string with those of the translated string. If they are not similar,
    the translation is rejected by falling back to the source string.

    A translation is accepted only if:
    - it can be parsed as a format string,
    - it has the same number of replacement fields as the source string, and
    - it uses the same set of field names (re-ordering is explicitly allowed).

    Only replacement fields are compared. Literal text is ignored, including where
    it is placed relative to the fields and any escaped braces ("{{" / "}}") in it.

    Raises (from the wrapped translator):
        ValueError: if the *source* string itself is a malformed format string.
    """
    sf = string.Formatter()

    def _replacement_field_names(format_string: str) -> list:
        # parse() yields tuples (literal_text, field_name, format_spec, conversion).
        # Chunks that are only literal text (trailing text, or text that got split
        # at an escaped brace) have field_name=None: those are not replacement
        # fields, so they must not take part in the comparison.
        # Note: auto-numbered "{}" has field_name "" (empty string), not None.
        return [
            field_name
            for _literal, field_name, _spec, _conversion in sf.parse(format_string)
            if field_name is not None
        ]

    @functools.wraps(translator)
    def safe_translator(msg: str, **kwargs):
        translation = translator(msg, **kwargs)
        # a malformed source string is a programming error: let ValueError propagate
        field_names1 = _replacement_field_names(msg)
        try:
            field_names2 = _replacement_field_names(translation)
        except ValueError:  # malformed format string in translation
            _logger.info(f"rejected translation string: failed to parse. original={msg!r}. {translation=!r}")
            return msg
        # num of replacement fields must match:
        if len(field_names1) != len(field_names2):
            _logger.info(f"rejected translation string: num replacement fields mismatch. original={msg!r}. {translation=!r}")
            return msg
        # set of "field_name"s must not change. (re-ordering is explicitly allowed):
        if set(field_names1) != set(field_names2):
            _logger.info(f"rejected translation string: set of field_names mismatch. original={msg!r}. {translation=!r}")
            return msg
        # checks done.
        return translation

    return safe_translator
|
||||
|
||||
|
||||
# note: do not use old-style (%) formatting inside translations,
|
||||
# as syntactically incorrectly translated strings often raise exceptions (see #3237).
|
||||
# e.g. consider _("Connected to %d nodes.") % n # <- raises. do NOT use
|
||||
@@ -57,6 +88,7 @@ _language = _get_null_translations()
|
||||
# However, only if the translators understand and use it correctly!
|
||||
# _("time left: {0} minutes, {1} seconds").format(t//60, t%60) # <- works. ok to use
|
||||
# _("time left: {mins} minutes, {secs} seconds").format(mins=t//60, secs=t%60) # <- works, but too complex
|
||||
@_ensure_translation_keeps_format_string_syntax_similar
|
||||
def _(msg: str, *, context=None) -> str:
|
||||
if msg == "":
|
||||
return "" # empty string must not be translated. see #7158
|
||||
|
||||
93
tests/test_i18n.py
Normal file
93
tests/test_i18n.py
Normal file
@@ -0,0 +1,93 @@
|
||||
from electrum import i18n
|
||||
from electrum.i18n import _ensure_translation_keeps_format_string_syntax_similar
|
||||
|
||||
from . import ElectrumTestCase
|
||||
|
||||
|
||||
# Short alias for the decorator imported above from electrum.i18n.
syntax_check_decorator = _ensure_translation_keeps_format_string_syntax_similar
|
||||
|
||||
|
||||
class TestSyntaxChecks(ElectrumTestCase):
    """Checks which (source, translation) pairs the syntax-check decorator accepts.

    Each case wraps a dummy translator that always returns the given translation,
    then asserts that the decorated translator returns either the translation
    (accepted) or the source string (rejected).
    """
    # convention: source strings are lowercase, dest strings are uppercase

    def _assert_translations_accepted(self, *cases):
        # accepted: the decorated translator hands back the translation unchanged
        for source, translated in cases:
            decorated = syntax_check_decorator(lambda x: translated)
            self.assertEqual(translated, decorated(source))

    def _assert_translations_rejected(self, *cases):
        # rejected: the decorated translator falls back to the source string
        for source, translated in cases:
            decorated = syntax_check_decorator(lambda x: translated)
            self.assertEqual(source, decorated(source))

    def test_no_format(self):
        self._assert_translations_accepted(
            ("hello there", "HELLO THEEEEERE"),
        )

    def test_malformed_src_string_raises(self):
        source, translated = "hel{lo there", "HELLO THE{}RE"
        decorated = syntax_check_decorator(lambda x: translated)
        with self.assertRaises(ValueError):
            decorated(source)

    def test_malformed_dst_string_gets_rejected(self):
        self._assert_translations_rejected(
            ("hel{}lo there", "HELLO THE{RE"),
            ("hello there", "HELLO THE{RE"),
            ("hello there", "HELLO THE{{}RE"),
        )

    def test_simple_substitution(self):
        self._assert_translations_accepted(
            ("hel{}lo there", "HELLO THE{}RE"),
            ("hel{}lo {} there {}", "HELLO {} THE{}RE {}"),
        )

    def test_positional_substitution(self):
        self._assert_translations_accepted(
            ("hel{0}lo there", "HELLO THE{0}RE"),
            ("hel{0}lo there {1}", "HELLO THE{0}RE {1}"),
            ("hel{0}lo {2} there {1}", "HELLO THE{0}RE {2} {1}"),
        )

    def test_keyword_substitution(self):
        self._assert_translations_accepted(
            ("hello there {title}. {name}. welc", "HELLO THERE {title}. {name}. WELC"),
        )

    def test_mixed_sub(self):
        self._assert_translations_accepted(
            ("{1} aaa {qq} {0} bbb", "{1} AAA {qq} {0} BBB"),
            ("{1} aaa {pp} {qq} {0} bbb", "{1} AAA {pp} {qq} {0} BBB"),
        )

    def test_allow_reordering_replacement_fields(self):  # language-flexibility
        self._assert_translations_accepted(
            ("time left: {0} minutes, {1} seconds", "TIME LEFT: {1} SECONDS, {0} MINUTES"),
            ("{1} aaa {pp} {qq} {0} bbb", "{qq} AAA {0} {1} {pp} BBB"),
        )

    def test_replacement_field_name_cannot_change(self):
        # rejects:
        self._assert_translations_rejected(
            ("hel{}lo there", "HELLO THE{RE}"),
            ("hel{}lo there", "HELLO THE{0}"),
            ("hel{0}lo there", "HELLO THE{}"),
            ("hel{0}lo there", "HELLO THE{RE}"),
            ("hel{RE}lo there", "HELLO THE{}"),
            ("hel{RE}lo there", "HELLO THE{0}"),
        )
        # we only check the set of field_names is invariant, so this is allowed:
        self._assert_translations_accepted(
            ("hello there {} {} {} {p} {q}", "HELLO THERE {} {q} {q} {p} {q}"),
        )

    def test_replacement_field_count_cannot_change(self):
        # rejects:
        self._assert_translations_rejected(
            ("hello there", "HELLO THERE {}"),
            ("hello there", "HELLO {} {} THERE"),
            ("hello {} there", "HELLO THERE {} {}"),
            ("hello there {}", "HELLO THERE"),
            ("hello there {p} {q} {r}", "HELLO THERE {p} {q}"),
            ("hello there {p} {q} {r}", "HELLO THERE {p} {q} {r} {s}"),
            ("hello there {p} {0}", "HELLO THERE {p}"),
        )
|
||||
Reference in New Issue
Block a user