1
0

Merge pull request #9801 from SomberNight/202505_ban_unicode

ci: add linter task "ban unicode" to protect against malicious unicode
This commit is contained in:
ThomasV
2025-05-19 15:02:45 +02:00
committed by GitHub
3 changed files with 67 additions and 1 deletions

View File

@@ -174,6 +174,14 @@ task:
ELECTRUM_LINTERS_IGNORE: ""
allow_failures: true
task:
name: "linter: ban unicode"
container:
image: python:3.10
cpu: 1
memory: 1G
main_script:
- contrib/ban_unicode.py
# Cron jobs configured in https://cirrus-ci.com/settings/...
# - job "nightly" on branch "master" at "0 30 2 * * ?" (every day at 02:30Z)

58
contrib/ban_unicode.py Executable file
View File

@@ -0,0 +1,58 @@
#!/usr/bin/env python3
#
# Copyright (C) 2025 The Electrum developers
# Distributed under the MIT software license, see the accompanying
# file LICENCE or http://www.opensource.org/licenses/mit-license.php
#
# This script scans the whole codebase for unicode characters and
# errors if it finds any, unless the character is specifically whitelisted below.
# The motivation is to protect against homoglyph attacks, invisible unicode characters,
# bidirectional and other control characters, and other malicious unicode usage.
# Given that we mostly expect to use ASCII characters in the source code,
# the most robust and generic fix seems to be to just ban all unicode usage.
import os.path
import subprocess
import sys
project_root = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
os.chdir(project_root)
EXCLUDE_PATH_PREFIX = {
"electrum/wordlist/",
"fastlane/",
"tests/",
}
EXCLUDE_EXTENSIONS = {
".jpg", ".jpeg", ".png", ".ttf", ".otf", ".pdn", ".icns", ".ico", ".gif",
}
UNICODE_WHITELIST = {
"💬", "🗯", "", chr(0xfe0f), "", "", "", "", "", "", "", "",
"á", "é", "",
"", "", "", "",
}
exit_code = 0
bfiles = subprocess.check_output(["git", "ls-files"])
bfiles = bfiles.decode("utf-8")
for file_path in bfiles.splitlines():
if os.path.isdir(file_path):
continue
if any(file_path.startswith(pattern) for pattern in EXCLUDE_PATH_PREFIX):
continue
_fname, ext = os.path.splitext(file_path)
if ext in EXCLUDE_EXTENSIONS:
continue
# open file
try:
with open(file_path, "r", encoding="utf-8") as f:
for line_no, line in enumerate(f.read().splitlines()):
for char in line:
if ord(char)>0x7f and char not in UNICODE_WHITELIST:
print(f"{file_path}:{line_no}. {line=}. hex={hex(ord(char))}. {char=}")
exit_code = 1
except UnicodeDecodeError as e:
raise Exception(f"cannot parse file {file_path=}") from e
sys.exit(exit_code)

View File

@@ -1,4 +1,4 @@
Copyright (c) 2011, ParaType Ltd. (http://www.paratype.com/public),
Copyright (c) 2011, ParaType Ltd. (http://www.paratype.com/public),
with Reserved Font Names "PT Sans", "PT Serif", "PT Mono" and "ParaType".
This Font Software is licensed under the SIL Open Font License, Version 1.1.