-
Notifications
You must be signed in to change notification settings - Fork 71
/
replacements.py
126 lines (86 loc) · 3.39 KB
/
replacements.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""Simple typographic replacements
* ``(c)``, ``(C)`` → ©
* ``(tm)``, ``(TM)`` → ™
* ``(r)``, ``(R)`` → ®
* ``+-`` → ±
* ``...`` → …
* ``?....`` → ?..
* ``!....`` → !..
* ``????????`` → ???
* ``!!!!!`` → !!!
* ``,,,`` → ,
* ``--`` → – (en dash)
* ``---`` → — (em dash)
"""
from __future__ import annotations
import logging
import re
from ..token import Token
from .state_core import StateCore
LOGGER = logging.getLogger(__name__)

# TODO:
# - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾
# - multiplication 2 x 4 -> 2 × 4

# Cheap pre-check: matches when the text contains at least one sequence
# (+-, .., ????, !!!!, ,, or --) that the "rare" patterns below may rewrite.
RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--")

# Scoped abbreviations: (c), (tm), (r) in any letter case, and the symbol
# each lower-cased abbreviation maps to.
# NOTE: a compiled Python pattern keeps no state between searches, so this
# single pattern is used both for the `search` pre-check and for `sub`; no
# separate "test" regex is needed.
SCOPED_ABBR = {"c": "©", "r": "®", "tm": "™"}
SCOPED_ABBR_RE = re.compile(r"\((c|tm|r)\)", re.IGNORECASE)

# +-
PLUS_MINUS_RE = re.compile(r"\+-")

# Two or more consecutive dots.
ELLIPSIS_RE = re.compile(r"\.{2,}")

# A "?" or "!" immediately followed by the ellipsis character.
ELLIPSIS_QUESTION_EXCLAMATION_RE = re.compile(r"([?!])…")

# A run of four or more "?" / "!" characters; group 1 holds the last one.
QUESTION_EXCLAMATION_RE = re.compile(r"([?!]){4,}")

# Two or more consecutive commas.
COMMA_RE = re.compile(r",{2,}")

# Exactly three hyphens: not preceded or followed by another hyphen.
EM_DASH_RE = re.compile(r"(^|[^-])---(?=[^-]|$)", re.MULTILINE)

# Exactly two hyphens surrounded by whitespace or a line boundary ...
EN_DASH_RE = re.compile(r"(^|\s)--(?=\s|$)", re.MULTILINE)

# ... or surrounded by characters that are neither hyphen nor whitespace.
EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", re.MULTILINE)
def replaceFn(match: re.Match[str]) -> str:
    """Return the symbol for a matched scoped abbreviation.

    Intended as the replacement callback for ``SCOPED_ABBR_RE.sub``: the
    first capture group is lower-cased and looked up in ``SCOPED_ABBR``.
    """
    abbreviation = match[1].lower()
    return SCOPED_ABBR[abbreviation]
def replace_scoped(inlineTokens: list[Token]) -> None:
    """Replace ``(c)``, ``(tm)`` and ``(r)`` in the given tokens, in place.

    Only tokens of type ``text`` are rewritten. Text that appears between a
    ``link_open`` and a ``link_close`` token whose ``info`` is ``"auto"`` is
    left untouched.
    """
    # Non-zero while we are between an auto link_open and its link_close.
    autolink_depth = 0

    for tok in inlineTokens:
        kind = tok.type
        if kind == "text":
            if autolink_depth == 0:
                tok.content = SCOPED_ABBR_RE.sub(replaceFn, tok.content)
        elif kind == "link_open" and tok.info == "auto":
            autolink_depth += 1
        elif kind == "link_close" and tok.info == "auto":
            autolink_depth -= 1
def replace_rare(inlineTokens: list[Token]) -> None:
    """Apply the "rare" typographic replacements to the given tokens, in place.

    Only tokens of type ``text`` whose content matches ``RARE_RE`` are
    rewritten. Text that appears between a ``link_open`` and a ``link_close``
    token whose ``info`` is ``"auto"`` is left untouched.
    """
    # Applied strictly in this order: some patterns match the output of an
    # earlier step (the "?…" rule matches the ellipsis character that the
    # dot-collapsing step before it produces).
    substitutions = (
        (PLUS_MINUS_RE, "±"),  # +- -> ±
        (ELLIPSIS_RE, "…"),  # .., ..., ....... -> …
        (ELLIPSIS_QUESTION_EXCLAMATION_RE, "\\1.."),  # ?… & !… -> ?.. & !..
        (QUESTION_EXCLAMATION_RE, "\\1\\1\\1"),  # ????, !!!!! -> ???, !!!
        (COMMA_RE, ","),  # ,, ,,, ,,,, -> ,
        (EM_DASH_RE, "\\1\u2014"),  # em-dash
        (EN_DASH_RE, "\\1\u2013"),  # en-dash
        (EN_DASH_INDENT_RE, "\\1\u2013"),  # en-dash between non-space chars
    )

    # Non-zero while we are between an auto link_open and its link_close.
    autolink_depth = 0

    for tok in inlineTokens:
        kind = tok.type
        if kind == "text":
            if autolink_depth == 0 and RARE_RE.search(tok.content):
                text = tok.content
                for pattern, replacement in substitutions:
                    text = pattern.sub(replacement, text)
                tok.content = text
        elif kind == "link_open" and tok.info == "auto":
            autolink_depth += 1
        elif kind == "link_close" and tok.info == "auto":
            autolink_depth -= 1
def replace(state: StateCore) -> None:
    """Run the typographic replacements over every inline token of *state*.

    Does nothing unless ``state.md.options.typographer`` is truthy. For each
    token in ``state.tokens`` of type ``inline`` that has children, the
    token's ``content`` is used as a cheap pre-check to decide which of the
    two replacement passes to run on its children.
    """
    if not state.md.options.typographer:
        return

    for block_token in state.tokens:
        if block_token.type != "inline" or block_token.children is None:
            continue

        if SCOPED_ABBR_RE.search(block_token.content):
            replace_scoped(block_token.children)

        if RARE_RE.search(block_token.content):
            replace_rare(block_token.children)