Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rollup of 5 pull requests #62452

Merged
merged 22 commits into the base branch from the rollup branch
Jul 7, 2019
Merged
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit — hold Shift and click to select a range
89feb6d
Clean up unicode.py script
pawroman Apr 18, 2019
a580421
More cleanups for unicode.py
pawroman Apr 18, 2019
edbc27d
Fix tidy errors
pawroman Apr 18, 2019
2c9c978
Refactor and document unicode.py script
pawroman Apr 19, 2019
60ccf89
Apply suggestions from code review
pawroman Jun 10, 2019
49fbd76
Make the Weak::{into,as}_raw methods
vorner Jun 15, 2019
2b47a08
Address review remarks in unicode.py
pawroman Jun 10, 2019
05c1e92
Correct definition of CONSOLE_SCREEN_BUFFER_INFO
tesuji Jul 5, 2019
42c3d37
Remove use of mem::uninitialized in libterm crate
tesuji Jul 5, 2019
7646d49
Remove use of mem::uninitialized in code_gen crate
tesuji Jul 5, 2019
15042a3
`#[rustc_doc_only_macro]` -> `#[rustc_builtin_macro]`
petrochenkov Jun 29, 2019
22d6d8a
`#[rustc_transparent_macro]` -> `#[rustc_macro_transparency = ...]`
petrochenkov Jun 23, 2019
ab112ca
Improve documentation for built-in macros
petrochenkov Jun 29, 2019
987be89
Fix tidy issues
petrochenkov Jun 29, 2019
920a17a
privacy: Only opaque macros leak private things
petrochenkov Jun 29, 2019
3274507
resolve: Reserve cfg/cfg_attr/derive only in attribute sub-namespace
petrochenkov Jun 29, 2019
7a2a17a
normalize use of backticks/lowercase in compiler messages for librust…
Jul 6, 2019
327c54e
Rollup merge of #60081 - pawroman:cleanup_unicode_script, r=varkor
Centril Jul 6, 2019
296e825
Rollup merge of #61862 - vorner:weak-into-raw-methods, r=sfackler
Centril Jul 6, 2019
154726c
Rollup merge of #62243 - petrochenkov:macrodoc, r=eddyb
Centril Jul 6, 2019
30e4a87
Rollup merge of #62422 - lzutao:remove-some-mem-uinit, r=alexcrichton
Centril Jul 6, 2019
7ef02dc
Rollup merge of #62436 - fakenine:normalize_use_of_backticks_compiler…
Centril Jul 6, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 23 additions & 25 deletions src/libcore/unicode/unicode.py
Original file line number Diff line number Diff line change
@@ -28,14 +28,14 @@
# we don't use enum.Enum because of Python 2.7 compatibility
class UnicodeFiles(object):
# ReadMe does not contain any unicode data, we
# use it to extract versions.
# only use it to extract versions.
README = "ReadMe.txt"

DERIVED_CORE_PROPERTIES = "DerivedCoreProperties.txt"
DERIVED_NORMALIZATION_PROPS = "DerivedNormalizationProps.txt"
SPECIAL_CASING = "SpecialCasing.txt"
SCRIPTS = "Scripts.txt"
PROPS = "PropList.txt"
SCRIPTS = "Scripts.txt"
SPECIAL_CASING = "SpecialCasing.txt"
UNICODE_DATA = "UnicodeData.txt"


@@ -66,15 +66,15 @@ class UnicodeFiles(object):
# Mapping taken from Table 12 from:
# http://www.unicode.org/reports/tr44/#General_Category_Values
EXPANDED_CATEGORIES = {
'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
'Lm': ['L'], 'Lo': ['L'],
'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
'Nd': ['N'], 'Nl': ['N'], 'No': ['N'],
'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
"Lu": ["LC", "L"], "Ll": ["LC", "L"], "Lt": ["LC", "L"],
"Lm": ["L"], "Lo": ["L"],
"Mn": ["M"], "Mc": ["M"], "Me": ["M"],
"Nd": ["N"], "Nl": ["N"], "No": ["N"],
"Pc": ["P"], "Pd": ["P"], "Ps": ["P"], "Pe": ["P"],
"Pi": ["P"], "Pf": ["P"], "Po": ["P"],
"Sm": ["S"], "Sc": ["S"], "Sk": ["S"], "So": ["S"],
"Zs": ["Z"], "Zl": ["Z"], "Zp": ["Z"],
"Cc": ["C"], "Cf": ["C"], "Cs": ["C"], "Co": ["C"], "Cn": ["C"],
}

# these are the surrogate codepoints, which are not valid rust characters
@@ -115,7 +115,7 @@ def fetch_files(version=None):
readme_content = subprocess.check_output(("curl", readme_url))

unicode_version = parse_unicode_version(
str(readme_content, "utf8")
readme_content.decode("utf8")
)

download_dir = os.path.join(FETCH_DIR, unicode_version.as_str)
@@ -415,7 +415,7 @@ def compute_trie(rawdata, chunksize):
child_data = []
for i in range(len(rawdata) // chunksize):
data = rawdata[i * chunksize: (i + 1) * chunksize]
child = '|'.join(map(str, data))
child = "|".join(map(str, data))
if child not in childmap:
childmap[child] = len(childmap)
child_data.extend(data)
@@ -444,34 +444,34 @@ def emit_bool_trie(f, name, t_data, is_pub=True):
pub_string = "pub "
f.write(" %sconst %s: &super::BoolTrie = &super::BoolTrie {\n" % (pub_string, name))
f.write(" r1: [\n")
data = ','.join('0x%016x' % chunk for chunk in chunks[0:0x800 // chunk_size])
data = ",".join("0x%016x" % chunk for chunk in chunks[0:0x800 // chunk_size])
format_table_content(f, data, 12)
f.write("\n ],\n")

# 0x800..0x10000 trie
(r2, r3) = compute_trie(chunks[0x800 // chunk_size : 0x10000 // chunk_size], 64 // chunk_size)
f.write(" r2: [\n")
data = ','.join(str(node) for node in r2)
data = ",".join(str(node) for node in r2)
format_table_content(f, data, 12)
f.write("\n ],\n")
f.write(" r3: &[\n")
data = ','.join('0x%016x' % chunk for chunk in r3)
data = ",".join("0x%016x" % chunk for chunk in r3)
format_table_content(f, data, 12)
f.write("\n ],\n")

# 0x10000..0x110000 trie
(mid, r6) = compute_trie(chunks[0x10000 // chunk_size : 0x110000 // chunk_size], 64 // chunk_size)
(r4, r5) = compute_trie(mid, 64)
f.write(" r4: [\n")
data = ','.join(str(node) for node in r4)
data = ",".join(str(node) for node in r4)
format_table_content(f, data, 12)
f.write("\n ],\n")
f.write(" r5: &[\n")
data = ','.join(str(node) for node in r5)
data = ",".join(str(node) for node in r5)
format_table_content(f, data, 12)
f.write("\n ],\n")
f.write(" r6: &[\n")
data = ','.join('0x%016x' % chunk for chunk in r6)
data = ",".join("0x%016x" % chunk for chunk in r6)
format_table_content(f, data, 12)
f.write("\n ],\n")

@@ -497,12 +497,12 @@ def emit_small_bool_trie(f, name, t_data, is_pub=True):
(r1, r2) = compute_trie(chunks, 1)

f.write(" r1: &[\n")
data = ','.join(str(node) for node in r1)
data = ",".join(str(node) for node in r1)
format_table_content(f, data, 12)
f.write("\n ],\n")

f.write(" r2: &[\n")
data = ','.join('0x%016x' % node for node in r2)
data = ",".join("0x%016x" % node for node in r2)
format_table_content(f, data, 12)
f.write("\n ],\n")

@@ -599,11 +599,9 @@ def main():
print("Using Unicode version: {}".format(unicode_version.as_str))

tables_rs_path = os.path.join(THIS_DIR, "tables.rs")
if os.path.exists(tables_rs_path):
os.remove(tables_rs_path)

# will overwrite the file if it exists
with open(tables_rs_path, "w") as rf:
# write the file's preamble
rf.write(PREAMBLE)

unicode_version_notice = textwrap.dedent("""