Skip to content

Commit

Permalink
optimize Unicode tables
Browse files Browse the repository at this point in the history
Apply the optimization described in
rust-lang/regex#73 (comment)
to Rust's copy of `unicode.py`.

This shrinks librustc_unicode's tables.rs from 479kB to 456kB,
and should slightly improve performance for related operations
(e.g., is_alphabetic() and is_xid_start()).

In addition, pull in the fix from @dscorbett's commit
d25c39f86568a147f9b7080c25711fb1f98f056a in regex, which
makes `load_properties()` more tolerant of whitespace
in the Unicode tables. (This fix does not result in any
changes to tables.rs, but could if the Unicode tables
change in the future.)
  • Loading branch information
kwantam committed Apr 18, 2015
1 parent a81ce5f commit f14d289
Show file tree
Hide file tree
Showing 2 changed files with 656 additions and 897 deletions.
11 changes: 8 additions & 3 deletions src/etc/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

import fileinput, re, os, sys, operator

preamble = '''// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
Expand Down Expand Up @@ -207,8 +207,8 @@ def format_table_content(f, content, indent):
def load_properties(f, interestingprops):
fetch(f)
props = {}
re1 = re.compile("^([0-9A-F]+) +; (\w+)")
re2 = re.compile("^([0-9A-F]+)\.\.([0-9A-F]+) +; (\w+)")
re1 = re.compile("^ *([0-9A-F]+) *; *(\w+)")
re2 = re.compile("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")

for line in fileinput.input(os.path.basename(f)):
prop = None
Expand All @@ -234,6 +234,11 @@ def load_properties(f, interestingprops):
if prop not in props:
props[prop] = []
props[prop].append((d_lo, d_hi))

# optimize if possible
for prop in props:
props[prop] = group_cat(ungroup_cat(props[prop]))

return props

# load all widths of want_widths, except those in except_cats
Expand Down
Loading

0 comments on commit f14d289

Please sign in to comment.