117,120c117 < with path.open('rb') as fd: < encoding = fd.readline().decode() < if encoding.lower() == 'microsoft-cp1251': < encoding = 'cp1251' --- > with open(path, 'r') as fd: 122,152c119,149 < for pattern in path.read_text(encoding).split('\n')[1:]: < pattern = pattern.strip() < if not pattern or pattern.startswith(ignored): < continue < < # replace ^^hh with the real character < pattern = parse_hex( < lambda match: chr(int(match.group(1), 16)), pattern) < < # read nonstandard hyphen alternatives < if '/' in pattern and '=' in pattern: < pattern, alternative = pattern.split('/', 1) < factory = AlternativeParser(pattern, alternative) < else: < factory = int < < tags, values = zip(*[ < (string, factory(i or '0')) for i, string in parse(pattern)]) < < # if only zeros, skip this pattern < if max(values) == 0: < continue < < # chop zeros from beginning and end, and store start offset < start, end = 0, len(values) < while not values[start]: < start += 1 < while not values[end - 1]: < end -= 1 < < self.patterns[''.join(tags)] = start, values[start:end] --- > for pattern in fd.readlines()[1:]: > pattern = pattern.strip() > if not pattern or pattern.startswith(ignored): > continue > > # replace ^^hh with the real character > pattern = parse_hex( > lambda match: chr(int(match.group(1), 16)), pattern) > > # read nonstandard hyphen alternatives > if '/' in pattern and '=' in pattern: > pattern, alternative = pattern.split('/', 1) > factory = AlternativeParser(pattern, alternative) > else: > factory = int > > tags, values = zip(*[ > (string, factory(i or '0')) for i, string in parse(pattern)]) > > # if only zeros, skip this pattern > if max(values) == 0: > continue > > # chop zeros from beginning and end, and store start offset > start, end = 0, len(values) > while not values[start]: > start += 1 > while not values[end - 1]: > end -= 1 > > self.patterns[''.join(tags)] = start, values[start:end]