-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 4b88b01
Showing
15 changed files
with
58,287 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.vscode | ||
.idea |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# 小鹤双拼五笔形 FLYPY_YK方案 | ||
|
||
## 说明 | ||
|
||
- 本项目是基于小鹤双拼输入法的 Rime 方案,将小鹤双拼的形码转换为五笔形码,以便五笔用户更轻松地使用小鹤双拼输入法,达到与鹤形输入相同的输入效率。 | ||
- 小鹤双拼使用鹤形补码来解决重码问题,因为它有大量同音字。但对于五笔用户来说,学习鹤形反而可能会更混淆。因此,本方案使用五笔补码来解决重码问题,避免学习鹤形码。 | ||
|
||
## 输入方案 | ||
|
||
本方案默认为单字方案,单字全码为4码,前两码为小鹤双拼码,后两码为五笔码。 | ||
|
||
例如: | ||
|
||
- `衡`,小鹤双拼为`hk`,五笔为`tqdh`,本方案为`hktq` | ||
- `框`,小鹤双拼为`kl`,五笔为`sagg`,本方案为`klsa` | ||
|
||
五笔的一级简码不会作为补码,例如: | ||
|
||
- `我`,小鹤双拼为`wo`,五笔一级简码为`q`,五笔全码为`trnt`,本方案为`wotr`,而不是`woq` | ||
- `这`,小鹤双拼为`ve`,五笔一级简码为`p`,五笔全码为`ypi`,本方案为`veyp`,而不是`vep` | ||
|
||
## 安装 | ||
|
||
1. 将本项目的`flypy_yk`目录下的文件复制到Rime输入法的目录下。 | ||
2. 重新部署即可完成`FLTPY_YK`方案的安装。 | ||
|
||
- 本项目还提供了五笔方案与五笔小鹤混输方案。 | ||
- 将本项目`wubi`目录下的文件复制到 Rime 输入法的目录下,重新部署即可完成安装。 | ||
- 该五笔方案提供了按`z`键输入小鹤双拼反查五笔码的功能。 | ||
- 由于五笔码与小鹤双拼码重码率不高,直接使用五笔小鹤混输方案更为方便。 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
import collections | ||
|
||
|
||
def is_chinese(uchar): | ||
"""判断一个unicode是否是汉字""" | ||
return uchar >= u'\u4e00' and uchar <= u'\u9fa5' | ||
|
||
|
||
def is_can_stay(uchar): | ||
if is_chinese(uchar) and len(uchar) > 1: | ||
return False | ||
if uchar.startswith("?"): | ||
return False | ||
return True | ||
|
||
|
||
def is_line_can_stay(line): | ||
first = str(line.split("\t")[0]) | ||
if not is_can_stay(first): | ||
print("trim", line) | ||
return False | ||
return True | ||
|
||
|
||
def get_single_char_line(line): | ||
if not is_line_can_stay(line): | ||
return | ||
return line | ||
|
||
|
||
def simp_file(name, out): | ||
simp_file_fn(name, out, get_single_char_line) | ||
|
||
|
||
def simp_file_fn(name, out, fn): | ||
mark="..." | ||
lines = [] | ||
begin = False | ||
with open(name, "r", encoding="utf8") as f: | ||
for line in f: | ||
if begin: | ||
line = fn(line) | ||
else: | ||
if line.startswith(mark): | ||
begin = True | ||
if line: | ||
lines.append(line) | ||
|
||
if not begin: | ||
print("never begin") | ||
return | ||
with open(out, "w+", encoding="utf8") as f: | ||
f.writelines(lines) | ||
|
||
|
||
def get_flypy_no_end_line(): | ||
word_to_py = collections.defaultdict(set) | ||
def fn(line): | ||
args = line.split("\t") | ||
word = args[0] | ||
if not is_chinese(word): | ||
return | ||
|
||
py = args[1] | ||
if len(py) > 2: | ||
py = py[:2] | ||
|
||
if py in word_to_py[word]: | ||
return | ||
word_to_py[word].add(py) | ||
|
||
args[1] = py | ||
return "\t".join(args) + "\n" | ||
return fn | ||
|
||
|
||
|
||
def flypy_no_end(filename): | ||
"""小鹤双拼去掉加形""" | ||
simp_file_fn(filename, filename + ".no_end", get_flypy_no_end_line()) | ||
|
||
|
||
def get_word_to_code(filename): | ||
word_to_code = collections.defaultdict(list) | ||
begin = False | ||
with open(filename, "r", encoding="utf8") as f: | ||
for line in f: | ||
if begin: | ||
args = line.split("\t") | ||
if len(args) < 2: | ||
continue | ||
word = args[0] | ||
code = args[1].strip() | ||
word_to_code[word].append(code) | ||
else: | ||
if line.startswith("..."): | ||
begin = True | ||
return word_to_code | ||
|
||
|
||
FLYPY_WUBI_PREFIX = """ | ||
# Rime dict | ||
# encoding: utf-8 | ||
# 小鹤双拼加五笔形码 | ||
# 如"这"字,双拼码为ve,五笔码为yp,则加形后的码为vey | ||
--- | ||
name: flypy_yk.wubi | ||
version: "0.0.1" | ||
sort: original | ||
use_preset_vocabulary: false | ||
... | ||
""" | ||
|
||
|
||
def to_flypy_wubi(filename="flypy_yk.wubi.dict.yaml"): | ||
word_to_wubi = get_word_to_code("wubi86_jidian.dict.yaml") | ||
word_to_py = get_word_to_code("flypy_yk.base.dict.yaml") | ||
print("wubi", len(word_to_wubi)) | ||
print("flypy", len(word_to_py)) | ||
|
||
lines = [FLYPY_WUBI_PREFIX] | ||
for word, pys in word_to_py.items(): | ||
wubis = word_to_wubi.get(word) | ||
if not wubis: | ||
continue | ||
for py in pys: | ||
wubi = max(wubis, key=len) | ||
lines.append(word + "\t" + py + wubi[:1] + "\n") | ||
lines.append(word + "\t" + py + wubi[:2] + "\n") | ||
|
||
with open(filename, "w", encoding="utf8") as f: | ||
f.writelines(lines) | ||
|
||
|
||
def code_word_swap(filename): | ||
def fn(line): | ||
args = line.split("\t") | ||
if len(args) < 2: | ||
return | ||
code = args[0].strip() | ||
word = args[1].strip() | ||
return word + "\t" + code + "\n" | ||
simp_file_fn(filename, filename + ".swap", fn) | ||
|
||
|
||
def main(): | ||
# simp_file("wubi86_jidian.dict.yaml.org", "wubi86_jidian.dict.yaml") | ||
# simp_file("double_pinyin_flypy.dict.yaml.org", "double_pinyin_flypy.dict.yaml") | ||
|
||
code_word_swap("x.yaml") | ||
# flypy_no_end("x.yaml.swap") | ||
# to_flypy_wubi("flypy_yk.wubi.dict.yaml") | ||
print("done") | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
Oops, something went wrong.