Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JSON を原本として各辞書を生成する #55

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18,749 changes: 9,393 additions & 9,356 deletions ChangeLog

Large diffs are not rendered by default.

74 changes: 57 additions & 17 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,25 @@
# External commands used by the recipes, one definition per tool, sorted by
# name.  Each can be overridden on the command line (e.g. `make CURL='curl -sS'`).
# Recursive `=` is kept on purpose: RUBY expands $(TOOLS_DIR), which is not
# defined in this part of the file and may be set later.
COUNT = skkdic-count
CURL = curl
DATE = date
DENO = deno
EMACS = emacs --batch --directory ./
EXPR = skkdic-expr
EXPR2 = skkdic-expr2
GAWK = LC_ALL=C gawk
GREP = grep
GZIP = gzip -9
ICONV = iconv
MD5 = md5
MV = mv --force
RM = /bin/rm -f
RUBY = ruby -I $(TOOLS_DIR)/filters
SED = sed
SORT = skkdic-sort
TAR = tar
TOUCH = touch
UNZIP = unzip -o

# Directory holding the zip-code dictionaries.
ZIPDIC_DIR = ./zipcode

DIC2PDB = dic2pdb
DICCOMPACT = diccompact.rb
Expand Down Expand Up @@ -49,7 +52,8 @@ CDB_TARGET = ./`basename $(CDB_SOURCE)`.cdb

# Remove editor backup files and the generated dictionaries / intermediates.
# The file list appears once only, and the UTF-8 copies generated under
# $(ZIPDIC_DIR) are removed as well (the bare *.u8 glob only covers the
# top-level directory).
clean:
	$(RM) *.gz* *~ `find . -name '*~'` `find . -name '.*~'` `find . -name '.#*'` \
	  *.unannotated SKK-JISYO.wrong PBinlineDB.pdb *.tmp *.u8 *.w PBinlineDB.dic *.taciturn \
	  SKK-JISYO.L+ SKK-JISYO.total SKK-JISYO.total+zipcode SKK-JISYO.L.header SKK-JISYO.china_taiwan \
	  $(ZIPDIC_DIR)/SKK-JISYO.zipcode.u8 $(ZIPDIC_DIR)/SKK-JISYO.office.zipcode.u8 \
	  emoji-list.txt

archive: gzip
Expand Down Expand Up @@ -117,33 +121,35 @@ SKK-JISYO.L+: SKK-JISYO.L SKK-JISYO.L.header
$(EXPR2) SKK-JISYO.L + SKK-JISYO.tmp | cat SKK-JISYO.L.header - > SKK-JISYO.L+
$(RM) SKK-JISYO.tmp SKK-JISYO.addition

# SKK-JISYO.total: SKK-JISYO.L merged with the specialised dictionaries and
# with entries derived from SKK-JISYO.L / SKK-JISYO.notes by the filter scripts.
# The merge works on the UTF-8 (.u8) copies; SKK-JISYO.fullname is merged
# as-is, without a .u8 conversion.  The filter scripts still read the EUC-JP
# originals, so their collected output is converted once before merging.
SKK-JISYO.total: SKK-JISYO.L SKK-JISYO.L.u8 SKK-JISYO.geo.u8 SKK-JISYO.station.u8 SKK-JISYO.jinmei.u8 SKK-JISYO.propernoun.u8 SKK-JISYO.fullname SKK-JISYO.law.u8 SKK-JISYO.okinawa.u8 SKK-JISYO.hukugougo.u8 SKK-JISYO.assoc.u8 SKK-JISYO.notes SKK-JISYO.L.header.u8
	$(RUBY) $(TOOLS_DIR)/filters/conjugation.rb -Cpox SKK-JISYO.notes > SKK-JISYO.tmp
	$(RUBY) $(TOOLS_DIR)/filters/asayaKe.rb -p SKK-JISYO.L >> SKK-JISYO.tmp
	$(RUBY) $(TOOLS_DIR)/filters/complete-numerative.rb -pU SKK-JISYO.L >> SKK-JISYO.tmp
	$(RUBY) $(TOOLS_DIR)/filters/abbrev-convert.rb -K -s 2 SKK-JISYO.L >> SKK-JISYO.tmp
	$(RUBY) $(TOOLS_DIR)/filters/abbrev-convert.rb -w -s 2 SKK-JISYO.L >> SKK-JISYO.tmp
	$(ICONV) -f euc-jp -t utf-8 SKK-JISYO.tmp > SKK-JISYO.tmp.u8
# The order of the operands is very important here.
	$(EXPR2) SKK-JISYO.geo.u8 + SKK-JISYO.station.u8 + SKK-JISYO.jinmei.u8 + SKK-JISYO.propernoun.u8 + SKK-JISYO.fullname + SKK-JISYO.tmp.u8 + SKK-JISYO.law.u8 + SKK-JISYO.okinawa.u8 + SKK-JISYO.hukugougo.u8 + SKK-JISYO.assoc.u8 - SKK-JISYO.L.u8 > SKK-JISYO.addition
# Why subtract SKK-JISYO.L first and add it back below?  So that the noisy
# annotations from SKK-JISYO.jinmei and the like are not added to entries
# that SKK-JISYO.L already has.
	$(EXPR2) SKK-JISYO.L.u8 + SKK-JISYO.addition | cat SKK-JISYO.L.header.u8 - > $@
# Remove every scratch file, including the converted filter output.
	$(RM) SKK-JISYO.tmp SKK-JISYO.tmp.u8 SKK-JISYO.addition

# SKK-JISYO.total plus the two zip-code dictionaries, under the UTF-8 header.
# The zip-code sources are not UTF-8 yet, so their converted .u8 copies are
# what gets merged.  Only this one rule defines the target; a second recipe
# for the same target would be reported by make as "overriding recipe".
SKK-JISYO.total+zipcode: SKK-JISYO.total $(ZIPDIC_DIR)/SKK-JISYO.zipcode.u8 $(ZIPDIC_DIR)/SKK-JISYO.office.zipcode.u8 SKK-JISYO.L.header.u8
	$(EXPR2) $< + $(ZIPDIC_DIR)/SKK-JISYO.zipcode.u8 + $(ZIPDIC_DIR)/SKK-JISYO.office.zipcode.u8 | cat SKK-JISYO.L.header.u8 - > $@

# Runs annotation-filter.rb -d over SKK-JISYO.L, pipes the result through
# $(EXPR2) and prepends the generated header.
# $< is the dictionary, $(word 2,$^) is SKK-JISYO.L.header.
SKK-JISYO.L.taciturn: SKK-JISYO.L SKK-JISYO.L.header
	$(RUBY) $(TOOLS_DIR)/filters/annotation-filter.rb -d $< | $(EXPR2) | cat $(word 2,$^) - > $@

# Same pipeline as SKK-JISYO.L.taciturn, applied to SKK-JISYO.L+.
# $< is the dictionary, $(word 2,$^) is SKK-JISYO.L.header.
SKK-JISYO.L+.taciturn: SKK-JISYO.L+ SKK-JISYO.L.header
	$(RUBY) $(TOOLS_DIR)/filters/annotation-filter.rb -d $< | $(EXPR2) | cat $(word 2,$^) - > $@

# Runs script/annotation-filter.rb -8 -d over SKK-JISYO.total, pipes the
# result through $(EXPR2) and prepends the UTF-8 header.
# NOTE(review): -8 presumably selects UTF-8 input/output -- confirm against
# script/annotation-filter.rb.
# This is the only rule for the target; the EUC-JP variant that used
# $(TOOLS_DIR)/filters/annotation-filter.rb and SKK-JISYO.L.header is dropped.
SKK-JISYO.total.taciturn: SKK-JISYO.total SKK-JISYO.L.header.u8
	$(RUBY) script/annotation-filter.rb -8 -d $< | $(EXPR2) | cat SKK-JISYO.L.header.u8 - > $@

# Runs script/annotation-filter.rb -8 -d over SKK-JISYO.total+zipcode, pipes
# the result through $(EXPR2) and prepends the UTF-8 header.
# NOTE(review): -8 presumably selects UTF-8 input/output -- confirm against
# script/annotation-filter.rb.
# This is the only rule for the target; the EUC-JP variant that used
# $(TOOLS_DIR)/filters/annotation-filter.rb and SKK-JISYO.L.header is dropped.
SKK-JISYO.total+zipcode.taciturn: SKK-JISYO.total+zipcode SKK-JISYO.L.header.u8
	$(RUBY) script/annotation-filter.rb -8 -d $< | $(EXPR2) | cat SKK-JISYO.L.header.u8 - > $@

# Feeds SKK-JISYO.L+ through unannotation.awk and stores the result.
SKK-JISYO.L+.unannotated: SKK-JISYO.L+
	$(GAWK) -f $(TOOLS_DIR)/unannotation.awk $< > $@
Expand All @@ -158,6 +164,15 @@ SKK-JISYO.L.header: SKK-JISYO.L
echo ';; (This dictionary was automatically generated from SKK dictionaries)' > SKK-JISYO.L.header
$(SED) -n '/^;; okuri-ari entries./q;p' SKK-JISYO.L >> SKK-JISYO.L.header

# --- UTF-8 (.u8) copies of the EUC-encoded sources ---

# The office zip-code dictionary is converted from euc-jisx0213, unlike the
# generic %.u8 rule below, which converts from euc-jp.
$(ZIPDIC_DIR)/SKK-JISYO.office.zipcode.u8: $(ZIPDIC_DIR)/SKK-JISYO.office.zipcode
	$(ICONV) -f euc-jisx0213 -t utf-8 $< > $@

# The header is converted and the coding declaration on its second line is
# rewritten to match.  The rewrite goes through a scratch file instead of
# `sed -i`: GNU sed and BSD/macOS sed disagree on whether -i takes an
# argument, so `sed -i SCRIPT FILE` only works with GNU sed.
SKK-JISYO.L.header.u8: SKK-JISYO.L.header
	$(ICONV) -f euc-jp -t utf-8 $< > $@.tmp
	$(SED) "2s/coding: euc-jp /coding: utf-8 /" $@.tmp > $@
	$(RM) $@.tmp

# Generic rule: FILE.u8 is FILE converted from euc-jp to utf-8.
%.u8: %
	$(ICONV) -f euc-jp -t utf-8 $< > $@


# Aggregate target: the `unannotated` target plus every *.unannotated
# dictionary listed here.
unannotated-all: unannotated SKK-JISYO.L+.unannotated SKK-JISYO.total.unannotated SKK-JISYO.total+zipcode.unannotated

# Aggregate target: every *.taciturn dictionary listed here.
taciturn-all: SKK-JISYO.L.taciturn SKK-JISYO.L+.taciturn SKK-JISYO.total.taciturn SKK-JISYO.total+zipcode.taciturn
Expand Down Expand Up @@ -189,7 +204,7 @@ SKK-JISYO.emoji.ja: cldr-common.zip
test -f ja.xml || $(UNZIP) -p cldr-common.zip "*common/annotations/ja.xml" > ja.xml
$(EMACS) --load emoji.el --funcall ja > SKK-JISYO.emoji.ja

# Generated by emoji.el's kanji-to-kana function, whose standard output
# becomes the target.
# NOTE(review): SKK-JISYO.L.unannotated is presumably read by emoji.el --
# confirm; it is not named on the command line.
# The intermediate SKK-JISYO.emoji.kanji is deleted once the target is written.
SKK-JISYO.emoji.kana: SKK-JISYO.emoji.kanji SKK-JISYO.L.unannotated
	$(EMACS) --load emoji.el --funcall kanji-to-kana > $@
	$(RM) SKK-JISYO.emoji.kanji

Expand All @@ -206,6 +221,8 @@ cldr-common.zip:
# http://www.edrdg.org/jmdict/edict.html
# ELECTRONIC DICTIONARY RESEARCH AND DEVELOPMENT GROUP GENERAL DICTIONARY LICENCE STATEMENT
# http://www.edrdg.org/edrdg/licence.html
# http://ftp.edrdg.org/pub/Nihongo/00INDEX.html
# After nearly 30 years of operation the Monash ftp server has been closed down.

SKK-JISYO.edict2: edict2u
$(MV) SKK-JISYO.edict2 SKK-JISYO.edict2.ORIG
Expand All @@ -216,7 +233,7 @@ SKK-JISYO.edict2: edict2u
$(MD5) SKK-JISYO.edict2.gz > SKK-JISYO.edict2.gz.md5

# Download edict2u.gz from the EDRDG server and unpack it to `edict2u`.
# The former Monash FTP mirror has been closed down, so it is no longer
# contacted: a failing first download would abort the recipe before the
# working one runs.
edict2u:
	$(CURL) -o $@.gz http://ftp.edrdg.org/pub/Nihongo/edict2u.gz
	$(GZIP) --force --decompress $@.gz


Expand All @@ -233,8 +250,31 @@ IVD_Sequences.txt:
# Fetch the 2017-12-12 IVD collections list unless a copy is already present.
IVD_Collections.txt:
	test -f $@ || $(CURL) -o $@ https://unicode.org/ivd/data/2017-12-12/IVD_Collections.txt

# JSON <--> txt
#
# Dictionaries that have a JSON form, grouped by the encoding of their text
# form.  The conversion scripts receive the encoding through -c.
EUC_SRCS = SKK-JISYO.assoc SKK-JISYO.china_taiwan SKK-JISYO.edict SKK-JISYO.geo SKK-JISYO.hukugougo SKK-JISYO.itaiji SKK-JISYO.jinmei SKK-JISYO.JIS2 SKK-JISYO.law SKK-JISYO.L SKK-JISYO.mazegaki SKK-JISYO.M SKK-JISYO.ML SKK-JISYO.okinawa SKK-JISYO.propernoun SKK-JISYO.pubdic+ SKK-JISYO.S SKK-JISYO.station
UTF_SRCS = SKK-JISYO.edict2 SKK-JISYO.emoji SKK-JISYO.fullname SKK-JISYO.pinyin
EUC_JSON = $(EUC_SRCS:%=json/%.json)
UTF_JSON = $(UTF_SRCS:%=json/%.json)

# `json` is also the name of the output directory, so the target is declared
# phony to keep it a pure "build every JSON file" command.
.PHONY: json
json: $(EUC_JSON) $(UTF_JSON)

# JSON -> text.  $< is json/$@.json.  The encoding is chosen by list
# membership; a name found in neither list is an error rather than a silent
# no-op that leaves the target unbuilt.
SKK-JISYO.%: json/SKK-JISYO.%.json meta/SKK-JISYO.%.yaml
	if [ "x$(filter $@,$(EUC_SRCS))" = "x$@" ]; then \
	  $(DENO) run --allow-read --allow-write --allow-net script/json2txt.ts \
	    -c EUC-JP -i $< -o $@ ; \
	elif [ "x$(filter $@,$(UTF_SRCS))" = "x$@" ]; then \
	  $(DENO) run --allow-read --allow-write --allow-net script/json2txt.ts \
	    -c UTF-8 -i $< -o $@ ; \
	else \
	  echo "$@: not listed in EUC_SRCS or UTF_SRCS" >&2 ; exit 1 ; \
	fi

# Text -> JSON.  $* is the dictionary name (the stem of json/%.json).
# Caution: this rule deliberately has no prerequisite.  Depending on % would
# make it circular with the SKK-JISYO.% rule above, which is why no
# `json/%.json: %` rule may exist next to it.
json/%.json:
	if [ "x$(filter $@,$(EUC_JSON))" = "x$@" ]; then \
	  $(DENO) run --allow-read --allow-write --allow-net script/txt2json.ts \
	    -c EUC-JP -i $* -m meta/$*.yaml -o $@ -s schema/jisyo.schema.v0.1.0.json ; \
	elif [ "x$(filter $@,$(UTF_JSON))" = "x$@" ]; then \
	  $(DENO) run --allow-read --allow-write --allow-net script/txt2json.ts \
	    -c UTF-8 -i $* -m meta/$*.yaml -o $@ -s schema/jisyo.schema.v0.1.0.json ; \
	else \
	  echo "$@: not listed in EUC_JSON or UTF_JSON" >&2 ; exit 1 ; \
	fi
# end of Makefile.
Loading